diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 03fbf8c1..0ce880f1 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -34,4 +34,4 @@ jobs: uses: actions/upload-artifact@v4 with: name: documentation - path: docs/_build/html/ + path: docs/build/html/ diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index fc0916f9..afe675a1 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -26,13 +26,13 @@ jobs: pip install "black==25.9.0" isort flake8 mypy ruff - name: Run black (code formatting) - run: black --check --diff src/alignment/ tests/ scripts/ + run: black --check --diff src/nodelens/ tests/ scripts/ - name: Run isort (import sorting) - run: isort --check-only --diff --profile black src/alignment/ tests/ scripts/ + run: isort --check-only --diff --profile black src/nodelens/ tests/ scripts/ - name: Run flake8 (linting) - run: flake8 src/alignment/ tests/ scripts/ --max-line-length=100 --ignore=E203,W503 || echo "Flake8 has warnings" + run: flake8 src/nodelens/ tests/ scripts/ --max-line-length=100 --ignore=E203,W503 || echo "Flake8 has warnings" - name: Run mypy (type checking) - run: mypy src/alignment/ --ignore-missing-imports --no-strict-optional || echo "Type checking has warnings" + run: mypy src/nodelens/ --ignore-missing-imports --no-strict-optional || echo "Type checking has warnings" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index bc13e190..02aaefd1 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -5,6 +5,9 @@ on: tags: - 'v*' +permissions: + contents: write + jobs: release: runs-on: ubuntu-latest @@ -13,7 +16,7 @@ jobs: - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.10' @@ -28,20 +31,26 @@ jobs: - name: Check package run: twine check dist/* - - name: Upload to PyPI - env: - TWINE_USERNAME: __token__ - 
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} - run: twine upload dist/* - - name: Create GitHub Release uses: softprops/action-gh-release@v1 with: + files: dist/* body: | - ## Installation + Public research-code release for NodeLens. + + The Python package is imported as `nodelens`. + + ## Install From Source ```bash - pip install alignment-framework + git clone https://github.com/KempnerInstitute/nodelens.git + cd nodelens + pip install -e . ``` + + ## Supernodes And SCAR Artifacts + + Derived artifacts are available at: + https://huggingface.co/datasets/hsafaai/supernodes-scar-artifacts draft: false prerelease: false diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a0107cde..65ed907d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -45,11 +45,11 @@ jobs: - name: Run tests run: | - pytest tests/ -v --cov=src/alignment --cov-report=xml --cov-report=html --cov-report=term-missing --tb=short -ra + pytest tests/ -v --cov=src/nodelens --cov-report=xml --cov-report=html --cov-report=term-missing --tb=short -ra - name: Check coverage threshold run: | - coverage report --fail-under=25 + python -m coverage report --fail-under=20 - name: Upload coverage to Codecov uses: codecov/codecov-action@v3 diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 00000000..36bbfe07 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,29 @@ +cff-version: 1.2.0 +message: "If you use this code or the Supernodes and SCAR artifacts, please cite the paper and archived release." 
+title: "NodeLens" +version: "0.2.0" +repository-code: "https://github.com/KempnerInstitute/nodelens" +url: "https://github.com/KempnerInstitute/nodelens" +license: "MIT" +authors: + - family-names: "Cherilyn" + given-names: "Audrey" + affiliation: "Kempner Institute at Harvard University" + - family-names: "Safaai" + given-names: "Houman" + affiliation: "Kempner Institute at Harvard University" +preferred-citation: + type: article + title: "Supernodes and Halos: Loss-Critical Hubs in LLM Feed-Forward Layers" + authors: + - family-names: "Cherilyn" + given-names: "Audrey" + affiliation: "Kempner Institute at Harvard University" + - family-names: "Safaai" + given-names: "Houman" + affiliation: "Kempner Institute at Harvard University" + year: 2026 + url: "https://github.com/KempnerInstitute/nodelens" + identifiers: + - type: url + value: "https://huggingface.co/datasets/hsafaai/supernodes-scar-artifacts" diff --git a/README.md b/README.md index 16283ad7..b0bd795f 100644 --- a/README.md +++ b/README.md @@ -1,34 +1,65 @@ -# Alignment Framework +# NodeLens -Neural network analysis and structured pruning using alignment metrics and information theory. +Node and channel metrics for neural network interpretability, importance, and interventions. 
-[![Lint](https://github.com/KempnerInstitute/alignment/actions/workflows/lint.yml/badge.svg)](https://github.com/KempnerInstitute/alignment/actions/workflows/lint.yml) -[![Pre-commit](https://github.com/KempnerInstitute/alignment/actions/workflows/pre-commit.yml/badge.svg)](https://github.com/KempnerInstitute/alignment/actions/workflows/pre-commit.yml) -[![Code Lines](https://img.shields.io/tokei/lines/github/KempnerInstitute/alignment?logo=files&logoColor=white)](https://github.com/KempnerInstitute/alignment) -[![CLI](https://img.shields.io/badge/CLI-scripts%2Frun_experiment.py-121011?logo=gnubash&logoColor=white)](scripts/run_experiment.py) +[![Tests](https://github.com/KempnerInstitute/nodelens/actions/workflows/test.yml/badge.svg)](https://github.com/KempnerInstitute/nodelens/actions/workflows/test.yml) +[![Lint](https://github.com/KempnerInstitute/nodelens/actions/workflows/lint.yml/badge.svg)](https://github.com/KempnerInstitute/nodelens/actions/workflows/lint.yml) +[![Documentation](https://github.com/KempnerInstitute/nodelens/actions/workflows/docs.yml/badge.svg)](https://github.com/KempnerInstitute/nodelens/actions/workflows/docs.yml) +[![Release](https://github.com/KempnerInstitute/nodelens/actions/workflows/release.yml/badge.svg)](https://github.com/KempnerInstitute/nodelens/actions/workflows/release.yml) [![Python](https://img.shields.io/badge/python-%3E%3D3.8-3776AB?logo=python&logoColor=white)](pyproject.toml) +[![Artifacts](https://img.shields.io/badge/Hugging%20Face-artifacts-ffcc33)](https://huggingface.co/datasets/hsafaai/supernodes-scar-artifacts) +[![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) + +NodeLens is a research codebase for studying which channels, neurons, and +features matter most for model behavior. The Python package is imported as +`nodelens`. + +The repository supports two related workflows: + +- General metric analysis for vision models, transformers, and LLMs. 
+- Paper-specific releases under `projects/`, including the Supernodes and SCAR + artifact workflow. + +## What The Code Does + +```mermaid +flowchart LR + A[Model + calibration data] --> B[Capture activations and gradients] + B --> C[Compute channel metrics] + C --> D[Identify loss-critical cores] + C --> E[Estimate redundancy and halo structure] + D --> F[Structured pruning and ablation probes] + E --> F + F --> G[Figures, tables, manifests, HF artifacts] +``` -## Overview - -This framework provides tools for analyzing and pruning neural networks through: +Core capabilities: -- **Alignment metrics**: Rayleigh quotient, activation-based importance -- **Information-theoretic analysis**: Mutual information, redundancy, synergy -- **Cluster-based analysis**: Functional type identification, cross-layer halo tracking -- **Structured pruning**: Channel/neuron removal with multiple scoring strategies +- Loss-sensitive channel scoring, including SCAR loss-proxy metrics. +- Activation, curvature, Taylor, Rayleigh quotient, and information-theoretic metrics. +- Structured pruning strategies for channel-level model analysis. +- Cluster and halo-style analyses for local redundancy structure. +- Reproducible project folders for paper artifacts and public releases. -**Supported architectures**: MLPs, CNNs (ResNet, VGG, MobileNet), Transformers, LLMs (LLaMA, Mistral, Qwen) +Supported model families include MLPs, CNNs, transformer language models, and +LLM backends through Hugging Face causal language models. ## Installation ```bash -git clone https://github.com/KempnerInstitute/alignment.git -cd alignment +git clone https://github.com/KempnerInstitute/nodelens.git +cd nodelens conda env create -f environment.yml -conda activate alignment +conda activate nodelens pip install -e . ``` +For documentation and optional analysis tools: + +```bash +pip install -e .[all] +``` + ## Quick Start ```bash @@ -36,130 +67,85 @@ pip install -e . 
python scripts/run_experiment.py --config configs/examples/mnist_basic.yaml # CNN pruning -python scripts/run_experiment.py --config configs/examples/resnet_pruning.yaml +python scripts/run_experiment.py --config configs/vision_prune/resnet18_cifar10_full.yaml -# LLM analysis -python scripts/run_experiment.py --config configs/paper/llama3_8b_full.yaml - -# Cluster-based analysis -python scripts/run_experiment.py --config configs/cluster_analysis/resnet18_cifar10_full.yaml +# LLM supernode and SCAR analysis +python scripts/run_experiment.py --config configs/prune_llm/llama3_8b_unified.yaml ``` -## Experiment Types - -| Type | Description | Config Example | -|------|-------------|----------------| -| `alignment_analysis` | General alignment metrics | `mnist_basic.yaml` | -| `llm_alignment` | LLM supernode/SCAR analysis | `llama3_8b_full.yaml` | -| `cluster_analysis` | Metric-space clustering with halos | `resnet18_cifar10_full.yaml` | - -## Metrics - -| Category | Metrics | -|----------|---------| -| Activation | `activation_l2_norm`, `activation_variance`, `activation_outlier_index` | -| Alignment | `rayleigh_quotient`, `delta_alignment` | -| Information | `mutual_information_gaussian`, `pairwise_redundancy_gaussian`, `gaussian_pid_synergy_mmi` | -| SCAR (LLM) | `scar_activation_power`, `scar_taylor`, `scar_curvature`, `scar_loss_proxy` | -| Synergy | `synergy_continuous_target` (with logit margin) | - -## Cluster-Based Analysis - -The cluster analysis framework groups channels/neurons into functional types: - -| Type | Characteristics | Pruning Implication | -|------|-----------------|---------------------| -| Critical | High RQ, Low Redundancy, High Synergy | Protect | -| Redundant | Moderate RQ, High Redundancy | Target for pruning | -| Synergistic | Moderate RQ, High Synergy | Preserve pairs | -| Background | Low on all metrics | Safe to remove | - -Cross-layer halo analysis tracks downstream dependencies to predict cascade effects. 
- -## Pruning Strategies - -| Strategy | Description | -|----------|-------------| -| `magnitude` | Prune by weight magnitude | -| `alignment` | Prune by alignment score | -| `composite` | Combine multiple metrics | -| `cluster_aware` | Use cluster membership and halo analysis | -| `random` | Random baseline | +Package the public Supernodes and SCAR artifacts: -## Project Structure +```bash +python projects/supernodes_scar/scripts/prepare_hf_artifacts.py \ + --output-dir outputs/supernodes_scar_hf \ + --clean +python projects/supernodes_scar/scripts/verify_hf_artifacts.py \ + outputs/supernodes_scar_hf ``` -alignment/ -├── configs/ -| ├── cluster_analysis/ # Cluster-based analysis configs -| ├── paper/ # Paper experiment configs -| └── examples/ # Example configs -├── scripts/ -| ├── run_experiment.py # Main entry point -| └── run_analysis.py # Post-hoc analysis -├── src/alignment/ -| ├── analysis/ # Visualization, clustering, cascade analysis -| ├── experiments/ # Experiment classes -| ├── metrics/ # Importance metrics -| ├── models/ # Model wrappers -| └── pruning/ # Pruning strategies -├── tests/ # Unit tests -└── docs/ # Documentation -``` - -## Key Modules - -### Analysis -- `MetricSpaceClustering`: K-means clustering in (RQ, Redundancy, Synergy) space -- `CrossLayerHaloAnalysis`: Track downstream channel dependencies -- `CascadeAnalysis`: Validate importance via ablation -- `UnifiedVisualizer`: Generate analysis plots -### Experiments -- `GeneralAlignmentExperiment`: Vision model analysis -- `LLMAlignmentExperiment`: LLM supernode and SCAR analysis -- `ClusterAnalysisExperiment`: Cluster-based analysis for any architecture - -### Metrics -- `RayleighQuotient`: Input-weight alignment -- `PairwiseRedundancyGaussian`: Gaussian MI-based redundancy -- `SynergyContinuousTarget`: PID synergy with continuous target -- SCAR metrics for LLMs +## Paper Releases + +Paper-specific release material lives under `projects/`. 
Reusable library code +stays in `src/nodelens`, while each project folder records the exact configs, +artifact layout, reproducibility notes, and release checklist for a paper. + +Current project: + +- `projects/supernodes_scar/`: release material for "Supernodes and Halos: + Loss-Critical Hubs in LLM Feed-Forward Layers". + +Derived artifacts for this project are staged on Hugging Face: + +- `https://huggingface.co/datasets/hsafaai/supernodes-scar-artifacts` + +## Main Concepts + +| Area | Examples | +|------|----------| +| Activation metrics | `activation_l2_norm`, `activation_variance`, `activation_outlier_index` | +| Alignment metrics | `rayleigh_quotient`, `delta_alignment` | +| Information metrics | `mutual_information_gaussian`, `pairwise_redundancy_gaussian`, `gaussian_pid_synergy_mmi` | +| SCAR metrics | `scar_activation_power`, `scar_taylor`, `scar_curvature`, `scar_loss_proxy` | +| Pruning strategies | `magnitude`, `alignment`, `composite`, `cluster_aware`, `random` | + +## Repository Layout + +```text +nodelens/ +|-- configs/ +| |-- prune_llm/ # LLM and SCAR configs +| |-- vision_prune/ # Vision pruning configs +| `-- examples/ # Small example configs +|-- projects/ # Paper-specific release material +|-- scripts/ +| |-- run_experiment.py # Main experiment entry point +| `-- run_analysis.py # Post-hoc analysis +|-- src/nodelens/ +| |-- analysis/ # Visualization, clustering, cascade analysis +| |-- experiments/ # Experiment classes +| |-- metrics/ # Importance metrics +| |-- models/ # Model wrappers +| `-- pruning/ # Pruning strategies +|-- tests/ # Unit tests +`-- docs/ # Documentation +``` ## Documentation -- [Usage Guide](docs/usage.md) - Running experiments and configuration -- [API Reference](docs/api_reference.md) - Core classes and functions -- [LLM Guide](docs/llm_guide.md) - LLM-specific analysis -- [Metric Consistency](docs/METRIC_CONSISTENCY.md) - Theory-code verification - -## Configuration - -```yaml -experiment_type: cluster_analysis # or 
llm_alignment, alignment_analysis +- [Usage Guide](docs/usage.md) +- [API Reference](docs/api_reference.md) +- [LLM Guide](docs/llm_guide.md) +- [Metric Consistency](docs/METRIC_CONSISTENCY.md) +- [Supernodes and SCAR Release Notes](projects/supernodes_scar/README.md) -model: - name: resnet18 - pretrained: true +Build the Sphinx docs locally: -dataset: - name: cifar10 - batch_size: 128 - -clustering: - n_clusters: 4 - compute_stability: true - -halo_analysis: - percentile: 90.0 - -pruning: - ratios: [0.3, 0.5, 0.7] - methods: [magnitude, taylor, cluster_aware] +```bash +cd docs +make html ``` -See `configs/template.yaml` for complete parameter reference. - ## Testing ```bash @@ -167,6 +153,11 @@ pytest tests/ pytest tests/unit/ -v ``` +## Citation + +If you use the Supernodes and SCAR release, please cite the paper and the +archived code/artifact versions listed in `CITATION.cff`. + ## License -See LICENSE file. +This repository is released under the MIT license. See [LICENSE](LICENSE). diff --git a/RELEASE.md b/RELEASE.md new file mode 100644 index 00000000..2b59d779 --- /dev/null +++ b/RELEASE.md @@ -0,0 +1,69 @@ +# Release Checklist + +This checklist keeps a public release reproducible without exposing private +draft files, cluster paths, logs, or model weights. + +## Before Tagging + +- Confirm that paper-facing code lives in `src/nodelens/` and + `projects/supernodes_scar/`. +- Confirm that private draft material under `drafts/` is not tracked for the + public release. +- Run the test and documentation checks listed below. +- Rebuild the Hugging Face artifact bundle and verify checksums. +- Record the code tag, Hugging Face revision, and arXiv version together. +- Rebuild the artifact bundle after the final commit so + `metadata/release_metadata.json` records `repo_dirty: false`. + +## Local Checks + +```bash +python -m pip install -e . 
+PYTHONPATH=src python -c "import nodelens; print(nodelens.__version__)" +pytest tests/unit -q +cd docs && make html +``` + +## Artifact Bundle + +```bash +python projects/supernodes_scar/scripts/prepare_hf_artifacts.py \ + --output-dir outputs/supernodes_scar_hf \ + --clean + +python projects/supernodes_scar/scripts/verify_hf_artifacts.py \ + outputs/supernodes_scar_hf +``` + +Upload the verified folder to: + +```text +https://huggingface.co/datasets/hsafaai/supernodes-scar-artifacts +``` + +## GitHub Tag + +Use an annotated tag for the public paper release: + +```bash +git tag -a v0.2.0-supernodes-scar -m "Supernodes and SCAR public release" +git push origin v0.2.0-supernodes-scar +``` + +The current release workflow builds and checks the package, then creates a +GitHub release. It does not publish to PyPI. + +## Suggested GitHub Release Notes + +```text +Supernodes and SCAR public release. + +Includes: +- loss-proxy and SCAR pruning code +- paper configs +- Supernodes and SCAR release docs +- scripts for staging Hugging Face artifacts + +Artifacts: +https://huggingface.co/datasets/hsafaai/supernodes-scar-artifacts +``` diff --git a/configs/README.md b/configs/README.md index c6539f45..7bbe8d8b 100644 --- a/configs/README.md +++ b/configs/README.md @@ -1,44 +1,70 @@ # Configuration Files -## Structure +NodeLens experiments are driven by YAML configs. The same runner is used for +small examples, vision pruning studies, and LLM supernode/SCAR experiments: +```bash +python scripts/run_experiment.py --config path/to/config.yaml ``` + +The Python package is imported as `nodelens`. 
+ +## Directory Map + +```text configs/ -├── template.yaml # Complete template with all options -├── unified_template.yaml # Unified format template -├── vision_prune/ # Vision model pruning configs -| ├── resnet18_cifar10_full.yaml -| ├── resnet18_cifar10_unified.yaml # Unified format version -| ├── resnet50_imagenet100.yaml -| ├── vgg16_cifar10_full.yaml -| └── mobilenetv2_cifar10_full.yaml -├── prune_llm/ # LLM pruning configs -| ├── llama3_8b_full.yaml -| ├── llama3_8b_unified.yaml # Unified format version -| ├── llama2_7b_full.yaml -| ├── mistral_7b_full.yaml -| └── qwen2_7b_full.yaml -└── examples/ # Example configs - ├── mnist_basic.yaml - ├── resnet_pruning.yaml - └── llm_alignment.yaml +|-- template.yaml # Full legacy-format reference +|-- unified_template.yaml # Unified-format reference +|-- examples/ # Small runnable examples and smoke tests +|-- vision_prune/ # Vision clustering, halo, and channel pruning +`-- prune_llm/ # LLM supernode, SCAR, and paper-scale configs ``` -## Usage +Large private sweep grids are not kept in the public config tree. Public +release material for a paper lives under `projects/<project_name>/`; reusable +experiment configs live here. + +## Which Config Should I Use? 
+ +| Goal | Start with | +|------|------------| +| Quick install check on MNIST | `configs/examples/mnist_basic.yaml` | +| Small vision pruning example | `configs/examples/resnet_pruning.yaml` | +| Vision metric clustering and halo analysis | `configs/vision_prune/resnet18_cifar10_unified.yaml` | +| Larger vision pruning benchmark | `configs/vision_prune/resnet50_imagenet100_unified.yaml` | +| Minimal LLM supernode example | `configs/examples/llm_alignment.yaml` | +| Main 8B LLM SCAR suite | `configs/prune_llm/llama3_8b_unified.yaml` | +| Cross-model 7B/8B LLM checks | `configs/prune_llm/{llama2,mistral,qwen2}_7b_unified.yaml` | +| 70B mechanism check | `configs/prune_llm/llama3_70b_scale_mechanism.yaml` | +| 70B structured pruning curves | `configs/prune_llm/llama3_70b_scale_pruning_curves.yaml` | +| OLMo checkpoint trajectory | `configs/prune_llm/olmo2_7b_ckpt_template.yaml` | + +## Experiment Types + +| Type | Used for | Typical configs | +|------|----------|-----------------| +| `alignment_analysis` | General activation, alignment, and pruning analysis for small models | `configs/examples/*.yaml` | +| `cluster_analysis` | Vision channel metrics, metric-space clustering, halo analysis, cascade tests, and structured pruning | `configs/vision_prune/*.yaml` | +| `llm_alignment` | Hugging Face causal LMs, SCAR loss-proxy metrics, supernodes, halos, perplexity, and LLM structured pruning | `configs/prune_llm/*.yaml` | +| `vision_synergy` | Older focused vision synergy experiments | `configs/examples/vision_synergy.yaml` | + +## Common Commands ```bash -python scripts/run_experiment.py --config configs/cluster_analysis/resnet18_cifar10_full.yaml -python scripts/run_experiment.py --config configs/paper/llama3_8b_full.yaml -python scripts/run_experiment.py --config configs/examples/resnet_pruning.yaml -``` +# Quick smoke test +python scripts/run_experiment.py --config configs/examples/mnist_basic.yaml -## Experiment Types +# Vision clustering and pruning +python 
scripts/run_experiment.py --config configs/vision_prune/resnet18_cifar10_unified.yaml + +# LLM supernode and SCAR analysis +python scripts/run_experiment.py --config configs/prune_llm/llama3_8b_unified.yaml -| Type | Description | -|------|-------------| -| `alignment_analysis` | General alignment metrics | -| `llm_alignment` | LLM supernode/SCAR analysis | -| `cluster_analysis` | Metric-space clustering with halos | +# Override output location without editing the YAML +python scripts/run_experiment.py \ + --config configs/prune_llm/llama3_8b_unified.yaml \ + --base-output-dir /path/to/results +``` ## Configuration Blocks @@ -55,6 +81,10 @@ python scripts/run_experiment.py --config configs/examples/resnet_pruning.yaml | `pruning` | Strategy, sparsity_levels, scoring | | `llm` | LLM-specific: scar_metrics, evaluate_perplexity | +Not every block is used by every experiment type. Vision configs usually use +`clustering`, `halo_analysis`, and `cascade_analysis`; LLM configs usually use +`supernode`, `halo_analysis`, `llm`, and `pruning`. + ## Metrics Available metrics for `metrics.enabled`: @@ -95,17 +125,22 @@ cascade_analysis: n_remove_per_cluster: 5 ``` -## LLM Configuration +## Minimal LLM Configuration ```yaml -experiment_type: llm_alignment +experiment: + type: "llm_alignment" -model_config: +model: + name: "hf_causal_lm" model_id: "meta-llama/Llama-3.1-8B" - torch_dtype: "bfloat16" + dtype: "bfloat16" -do_scar_metrics: true -scar_num_samples: 100 +metrics: + scar: + enabled: true + num_samples: 64 + max_length: 512 supernode: enabled: true @@ -113,6 +148,9 @@ supernode: protect_core: true ``` +LLM configs require access to the model provider, enough GPU memory, and the +right license acceptance for gated models. 
+ ## Unified Configuration Format The framework supports a **unified configuration format** that works consistently @@ -190,7 +228,7 @@ output: ### Loading Unified Configs ```python -from alignment.configs import load_unified_config +from nodelens.configs import load_unified_config # Works with both old and unified formats! config = load_unified_config("configs/vision_prune/resnet18_cifar10_unified.yaml") diff --git a/configs/examples/alexnet_pruning.yaml b/configs/examples/alexnet_pruning.yaml index 3f6c971b..afb10868 100644 --- a/configs/examples/alexnet_pruning.yaml +++ b/configs/examples/alexnet_pruning.yaml @@ -17,7 +17,7 @@ model: # ImageNet dataset path on Kempner cluster dataset: name: "imagenet" - data_path: "/n/holylfs06/LABS/kempner_shared/Everyone/testbed/vision/imagenet_1k" + data_path: "/path/to/datasets/imagenet_1k" batch_size: 128 num_workers: 4 diff --git a/configs/examples/llama3_extended_analysis.yaml b/configs/examples/llama3_extended_analysis.yaml index 2b4df48a..6d852f39 100644 --- a/configs/examples/llama3_extended_analysis.yaml +++ b/configs/examples/llama3_extended_analysis.yaml @@ -8,7 +8,8 @@ # 3. Cross-layer redundancy (redundancy with previous layer) # 4. Layer transition efficiency (new information per layer) # -# Based on theoretical framework in drafts/alignment_notes/alignment_red.tex +# Use this as a heavier LLM analysis example after the smaller llm_alignment +# example is working in your environment. 
# ============================================================================ experiment: diff --git a/configs/examples/resnet_pruning.yaml b/configs/examples/resnet_pruning.yaml index 84b09871..f6526969 100644 --- a/configs/examples/resnet_pruning.yaml +++ b/configs/examples/resnet_pruning.yaml @@ -16,10 +16,10 @@ model: # Dataset options: # - cifar10/cifar100: ./data (auto-downloads, but ResNet expects 224x224) -# - imagenet: /n/holylfs06/LABS/kempner_shared/Everyone/testbed/vision/imagenet_1k +# - imagenet: /path/to/datasets/imagenet_1k dataset: name: "imagenet" - data_path: "/n/holylfs06/LABS/kempner_shared/Everyone/testbed/vision/imagenet_1k" + data_path: "/path/to/datasets/imagenet_1k" batch_size: 128 num_workers: 4 diff --git a/configs/examples/vision_pruning_test.yaml b/configs/examples/vision_pruning_test.yaml index 331a54be..261b0a83 100644 --- a/configs/examples/vision_pruning_test.yaml +++ b/configs/examples/vision_pruning_test.yaml @@ -1,7 +1,7 @@ # Vision Pruning Test (AlexNet on ImageNet) # Comprehensive metrics with pruning strategies # -# Based on drafts/alignment_notes/alignment_red.tex: +# Metric notes: # - RQ measures alignment with input covariance # - Gaussian MI is directly related to RQ for linear-Gaussian models # - Redundancy I(Y_i; Y_j) = -0.5 * log(1 - ρ²) measures overlap between neurons @@ -22,10 +22,10 @@ model: # Dataset options: # - cifar10/cifar100: ./data (auto-downloads) -# - imagenet: /n/holylfs06/LABS/kempner_shared/Everyone/testbed/vision/imagenet_1k +# - imagenet: /path/to/datasets/imagenet_1k dataset: name: "imagenet" - data_path: "/n/holylfs06/LABS/kempner_shared/Everyone/testbed/vision/imagenet_1k" + data_path: "/path/to/datasets/imagenet_1k" batch_size: 16 # Reduced for memory efficiency with unfold mode num_workers: 4 diff --git a/configs/paper/llama3_lp_validation_improved.yaml b/configs/paper/llama3_lp_validation_improved.yaml deleted file mode 100644 index abf6686f..00000000 --- 
a/configs/paper/llama3_lp_validation_improved.yaml +++ /dev/null @@ -1,57 +0,0 @@ -# Improved LP ablation validation config -# Key changes: 4x more texts (32 vs 8), 2x longer (512 vs 256), more layers (stride 4 vs 8) -# This should give more stable ΔNLL estimates with more positive values - -name: llama3_8b_paper_results_lp_validation_improved -description: "LP validation with increased data for cleaner scatter plots" - -experiment_type: llm_alignment -model_name: hf_causal_lm - -model_config: - model_id: "meta-llama/Llama-3.1-8B" - torch_dtype: bfloat16 - device_map: auto - -dataset_name: wikitext -batch_size: 1 -device: cuda -seed: 42 - -# Only run the LP validation probe -supernode: - enabled: true - score_metric: scar_loss_proxy - core_fraction: 0.01 - follower_fraction: 0.10 - - # Improved LP ablation validation settings - lp_ablation_validation: - enabled: true - layer_stride: 4 # More layers (was 8) - layer_indices: null # All layers at stride - num_texts: 32 # 4x more texts (was 8) - max_length: 512 # 2x longer (was 256) - num_channels: 128 # Same (can increase to 256 if desired) - quantile_bins: 8 - seed: 0 - - # Disable other probes to speed up run - read_halo_analysis: - enabled: false - conditional_halo_ablation: - enabled: false - -# Pruning settings (minimal - just for SCAR scores) -pruning: - methods: [scar] - sparsity_levels: [0.0] # No actual pruning, just compute scores - -# Evaluation -evaluate: - compute_scar_metrics: true - num_calibration_samples: 128 - -# Output -plots_dir: ./figures -results_dir: ./results diff --git a/configs/prune_llm/README.md b/configs/prune_llm/README.md index 1845c9bc..7a0b07c9 100644 --- a/configs/prune_llm/README.md +++ b/configs/prune_llm/README.md @@ -1,15 +1,30 @@ -# SCAR Paper Experiment Configurations - -Configurations for generating results in the SCAR LLM pruning paper. 
- -## Configurations - -| Config | Model | Layers | FFN Width | Runtime | -|--------|-------|--------|-----------|---------| -| `llama3_8b_unified.yaml` | LLaMA-3.1-8B | 32 | 14336 | 6-8h | -| `mistral_7b_unified.yaml` | Mistral-7B | 32 | 14336 | 4-6h | -| `llama2_7b_unified.yaml` | LLaMA-2-7B | 32 | 11008 | 4-6h | -| `qwen2_7b_unified.yaml` | Qwen2-7B | 28 | 18944 | 4-6h | +# LLM And SCAR Configurations + +These configs run Hugging Face causal-language-model experiments for loss +sensitivity, supernode analysis, halo analysis, structured FFN channel pruning, +and SCAR-style pruning baselines. + +The configs are useful in two modes: + +| Mode | Purpose | +|------|---------| +| Mechanism probes | Compute loss-proxy concentration, supernodes, activation overlap, and halo summaries | +| Pruning probes | Apply structured FFN channel pruning and evaluate perplexity or downstream tasks | + +## Main Configs + +| Config | Model | Purpose | Typical runtime | +|--------|-------|---------|-----------------| +| `llama3_8b_unified.yaml` | Llama-3.1-8B | Main 8B SCAR suite | 6-8h | +| `llama3_8b_mechanism_probes.yaml` | Llama-3.1-8B | Mechanism-only checks | 1-2h | +| `llama2_7b_unified.yaml` | Llama-2-7B | Cross-model 7B validation | 4-6h | +| `mistral_7b_unified.yaml` | Mistral-7B | Cross-model 7B validation | 4-6h | +| `qwen2_7b_unified.yaml` | Qwen2-7B | Cross-model 7B validation | 4-6h | +| `llama3_70b_scale_mechanism.yaml` | Llama-3.1-70B | Large-model concentration check | Hardware dependent | +| `llama3_70b_scale_pruning_curves.yaml` | Llama-3.1-70B | Large-model structured pruning curves | Hardware dependent | +| `llama3_70b_scale_sparsegpt_curves.yaml` | Llama-3.1-70B | Structured SparseGPT comparison | Hardware dependent | +| `olmo2_7b_ckpt_template.yaml` | OLMo-2-7B checkpoints | Training-trajectory mechanism probe | Per checkpoint | +| `olmo2_7b_pruning_curves.yaml` | OLMo-2-7B | Final-checkpoint pruning replication | 4-6h | ## Quick Start @@ -18,16 +33,6 @@ Run 
single model: python scripts/run_experiment.py --config configs/prune_llm/llama3_8b_unified.yaml ``` -Paper batch launchers now live under: -```bash -drafts/LLM_prune/paper/slurm_jobs/ -``` - -See: -```bash -drafts/LLM_prune/paper/slurm_jobs/README.md -``` - Override base output directory: ```bash python scripts/run_experiment.py \ @@ -40,7 +45,7 @@ python scripts/run_experiment.py \ Each job creates a unique directory based on timestamp and SLURM job ID: ``` -/n/holylfs06/LABS/kempner_project_b/Lab/alignment/Prune_LLM/ +/path/to/results/Prune_LLM/ ├── llama3_8b_paper_results_20241209_143052_12345678/ | ├── results/ # JSON results files | | ├── results_20241209_143052.json @@ -75,6 +80,10 @@ Each job creates a unique directory based on timestamp and SLURM job ID: | Magnitude baseline | `activation_l2_norm` | | SOTA baselines | `wanda`, `sparsegpt` | +All LLM pruning configs use structured FFN channel pruning unless explicitly +noted. A channel is removed consistently across the corresponding FFN +projection group, which is different from unstructured element-wise pruning. + ## Analyses 1. 
**Supernode Distribution**: Loss proxy histograms, concentration across depth @@ -101,7 +110,7 @@ The `output.base_dir` setting controls where job directories are created: ```yaml output: # Creates: {base_dir}/{experiment_name}_{timestamp}_{job_id}/ - base_dir: "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/Prune_LLM" + base_dir: "/path/to/results/Prune_LLM" # Fallback if base_dir is not set (legacy) dir: "./results/paper/llama3_8b" @@ -124,7 +133,21 @@ Compared to example configs, paper configs include: ## Resource Requirements -- **GPU**: 1x A100 80GB or H100 -- **Memory**: ~60GB GPU memory for 8B models -- **Storage**: ~50GB per model -- **Time**: ~20-30 hours total for all models +Approximate requirements for the 7B/8B configs: + +| Resource | Typical value | +|----------|---------------| +| GPU | 1x A100 80GB or H100 preferred | +| GPU memory | About 60GB for full 8B pruning/evaluation configs | +| Storage | About 50GB per model run if all figures/results are saved | +| Time | 4-8h for one full 7B/8B model config | + +The 70B configs need substantially more memory and may require tensor +parallelism, CPU offload, or a multi-GPU node depending on the local setup. + +## Notes On Reproducibility + +- The public configs do not include model weights or datasets. +- Gated models require accepting the provider license before running. +- Use `--base-output-dir` to keep large run outputs outside the repository. +- Paper artifact packaging is handled by `projects/supernodes_scar/`. 
diff --git a/configs/prune_llm/llama2_7b_unified.yaml b/configs/prune_llm/llama2_7b_unified.yaml index 98e0b100..29ab476c 100644 --- a/configs/prune_llm/llama2_7b_unified.yaml +++ b/configs/prune_llm/llama2_7b_unified.yaml @@ -265,7 +265,7 @@ visualization: # OUTPUT # ----------------------------------------------------------------------------- output: - base_dir: "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/Prune_LLM" + base_dir: "/path/to/results/Prune_LLM" dir: "./results/paper/llama2_7b" save_metrics: true save_figures: true diff --git a/configs/prune_llm/llama3_8b_unified.yaml b/configs/prune_llm/llama3_8b_unified.yaml index 17da8e28..b3c735b0 100644 --- a/configs/prune_llm/llama3_8b_unified.yaml +++ b/configs/prune_llm/llama3_8b_unified.yaml @@ -298,7 +298,7 @@ visualization: # Uses job directory structure: creates unique folders for each run # Directory format: {base_dir}/{experiment_name}_{timestamp}_{job_id}/ output: - base_dir: "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/Prune_LLM" + base_dir: "/path/to/results/Prune_LLM" # dir is ignored when base_dir is set dir: "./results/paper/llama3_8b" save_metrics: true diff --git a/configs/prune_llm/mistral_7b_unified.yaml b/configs/prune_llm/mistral_7b_unified.yaml index 7b2fbc6a..307d06f5 100644 --- a/configs/prune_llm/mistral_7b_unified.yaml +++ b/configs/prune_llm/mistral_7b_unified.yaml @@ -264,7 +264,7 @@ visualization: # OUTPUT # ----------------------------------------------------------------------------- output: - base_dir: "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/Prune_LLM" + base_dir: "/path/to/results/Prune_LLM" dir: "./results/paper/mistral_7b" save_metrics: true save_figures: true diff --git a/configs/prune_llm/olmo2_7b_ckpt_template.yaml b/configs/prune_llm/olmo2_7b_ckpt_template.yaml index 51adca4a..5a4b9ba4 100644 --- a/configs/prune_llm/olmo2_7b_ckpt_template.yaml +++ b/configs/prune_llm/olmo2_7b_ckpt_template.yaml @@ -5,8 +5,8 @@ # - Compute LP concentration for a *single* 
OLMo-2-1124-7B training checkpoint. # - Identical mechanism-only setup to llama3_70b_scale_mechanism.yaml so the # resulting JSONs are aggregable across model / checkpoint. -# - The placeholders @REVISION@ and @RUN_TAG@ are substituted per-checkpoint -# by paper/slurm_jobs/run_olmo2_7b_checkpoint_trajectory.sh. +# - Template tokens @REVISION@ and @RUN_TAG@ are substituted per checkpoint +# by the OLMo checkpoint sweep launcher used for the paper. # ============================================================================ experiment: diff --git a/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_main.yaml b/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_main.yaml index 8b669c75..49b2935e 100644 --- a/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_main.yaml +++ b/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_main.yaml @@ -5,8 +5,8 @@ # - Compute LP concentration for a *single* OLMo-2-1124-7B training checkpoint. # - Identical mechanism-only setup to llama3_70b_scale_mechanism.yaml so the # resulting JSONs are aggregable across model / checkpoint. -# - The placeholders main and main are substituted per-checkpoint -# by paper/slurm_jobs/run_olmo2_7b_checkpoint_trajectory.sh. +# - Generated from the checkpoint template with revision main and run tag main +# by the OLMo checkpoint sweep launcher used for the paper. # ============================================================================ experiment: diff --git a/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step150_1B.yaml b/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step150_1B.yaml index 79e054e6..c920f3a6 100644 --- a/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step150_1B.yaml +++ b/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step150_1B.yaml @@ -5,8 +5,8 @@ # - Compute LP concentration for a *single* OLMo-2-1124-7B training checkpoint. # - Identical mechanism-only setup to llama3_70b_scale_mechanism.yaml so the # resulting JSONs are aggregable across model / checkpoint. 
-# - The placeholders stage1-step150-tokens1B and s1_step150_1B are substituted per-checkpoint -# by paper/slurm_jobs/run_olmo2_7b_checkpoint_trajectory.sh. +# - Generated from the checkpoint template with revision stage1-step150-tokens1B and run tag s1_step150_1B +# by the OLMo checkpoint sweep launcher used for the paper. # ============================================================================ experiment: diff --git a/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step20000_84B.yaml b/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step20000_84B.yaml index e8b822a4..0fa40e0e 100644 --- a/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step20000_84B.yaml +++ b/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step20000_84B.yaml @@ -5,8 +5,8 @@ # - Compute LP concentration for a *single* OLMo-2-1124-7B training checkpoint. # - Identical mechanism-only setup to llama3_70b_scale_mechanism.yaml so the # resulting JSONs are aggregable across model / checkpoint. -# - The placeholders stage1-step20000-tokens84B and s1_step20000_84B are substituted per-checkpoint -# by paper/slurm_jobs/run_olmo2_7b_checkpoint_trajectory.sh. +# - Generated from the checkpoint template with revision stage1-step20000-tokens84B and run tag s1_step20000_84B +# by the OLMo checkpoint sweep launcher used for the paper. # ============================================================================ experiment: diff --git a/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step2000_9B.yaml b/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step2000_9B.yaml index 12f75c70..96471465 100644 --- a/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step2000_9B.yaml +++ b/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step2000_9B.yaml @@ -5,8 +5,8 @@ # - Compute LP concentration for a *single* OLMo-2-1124-7B training checkpoint. # - Identical mechanism-only setup to llama3_70b_scale_mechanism.yaml so the # resulting JSONs are aggregable across model / checkpoint. 
-# - The placeholders stage1-step2000-tokens9B and s1_step2000_9B are substituted per-checkpoint -# by paper/slurm_jobs/run_olmo2_7b_checkpoint_trajectory.sh. +# - Generated from the checkpoint template with revision stage1-step2000-tokens9B and run tag s1_step2000_9B +# by the OLMo checkpoint sweep launcher used for the paper. # ============================================================================ experiment: diff --git a/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step300000_1259B.yaml b/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step300000_1259B.yaml index 86464d4d..b56916b6 100644 --- a/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step300000_1259B.yaml +++ b/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step300000_1259B.yaml @@ -5,8 +5,8 @@ # - Compute LP concentration for a *single* OLMo-2-1124-7B training checkpoint. # - Identical mechanism-only setup to llama3_70b_scale_mechanism.yaml so the # resulting JSONs are aggregable across model / checkpoint. -# - The placeholders stage1-step300000-tokens1259B and s1_step300000_1259B are substituted per-checkpoint -# by paper/slurm_jobs/run_olmo2_7b_checkpoint_trajectory.sh. +# - Generated from the checkpoint template with revision stage1-step300000-tokens1259B and run tag s1_step300000_1259B +# by the OLMo checkpoint sweep launcher used for the paper. # ============================================================================ experiment: diff --git a/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step5000_21B.yaml b/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step5000_21B.yaml index 056d9e81..098974aa 100644 --- a/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step5000_21B.yaml +++ b/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step5000_21B.yaml @@ -5,8 +5,8 @@ # - Compute LP concentration for a *single* OLMo-2-1124-7B training checkpoint. 
# - Identical mechanism-only setup to llama3_70b_scale_mechanism.yaml so the # resulting JSONs are aggregable across model / checkpoint. -# - The placeholders stage1-step5000-tokens21B and s1_step5000_21B are substituted per-checkpoint -# by paper/slurm_jobs/run_olmo2_7b_checkpoint_trajectory.sh. +# - Generated from the checkpoint template with revision stage1-step5000-tokens21B and run tag s1_step5000_21B +# by the OLMo checkpoint sweep launcher used for the paper. # ============================================================================ experiment: diff --git a/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step600000_2517B.yaml b/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step600000_2517B.yaml index a5477cd9..810abbd4 100644 --- a/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step600000_2517B.yaml +++ b/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step600000_2517B.yaml @@ -5,8 +5,8 @@ # - Compute LP concentration for a *single* OLMo-2-1124-7B training checkpoint. # - Identical mechanism-only setup to llama3_70b_scale_mechanism.yaml so the # resulting JSONs are aggregable across model / checkpoint. -# - The placeholders stage1-step600000-tokens2517B and s1_step600000_2517B are substituted per-checkpoint -# by paper/slurm_jobs/run_olmo2_7b_checkpoint_trajectory.sh. +# - Generated from the checkpoint template with revision stage1-step600000-tokens2517B and run tag s1_step600000_2517B +# by the OLMo checkpoint sweep launcher used for the paper. 
# ============================================================================ experiment: diff --git a/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step600_3B.yaml b/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step600_3B.yaml index fb310a5c..4b261b15 100644 --- a/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step600_3B.yaml +++ b/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step600_3B.yaml @@ -5,8 +5,8 @@ # - Compute LP concentration for a *single* OLMo-2-1124-7B training checkpoint. # - Identical mechanism-only setup to llama3_70b_scale_mechanism.yaml so the # resulting JSONs are aggregable across model / checkpoint. -# - The placeholders stage1-step600-tokens3B and s1_step600_3B are substituted per-checkpoint -# by paper/slurm_jobs/run_olmo2_7b_checkpoint_trajectory.sh. +# - Generated from the checkpoint template with revision stage1-step600-tokens3B and run tag s1_step600_3B +# by the OLMo checkpoint sweep launcher used for the paper. # ============================================================================ experiment: diff --git a/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step900000_3775B.yaml b/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step900000_3775B.yaml index 4024242f..3ff30193 100644 --- a/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step900000_3775B.yaml +++ b/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step900000_3775B.yaml @@ -5,8 +5,8 @@ # - Compute LP concentration for a *single* OLMo-2-1124-7B training checkpoint. # - Identical mechanism-only setup to llama3_70b_scale_mechanism.yaml so the # resulting JSONs are aggregable across model / checkpoint. -# - The placeholders stage1-step900000-tokens3775B and s1_step900000_3775B are substituted per-checkpoint -# by paper/slurm_jobs/run_olmo2_7b_checkpoint_trajectory.sh. 
+# - Generated from the checkpoint template with revision stage1-step900000-tokens3775B and run tag s1_step900000_3775B +# by the OLMo checkpoint sweep launcher used for the paper. # ============================================================================ experiment: diff --git a/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step99000_416B.yaml b/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step99000_416B.yaml index 285e7b43..2ec123d3 100644 --- a/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step99000_416B.yaml +++ b/configs/prune_llm/olmo2_ckpts_generated/olmo2_7b_s1_step99000_416B.yaml @@ -5,8 +5,8 @@ # - Compute LP concentration for a *single* OLMo-2-1124-7B training checkpoint. # - Identical mechanism-only setup to llama3_70b_scale_mechanism.yaml so the # resulting JSONs are aggregable across model / checkpoint. -# - The placeholders stage1-step99000-tokens416B and s1_step99000_416B are substituted per-checkpoint -# by paper/slurm_jobs/run_olmo2_7b_checkpoint_trajectory.sh. +# - Generated from the checkpoint template with revision stage1-step99000-tokens416B and run tag s1_step99000_416B +# by the OLMo checkpoint sweep launcher used for the paper. 
# ============================================================================ experiment: diff --git a/configs/prune_llm/qwen2_7b_unified.yaml b/configs/prune_llm/qwen2_7b_unified.yaml index 04596a86..8be3c76f 100644 --- a/configs/prune_llm/qwen2_7b_unified.yaml +++ b/configs/prune_llm/qwen2_7b_unified.yaml @@ -266,7 +266,7 @@ visualization: # OUTPUT # ----------------------------------------------------------------------------- output: - base_dir: "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/Prune_LLM" + base_dir: "/path/to/results/Prune_LLM" dir: "./results/paper/qwen2_7b" save_metrics: true save_figures: true diff --git a/configs/template.yaml b/configs/template.yaml index 83f82aab..8eff2c12 100644 --- a/configs/template.yaml +++ b/configs/template.yaml @@ -456,7 +456,7 @@ visualization: # For local runs, a unique 8-character ID is generated # # Recommended for cluster runs: -# base_output_dir: "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/Prune_LLM" +# base_output_dir: "/path/to/results/Prune_LLM" base_output_dir: null log_dir: "./logs" diff --git a/configs/unified_template.yaml b/configs/unified_template.yaml index f95b4a4d..1efd0c1b 100644 --- a/configs/unified_template.yaml +++ b/configs/unified_template.yaml @@ -379,7 +379,7 @@ visualization: output: # Base directory for job-specific outputs (recommended for cluster runs) # Each experiment creates a unique subdirectory with timestamp and job ID - base_dir: null # e.g., "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/Prune_LLM" + base_dir: null # e.g., "/path/to/results/Prune_LLM" # Direct output directory (legacy, used if base_dir is null) dir: "./results" diff --git a/configs/vision_prune/README.md b/configs/vision_prune/README.md index 65d09ccb..ae48c721 100644 --- a/configs/vision_prune/README.md +++ b/configs/vision_prune/README.md @@ -1,6 +1,8 @@ -# Cluster Analysis Experiment Configurations +# Vision Clustering And Pruning Configurations -This directory contains configurations for 
**cluster-based neural network analysis** - a general framework that works on any architecture. +This directory contains reusable configs for vision channel analysis, metric +clustering, halo-style cross-layer analysis, cascade tests, and structured +channel pruning. ## Overview @@ -14,18 +16,24 @@ The cluster-based analysis pipeline identifies functional types of neurons/chann ## Supported Architectures -- **Vision**: ResNet, VGG, MobileNet, EfficientNet, etc. -- **LLMs**: Can be adapted for FFN analysis (see LLM configs) -- **Any model** with Conv2d or Linear layers +- ResNet, VGG, AlexNet, MobileNetV2, and related ConvNet models. +- Any model with Conv2d or Linear layers can usually be adapted by adding a config. +- LLM FFN analysis uses the separate configs in `configs/prune_llm/`. ## Configuration Files | Config | Model | Dataset | Purpose | |--------|-------|---------|---------| | `resnet18_cifar10_full.yaml` | ResNet-18 | CIFAR-10 | Full analysis | +| `resnet18_cifar10_unified.yaml` | ResNet-18 | CIFAR-10 | Unified-format full analysis | +| `resnet18_cifar100_unified.yaml` | ResNet-18 | CIFAR-100 | Cross-dataset check | | `vgg16_cifar10_full.yaml` | VGG-16-BN | CIFAR-10 | Full analysis | | `mobilenetv2_cifar10_full.yaml` | MobileNetV2 | CIFAR-10 | Full analysis | | `resnet50_imagenet100.yaml` | ResNet-50 | ImageNet-100 | Large-scale analysis | +| `alexnet_imagenet1k_unified_fastprune.yaml` | AlexNet | ImageNet-1k | Fast pruning-oriented run | + +Large private sweep grids are intentionally not kept here. This directory is +for reusable public examples and representative benchmark configs. 
## Running Experiments @@ -33,16 +41,16 @@ Use the unified `run_experiment.py` script (same as all other experiments): ```bash # Run full analysis (experiment_type is read from config) -python scripts/run_experiment.py --config configs/cluster_analysis/resnet18_cifar10_full.yaml +python scripts/run_experiment.py --config configs/vision_prune/resnet18_cifar10_full.yaml # Override device -python scripts/run_experiment.py --config configs/cluster_analysis/resnet18_cifar10_full.yaml --device cuda:1 +python scripts/run_experiment.py --config configs/vision_prune/resnet18_cifar10_full.yaml --device cuda:1 # Override seed for reproducibility study -python scripts/run_experiment.py --config configs/cluster_analysis/resnet18_cifar10_full.yaml --seed 123 +python scripts/run_experiment.py --config configs/vision_prune/resnet18_cifar10_full.yaml --seed 123 # Specify output directory -python scripts/run_experiment.py --config configs/cluster_analysis/vgg16_cifar10_full.yaml \ +python scripts/run_experiment.py --config configs/vision_prune/vgg16_cifar10_full.yaml \ --output-dir results/cluster_analysis/vgg16_run1 ``` @@ -128,7 +136,15 @@ The 4-cluster structure identifies: | **Synergistic** | Mod RQ, Low Red, High Syn | Preserve pairs | | **Background** | Low on all metrics | Safe to remove | -## Related Papers +## Choosing A Vision Config + +| Need | Recommended config | +|------|--------------------| +| Fast local check | `configs/examples/resnet_pruning.yaml` | +| Full CIFAR-10 clustering and pruning | `resnet18_cifar10_unified.yaml` | +| Compare architecture effects | `vgg16_cifar10_unified.yaml`, `mobilenetv2_cifar10_unified.yaml` | +| Test larger input/model scale | `resnet50_imagenet100_unified.yaml` | +| Run fast pruning without full clustering detail | `alexnet_imagenet1k_unified_fastprune.yaml` | -- Vision paper: `drafts/alignment_notes/alignment_red.tex` -- LLM paper: `drafts/LLM_prune/scar_paper_icml_v4.tex` +Use the `_unified.yaml` files for new work. 
The `_full.yaml` files are kept for +compatibility with older experiments. diff --git a/configs/vision_prune/alexnet_imagenet1k_unified_fastprune.yaml b/configs/vision_prune/alexnet_imagenet1k_unified_fastprune.yaml index 6c45aafe..1b24f055 100644 --- a/configs/vision_prune/alexnet_imagenet1k_unified_fastprune.yaml +++ b/configs/vision_prune/alexnet_imagenet1k_unified_fastprune.yaml @@ -117,7 +117,7 @@ visualization: enabled: false output: - base_dir: "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red" + base_dir: "/path/to/results/losslens_vision" dir: "./results/vision/alexnet_imagenet1k" save_metrics: true save_clusters: true diff --git a/configs/vision_prune/mobilenetv2_cifar100_unified_paper_uniform_pointwise.yaml b/configs/vision_prune/mobilenetv2_cifar100_unified_paper_uniform_pointwise.yaml index fe001113..851e4f7f 100644 --- a/configs/vision_prune/mobilenetv2_cifar100_unified_paper_uniform_pointwise.yaml +++ b/configs/vision_prune/mobilenetv2_cifar100_unified_paper_uniform_pointwise.yaml @@ -165,7 +165,7 @@ visualization: metric_distributions: true output: - base_dir: "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red" + base_dir: "/path/to/results/losslens_vision" dir: "./results/vision/mobilenetv2_cifar100" save_metrics: true save_clusters: true diff --git a/configs/vision_prune/mobilenetv2_cifar10_unified.yaml b/configs/vision_prune/mobilenetv2_cifar10_unified.yaml index aa0c9fcb..bc2301cc 100644 --- a/configs/vision_prune/mobilenetv2_cifar10_unified.yaml +++ b/configs/vision_prune/mobilenetv2_cifar10_unified.yaml @@ -343,7 +343,7 @@ visualization: # OUTPUT # ----------------------------------------------------------------------------- output: - base_dir: "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/Prune_LLM" + base_dir: "/path/to/results/Prune_LLM" dir: "./results/vision/mobilenetv2_cifar10" save_metrics: true save_clusters: true diff --git 
a/configs/vision_prune/mobilenetv2_cifar10_unified_paper_uniform_pointwise.yaml b/configs/vision_prune/mobilenetv2_cifar10_unified_paper_uniform_pointwise.yaml index 6a1e2aa5..69abe861 100644 --- a/configs/vision_prune/mobilenetv2_cifar10_unified_paper_uniform_pointwise.yaml +++ b/configs/vision_prune/mobilenetv2_cifar10_unified_paper_uniform_pointwise.yaml @@ -162,7 +162,7 @@ visualization: metric_distributions: true output: - base_dir: "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red" + base_dir: "/path/to/results/losslens_vision" dir: "./results/vision/mobilenetv2_cifar10" save_metrics: true save_clusters: true diff --git a/configs/vision_prune/paper_2026_convnext_pilot/convnext_tiny_imagenet100_locked_pilot_seed123.yaml b/configs/vision_prune/paper_2026_convnext_pilot/convnext_tiny_imagenet100_locked_pilot_seed123.yaml deleted file mode 100644 index 1575d971..00000000 --- a/configs/vision_prune/paper_2026_convnext_pilot/convnext_tiny_imagenet100_locked_pilot_seed123.yaml +++ /dev/null @@ -1,249 +0,0 @@ -{ - "name": "convnext_tiny_imagenet100_locked_pilot_seed123", - "description": "ConvNeXt-T ImageNet-100 pilot for NeurIPS 2026 (locked pruning subset)", - "tags": ["paper2026", "convnext", "imagenet100", "pilot_pruning", "seed123"], - "experiment_type": "cluster_analysis", - "model_name": "torchvision_model", - "model_config": { - "model_name": "convnext_tiny" - }, - "pretrained": true, - "model_checkpoint": null, - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 123, - "train_before_dropout": true, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": true, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], 
- "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - 
"supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": ["all", "rq_red", "rq_syn", "red_syn"], - "run_permutation_baseline": false, - "n_permutations": 50, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [0.0, 0.1, 0.3, 0.5, 0.7, 0.9], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red" - ], - "pruning_amounts": [0.1, 0.3, 0.5, 0.7, 0.8, 0.9], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - 
"pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": false - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/convnext_tiny_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/convnext_tiny_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/convnext_pilot_pruning_20260409", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": ["perplexity"], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": false, - "do_connectivity_pruning": false, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": false, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": false - }, - "generalized_importance": {}, - "do_halo_analysis": false, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - 
"use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_convnext_pilot/convnext_tiny_imagenet100_locked_pilot_seed42.yaml b/configs/vision_prune/paper_2026_convnext_pilot/convnext_tiny_imagenet100_locked_pilot_seed42.yaml deleted file mode 100644 index 0b4bc94f..00000000 --- a/configs/vision_prune/paper_2026_convnext_pilot/convnext_tiny_imagenet100_locked_pilot_seed42.yaml +++ /dev/null @@ -1,249 +0,0 @@ -{ - "name": "convnext_tiny_imagenet100_locked_pilot_seed42", - "description": "ConvNeXt-T ImageNet-100 pilot for NeurIPS 2026 (locked pruning subset)", - "tags": ["paper2026", "convnext", "imagenet100", "pilot_pruning", "seed42"], - "experiment_type": "cluster_analysis", - "model_name": "torchvision_model", - "model_config": { - "model_name": "convnext_tiny" - }, - "pretrained": true, - "model_checkpoint": null, - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 42, - "train_before_dropout": true, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": true, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - 
"force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - 
"task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": ["all", "rq_red", "rq_syn", "red_syn"], - "run_permutation_baseline": false, - "n_permutations": 50, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [0.0, 0.1, 0.3, 0.5, 0.7, 0.9], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red" - ], - "pruning_amounts": [0.1, 0.3, 0.5, 0.7, 0.8, 0.9], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - 
"pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": false - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/convnext_tiny_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/convnext_tiny_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/convnext_pilot_pruning_20260409", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": ["perplexity"], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": false, - "do_connectivity_pruning": false, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": false, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": false - }, - "generalized_importance": {}, - "do_halo_analysis": false, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - 
"pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_convnext_pilot/convnext_tiny_imagenet100_locked_pilot_seed456.yaml b/configs/vision_prune/paper_2026_convnext_pilot/convnext_tiny_imagenet100_locked_pilot_seed456.yaml deleted file mode 100644 index 31a5e2d0..00000000 --- a/configs/vision_prune/paper_2026_convnext_pilot/convnext_tiny_imagenet100_locked_pilot_seed456.yaml +++ /dev/null @@ -1,249 +0,0 @@ -{ - "name": "convnext_tiny_imagenet100_locked_pilot_seed456", - "description": "ConvNeXt-T ImageNet-100 pilot for NeurIPS 2026 (locked pruning subset)", - "tags": ["paper2026", "convnext", "imagenet100", "pilot_pruning", "seed456"], - "experiment_type": "cluster_analysis", - "model_name": "torchvision_model", - "model_config": { - "model_name": "convnext_tiny" - }, - "pretrained": true, - "model_checkpoint": null, - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 456, - "train_before_dropout": true, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": true, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - 
"num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": ["all", "rq_red", 
"rq_syn", "red_syn"], - "run_permutation_baseline": false, - "n_permutations": 50, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [0.0, 0.1, 0.3, 0.5, 0.7, 0.9], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red" - ], - "pruning_amounts": [0.1, 0.3, 0.5, 0.7, 0.8, 0.9], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": false - }, - "post_analysis": {}, - "checkpoint_dir": 
"./results/vision/convnext_tiny_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/convnext_tiny_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/convnext_pilot_pruning_20260409", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": ["perplexity"], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": false, - "do_connectivity_pruning": false, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": false, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": false - }, - "generalized_importance": {}, - "do_halo_analysis": false, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_convnext_pilot/convnext_tiny_imagenet100_struct_seed123.yaml b/configs/vision_prune/paper_2026_convnext_pilot/convnext_tiny_imagenet100_struct_seed123.yaml deleted file mode 100644 index c1c1ca2c..00000000 --- 
a/configs/vision_prune/paper_2026_convnext_pilot/convnext_tiny_imagenet100_struct_seed123.yaml +++ /dev/null @@ -1,213 +0,0 @@ -{ - "name": "convnext_tiny_imagenet100_struct_seed123", - "description": "ConvNeXt-T ImageNet-100 pilot for NeurIPS 2026 (structural consistency)", - "tags": ["paper2026", "convnext", "imagenet100", "pilot_structural", "seed123"], - "experiment_type": "cluster_analysis", - "model_name": "torchvision_model", - "model_config": { - "model_name": "convnext_tiny" - }, - "pretrained": true, - "model_checkpoint": null, - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 123, - "train_before_dropout": true, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": true, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - 
"criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": ["all", "rq_red", "rq_syn", "red_syn"], - "run_permutation_baseline": false, - "n_permutations": 50, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - 
"outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": false - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/convnext_tiny_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/convnext_tiny_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/convnext_pilot_structural_20260409", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": ["perplexity"], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": false, - "do_connectivity_pruning": false, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": false, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": false - }, - "generalized_importance": {}, - "do_halo_analysis": false, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - 
"use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_2026_convnext_pilot/convnext_tiny_imagenet100_struct_seed42.yaml b/configs/vision_prune/paper_2026_convnext_pilot/convnext_tiny_imagenet100_struct_seed42.yaml deleted file mode 100644 index 679aeda7..00000000 --- a/configs/vision_prune/paper_2026_convnext_pilot/convnext_tiny_imagenet100_struct_seed42.yaml +++ /dev/null @@ -1,213 +0,0 @@ -{ - "name": "convnext_tiny_imagenet100_struct_seed42", - "description": "ConvNeXt-T ImageNet-100 pilot for NeurIPS 2026 (structural consistency)", - "tags": ["paper2026", "convnext", "imagenet100", "pilot_structural", "seed42"], - "experiment_type": "cluster_analysis", - "model_name": "torchvision_model", - "model_config": { - "model_name": "convnext_tiny" - }, - "pretrained": true, - "model_checkpoint": null, - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 42, - "train_before_dropout": true, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": true, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, 
- "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - 
"type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": ["all", "rq_red", "rq_syn", "red_syn"], - "run_permutation_baseline": false, - "n_permutations": 50, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": false - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/convnext_tiny_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/convnext_tiny_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/convnext_pilot_structural_20260409", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": ["perplexity"], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": false, - "do_connectivity_pruning": false, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - 
"supernode_summary": {}, - "halo_analysis": { - "enabled": false, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": false - }, - "generalized_importance": {}, - "do_halo_analysis": false, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_2026_convnext_pilot/convnext_tiny_imagenet100_struct_seed456.yaml b/configs/vision_prune/paper_2026_convnext_pilot/convnext_tiny_imagenet100_struct_seed456.yaml deleted file mode 100644 index 2cb01850..00000000 --- a/configs/vision_prune/paper_2026_convnext_pilot/convnext_tiny_imagenet100_struct_seed456.yaml +++ /dev/null @@ -1,213 +0,0 @@ -{ - "name": "convnext_tiny_imagenet100_struct_seed456", - "description": "ConvNeXt-T ImageNet-100 pilot for NeurIPS 2026 (structural consistency)", - "tags": ["paper2026", "convnext", "imagenet100", "pilot_structural", "seed456"], - "experiment_type": "cluster_analysis", - "model_name": "torchvision_model", - "model_config": { - "model_name": "convnext_tiny" - }, - "pretrained": true, - "model_checkpoint": null, - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 456, - "train_before_dropout": true, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": true, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - 
"use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - 
"calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": ["all", "rq_red", "rq_syn", "red_syn"], - "run_permutation_baseline": false, - "n_permutations": 50, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": false - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/convnext_tiny_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/convnext_tiny_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/convnext_pilot_structural_20260409", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": 
"wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": ["perplexity"], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": false, - "do_connectivity_pruning": false, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": false, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": false - }, - "generalized_importance": {}, - "do_halo_analysis": false, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_2026_locked/alexnet_imagenet100_cluster_analysis.yaml b/configs/vision_prune/paper_2026_locked/alexnet_imagenet100_cluster_analysis.yaml deleted file mode 100644 index 1aef53f5..00000000 --- a/configs/vision_prune/paper_2026_locked/alexnet_imagenet100_cluster_analysis.yaml +++ /dev/null @@ -1,296 +0,0 @@ -{ - "name": "alexnet_imagenet100_cluster_analysis", - "description": "", - "tags": [], - "experiment_type": "cluster_analysis", - "model_name": "alexnet", - "model_config": {}, - "pretrained": true, - "model_checkpoint": null, - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 128, - "num_workers": 8, - "device": "cuda", - "seed": 42, - "train_before_dropout": true, - "training_epochs": 20, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 
1, - "do_train": true, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - 
"simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 
0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "random", - "magnitude", - "activation_mean", - "taylor", - "hrank", - "composite", - "cluster_aware", - "cluster_aware_annealed", - "cluster_aware_taylor_blend", - "cluster_aware_depth_adaptive" - ], - "pruning_amounts": [ - 0.5, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 150, - "visualization_options": { - "enabled": true, - "save_format": "png", - "dpi": 150, - "generate": [ - "metric_distributions", - "cluster_scatter", - 
"cluster_evolution", - "halo_influence_matrix", - "pruning_curves", - "cascade_damage" - ] - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/alexnet_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/alexnet_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "threshold_percentile": 90, - "influence_type": "activation_weighted", - "skip_residual_edges": false - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_2026_locked/convnext_tiny_imagenet100_smoke.yaml b/configs/vision_prune/paper_2026_locked/convnext_tiny_imagenet100_smoke.yaml deleted file mode 100644 index ca1ff6ea..00000000 --- a/configs/vision_prune/paper_2026_locked/convnext_tiny_imagenet100_smoke.yaml +++ /dev/null @@ 
-1,204 +0,0 @@ -{ - "name": "convnext_tiny_imagenet100_smoke", - "description": "Low-cost compatibility pilot for a modern CNN backbone in the NeurIPS 2026 paper pipeline using the pretrained ImageNet-1K head.", - "tags": ["paper_2026", "convnext", "smoke_test"], - "experiment_type": "cluster_analysis", - "model_name": "torchvision_model", - "model_config": { - "model_name": "convnext_tiny" - }, - "pretrained": true, - "model_checkpoint": null, - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 16, - "num_workers": 8, - "device": "cuda", - "seed": 42, - "train_before_dropout": true, - "training_epochs": 1, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": true, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 5, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - 
"use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": false, - "measure_alignment_during_training": false, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 64, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 8, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 20, - "synergy_pairs": 5, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": ["all", "rq_red", "rq_syn", "red_syn"], - "run_permutation_baseline": false, - "n_permutations": 20, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 64, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 128, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 64, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - 
"do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": false, - "generate_plots": false, - "plot_format": "pdf", - "plot_dpi": 300, - "visualization_options": { - "enabled": false - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/convnext_tiny_imagenet100_smoke/checkpoints", - "checkpoint_interval": 1000, - "save_best": false, - "log_dir": "./results/vision/convnext_tiny_imagenet100_smoke", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": ["perplexity"], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": false, - "do_connectivity_pruning": false, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": false, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": false - }, - "generalized_importance": {}, - "do_halo_analysis": false, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": 2, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_2026_locked/index.json b/configs/vision_prune/paper_2026_locked/index.json deleted file mode 100644 index 26f71186..00000000 --- 
a/configs/vision_prune/paper_2026_locked/index.json +++ /dev/null @@ -1,61 +0,0 @@ -{ - "manifest": "drafts/alignment_notes/paper_artifacts/run_manifest.json", - "out_dir": "configs/vision_prune/paper_2026_locked", - "prefer_seed": 42, - "experiments": { - "alexnet_imagenet100_cluster_analysis": { - "selected_seed": 42, - "selected_run_path": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/alexnet_imagenet100_cluster_analysis_20260126_132305_57092814", - "selected_slurm_job_id": "57092814", - "config_path": "configs/vision_prune/paper_2026_locked/alexnet_imagenet100_cluster_analysis.yaml" - }, - "mobilenetv2_cifar100_cluster_analysis": { - "selected_seed": 42, - "selected_run_path": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_080037_57211589", - "selected_slurm_job_id": "57211589", - "config_path": "configs/vision_prune/paper_2026_locked/mobilenetv2_cifar100_cluster_analysis.yaml" - }, - "mobilenetv2_cifar10_cluster_analysis": { - "selected_seed": 42, - "selected_run_path": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar10_cluster_analysis_20260126_123831_57082560", - "selected_slurm_job_id": "57082560", - "config_path": "configs/vision_prune/paper_2026_locked/mobilenetv2_cifar10_cluster_analysis.yaml" - }, - "resnet18_cifar100_cluster_analysis": { - "selected_seed": 42, - "selected_run_path": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080032_57211546", - "selected_slurm_job_id": "57211546", - "config_path": "configs/vision_prune/paper_2026_locked/resnet18_cifar100_cluster_analysis.yaml" - }, - "resnet18_cifar10_cluster_analysis": { - "selected_seed": 42, - "selected_run_path": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar10_cluster_analysis_20260126_123830_57082553", - "selected_slurm_job_id": "57082553", - 
"config_path": "configs/vision_prune/paper_2026_locked/resnet18_cifar10_cluster_analysis.yaml" - }, - "resnet50_imagenet100_cluster_analysis": { - "selected_seed": 42, - "selected_run_path": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet50_imagenet100_cluster_analysis_20260126_123831_57082563", - "selected_slurm_job_id": "57082563", - "config_path": "configs/vision_prune/paper_2026_locked/resnet50_imagenet100_cluster_analysis.yaml" - }, - "vgg16_cifar100_cluster_analysis": { - "selected_seed": 42, - "selected_run_path": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080032_57211547", - "selected_slurm_job_id": "57211547", - "config_path": "configs/vision_prune/paper_2026_locked/vgg16_cifar100_cluster_analysis.yaml" - }, - "vgg16_cifar10_cluster_analysis": { - "selected_seed": 42, - "selected_run_path": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar10_cluster_analysis_20260126_123831_57082555", - "selected_slurm_job_id": "57082555", - "config_path": "configs/vision_prune/paper_2026_locked/vgg16_cifar10_cluster_analysis.yaml" - }, - "vgg16_imagenet100_cluster_analysis": { - "selected_seed": 42, - "selected_run_path": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_imagenet100_cluster_analysis_20260206_162917_59203901", - "selected_slurm_job_id": "59203901", - "config_path": "configs/vision_prune/paper_2026_locked/vgg16_imagenet100_cluster_analysis.yaml" - } - } -} diff --git a/configs/vision_prune/paper_2026_locked/mobilenetv2_cifar100_cluster_analysis.yaml b/configs/vision_prune/paper_2026_locked/mobilenetv2_cifar100_cluster_analysis.yaml deleted file mode 100644 index c8f503b4..00000000 --- a/configs/vision_prune/paper_2026_locked/mobilenetv2_cifar100_cluster_analysis.yaml +++ /dev/null @@ -1,323 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_cluster_analysis", - "description": "", - "tags": [], - 
"experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": null, - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": true, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": true, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 
100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - 
"cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "random", - "magnitude", - "activation_mean", - "taylor", - "network_slimming", - "geometric_median", - "hrank", - "composite", - "cluster_aware", - "cluster_aware_annealed", - "cluster_aware_taylor_blend", - "cluster_aware_depth_adaptive" - ], - "pruning_amounts": [ - 0.1, - 
0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "pdf", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - 
"do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_2026_locked/mobilenetv2_cifar10_cluster_analysis.yaml b/configs/vision_prune/paper_2026_locked/mobilenetv2_cifar10_cluster_analysis.yaml deleted file mode 100644 index 86078788..00000000 --- a/configs/vision_prune/paper_2026_locked/mobilenetv2_cifar10_cluster_analysis.yaml +++ /dev/null @@ -1,318 +0,0 @@ -{ - "name": "mobilenetv2_cifar10_cluster_analysis", - "description": "", - "tags": [], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": null, - "dataset_name": "cifar10", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": true, - "training_epochs": 50, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": true, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - 
"use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": 
"indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - 
"generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "random", - "magnitude", - "activation_mean", - "taylor", - "network_slimming", - "geometric_median", - "hrank", - "composite", - "cluster_aware", - "cluster_aware_annealed", - "cluster_aware_taylor_blend", - "cluster_aware_depth_adaptive" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - 
"plot_format": "pdf", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - 
"model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_2026_locked/mobilenetv2_imagenet100_cluster_analysis.yaml b/configs/vision_prune/paper_2026_locked/mobilenetv2_imagenet100_cluster_analysis.yaml deleted file mode 100644 index 3c78d90f..00000000 --- a/configs/vision_prune/paper_2026_locked/mobilenetv2_imagenet100_cluster_analysis.yaml +++ /dev/null @@ -1,321 +0,0 @@ -{ - "name": "mobilenetv2_imagenet100_cluster_analysis", - "description": "", - "tags": [], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": null, - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 42, - "train_before_dropout": true, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": true, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - 
"use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - 
"within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - 
"do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "random", - "magnitude", - "activation_mean", - "taylor", - "network_slimming", - "geometric_median", - "hrank", - "composite", - "cluster_aware", - "cluster_aware_annealed", - "cluster_aware_taylor_blend", - "cluster_aware_depth_adaptive" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 3, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 50, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "pdf", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": 
"./results/vision/mobilenetv2_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_2026_locked/mobilenetv2_tinyimagenet_cluster_analysis.yaml b/configs/vision_prune/paper_2026_locked/mobilenetv2_tinyimagenet_cluster_analysis.yaml deleted file mode 100644 index 46b1f4ee..00000000 --- a/configs/vision_prune/paper_2026_locked/mobilenetv2_tinyimagenet_cluster_analysis.yaml +++ /dev/null @@ -1,325 +0,0 @@ -{ - "name": "mobilenetv2_tinyimagenet_cluster_analysis", - "description": "", - "tags": [], - "experiment_type": "cluster_analysis", - "model_name": "mobilenetv2", - "model_config": { - "num_classes": 200 - }, - 
"pretrained": true, - "model_checkpoint": null, - "dataset_name": "tinyimagenet", - "dataset_config": { - "root": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200" - }, - "data_path": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200", - "batch_size": 128, - "num_workers": 8, - "device": "cuda", - "seed": 42, - "train_before_dropout": true, - "training_epochs": 50, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": true, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - 
"use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 
0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "random", - "magnitude", - "activation_mean", - "taylor", - "network_slimming", - "geometric_median", - "hrank", - "composite", - "cluster_aware", - "cluster_aware_annealed", - "cluster_aware_taylor_blend", - 
"cluster_aware_depth_adaptive" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 3, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 50, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "pdf", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_tinyimagenet/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_tinyimagenet", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - 
"use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_2026_locked/resnet18_cifar100_cluster_analysis.yaml b/configs/vision_prune/paper_2026_locked/resnet18_cifar100_cluster_analysis.yaml deleted file mode 100644 index eca4e73c..00000000 --- a/configs/vision_prune/paper_2026_locked/resnet18_cifar100_cluster_analysis.yaml +++ /dev/null @@ -1,290 +0,0 @@ -{ - "name": "resnet18_cifar100_cluster_analysis", - "description": "", - "tags": [], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": null, - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": true, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": true, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - 
"metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - 
"run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - 
"generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "random", - "magnitude", - "activation_mean", - "taylor", - "network_slimming", - "geometric_median", - "hrank", - "composite", - "cluster_aware", - "cluster_aware_annealed", - "cluster_aware_taylor_blend", - "cluster_aware_depth_adaptive" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_2026_locked/resnet18_cifar10_cluster_analysis.yaml b/configs/vision_prune/paper_2026_locked/resnet18_cifar10_cluster_analysis.yaml deleted file mode 100644 index 482bad91..00000000 --- a/configs/vision_prune/paper_2026_locked/resnet18_cifar10_cluster_analysis.yaml +++ /dev/null @@ -1,428 +0,0 @@ -{ - "name": "resnet18_cifar10_cluster_analysis", - "description": "", - "tags": [], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": null, - "dataset_name": "cifar10", - "dataset_config": {}, - "data_path": "./data", - 
"batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": true, - "training_epochs": 50, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": true, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", 
- "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - 
"cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "random", - "magnitude", - "activation_mean", - "taylor", - "network_slimming", - "geometric_median", - "hrank", - "composite", - "cluster_aware", - "cluster_aware_annealed", - "cluster_aware_taylor_blend", - "cluster_aware_depth_adaptive" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - 
"pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "pdf", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": { - "enabled": true, - "accuracy_vs_sparsity": true, - "accuracy_vs_flops": true, - "accuracy_vs_params": true, - "methods_to_compare": [ - "random", - "magnitude", - "taylor", - "composite", - "cluster_aware", - "network_slimming" - ] - }, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": { - "enabled": true, - "by_layer": true, - "by_cluster": true - }, - "layer_importance_heatmap": true, - "sensitivity_curves": true, - "efficiency_tradeoffs": { - "enabled": true, - "accuracy_vs_flops": true, - "accuracy_vs_latency": true, - "accuracy_vs_params": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "redundancy", - "synergy" - ], - [ - "magnitude", - "rayleigh_quotient" - ], - [ - "magnitude", - "taylor" - ], - [ - "taylor", - "rayleigh_quotient" - ] - ], - "save_plots": true, - "cluster_analysis": { - "enabled": true, - "scatter_3d": true, - "cluster_evolution_by_layer": true, - "cluster_purity": true - }, - "layer_importance": { - "enabled": true, - "heatmap": true, - "bar_chart": 
true - }, - "fine_tuning_recovery": { - "enabled": true, - "by_method": true, - "by_sparsity": true - } - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": true, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": { - "layer_indices": "all", - "save_scores": true, - "generate_plots": true, - "metrics": [ - "rayleigh_quotient", - "redundancy", - "synergy", - "magnitude", - "taylor", - "activation_sparsity" - ], - "plots": { - "histograms": true, - "scatter_plots": 
true, - "pruning_curves": true, - "layer_comparison": true, - "filter_correlation": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "magnitude", - "taylor" - ], - [ - "redundancy", - "synergy" - ] - ] - } -} diff --git a/configs/vision_prune/paper_2026_locked/resnet18_imagenet100_cluster_analysis.yaml b/configs/vision_prune/paper_2026_locked/resnet18_imagenet100_cluster_analysis.yaml deleted file mode 100644 index 652bbfeb..00000000 --- a/configs/vision_prune/paper_2026_locked/resnet18_imagenet100_cluster_analysis.yaml +++ /dev/null @@ -1,321 +0,0 @@ -{ - "name": "resnet18_imagenet100_cluster_analysis", - "description": "", - "tags": [], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": null, - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 42, - "train_before_dropout": true, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": true, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - 
"num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - 
"rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - 
"generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "random", - "magnitude", - "activation_mean", - "taylor", - "network_slimming", - "geometric_median", - "hrank", - "composite", - "cluster_aware", - "cluster_aware_annealed", - "cluster_aware_taylor_blend", - "cluster_aware_depth_adaptive" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 3, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 50, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "pdf", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - 
"sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_2026_locked/resnet18_tinyimagenet_cluster_analysis.yaml b/configs/vision_prune/paper_2026_locked/resnet18_tinyimagenet_cluster_analysis.yaml deleted file mode 100644 index 1d90ecf2..00000000 --- a/configs/vision_prune/paper_2026_locked/resnet18_tinyimagenet_cluster_analysis.yaml +++ /dev/null @@ -1,325 +0,0 @@ -{ - "name": 
"resnet18_tinyimagenet_cluster_analysis", - "description": "", - "tags": [], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": { - "num_classes": 200 - }, - "pretrained": true, - "model_checkpoint": null, - "dataset_name": "tinyimagenet", - "dataset_config": { - "root": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200" - }, - "data_path": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200", - "batch_size": 128, - "num_workers": 8, - "device": "cuda", - "seed": 42, - "train_before_dropout": true, - "training_epochs": 50, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": true, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - 
"force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - 
"use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - 
"pruning_strategies": [ - "random", - "magnitude", - "activation_mean", - "taylor", - "network_slimming", - "geometric_median", - "hrank", - "composite", - "cluster_aware", - "cluster_aware_annealed", - "cluster_aware_taylor_blend", - "cluster_aware_depth_adaptive" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 3, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 50, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "pdf", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_tinyimagenet/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_tinyimagenet", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER", - "wandb_project": null, - "wandb_entity": null, - 
"distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_2026_locked/resnet50_imagenet100_cluster_analysis.yaml b/configs/vision_prune/paper_2026_locked/resnet50_imagenet100_cluster_analysis.yaml deleted file mode 100644 index 7551a7f3..00000000 --- a/configs/vision_prune/paper_2026_locked/resnet50_imagenet100_cluster_analysis.yaml +++ /dev/null @@ -1,321 +0,0 @@ -{ - "name": "resnet50_imagenet100_cluster_analysis", - "description": "", - "tags": [], - "experiment_type": "cluster_analysis", - "model_name": "resnet50", - "model_config": {}, - "pretrained": true, - "model_checkpoint": null, - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 42, - "train_before_dropout": true, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": 
"adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": true, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - 
"measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - 
"cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "random", - "magnitude", - "activation_mean", - "taylor", - "network_slimming", - "geometric_median", - "hrank", - "composite", - "cluster_aware", - "cluster_aware_annealed", - "cluster_aware_taylor_blend", - "cluster_aware_depth_adaptive" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 3, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - 
"fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 50, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "pdf", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet50_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet50_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - 
}, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_2026_locked/vgg16_cifar100_cluster_analysis.yaml b/configs/vision_prune/paper_2026_locked/vgg16_cifar100_cluster_analysis.yaml deleted file mode 100644 index feabd0c1..00000000 --- a/configs/vision_prune/paper_2026_locked/vgg16_cifar100_cluster_analysis.yaml +++ /dev/null @@ -1,290 +0,0 @@ -{ - "name": "vgg16_cifar100_cluster_analysis", - "description": "", - "tags": [], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": null, - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": true, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": true, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - 
"force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, 
- "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, 
- "pruning_strategies": [ - "random", - "magnitude", - "activation_mean", - "taylor", - "network_slimming", - "geometric_median", - "hrank", - "composite", - "cluster_aware", - "cluster_aware_annealed", - "cluster_aware_taylor_blend", - "cluster_aware_depth_adaptive" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - 
"do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_2026_locked/vgg16_cifar10_cluster_analysis.yaml b/configs/vision_prune/paper_2026_locked/vgg16_cifar10_cluster_analysis.yaml deleted file mode 100644 index 6034fa37..00000000 --- a/configs/vision_prune/paper_2026_locked/vgg16_cifar10_cluster_analysis.yaml +++ /dev/null @@ -1,423 +0,0 @@ -{ - "name": "vgg16_cifar10_cluster_analysis", - "description": "", - "tags": [], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": null, - "dataset_name": "cifar10", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": true, - "training_epochs": 50, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": true, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - 
"force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 
0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - 
"generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "random", - "magnitude", - "activation_mean", - "taylor", - "network_slimming", - "geometric_median", - "hrank", - "composite", - "cluster_aware", - "cluster_aware_annealed", - "cluster_aware_taylor_blend", - "cluster_aware_depth_adaptive" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "pdf", - "plot_dpi": 
300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": { - "enabled": true, - "accuracy_vs_sparsity": true, - "accuracy_vs_flops": true, - "accuracy_vs_params": true, - "methods_to_compare": [ - "random", - "magnitude", - "taylor", - "composite", - "cluster_aware", - "network_slimming" - ] - }, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": { - "enabled": true, - "by_layer": true, - "by_cluster": true - }, - "layer_importance_heatmap": true, - "sensitivity_curves": true, - "efficiency_tradeoffs": { - "enabled": true, - "accuracy_vs_flops": true, - "accuracy_vs_latency": true, - "accuracy_vs_params": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "redundancy", - "synergy" - ], - [ - "magnitude", - "rayleigh_quotient" - ], - [ - "magnitude", - "taylor" - ], - [ - "taylor", - "rayleigh_quotient" - ] - ], - "save_plots": true, - "cluster_analysis": { - "enabled": true, - "scatter_3d": true, - "cluster_evolution_by_layer": true, - "cluster_purity": true - }, - "layer_importance": { - "enabled": true, - "heatmap": true, - "bar_chart": true - }, - "fine_tuning_recovery": { - "enabled": true, - "by_method": true, - "by_sparsity": true - } - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - 
"rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": { - "layer_indices": "all", - "save_scores": true, - "generate_plots": true, - "metrics": [ - "rayleigh_quotient", - "redundancy", - "synergy", - "magnitude", - "taylor", - "activation_sparsity" - ], - "plots": { - "histograms": true, - "scatter_plots": true, - "pruning_curves": true, - "layer_comparison": true, - "filter_correlation": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "magnitude", - "taylor" - ], - [ - "redundancy", - "synergy" - ] - ] - } -} diff --git a/configs/vision_prune/paper_2026_locked/vgg16_imagenet100_cluster_analysis.yaml b/configs/vision_prune/paper_2026_locked/vgg16_imagenet100_cluster_analysis.yaml deleted file mode 100644 index 31b61db3..00000000 --- a/configs/vision_prune/paper_2026_locked/vgg16_imagenet100_cluster_analysis.yaml +++ /dev/null @@ -1,326 +0,0 @@ -{ - "name": 
"vgg16_imagenet100_cluster_analysis", - "description": "", - "tags": [], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": null, - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 42, - "train_before_dropout": true, - "training_epochs": 20, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": true, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - 
"use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "clustering_first_metric": "rq", - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "taylor_act_samples": 1024, - "taylor_act_batch_size": 16, - "geometric_median_iters": 
10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "chip_images": 256, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "random", - "magnitude", - "activation_mean", - "taylor", - "taylor_act", - 
"network_slimming", - "geometric_median", - "hrank", - "composite", - "cluster_aware", - "cluster_aware_annealed", - "cluster_aware_taylor_blend", - "cluster_aware_depth_adaptive" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 3, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 50, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": true, - "plot_format": "pdf", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - 
"evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_2026_locked/vgg16_tinyimagenet_cluster_analysis.yaml b/configs/vision_prune/paper_2026_locked/vgg16_tinyimagenet_cluster_analysis.yaml deleted file mode 100644 index c3fbee4e..00000000 --- a/configs/vision_prune/paper_2026_locked/vgg16_tinyimagenet_cluster_analysis.yaml +++ /dev/null @@ -1,325 +0,0 @@ -{ - "name": "vgg16_tinyimagenet_cluster_analysis", - "description": "", - "tags": [], - "experiment_type": "cluster_analysis", - "model_name": "vgg16", - "model_config": { - "num_classes": 200 - }, - "pretrained": true, - "model_checkpoint": null, - "dataset_name": "tinyimagenet", - "dataset_config": { - "root": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200" - }, - "data_path": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200", - "batch_size": 128, - "num_workers": 8, - "device": "cuda", - "seed": 42, - 
"train_before_dropout": true, - "training_epochs": 50, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": true, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - 
"activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - 
"cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "random", - "magnitude", - "activation_mean", - "taylor", - "network_slimming", - "geometric_median", - "hrank", - "composite", - "cluster_aware", - "cluster_aware_annealed", - "cluster_aware_taylor_blend", - "cluster_aware_depth_adaptive" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 3, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - 
"pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 50, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "pdf", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_tinyimagenet/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_tinyimagenet", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - 
"enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/alexnet_imagenet100_locked_breadth_seed1011.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/alexnet_imagenet100_locked_breadth_seed1011.yaml deleted file mode 100644 index 73361693..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/alexnet_imagenet100_locked_breadth_seed1011.yaml +++ /dev/null @@ -1,305 +0,0 @@ -{ - "name": "alexnet_imagenet100_locked_breadth_seed1011", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "imagenet100", - "alexnet", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "alexnet", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/alexnet_imagenet100_cluster_analysis_s50refine_20260306_073243_64241303/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 128, - "num_workers": 8, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 20, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - 
"gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", 
- "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - 
"generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.5, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 150, - "visualization_options": { - "enabled": true, - "save_format": "png", - "dpi": 150, - "generate": [ - "metric_distributions", - "cluster_scatter", - "cluster_evolution", - "halo_influence_matrix", - 
"pruning_curves", - "cascade_damage" - ] - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/alexnet_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/alexnet_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "threshold_percentile": 90, - "influence_type": "activation_weighted", - "skip_residual_edges": false - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/alexnet_imagenet100_locked_breadth_seed123.yaml 
b/configs/vision_prune/paper_2026_locked_benchmark_breadth/alexnet_imagenet100_locked_breadth_seed123.yaml deleted file mode 100644 index 8c703155..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/alexnet_imagenet100_locked_breadth_seed123.yaml +++ /dev/null @@ -1,305 +0,0 @@ -{ - "name": "alexnet_imagenet100_locked_breadth_seed123", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "imagenet100", - "alexnet", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "alexnet", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/alexnet_imagenet100_cluster_analysis_s50refine_20260306_064154_64241300/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 128, - "num_workers": 8, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 20, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - 
"num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - 
"outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - 
"dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.5, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 150, - "visualization_options": { - "enabled": true, - "save_format": "png", - "dpi": 150, - "generate": [ - "metric_distributions", - "cluster_scatter", - "cluster_evolution", - "halo_influence_matrix", - "pruning_curves", - "cascade_damage" - ] - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/alexnet_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/alexnet_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, 
- "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "threshold_percentile": 90, - "influence_type": "activation_weighted", - "skip_residual_edges": false - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/alexnet_imagenet100_locked_breadth_seed42.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/alexnet_imagenet100_locked_breadth_seed42.yaml deleted file mode 100644 index 3e2b6de2..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/alexnet_imagenet100_locked_breadth_seed42.yaml +++ /dev/null @@ -1,305 +0,0 @@ -{ - "name": "alexnet_imagenet100_locked_breadth_seed42", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "imagenet100", - "alexnet", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "alexnet", - "model_config": {}, - "pretrained": true, - "model_checkpoint": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/alexnet_imagenet100_cluster_analysis_s50refine_20260306_063506_64241299/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 128, - "num_workers": 8, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 20, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - 
"alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - 
"cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.5, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - 
"pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 150, - "visualization_options": { - "enabled": true, - "save_format": "png", - "dpi": 150, - "generate": [ - "metric_distributions", - "cluster_scatter", - "cluster_evolution", - "halo_influence_matrix", - "pruning_curves", - "cascade_damage" - ] - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/alexnet_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/alexnet_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "threshold_percentile": 90, - "influence_type": "activation_weighted", - "skip_residual_edges": false - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": 
false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/alexnet_imagenet100_locked_breadth_seed456.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/alexnet_imagenet100_locked_breadth_seed456.yaml deleted file mode 100644 index 9f444c70..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/alexnet_imagenet100_locked_breadth_seed456.yaml +++ /dev/null @@ -1,305 +0,0 @@ -{ - "name": "alexnet_imagenet100_locked_breadth_seed456", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "imagenet100", - "alexnet", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "alexnet", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/alexnet_imagenet100_cluster_analysis_s50refine_20260306_070452_64241301/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 128, - "num_workers": 8, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 20, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - 
"activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - 
"task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - 
"generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.5, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 150, - "visualization_options": { - "enabled": true, - "save_format": "png", - "dpi": 150, - "generate": [ - "metric_distributions", - "cluster_scatter", - "cluster_evolution", - "halo_influence_matrix", - "pruning_curves", - "cascade_damage" - ] - }, - 
"post_analysis": {}, - "checkpoint_dir": "./results/vision/alexnet_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/alexnet_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "threshold_percentile": 90, - "influence_type": "activation_weighted", - "skip_residual_edges": false - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/alexnet_imagenet100_locked_breadth_seed789.yaml 
b/configs/vision_prune/paper_2026_locked_benchmark_breadth/alexnet_imagenet100_locked_breadth_seed789.yaml deleted file mode 100644 index 4dbcf9f3..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/alexnet_imagenet100_locked_breadth_seed789.yaml +++ /dev/null @@ -1,305 +0,0 @@ -{ - "name": "alexnet_imagenet100_locked_breadth_seed789", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "imagenet100", - "alexnet", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "alexnet", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/alexnet_imagenet100_cluster_analysis_s50refine_20260306_072619_64241302/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 128, - "num_workers": 8, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 20, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - 
"num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - 
"outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - 
"dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.5, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 150, - "visualization_options": { - "enabled": true, - "save_format": "png", - "dpi": 150, - "generate": [ - "metric_distributions", - "cluster_scatter", - "cluster_evolution", - "halo_influence_matrix", - "pruning_curves", - "cascade_damage" - ] - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/alexnet_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/alexnet_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, 
- "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "threshold_percentile": 90, - "influence_type": "activation_weighted", - "skip_residual_edges": false - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_cifar10_locked_breadth_seed1011.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_cifar10_locked_breadth_seed1011.yaml deleted file mode 100644 index 9638f661..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_cifar10_locked_breadth_seed1011.yaml +++ /dev/null @@ -1,325 +0,0 @@ -{ - "name": "mobilenetv2_cifar10_locked_breadth_seed1011", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "cifar10", - "mobilenetv2", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar10_cluster_analysis_20260207_193655_59409622/checkpoints/trained_model.pth", - "dataset_name": "cifar10", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - 
"cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - 
"cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - 
"pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - 
"do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_cifar10_locked_breadth_seed123.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_cifar10_locked_breadth_seed123.yaml deleted file mode 100644 index db4b3718..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_cifar10_locked_breadth_seed123.yaml +++ /dev/null @@ -1,325 +0,0 @@ -{ - "name": "mobilenetv2_cifar10_locked_breadth_seed123", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "cifar10", - "mobilenetv2", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar10_cluster_analysis_20260126_123830_57082561/checkpoints/trained_model.pth", - "dataset_name": "cifar10", - 
"dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": 
true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - 
"cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - 
"pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, 
- "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_cifar10_locked_breadth_seed42.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_cifar10_locked_breadth_seed42.yaml deleted file mode 100644 index 591f0c2c..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_cifar10_locked_breadth_seed42.yaml +++ /dev/null @@ -1,325 +0,0 @@ -{ - "name": "mobilenetv2_cifar10_locked_breadth_seed42", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "cifar10", - "mobilenetv2", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar10_cluster_analysis_20260126_123831_57082560/checkpoints/trained_model.pth", - "dataset_name": "cifar10", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.01, - "optimizer": 
"sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - 
"measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - 
"cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - 
"fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - 
"do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_cifar10_locked_breadth_seed456.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_cifar10_locked_breadth_seed456.yaml deleted file mode 100644 index 75002bf0..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_cifar10_locked_breadth_seed456.yaml +++ /dev/null @@ -1,325 +0,0 @@ -{ - "name": "mobilenetv2_cifar10_locked_breadth_seed456", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "cifar10", - "mobilenetv2", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar10_cluster_analysis_20260126_123831_57082559/checkpoints/trained_model.pth", - "dataset_name": "cifar10", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - 
"gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - 
"rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - 
"generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - 
"pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - 
"supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_cifar10_locked_breadth_seed789.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_cifar10_locked_breadth_seed789.yaml deleted file mode 100644 index 3046ed6b..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_cifar10_locked_breadth_seed789.yaml +++ /dev/null @@ -1,325 +0,0 @@ -{ - "name": "mobilenetv2_cifar10_locked_breadth_seed789", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "cifar10", - "mobilenetv2", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar10_cluster_analysis_20260207_193700_59409621/checkpoints/trained_model.pth", - "dataset_name": "cifar10", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - 
"use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": 
"indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - 
"generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - 
"plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - 
"tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_imagenet100_locked_breadth_seed1011.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_imagenet100_locked_breadth_seed1011.yaml deleted file mode 100644 index 07993035..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_imagenet100_locked_breadth_seed1011.yaml +++ /dev/null @@ -1,328 +0,0 @@ -{ - "name": "mobilenetv2_imagenet100_locked_breadth_seed1011", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "imagenet100", - "mobilenetv2", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_imagenet100_cluster_analysis_s50refine_20260306_054545_64241293/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - 
"definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - 
"simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 
0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - 
"histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": 
{}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_imagenet100_locked_breadth_seed123.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_imagenet100_locked_breadth_seed123.yaml deleted file mode 100644 index b44f66e9..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_imagenet100_locked_breadth_seed123.yaml +++ /dev/null @@ -1,328 +0,0 @@ -{ - "name": "mobilenetv2_imagenet100_locked_breadth_seed123", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "imagenet100", - "mobilenetv2", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_imagenet100_cluster_analysis_s50refine_20260306_045426_64241290/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - 
"use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - 
"activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - 
"generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": 
true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - 
"fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_imagenet100_locked_breadth_seed42.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_imagenet100_locked_breadth_seed42.yaml deleted file mode 100644 index 98fa45cd..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_imagenet100_locked_breadth_seed42.yaml +++ /dev/null @@ -1,328 +0,0 @@ -{ - "name": "mobilenetv2_imagenet100_locked_breadth_seed42", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "imagenet100", - "mobilenetv2", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_imagenet100_cluster_analysis_s50refine_20260306_043742_64241289/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - 
"force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - 
"task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - 
"generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - 
"cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - 
"fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_imagenet100_locked_breadth_seed456.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_imagenet100_locked_breadth_seed456.yaml deleted file mode 100644 index 066f6d66..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_imagenet100_locked_breadth_seed456.yaml +++ /dev/null @@ -1,328 +0,0 @@ -{ - "name": "mobilenetv2_imagenet100_locked_breadth_seed456", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "imagenet100", - "mobilenetv2", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_imagenet100_cluster_analysis_s50refine_20260306_045917_64241291/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - 
"cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - 
"spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - 
"generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - 
"halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - 
"pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_imagenet100_locked_breadth_seed789.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_imagenet100_locked_breadth_seed789.yaml deleted file mode 100644 index dfce3e68..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_imagenet100_locked_breadth_seed789.yaml +++ /dev/null @@ -1,328 +0,0 @@ -{ - "name": "mobilenetv2_imagenet100_locked_breadth_seed789", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "imagenet100", - "mobilenetv2", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_imagenet100_cluster_analysis_s50refine_20260306_052432_64241292/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - 
"sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, 
- "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - 
"generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": 
true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git 
a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_tinyimagenet_locked_breadth_seed1011.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_tinyimagenet_locked_breadth_seed1011.yaml deleted file mode 100644 index a90560a7..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_tinyimagenet_locked_breadth_seed1011.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "mobilenetv2_tinyimagenet_locked_breadth_seed1011", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "tinyimagenet", - "mobilenetv2", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenetv2", - "model_config": { - "num_classes": 200 - }, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_tinyimagenet_cluster_analysis_seed1011_20260218_073158_60879287/checkpoints/trained_model.pth", - "dataset_name": "tinyimagenet", - "dataset_config": { - "root": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200" - }, - "data_path": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200", - "batch_size": 128, - "num_workers": 8, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - 
"gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": 
"pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - 
"generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": 
true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_tinyimagenet/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_tinyimagenet", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - 
"fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_tinyimagenet_locked_breadth_seed123.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_tinyimagenet_locked_breadth_seed123.yaml deleted file mode 100644 index 9b2f3bd5..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_tinyimagenet_locked_breadth_seed123.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "mobilenetv2_tinyimagenet_locked_breadth_seed123", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "tinyimagenet", - "mobilenetv2", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenetv2", - "model_config": { - "num_classes": 200 - }, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_tinyimagenet_cluster_analysis_seed123_20260218_052659_60939090/checkpoints/trained_model.pth", - "dataset_name": "tinyimagenet", - "dataset_config": { - "root": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200" - }, - "data_path": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200", - "batch_size": 128, - "num_workers": 8, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - 
"force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 
0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - 
"generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - 
"enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_tinyimagenet/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_tinyimagenet", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - 
"use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_tinyimagenet_locked_breadth_seed42.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_tinyimagenet_locked_breadth_seed42.yaml deleted file mode 100644 index e7550525..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_tinyimagenet_locked_breadth_seed42.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "mobilenetv2_tinyimagenet_locked_breadth_seed42", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "tinyimagenet", - "mobilenetv2", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenetv2", - "model_config": { - "num_classes": 200 - }, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_tinyimagenet_cluster_analysis_seed42_20260218_045227_60937327/checkpoints/trained_model.pth", - "dataset_name": "tinyimagenet", - "dataset_config": { - "root": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200" - }, - "data_path": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200", - "batch_size": 128, - "num_workers": 8, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - 
"activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - 
"synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - 
"generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - 
"pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_tinyimagenet/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_tinyimagenet", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - 
"do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_tinyimagenet_locked_breadth_seed456.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_tinyimagenet_locked_breadth_seed456.yaml deleted file mode 100644 index 94f914f9..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_tinyimagenet_locked_breadth_seed456.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "mobilenetv2_tinyimagenet_locked_breadth_seed456", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "tinyimagenet", - "mobilenetv2", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenetv2", - "model_config": { - "num_classes": 200 - }, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_tinyimagenet_cluster_analysis_seed456_20260218_062612_60942010/checkpoints/trained_model.pth", - "dataset_name": "tinyimagenet", - "dataset_config": { - "root": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200" - }, - "data_path": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200", - "batch_size": 128, - "num_workers": 8, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - 
"weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - 
"alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 
1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - 
"alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_tinyimagenet/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_tinyimagenet", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - 
"do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_tinyimagenet_locked_breadth_seed789.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_tinyimagenet_locked_breadth_seed789.yaml deleted file mode 100644 index 0ad80696..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/mobilenetv2_tinyimagenet_locked_breadth_seed789.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "mobilenetv2_tinyimagenet_locked_breadth_seed789", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "tinyimagenet", - "mobilenetv2", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenetv2", - "model_config": { - "num_classes": 200 - }, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_tinyimagenet_cluster_analysis_seed789_20260218_070006_60944058/checkpoints/trained_model.pth", - "dataset_name": "tinyimagenet", - "dataset_config": { - "root": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200" - }, - "data_path": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200", - "batch_size": 128, - "num_workers": 8, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - 
"training_epochs": 50, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - 
"compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - 
"cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - 
"pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_tinyimagenet/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_tinyimagenet", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - 
"enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_cifar10_locked_breadth_seed1011.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_cifar10_locked_breadth_seed1011.yaml deleted file mode 100644 index e317e180..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_cifar10_locked_breadth_seed1011.yaml +++ /dev/null @@ -1,435 +0,0 @@ -{ - "name": "resnet18_cifar10_locked_breadth_seed1011", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "cifar10", - "resnet18", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar10_cluster_analysis_20260206_162747_59203880/checkpoints/trained_model.pth", - "dataset_name": "cifar10", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - 
"weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - 
"alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 
1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - 
"alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": { - "enabled": true, - "accuracy_vs_sparsity": true, - "accuracy_vs_flops": true, - "accuracy_vs_params": true, - "methods_to_compare": [ - "random", - "magnitude", - "taylor", - "composite", - "cluster_aware", - "network_slimming" - ] - }, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": { - "enabled": true, - "by_layer": true, - "by_cluster": true - }, - "layer_importance_heatmap": true, - "sensitivity_curves": true, - "efficiency_tradeoffs": { - "enabled": true, - "accuracy_vs_flops": true, - "accuracy_vs_latency": true, - "accuracy_vs_params": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "redundancy", - "synergy" - ], - [ - "magnitude", - "rayleigh_quotient" - ], - [ - "magnitude", - "taylor" - ], - [ - "taylor", - "rayleigh_quotient" - ] - ], - "save_plots": true, - "cluster_analysis": { - "enabled": true, - "scatter_3d": true, - "cluster_evolution_by_layer": true, - "cluster_purity": true - }, - "layer_importance": { - "enabled": true, - "heatmap": true, - "bar_chart": true - }, - "fine_tuning_recovery": { - "enabled": true, - "by_method": true, - "by_sparsity": true - } - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": 
"./results/vision/resnet18_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": true, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": { - "layer_indices": "all", - "save_scores": true, - "generate_plots": true, - "metrics": [ - "rayleigh_quotient", - "redundancy", - "synergy", - "magnitude", - "taylor", - "activation_sparsity" - ], - "plots": { - "histograms": true, - "scatter_plots": true, - "pruning_curves": true, - "layer_comparison": true, - "filter_correlation": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ 
- "magnitude", - "taylor" - ], - [ - "redundancy", - "synergy" - ] - ] - }, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_cifar10_locked_breadth_seed123.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_cifar10_locked_breadth_seed123.yaml deleted file mode 100644 index 96b33bfd..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_cifar10_locked_breadth_seed123.yaml +++ /dev/null @@ -1,435 +0,0 @@ -{ - "name": "resnet18_cifar10_locked_breadth_seed123", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "cifar10", - "resnet18", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar10_cluster_analysis_20260126_123831_57082554/checkpoints/trained_model.pth", - "dataset_name": "cifar10", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - 
"use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - 
"activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - 
"generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - 
"correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": { - "enabled": true, - "accuracy_vs_sparsity": true, - "accuracy_vs_flops": true, - "accuracy_vs_params": true, - "methods_to_compare": [ - "random", - "magnitude", - "taylor", - "composite", - "cluster_aware", - "network_slimming" - ] - }, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": { - "enabled": true, - "by_layer": true, - "by_cluster": true - }, - "layer_importance_heatmap": true, - "sensitivity_curves": true, - "efficiency_tradeoffs": { - "enabled": true, - "accuracy_vs_flops": true, - "accuracy_vs_latency": true, - "accuracy_vs_params": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "redundancy", - "synergy" - ], - [ - "magnitude", - "rayleigh_quotient" - ], - [ - "magnitude", - "taylor" - ], - [ - "taylor", - "rayleigh_quotient" - ] - ], - "save_plots": true, - "cluster_analysis": { - "enabled": true, - "scatter_3d": true, - "cluster_evolution_by_layer": true, - "cluster_purity": true - }, - "layer_importance": { - "enabled": true, - "heatmap": true, - "bar_chart": true - }, - "fine_tuning_recovery": { - "enabled": true, - "by_method": true, - "by_sparsity": true - } - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - 
"evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": true, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": { - "layer_indices": "all", - "save_scores": true, - "generate_plots": true, - "metrics": [ - "rayleigh_quotient", - "redundancy", - "synergy", - "magnitude", - "taylor", - "activation_sparsity" - ], - "plots": { - "histograms": true, - "scatter_plots": true, - "pruning_curves": true, - "layer_comparison": true, - "filter_correlation": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "magnitude", - "taylor" - ], - [ - "redundancy", - "synergy" - ] - ] - }, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_cifar10_locked_breadth_seed42.yaml 
b/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_cifar10_locked_breadth_seed42.yaml deleted file mode 100644 index 15cc3b96..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_cifar10_locked_breadth_seed42.yaml +++ /dev/null @@ -1,435 +0,0 @@ -{ - "name": "resnet18_cifar10_locked_breadth_seed42", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "cifar10", - "resnet18", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar10_cluster_analysis_20260126_123830_57082553/checkpoints/trained_model.pth", - "dataset_name": "cifar10", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - 
"activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - 
"run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - 
"generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": { - "enabled": true, - "accuracy_vs_sparsity": true, - "accuracy_vs_flops": true, - "accuracy_vs_params": true, - "methods_to_compare": [ - "random", - "magnitude", - "taylor", - "composite", - "cluster_aware", 
- "network_slimming" - ] - }, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": { - "enabled": true, - "by_layer": true, - "by_cluster": true - }, - "layer_importance_heatmap": true, - "sensitivity_curves": true, - "efficiency_tradeoffs": { - "enabled": true, - "accuracy_vs_flops": true, - "accuracy_vs_latency": true, - "accuracy_vs_params": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "redundancy", - "synergy" - ], - [ - "magnitude", - "rayleigh_quotient" - ], - [ - "magnitude", - "taylor" - ], - [ - "taylor", - "rayleigh_quotient" - ] - ], - "save_plots": true, - "cluster_analysis": { - "enabled": true, - "scatter_3d": true, - "cluster_evolution_by_layer": true, - "cluster_purity": true - }, - "layer_importance": { - "enabled": true, - "heatmap": true, - "bar_chart": true - }, - "fine_tuning_recovery": { - "enabled": true, - "by_method": true, - "by_sparsity": true - } - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - 
"supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": true, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": { - "layer_indices": "all", - "save_scores": true, - "generate_plots": true, - "metrics": [ - "rayleigh_quotient", - "redundancy", - "synergy", - "magnitude", - "taylor", - "activation_sparsity" - ], - "plots": { - "histograms": true, - "scatter_plots": true, - "pruning_curves": true, - "layer_comparison": true, - "filter_correlation": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "magnitude", - "taylor" - ], - [ - "redundancy", - "synergy" - ] - ] - }, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_cifar10_locked_breadth_seed456.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_cifar10_locked_breadth_seed456.yaml deleted file mode 100644 index ed4a0013..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_cifar10_locked_breadth_seed456.yaml +++ /dev/null @@ -1,435 +0,0 @@ -{ - "name": "resnet18_cifar10_locked_breadth_seed456", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - 
"paper2026", - "locked_benchmark_breadth", - "cifar10", - "resnet18", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar10_cluster_analysis_20260126_123832_57082549/checkpoints/trained_model.pth", - "dataset_name": "cifar10", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - 
"rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - 
"taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - 
"cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": { - "enabled": true, - "accuracy_vs_sparsity": true, - "accuracy_vs_flops": true, - "accuracy_vs_params": true, - "methods_to_compare": [ - "random", - "magnitude", - "taylor", - "composite", - "cluster_aware", - "network_slimming" - ] - }, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": { - "enabled": true, - "by_layer": true, - "by_cluster": true - }, - "layer_importance_heatmap": true, - "sensitivity_curves": true, - "efficiency_tradeoffs": { - "enabled": true, - "accuracy_vs_flops": true, - "accuracy_vs_latency": true, - "accuracy_vs_params": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - 
"synergy" - ], - [ - "redundancy", - "synergy" - ], - [ - "magnitude", - "rayleigh_quotient" - ], - [ - "magnitude", - "taylor" - ], - [ - "taylor", - "rayleigh_quotient" - ] - ], - "save_plots": true, - "cluster_analysis": { - "enabled": true, - "scatter_3d": true, - "cluster_evolution_by_layer": true, - "cluster_purity": true - }, - "layer_importance": { - "enabled": true, - "heatmap": true, - "bar_chart": true - }, - "fine_tuning_recovery": { - "enabled": true, - "by_method": true, - "by_sparsity": true - } - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": true, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - 
"supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": { - "layer_indices": "all", - "save_scores": true, - "generate_plots": true, - "metrics": [ - "rayleigh_quotient", - "redundancy", - "synergy", - "magnitude", - "taylor", - "activation_sparsity" - ], - "plots": { - "histograms": true, - "scatter_plots": true, - "pruning_curves": true, - "layer_comparison": true, - "filter_correlation": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "magnitude", - "taylor" - ], - [ - "redundancy", - "synergy" - ] - ] - }, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_cifar10_locked_breadth_seed789.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_cifar10_locked_breadth_seed789.yaml deleted file mode 100644 index f7029f05..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_cifar10_locked_breadth_seed789.yaml +++ /dev/null @@ -1,435 +0,0 @@ -{ - "name": "resnet18_cifar10_locked_breadth_seed789", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "cifar10", - "resnet18", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar10_cluster_analysis_20260206_162741_59203877/checkpoints/trained_model.pth", - "dataset_name": "cifar10", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - 
"num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - 
"gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - 
"cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - 
"pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": { - "enabled": true, - "accuracy_vs_sparsity": true, - "accuracy_vs_flops": true, - "accuracy_vs_params": true, - "methods_to_compare": [ - "random", - "magnitude", - "taylor", - "composite", - "cluster_aware", - "network_slimming" - ] - }, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": { - "enabled": true, - "by_layer": true, - "by_cluster": true - }, - "layer_importance_heatmap": true, - "sensitivity_curves": true, - "efficiency_tradeoffs": { - "enabled": true, - "accuracy_vs_flops": true, - "accuracy_vs_latency": true, - "accuracy_vs_params": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "redundancy", - "synergy" - ], - [ - "magnitude", - "rayleigh_quotient" - ], - [ - "magnitude", - "taylor" - ], - [ - "taylor", - "rayleigh_quotient" - ] - ], - "save_plots": true, - "cluster_analysis": { - "enabled": true, - "scatter_3d": true, - "cluster_evolution_by_layer": true, - "cluster_purity": true - }, - "layer_importance": { - "enabled": true, - "heatmap": true, - "bar_chart": true - }, - 
"fine_tuning_recovery": { - "enabled": true, - "by_method": true, - "by_sparsity": true - } - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": true, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": { - "layer_indices": "all", - "save_scores": true, - "generate_plots": true, - "metrics": [ - "rayleigh_quotient", - "redundancy", - "synergy", - "magnitude", - "taylor", - "activation_sparsity" - ], - "plots": 
{ - "histograms": true, - "scatter_plots": true, - "pruning_curves": true, - "layer_comparison": true, - "filter_correlation": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "magnitude", - "taylor" - ], - [ - "redundancy", - "synergy" - ] - ] - }, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_imagenet100_locked_breadth_seed1011.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_imagenet100_locked_breadth_seed1011.yaml deleted file mode 100644 index 665ceade..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_imagenet100_locked_breadth_seed1011.yaml +++ /dev/null @@ -1,328 +0,0 @@ -{ - "name": "resnet18_imagenet100_locked_breadth_seed1011", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "imagenet100", - "resnet18", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_imagenet100_cluster_analysis_s50refine_20260306_035935_64241280/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", 
- "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - 
"synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - 
"generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - 
"pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - 
"do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_imagenet100_locked_breadth_seed123.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_imagenet100_locked_breadth_seed123.yaml deleted file mode 100644 index 79f78ebb..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_imagenet100_locked_breadth_seed123.yaml +++ /dev/null @@ -1,328 +0,0 @@ -{ - "name": "resnet18_imagenet100_locked_breadth_seed123", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "imagenet100", - "resnet18", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_imagenet100_cluster_analysis_s50refine_20260306_023255_64241274/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": 
{ - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - 
"cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - 
"generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, 
- "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": 
null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_imagenet100_locked_breadth_seed42.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_imagenet100_locked_breadth_seed42.yaml deleted file mode 100644 index b2a3a056..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_imagenet100_locked_breadth_seed42.yaml +++ /dev/null @@ -1,328 +0,0 @@ -{ - "name": "resnet18_imagenet100_locked_breadth_seed42", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "imagenet100", - "resnet18", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_imagenet100_cluster_analysis_s50refine_20260306_023248_64241272/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - 
"force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 
0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - 
"generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - 
"enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - 
"use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_imagenet100_locked_breadth_seed456.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_imagenet100_locked_breadth_seed456.yaml deleted file mode 100644 index 9f56d197..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_imagenet100_locked_breadth_seed456.yaml +++ /dev/null @@ -1,328 +0,0 @@ -{ - "name": "resnet18_imagenet100_locked_breadth_seed456", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "imagenet100", - "resnet18", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_imagenet100_cluster_analysis_s50refine_20260306_023642_64241276/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - 
"definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - 
"simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 
0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - 
"histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - 
"hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_imagenet100_locked_breadth_seed789.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_imagenet100_locked_breadth_seed789.yaml deleted file mode 100644 index 3aca8802..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_imagenet100_locked_breadth_seed789.yaml +++ /dev/null @@ -1,328 +0,0 @@ -{ - "name": "resnet18_imagenet100_locked_breadth_seed789", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "imagenet100", - "resnet18", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_imagenet100_cluster_analysis_s50refine_20260306_025815_64241278/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - 
"use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": 
"flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - 
"generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": 
true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": 
false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_tinyimagenet_locked_breadth_seed1011.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_tinyimagenet_locked_breadth_seed1011.yaml deleted file mode 100644 index f87609ee..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_tinyimagenet_locked_breadth_seed1011.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "resnet18_tinyimagenet_locked_breadth_seed1011", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "tinyimagenet", - "resnet18", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": { - "num_classes": 200 - }, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_tinyimagenet_cluster_analysis_seed1011_20260218_010016_60919345/checkpoints/trained_model.pth", - "dataset_name": "tinyimagenet", - "dataset_config": { - "root": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200" - }, - "data_path": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200", - "batch_size": 128, - "num_workers": 8, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 
100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - 
"simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - 
"generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - 
"style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_tinyimagenet/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_tinyimagenet", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - 
"model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_tinyimagenet_locked_breadth_seed123.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_tinyimagenet_locked_breadth_seed123.yaml deleted file mode 100644 index 0b2b42a1..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_tinyimagenet_locked_breadth_seed123.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "resnet18_tinyimagenet_locked_breadth_seed123", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "tinyimagenet", - "resnet18", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": { - "num_classes": 200 - }, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_tinyimagenet_cluster_analysis_seed123_20260217_223946_60879289/checkpoints/trained_model.pth", - "dataset_name": "tinyimagenet", - "dataset_config": { - "root": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200" - }, - "data_path": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200", - "batch_size": 128, - "num_workers": 8, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - 
"use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - 
"calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - 
"generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - 
"plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_tinyimagenet/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_tinyimagenet", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - 
"use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_tinyimagenet_locked_breadth_seed42.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_tinyimagenet_locked_breadth_seed42.yaml deleted file mode 100644 index 4ef9b051..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_tinyimagenet_locked_breadth_seed42.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "resnet18_tinyimagenet_locked_breadth_seed42", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "tinyimagenet", - "resnet18", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": { - "num_classes": 200 - }, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_tinyimagenet_cluster_analysis_seed42_20260217_223944_60879288/checkpoints/trained_model.pth", - "dataset_name": "tinyimagenet", - "dataset_config": { - "root": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200" - }, - "data_path": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200", - "batch_size": 128, - "num_workers": 8, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", 
- "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - 
"synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - 
"generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - 
"pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_tinyimagenet/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_tinyimagenet", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - 
"do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_tinyimagenet_locked_breadth_seed456.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_tinyimagenet_locked_breadth_seed456.yaml deleted file mode 100644 index 7b628be3..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_tinyimagenet_locked_breadth_seed456.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "resnet18_tinyimagenet_locked_breadth_seed456", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "tinyimagenet", - "resnet18", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": { - "num_classes": 200 - }, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_tinyimagenet_cluster_analysis_seed456_20260217_235456_60903461/checkpoints/trained_model.pth", - "dataset_name": "tinyimagenet", - "dataset_config": { - "root": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200" - }, - "data_path": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200", - "batch_size": 128, - "num_workers": 8, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - 
"momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - 
"alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 
1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - 
"alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_tinyimagenet/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_tinyimagenet", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - 
"do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_tinyimagenet_locked_breadth_seed789.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_tinyimagenet_locked_breadth_seed789.yaml deleted file mode 100644 index 5069bca7..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet18_tinyimagenet_locked_breadth_seed789.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "resnet18_tinyimagenet_locked_breadth_seed789", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "tinyimagenet", - "resnet18", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": { - "num_classes": 200 - }, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_tinyimagenet_cluster_analysis_seed789_20260218_000359_60904287/checkpoints/trained_model.pth", - "dataset_name": "tinyimagenet", - "dataset_config": { - "root": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200" - }, - "data_path": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200", - "batch_size": 128, - "num_workers": 8, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 50, - 
"learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - 
"save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - 
"cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - 
"fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_tinyimagenet/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_tinyimagenet", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - 
"use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet50_imagenet100_locked_breadth_seed1011.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet50_imagenet100_locked_breadth_seed1011.yaml deleted file mode 100644 index ea097fda..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet50_imagenet100_locked_breadth_seed1011.yaml +++ /dev/null @@ -1,328 +0,0 @@ -{ - "name": "resnet50_imagenet100_locked_breadth_seed1011", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "imagenet100", - "resnet50", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet50", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet50_imagenet100_cluster_analysis_mb200_20260306_030010_64241279/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": 
{}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - 
"alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - 
"generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - 
"fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet50_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet50_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - 
"generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet50_imagenet100_locked_breadth_seed123.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet50_imagenet100_locked_breadth_seed123.yaml deleted file mode 100644 index 27354ffb..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet50_imagenet100_locked_breadth_seed123.yaml +++ /dev/null @@ -1,328 +0,0 @@ -{ - "name": "resnet50_imagenet100_locked_breadth_seed123", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "imagenet100", - "resnet50", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet50", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet50_imagenet100_cluster_analysis_mb200_20260306_023258_64241273/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - 
"do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": 
[], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - 
"generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": 
"forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet50_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet50_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - 
"do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet50_imagenet100_locked_breadth_seed42.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet50_imagenet100_locked_breadth_seed42.yaml deleted file mode 100644 index 4633c6aa..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet50_imagenet100_locked_breadth_seed42.yaml +++ /dev/null @@ -1,328 +0,0 @@ -{ - "name": "resnet50_imagenet100_locked_breadth_seed42", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "imagenet100", - "resnet50", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet50", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet50_imagenet100_cluster_analysis_mb200_20260306_023248_64241271/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - 
"activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - 
"synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - 
"generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - 
"pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet50_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet50_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - 
"do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet50_imagenet100_locked_breadth_seed456.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet50_imagenet100_locked_breadth_seed456.yaml deleted file mode 100644 index e7c7d440..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet50_imagenet100_locked_breadth_seed456.yaml +++ /dev/null @@ -1,328 +0,0 @@ -{ - "name": "resnet50_imagenet100_locked_breadth_seed456", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "imagenet100", - "resnet50", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet50", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet50_imagenet100_cluster_analysis_mb200_20260306_023256_64241275/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - 
"rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - 
"cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - 
"generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, 
- "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet50_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet50_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": 
null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet50_imagenet100_locked_breadth_seed789.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet50_imagenet100_locked_breadth_seed789.yaml deleted file mode 100644 index edb4caf2..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/resnet50_imagenet100_locked_breadth_seed789.yaml +++ /dev/null @@ -1,328 +0,0 @@ -{ - "name": "resnet50_imagenet100_locked_breadth_seed789", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "imagenet100", - "resnet50", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet50", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet50_imagenet100_cluster_analysis_mb200_20260306_025559_64241277/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - 
"force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 
0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - 
"generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - 
"enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet50_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet50_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - 
"use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_cifar10_locked_breadth_seed123.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_cifar10_locked_breadth_seed123.yaml deleted file mode 100644 index 80ec125d..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_cifar10_locked_breadth_seed123.yaml +++ /dev/null @@ -1,430 +0,0 @@ -{ - "name": "vgg16_cifar10_locked_breadth_seed123", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "cifar10", - "vgg16", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar10_cluster_analysis_20260126_123830_57082556/checkpoints/trained_model.pth", - "dataset_name": "cifar10", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - 
"gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": 
"pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - 
"generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - 
"correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": { - "enabled": true, - "accuracy_vs_sparsity": true, - "accuracy_vs_flops": true, - "accuracy_vs_params": true, - "methods_to_compare": [ - "random", - "magnitude", - "taylor", - "composite", - "cluster_aware", - "network_slimming" - ] - }, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": { - "enabled": true, - "by_layer": true, - "by_cluster": true - }, - "layer_importance_heatmap": true, - "sensitivity_curves": true, - "efficiency_tradeoffs": { - "enabled": true, - "accuracy_vs_flops": true, - "accuracy_vs_latency": true, - "accuracy_vs_params": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "redundancy", - "synergy" - ], - [ - "magnitude", - "rayleigh_quotient" - ], - [ - "magnitude", - "taylor" - ], - [ - "taylor", - "rayleigh_quotient" - ] - ], - "save_plots": true, - "cluster_analysis": { - "enabled": true, - "scatter_3d": true, - "cluster_evolution_by_layer": true, - "cluster_purity": true - }, - "layer_importance": { - "enabled": true, - "heatmap": true, - "bar_chart": true - }, - "fine_tuning_recovery": { - "enabled": true, - "by_method": true, - "by_sparsity": true - } - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, 
- "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": { - "layer_indices": "all", - "save_scores": true, - "generate_plots": true, - "metrics": [ - "rayleigh_quotient", - "redundancy", - "synergy", - "magnitude", - "taylor", - "activation_sparsity" - ], - "plots": { - "histograms": true, - "scatter_plots": true, - "pruning_curves": true, - "layer_comparison": true, - "filter_correlation": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "magnitude", - "taylor" - ], - [ - "redundancy", - "synergy" - ] - ] - }, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_cifar10_locked_breadth_seed42.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_cifar10_locked_breadth_seed42.yaml deleted file mode 100644 index 2fe3f782..00000000 --- 
a/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_cifar10_locked_breadth_seed42.yaml +++ /dev/null @@ -1,430 +0,0 @@ -{ - "name": "vgg16_cifar10_locked_breadth_seed42", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "cifar10", - "vgg16", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar10_cluster_analysis_20260126_123831_57082555/checkpoints/trained_model.pth", - "dataset_name": "cifar10", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, 
- "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - 
"within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - 
"do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": { - "enabled": true, - "accuracy_vs_sparsity": true, - "accuracy_vs_flops": true, - "accuracy_vs_params": true, - "methods_to_compare": [ - "random", - "magnitude", - "taylor", - "composite", - "cluster_aware", - "network_slimming" - ] - }, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": { - "enabled": true, - "by_layer": true, - "by_cluster": true - }, - 
"layer_importance_heatmap": true, - "sensitivity_curves": true, - "efficiency_tradeoffs": { - "enabled": true, - "accuracy_vs_flops": true, - "accuracy_vs_latency": true, - "accuracy_vs_params": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "redundancy", - "synergy" - ], - [ - "magnitude", - "rayleigh_quotient" - ], - [ - "magnitude", - "taylor" - ], - [ - "taylor", - "rayleigh_quotient" - ] - ], - "save_plots": true, - "cluster_analysis": { - "enabled": true, - "scatter_3d": true, - "cluster_evolution_by_layer": true, - "cluster_purity": true - }, - "layer_importance": { - "enabled": true, - "heatmap": true, - "bar_chart": true - }, - "fine_tuning_recovery": { - "enabled": true, - "by_method": true, - "by_sparsity": true - } - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - 
"generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": { - "layer_indices": "all", - "save_scores": true, - "generate_plots": true, - "metrics": [ - "rayleigh_quotient", - "redundancy", - "synergy", - "magnitude", - "taylor", - "activation_sparsity" - ], - "plots": { - "histograms": true, - "scatter_plots": true, - "pruning_curves": true, - "layer_comparison": true, - "filter_correlation": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "magnitude", - "taylor" - ], - [ - "redundancy", - "synergy" - ] - ] - }, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_cifar10_locked_breadth_seed456.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_cifar10_locked_breadth_seed456.yaml deleted file mode 100644 index 32f8f59c..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_cifar10_locked_breadth_seed456.yaml +++ /dev/null @@ -1,430 +0,0 @@ -{ - "name": "vgg16_cifar10_locked_breadth_seed456", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "cifar10", - "vgg16", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar10_cluster_analysis_20260126_123830_57082552/checkpoints/trained_model.pth", - "dataset_name": "cifar10", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - 
"cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - 
"cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - 
"pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": { - "enabled": true, - "accuracy_vs_sparsity": true, - "accuracy_vs_flops": true, - "accuracy_vs_params": true, - "methods_to_compare": [ - "random", - "magnitude", - "taylor", - "composite", - "cluster_aware", - "network_slimming" - ] - }, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": { - "enabled": true, - "by_layer": true, - "by_cluster": true - }, - "layer_importance_heatmap": true, - "sensitivity_curves": true, - "efficiency_tradeoffs": { - "enabled": true, - "accuracy_vs_flops": true, - "accuracy_vs_latency": true, - "accuracy_vs_params": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "redundancy", - "synergy" - ], - [ - "magnitude", - "rayleigh_quotient" - ], - [ - "magnitude", - "taylor" - ], - [ - "taylor", - "rayleigh_quotient" - ] - ], - "save_plots": true, - "cluster_analysis": { - 
"enabled": true, - "scatter_3d": true, - "cluster_evolution_by_layer": true, - "cluster_purity": true - }, - "layer_importance": { - "enabled": true, - "heatmap": true, - "bar_chart": true - }, - "fine_tuning_recovery": { - "enabled": true, - "by_method": true, - "by_sparsity": true - } - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": { - "layer_indices": "all", - "save_scores": true, - "generate_plots": true, - "metrics": [ - 
"rayleigh_quotient", - "redundancy", - "synergy", - "magnitude", - "taylor", - "activation_sparsity" - ], - "plots": { - "histograms": true, - "scatter_plots": true, - "pruning_curves": true, - "layer_comparison": true, - "filter_correlation": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "magnitude", - "taylor" - ], - [ - "redundancy", - "synergy" - ] - ] - }, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_imagenet100_locked_breadth_seed1011.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_imagenet100_locked_breadth_seed1011.yaml deleted file mode 100644 index 6ce91e28..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_imagenet100_locked_breadth_seed1011.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "vgg16_imagenet100_locked_breadth_seed1011", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "imagenet100", - "vgg16", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_imagenet100_cluster_analysis_s50refine_20260306_043333_64241285/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 20, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - 
"do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": 
[], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "clustering_first_metric": "rq", - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "taylor_act_samples": 1024, - "taylor_act_batch_size": 16, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "chip_images": 256, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - 
"cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - 
"fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - 
"compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_imagenet100_locked_breadth_seed123.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_imagenet100_locked_breadth_seed123.yaml deleted file mode 100644 index 22ea1ea5..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_imagenet100_locked_breadth_seed123.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "vgg16_imagenet100_locked_breadth_seed123", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "imagenet100", - "vgg16", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_imagenet100_cluster_analysis_s50refine_20260306_040222_64241282/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 20, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - 
"num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - 
"alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "clustering_first_metric": "rq", - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "taylor_act_samples": 1024, - "taylor_act_batch_size": 16, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "chip_images": 256, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 
0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - 
"fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - 
"compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_imagenet100_locked_breadth_seed42.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_imagenet100_locked_breadth_seed42.yaml deleted file mode 100644 index a8ac094a..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_imagenet100_locked_breadth_seed42.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "vgg16_imagenet100_locked_breadth_seed42", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "imagenet100", - "vgg16", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_imagenet100_cluster_analysis_s50refine_20260306_040107_64241281/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 20, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - 
"num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - 
"alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "clustering_first_metric": "rq", - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "taylor_act_samples": 1024, - "taylor_act_batch_size": 16, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "chip_images": 256, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 
0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - 
"fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - 
"compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_imagenet100_locked_breadth_seed456.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_imagenet100_locked_breadth_seed456.yaml deleted file mode 100644 index c8318a12..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_imagenet100_locked_breadth_seed456.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "vgg16_imagenet100_locked_breadth_seed456", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "imagenet100", - "vgg16", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_imagenet100_cluster_analysis_s50refine_20260306_042343_64241283/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 20, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - 
"num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - 
"alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "clustering_first_metric": "rq", - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "taylor_act_samples": 1024, - "taylor_act_batch_size": 16, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "chip_images": 256, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 
0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - 
"fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - 
"compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_imagenet100_locked_breadth_seed789.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_imagenet100_locked_breadth_seed789.yaml deleted file mode 100644 index fdd532df..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_imagenet100_locked_breadth_seed789.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "vgg16_imagenet100_locked_breadth_seed789", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "imagenet100", - "vgg16", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_imagenet100_cluster_analysis_s50refine_20260306_043236_64241284/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 20, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - 
"num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - 
"alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "clustering_first_metric": "rq", - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "taylor_act_samples": 1024, - "taylor_act_batch_size": 16, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "chip_images": 256, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 
0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - 
"fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - 
"compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_tinyimagenet_locked_breadth_seed1011.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_tinyimagenet_locked_breadth_seed1011.yaml deleted file mode 100644 index 74d90a76..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_tinyimagenet_locked_breadth_seed1011.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "vgg16_tinyimagenet_locked_breadth_seed1011", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "tinyimagenet", - "vgg16", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16", - "model_config": { - "num_classes": 200 - }, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_tinyimagenet_cluster_analysis_seed1011_20260218_040351_60935233/checkpoints/trained_model.pth", - "dataset_name": "tinyimagenet", - "dataset_config": { - "root": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200" - }, - "data_path": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200", - "batch_size": 128, - "num_workers": 8, - "device": "cuda", - "seed": 1011, - 
"train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - 
"activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - 
"cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - 
"pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_tinyimagenet/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_tinyimagenet", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - 
"halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_tinyimagenet_locked_breadth_seed123.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_tinyimagenet_locked_breadth_seed123.yaml deleted file mode 100644 index 92b4677f..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_tinyimagenet_locked_breadth_seed123.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "vgg16_tinyimagenet_locked_breadth_seed123", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "tinyimagenet", - "vgg16", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16", - "model_config": { - "num_classes": 200 - }, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_tinyimagenet_cluster_analysis_seed123_20260218_020537_60926060/checkpoints/trained_model.pth", - "dataset_name": "tinyimagenet", - "dataset_config": { - "root": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200" - }, - "data_path": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200", - "batch_size": 128, - "num_workers": 8, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": 
"mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - 
"cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - 
"pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_tinyimagenet/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_tinyimagenet", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - 
"scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_tinyimagenet_locked_breadth_seed42.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_tinyimagenet_locked_breadth_seed42.yaml deleted file mode 100644 index e5486a36..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_tinyimagenet_locked_breadth_seed42.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "vgg16_tinyimagenet_locked_breadth_seed42", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "tinyimagenet", - "vgg16", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16", - "model_config": { - "num_classes": 200 - }, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_tinyimagenet_cluster_analysis_seed42_20260218_011136_60921155/checkpoints/trained_model.pth", - "dataset_name": "tinyimagenet", - "dataset_config": { - "root": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200" - }, - "data_path": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200", - "batch_size": 128, - "num_workers": 8, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - 
"tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - 
"cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": 
"low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_tinyimagenet/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_tinyimagenet", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - 
"do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_tinyimagenet_locked_breadth_seed456.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_tinyimagenet_locked_breadth_seed456.yaml deleted file mode 100644 index d1a05932..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_tinyimagenet_locked_breadth_seed456.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "vgg16_tinyimagenet_locked_breadth_seed456", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "tinyimagenet", - "vgg16", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16", - "model_config": { - "num_classes": 200 - }, - "pretrained": true, - "model_checkpoint": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_tinyimagenet_cluster_analysis_seed456_20260218_024046_60929405/checkpoints/trained_model.pth", - "dataset_name": "tinyimagenet", - "dataset_config": { - "root": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200" - }, - "data_path": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200", - "batch_size": 128, - "num_workers": 8, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 
0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 
1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - 
"cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_tinyimagenet/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_tinyimagenet", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - 
"do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_tinyimagenet_locked_breadth_seed789.yaml b/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_tinyimagenet_locked_breadth_seed789.yaml deleted file mode 100644 index 196f8ec1..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_breadth/vgg16_tinyimagenet_locked_breadth_seed789.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "vgg16_tinyimagenet_locked_breadth_seed789", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "locked_benchmark_breadth", - "tinyimagenet", - "vgg16", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16", 
- "model_config": { - "num_classes": 200 - }, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_tinyimagenet_cluster_analysis_seed789_20260218_032859_60932756/checkpoints/trained_model.pth", - "dataset_name": "tinyimagenet", - "dataset_config": { - "root": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200" - }, - "data_path": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/DATA/tiny-imagenet-200", - "batch_size": 128, - "num_workers": 8, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, 
- "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - 
"use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - 
"pruning_strategies": [ - "taylor", - "chip", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_tinyimagenet/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_tinyimagenet", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_breadth_20260407", - "wandb_project": null, - "wandb_entity": 
null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_extension/mobilenetv2_cifar10_locked_extension_seed1011.yaml b/configs/vision_prune/paper_2026_locked_benchmark_extension/mobilenetv2_cifar10_locked_extension_seed1011.yaml deleted file mode 100644 index b217b478..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_extension/mobilenetv2_cifar10_locked_extension_seed1011.yaml +++ /dev/null @@ -1,324 +0,0 @@ -{ - "name": "mobilenetv2_cifar10_locked_extension_seed1011", - "description": "Locked benchmark extension for NeurIPS two-axis consistency checks", - "tags": [ - "paper2026", - "locked_benchmark_extension", - "cifar10", - 
"mobilenetv2", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar10_cluster_analysis_20260207_193655_59409622/checkpoints/trained_model.pth", - "dataset_name": "cifar10", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - 
"gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - 
"geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - 
"cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_extension_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - 
"evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_extension/mobilenetv2_cifar10_locked_extension_seed123.yaml b/configs/vision_prune/paper_2026_locked_benchmark_extension/mobilenetv2_cifar10_locked_extension_seed123.yaml deleted file mode 100644 index 3550ea26..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_extension/mobilenetv2_cifar10_locked_extension_seed123.yaml +++ /dev/null @@ -1,324 +0,0 @@ -{ - "name": "mobilenetv2_cifar10_locked_extension_seed123", - "description": "Locked benchmark extension for NeurIPS two-axis consistency checks", - "tags": [ - "paper2026", - "locked_benchmark_extension", - "cifar10", - "mobilenetv2", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - 
"model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar10_cluster_analysis_20260126_123830_57082561/checkpoints/trained_model.pth", - "dataset_name": "cifar10", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - 
"force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 
1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 
0.5, - 0.7, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_extension_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": 
true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_extension/mobilenetv2_cifar10_locked_extension_seed42.yaml b/configs/vision_prune/paper_2026_locked_benchmark_extension/mobilenetv2_cifar10_locked_extension_seed42.yaml deleted file mode 100644 index 1eaa8a30..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_extension/mobilenetv2_cifar10_locked_extension_seed42.yaml +++ /dev/null @@ -1,324 +0,0 @@ -{ - "name": "mobilenetv2_cifar10_locked_extension_seed42", - "description": "Locked benchmark extension for NeurIPS two-axis consistency checks", - "tags": [ - "paper2026", - "locked_benchmark_extension", - "cifar10", - "mobilenetv2", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar10_cluster_analysis_20260126_123831_57082560/checkpoints/trained_model.pth", - "dataset_name": "cifar10", - 
"dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": 
true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - 
"cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": 
"layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_extension_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - 
"supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_extension/mobilenetv2_cifar10_locked_extension_seed456.yaml b/configs/vision_prune/paper_2026_locked_benchmark_extension/mobilenetv2_cifar10_locked_extension_seed456.yaml deleted file mode 100644 index ee52210f..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_extension/mobilenetv2_cifar10_locked_extension_seed456.yaml +++ /dev/null @@ -1,324 +0,0 @@ -{ - "name": "mobilenetv2_cifar10_locked_extension_seed456", - "description": "Locked benchmark extension for NeurIPS two-axis consistency checks", - "tags": [ - "paper2026", - "locked_benchmark_extension", - "cifar10", - "mobilenetv2", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar10_cluster_analysis_20260126_123831_57082559/checkpoints/trained_model.pth", - "dataset_name": "cifar10", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 
0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": 
true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 
0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - 
"fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_extension_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - 
"do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_extension/mobilenetv2_cifar10_locked_extension_seed789.yaml b/configs/vision_prune/paper_2026_locked_benchmark_extension/mobilenetv2_cifar10_locked_extension_seed789.yaml deleted file mode 100644 index 7ae16db3..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_extension/mobilenetv2_cifar10_locked_extension_seed789.yaml +++ /dev/null @@ -1,324 +0,0 @@ -{ - "name": "mobilenetv2_cifar10_locked_extension_seed789", - "description": "Locked benchmark extension for NeurIPS two-axis consistency checks", - "tags": [ - "paper2026", - "locked_benchmark_extension", - "cifar10", - "mobilenetv2", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar10_cluster_analysis_20260207_193700_59409621/checkpoints/trained_model.pth", - "dataset_name": "cifar10", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - 
"rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - 
"alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - 
"generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - 
"dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_extension_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - 
"do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet18_cifar10_locked_extension_seed1011.yaml b/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet18_cifar10_locked_extension_seed1011.yaml deleted file mode 100644 index 88c880d8..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet18_cifar10_locked_extension_seed1011.yaml +++ /dev/null @@ -1,434 +0,0 @@ -{ - "name": "resnet18_cifar10_locked_extension_seed1011", - "description": "Locked benchmark extension for NeurIPS two-axis consistency checks", - "tags": [ - "paper2026", - "locked_benchmark_extension", - "cifar10", - "resnet18", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar10_cluster_analysis_20260206_162747_59203880/checkpoints/trained_model.pth", - "dataset_name": "cifar10", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - 
"use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - 
"calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - 
"generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - 
"plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": { - "enabled": true, - "accuracy_vs_sparsity": true, - "accuracy_vs_flops": true, - "accuracy_vs_params": true, - "methods_to_compare": [ - "random", - "magnitude", - "taylor", - "composite", - "cluster_aware", - "network_slimming" - ] - }, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": { - "enabled": true, - "by_layer": true, - "by_cluster": true - }, - "layer_importance_heatmap": true, - "sensitivity_curves": true, - "efficiency_tradeoffs": { - "enabled": true, - "accuracy_vs_flops": true, - "accuracy_vs_latency": true, - "accuracy_vs_params": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "redundancy", - "synergy" - ], - [ - "magnitude", - "rayleigh_quotient" - ], - [ - "magnitude", - "taylor" - ], - [ - "taylor", - "rayleigh_quotient" - ] - ], - "save_plots": true, - "cluster_analysis": { - "enabled": true, - "scatter_3d": true, - "cluster_evolution_by_layer": true, - "cluster_purity": true - }, - "layer_importance": { - "enabled": true, - "heatmap": true, - "bar_chart": true - }, - "fine_tuning_recovery": { - "enabled": true, - "by_method": true, - "by_sparsity": true - } - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_extension_20260407", - "wandb_project": null, 
- "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": true, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": { - "layer_indices": "all", - "save_scores": true, - "generate_plots": true, - "metrics": [ - "rayleigh_quotient", - "redundancy", - "synergy", - "magnitude", - "taylor", - "activation_sparsity" - ], - "plots": { - "histograms": true, - "scatter_plots": true, - "pruning_curves": true, - "layer_comparison": true, - "filter_correlation": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "magnitude", - "taylor" - ], - [ - "redundancy", - "synergy" - ] - ] - }, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git 
a/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet18_cifar10_locked_extension_seed123.yaml b/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet18_cifar10_locked_extension_seed123.yaml deleted file mode 100644 index 3760b5db..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet18_cifar10_locked_extension_seed123.yaml +++ /dev/null @@ -1,434 +0,0 @@ -{ - "name": "resnet18_cifar10_locked_extension_seed123", - "description": "Locked benchmark extension for NeurIPS two-axis consistency checks", - "tags": [ - "paper2026", - "locked_benchmark_extension", - "cifar10", - "resnet18", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar10_cluster_analysis_20260126_123831_57082554/checkpoints/trained_model.pth", - "dataset_name": "cifar10", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": 
true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - 
"run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - 
"generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": { - "enabled": true, - "accuracy_vs_sparsity": true, - "accuracy_vs_flops": true, - "accuracy_vs_params": true, - 
"methods_to_compare": [ - "random", - "magnitude", - "taylor", - "composite", - "cluster_aware", - "network_slimming" - ] - }, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": { - "enabled": true, - "by_layer": true, - "by_cluster": true - }, - "layer_importance_heatmap": true, - "sensitivity_curves": true, - "efficiency_tradeoffs": { - "enabled": true, - "accuracy_vs_flops": true, - "accuracy_vs_latency": true, - "accuracy_vs_params": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "redundancy", - "synergy" - ], - [ - "magnitude", - "rayleigh_quotient" - ], - [ - "magnitude", - "taylor" - ], - [ - "taylor", - "rayleigh_quotient" - ] - ], - "save_plots": true, - "cluster_analysis": { - "enabled": true, - "scatter_3d": true, - "cluster_evolution_by_layer": true, - "cluster_purity": true - }, - "layer_importance": { - "enabled": true, - "heatmap": true, - "bar_chart": true - }, - "fine_tuning_recovery": { - "enabled": true, - "by_method": true, - "by_sparsity": true - } - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_extension_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - 
"do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": true, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": { - "layer_indices": "all", - "save_scores": true, - "generate_plots": true, - "metrics": [ - "rayleigh_quotient", - "redundancy", - "synergy", - "magnitude", - "taylor", - "activation_sparsity" - ], - "plots": { - "histograms": true, - "scatter_plots": true, - "pruning_curves": true, - "layer_comparison": true, - "filter_correlation": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "magnitude", - "taylor" - ], - [ - "redundancy", - "synergy" - ] - ] - }, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet18_cifar10_locked_extension_seed42.yaml b/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet18_cifar10_locked_extension_seed42.yaml deleted file mode 100644 index 62f3fbb7..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet18_cifar10_locked_extension_seed42.yaml +++ /dev/null @@ -1,434 +0,0 @@ -{ - "name": 
"resnet18_cifar10_locked_extension_seed42", - "description": "Locked benchmark extension for NeurIPS two-axis consistency checks", - "tags": [ - "paper2026", - "locked_benchmark_extension", - "cifar10", - "resnet18", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar10_cluster_analysis_20260126_123830_57082553/checkpoints/trained_model.pth", - "dataset_name": "cifar10", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, 
- "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - 
"bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - 
"measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": { - "enabled": true, - "accuracy_vs_sparsity": true, - "accuracy_vs_flops": true, - "accuracy_vs_params": true, - "methods_to_compare": [ - "random", - "magnitude", - "taylor", - "composite", - "cluster_aware", - "network_slimming" - ] - }, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": { - "enabled": true, - "by_layer": true, - "by_cluster": true - }, - "layer_importance_heatmap": true, - "sensitivity_curves": true, - "efficiency_tradeoffs": { - "enabled": true, - "accuracy_vs_flops": true, - "accuracy_vs_latency": 
true, - "accuracy_vs_params": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "redundancy", - "synergy" - ], - [ - "magnitude", - "rayleigh_quotient" - ], - [ - "magnitude", - "taylor" - ], - [ - "taylor", - "rayleigh_quotient" - ] - ], - "save_plots": true, - "cluster_analysis": { - "enabled": true, - "scatter_3d": true, - "cluster_evolution_by_layer": true, - "cluster_purity": true - }, - "layer_importance": { - "enabled": true, - "heatmap": true, - "bar_chart": true - }, - "fine_tuning_recovery": { - "enabled": true, - "by_method": true, - "by_sparsity": true - } - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_extension_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": true, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - 
"do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": { - "layer_indices": "all", - "save_scores": true, - "generate_plots": true, - "metrics": [ - "rayleigh_quotient", - "redundancy", - "synergy", - "magnitude", - "taylor", - "activation_sparsity" - ], - "plots": { - "histograms": true, - "scatter_plots": true, - "pruning_curves": true, - "layer_comparison": true, - "filter_correlation": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "magnitude", - "taylor" - ], - [ - "redundancy", - "synergy" - ] - ] - }, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet18_cifar10_locked_extension_seed456.yaml b/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet18_cifar10_locked_extension_seed456.yaml deleted file mode 100644 index b2edb903..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet18_cifar10_locked_extension_seed456.yaml +++ /dev/null @@ -1,434 +0,0 @@ -{ - "name": "resnet18_cifar10_locked_extension_seed456", - "description": "Locked benchmark extension for NeurIPS two-axis consistency checks", - "tags": [ - "paper2026", - "locked_benchmark_extension", - "cifar10", - "resnet18", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar10_cluster_analysis_20260126_123832_57082549/checkpoints/trained_model.pth", - "dataset_name": "cifar10", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - 
"cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - 
"cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9, - 0.95 - ], - 
"pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": { - "enabled": true, - "accuracy_vs_sparsity": true, - "accuracy_vs_flops": true, - "accuracy_vs_params": true, - "methods_to_compare": [ - "random", - "magnitude", - "taylor", - "composite", - "cluster_aware", - "network_slimming" - ] - }, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": { - "enabled": true, - "by_layer": true, - "by_cluster": true - }, - "layer_importance_heatmap": true, - "sensitivity_curves": true, - "efficiency_tradeoffs": { - "enabled": true, - "accuracy_vs_flops": true, - "accuracy_vs_latency": true, - "accuracy_vs_params": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "redundancy", - "synergy" - ], - [ - "magnitude", - "rayleigh_quotient" - ], - [ - "magnitude", - "taylor" - ], - [ - "taylor", - "rayleigh_quotient" - ] - ], - "save_plots": true, - "cluster_analysis": { - 
"enabled": true, - "scatter_3d": true, - "cluster_evolution_by_layer": true, - "cluster_purity": true - }, - "layer_importance": { - "enabled": true, - "heatmap": true, - "bar_chart": true - }, - "fine_tuning_recovery": { - "enabled": true, - "by_method": true, - "by_sparsity": true - } - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_extension_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": true, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": { - 
"layer_indices": "all", - "save_scores": true, - "generate_plots": true, - "metrics": [ - "rayleigh_quotient", - "redundancy", - "synergy", - "magnitude", - "taylor", - "activation_sparsity" - ], - "plots": { - "histograms": true, - "scatter_plots": true, - "pruning_curves": true, - "layer_comparison": true, - "filter_correlation": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "magnitude", - "taylor" - ], - [ - "redundancy", - "synergy" - ] - ] - }, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet18_cifar10_locked_extension_seed789.yaml b/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet18_cifar10_locked_extension_seed789.yaml deleted file mode 100644 index 016026b8..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet18_cifar10_locked_extension_seed789.yaml +++ /dev/null @@ -1,434 +0,0 @@ -{ - "name": "resnet18_cifar10_locked_extension_seed789", - "description": "Locked benchmark extension for NeurIPS two-axis consistency checks", - "tags": [ - "paper2026", - "locked_benchmark_extension", - "cifar10", - "resnet18", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar10_cluster_analysis_20260206_162741_59203877/checkpoints/trained_model.pth", - "dataset_name": "cifar10", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - 
"weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - 
"alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 
1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - 
"alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": { - "enabled": true, - "accuracy_vs_sparsity": true, - "accuracy_vs_flops": true, - "accuracy_vs_params": true, - "methods_to_compare": [ - "random", - "magnitude", - "taylor", - "composite", - "cluster_aware", - "network_slimming" - ] - }, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": { - "enabled": true, - "by_layer": true, - "by_cluster": true - }, - "layer_importance_heatmap": true, - "sensitivity_curves": true, - "efficiency_tradeoffs": { - "enabled": true, - "accuracy_vs_flops": true, - "accuracy_vs_latency": true, - "accuracy_vs_params": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "redundancy", - "synergy" - ], - [ - "magnitude", - "rayleigh_quotient" - ], - [ - "magnitude", - "taylor" - ], - [ - "taylor", - "rayleigh_quotient" - ] - ], - "save_plots": true, - "cluster_analysis": { - "enabled": true, - "scatter_3d": true, - "cluster_evolution_by_layer": true, - "cluster_purity": true - }, - "layer_importance": { - "enabled": true, - "heatmap": true, - "bar_chart": true - }, - "fine_tuning_recovery": { - "enabled": true, - "by_method": true, - "by_sparsity": true - } - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": 
"./results/vision/resnet18_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_extension_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": true, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": { - "layer_indices": "all", - "save_scores": true, - "generate_plots": true, - "metrics": [ - "rayleigh_quotient", - "redundancy", - "synergy", - "magnitude", - "taylor", - "activation_sparsity" - ], - "plots": { - "histograms": true, - "scatter_plots": true, - "pruning_curves": true, - "layer_comparison": true, - "filter_correlation": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - 
[ - "magnitude", - "taylor" - ], - [ - "redundancy", - "synergy" - ] - ] - }, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet50_imagenet100_locked_extension_seed1011.yaml b/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet50_imagenet100_locked_extension_seed1011.yaml deleted file mode 100644 index b6670689..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet50_imagenet100_locked_extension_seed1011.yaml +++ /dev/null @@ -1,327 +0,0 @@ -{ - "name": "resnet50_imagenet100_locked_extension_seed1011", - "description": "Locked benchmark extension for NeurIPS two-axis consistency checks", - "tags": [ - "paper2026", - "locked_benchmark_extension", - "imagenet100", - "resnet50", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet50", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet50_imagenet100_cluster_analysis_mb200_20260306_030010_64241279/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": 
"both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - 
"simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 
0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, 
- "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet50_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet50_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_extension_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - 
"hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet50_imagenet100_locked_extension_seed123.yaml b/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet50_imagenet100_locked_extension_seed123.yaml deleted file mode 100644 index acb662c0..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet50_imagenet100_locked_extension_seed123.yaml +++ /dev/null @@ -1,327 +0,0 @@ -{ - "name": "resnet50_imagenet100_locked_extension_seed123", - "description": "Locked benchmark extension for NeurIPS two-axis consistency checks", - "tags": [ - "paper2026", - "locked_benchmark_extension", - "imagenet100", - "resnet50", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet50", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet50_imagenet100_cluster_analysis_mb200_20260306_023258_64241273/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - 
"use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": 
"flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - 
"generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - 
"cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet50_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet50_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_extension_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": 
false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet50_imagenet100_locked_extension_seed42.yaml b/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet50_imagenet100_locked_extension_seed42.yaml deleted file mode 100644 index 629fb7c9..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet50_imagenet100_locked_extension_seed42.yaml +++ /dev/null @@ -1,327 +0,0 @@ -{ - "name": "resnet50_imagenet100_locked_extension_seed42", - "description": "Locked benchmark extension for NeurIPS two-axis consistency checks", - "tags": [ - "paper2026", - "locked_benchmark_extension", - "imagenet100", - "resnet50", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet50", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet50_imagenet100_cluster_analysis_mb200_20260306_023248_64241271/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - 
"cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - 
"spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - 
"generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - 
"halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet50_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet50_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_extension_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - 
"pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet50_imagenet100_locked_extension_seed456.yaml b/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet50_imagenet100_locked_extension_seed456.yaml deleted file mode 100644 index 4dbe273d..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet50_imagenet100_locked_extension_seed456.yaml +++ /dev/null @@ -1,327 +0,0 @@ -{ - "name": "resnet50_imagenet100_locked_extension_seed456", - "description": "Locked benchmark extension for NeurIPS two-axis consistency checks", - "tags": [ - "paper2026", - "locked_benchmark_extension", - "imagenet100", - "resnet50", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet50", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet50_imagenet100_cluster_analysis_mb200_20260306_023256_64241275/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - 
}, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": 
"logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - 
"generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - 
"pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet50_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet50_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_extension_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git 
a/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet50_imagenet100_locked_extension_seed789.yaml b/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet50_imagenet100_locked_extension_seed789.yaml deleted file mode 100644 index 4bde67d4..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_extension/resnet50_imagenet100_locked_extension_seed789.yaml +++ /dev/null @@ -1,327 +0,0 @@ -{ - "name": "resnet50_imagenet100_locked_extension_seed789", - "description": "Locked benchmark extension for NeurIPS two-axis consistency checks", - "tags": [ - "paper2026", - "locked_benchmark_extension", - "imagenet100", - "resnet50", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet50", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet50_imagenet100_cluster_analysis_mb200_20260306_025559_64241277/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - 
"use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - 
"synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - 
"generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": 
true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet50_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet50_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_extension_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_extension/vgg16_cifar10_locked_extension_seed123.yaml 
b/configs/vision_prune/paper_2026_locked_benchmark_extension/vgg16_cifar10_locked_extension_seed123.yaml deleted file mode 100644 index 1396b9b3..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_extension/vgg16_cifar10_locked_extension_seed123.yaml +++ /dev/null @@ -1,429 +0,0 @@ -{ - "name": "vgg16_cifar10_locked_extension_seed123", - "description": "Locked benchmark extension for NeurIPS two-axis consistency checks", - "tags": [ - "paper2026", - "locked_benchmark_extension", - "cifar10", - "vgg16", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar10_cluster_analysis_20260126_123830_57082556/checkpoints/trained_model.pth", - "dataset_name": "cifar10", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - 
"activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - 
"run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - 
"generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": { - "enabled": true, - "accuracy_vs_sparsity": true, - "accuracy_vs_flops": true, - "accuracy_vs_params": true, - "methods_to_compare": [ - "random", - "magnitude", - "taylor", - "composite", - "cluster_aware", - 
"network_slimming" - ] - }, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": { - "enabled": true, - "by_layer": true, - "by_cluster": true - }, - "layer_importance_heatmap": true, - "sensitivity_curves": true, - "efficiency_tradeoffs": { - "enabled": true, - "accuracy_vs_flops": true, - "accuracy_vs_latency": true, - "accuracy_vs_params": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "redundancy", - "synergy" - ], - [ - "magnitude", - "rayleigh_quotient" - ], - [ - "magnitude", - "taylor" - ], - [ - "taylor", - "rayleigh_quotient" - ] - ], - "save_plots": true, - "cluster_analysis": { - "enabled": true, - "scatter_3d": true, - "cluster_evolution_by_layer": true, - "cluster_purity": true - }, - "layer_importance": { - "enabled": true, - "heatmap": true, - "bar_chart": true - }, - "fine_tuning_recovery": { - "enabled": true, - "by_method": true, - "by_sparsity": true - } - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_extension_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - 
"supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": { - "layer_indices": "all", - "save_scores": true, - "generate_plots": true, - "metrics": [ - "rayleigh_quotient", - "redundancy", - "synergy", - "magnitude", - "taylor", - "activation_sparsity" - ], - "plots": { - "histograms": true, - "scatter_plots": true, - "pruning_curves": true, - "layer_comparison": true, - "filter_correlation": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "magnitude", - "taylor" - ], - [ - "redundancy", - "synergy" - ] - ] - }, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_extension/vgg16_cifar10_locked_extension_seed42.yaml b/configs/vision_prune/paper_2026_locked_benchmark_extension/vgg16_cifar10_locked_extension_seed42.yaml deleted file mode 100644 index c15affdf..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_extension/vgg16_cifar10_locked_extension_seed42.yaml +++ /dev/null @@ -1,429 +0,0 @@ -{ - "name": "vgg16_cifar10_locked_extension_seed42", - "description": "Locked benchmark extension for NeurIPS two-axis consistency checks", - "tags": [ - "paper2026", - "locked_benchmark_extension", - "cifar10", - "vgg16", - "seed42" - ], - 
"experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar10_cluster_analysis_20260126_123831_57082555/checkpoints/trained_model.pth", - "dataset_name": "cifar10", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - 
} - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - 
"hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - 
"magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": { - "enabled": true, - "accuracy_vs_sparsity": true, - "accuracy_vs_flops": true, - "accuracy_vs_params": true, - "methods_to_compare": [ - "random", - "magnitude", - "taylor", - "composite", - "cluster_aware", - "network_slimming" - ] - }, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": { - "enabled": true, - "by_layer": true, - "by_cluster": true - }, - "layer_importance_heatmap": true, - "sensitivity_curves": true, - "efficiency_tradeoffs": { - "enabled": true, - "accuracy_vs_flops": true, - "accuracy_vs_latency": true, - "accuracy_vs_params": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "redundancy", - "synergy" - ], - [ - "magnitude", - "rayleigh_quotient" - ], - [ - 
"magnitude", - "taylor" - ], - [ - "taylor", - "rayleigh_quotient" - ] - ], - "save_plots": true, - "cluster_analysis": { - "enabled": true, - "scatter_3d": true, - "cluster_evolution_by_layer": true, - "cluster_purity": true - }, - "layer_importance": { - "enabled": true, - "heatmap": true, - "bar_chart": true - }, - "fine_tuning_recovery": { - "enabled": true, - "by_method": true, - "by_sparsity": true - } - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_extension_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - 
"model_kwargs": {}, - "analysis_options": { - "layer_indices": "all", - "save_scores": true, - "generate_plots": true, - "metrics": [ - "rayleigh_quotient", - "redundancy", - "synergy", - "magnitude", - "taylor", - "activation_sparsity" - ], - "plots": { - "histograms": true, - "scatter_plots": true, - "pruning_curves": true, - "layer_comparison": true, - "filter_correlation": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "magnitude", - "taylor" - ], - [ - "redundancy", - "synergy" - ] - ] - }, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_benchmark_extension/vgg16_cifar10_locked_extension_seed456.yaml b/configs/vision_prune/paper_2026_locked_benchmark_extension/vgg16_cifar10_locked_extension_seed456.yaml deleted file mode 100644 index 663a2617..00000000 --- a/configs/vision_prune/paper_2026_locked_benchmark_extension/vgg16_cifar10_locked_extension_seed456.yaml +++ /dev/null @@ -1,429 +0,0 @@ -{ - "name": "vgg16_cifar10_locked_extension_seed456", - "description": "Locked benchmark extension for NeurIPS two-axis consistency checks", - "tags": [ - "paper2026", - "locked_benchmark_extension", - "cifar10", - "vgg16", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar10_cluster_analysis_20260126_123830_57082552/checkpoints/trained_model.pth", - "dataset_name": "cifar10", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 50, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - 
"scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - 
"alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - 
"generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid", - "composite_pid_no_red", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 
0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": { - "enabled": true, - "accuracy_vs_sparsity": true, - "accuracy_vs_flops": true, - "accuracy_vs_params": true, - "methods_to_compare": [ - "random", - "magnitude", - "taylor", - "composite", - "cluster_aware", - "network_slimming" - ] - }, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": { - "enabled": true, - "by_layer": true, - "by_cluster": true - }, - "layer_importance_heatmap": true, - "sensitivity_curves": true, - "efficiency_tradeoffs": { - "enabled": true, - "accuracy_vs_flops": true, - "accuracy_vs_latency": true, - "accuracy_vs_params": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "redundancy", - "synergy" - ], - [ - "magnitude", - "rayleigh_quotient" - ], - [ - "magnitude", - "taylor" - ], - [ - "taylor", - "rayleigh_quotient" - ] - ], - "save_plots": true, - "cluster_analysis": { - "enabled": true, - "scatter_3d": true, - "cluster_evolution_by_layer": true, - "cluster_purity": true - }, - "layer_importance": { - "enabled": true, - "heatmap": true, - "bar_chart": true - }, - "fine_tuning_recovery": { - "enabled": true, - "by_method": true, - "by_sparsity": true - } - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar10/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": 
"./results/vision/vgg16_cifar10", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_benchmark_extension_20260407", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": { - "layer_indices": "all", - "save_scores": true, - "generate_plots": true, - "metrics": [ - "rayleigh_quotient", - "redundancy", - "synergy", - "magnitude", - "taylor", - "activation_sparsity" - ], - "plots": { - "histograms": true, - "scatter_plots": true, - "pruning_curves": true, - "layer_comparison": true, - "filter_correlation": true - }, - "scatter_pairs": [ - [ - "rayleigh_quotient", - "redundancy" - ], - [ - "rayleigh_quotient", - "synergy" - ], - [ - "magnitude", - "taylor" - ], - [ - "redundancy", - "synergy" - ] - ] - }, - 
"hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_hybrid_seed1011.yaml b/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_hybrid_seed1011.yaml deleted file mode 100644 index 9a34d062..00000000 --- a/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_hybrid_seed1011.yaml +++ /dev/null @@ -1,326 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_locked_headline_hybrid_seed1011", - "description": "Locked unified rerun of Taylor-allocation hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "hybrid", - "mobilenetv2", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084525_57211530/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - 
"use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": 
"flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - 
"generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "cluster_aware_stratified_spectral_pid_rs_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - 
"cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - 
"pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_hybrid_seed123.yaml b/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_hybrid_seed123.yaml deleted file mode 100644 index eca88b28..00000000 --- a/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_hybrid_seed123.yaml +++ /dev/null @@ -1,326 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_locked_headline_hybrid_seed123", - "description": "Locked unified rerun of Taylor-allocation hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "hybrid", - "mobilenetv2", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084458_57217336/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - 
"synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": 
"logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - 
"generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "cluster_aware_stratified_spectral_pid_rs_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - 
"pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git 
a/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_hybrid_seed42.yaml b/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_hybrid_seed42.yaml deleted file mode 100644 index 22fae0f7..00000000 --- a/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_hybrid_seed42.yaml +++ /dev/null @@ -1,326 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_locked_headline_hybrid_seed42", - "description": "Locked unified rerun of Taylor-allocation hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "hybrid", - "mobilenetv2", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_080037_57211589/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - 
"force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - 
"type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - 
"generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "cluster_aware_stratified_spectral_pid_rs_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": 
true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_hybrid_seed456.yaml 
b/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_hybrid_seed456.yaml deleted file mode 100644 index e2a57a2f..00000000 --- a/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_hybrid_seed456.yaml +++ /dev/null @@ -1,326 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_locked_headline_hybrid_seed456", - "description": "Locked unified rerun of Taylor-allocation hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "hybrid", - "mobilenetv2", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084514_57217362/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, 
- "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - 
"red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - 
"generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "cluster_aware_stratified_spectral_pid_rs_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - 
"save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_hybrid_seed789.yaml b/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_hybrid_seed789.yaml deleted file mode 100644 index e96aeefc..00000000 --- 
a/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_hybrid_seed789.yaml +++ /dev/null @@ -1,326 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_locked_headline_hybrid_seed789", - "description": "Locked unified rerun of Taylor-allocation hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "hybrid", - "mobilenetv2", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084522_57217372/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 
100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - 
"compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - 
"do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "cluster_aware_stratified_spectral_pid_rs_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, 
- "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_main_seed1011.yaml b/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_main_seed1011.yaml deleted file mode 100644 index 50e4f9c8..00000000 --- a/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_main_seed1011.yaml +++ /dev/null @@ -1,331 +0,0 @@ -{ - "name": 
"mobilenetv2_cifar100_locked_headline_main_seed1011", - "description": "Locked unified rerun of headline non-hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "main", - "mobilenetv2", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084525_57211530/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": 
"gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - 
"outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - 
"dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "composite_pid_no_red", - "composite_pid_unique", - "cluster_aware_stratified_spectral_pid_rs_ixy", - "cluster_aware_stratified_spectral_pid_rt_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_main_seed123.yaml b/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_main_seed123.yaml deleted file mode 100644 index 5181a4c5..00000000 --- a/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_main_seed123.yaml +++ /dev/null @@ -1,331 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_locked_headline_main_seed123", - 
"description": "Locked unified rerun of headline non-hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "main", - "mobilenetv2", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084458_57217336/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, 
- "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - 
"halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - 
"distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "composite_pid_no_red", - "composite_pid_unique", - "cluster_aware_stratified_spectral_pid_rs_ixy", - "cluster_aware_stratified_spectral_pid_rt_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_main_seed42.yaml b/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_main_seed42.yaml deleted file mode 100644 index 1b79366d..00000000 --- a/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_main_seed42.yaml +++ /dev/null @@ -1,331 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_locked_headline_main_seed42", - 
"description": "Locked unified rerun of headline non-hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "main", - "mobilenetv2", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_080037_57211589/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - 
"use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - 
"halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - 
"distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "composite_pid_no_red", - "composite_pid_unique", - "cluster_aware_stratified_spectral_pid_rs_ixy", - "cluster_aware_stratified_spectral_pid_rt_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_main_seed456.yaml b/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_main_seed456.yaml deleted file mode 100644 index 4624df66..00000000 --- a/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_main_seed456.yaml +++ /dev/null @@ -1,331 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_locked_headline_main_seed456", - 
"description": "Locked unified rerun of headline non-hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "main", - "mobilenetv2", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084514_57217362/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, 
- "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - 
"halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - 
"distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "composite_pid_no_red", - "composite_pid_unique", - "cluster_aware_stratified_spectral_pid_rs_ixy", - "cluster_aware_stratified_spectral_pid_rt_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_main_seed789.yaml b/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_main_seed789.yaml deleted file mode 100644 index 72d11957..00000000 --- a/configs/vision_prune/paper_2026_locked_headline_reruns/mobilenetv2_cifar100_locked_headline_main_seed789.yaml +++ /dev/null @@ -1,331 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_locked_headline_main_seed789", - 
"description": "Locked unified rerun of headline non-hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "main", - "mobilenetv2", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084522_57217372/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, 
- "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - 
"halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - 
"distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "composite_pid_no_red", - "composite_pid_unique", - "cluster_aware_stratified_spectral_pid_rs_ixy", - "cluster_aware_stratified_spectral_pid_rt_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_hybrid_seed1011.yaml b/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_hybrid_seed1011.yaml deleted file mode 100644 index 4192656a..00000000 --- a/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_hybrid_seed1011.yaml +++ /dev/null @@ -1,293 +0,0 @@ -{ - "name": "resnet18_cifar100_locked_headline_hybrid_seed1011", - 
"description": "Locked unified rerun of Taylor-allocation hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "hybrid", - "resnet18", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080111_57211528/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - 
"force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - 
"cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "cluster_aware_stratified_spectral_pid_rs_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - 
"pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - 
"do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_hybrid_seed123.yaml b/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_hybrid_seed123.yaml deleted file mode 100644 index 582ba86f..00000000 --- a/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_hybrid_seed123.yaml +++ /dev/null @@ -1,293 +0,0 @@ -{ - "name": "resnet18_cifar100_locked_headline_hybrid_seed123", - "description": "Locked unified rerun of Taylor-allocation hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "hybrid", - "resnet18", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080045_57211555/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - 
"rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": 
false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - 
"generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "cluster_aware_stratified_spectral_pid_rs_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_hybrid_seed42.yaml b/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_hybrid_seed42.yaml deleted file mode 100644 index 1572da69..00000000 --- a/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_hybrid_seed42.yaml +++ /dev/null @@ -1,293 +0,0 @@ 
-{ - "name": "resnet18_cifar100_locked_headline_hybrid_seed42", - "description": "Locked unified rerun of Taylor-allocation hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "hybrid", - "resnet18", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080032_57211546/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - 
"tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - 
"cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "cluster_aware_stratified_spectral_pid_rs_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - 
"pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": 
false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_hybrid_seed456.yaml b/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_hybrid_seed456.yaml deleted file mode 100644 index cba9737c..00000000 --- a/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_hybrid_seed456.yaml +++ /dev/null @@ -1,293 +0,0 @@ -{ - "name": "resnet18_cifar100_locked_headline_hybrid_seed456", - "description": "Locked unified rerun of Taylor-allocation hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "hybrid", - "resnet18", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080106_57211563/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - 
"rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": 
false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - 
"generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "cluster_aware_stratified_spectral_pid_rs_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_hybrid_seed789.yaml b/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_hybrid_seed789.yaml deleted file mode 100644 index 959eec06..00000000 --- a/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_hybrid_seed789.yaml +++ /dev/null @@ -1,293 +0,0 
@@ -{ - "name": "resnet18_cifar100_locked_headline_hybrid_seed789", - "description": "Locked unified rerun of Taylor-allocation hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "hybrid", - "resnet18", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080112_57211572/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - 
}, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - 
"cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "cluster_aware_stratified_spectral_pid_rs_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - 
"pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": 
false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_main_seed1011.yaml b/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_main_seed1011.yaml deleted file mode 100644 index 9008402d..00000000 --- a/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_main_seed1011.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "resnet18_cifar100_locked_headline_main_seed1011", - "description": "Locked unified rerun of headline non-hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "main", - "resnet18", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080111_57211528/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - 
"rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": 
false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - 
"generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "composite_pid_no_red", - "composite_pid_unique", - "cluster_aware_stratified_spectral_pid_rs_ixy", - "cluster_aware_stratified_spectral_pid_rt_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - 
"base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_main_seed123.yaml b/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_main_seed123.yaml deleted file mode 100644 index 0032f854..00000000 --- a/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_main_seed123.yaml +++ /dev/null @@ 
-1,298 +0,0 @@ -{ - "name": "resnet18_cifar100_locked_headline_main_seed123", - "description": "Locked unified rerun of headline non-hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "main", - "resnet18", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080045_57211555/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 
- }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - 
"cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "composite_pid_no_red", - "composite_pid_unique", - "cluster_aware_stratified_spectral_pid_rs_ixy", - "cluster_aware_stratified_spectral_pid_rt_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - 
"pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - 
"n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_main_seed42.yaml b/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_main_seed42.yaml deleted file mode 100644 index 1f0c7aaf..00000000 --- a/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_main_seed42.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "resnet18_cifar100_locked_headline_main_seed42", - "description": "Locked unified rerun of headline non-hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "main", - "resnet18", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080032_57211546/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, 
- "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": 
"logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - 
"generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "composite_pid_no_red", - "composite_pid_unique", - "cluster_aware_stratified_spectral_pid_rs_ixy", - "cluster_aware_stratified_spectral_pid_rt_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": 
"./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_main_seed456.yaml b/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_main_seed456.yaml deleted file mode 100644 index 6196d7c5..00000000 --- 
a/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_main_seed456.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "resnet18_cifar100_locked_headline_main_seed456", - "description": "Locked unified rerun of headline non-hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "main", - "resnet18", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080106_57211563/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - 
"metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - 
"cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "composite_pid_no_red", - "composite_pid_unique", - "cluster_aware_stratified_spectral_pid_rs_ixy", - 
"cluster_aware_stratified_spectral_pid_rt_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - 
"enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_main_seed789.yaml b/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_main_seed789.yaml deleted file mode 100644 index c20576ac..00000000 --- a/configs/vision_prune/paper_2026_locked_headline_reruns/resnet18_cifar100_locked_headline_main_seed789.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "resnet18_cifar100_locked_headline_main_seed789", - "description": "Locked unified rerun of headline non-hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "main", - "resnet18", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080112_57211572/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 
0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": 
"pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - 
"generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "composite_pid_no_red", - "composite_pid_unique", - "cluster_aware_stratified_spectral_pid_rs_ixy", - "cluster_aware_stratified_spectral_pid_rt_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": 
{}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_hybrid_seed1011.yaml 
b/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_hybrid_seed1011.yaml deleted file mode 100644 index 2468665e..00000000 --- a/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_hybrid_seed1011.yaml +++ /dev/null @@ -1,293 +0,0 @@ -{ - "name": "vgg16_cifar100_locked_headline_hybrid_seed1011", - "description": "Locked unified rerun of Taylor-allocation hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "hybrid", - "vgg16", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080043_57211529/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - 
"use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - 
"geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - 
"composite_pid", - "cluster_aware_stratified_spectral_pid_rs_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": 
{ - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_hybrid_seed123.yaml b/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_hybrid_seed123.yaml deleted file mode 100644 index 53ac45b6..00000000 --- a/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_hybrid_seed123.yaml +++ /dev/null @@ -1,293 +0,0 @@ -{ - "name": "vgg16_cifar100_locked_headline_hybrid_seed123", - "description": "Locked unified rerun of Taylor-allocation hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "hybrid", - "vgg16", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080034_57211556/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 
0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": 
"pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - 
"generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "cluster_aware_stratified_spectral_pid_rs_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - 
"log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_hybrid_seed42.yaml b/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_hybrid_seed42.yaml deleted file mode 100644 index 681887fb..00000000 --- 
a/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_hybrid_seed42.yaml +++ /dev/null @@ -1,293 +0,0 @@ -{ - "name": "vgg16_cifar100_locked_headline_hybrid_seed42", - "description": "Locked unified rerun of Taylor-allocation hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "hybrid", - "vgg16", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080032_57211547/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - 
"metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - 
"cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "cluster_aware_stratified_spectral_pid_rs_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - 
"pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - 
"n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_hybrid_seed456.yaml b/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_hybrid_seed456.yaml deleted file mode 100644 index f2020aa9..00000000 --- a/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_hybrid_seed456.yaml +++ /dev/null @@ -1,293 +0,0 @@ -{ - "name": "vgg16_cifar100_locked_headline_hybrid_seed456", - "description": "Locked unified rerun of Taylor-allocation hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "hybrid", - "vgg16", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080038_57211564/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": 
false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - 
"synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - 
"generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "cluster_aware_stratified_spectral_pid_rs_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_hybrid_seed789.yaml b/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_hybrid_seed789.yaml deleted file mode 100644 index 7caabe30..00000000 --- a/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_hybrid_seed789.yaml +++ /dev/null @@ -1,293 +0,0 @@ -{ - 
"name": "vgg16_cifar100_locked_headline_hybrid_seed789", - "description": "Locked unified rerun of Taylor-allocation hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "hybrid", - "vgg16", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080034_57211573/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - 
"tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - 
"cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "cluster_aware_stratified_spectral_pid_rs_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - 
"pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": 
false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_main_seed1011.yaml b/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_main_seed1011.yaml deleted file mode 100644 index 0a570b9d..00000000 --- a/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_main_seed1011.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "vgg16_cifar100_locked_headline_main_seed1011", - "description": "Locked unified rerun of headline non-hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "main", - "vgg16", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080043_57211529/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - 
"use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - 
"metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - 
"generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "composite_pid_no_red", - "composite_pid_unique", - "cluster_aware_stratified_spectral_pid_rs_ixy", - "cluster_aware_stratified_spectral_pid_rt_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - 
"base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_main_seed123.yaml b/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_main_seed123.yaml deleted file mode 100644 index 2d822f1a..00000000 --- a/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_main_seed123.yaml +++ /dev/null @@ -1,298 
+0,0 @@ -{ - "name": "vgg16_cifar100_locked_headline_main_seed123", - "description": "Locked unified rerun of headline non-hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "main", - "vgg16", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080034_57211556/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - 
"tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - 
"cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "composite_pid_no_red", - "composite_pid_unique", - "cluster_aware_stratified_spectral_pid_rs_ixy", - "cluster_aware_stratified_spectral_pid_rt_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - 
"pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - 
"n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_main_seed42.yaml b/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_main_seed42.yaml deleted file mode 100644 index ca9c818d..00000000 --- a/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_main_seed42.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "vgg16_cifar100_locked_headline_main_seed42", - "description": "Locked unified rerun of headline non-hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "main", - "vgg16", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080032_57211547/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - 
"rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - 
"synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - 
"generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "composite_pid_no_red", - "composite_pid_unique", - "cluster_aware_stratified_spectral_pid_rs_ixy", - "cluster_aware_stratified_spectral_pid_rt_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - 
"log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_main_seed456.yaml b/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_main_seed456.yaml deleted file mode 100644 index 72579798..00000000 --- 
a/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_main_seed456.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "vgg16_cifar100_locked_headline_main_seed456", - "description": "Locked unified rerun of headline non-hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "main", - "vgg16", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080038_57211564/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - 
"use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - 
"cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "composite_pid_no_red", - "composite_pid_unique", - "cluster_aware_stratified_spectral_pid_rs_ixy", - "cluster_aware_stratified_spectral_pid_rt_ixy" - ], - 
"pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": 
true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_main_seed789.yaml b/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_main_seed789.yaml deleted file mode 100644 index 9fee9317..00000000 --- a/configs/vision_prune/paper_2026_locked_headline_reruns/vgg16_cifar100_locked_headline_main_seed789.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "vgg16_cifar100_locked_headline_main_seed789", - "description": "Locked unified rerun of headline non-hybrid pruning methods", - "tags": [ - "paper2026", - "locked_headline_rerun", - "main", - "vgg16", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080034_57211573/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - 
"weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - 
"task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - 
"generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "cluster_aware_spectral_rs_ixy", - "composite_pid", - "composite_pid_no_red", - "composite_pid_unique", - "cluster_aware_stratified_spectral_pid_rs_ixy", - "cluster_aware_stratified_spectral_pid_rt_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": 
"./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_locked_headline_20260309", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_global_seed1011.yaml 
b/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_global_seed1011.yaml deleted file mode 100644 index f3fdd27f..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_global_seed1011.yaml +++ /dev/null @@ -1,331 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_usage_global_seed1011", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "global_threshold", - "mobilenetv2", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084525_57211530/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 
10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", 
- "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - 
"generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": 
"./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_global_seed123.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_global_seed123.yaml 
deleted file mode 100644 index 4985a7b2..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_global_seed123.yaml +++ /dev/null @@ -1,331 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_usage_global_seed123", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "global_threshold", - "mobilenetv2", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084458_57217336/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - 
"force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - 
"loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - 
"generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": 
"./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_global_seed42.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_global_seed42.yaml deleted file mode 100644 index d8092185..00000000 --- 
a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_global_seed42.yaml +++ /dev/null @@ -1,331 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_usage_global_seed42", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "global_threshold", - "mobilenetv2", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_080037_57211589/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 
- }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - 
"compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - 
"do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - 
"log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_global_seed456.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_global_seed456.yaml deleted file mode 100644 index 2b4fce96..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_global_seed456.yaml 
+++ /dev/null @@ -1,331 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_usage_global_seed456", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "global_threshold", - "mobilenetv2", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084514_57217362/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - 
"cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 
5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, 
- 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_global_seed789.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_global_seed789.yaml deleted file mode 100644 index ff617186..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_global_seed789.yaml +++ /dev/null @@ -1,331 +0,0 @@ -{ - "name": 
"mobilenetv2_cifar100_metric_usage_global_seed789", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "global_threshold", - "mobilenetv2", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084522_57217372/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": 
"gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - 
"outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - 
"dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_hybrid_seed1011.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_hybrid_seed1011.yaml deleted file mode 100644 index 0cd5c44f..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_hybrid_seed1011.yaml +++ /dev/null @@ -1,329 +0,0 @@ -{ - "name": 
"mobilenetv2_cifar100_metric_usage_hybrid_seed1011", - "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "mobilenetv2", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084525_57211530/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - 
"criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 
64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - 
"dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_hybrid_seed123.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_hybrid_seed123.yaml deleted file mode 100644 index e5e02cd1..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_hybrid_seed123.yaml +++ /dev/null @@ -1,329 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_usage_hybrid_seed123", 
- "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "mobilenetv2", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084458_57217336/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - 
"composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - 
"bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - 
"measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - 
"wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_hybrid_seed42.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_hybrid_seed42.yaml deleted file mode 100644 index 6cb93797..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_hybrid_seed42.yaml +++ /dev/null @@ -1,329 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_usage_hybrid_seed42", - "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - "tags": [ - "paper2026", - 
"metric_usage_allocation", - "hybrid_taylor_allocation", - "mobilenetv2", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_080037_57211589/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 
100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - 
"damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", 
- "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": 
false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_hybrid_seed456.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_hybrid_seed456.yaml deleted file mode 100644 index 28b19991..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_hybrid_seed456.yaml +++ /dev/null @@ -1,329 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_usage_hybrid_seed456", - "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "mobilenetv2", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": 
"mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084514_57217362/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - 
"use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 
8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - 
"cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - 
], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_hybrid_seed789.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_hybrid_seed789.yaml deleted file mode 100644 index f18d53b4..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_hybrid_seed789.yaml +++ /dev/null @@ -1,329 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_usage_hybrid_seed789", - "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "mobilenetv2", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084522_57217372/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, 
- "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, 
- "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 
0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - 
"do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_uniform_seed1011.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_uniform_seed1011.yaml deleted file mode 100644 index fc4587ec..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_uniform_seed1011.yaml +++ /dev/null @@ -1,331 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_usage_uniform_seed1011", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "mobilenetv2", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084525_57211530/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, 
- "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, 
- "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 
0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - 
"do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_uniform_seed123.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_uniform_seed123.yaml deleted file mode 100644 index 31ce959a..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_uniform_seed123.yaml +++ /dev/null @@ -1,331 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_usage_uniform_seed123", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "mobilenetv2", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084458_57217336/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, 
- "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, 
- "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 
0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - 
"do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_uniform_seed42.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_uniform_seed42.yaml deleted file mode 100644 index 0e6e3f48..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_uniform_seed42.yaml +++ /dev/null @@ -1,331 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_usage_uniform_seed42", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "mobilenetv2", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_080037_57211589/checkpoints/trained_model.pth", - 
"dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - 
"exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - 
"cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - 
"pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - 
"scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_uniform_seed456.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_uniform_seed456.yaml deleted file mode 100644 index 9c866696..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_uniform_seed456.yaml +++ /dev/null @@ -1,331 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_usage_uniform_seed456", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "mobilenetv2", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084514_57217362/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": 
"cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - 
"synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - 
"cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - 
"pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, 
- "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_uniform_seed789.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_uniform_seed789.yaml deleted file mode 100644 index c5abcb5b..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/mobilenetv2_cifar100_metric_usage_uniform_seed789.yaml +++ /dev/null @@ -1,331 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_usage_uniform_seed789", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "mobilenetv2", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084522_57217372/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": 
"sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - 
"measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - 
"cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - 
"fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - 
"generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_global_seed1011.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_global_seed1011.yaml deleted file mode 100644 index 527b86d8..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_global_seed1011.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_global_seed1011", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "global_threshold", - "resnet18", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080111_57211528/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - 
"metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": 
"logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - 
"generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": 
"./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_global_seed123.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_global_seed123.yaml deleted file mode 100644 index 8fe111ca..00000000 --- 
a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_global_seed123.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_global_seed123", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "global_threshold", - "resnet18", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080045_57211555/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - 
"metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - 
"cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 
0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - 
"compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_global_seed42.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_global_seed42.yaml deleted file mode 100644 index 8e036dde..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_global_seed42.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_global_seed42", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "global_threshold", - "resnet18", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080032_57211546/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - 
"scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - 
"task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - 
"generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - 
"log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_global_seed456.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_global_seed456.yaml deleted file mode 100644 index 
267beaaa..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_global_seed456.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_global_seed456", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "global_threshold", - "resnet18", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080106_57211563/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": 
"gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - 
"cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": 
[ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - 
"compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_global_seed789.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_global_seed789.yaml deleted file mode 100644 index c7ee5ab9..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_global_seed789.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_global_seed789", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "global_threshold", - "resnet18", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080112_57211572/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - 
"scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - 
"task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - 
"generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - 
"log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_hybrid_seed1011.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_hybrid_seed1011.yaml deleted file mode 100644 index 
332ad4a0..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_hybrid_seed1011.yaml +++ /dev/null @@ -1,296 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_hybrid_seed1011", - "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "resnet18", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080111_57211528/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - 
"criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 
1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, 
- 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - 
"compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_hybrid_seed123.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_hybrid_seed123.yaml deleted file mode 100644 index 8a0f7fdc..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_hybrid_seed123.yaml +++ /dev/null @@ -1,296 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_hybrid_seed123", - "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "resnet18", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080045_57211555/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", 
- "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", 
- "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - 
"generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": 
"./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_hybrid_seed42.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_hybrid_seed42.yaml deleted file mode 100644 index 03c3e395..00000000 --- 
a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_hybrid_seed42.yaml +++ /dev/null @@ -1,296 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_hybrid_seed42", - "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "resnet18", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080032_57211546/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, 
- "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - 
"cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, 
- 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - 
"permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_hybrid_seed456.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_hybrid_seed456.yaml deleted file mode 100644 index 1cf26573..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_hybrid_seed456.yaml +++ /dev/null @@ -1,296 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_hybrid_seed456", - "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "resnet18", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080106_57211563/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - 
"weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - 
"task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - 
"generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": 
"./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_hybrid_seed789.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_hybrid_seed789.yaml deleted file mode 100644 index f7b7f144..00000000 --- 
a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_hybrid_seed789.yaml +++ /dev/null @@ -1,296 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_hybrid_seed789", - "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "resnet18", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080112_57211572/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } 
- }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - 
"cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, 
- 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - 
"permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_uniform_seed1011.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_uniform_seed1011.yaml deleted file mode 100644 index a9af8525..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_uniform_seed1011.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_uniform_seed1011", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "resnet18", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080111_57211528/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 
0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - 
"spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - 
"generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - 
"log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_uniform_seed123.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_uniform_seed123.yaml deleted file mode 100644 index 2d8ffa22..00000000 --- 
a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_uniform_seed123.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_uniform_seed123", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "resnet18", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080045_57211555/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - 
"metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - 
"cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 
0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, 
- "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_uniform_seed42.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_uniform_seed42.yaml deleted file mode 100644 index 60090a38..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_uniform_seed42.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_uniform_seed42", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "resnet18", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080032_57211546/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - 
"momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - 
"spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - 
"generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - 
"log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_uniform_seed456.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_uniform_seed456.yaml deleted file mode 100644 index 6393b81e..00000000 --- 
a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_uniform_seed456.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_uniform_seed456", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "resnet18", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080106_57211563/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - 
"metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - 
"cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 
0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, 
- "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_uniform_seed789.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_uniform_seed789.yaml deleted file mode 100644 index b36469f9..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/resnet18_cifar100_metric_usage_uniform_seed789.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_uniform_seed789", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "resnet18", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080112_57211572/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 
0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - 
"spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - 
"generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - 
"log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_global_seed1011.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_global_seed1011.yaml deleted file mode 100644 index fb76f3e5..00000000 --- 
a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_global_seed1011.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_global_seed1011", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "global_threshold", - "vgg16", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080043_57211529/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - 
"metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - 
"cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 
0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": 
true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_global_seed123.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_global_seed123.yaml deleted file mode 100644 index ea0e75e9..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_global_seed123.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_global_seed123", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "global_threshold", - "vgg16", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080034_57211556/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - 
"momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - 
"spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - 
"generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", 
- "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_global_seed42.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_global_seed42.yaml deleted file mode 100644 index d070dfc1..00000000 --- 
a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_global_seed42.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_global_seed42", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "global_threshold", - "vgg16", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080032_57211547/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - 
"metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - 
"cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 
0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": 
true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_global_seed456.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_global_seed456.yaml deleted file mode 100644 index bebc7d4c..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_global_seed456.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_global_seed456", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "global_threshold", - "vgg16", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080038_57211564/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - 
"momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - 
"spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - 
"generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", 
- "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_global_seed789.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_global_seed789.yaml deleted file mode 100644 index 0fdd3018..00000000 --- 
a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_global_seed789.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_global_seed789", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "global_threshold", - "vgg16", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080034_57211573/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - 
"metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - 
"cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 
0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": 
true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_hybrid_seed1011.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_hybrid_seed1011.yaml deleted file mode 100644 index 60757b9c..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_hybrid_seed1011.yaml +++ /dev/null @@ -1,296 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_hybrid_seed1011", - "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "vgg16", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080043_57211529/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - 
"weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - 
"task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - 
"generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": 
"./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_hybrid_seed123.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_hybrid_seed123.yaml deleted file mode 100644 index 13072e05..00000000 --- 
a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_hybrid_seed123.yaml +++ /dev/null @@ -1,296 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_hybrid_seed123", - "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "vgg16", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080034_57211556/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - 
"metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - 
"cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, 
- 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - 
"enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_hybrid_seed42.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_hybrid_seed42.yaml deleted file mode 100644 index e1f0fbf2..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_hybrid_seed42.yaml +++ /dev/null @@ -1,296 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_hybrid_seed42", - "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "vgg16", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080032_57211547/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - 
"num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, 
- "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - 
"generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - 
"plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_hybrid_seed456.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_hybrid_seed456.yaml deleted file mode 100644 index d3e62c8e..00000000 --- 
a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_hybrid_seed456.yaml +++ /dev/null @@ -1,296 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_hybrid_seed456", - "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "vgg16", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080038_57211564/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - 
"metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - 
"cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, 
- 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - 
"enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_hybrid_seed789.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_hybrid_seed789.yaml deleted file mode 100644 index 3fc95fd3..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_hybrid_seed789.yaml +++ /dev/null @@ -1,296 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_hybrid_seed789", - "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "vgg16", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080034_57211573/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - 
"num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, 
- "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - 
"generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - 
"plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_uniform_seed1011.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_uniform_seed1011.yaml deleted file mode 100644 index 18667e52..00000000 --- 
a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_uniform_seed1011.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_uniform_seed1011", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "vgg16", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080043_57211529/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - 
"metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - 
"cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 
0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - 
"permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_uniform_seed123.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_uniform_seed123.yaml deleted file mode 100644 index d798d813..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_uniform_seed123.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_uniform_seed123", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "vgg16", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080034_57211556/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 
0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - 
"spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - 
"generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - 
"log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_uniform_seed42.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_uniform_seed42.yaml deleted file mode 100644 index 5cdcad17..00000000 --- 
a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_uniform_seed42.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_uniform_seed42", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "vgg16", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080032_57211547/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { 
- "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - 
"cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, 
- 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - 
"enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_uniform_seed456.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_uniform_seed456.yaml deleted file mode 100644 index b7f7d91b..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_uniform_seed456.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_uniform_seed456", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "vgg16", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080038_57211564/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - 
"do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - 
"synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - 
"generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - 
"experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_uniform_seed789.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_uniform_seed789.yaml deleted file mode 100644 index ae19a425..00000000 --- 
a/configs/vision_prune/paper_2026_metric_usage_allocation/vgg16_cifar100_metric_usage_uniform_seed789.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_uniform_seed789", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "vgg16", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080034_57211573/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - 
"metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - 
"cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 
0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_20260311", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - 
"permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_global_seed1011.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_global_seed1011.yaml deleted file mode 100644 index 109728f7..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_global_seed1011.yaml +++ /dev/null @@ -1,331 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_usage_global_seed1011", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "global_threshold", - "mobilenetv2", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084525_57211530/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - 
"scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - 
"measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - 
"cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 
1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": 
true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_global_seed123.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_global_seed123.yaml deleted file mode 100644 index 62fe345a..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_global_seed123.yaml +++ /dev/null @@ -1,331 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_usage_global_seed123", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "global_threshold", - "mobilenetv2", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084458_57217336/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - 
"momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - 
"alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 
1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, 
- "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - 
"do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_global_seed42.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_global_seed42.yaml deleted file mode 100644 index 2d66f7da..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_global_seed42.yaml +++ /dev/null @@ -1,331 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_usage_global_seed42", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "global_threshold", - "mobilenetv2", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_080037_57211589/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - 
"rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - 
"alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - 
"generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - 
"alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - 
"do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_global_seed456.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_global_seed456.yaml deleted file mode 100644 index 530d3c4c..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_global_seed456.yaml +++ /dev/null @@ -1,331 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_usage_global_seed456", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "global_threshold", - "mobilenetv2", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084514_57217362/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - 
"gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - 
"rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - 
"generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - 
"cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - 
"do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_global_seed789.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_global_seed789.yaml deleted file mode 100644 index 904e4a1b..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_global_seed789.yaml +++ /dev/null @@ -1,331 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_usage_global_seed789", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "global_threshold", - "mobilenetv2", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084522_57217372/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - 
"synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - 
"gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 
1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - 
"dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - 
"do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_hybrid_seed1011.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_hybrid_seed1011.yaml deleted file mode 100644 index b21bbd36..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_hybrid_seed1011.yaml +++ /dev/null @@ -1,329 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_usage_hybrid_seed1011", - "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "mobilenetv2", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084525_57211530/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - 
"activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - 
"synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - 
"generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": 
null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, 
- "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_hybrid_seed123.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_hybrid_seed123.yaml deleted file mode 100644 index 71ef774d..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_hybrid_seed123.yaml +++ /dev/null @@ -1,329 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_usage_hybrid_seed123", - "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "mobilenetv2", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084458_57217336/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - 
"use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - 
"calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - 
"generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - 
"plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - 
"use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_hybrid_seed42.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_hybrid_seed42.yaml deleted file mode 100644 index a684e39c..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_hybrid_seed42.yaml +++ /dev/null @@ -1,329 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_usage_hybrid_seed42", - "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "mobilenetv2", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_080037_57211589/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 
100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - 
"simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - 
"generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - 
"format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - 
"analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_hybrid_seed456.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_hybrid_seed456.yaml deleted file mode 100644 index 9eafc94f..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_hybrid_seed456.yaml +++ /dev/null @@ -1,329 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_usage_hybrid_seed456", - "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "mobilenetv2", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084514_57217362/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, 
- "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - 
"activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - 
"generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": 
"seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": 
true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_hybrid_seed789.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_hybrid_seed789.yaml deleted file mode 100644 index 041c1a83..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_hybrid_seed789.yaml +++ /dev/null @@ -1,329 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_usage_hybrid_seed789", - "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "mobilenetv2", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084522_57217372/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - 
"use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": 
"flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - 
"generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, 
- "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - 
"fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_uniform_seed1011.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_uniform_seed1011.yaml deleted file mode 100644 index 4083b728..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_uniform_seed1011.yaml +++ /dev/null @@ -1,331 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_usage_uniform_seed1011", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "mobilenetv2", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084525_57211530/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": 
true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - 
"spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - 
"generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - 
"cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - 
"pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_uniform_seed123.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_uniform_seed123.yaml deleted file mode 100644 index 269b67c0..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_uniform_seed123.yaml +++ /dev/null @@ -1,331 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_usage_uniform_seed123", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "mobilenetv2", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084458_57217336/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - 
"sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, 
- "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - 
"generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - 
"halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git 
a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_uniform_seed42.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_uniform_seed42.yaml deleted file mode 100644 index bb76e8cf..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_uniform_seed42.yaml +++ /dev/null @@ -1,331 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_usage_uniform_seed42", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "mobilenetv2", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_080037_57211589/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - 
"use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - 
"synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - 
"generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, 
- "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git 
a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_uniform_seed456.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_uniform_seed456.yaml deleted file mode 100644 index 9a03dfe5..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_uniform_seed456.yaml +++ /dev/null @@ -1,331 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_usage_uniform_seed456", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "mobilenetv2", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084514_57217362/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - 
"use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - 
"synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - 
"generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, 
- "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git 
a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_uniform_seed789.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_uniform_seed789.yaml deleted file mode 100644 index e8578951..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/mobilenetv2_cifar100_metric_usage_uniform_seed789.yaml +++ /dev/null @@ -1,331 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_usage_uniform_seed789", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "mobilenetv2", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084522_57217372/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - 
"use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - 
"synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - 
"generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, 
- "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git 
a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_global_seed1011.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_global_seed1011.yaml deleted file mode 100644 index 4dd72595..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_global_seed1011.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_global_seed1011", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "global_threshold", - "resnet18", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080111_57211528/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - 
"force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, 
- "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, 
- "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - 
"do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_global_seed123.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_global_seed123.yaml deleted file mode 100644 index ff4421f1..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_global_seed123.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_global_seed123", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "global_threshold", - "resnet18", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080045_57211555/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - 
"measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - 
"generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - 
"fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - 
"tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_global_seed42.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_global_seed42.yaml deleted file mode 100644 index 925e00c3..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_global_seed42.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_global_seed42", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "global_threshold", - "resnet18", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080032_57211546/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - 
"gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - 
"compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - 
"do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": 
"wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_global_seed456.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_global_seed456.yaml deleted file mode 100644 index 417e2a82..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_global_seed456.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_global_seed456", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "global_threshold", - "resnet18", - "seed456" - ], - 
"experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080106_57211563/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - 
"synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 
0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - 
"pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - 
"use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_global_seed789.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_global_seed789.yaml deleted file mode 100644 index dba73fc0..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_global_seed789.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_global_seed789", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "global_threshold", - "resnet18", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080112_57211572/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - 
"cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": 
false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - 
"generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": 
false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_hybrid_seed1011.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_hybrid_seed1011.yaml deleted file mode 100644 index d77881eb..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_hybrid_seed1011.yaml +++ /dev/null @@ -1,296 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_hybrid_seed1011", - "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - 
"tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "resnet18", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080111_57211528/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - 
"exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - 
"cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": 
"global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - 
"do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_hybrid_seed123.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_hybrid_seed123.yaml deleted file mode 100644 index f4bb11fc..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_hybrid_seed123.yaml +++ /dev/null @@ -1,296 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_hybrid_seed123", - "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "resnet18", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080045_57211555/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - 
"rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": 
false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - 
"generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_hybrid_seed42.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_hybrid_seed42.yaml deleted file mode 100644 index 35e9d55a..00000000 --- 
a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_hybrid_seed42.yaml +++ /dev/null @@ -1,296 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_hybrid_seed42", - "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "resnet18", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080032_57211546/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": 
"gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - 
"cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, 
- 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - 
"compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_hybrid_seed456.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_hybrid_seed456.yaml deleted file mode 100644 index 6c6bd89f..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_hybrid_seed456.yaml +++ /dev/null @@ -1,296 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_hybrid_seed456", - "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "resnet18", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080106_57211563/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": 
"sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - 
"activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - 
"generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - 
"save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_hybrid_seed789.yaml 
b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_hybrid_seed789.yaml deleted file mode 100644 index 9f7ee196..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_hybrid_seed789.yaml +++ /dev/null @@ -1,296 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_hybrid_seed789", - "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "resnet18", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080112_57211572/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - 
"sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 
1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - 
"magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, 
- "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_uniform_seed1011.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_uniform_seed1011.yaml deleted file mode 100644 index 92fa6d2a..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_uniform_seed1011.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_uniform_seed1011", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "resnet18", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080111_57211528/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - 
"num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - 
"n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - 
"generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - 
"plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git 
a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_uniform_seed123.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_uniform_seed123.yaml deleted file mode 100644 index dc7b6701..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_uniform_seed123.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_uniform_seed123", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "resnet18", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080045_57211555/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - 
"force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, 
- "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, 
- "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - 
"do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_uniform_seed42.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_uniform_seed42.yaml deleted file mode 100644 index 86effc92..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_uniform_seed42.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_uniform_seed42", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "resnet18", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080032_57211546/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - 
"measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - 
"generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - 
"fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - 
"tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_uniform_seed456.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_uniform_seed456.yaml deleted file mode 100644 index fce696bf..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_uniform_seed456.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_uniform_seed456", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "resnet18", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080106_57211563/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - 
"gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - 
"compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - 
"do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": 
"wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_uniform_seed789.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_uniform_seed789.yaml deleted file mode 100644 index d9f90c19..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/resnet18_cifar100_metric_usage_uniform_seed789.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_usage_uniform_seed789", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "resnet18", - "seed789" - ], - 
"experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080112_57211572/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - 
"synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 
0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 
1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - 
"use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_global_seed1011.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_global_seed1011.yaml deleted file mode 100644 index 0ed9eee5..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_global_seed1011.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_global_seed1011", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "global_threshold", - "vgg16", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080043_57211529/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, 
- "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - 
"compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 
1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - 
"do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_global_seed123.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_global_seed123.yaml deleted file mode 100644 index 39d2d24d..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_global_seed123.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_global_seed123", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - 
"global_threshold", - "vgg16", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080034_57211556/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - 
"rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - 
"cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - 
"pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, 
- "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_global_seed42.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_global_seed42.yaml deleted file mode 100644 index 3ef8cbe3..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_global_seed42.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_global_seed42", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "global_threshold", - "vgg16", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080032_57211547/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - 
"cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": 
false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - 
"generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - 
"world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_global_seed456.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_global_seed456.yaml deleted file mode 100644 index a1cc1c24..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_global_seed456.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_global_seed456", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - 
"metric_usage_allocation", - "global_threshold", - "vgg16", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080038_57211564/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - 
"alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - 
"cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", 
- "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, 
- "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_global_seed789.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_global_seed789.yaml deleted file mode 100644 index b1fa1d32..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_global_seed789.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_global_seed789", - "description": "Locked ranking-only sweep with global-threshold allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "global_threshold", - "vgg16", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080034_57211573/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": 
false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - 
"red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - 
"generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - 
"wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_hybrid_seed1011.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_hybrid_seed1011.yaml deleted file mode 100644 index 15f4a63a..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_hybrid_seed1011.yaml +++ /dev/null @@ -1,296 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_hybrid_seed1011", - "description": "Locked ranking sweep with Taylor-based 
cross-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "vgg16", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080043_57211529/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - 
"cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - 
"cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": 
"layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - 
"do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_hybrid_seed123.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_hybrid_seed123.yaml deleted file mode 100644 index 4ec2e4d1..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_hybrid_seed123.yaml +++ /dev/null @@ -1,296 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_hybrid_seed123", - "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "vgg16", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080034_57211556/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - 
"metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - 
"run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - 
"generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_hybrid_seed42.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_hybrid_seed42.yaml deleted file mode 100644 index 6a765ea1..00000000 --- 
a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_hybrid_seed42.yaml +++ /dev/null @@ -1,296 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_hybrid_seed42", - "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "vgg16", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080032_57211547/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, 
- "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - 
"cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, 
- 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - 
"permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_hybrid_seed456.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_hybrid_seed456.yaml deleted file mode 100644 index fae98489..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_hybrid_seed456.yaml +++ /dev/null @@ -1,296 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_hybrid_seed456", - "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "vgg16", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080038_57211564/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - 
"scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - 
"task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - 
"generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": 
"./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_hybrid_seed789.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_hybrid_seed789.yaml deleted file mode 100644 index 
2c764a77..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_hybrid_seed789.yaml +++ /dev/null @@ -1,296 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_hybrid_seed789", - "description": "Locked ranking sweep with Taylor-based cross-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "hybrid_taylor_allocation", - "vgg16", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080034_57211573/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": 
"gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - 
"cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "ixy_minus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, 
- 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - 
"compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": true, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_uniform_seed1011.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_uniform_seed1011.yaml deleted file mode 100644 index 6c66ab2d..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_uniform_seed1011.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_uniform_seed1011", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "vgg16", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080043_57211529/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": 
"cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": 
"flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - 
"generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - 
"checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_uniform_seed123.yaml 
b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_uniform_seed123.yaml deleted file mode 100644 index e43f5771..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_uniform_seed123.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_uniform_seed123", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "vgg16", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080034_57211556/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { 
- "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - 
"geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - 
"magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - 
"supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_uniform_seed42.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_uniform_seed42.yaml deleted file mode 100644 index cf6eab37..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_uniform_seed42.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_uniform_seed42", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "vgg16", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080032_57211547/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": 
"cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - 
"simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - 
"generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - 
"post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git 
a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_uniform_seed456.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_uniform_seed456.yaml deleted file mode 100644 index 5ac300b5..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_uniform_seed456.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_uniform_seed456", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "vgg16", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080038_57211564/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, 
- "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, 
- "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - 
"magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - 
"do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_uniform_seed789.yaml b/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_uniform_seed789.yaml deleted file mode 100644 index 770cc757..00000000 --- a/configs/vision_prune/paper_2026_metric_usage_allocation_fixmissing/vgg16_cifar100_metric_usage_uniform_seed789.yaml +++ /dev/null @@ -1,298 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_usage_uniform_seed789", - "description": "Locked ranking-only sweep with uniform per-layer allocation", - "tags": [ - "paper2026", - "metric_usage_allocation", - "uniform", - "vgg16", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080034_57211573/checkpoints/trained_model.pth", - "dataset_name": 
"cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - 
"cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - 
"generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "taylor", - "composite_ixy", - "ixy_minus_red", - "ixy_plus_red", - "magnitude_plus_ixy", - "magnitude_minus_red", - "composite_pid_no_red", - "composite_pid", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - 
"pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_usage_allocation_fixmissing_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - 
"pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_weight_sweep/mobilenetv2_cifar100_metric_weight_sweep_seed1011.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep/mobilenetv2_cifar100_metric_weight_sweep_seed1011.yaml deleted file mode 100644 index 9fb945bf..00000000 --- a/configs/vision_prune/paper_2026_metric_weight_sweep/mobilenetv2_cifar100_metric_weight_sweep_seed1011.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_weight_sweep_seed1011", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - "metric_weight_sweep", - "mobilenetv2", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084525_57211530/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - 
"use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - 
"synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - 
"generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - 
"cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} 
diff --git a/configs/vision_prune/paper_2026_metric_weight_sweep/mobilenetv2_cifar100_metric_weight_sweep_seed123.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep/mobilenetv2_cifar100_metric_weight_sweep_seed123.yaml deleted file mode 100644 index edc698fb..00000000 --- a/configs/vision_prune/paper_2026_metric_weight_sweep/mobilenetv2_cifar100_metric_weight_sweep_seed123.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_weight_sweep_seed123", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - "metric_weight_sweep", - "mobilenetv2", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084458_57217336/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - 
"force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - 
"type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - 
"generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - 
"influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git 
a/configs/vision_prune/paper_2026_metric_weight_sweep/mobilenetv2_cifar100_metric_weight_sweep_seed42.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep/mobilenetv2_cifar100_metric_weight_sweep_seed42.yaml deleted file mode 100644 index 957395bb..00000000 --- a/configs/vision_prune/paper_2026_metric_weight_sweep/mobilenetv2_cifar100_metric_weight_sweep_seed42.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_weight_sweep_seed42", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - "metric_weight_sweep", - "mobilenetv2", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_080037_57211589/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": 
true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - 
"run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - 
"generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, 
- "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git 
a/configs/vision_prune/paper_2026_metric_weight_sweep/mobilenetv2_cifar100_metric_weight_sweep_seed456.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep/mobilenetv2_cifar100_metric_weight_sweep_seed456.yaml deleted file mode 100644 index 5e44919b..00000000 --- a/configs/vision_prune/paper_2026_metric_weight_sweep/mobilenetv2_cifar100_metric_weight_sweep_seed456.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_weight_sweep_seed456", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - "metric_weight_sweep", - "mobilenetv2", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084514_57217362/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - 
"force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - 
"type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - 
"generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - 
"influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git 
a/configs/vision_prune/paper_2026_metric_weight_sweep/mobilenetv2_cifar100_metric_weight_sweep_seed789.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep/mobilenetv2_cifar100_metric_weight_sweep_seed789.yaml deleted file mode 100644 index fba48bc1..00000000 --- a/configs/vision_prune/paper_2026_metric_weight_sweep/mobilenetv2_cifar100_metric_weight_sweep_seed789.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_weight_sweep_seed789", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - "metric_weight_sweep", - "mobilenetv2", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084522_57217372/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - 
"force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - 
"type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - 
"generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - 
"influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git 
a/configs/vision_prune/paper_2026_metric_weight_sweep/resnet18_cifar100_metric_weight_sweep_seed1011.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep/resnet18_cifar100_metric_weight_sweep_seed1011.yaml deleted file mode 100644 index 7ad315de..00000000 --- a/configs/vision_prune/paper_2026_metric_weight_sweep/resnet18_cifar100_metric_weight_sweep_seed1011.yaml +++ /dev/null @@ -1,299 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_weight_sweep_seed1011", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - "metric_weight_sweep", - "resnet18", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080111_57211528/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - 
"target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - 
"damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", 
- "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - 
"do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_weight_sweep/resnet18_cifar100_metric_weight_sweep_seed123.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep/resnet18_cifar100_metric_weight_sweep_seed123.yaml deleted file mode 100644 index 8dc80c82..00000000 --- a/configs/vision_prune/paper_2026_metric_weight_sweep/resnet18_cifar100_metric_weight_sweep_seed123.yaml +++ /dev/null @@ -1,299 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_weight_sweep_seed123", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - "metric_weight_sweep", - "resnet18", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080045_57211555/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - 
"measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - 
"generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - 
"pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - 
"use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_weight_sweep/resnet18_cifar100_metric_weight_sweep_seed42.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep/resnet18_cifar100_metric_weight_sweep_seed42.yaml deleted file mode 100644 index 27cfd720..00000000 --- a/configs/vision_prune/paper_2026_metric_weight_sweep/resnet18_cifar100_metric_weight_sweep_seed42.yaml +++ /dev/null @@ -1,299 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_weight_sweep_seed42", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - "metric_weight_sweep", - "resnet18", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080032_57211546/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": 
"both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - 
"loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - 
"generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_20260312", - "wandb_project": null, - "wandb_entity": null, 
- "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_weight_sweep/resnet18_cifar100_metric_weight_sweep_seed456.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep/resnet18_cifar100_metric_weight_sweep_seed456.yaml deleted file mode 100644 index d2b840e0..00000000 --- a/configs/vision_prune/paper_2026_metric_weight_sweep/resnet18_cifar100_metric_weight_sweep_seed456.yaml +++ /dev/null @@ -1,299 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_weight_sweep_seed456", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - 
"metric_weight_sweep", - "resnet18", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080106_57211563/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ 
- "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - 
"cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - 
"pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - 
"do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_weight_sweep/resnet18_cifar100_metric_weight_sweep_seed789.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep/resnet18_cifar100_metric_weight_sweep_seed789.yaml deleted file mode 100644 index 27309d8e..00000000 --- a/configs/vision_prune/paper_2026_metric_weight_sweep/resnet18_cifar100_metric_weight_sweep_seed789.yaml +++ /dev/null @@ -1,299 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_weight_sweep_seed789", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - "metric_weight_sweep", - "resnet18", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080112_57211572/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": 
false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - 
"all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - 
"generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_weight_sweep/vgg16_cifar100_metric_weight_sweep_seed1011.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep/vgg16_cifar100_metric_weight_sweep_seed1011.yaml deleted file mode 100644 index 0c943812..00000000 --- a/configs/vision_prune/paper_2026_metric_weight_sweep/vgg16_cifar100_metric_weight_sweep_seed1011.yaml +++ /dev/null @@ -1,299 +0,0 @@ -{ - "name": 
"vgg16_cifar100_metric_weight_sweep_seed1011", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - "metric_weight_sweep", - "vgg16", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080043_57211529/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - 
"scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, 
- "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - 
"fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - 
"generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_weight_sweep/vgg16_cifar100_metric_weight_sweep_seed123.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep/vgg16_cifar100_metric_weight_sweep_seed123.yaml deleted file mode 100644 index bc9b241e..00000000 --- a/configs/vision_prune/paper_2026_metric_weight_sweep/vgg16_cifar100_metric_weight_sweep_seed123.yaml +++ /dev/null @@ -1,299 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_weight_sweep_seed123", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - "metric_weight_sweep", - "vgg16", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080034_57211556/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - 
"gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - 
"synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - 
"generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", 
- "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_weight_sweep/vgg16_cifar100_metric_weight_sweep_seed42.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep/vgg16_cifar100_metric_weight_sweep_seed42.yaml deleted file mode 100644 index 97d5f51d..00000000 --- 
a/configs/vision_prune/paper_2026_metric_weight_sweep/vgg16_cifar100_metric_weight_sweep_seed42.yaml +++ /dev/null @@ -1,299 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_weight_sweep_seed42", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - "metric_weight_sweep", - "vgg16", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080032_57211547/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - 
"use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - 
"cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - 
"ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - 
"use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_weight_sweep/vgg16_cifar100_metric_weight_sweep_seed456.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep/vgg16_cifar100_metric_weight_sweep_seed456.yaml deleted file mode 100644 index 4607ada3..00000000 --- a/configs/vision_prune/paper_2026_metric_weight_sweep/vgg16_cifar100_metric_weight_sweep_seed456.yaml +++ /dev/null @@ -1,299 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_weight_sweep_seed456", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - "metric_weight_sweep", - "vgg16", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080038_57211564/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - 
"scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - 
"task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - 
"generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": 
"./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_weight_sweep/vgg16_cifar100_metric_weight_sweep_seed789.yaml 
b/configs/vision_prune/paper_2026_metric_weight_sweep/vgg16_cifar100_metric_weight_sweep_seed789.yaml deleted file mode 100644 index c8f2d1c1..00000000 --- a/configs/vision_prune/paper_2026_metric_weight_sweep/vgg16_cifar100_metric_weight_sweep_seed789.yaml +++ /dev/null @@ -1,299 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_weight_sweep_seed789", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - "metric_weight_sweep", - "vgg16", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080034_57211573/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - 
"use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 
1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - 
"magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - 
"scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/mobilenetv2_cifar100_metric_weight_sweep_seed1011.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/mobilenetv2_cifar100_metric_weight_sweep_seed1011.yaml deleted file mode 100644 index b71c15ef..00000000 --- a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/mobilenetv2_cifar100_metric_weight_sweep_seed1011.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_weight_sweep_seed1011", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - "metric_weight_sweep", - "mobilenetv2", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084525_57211530/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - 
"data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - 
"alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - 
"cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - 
"fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_fixdispatch_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - 
"do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/mobilenetv2_cifar100_metric_weight_sweep_seed123.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/mobilenetv2_cifar100_metric_weight_sweep_seed123.yaml deleted file mode 100644 index 8177aa2b..00000000 --- a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/mobilenetv2_cifar100_metric_weight_sweep_seed123.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_weight_sweep_seed123", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - "metric_weight_sweep", - "mobilenetv2", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084458_57217336/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": 
{}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - 
"alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - 
"cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - 
"fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_fixdispatch_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - 
"do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/mobilenetv2_cifar100_metric_weight_sweep_seed42.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/mobilenetv2_cifar100_metric_weight_sweep_seed42.yaml deleted file mode 100644 index cc392477..00000000 --- a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/mobilenetv2_cifar100_metric_weight_sweep_seed42.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_weight_sweep_seed42", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - "metric_weight_sweep", - "mobilenetv2", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_080037_57211589/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - 
"data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - 
"alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - 
"cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - 
"fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_fixdispatch_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - 
"do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/mobilenetv2_cifar100_metric_weight_sweep_seed456.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/mobilenetv2_cifar100_metric_weight_sweep_seed456.yaml deleted file mode 100644 index 2063aed4..00000000 --- a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/mobilenetv2_cifar100_metric_weight_sweep_seed456.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_weight_sweep_seed456", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - "metric_weight_sweep", - "mobilenetv2", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084514_57217362/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": 
{}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - 
"alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - 
"cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - 
"fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_fixdispatch_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - 
"do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/mobilenetv2_cifar100_metric_weight_sweep_seed789.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/mobilenetv2_cifar100_metric_weight_sweep_seed789.yaml deleted file mode 100644 index 38465afd..00000000 --- a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/mobilenetv2_cifar100_metric_weight_sweep_seed789.yaml +++ /dev/null @@ -1,332 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_metric_weight_sweep_seed789", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - "metric_weight_sweep", - "mobilenetv2", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084522_57217372/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": 
{}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - 
"alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - 
"cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - 
"fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_fixdispatch_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - 
"do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/resnet18_cifar100_metric_weight_sweep_seed1011.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/resnet18_cifar100_metric_weight_sweep_seed1011.yaml deleted file mode 100644 index e614c988..00000000 --- a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/resnet18_cifar100_metric_weight_sweep_seed1011.yaml +++ /dev/null @@ -1,299 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_weight_sweep_seed1011", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - "metric_weight_sweep", - "resnet18", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080111_57211528/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": 
"./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - 
"calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 
0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - 
"pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_fixdispatch_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - 
"pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/resnet18_cifar100_metric_weight_sweep_seed123.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/resnet18_cifar100_metric_weight_sweep_seed123.yaml deleted file mode 100644 index 5e72ed62..00000000 --- a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/resnet18_cifar100_metric_weight_sweep_seed123.yaml +++ /dev/null @@ -1,299 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_weight_sweep_seed123", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - "metric_weight_sweep", - "resnet18", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080045_57211555/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - 
"use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 
95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": 
true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_fixdispatch_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - 
"use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/resnet18_cifar100_metric_weight_sweep_seed42.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/resnet18_cifar100_metric_weight_sweep_seed42.yaml deleted file mode 100644 index 62490fd5..00000000 --- a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/resnet18_cifar100_metric_weight_sweep_seed42.yaml +++ /dev/null @@ -1,299 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_weight_sweep_seed42", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - "metric_weight_sweep", - "resnet18", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080032_57211546/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - 
"measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - 
"generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - 
"pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_fixdispatch_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - 
"use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/resnet18_cifar100_metric_weight_sweep_seed456.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/resnet18_cifar100_metric_weight_sweep_seed456.yaml deleted file mode 100644 index f4fa80f9..00000000 --- a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/resnet18_cifar100_metric_weight_sweep_seed456.yaml +++ /dev/null @@ -1,299 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_weight_sweep_seed456", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - "metric_weight_sweep", - "resnet18", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080106_57211563/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 
100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - 
"n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - 
"generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_fixdispatch_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/resnet18_cifar100_metric_weight_sweep_seed789.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/resnet18_cifar100_metric_weight_sweep_seed789.yaml deleted file mode 100644 index 63ee95fb..00000000 --- 
a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/resnet18_cifar100_metric_weight_sweep_seed789.yaml +++ /dev/null @@ -1,299 +0,0 @@ -{ - "name": "resnet18_cifar100_metric_weight_sweep_seed789", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - "metric_weight_sweep", - "resnet18", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080112_57211572/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - 
"metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - 
"cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - 
"ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_fixdispatch_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - 
"enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/vgg16_cifar100_metric_weight_sweep_seed1011.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/vgg16_cifar100_metric_weight_sweep_seed1011.yaml deleted file mode 100644 index 69742065..00000000 --- a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/vgg16_cifar100_metric_weight_sweep_seed1011.yaml +++ /dev/null @@ -1,299 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_weight_sweep_seed1011", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - "metric_weight_sweep", - "vgg16", - "seed1011" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080043_57211529/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": false, - "training_epochs": 100, - 
"learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - 
"activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - 
"generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - 
"plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_fixdispatch_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git 
a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/vgg16_cifar100_metric_weight_sweep_seed123.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/vgg16_cifar100_metric_weight_sweep_seed123.yaml deleted file mode 100644 index 5613945f..00000000 --- a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/vgg16_cifar100_metric_weight_sweep_seed123.yaml +++ /dev/null @@ -1,299 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_weight_sweep_seed123", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - "metric_weight_sweep", - "vgg16", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080034_57211556/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 
100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - 
"cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - 
"magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_fixdispatch_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - 
"do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/vgg16_cifar100_metric_weight_sweep_seed42.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/vgg16_cifar100_metric_weight_sweep_seed42.yaml deleted file mode 100644 index c8a0fe96..00000000 --- a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/vgg16_cifar100_metric_weight_sweep_seed42.yaml +++ /dev/null @@ -1,299 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_weight_sweep_seed42", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - "metric_weight_sweep", - "vgg16", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080032_57211547/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": 
true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - 
"generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, 
- "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_fixdispatch_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - 
"use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/vgg16_cifar100_metric_weight_sweep_seed456.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/vgg16_cifar100_metric_weight_sweep_seed456.yaml deleted file mode 100644 index 2fb0ae98..00000000 --- a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/vgg16_cifar100_metric_weight_sweep_seed456.yaml +++ /dev/null @@ -1,299 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_weight_sweep_seed456", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - "metric_weight_sweep", - "vgg16", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080038_57211564/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - 
"gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - 
"compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - 
"do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_fixdispatch_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 
1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/vgg16_cifar100_metric_weight_sweep_seed789.yaml b/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/vgg16_cifar100_metric_weight_sweep_seed789.yaml deleted file mode 100644 index aff14ac9..00000000 --- a/configs/vision_prune/paper_2026_metric_weight_sweep_fixdispatch/vgg16_cifar100_metric_weight_sweep_seed789.yaml +++ /dev/null @@ -1,299 +0,0 @@ -{ - "name": "vgg16_cifar100_metric_weight_sweep_seed789", - "description": "Locked uniform-allocation sweep for magnitude/IXY/redundancy weightings", - "tags": [ - "paper2026", - 
"metric_weight_sweep", - "vgg16", - "seed789" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080034_57211573/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - 
"rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - 
"cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "magnitude", - "composite_ixy", - "cluster_aware_quantile_gradient_weighted_ixy", - "composite_pid_no_red", - "magnitude_plus_ixy_w025", - "magnitude_plus_ixy_w050", - "magnitude_plus_ixy_w075", - "magnitude_plus_ixy_w100", - "ixy_minus_red_w015", - "ixy_minus_red_w030", - "ixy_minus_red_w050", - "ixy_minus_red_w070" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - 
"pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_metric_weight_sweep_fixdispatch_20260312", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - 
"do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_two_axis_scaleup/resnet50_imagenet100_two_axis_seed123.yaml b/configs/vision_prune/paper_2026_two_axis_scaleup/resnet50_imagenet100_two_axis_seed123.yaml deleted file mode 100644 index 757eb74c..00000000 --- a/configs/vision_prune/paper_2026_two_axis_scaleup/resnet50_imagenet100_two_axis_seed123.yaml +++ /dev/null @@ -1,326 +0,0 @@ -{ - "name": "resnet50_imagenet100_two_axis_scaleup_seed123", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "two_axis_scaleup", - "resnet50", - "imagenet100", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet50", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet50_imagenet100_cluster_analysis_mb200_20260306_023258_64241273/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - 
"rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - 
"cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - 
"generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "two_axis_a1_b0", - "two_axis_a1_b0p25", - "two_axis_a1_b0p5", - "two_axis_a1_b1" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - 
"visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet50_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet50_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/two_axis_scaleup_20260417", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - 
"use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_two_axis_scaleup/resnet50_imagenet100_two_axis_seed42.yaml b/configs/vision_prune/paper_2026_two_axis_scaleup/resnet50_imagenet100_two_axis_seed42.yaml deleted file mode 100644 index b383554b..00000000 --- a/configs/vision_prune/paper_2026_two_axis_scaleup/resnet50_imagenet100_two_axis_seed42.yaml +++ /dev/null @@ -1,326 +0,0 @@ -{ - "name": "resnet50_imagenet100_two_axis_scaleup_seed42", - "description": "Locked benchmark breadth suite for NeurIPS two-axis appendix controls", - "tags": [ - "paper2026", - "two_axis_scaleup", - "resnet50", - "imagenet100", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet50", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet50_imagenet100_cluster_analysis_mb200_20260306_023248_64241271/checkpoints/trained_model.pth", - "dataset_name": "imagenet100", - "dataset_config": {}, - "data_path": "./data/imagenet100", - "batch_size": 64, - "num_workers": 8, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 30, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0001, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - 
"gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": 
"pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 512, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.1, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - 
"generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "two_axis_a1_b0", - "two_axis_a1_b0p25", - "two_axis_a1_b0p5", - "two_axis_a1_b1" - ], - "pruning_amounts": [ - 0.1, - 0.3, - 0.5, - 0.7, - 0.8, - 0.9 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 1e-05, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - 
"influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true, - "layer_importance_heatmap": true, - "sensitivity_curves": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet50_imagenet100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet50_imagenet100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/two_axis_scaleup_20260417", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": 
false -} diff --git a/configs/vision_prune/paper_2026_two_axis_sweep/mobilenetv2_cifar100_two_axis_seed123.yaml b/configs/vision_prune/paper_2026_two_axis_sweep/mobilenetv2_cifar100_two_axis_seed123.yaml deleted file mode 100644 index 02611f0f..00000000 --- a/configs/vision_prune/paper_2026_two_axis_sweep/mobilenetv2_cifar100_two_axis_seed123.yaml +++ /dev/null @@ -1,328 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_two_axis_sweep_seed123", - "description": "Locked unified rerun of headline non-hybrid pruning methods", - "tags": [ - "paper2026", - "two_axis_sweep", - "mobilenetv2", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084458_57217336/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - 
"target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - 
"metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - 
"generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "two_axis_a1_b0", - "two_axis_a1_b0p1", - "two_axis_a1_b0p25", - "two_axis_a1_b0p5", - "two_axis_a1_b1" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": 
"./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/two_axis_sweep_20260417", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_two_axis_sweep/mobilenetv2_cifar100_two_axis_seed42.yaml b/configs/vision_prune/paper_2026_two_axis_sweep/mobilenetv2_cifar100_two_axis_seed42.yaml deleted file mode 100644 index 01bd5c86..00000000 --- 
a/configs/vision_prune/paper_2026_two_axis_sweep/mobilenetv2_cifar100_two_axis_seed42.yaml +++ /dev/null @@ -1,328 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_two_axis_sweep_seed42", - "description": "Locked unified rerun of headline non-hybrid pruning methods", - "tags": [ - "paper2026", - "two_axis_sweep", - "mobilenetv2", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_080037_57211589/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - 
"use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - 
"within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - 
"do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "two_axis_a1_b0", - "two_axis_a1_b0p1", - "two_axis_a1_b0p25", - "two_axis_a1_b0p5", - "two_axis_a1_b1" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/two_axis_sweep_20260417", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_two_axis_sweep/mobilenetv2_cifar100_two_axis_seed456.yaml b/configs/vision_prune/paper_2026_two_axis_sweep/mobilenetv2_cifar100_two_axis_seed456.yaml deleted file mode 100644 index 109263ec..00000000 --- a/configs/vision_prune/paper_2026_two_axis_sweep/mobilenetv2_cifar100_two_axis_seed456.yaml +++ /dev/null @@ -1,328 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_two_axis_sweep_seed456", - "description": "Locked unified rerun of headline non-hybrid pruning methods", - "tags": [ - 
"paper2026", - "two_axis_sweep", - "mobilenetv2", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_084514_57217362/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - 
"rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - 
"taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - 
"two_axis_a1_b0", - "two_axis_a1_b0p1", - "two_axis_a1_b0p25", - "two_axis_a1_b0p5", - "two_axis_a1_b1" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/two_axis_sweep_20260417", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - 
"perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_two_axis_sweep/resnet18_cifar100_two_axis_seed123.yaml b/configs/vision_prune/paper_2026_two_axis_sweep/resnet18_cifar100_two_axis_seed123.yaml deleted file mode 100644 index 0171e377..00000000 --- a/configs/vision_prune/paper_2026_two_axis_sweep/resnet18_cifar100_two_axis_seed123.yaml +++ /dev/null @@ -1,295 +0,0 @@ -{ - "name": "resnet18_cifar100_two_axis_sweep_seed123", - "description": "Locked unified rerun of headline non-hybrid pruning methods", - "tags": [ - "paper2026", - "two_axis_sweep", - "resnet18", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080045_57211555/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - 
"measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - 
"generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "two_axis_a1_b0", - "two_axis_a1_b0p1", - "two_axis_a1_b0p25", - "two_axis_a1_b0p5", - "two_axis_a1_b1" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - 
"cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/two_axis_sweep_20260417", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - 
"fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_two_axis_sweep/resnet18_cifar100_two_axis_seed42.yaml b/configs/vision_prune/paper_2026_two_axis_sweep/resnet18_cifar100_two_axis_seed42.yaml deleted file mode 100644 index cc4d0152..00000000 --- a/configs/vision_prune/paper_2026_two_axis_sweep/resnet18_cifar100_two_axis_seed42.yaml +++ /dev/null @@ -1,295 +0,0 @@ -{ - "name": "resnet18_cifar100_two_axis_sweep_seed42", - "description": "Locked unified rerun of headline non-hybrid pruning methods", - "tags": [ - "paper2026", - "two_axis_sweep", - "resnet18", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080032_57211546/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - 
"force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, 
- "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, 
- "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "two_axis_a1_b0", - "two_axis_a1_b0p1", - "two_axis_a1_b0p25", - "two_axis_a1_b0p5", - "two_axis_a1_b1" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/two_axis_sweep_20260417", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, 
- "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_two_axis_sweep/resnet18_cifar100_two_axis_seed456.yaml b/configs/vision_prune/paper_2026_two_axis_sweep/resnet18_cifar100_two_axis_seed456.yaml deleted file mode 100644 index 74d6d911..00000000 --- a/configs/vision_prune/paper_2026_two_axis_sweep/resnet18_cifar100_two_axis_seed456.yaml +++ /dev/null @@ -1,295 +0,0 @@ -{ - "name": "resnet18_cifar100_two_axis_sweep_seed456", - "description": "Locked unified rerun of headline non-hybrid pruning methods", - "tags": [ - "paper2026", - "two_axis_sweep", - "resnet18", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080106_57211563/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - 
"learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - 
"activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - 
"generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "two_axis_a1_b0", - "two_axis_a1_b0p1", - "two_axis_a1_b0p25", - "two_axis_a1_b0p5", - "two_axis_a1_b1" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 
1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/two_axis_sweep_20260417", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_two_axis_sweep/vgg16_cifar100_two_axis_seed123.yaml b/configs/vision_prune/paper_2026_two_axis_sweep/vgg16_cifar100_two_axis_seed123.yaml deleted file mode 100644 index 1ba6e717..00000000 --- 
a/configs/vision_prune/paper_2026_two_axis_sweep/vgg16_cifar100_two_axis_seed123.yaml +++ /dev/null @@ -1,295 +0,0 @@ -{ - "name": "vgg16_cifar100_two_axis_sweep_seed123", - "description": "Locked unified rerun of headline non-hybrid pruning methods", - "tags": [ - "paper2026", - "two_axis_sweep", - "vgg16", - "seed123" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080034_57211556/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": 
false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - 
"cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "two_axis_a1_b0", - "two_axis_a1_b0p1", - "two_axis_a1_b0p25", - "two_axis_a1_b0p5", - "two_axis_a1_b1" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 
5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/two_axis_sweep_20260417", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": 
false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_two_axis_sweep/vgg16_cifar100_two_axis_seed42.yaml b/configs/vision_prune/paper_2026_two_axis_sweep/vgg16_cifar100_two_axis_seed42.yaml deleted file mode 100644 index 5267243f..00000000 --- a/configs/vision_prune/paper_2026_two_axis_sweep/vgg16_cifar100_two_axis_seed42.yaml +++ /dev/null @@ -1,295 +0,0 @@ -{ - "name": "vgg16_cifar100_two_axis_sweep_seed42", - "description": "Locked unified rerun of headline non-hybrid pruning methods", - "tags": [ - "paper2026", - "two_axis_sweep", - "vgg16", - "seed42" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080032_57211547/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - 
"force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - 
], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - 
"generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "two_axis_a1_b0", - "two_axis_a1_b0p1", - "two_axis_a1_b0p25", - "two_axis_a1_b0p5", - "two_axis_a1_b1" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/two_axis_sweep_20260417", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - 
"evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - "fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_two_axis_sweep/vgg16_cifar100_two_axis_seed456.yaml b/configs/vision_prune/paper_2026_two_axis_sweep/vgg16_cifar100_two_axis_seed456.yaml deleted file mode 100644 index 7e771469..00000000 --- a/configs/vision_prune/paper_2026_two_axis_sweep/vgg16_cifar100_two_axis_seed456.yaml +++ /dev/null @@ -1,295 +0,0 @@ -{ - "name": "vgg16_cifar100_two_axis_sweep_seed456", - "description": "Locked unified rerun of headline non-hybrid pruning methods", - "tags": [ - "paper2026", - "two_axis_sweep", - "vgg16", - "seed456" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - 
"model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080038_57211564/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": false, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - 
"measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - 
"generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "magnitude", - "composite_ixy", - "two_axis_a1_b0", - "two_axis_a1_b0p1", - "two_axis_a1_b0p25", - "two_axis_a1_b0p5", - "two_axis_a1_b1" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 5, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 1.0, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": 200, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - 
"cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/two_axis_sweep_20260417", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "hybrid_taylor_allocation": false, - 
"fine_tune_type_aware_enabled": false, - "fine_tune_type_aware_methods": [], - "pruning_enforce_exact_global_channel_budget": false -} diff --git a/configs/vision_prune/paper_2026_v2/mobilenetv2_cifar100_cluster_analysis.yaml b/configs/vision_prune/paper_2026_v2/mobilenetv2_cifar100_cluster_analysis.yaml deleted file mode 100644 index 327a79ef..00000000 --- a/configs/vision_prune/paper_2026_v2/mobilenetv2_cifar100_cluster_analysis.yaml +++ /dev/null @@ -1,324 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_cluster_analysis", - "description": "", - "tags": [], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": null, - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": true, - "training_epochs": 100, - "learning_rate": 0.01, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": true, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 
100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - 
"compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - 
"do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "random", - "magnitude", - "activation_mean", - "taylor", - "network_slimming", - "geometric_median", - "hrank", - "composite", - "cluster_aware", - "cluster_aware_annealed", - "cluster_aware_taylor_blend", - "cluster_aware_depth_adaptive" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 20, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "uniform", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 0.85, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": null, - "fine_tune_weight_decay": 1e-05, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "pdf", - "plot_dpi": 300, - "visualization_options": { - "enabled": true, - "format": "pdf", - "dpi": 300, - "style": "seaborn-v0_8-paper", - "histograms": true, - "violin_plots": true, - "correlation_heatmap": true, - "cluster_scatter": true, - "cluster_evolution": true, - "influence_matrix": true, - "halo_properties": true, - "pruning_comparison": true, - "pruning_recovery": true, - "cascade_test": true, - "metric_distributions": true - }, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/mobilenetv2_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/mobilenetv2_cifar100", - 
"log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "fine_tune_track_epoch_accuracy": true -} diff --git a/configs/vision_prune/paper_2026_v2/resnet18_cifar100_cluster_analysis.yaml b/configs/vision_prune/paper_2026_v2/resnet18_cifar100_cluster_analysis.yaml deleted file mode 100644 index 454c3f60..00000000 --- a/configs/vision_prune/paper_2026_v2/resnet18_cifar100_cluster_analysis.yaml +++ /dev/null @@ -1,291 +0,0 @@ -{ - "name": "resnet18_cifar100_cluster_analysis", - "description": "", - "tags": [], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": null, - "dataset_name": "cifar100", - 
"dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": true, - "training_epochs": 100, - "learning_rate": 0.1, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": true, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - 
"calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - 
"generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "random", - "magnitude", - "activation_mean", - "taylor", - "network_slimming", - "geometric_median", - "hrank", - "composite", - "cluster_aware", - "cluster_aware_annealed", - "cluster_aware_taylor_blend", - "cluster_aware_depth_adaptive" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 20, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 0.85, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": null, - "fine_tune_weight_decay": 0.0005, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - 
"pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/resnet18_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/resnet18_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "fine_tune_track_epoch_accuracy": true -} diff --git a/configs/vision_prune/paper_2026_v2/vgg16_cifar100_cluster_analysis.yaml b/configs/vision_prune/paper_2026_v2/vgg16_cifar100_cluster_analysis.yaml deleted file mode 
100644 index 28dde833..00000000 --- a/configs/vision_prune/paper_2026_v2/vgg16_cifar100_cluster_analysis.yaml +++ /dev/null @@ -1,291 +0,0 @@ -{ - "name": "vgg16_cifar100_cluster_analysis", - "description": "", - "tags": [], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": null, - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": true, - "training_epochs": 100, - "learning_rate": 0.05, - "optimizer": "sgd", - "scheduler": "cosine", - "scheduler_config": {}, - "weight_decay": 0.0005, - "momentum": 0.9, - "num_networks": 1, - "do_train": true, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "definition": "both", - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - } - }, - "metric_optimization": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - 
"alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": {}, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": "match", - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "type_mapping_mode": "global", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "compute_loss_proxy": true, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": true, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "cluster_aware_alpha": 1.0, - "cluster_aware_beta": 0.5, - "cluster_aware_gamma": 0.3, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.5, - "cluster_aware_anneal_end": 0.8, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - 
"cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "random", - "magnitude", - "activation_mean", - "taylor", - "network_slimming", - "geometric_median", - "hrank", - "composite", - "cluster_aware", - "cluster_aware_annealed", - "cluster_aware_taylor_blend", - "cluster_aware_depth_adaptive" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 20, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": 
"global_threshold", - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.95, - "pruning_max_per_layer_sparsity_cap": 0.85, - "fine_tune_learning_rate": 0.0001, - "fine_tune_max_batches": null, - "fine_tune_weight_decay": 0.0001, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": false, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "png", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./results/vision/vgg16_cifar100/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./results/vision/vgg16_cifar100", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true, - "permutation_baseline": { - "enabled": false, - "n_permutations": 100 - } - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - 
"eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {}, - "fine_tune_track_epoch_accuracy": true -} diff --git a/configs/vision_prune/paper_2026_v8/mobilenetv2_cifar100_v8_hybrid_seed1011.yaml b/configs/vision_prune/paper_2026_v8/mobilenetv2_cifar100_v8_hybrid_seed1011.yaml deleted file mode 100644 index 408184c1..00000000 --- a/configs/vision_prune/paper_2026_v8/mobilenetv2_cifar100_v8_hybrid_seed1011.yaml +++ /dev/null @@ -1,323 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_cluster_analysis_seed1011_rq_exact_tm_simple", - "description": "v8 hybrid: Taylor allocation + IXY ranking", - "tags": [ - "v8", - "hybrid_taylor_alloc" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_080037_57211589/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": true, - "training_epochs": 10, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": null, - "scheduler_config": {}, - "weight_decay": 0.0, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - 
"synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": {}, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": null, - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "rq_definition": "covariance_exact", - 
"type_mapping_mode": "global_simple", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "clustering_first_metric": "ixy", - "clustering_importance_mode": "geometric", - "compute_loss_proxy": false, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "taylor_act_samples": 1024, - "taylor_act_batch_size": 16, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "chip_images": 256, - "cluster_aware_alpha": 2.0, - "cluster_aware_beta": 0.0, - "cluster_aware_gamma": 0.25, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - 
"generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "composite_ixy", - "cluster_aware_spectral_rs_ixy", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 20, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "hybrid_taylor_allocation": true, - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 0.85, - "pruning_enforce_exact_global_channel_budget": true, - "fine_tune_learning_rate": null, - "fine_tune_max_batches": null, - "fine_tune_weight_decay": 0.0, - "fine_tune_type_aware_enabled": true, - "fine_tune_type_aware_methods": [], - "fine_tune_type_aware_lr_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.5, - "background": 1.5 - }, - "fine_tune_type_aware_wd_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.25, - "background": 1.5 - }, - "fine_tune_type_aware_scale_batchnorm": true, - "fine_tune_type_aware_scale_classifier": false, - "fine_tune_track_epoch_accuracy": true, - 
"fine_tune_eval_frequency": 5, - "fine_tune_use_amp": true, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "pdf", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./logs/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./logs", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_v8", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git 
a/configs/vision_prune/paper_2026_v8/mobilenetv2_cifar100_v8_hybrid_seed123.yaml b/configs/vision_prune/paper_2026_v8/mobilenetv2_cifar100_v8_hybrid_seed123.yaml deleted file mode 100644 index 5e84a21f..00000000 --- a/configs/vision_prune/paper_2026_v8/mobilenetv2_cifar100_v8_hybrid_seed123.yaml +++ /dev/null @@ -1,323 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_cluster_analysis_seed123_rq_exact_tm_simple", - "description": "v8 hybrid: Taylor allocation + IXY ranking", - "tags": [ - "v8", - "hybrid_taylor_alloc" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_080037_57211589/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": true, - "training_epochs": 10, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": null, - "scheduler_config": {}, - "weight_decay": 0.0, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - 
"use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": {}, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": null, - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "rq_definition": "covariance_exact", - "type_mapping_mode": "global_simple", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "clustering_first_metric": "ixy", - "clustering_importance_mode": 
"geometric", - "compute_loss_proxy": false, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "taylor_act_samples": 1024, - "taylor_act_batch_size": 16, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "chip_images": 256, - "cluster_aware_alpha": 2.0, - "cluster_aware_beta": 0.0, - "cluster_aware_gamma": 0.25, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - 
"generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "composite_ixy", - "cluster_aware_spectral_rs_ixy", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 20, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "hybrid_taylor_allocation": true, - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 0.85, - "pruning_enforce_exact_global_channel_budget": true, - "fine_tune_learning_rate": null, - "fine_tune_max_batches": null, - "fine_tune_weight_decay": 0.0, - "fine_tune_type_aware_enabled": true, - "fine_tune_type_aware_methods": [], - "fine_tune_type_aware_lr_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.5, - "background": 1.5 - }, - "fine_tune_type_aware_wd_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.25, - "background": 1.5 - }, - "fine_tune_type_aware_scale_batchnorm": true, - "fine_tune_type_aware_scale_classifier": false, - "fine_tune_track_epoch_accuracy": true, - "fine_tune_eval_frequency": 5, - "fine_tune_use_amp": true, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - 
"generate_plots": false, - "plot_format": "pdf", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./logs/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./logs", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_v8", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_2026_v8/mobilenetv2_cifar100_v8_hybrid_seed42.yaml b/configs/vision_prune/paper_2026_v8/mobilenetv2_cifar100_v8_hybrid_seed42.yaml deleted file mode 100644 index a930d330..00000000 --- a/configs/vision_prune/paper_2026_v8/mobilenetv2_cifar100_v8_hybrid_seed42.yaml +++ /dev/null @@ -1,323 +0,0 @@ -{ - "name": 
"mobilenetv2_cifar100_cluster_analysis_seed42_rq_exact_tm_simple", - "description": "v8 hybrid: Taylor allocation + IXY ranking", - "tags": [ - "v8", - "hybrid_taylor_alloc" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_080037_57211589/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": true, - "training_epochs": 10, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": null, - "scheduler_config": {}, - "weight_decay": 0.0, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": 
false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": {}, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": null, - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "rq_definition": "covariance_exact", - "type_mapping_mode": "global_simple", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "clustering_first_metric": "ixy", - "clustering_importance_mode": "geometric", - "compute_loss_proxy": false, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - 
"use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "taylor_act_samples": 1024, - "taylor_act_batch_size": 16, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "chip_images": 256, - "cluster_aware_alpha": 2.0, - "cluster_aware_beta": 0.0, - "cluster_aware_gamma": 0.25, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - 
"dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "composite_ixy", - "cluster_aware_spectral_rs_ixy", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 20, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "hybrid_taylor_allocation": true, - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 0.85, - "pruning_enforce_exact_global_channel_budget": true, - "fine_tune_learning_rate": null, - "fine_tune_max_batches": null, - "fine_tune_weight_decay": 0.0, - "fine_tune_type_aware_enabled": true, - "fine_tune_type_aware_methods": [], - "fine_tune_type_aware_lr_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.5, - "background": 1.5 - }, - "fine_tune_type_aware_wd_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.25, - "background": 1.5 - }, - "fine_tune_type_aware_scale_batchnorm": true, - "fine_tune_type_aware_scale_classifier": false, - "fine_tune_track_epoch_accuracy": true, - "fine_tune_eval_frequency": 5, - "fine_tune_use_amp": true, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "pdf", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./logs/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./logs", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_v8", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_2026_v8/mobilenetv2_cifar100_v8_hybrid_seed456.yaml b/configs/vision_prune/paper_2026_v8/mobilenetv2_cifar100_v8_hybrid_seed456.yaml deleted file mode 100644 index 1b8d296e..00000000 --- a/configs/vision_prune/paper_2026_v8/mobilenetv2_cifar100_v8_hybrid_seed456.yaml +++ /dev/null @@ -1,323 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_cluster_analysis_seed456_rq_exact_tm_simple", - "description": "v8 hybrid: Taylor allocation + IXY ranking", - "tags": [ - "v8", - "hybrid_taylor_alloc" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_080037_57211589/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": true, - "training_epochs": 10, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": null, - "scheduler_config": {}, - "weight_decay": 0.0, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": {}, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - 
"cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": null, - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "rq_definition": "covariance_exact", - "type_mapping_mode": "global_simple", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "clustering_first_metric": "ixy", - "clustering_importance_mode": "geometric", - "compute_loss_proxy": false, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "taylor_act_samples": 1024, - "taylor_act_batch_size": 16, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - 
"chip_images": 256, - "cluster_aware_alpha": 2.0, - "cluster_aware_beta": 0.0, - "cluster_aware_gamma": 0.25, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "composite_ixy", - "cluster_aware_spectral_rs_ixy", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, 
- 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 20, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "hybrid_taylor_allocation": true, - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 0.85, - "pruning_enforce_exact_global_channel_budget": true, - "fine_tune_learning_rate": null, - "fine_tune_max_batches": null, - "fine_tune_weight_decay": 0.0, - "fine_tune_type_aware_enabled": true, - "fine_tune_type_aware_methods": [], - "fine_tune_type_aware_lr_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.5, - "background": 1.5 - }, - "fine_tune_type_aware_wd_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.25, - "background": 1.5 - }, - "fine_tune_type_aware_scale_batchnorm": true, - "fine_tune_type_aware_scale_classifier": false, - "fine_tune_track_epoch_accuracy": true, - "fine_tune_eval_frequency": 5, - "fine_tune_use_amp": true, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "pdf", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./logs/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./logs", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_v8", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - 
"perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_2026_v8/mobilenetv2_cifar100_v8_hybrid_seed789.yaml b/configs/vision_prune/paper_2026_v8/mobilenetv2_cifar100_v8_hybrid_seed789.yaml deleted file mode 100644 index 515b2466..00000000 --- a/configs/vision_prune/paper_2026_v8/mobilenetv2_cifar100_v8_hybrid_seed789.yaml +++ /dev/null @@ -1,323 +0,0 @@ -{ - "name": "mobilenetv2_cifar100_cluster_analysis_seed789_rq_exact_tm_simple", - "description": "v8 hybrid: Taylor allocation + IXY ranking", - "tags": [ - "v8", - "hybrid_taylor_alloc" - ], - "experiment_type": "cluster_analysis", - "model_name": "mobilenet_v2", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/mobilenetv2_cifar100_cluster_analysis_20260127_080037_57211589/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": true, - 
"training_epochs": 10, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": null, - "scheduler_config": {}, - "weight_decay": 0.0, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": {}, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - 
"alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": null, - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "rq_definition": "covariance_exact", - "type_mapping_mode": "global_simple", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "clustering_first_metric": "ixy", - "clustering_importance_mode": "geometric", - "compute_loss_proxy": false, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "taylor_act_samples": 1024, - "taylor_act_batch_size": 16, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "chip_images": 256, - "cluster_aware_alpha": 2.0, - "cluster_aware_beta": 0.0, - "cluster_aware_gamma": 0.25, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - 
"cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "composite_ixy", - "cluster_aware_spectral_rs_ixy", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 20, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "hybrid_taylor_allocation": true, - "pruning_min_per_layer": 0.0, - 
"pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 0.85, - "pruning_enforce_exact_global_channel_budget": true, - "fine_tune_learning_rate": null, - "fine_tune_max_batches": null, - "fine_tune_weight_decay": 0.0, - "fine_tune_type_aware_enabled": true, - "fine_tune_type_aware_methods": [], - "fine_tune_type_aware_lr_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.5, - "background": 1.5 - }, - "fine_tune_type_aware_wd_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.25, - "background": 1.5 - }, - "fine_tune_type_aware_scale_batchnorm": true, - "fine_tune_type_aware_scale_classifier": false, - "fine_tune_track_epoch_accuracy": true, - "fine_tune_eval_frequency": 5, - "fine_tune_use_amp": true, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": true, - "pruning_skip_depthwise": true, - "generate_plots": false, - "plot_format": "pdf", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./logs/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./logs", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_v8", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - 
"supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_2026_v8/resnet18_cifar100_v8_hybrid_seed1011.yaml b/configs/vision_prune/paper_2026_v8/resnet18_cifar100_v8_hybrid_seed1011.yaml deleted file mode 100644 index 6e1a442c..00000000 --- a/configs/vision_prune/paper_2026_v8/resnet18_cifar100_v8_hybrid_seed1011.yaml +++ /dev/null @@ -1,323 +0,0 @@ -{ - "name": "resnet18_cifar100_cluster_analysis_seed1011_rq_exact_tm_simple", - "description": "v8 hybrid: Taylor allocation + IXY ranking", - "tags": [ - "v8", - "hybrid_taylor_alloc" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080032_57211546/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": true, - "training_epochs": 10, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": null, - "scheduler_config": {}, - "weight_decay": 0.0, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - 
"metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": {}, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - 
"simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": null, - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "rq_definition": "covariance_exact", - "type_mapping_mode": "global_simple", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "clustering_first_metric": "ixy", - "clustering_importance_mode": "geometric", - "compute_loss_proxy": false, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "taylor_act_samples": 1024, - "taylor_act_batch_size": 16, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "chip_images": 256, - "cluster_aware_alpha": 2.0, - "cluster_aware_beta": 0.0, - "cluster_aware_gamma": 0.25, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 
1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "composite_ixy", - "cluster_aware_spectral_rs_ixy", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 20, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "hybrid_taylor_allocation": true, - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 0.85, - "pruning_enforce_exact_global_channel_budget": true, - "fine_tune_learning_rate": null, - "fine_tune_max_batches": null, - "fine_tune_weight_decay": 0.0, - "fine_tune_type_aware_enabled": true, - "fine_tune_type_aware_methods": [], - "fine_tune_type_aware_lr_multipliers": { - 
"critical": 0.5, - "synergistic": 1.0, - "redundant": 1.5, - "background": 1.5 - }, - "fine_tune_type_aware_wd_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.25, - "background": 1.5 - }, - "fine_tune_type_aware_scale_batchnorm": true, - "fine_tune_type_aware_scale_classifier": false, - "fine_tune_track_epoch_accuracy": true, - "fine_tune_eval_frequency": 5, - "fine_tune_use_amp": true, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "pdf", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./logs/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./logs", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_v8", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - 
"do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_2026_v8/resnet18_cifar100_v8_hybrid_seed123.yaml b/configs/vision_prune/paper_2026_v8/resnet18_cifar100_v8_hybrid_seed123.yaml deleted file mode 100644 index b18c92b2..00000000 --- a/configs/vision_prune/paper_2026_v8/resnet18_cifar100_v8_hybrid_seed123.yaml +++ /dev/null @@ -1,323 +0,0 @@ -{ - "name": "resnet18_cifar100_cluster_analysis_seed123_rq_exact_tm_simple", - "description": "v8 hybrid: Taylor allocation + IXY ranking", - "tags": [ - "v8", - "hybrid_taylor_alloc" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080032_57211546/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": true, - "training_epochs": 10, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": null, - "scheduler_config": {}, - "weight_decay": 0.0, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, 
- "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": {}, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": null, - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "rq_definition": "covariance_exact", - 
"type_mapping_mode": "global_simple", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "clustering_first_metric": "ixy", - "clustering_importance_mode": "geometric", - "compute_loss_proxy": false, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "taylor_act_samples": 1024, - "taylor_act_batch_size": 16, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "chip_images": 256, - "cluster_aware_alpha": 2.0, - "cluster_aware_beta": 0.0, - "cluster_aware_gamma": 0.25, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - 
"generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "composite_ixy", - "cluster_aware_spectral_rs_ixy", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 20, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "hybrid_taylor_allocation": true, - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 0.85, - "pruning_enforce_exact_global_channel_budget": true, - "fine_tune_learning_rate": null, - "fine_tune_max_batches": null, - "fine_tune_weight_decay": 0.0, - "fine_tune_type_aware_enabled": true, - "fine_tune_type_aware_methods": [], - "fine_tune_type_aware_lr_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.5, - "background": 1.5 - }, - "fine_tune_type_aware_wd_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.25, - "background": 1.5 - }, - "fine_tune_type_aware_scale_batchnorm": true, - "fine_tune_type_aware_scale_classifier": false, - "fine_tune_track_epoch_accuracy": true, - 
"fine_tune_eval_frequency": 5, - "fine_tune_use_amp": true, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "pdf", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./logs/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./logs", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_v8", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git 
a/configs/vision_prune/paper_2026_v8/resnet18_cifar100_v8_hybrid_seed42.yaml b/configs/vision_prune/paper_2026_v8/resnet18_cifar100_v8_hybrid_seed42.yaml deleted file mode 100644 index c0fc8ea6..00000000 --- a/configs/vision_prune/paper_2026_v8/resnet18_cifar100_v8_hybrid_seed42.yaml +++ /dev/null @@ -1,323 +0,0 @@ -{ - "name": "resnet18_cifar100_cluster_analysis_seed42_rq_exact_tm_simple", - "description": "v8 hybrid: Taylor allocation + IXY ranking", - "tags": [ - "v8", - "hybrid_taylor_alloc" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080032_57211546/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": true, - "training_epochs": 10, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": null, - "scheduler_config": {}, - "weight_decay": 0.0, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": 
false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": {}, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": null, - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "rq_definition": "covariance_exact", - "type_mapping_mode": "global_simple", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "clustering_first_metric": "ixy", - "clustering_importance_mode": "geometric", - 
"compute_loss_proxy": false, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "taylor_act_samples": 1024, - "taylor_act_batch_size": 16, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "chip_images": 256, - "cluster_aware_alpha": 2.0, - "cluster_aware_beta": 0.0, - "cluster_aware_gamma": 0.25, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 
0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "composite_ixy", - "cluster_aware_spectral_rs_ixy", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 20, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "hybrid_taylor_allocation": true, - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 0.85, - "pruning_enforce_exact_global_channel_budget": true, - "fine_tune_learning_rate": null, - "fine_tune_max_batches": null, - "fine_tune_weight_decay": 0.0, - "fine_tune_type_aware_enabled": true, - "fine_tune_type_aware_methods": [], - "fine_tune_type_aware_lr_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.5, - "background": 1.5 - }, - "fine_tune_type_aware_wd_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.25, - "background": 1.5 - }, - "fine_tune_type_aware_scale_batchnorm": true, - "fine_tune_type_aware_scale_classifier": false, - "fine_tune_track_epoch_accuracy": true, - "fine_tune_eval_frequency": 5, - "fine_tune_use_amp": true, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - 
"plot_format": "pdf", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./logs/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./logs", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_v8", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_2026_v8/resnet18_cifar100_v8_hybrid_seed456.yaml b/configs/vision_prune/paper_2026_v8/resnet18_cifar100_v8_hybrid_seed456.yaml deleted file mode 100644 index 8e184d2d..00000000 --- a/configs/vision_prune/paper_2026_v8/resnet18_cifar100_v8_hybrid_seed456.yaml +++ /dev/null @@ -1,323 +0,0 @@ -{ - "name": 
"resnet18_cifar100_cluster_analysis_seed456_rq_exact_tm_simple", - "description": "v8 hybrid: Taylor allocation + IXY ranking", - "tags": [ - "v8", - "hybrid_taylor_alloc" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080032_57211546/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": true, - "training_epochs": 10, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": null, - "scheduler_config": {}, - "weight_decay": 0.0, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - 
"force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": {}, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": null, - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "rq_definition": "covariance_exact", - "type_mapping_mode": "global_simple", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "clustering_first_metric": "ixy", - "clustering_importance_mode": "geometric", - "compute_loss_proxy": false, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - 
"use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "taylor_act_samples": 1024, - "taylor_act_batch_size": 16, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "chip_images": 256, - "cluster_aware_alpha": 2.0, - "cluster_aware_beta": 0.0, - "cluster_aware_gamma": 0.25, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - 
"dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "composite_ixy", - "cluster_aware_spectral_rs_ixy", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 20, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "hybrid_taylor_allocation": true, - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 0.85, - "pruning_enforce_exact_global_channel_budget": true, - "fine_tune_learning_rate": null, - "fine_tune_max_batches": null, - "fine_tune_weight_decay": 0.0, - "fine_tune_type_aware_enabled": true, - "fine_tune_type_aware_methods": [], - "fine_tune_type_aware_lr_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.5, - "background": 1.5 - }, - "fine_tune_type_aware_wd_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.25, - "background": 1.5 - }, - "fine_tune_type_aware_scale_batchnorm": true, - "fine_tune_type_aware_scale_classifier": false, - "fine_tune_track_epoch_accuracy": true, - "fine_tune_eval_frequency": 5, - "fine_tune_use_amp": true, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "pdf", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./logs/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./logs", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_v8", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_2026_v8/resnet18_cifar100_v8_hybrid_seed789.yaml b/configs/vision_prune/paper_2026_v8/resnet18_cifar100_v8_hybrid_seed789.yaml deleted file mode 100644 index 455139ba..00000000 --- a/configs/vision_prune/paper_2026_v8/resnet18_cifar100_v8_hybrid_seed789.yaml +++ /dev/null @@ -1,323 +0,0 @@ -{ - "name": "resnet18_cifar100_cluster_analysis_seed789_rq_exact_tm_simple", - "description": "v8 hybrid: Taylor allocation + IXY ranking", - "tags": [ - "v8", - "hybrid_taylor_alloc" - ], - "experiment_type": "cluster_analysis", - "model_name": "resnet18", - "model_config": {}, - "pretrained": true, - "model_checkpoint": 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/resnet18_cifar100_cluster_analysis_20260127_080032_57211546/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": true, - "training_epochs": 10, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": null, - "scheduler_config": {}, - "weight_decay": 0.0, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": {}, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": 
"mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": null, - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "rq_definition": "covariance_exact", - "type_mapping_mode": "global_simple", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "clustering_first_metric": "ixy", - "clustering_importance_mode": "geometric", - "compute_loss_proxy": false, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "taylor_act_samples": 1024, - "taylor_act_batch_size": 16, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "chip_images": 256, - 
"cluster_aware_alpha": 2.0, - "cluster_aware_beta": 0.0, - "cluster_aware_gamma": 0.25, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "composite_ixy", - "cluster_aware_spectral_rs_ixy", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - 
"pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 20, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "hybrid_taylor_allocation": true, - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 0.85, - "pruning_enforce_exact_global_channel_budget": true, - "fine_tune_learning_rate": null, - "fine_tune_max_batches": null, - "fine_tune_weight_decay": 0.0, - "fine_tune_type_aware_enabled": true, - "fine_tune_type_aware_methods": [], - "fine_tune_type_aware_lr_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.5, - "background": 1.5 - }, - "fine_tune_type_aware_wd_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.25, - "background": 1.5 - }, - "fine_tune_type_aware_scale_batchnorm": true, - "fine_tune_type_aware_scale_classifier": false, - "fine_tune_track_epoch_accuracy": true, - "fine_tune_eval_frequency": 5, - "fine_tune_use_amp": true, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "pdf", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./logs/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./logs", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_v8", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], 
- "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_2026_v8/submit_v8.sh b/configs/vision_prune/paper_2026_v8/submit_v8.sh deleted file mode 100755 index 71445735..00000000 --- a/configs/vision_prune/paper_2026_v8/submit_v8.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=v8_hybrid -#SBATCH --partition=kempner_h100 -#SBATCH --account=kempner_dev -#SBATCH --nodes=1 -#SBATCH --ntasks=1 -#SBATCH --gpus-per-node=1 -#SBATCH --cpus-per-task=8 -#SBATCH --mem=64G -#SBATCH --time=04:00:00 -#SBATCH --array=1-15%16 -#SBATCH --output=/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_v8/slurm_%A_%a.out -#SBATCH --error=/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_v8/slurm_%A_%a.err - -module load cuda/12.2.0-fasrc01 -module load gcc/12.2.0-fasrc01 - -# Activate conda -eval "$(conda shell.bash hook)" -conda activate networkAlignmentAnalysis - -cd /n/holylabs/kempner_dev/Users/hsafaai/Code/alignment - -# Get config for this array task -CONFIG=$(sed -n 
"${SLURM_ARRAY_TASK_ID}p" configs/vision_prune/paper_2026_v8/v8_config_list.txt) - -echo "=========================================" -echo "Job $SLURM_ARRAY_JOB_ID task $SLURM_ARRAY_TASK_ID" -echo "Config: $CONFIG" -echo "Node: $(hostname)" -echo "GPU: $(nvidia-smi --query-gpu=name --format=csv,noheader | head -1)" -echo "=========================================" - -python scripts/run_experiment.py --config "$CONFIG" --allow-dirty diff --git a/configs/vision_prune/paper_2026_v8/v8_config_list.txt b/configs/vision_prune/paper_2026_v8/v8_config_list.txt deleted file mode 100644 index f35c0f6d..00000000 --- a/configs/vision_prune/paper_2026_v8/v8_config_list.txt +++ /dev/null @@ -1,15 +0,0 @@ -configs/vision_prune/paper_2026_v8/resnet18_cifar100_v8_hybrid_seed42.yaml -configs/vision_prune/paper_2026_v8/resnet18_cifar100_v8_hybrid_seed123.yaml -configs/vision_prune/paper_2026_v8/resnet18_cifar100_v8_hybrid_seed456.yaml -configs/vision_prune/paper_2026_v8/resnet18_cifar100_v8_hybrid_seed789.yaml -configs/vision_prune/paper_2026_v8/resnet18_cifar100_v8_hybrid_seed1011.yaml -configs/vision_prune/paper_2026_v8/vgg16_cifar100_v8_hybrid_seed42.yaml -configs/vision_prune/paper_2026_v8/vgg16_cifar100_v8_hybrid_seed123.yaml -configs/vision_prune/paper_2026_v8/vgg16_cifar100_v8_hybrid_seed456.yaml -configs/vision_prune/paper_2026_v8/vgg16_cifar100_v8_hybrid_seed789.yaml -configs/vision_prune/paper_2026_v8/vgg16_cifar100_v8_hybrid_seed1011.yaml -configs/vision_prune/paper_2026_v8/mobilenetv2_cifar100_v8_hybrid_seed42.yaml -configs/vision_prune/paper_2026_v8/mobilenetv2_cifar100_v8_hybrid_seed123.yaml -configs/vision_prune/paper_2026_v8/mobilenetv2_cifar100_v8_hybrid_seed456.yaml -configs/vision_prune/paper_2026_v8/mobilenetv2_cifar100_v8_hybrid_seed789.yaml -configs/vision_prune/paper_2026_v8/mobilenetv2_cifar100_v8_hybrid_seed1011.yaml diff --git a/configs/vision_prune/paper_2026_v8/vgg16_cifar100_v8_hybrid_seed1011.yaml 
b/configs/vision_prune/paper_2026_v8/vgg16_cifar100_v8_hybrid_seed1011.yaml deleted file mode 100644 index a473f1ac..00000000 --- a/configs/vision_prune/paper_2026_v8/vgg16_cifar100_v8_hybrid_seed1011.yaml +++ /dev/null @@ -1,323 +0,0 @@ -{ - "name": "vgg16_cifar100_cluster_analysis_seed1011_rq_exact_tm_simple", - "description": "v8 hybrid: Taylor allocation + IXY ranking", - "tags": [ - "v8", - "hybrid_taylor_alloc" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080032_57211547/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 1011, - "train_before_dropout": true, - "training_epochs": 10, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": null, - "scheduler_config": {}, - "weight_decay": 0.0, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - 
"taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": {}, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": null, - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "rq_definition": "covariance_exact", - "type_mapping_mode": "global_simple", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "clustering_first_metric": "ixy", - "clustering_importance_mode": "geometric", - "compute_loss_proxy": false, - "loss_proxy_n_calibration": 1024, - 
"compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "taylor_act_samples": 1024, - "taylor_act_batch_size": 16, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "chip_images": 256, - "cluster_aware_alpha": 2.0, - "cluster_aware_beta": 0.0, - "cluster_aware_gamma": 0.25, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - 
"generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "composite_ixy", - "cluster_aware_spectral_rs_ixy", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 20, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "hybrid_taylor_allocation": true, - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 0.85, - "pruning_enforce_exact_global_channel_budget": true, - "fine_tune_learning_rate": null, - "fine_tune_max_batches": null, - "fine_tune_weight_decay": 0.0, - "fine_tune_type_aware_enabled": true, - "fine_tune_type_aware_methods": [], - "fine_tune_type_aware_lr_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.5, - "background": 1.5 - }, - "fine_tune_type_aware_wd_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.25, - "background": 1.5 - }, - "fine_tune_type_aware_scale_batchnorm": true, - "fine_tune_type_aware_scale_classifier": false, - "fine_tune_track_epoch_accuracy": true, - "fine_tune_eval_frequency": 5, - "fine_tune_use_amp": true, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "pdf", - "plot_dpi": 300, - 
"visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./logs/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./logs", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_v8", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_2026_v8/vgg16_cifar100_v8_hybrid_seed123.yaml b/configs/vision_prune/paper_2026_v8/vgg16_cifar100_v8_hybrid_seed123.yaml deleted file mode 100644 index 3d9f4099..00000000 --- a/configs/vision_prune/paper_2026_v8/vgg16_cifar100_v8_hybrid_seed123.yaml +++ /dev/null @@ -1,323 +0,0 @@ -{ - "name": "vgg16_cifar100_cluster_analysis_seed123_rq_exact_tm_simple", - "description": "v8 hybrid: Taylor 
allocation + IXY ranking", - "tags": [ - "v8", - "hybrid_taylor_alloc" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080032_57211547/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 123, - "train_before_dropout": true, - "training_epochs": 10, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": null, - "scheduler_config": {}, - "weight_decay": 0.0, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - 
"gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": {}, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": null, - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "rq_definition": "covariance_exact", - "type_mapping_mode": "global_simple", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "clustering_first_metric": "ixy", - "clustering_importance_mode": "geometric", - "compute_loss_proxy": false, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, 
- "taylor_act_samples": 1024, - "taylor_act_batch_size": 16, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "chip_images": 256, - "cluster_aware_alpha": 2.0, - "cluster_aware_beta": 0.0, - "cluster_aware_gamma": 0.25, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - 
"taylor", - "composite_ixy", - "cluster_aware_spectral_rs_ixy", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 20, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "hybrid_taylor_allocation": true, - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 0.85, - "pruning_enforce_exact_global_channel_budget": true, - "fine_tune_learning_rate": null, - "fine_tune_max_batches": null, - "fine_tune_weight_decay": 0.0, - "fine_tune_type_aware_enabled": true, - "fine_tune_type_aware_methods": [], - "fine_tune_type_aware_lr_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.5, - "background": 1.5 - }, - "fine_tune_type_aware_wd_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.25, - "background": 1.5 - }, - "fine_tune_type_aware_scale_batchnorm": true, - "fine_tune_type_aware_scale_classifier": false, - "fine_tune_track_epoch_accuracy": true, - "fine_tune_eval_frequency": 5, - "fine_tune_use_amp": true, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "pdf", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./logs/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./logs", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_v8", - "wandb_project": null, - 
"wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_2026_v8/vgg16_cifar100_v8_hybrid_seed42.yaml b/configs/vision_prune/paper_2026_v8/vgg16_cifar100_v8_hybrid_seed42.yaml deleted file mode 100644 index a9c7991e..00000000 --- a/configs/vision_prune/paper_2026_v8/vgg16_cifar100_v8_hybrid_seed42.yaml +++ /dev/null @@ -1,323 +0,0 @@ -{ - "name": "vgg16_cifar100_cluster_analysis_seed42_rq_exact_tm_simple", - "description": "v8 hybrid: Taylor allocation + IXY ranking", - "tags": [ - "v8", - "hybrid_taylor_alloc" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080032_57211547/checkpoints/trained_model.pth", - "dataset_name": 
"cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 42, - "train_before_dropout": true, - "training_epochs": 10, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": null, - "scheduler_config": {}, - "weight_decay": 0.0, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": {}, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - 
"taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": null, - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "rq_definition": "covariance_exact", - "type_mapping_mode": "global_simple", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "clustering_first_metric": "ixy", - "clustering_importance_mode": "geometric", - "compute_loss_proxy": false, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "taylor_act_samples": 1024, - "taylor_act_batch_size": 16, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "chip_images": 256, - "cluster_aware_alpha": 2.0, - "cluster_aware_beta": 0.0, - "cluster_aware_gamma": 0.25, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - 
"cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "composite_ixy", - "cluster_aware_spectral_rs_ixy", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 20, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 
0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "hybrid_taylor_allocation": true, - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 0.85, - "pruning_enforce_exact_global_channel_budget": true, - "fine_tune_learning_rate": null, - "fine_tune_max_batches": null, - "fine_tune_weight_decay": 0.0, - "fine_tune_type_aware_enabled": true, - "fine_tune_type_aware_methods": [], - "fine_tune_type_aware_lr_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.5, - "background": 1.5 - }, - "fine_tune_type_aware_wd_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.25, - "background": 1.5 - }, - "fine_tune_type_aware_scale_batchnorm": true, - "fine_tune_type_aware_scale_classifier": false, - "fine_tune_track_epoch_accuracy": true, - "fine_tune_eval_frequency": 5, - "fine_tune_use_amp": true, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "pdf", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./logs/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./logs", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_v8", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, 
- "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_2026_v8/vgg16_cifar100_v8_hybrid_seed456.yaml b/configs/vision_prune/paper_2026_v8/vgg16_cifar100_v8_hybrid_seed456.yaml deleted file mode 100644 index bc9caee0..00000000 --- a/configs/vision_prune/paper_2026_v8/vgg16_cifar100_v8_hybrid_seed456.yaml +++ /dev/null @@ -1,323 +0,0 @@ -{ - "name": "vgg16_cifar100_cluster_analysis_seed456_rq_exact_tm_simple", - "description": "v8 hybrid: Taylor allocation + IXY ranking", - "tags": [ - "v8", - "hybrid_taylor_alloc" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080032_57211547/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 456, - "train_before_dropout": true, - "training_epochs": 10, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": null, - "scheduler_config": {}, - "weight_decay": 0.0, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - 
"rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "shrinkage": true - }, - "gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": {}, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": 
"unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": null, - "spatial_samples_per_image": 16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "rq_definition": "covariance_exact", - "type_mapping_mode": "global_simple", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "clustering_first_metric": "ixy", - "clustering_importance_mode": "geometric", - "compute_loss_proxy": false, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "taylor_act_samples": 1024, - "taylor_act_batch_size": 16, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "chip_images": 256, - "cluster_aware_alpha": 2.0, - "cluster_aware_beta": 0.0, - "cluster_aware_gamma": 0.25, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - 
"generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - "generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "composite_ixy", - "cluster_aware_spectral_rs_ixy", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 20, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "hybrid_taylor_allocation": true, - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 0.85, - "pruning_enforce_exact_global_channel_budget": true, - "fine_tune_learning_rate": null, - "fine_tune_max_batches": null, - 
"fine_tune_weight_decay": 0.0, - "fine_tune_type_aware_enabled": true, - "fine_tune_type_aware_methods": [], - "fine_tune_type_aware_lr_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.5, - "background": 1.5 - }, - "fine_tune_type_aware_wd_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.25, - "background": 1.5 - }, - "fine_tune_type_aware_scale_batchnorm": true, - "fine_tune_type_aware_scale_classifier": false, - "fine_tune_track_epoch_accuracy": true, - "fine_tune_eval_frequency": 5, - "fine_tune_use_amp": true, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "pdf", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./logs/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./logs", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_v8", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - 
"generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - "tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_2026_v8/vgg16_cifar100_v8_hybrid_seed789.yaml b/configs/vision_prune/paper_2026_v8/vgg16_cifar100_v8_hybrid_seed789.yaml deleted file mode 100644 index b3624e6c..00000000 --- a/configs/vision_prune/paper_2026_v8/vgg16_cifar100_v8_hybrid_seed789.yaml +++ /dev/null @@ -1,323 +0,0 @@ -{ - "name": "vgg16_cifar100_cluster_analysis_seed789_rq_exact_tm_simple", - "description": "v8 hybrid: Taylor allocation + IXY ranking", - "tags": [ - "v8", - "hybrid_taylor_alloc" - ], - "experiment_type": "cluster_analysis", - "model_name": "vgg16_bn", - "model_config": {}, - "pretrained": true, - "model_checkpoint": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/vgg16_cifar100_cluster_analysis_20260127_080032_57211547/checkpoints/trained_model.pth", - "dataset_name": "cifar100", - "dataset_config": {}, - "data_path": "./data", - "batch_size": 128, - "num_workers": 4, - "device": "cuda", - "seed": 789, - "train_before_dropout": true, - "training_epochs": 10, - "learning_rate": 0.001, - "optimizer": "adam", - "scheduler": null, - "scheduler_config": {}, - "weight_decay": 0.0, - "momentum": 0.9, - "num_networks": 1, - "do_train": false, - "metrics": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "metric_configs": { - "rayleigh_quotient": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "relative": false, - "shrinkage": true - }, - 
"gaussian_mi_analytic": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "sampling": "all" - }, - "synergy_gaussian_mmi": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "target": "logit_margin", - "num_pairs": 10, - "sampling": "top_k" - }, - "activation_l2_norm": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000 - }, - "taylor": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "criterion": "gradient_weight" - }, - "composite_weights": { - "use_jit": false, - "use_gpu_acceleration": false, - "force_cpu_for_large_ops": true, - "cpu_threshold": 100000000, - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - } - }, - "metric_optimization": {}, - "tracked_layers": null, - "scale_by_norm": false, - "force_cpu_for_large_metric_ops": true, - "cnn_rq_aggregation_op": "mean", - "exclude_classification_layer": true, - "alignment_methods": [ - "rayleigh_quotient", - "gaussian_mi_analytic", - "synergy_gaussian_mmi", - "activation_l2_norm", - "taylor" - ], - "compute_alignment": true, - "save_alignment_history": true, - "measure_alignment_during_training": true, - "alignment_frequency": 1, - "alignment_data_num_samples": 1, - "alignment_computation_texts": [], - "alignment_composite_weights": { - "rayleigh_quotient": 0.33, - "gaussian_mi_analytic": -0.33, - "synergy_gaussian_mmi": 0.33 - }, - "supernode_config": {}, - "cnn_mode": "unfold", - "calibration_mode": "indices", - "calibration_num_workers": 0, - "n_calibration": 5000, - "simulate_post_train_shuffle_epochs": 0, - "simulate_post_train_include_eval": true, - "activation_point": "pre_bn", - "activation_samples": "flatten_spatial", - "task_activation_samples": null, - "spatial_samples_per_image": 
16, - "n_clusters": 4, - "synergy_target": "logit_margin", - "synergy_candidate_pool": 50, - "synergy_pairs": 10, - "rq_definition": "covariance_exact", - "type_mapping_mode": "global_simple", - "run_metric_ablation": false, - "metric_ablations": [ - "all", - "rq_red", - "rq_syn", - "red_syn" - ], - "run_permutation_baseline": false, - "n_permutations": 100, - "clustering_first_metric": "ixy", - "clustering_importance_mode": "geometric", - "compute_loss_proxy": false, - "loss_proxy_n_calibration": 1024, - "compute_within_layer_connectivity": false, - "within_layer_red_topk": 20, - "within_layer_syn_topk": 10, - "routing_bottleneck_topk": 5, - "outred_candidate_pool": 64, - "outred_topm": 8, - "bottleneck_protect_percentile": 95.0, - "halo_percentile": 90.0, - "use_activation_weight": true, - "cascade_n_remove": 5, - "damage_sample_frac": 0.2, - "taylor_samples": 1024, - "taylor_act_samples": 1024, - "taylor_act_batch_size": 16, - "geometric_median_iters": 10, - "geometric_median_eps": 1e-08, - "hrank_images": 256, - "hrank_pool": 8, - "hrank_sv_eps": 0.001, - "chip_images": 256, - "cluster_aware_alpha": 2.0, - "cluster_aware_beta": 0.0, - "cluster_aware_gamma": 0.25, - "cluster_aware_lambda_halo": 0.5, - "cluster_aware_protect_critical_frac": 0.3, - "cluster_aware_anneal_start": 0.7, - "cluster_aware_anneal_end": 0.9, - "cluster_aware_taylor_weight": 0.3, - "cluster_aware_depth_adaptive": false, - "cluster_aware_early_alpha": 1.5, - "cluster_aware_early_gamma": 0.1, - "cluster_aware_late_alpha": 0.8, - "cluster_aware_late_gamma": 0.5, - "cluster_aware_early_layer_frac": 0.3, - "generalized_taylor_weight_rq": 1.0, - "generalized_taylor_weight_redundancy": 0.3, - "generalized_taylor_weight_synergy": 0.5, - "generalized_taylor_gradient_exponent": 1.0, - "generalized_taylor_activation_exponent": 1.0, - "generalized_taylor_redundancy_discount_beta": 1.0, - "generalized_taylor_synergy_boost_gamma": 0.5, - "generalized_taylor_critical_multiplier": 1.5, - 
"generalized_taylor_redundant_multiplier": 0.5, - "generalized_taylor_synergistic_multiplier": 1.2, - "generalized_taylor_background_multiplier": 0.8, - "generalized_taylor_gate_mode": "sigmoid", - "generalized_taylor_gate_temperature": 6.0, - "generalized_taylor_gate_bias": 0.5, - "generalized_taylor_gate_eps": 0.05, - "generalized_taylor_gate_min": 0.0, - "generalized_taylor_gate_include_cluster_multiplier": true, - "generalized_taylor_structural_eps": 0.1, - "generalized_taylor_rq_log_eps": 1e-10, - "generalized_taylor_grad_over_act_eps": 1e-08, - "generalized_taylor_lp_optimal_l2_reg": 0.01, - "do_dropout_analysis": false, - "do_eigenfeature_analysis": false, - "do_pruning_experiments": true, - "dropout_rates": [ - 0.0, - 0.1, - 0.3, - 0.5, - 0.7, - 0.9 - ], - "dropout_mode": "scaled", - "measure_expected_distribution": true, - "distribution_bins": 50, - "pruning_strategies": [ - "taylor", - "composite_ixy", - "cluster_aware_spectral_rs_ixy", - "cluster_aware_quantile_gradient_weighted_ixy" - ], - "pruning_amounts": [ - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 0.95 - ], - "pruning_selection_mode": "low", - "fine_tune_after_pruning": true, - "fine_tune_epochs": 20, - "pruning_alignment_metric": "rayleigh_quotient", - "pruning_hybrid_alpha": 0.5, - "pruning_scope": "layer", - "pruning_distribution": "global_threshold", - "hybrid_taylor_allocation": true, - "pruning_min_per_layer": 0.0, - "pruning_max_per_layer": 0.9, - "pruning_max_per_layer_sparsity_cap": 0.85, - "pruning_enforce_exact_global_channel_budget": true, - "fine_tune_learning_rate": null, - "fine_tune_max_batches": null, - "fine_tune_weight_decay": 0.0, - "fine_tune_type_aware_enabled": true, - "fine_tune_type_aware_methods": [], - "fine_tune_type_aware_lr_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.5, - "background": 1.5 - }, - "fine_tune_type_aware_wd_multipliers": { - "critical": 0.5, - "synergistic": 1.0, - "redundant": 1.25, - "background": 1.5 
- }, - "fine_tune_type_aware_scale_batchnorm": true, - "fine_tune_type_aware_scale_classifier": false, - "fine_tune_track_epoch_accuracy": true, - "fine_tune_eval_frequency": 5, - "fine_tune_use_amp": true, - "alignment_structured_pruning": false, - "cascading_direction": "forward", - "dependency_aware_pruning": true, - "pruning_target_layer": null, - "pruning_pointwise_only": false, - "pruning_skip_depthwise": false, - "generate_plots": false, - "plot_format": "pdf", - "plot_dpi": 300, - "visualization_options": {}, - "post_analysis": {}, - "checkpoint_dir": "./logs/checkpoints", - "checkpoint_interval": 1000, - "save_best": true, - "log_dir": "./logs", - "log_interval": 100, - "plots_dir": "./plots", - "experiment_dir": null, - "base_output_dir": "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER/importance_clustering_v8", - "wandb_project": null, - "wandb_entity": null, - "distributed": false, - "world_size": 1, - "rank": 0, - "do_perplexity_computation": false, - "evaluation_dataset": "wikitext", - "evaluation_num_samples": 100, - "evaluation_metrics": [ - "perplexity" - ], - "llm": {}, - "use_nvidia_fewshot": false, - "use_chain_of_thought": false, - "fewshot_settings": {}, - "do_directed_redundancy": true, - "do_connectivity_pruning": true, - "do_scar_metrics": false, - "do_attention_scar_metrics": false, - "scar_num_samples": 0, - "scar_max_length": 512, - "supernode": {}, - "supernode_robustness": {}, - "supernode_summary": {}, - "halo_analysis": { - "enabled": true, - "percentile": 90.0, - "use_activation_weight": true, - "compute_influence_matrix": true - }, - "generalized_importance": {}, - "do_halo_analysis": true, - "do_generalized_importance": false, - "do_scar_optimal": false, - "do_random_supernode_ablation": false, - "do_supernode_hit_rate_sweep": false, - "supernode_hit_rate_sweep": {}, - "eval_batches": null, - "use_tensorized_training": true, - "use_tensorized_pruning": true, - "use_ultra_parallel_eval": true, - 
"tokenizer_kwargs": {}, - "model_kwargs": {}, - "analysis_options": {} -} diff --git a/configs/vision_prune/paper_locked/alexnet_imagenet100_protocol_locked.yaml b/configs/vision_prune/paper_locked/alexnet_imagenet100_protocol_locked.yaml deleted file mode 100644 index c4025fcb..00000000 --- a/configs/vision_prune/paper_locked/alexnet_imagenet100_protocol_locked.yaml +++ /dev/null @@ -1,156 +0,0 @@ -# ============================================================================= -# AlexNet on ImageNet-100 - UNIFIED FORMAT (FAST PRUNING SWEEP) -# ============================================================================= -# This config is identical to alexnet_imagenet100_unified.yaml except: -# - Pruning fine-tuning is capped per epoch via `max_batches` to ensure the full -# (methods × sparsity) sweep completes within typical 4h SLURM walltimes. -# ============================================================================= - -# ----------------------------------------------------------------------------- -# EXPERIMENT -# ----------------------------------------------------------------------------- -experiment: - name: "alexnet_imagenet100_cluster_analysis" - type: "cluster_analysis" - seed: 42 - device: "cuda" - output_dir: "./results/vision/alexnet_imagenet100" - -# ----------------------------------------------------------------------------- -# MODEL -# ----------------------------------------------------------------------------- -model: - name: "alexnet" - pretrained: true - num_classes: 100 - -# ----------------------------------------------------------------------------- -# DATASET -# ----------------------------------------------------------------------------- -dataset: - name: "imagenet100" - root: "./data/imagenet100" - batch_size: 128 - num_workers: 8 - image_size: 224 - normalize: true - -# ----------------------------------------------------------------------------- -# TRAINING (classifier head is replaced for ImageNet-100) -# 
----------------------------------------------------------------------------- -training: - enabled: true - epochs: 20 - learning_rate: 0.001 - optimizer: "adam" - scheduler: "cosine" - weight_decay: 0.0001 - -# ----------------------------------------------------------------------------- -# CALIBRATION -# ----------------------------------------------------------------------------- -calibration: - num_samples: 5000 - -# ----------------------------------------------------------------------------- -# METRICS -# ----------------------------------------------------------------------------- -metrics: - activation_point: "pre_bn" - task_activation_samples: "match" - optimization: - use_jit: false - use_gpu_acceleration: false - force_cpu_for_large_ops: true - cpu_threshold: 100000000 - - rayleigh_quotient: - enabled: true - relative: false - definition: both - shrinkage: true - - redundancy: - enabled: true - sampling: "all" - - synergy: - enabled: true - target: "logit_margin" - num_pairs: 10 - sampling: "top_k" - - magnitude: - enabled: true - - taylor: - enabled: true - criterion: "gradient_weight" - -# ----------------------------------------------------------------------------- -# CLUSTERING -# ----------------------------------------------------------------------------- -clustering: - n_clusters: 4 - method: "kmeans" - features: - - "log_rq" - - "redundancy" - - "synergy" - standardize: true - assign_types: true - type_mapping_strategy: "centroid_ranking" - -# ----------------------------------------------------------------------------- -# HALO ANALYSIS -# ----------------------------------------------------------------------------- -halo_analysis: - enabled: true - threshold_percentile: 90 - influence_type: "activation_weighted" - skip_residual_edges: false - -# ----------------------------------------------------------------------------- -# PRUNING -# ----------------------------------------------------------------------------- -pruning: - enabled: true - 
methods: - - random - - magnitude - - activation_mean - - taylor - - network_slimming - - geometric_median - - hrank - - composite - - cluster_aware - - cluster_aware_annealed - sparsity_levels: [0.1, 0.3, 0.5, 0.7, 0.8, 0.9] - distribution: "uniform" - dependency_aware: false - min_per_layer: 0.0 - max_per_layer: 0.90 - fine_tuning: - enabled: true - # Key speed knob: limit per-epoch batches so the sweep finishes within walltime. - max_batches: 200 - epochs: 5 - learning_rate: 0.001 - optimizer: "adam" - scheduler: "cosine" - -# ----------------------------------------------------------------------------- -# VISUALIZATION -# ----------------------------------------------------------------------------- -visualization: - enabled: true - save_format: "png" - dpi: 150 - generate: - - metric_distributions - - cluster_scatter - - cluster_evolution - - halo_influence_matrix - - pruning_curves - - cascade_damage diff --git a/configs/vision_prune/paper_locked/resnet18_cifar10_protocol_locked.yaml b/configs/vision_prune/paper_locked/resnet18_cifar10_protocol_locked.yaml deleted file mode 100644 index 7f513ee8..00000000 --- a/configs/vision_prune/paper_locked/resnet18_cifar10_protocol_locked.yaml +++ /dev/null @@ -1,613 +0,0 @@ -# ============================================================================= -# ResNet-18 on CIFAR-10 - UNIFIED FORMAT (ENHANCED) -# ============================================================================= -# Full cluster analysis pipeline for ResNet-18 on CIFAR-10 with comprehensive -# evaluation, benchmarks, and analysis sections for vision pruning research. 
-# -# Key features: -# - Uses unified metric naming (rayleigh_quotient, redundancy, synergy, magnitude) -# - Comprehensive evaluation metrics (accuracy, efficiency, per-class) -# - Full visualization pipeline for paper figures -# - Layer-wise sensitivity analysis -# -# Usage: python scripts/run_experiment.py --config configs/vision_prune/resnet18_cifar10_unified.yaml -# ============================================================================= - -# ----------------------------------------------------------------------------- -# EXPERIMENT -# ----------------------------------------------------------------------------- -experiment: - name: "resnet18_cifar10_cluster_analysis" - type: "cluster_analysis" - seed: 42 - device: "cuda" - output_dir: "./results/vision/resnet18_cifar10" - -# ----------------------------------------------------------------------------- -# MODEL -# ----------------------------------------------------------------------------- -model: - name: "resnet18" - pretrained: true - num_classes: 10 - -# ----------------------------------------------------------------------------- -# DATASET -# ----------------------------------------------------------------------------- -dataset: - name: "cifar10" - root: "./data" - batch_size: 128 - num_workers: 4 - -# ----------------------------------------------------------------------------- -# TRAINING (paper-quality CIFAR baselines) -# ----------------------------------------------------------------------------- -# NOTE: This trains/fine-tunes the model on CIFAR-10 before running the metric/cluster/pruning analyses. 
-training: - enabled: true - epochs: 50 - learning_rate: 0.05 - optimizer: "sgd" - scheduler: "cosine" - momentum: 0.9 - weight_decay: 0.0005 - -# ----------------------------------------------------------------------------- -# CALIBRATION -# ----------------------------------------------------------------------------- -calibration: - num_samples: 5000 - -# ----------------------------------------------------------------------------- -# METRICS -# ----------------------------------------------------------------------------- -# Unified naming convention: -# rayleigh_quotient (alias: rq, compute_rq) -# redundancy (alias: gaussian_mi_analytic, average_redundancy, pairwise_redundancy) -# synergy (alias: synergy_gaussian_mmi) -# magnitude (alias: activation_l2_norm) -# ----------------------------------------------------------------------------- -metrics: - # Where to read activations for within-layer statistics: - # - pre_bn: Conv output before BatchNorm (matches Jan-20 behaviour, best pruning performance) - # - post_bn: BatchNorm output before ReLU (matches what downstream layers consume, but worse pruning) - activation_point: "pre_bn" - # How to sample activations for task-level metrics (TaskMI, synergy): - # - match: use same spatial samples as local metrics (matches Jan-20 behaviour) - # - gap: use global-average-pooled per-image samples (avoids pseudo-replication, slightly worse pruning) - task_activation_samples: "match" - # Optional: compute per-channel Fisher/Gauss-Newton loss proxy on calibration data. - # This is used for the "importance prediction" analysis blocks in the paper. 
- compute_loss_proxy: true - loss_proxy_n_calibration: 1024 - # Optional: within-layer connectivity summaries (for within-layer organization analyses) - within_layer_connectivity: true - within_layer_red_topk: 20 - within_layer_syn_topk: 10 - # Optimization options for faster metric computation - optimization: - use_jit: false # Enable JIT-compiled computations (20-50% faster) - use_gpu_acceleration: false # Enable GPU-accelerated functions - force_cpu_for_large_ops: true # Prevent OOM for large covariance matrices - cpu_threshold: 100000000 # 1e8 elements threshold - - rayleigh_quotient: - enabled: true - relative: false # Standard Rayleigh quotient (no trace-normalization) - definition: both - shrinkage: true - - redundancy: - enabled: true - sampling: "all" # all, random, top_k - - synergy: - enabled: true - target: "logit_margin" # logit_margin, correct_logit, logit_pc1 - num_pairs: 10 - sampling: "top_k" - - magnitude: - enabled: true - - taylor: - enabled: true - criterion: "gradient_weight" # gradient_weight, gradient_activation - - activation_sparsity: - enabled: true - threshold: 0.01 - - # Composite weights for combined scoring - composite_weights: - rayleigh_quotient: 0.33 - redundancy: -0.33 # Negative = penalize redundancy - synergy: 0.33 - -# ----------------------------------------------------------------------------- -# CLUSTERING -# ----------------------------------------------------------------------------- -clustering: - enabled: true - n_clusters: 4 - type_names: ["critical", "redundant", "synergistic", "background"] - normalize_features: true - features: ["rayleigh_quotient", "redundancy", "synergy"] - - stability_enabled: true - n_bootstrap: 50 - - # Metric ablation study: validate each metric's contribution - # Clusters using subsets of metrics and compares to full 3-metric clustering - ablation: - enabled: true - # Which ablation modes to run (all = full 3 metrics, rq_red = RQ+Redundancy, etc.) 
- modes: ["all", "rq_red", "rq_syn", "red_syn"] - -# ----------------------------------------------------------------------------- -# HALO ANALYSIS (Cross-layer dependencies) -# ----------------------------------------------------------------------------- -halo_analysis: - enabled: true - percentile: 90.0 - use_activation_weight: true - compute_influence_matrix: true - - # Permutation baseline: shuffle cluster labels to establish null distribution - # Tests whether observed halo effects are statistically significant - permutation_baseline: - enabled: true - n_permutations: 100 # Number of random permutations - -# ----------------------------------------------------------------------------- -# CASCADE ANALYSIS (Damage testing) -# ----------------------------------------------------------------------------- -cascade_analysis: - enabled: true - n_remove_per_group: 5 - damage_sample_fraction: 0.2 - -# ----------------------------------------------------------------------------- -# MULTI-SEED EXPERIMENT -# ----------------------------------------------------------------------------- -# Run experiment with multiple random seeds for robust statistics (mean ± std) -multi_seed: - enabled: true - seeds: [42, 123, 456, 789, 1000] # 5 seeds for good statistics - -# ----------------------------------------------------------------------------- -# PRUNING - Comprehensive testing of all metrics -# ----------------------------------------------------------------------------- -# This tests individual metrics and combinations to validate basic assumptions -# about what makes channels important in CNNs vs what works for LLMs. -# -# Key questions to answer: -# 1. Do low-RQ channels safely prune? (rq_low) -# 2. Is high redundancy bad (redundancy_high) or good (redundancy_low)? -# 3. Does synergy matter for CNNs? -# 4. What combinations work best? 
-# ----------------------------------------------------------------------------- -pruning: - enabled: true - distribution: "global_threshold" # uniform, global_threshold, size_proportional, importance_weighted - dependency_aware: true # Propagate masks through BN/skip connections - min_per_layer: 0.0 - max_per_layer: 0.95 - # Optional: per-layer safety cap for global-threshold style distributions. - # Set to 1.0 to disable (legacy behavior); set to e.g. 0.90 to limit per-layer sparsity. - max_per_layer_sparsity_cap: 1.0 - # Include high sparsity (80%, 90%) to clearly see degradation - ratios: [0.1, 0.3, 0.4, 0.5, 0.7, 0.8, 0.9, 0.95] - - # COMPREHENSIVE ALGORITHM LIST for exploration - algorithms: - # ========================================================================= - # BASELINES - # ========================================================================= - - "random" # Random baseline - - "magnitude" # Standard magnitude pruning (prune low) - - "activation_mean" # Mean |activation| baseline - - "taylor" # Gradient-based importance - - "network_slimming" # Network Slimming (BN gamma) baseline - - "geometric_median" # FPGM-style geometric median baseline - - "hrank" # HRank feature-rank baseline - - # ========================================================================= - # SINGLE METRICS - Prune LOW (assumes low = unimportant) - # ========================================================================= - - "rq_low" # Prune low Rayleigh Quotient - - "mi_low" # Prune low MI = 0.5*log(1 + RQ*||w||^2) - - "redundancy_low" # Prune low redundancy - - "synergy_low" # Prune low synergy - - "lp_low" # Prune low loss-proxy (Fisher importance) - # Controls: prune HIGH (opposite direction) - - "rq_high" # Prune high RQ (keep low RQ) - - "mi_high" # Prune high MI - - "redundancy_high" # Prune high redundancy (standard approach) - - "synergy_high" # Prune high synergy - - "lp_high" # Prune high loss-proxy (should be catastrophically bad; sanity check) - - # 
========================================================================= - # COMPOSITE COMBINATIONS - # ========================================================================= - - "composite" # Original: score = RQ + syn - red (prune low) - - "composite_pos_red" # Flipped: score = RQ + syn + red (prune low) - - "rq_minus_red" # score = RQ - redundancy - - "rq_plus_red" # score = RQ + redundancy - - "magnitude_plus_rq" # score = magnitude + RQ - - "magnitude_minus_red" # score = magnitude - redundancy - - "magnitude_plus_red" # score = magnitude + redundancy - - # ========================================================================= - # CLUSTER-AWARE - # ========================================================================= - - "cluster_aware" # Pure cluster-aware (no Taylor blending) - - "cluster_aware_annealed" # Annealed: Taylor at low sparsity, CA at high - - "cluster_aware_taylor_blend" # Constant Taylor blend (not sparsity-dependent) - - "cluster_aware_depth_adaptive" # Per-layer adaptive weights (early=conservative) - - "cluster_aware_gradient_weighted" # Generalized Taylor: gradient-weight the CA score - - "cluster_aware_protect_redundant" # Ablation: inverted priority - - # ========================================================================= - # TAYLOR-WEIGHTED METRICS (simple combinations) - # ========================================================================= - - "taylor_rq" # sqrt(Taylor * RQ) - unique AND loss-sensitive - - "taylor_redundancy" # sqrt(Taylor * -redundancy) - non-redundant AND loss-sensitive - - "taylor_synergy" # sqrt(Taylor * synergy) - synergistic AND loss-sensitive - - # ========================================================================= - # GENERALIZED TAYLOR (analytically-motivated combinations) - # ========================================================================= - - "rq_weighted_taylor" # Taylor × log(RQ): loss-sensitive AND unique - - "redundancy_discounted_taylor" # Taylor / (1 + β·redundancy): 
discount redundant - - "synergy_boosted_taylor" # Taylor × (1 + γ·synergy): boost cooperative - - "structural_taylor" # |∂L/∂a| × structural_score: gradient × structure - - "metric_gated_taylor" # Taylor × gate(structural_score[, cluster_type]) - - "mi_taylor" # Taylor × MI(channel, task): loss-sensitive AND informative - - "cluster_type_taylor" # Taylor × type_multiplier: cluster-weighted gradient - - "taylor_optimal_combo" # Learn: w_t·Taylor + w_rq·RQ + w_r·(-red) + w_s·syn - - # ========================================================================= - # ADVANCED METHODS - # ========================================================================= - - "lp_with_constraints" # Rank by LP, but enforce type-based protection/constraints - - "type_quota_taylor" # Rank by Taylor, but enforce type-based protection/constraints - - "outred_with_constraints" # Prune high outgoing-overlap (replaceable routing) with type constraints - - "cluster_aware_halo_lp" # Cluster-aware, but use HaloLP (importance propagation) as halo term - - "cluster_aware_bottleneck_protect" # Cluster-aware + protect high-bottleneck channels (routing tail) - - "lp_optimal" # Learn optimal weights from LP correlation - - "cluster_structure" # Use cluster membership in scoring (not just selection) - - scoring_methods: - - "random" - - "magnitude" - - "network_slimming" - - "geometric_median" - - "hrank" - - "rq_low" - - "rq_high" - - "redundancy_low" - - "redundancy_high" - - "synergy_low" - - "composite" - - "composite_pos_red" - - # ========================================================================= - # CLUSTER-AWARE METHOD CONFIGURATION - # All cluster_aware* methods share these base settings - # ========================================================================= - cluster_aware: - # --- Base score weights (for pure cluster_aware) --- - alpha: 1.0 # Weight for log(RQ) - channel uniqueness - beta: 0.5 # Weight for synergy - task cooperation - gamma: 0.3 # Weight for redundancy penalty 
- lambda_halo: 0.5 # Weight for halo-synergy (cross-layer importance) - protect_critical_frac: 0.3 # Fraction of critical channels to protect absolutely - - # --- Annealing settings (for cluster_aware_annealed) --- - # At sparsity < anneal_start: use pure Taylor - # At sparsity > anneal_end: use pure cluster-aware - # In between: linear blend - anneal_start: 0.50 # Default: start blending at 50% sparsity - anneal_end: 0.80 # Default: full CA at 80% sparsity - - # --- Taylor blend (for cluster_aware_taylor_blend) --- - # Constant blend: score = (1-w)*CA + w*Taylor - taylor_weight: 0.3 # 30% Taylor, 70% cluster-aware (constant across sparsities) - - # --- Depth-adaptive settings (for cluster_aware_depth_adaptive) --- - # Early layers are typically more sensitive; use more conservative weights - depth_adaptive: true # Enable depth-adaptive weight adjustment - early_layer_frac: 0.3 # First 30% of layers = "early" - early_alpha: 1.5 # Higher RQ weight in early layers (protect unique more) - early_gamma: 0.1 # Lower redundancy penalty in early layers (less aggressive) - late_alpha: 0.8 # Lower RQ weight in late layers (can be more aggressive) - late_gamma: 0.5 # Higher redundancy penalty in late layers - - # ========================================================================= - # GENERALIZED TAYLOR METHOD CONFIGURATION - # Controls rq_weighted_taylor / structural_taylor / metric_gated_taylor / etc. - # Exposed here so runs are fully config-driven and reproducible. 
- # ========================================================================= - generalized_taylor: - weight_rq: 1.0 - weight_redundancy: 0.3 - weight_synergy: 0.5 - gradient_exponent: 1.0 - activation_exponent: 1.0 - redundancy_discount_beta: 1.0 - synergy_boost_gamma: 0.5 - critical_multiplier: 1.5 - redundant_multiplier: 0.5 - synergistic_multiplier: 1.2 - background_multiplier: 0.8 - gate_mode: "sigmoid" - gate_temperature: 6.0 - gate_bias: 0.5 - gate_eps: 0.05 - gate_min: 0.0 - gate_include_cluster_multiplier: true - # Numerical stability - rq_log_eps: 1.0e-10 - structural_eps: 0.1 - grad_over_act_eps: 1.0e-8 - lp_optimal_l2_reg: 0.01 - - fine_tune: - enabled: true # Enable recovery fine-tuning after pruning (standard for reporting) - epochs: 5 - learning_rate: 0.0001 - weight_decay: 0.0001 - # Safety cap: limits fine-tune compute so the full method×ratio grid stays feasible on 1 GPU - max_batches: 200 - -# ----------------------------------------------------------------------------- -# EVALUATION (Enhanced for Vision) -# ----------------------------------------------------------------------------- -evaluation: - enabled: true - - # Classification metrics - accuracy: true - top1_accuracy: true - top5_accuracy: true - loss: true - - # Per-class analysis - per_class_accuracy: true - confusion_matrix: true - - # Calibration metrics - calibration_enabled: true - expected_calibration_error: true - reliability_diagram: true - - # Efficiency metrics - compute_flops: true - compute_params: true - compute_memory: true - measure_latency: true - latency_batch_sizes: [1, 8, 32, 128] - - # Robustness (optional - requires corruption data) - robustness_enabled: false - corruption_types: ["gaussian_noise", "shot_noise", "impulse_noise", "gaussian_blur", "contrast", "brightness"] - corruption_severities: [1, 3, 5] - - # Transfer evaluation (optional) - transfer_enabled: false - transfer_datasets: ["cifar100", "svhn"] - -# 
----------------------------------------------------------------------------- -# BENCHMARKS (Vision-specific) -# ----------------------------------------------------------------------------- -benchmarks: - enabled: true - - # Standard test benchmarks - tasks: - - name: "cifar10_test" - dataset: "cifar10" - split: "test" - enabled: true - - - name: "cifar100_transfer" - dataset: "cifar100" - split: "test" - enabled: false - - # Inference benchmarks - inference: - warmup_iterations: 10 - benchmark_iterations: 100 - batch_sizes: [1, 8, 32, 128] - devices: ["cuda"] - - # Adversarial robustness (optional) - adversarial: - enabled: false - attacks: ["fgsm", "pgd"] - epsilons: [0.01, 0.03, 0.1] - -# ----------------------------------------------------------------------------- -# VISUALIZATION (Enhanced) -# ----------------------------------------------------------------------------- -visualization: - enabled: true - format: "pdf" # pdf for paper quality - dpi: 300 - style: "seaborn-v0_8-paper" - - # Basic plots - histograms: true - violin_plots: true - correlation_heatmap: true - cluster_scatter: true - cluster_evolution: true - influence_matrix: true - halo_properties: true - pruning_comparison: true - pruning_recovery: true - cascade_test: true - - # Additional analysis plots - metric_distributions: true - layer_importance_heatmap: true - sensitivity_curves: true - efficiency_tradeoffs: true - - # Scatter plot pairs (unified naming) - scatter_pairs: - - ["rayleigh_quotient", "redundancy"] - - ["rayleigh_quotient", "synergy"] - - ["redundancy", "synergy"] - - ["magnitude", "rayleigh_quotient"] - - ["magnitude", "taylor"] - - ["taylor", "rayleigh_quotient"] - -# ----------------------------------------------------------------------------- -# OUTPUT -# ----------------------------------------------------------------------------- -# Uses job directory structure: creates unique folders for each run -# Directory format: {base_dir}/{experiment_name}_{timestamp}_{job_id}/ 
-output: - base_dir: "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/Prune_LLM" - dir: "./results/vision/resnet18_cifar10" - save_metrics: true - save_clusters: true - save_figures: true - save_checkpoints: true - save_per_layer: true - -# ----------------------------------------------------------------------------- -# EXTRA (Vision-specific detailed settings) -# ----------------------------------------------------------------------------- -extra: - # Pre-training for ImageNet pretrained models on CIFAR - # Train until model achieves ~90% accuracy on CIFAR-10 - pretrain_epochs: 30 - pretrain_lr: 0.001 - - # Baselines to compare against - baselines: - - "magnitude" - - "taylor" - - "network_slimming" - - "geometric_median" - - # Layer-wise analysis - analysis: - layer_indices: "all" # or specific: [0, 2, 4, 6, 8] - save_scores: true - generate_plots: true - - # Metrics to compute per layer - metrics: - - "rayleigh_quotient" - - "redundancy" - - "synergy" - - "magnitude" - - "taylor" - - "activation_sparsity" - - # Plots to generate - plots: - histograms: true - scatter_plots: true - pruning_curves: true - layer_comparison: true - filter_correlation: true - - scatter_pairs: - - ["rayleigh_quotient", "redundancy"] - - ["rayleigh_quotient", "synergy"] - - ["magnitude", "taylor"] - - ["redundancy", "synergy"] - - # Pruning sensitivity analysis - sensitivity_analysis: - enabled: true - per_layer: true - ratios: [0.1, 0.2, 0.3, 0.4, 0.5] - metric: "accuracy" - output_dir: "sensitivity" - - # Structured pruning options - structured_pruning: - enabled: true - granularity: "filter" # filter, channel, block - importance_criteria: - - "l1_norm" - - "l2_norm" - - "taylor" - - "alignment" - - # Feature analysis - feature_analysis: - enabled: true - compute_feature_rank: true - compute_channel_redundancy: true - visualize_filters: false # Set true for filter visualization (slow) - num_samples_to_visualize: 10 - - # Efficiency tracking - efficiency: - track_flops: true - 
track_params: true - track_memory: true - track_latency: true - baseline_comparison: true - - # Paper figure generation - visualization: - save_plots: true - format: "pdf" - dpi: 300 - style: "seaborn-v0_8-paper" - - # Figure 1: Metric distributions by layer - metric_distributions: - enabled: true - by_layer: true - by_cluster: true - - # Figure 2: Cluster analysis - cluster_analysis: - enabled: true - scatter_3d: true - cluster_evolution_by_layer: true - cluster_purity: true - - # Figure 3: Pruning comparison - pruning_comparison: - enabled: true - accuracy_vs_sparsity: true - accuracy_vs_flops: true - accuracy_vs_params: true - methods_to_compare: - - "random" - - "magnitude" - - "taylor" - - "composite" - - "cluster_aware" - - "network_slimming" - - # Figure 4: Layer-wise importance - layer_importance: - enabled: true - heatmap: true - bar_chart: true - - # Figure 5: Recovery after fine-tuning - fine_tuning_recovery: - enabled: true - by_method: true - by_sparsity: true - - # Figure 6: Efficiency vs Accuracy tradeoffs - efficiency_tradeoffs: - enabled: true - accuracy_vs_flops: true - accuracy_vs_latency: true - accuracy_vs_params: true diff --git a/configs/vision_prune/paper_locked/resnet50_imagenet100_protocol_locked.yaml b/configs/vision_prune/paper_locked/resnet50_imagenet100_protocol_locked.yaml deleted file mode 100644 index 3a64586f..00000000 --- a/configs/vision_prune/paper_locked/resnet50_imagenet100_protocol_locked.yaml +++ /dev/null @@ -1,179 +0,0 @@ -# ============================================================================= -# ResNet-50 on ImageNet-100 - UNIFIED FORMAT (PAPER / UNIFORM DISTRIBUTION) -# ============================================================================= -# Goal: a paper-ready ImageNet-100 run that avoids deep-network layer collapse by using: -# - uniform per-layer sparsity allocation -# - an explicit per-layer cap (max_per_layer) -# and a trimmed pruning method list (only what we report). 
-# -# Usage: -# python scripts/run_experiment.py --config configs/vision_prune/resnet50_imagenet100_unified_paper_uniform.yaml -# ============================================================================= - -experiment: - name: "resnet50_imagenet100_cluster_analysis" - type: "cluster_analysis" - seed: 42 - device: "cuda" - output_dir: "./results/vision/resnet50_imagenet100" - -model: - name: "resnet50" - pretrained: true - num_classes: 100 - weights: "IMAGENET1K_V2" - -dataset: - name: "imagenet100" - root: "./data/imagenet100" - batch_size: 64 - num_workers: 8 - image_size: 224 - normalize: true - -training: - enabled: true - epochs: 30 - learning_rate: 0.001 - optimizer: "adam" - scheduler: "cosine" - weight_decay: 0.0001 - -calibration: - num_samples: 5000 - -metrics: - activation_point: "pre_bn" - task_activation_samples: "match" - compute_loss_proxy: true - loss_proxy_n_calibration: 512 - optimization: - use_jit: false - use_gpu_acceleration: false - force_cpu_for_large_ops: true - cpu_threshold: 100000000 - - rayleigh_quotient: - enabled: true - relative: false - definition: both - shrinkage: true - - redundancy: - enabled: true - sampling: "all" - - synergy: - enabled: true - target: "logit_margin" - num_pairs: 10 - sampling: "top_k" - - magnitude: - enabled: true - - taylor: - enabled: true - criterion: "gradient_weight" - - activation_sparsity: - enabled: true - threshold: 0.01 - - composite_weights: - rayleigh_quotient: 0.33 - redundancy: -0.33 - synergy: 0.33 - -clustering: - enabled: true - n_clusters: 4 - type_names: ["critical", "redundant", "synergistic", "background"] - normalize_features: true - features: ["rayleigh_quotient", "redundancy", "synergy"] - stability_enabled: true - n_bootstrap: 30 - -halo_analysis: - enabled: true - percentile: 90.0 - use_activation_weight: true - compute_influence_matrix: true - -cascade_analysis: - enabled: true - n_remove_per_group: 5 - damage_sample_fraction: 0.1 - -pruning: - enabled: true - distribution: 
"uniform" - dependency_aware: true - min_per_layer: 0.0 - max_per_layer: 0.90 - ratios: [0.1, 0.3, 0.5, 0.7, 0.8, 0.9] - - # Keep only methods we report (reduces runtime substantially vs. an exhaustive sweep) - methods: - - "random" - - "magnitude" - - "activation_mean" - - "taylor" - - "network_slimming" - - "geometric_median" - - "hrank" - - "composite" - - "cluster_aware" - - "cluster_aware_annealed" - - fine_tune: - enabled: true - epochs: 3 - learning_rate: 0.00001 - weight_decay: 0.0001 - max_batches: 50 - -evaluation: - enabled: true - accuracy: true - top1_accuracy: true - top5_accuracy: true - loss: true - per_class_accuracy: true - confusion_matrix: true - calibration_enabled: true - expected_calibration_error: true - reliability_diagram: true - compute_flops: true - compute_params: true - compute_memory: true - # Latency benchmarking can be noisy/slow on shared clusters; keep off for the paper run. - measure_latency: false - -visualization: - enabled: true - format: "pdf" - dpi: 300 - style: "seaborn-v0_8-paper" - histograms: true - violin_plots: true - correlation_heatmap: true - cluster_scatter: true - cluster_evolution: true - influence_matrix: true - halo_properties: true - pruning_comparison: true - pruning_recovery: true - cascade_test: true - metric_distributions: true - layer_importance_heatmap: true - sensitivity_curves: true - -output: - base_dir: "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red" - dir: "./results/vision/resnet50_imagenet100" - save_metrics: true - save_clusters: true - save_figures: true - save_checkpoints: true - save_per_layer: true diff --git a/configs/vision_prune/resnet18_cifar10_unified.yaml b/configs/vision_prune/resnet18_cifar10_unified.yaml index bb072420..c044a8ff 100644 --- a/configs/vision_prune/resnet18_cifar10_unified.yaml +++ b/configs/vision_prune/resnet18_cifar10_unified.yaml @@ -477,7 +477,7 @@ visualization: # Uses job directory structure: creates unique folders for each run # Directory 
format: {base_dir}/{experiment_name}_{timestamp}_{job_id}/ output: - base_dir: "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/Prune_LLM" + base_dir: "/path/to/results/Prune_LLM" dir: "./results/vision/resnet18_cifar10" save_metrics: true save_clusters: true diff --git a/configs/vision_prune/resnet50_imagenet100_unified.yaml b/configs/vision_prune/resnet50_imagenet100_unified.yaml index 67a58c85..b53bd9f2 100644 --- a/configs/vision_prune/resnet50_imagenet100_unified.yaml +++ b/configs/vision_prune/resnet50_imagenet100_unified.yaml @@ -330,7 +330,7 @@ visualization: # OUTPUT # ----------------------------------------------------------------------------- output: - base_dir: "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/Prune_LLM" + base_dir: "/path/to/results/Prune_LLM" dir: "./results/vision/resnet50_imagenet100" save_metrics: true save_clusters: true diff --git a/configs/vision_prune/resnet50_imagenet100_unified_paper_globalthreshold.yaml b/configs/vision_prune/resnet50_imagenet100_unified_paper_globalthreshold.yaml index 2b856158..14bd99f3 100644 --- a/configs/vision_prune/resnet50_imagenet100_unified_paper_globalthreshold.yaml +++ b/configs/vision_prune/resnet50_imagenet100_unified_paper_globalthreshold.yaml @@ -159,7 +159,7 @@ visualization: metric_distributions: true output: - base_dir: "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER" + base_dir: "/path/to/results/losslens_vision/PAPER" dir: "./results/vision/resnet50_imagenet100" save_metrics: true save_clusters: true diff --git a/configs/vision_prune/resnet50_imagenet100_unified_paper_uniform.yaml b/configs/vision_prune/resnet50_imagenet100_unified_paper_uniform.yaml index 0b78c899..ded1aeb0 100644 --- a/configs/vision_prune/resnet50_imagenet100_unified_paper_uniform.yaml +++ b/configs/vision_prune/resnet50_imagenet100_unified_paper_uniform.yaml @@ -170,7 +170,7 @@ visualization: sensitivity_curves: true output: - base_dir: 
"/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red" + base_dir: "/path/to/results/losslens_vision" dir: "./results/vision/resnet50_imagenet100" save_metrics: true save_clusters: true diff --git a/configs/vision_prune/vgg16_cifar10_unified.yaml b/configs/vision_prune/vgg16_cifar10_unified.yaml index da8c6a17..76752c2a 100644 --- a/configs/vision_prune/vgg16_cifar10_unified.yaml +++ b/configs/vision_prune/vgg16_cifar10_unified.yaml @@ -327,7 +327,7 @@ visualization: # OUTPUT # ----------------------------------------------------------------------------- output: - base_dir: "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/Prune_LLM" + base_dir: "/path/to/results/Prune_LLM" dir: "./results/vision/vgg16_cifar10" save_metrics: true save_clusters: true diff --git a/configs/vision_prune/vgg16_imagenet100_unified_paper_uniform.yaml b/configs/vision_prune/vgg16_imagenet100_unified_paper_uniform.yaml index 4001d829..f1479108 100644 --- a/configs/vision_prune/vgg16_imagenet100_unified_paper_uniform.yaml +++ b/configs/vision_prune/vgg16_imagenet100_unified_paper_uniform.yaml @@ -164,7 +164,7 @@ visualization: sensitivity_curves: true output: - base_dir: "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/alignment_red/PAPER" + base_dir: "/path/to/results/losslens_vision/PAPER" dir: "./results/vision/vgg16_imagenet100" save_metrics: true save_clusters: true diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md new file mode 100644 index 00000000..93845cc6 --- /dev/null +++ b/docs/ARCHITECTURE.md @@ -0,0 +1,65 @@ +# Architecture + +NodeLens is organized as a reusable library plus paper-specific project +folders. The library code should remain general; each paper folder should only +contain release notes, configs, and artifact packaging scripts for that paper. 
+ +```mermaid +flowchart TB + subgraph Library[src/nodelens] + M[metrics] + P[pruning] + E[experiments] + A[analysis] + S[services] + end + + subgraph Inputs[Inputs] + C[configs] + D[calibration data] + N[model checkpoints] + end + + subgraph Projects[projects] + R[supernodes_scar] + end + + C --> E + D --> S + N --> S + S --> M + M --> P + M --> A + P --> E + A --> E + E --> R + R --> H[Hugging Face artifact bundle] +``` + +## Design Rules + +- Keep reusable metrics, services, pruning code, and experiment classes in + `src/nodelens/`. +- Keep paper release instructions and packaging scripts in `projects/`. +- Keep generated outputs in `outputs/`, which is ignored by git. +- Do not store model weights, raw datasets, cluster logs, or private paths in + the repository. +- Use project manifests and checksums for anything uploaded as an artifact. + +## Supernodes and SCAR Flow + +```mermaid +sequenceDiagram + participant Config as YAML config + participant Runner as run_experiment.py + participant Capture as activation and gradient capture + participant Metrics as SCAR metrics + participant Prune as structured pruning + participant Artifacts as artifact bundle + + Config->>Runner: choose model, calibration data, sparsity, metrics + Runner->>Capture: collect layer-wise activations and gradients + Capture->>Metrics: compute LP, activation, curvature, and Taylor scores + Metrics->>Prune: protect supernode core and rank remaining channels + Prune->>Artifacts: write results, figures, tables, and manifests +``` diff --git a/docs/METRIC_CONSISTENCY.md b/docs/METRIC_CONSISTENCY.md index 8e0b5df9..3396fd47 100644 --- a/docs/METRIC_CONSISTENCY.md +++ b/docs/METRIC_CONSISTENCY.md @@ -1,12 +1,12 @@ # Metric Definitions & Sign Conventions (Theory <-> Code) -This document is a **codebase-facing** reference for the core metrics used throughout `src/alignment/`. +This document is a **codebase-facing** reference for the core metrics used throughout `src/nodelens/`. 
It exists to prevent subtle drift in: - **Formulas** (what is computed), - **Keys** (how values are named/stored), - **Sign conventions** (what "high" means when used for pruning/scoring). -It intentionally avoids referencing any paper draft; the canonical sources are the implementations under `src/alignment/metrics/` and the experiment pipeline that stores per-layer metric arrays. +It intentionally avoids referencing any paper draft; the canonical sources are the implementations under `src/nodelens/metrics/` and the experiment pipeline that stores per-layer metric arrays. --- @@ -46,7 +46,7 @@ For redundancy specifically: \] **Implementation** -- `src/alignment/metrics/rayleigh/rayleigh_quotient.py` +- `src/nodelens/metrics/rayleigh/rayleigh_quotient.py` - Computes covariance \(\Sigma_X\) from inputs (optionally class-conditioned) and returns per-output-channel RQ. **Notes** @@ -65,7 +65,7 @@ I(Y_i;Y_j) = -\tfrac12 \log(1-\rho^2) We typically summarize "redundancy of channel \(i\)" as an **average MI** to other channels (or sampled references). **Implementation** -- `src/alignment/metrics/information/redundancy.py` +- `src/nodelens/metrics/information/redundancy.py` - Computes correlations between projected outputs and converts to MI using the formula above. - Returns **nonnegative** redundancy values (more redundancy => larger). @@ -90,7 +90,7 @@ S(Z;Y_i,Y_j)= I(Z;[Y_i,Y_j]) - \max\{I(Z;Y_i),I(Z;Y_j)\} Per-channel synergy is commonly computed as an average over a sampled set of partner channels. **Implementation** -- `src/alignment/metrics/information/gaussian_pid.py` +- `src/nodelens/metrics/information/gaussian_pid.py` **Interpretation** - Synergy is a **pair-structure descriptor**, not a scalar importance proxy; it is often weakly correlated with loss sensitivity within layers. @@ -104,7 +104,7 @@ A common composite importance score combines multiple signals: - decrease with redundancy. 
**Implementation** -- `src/alignment/metrics/composite.py` +- `src/nodelens/metrics/composite.py` **Typical sign pattern** - `+ logRQ` @@ -128,10 +128,10 @@ Pruning strategies may consume these via "precomputed metrics" dicts. ## Quick verification snippet ```python -from alignment.metrics import get_metric +from nodelens.metrics import get_metric -rq = get_metric("rayleigh_quotient") # RQ(w; Σ_X) -red = get_metric("average_redundancy") # -0.5 log(1-ρ²) aggregated per neuron +rq = get_metric("rayleigh_quotient") # RQ(w; Sigma_X) +red = get_metric("average_redundancy") # -0.5 log(1-rho^2) aggregated per neuron syn = get_metric("gaussian_pid_synergy_mmi")# MMI Gaussian PID synergy ``` diff --git a/docs/README.md b/docs/README.md index bb7568ab..dc45638e 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,11 +1,15 @@ # Documentation +NodeLens is the public project name. The Python package is imported as +`nodelens`. + ## Guides - [Usage Guide](usage.md) - Running experiments and configuration - [API Reference](api_reference.md) - Core classes and functions - [LLM Guide](llm_guide.md) - LLM-specific analysis and pruning - [Metric Consistency](METRIC_CONSISTENCY.md) - Theory-code verification +- [Architecture](ARCHITECTURE.md) - Library and project-release layout ## Configuration @@ -41,7 +45,7 @@ python scripts/run_experiment.py --config configs/examples/mnist_basic.yaml # LLM analysis -python scripts/run_experiment.py --config configs/prune_llm/llama3_8b_full.yaml +python scripts/run_experiment.py --config configs/prune_llm/llama3_8b_unified.yaml # Cluster-based analysis python scripts/run_experiment.py --config configs/vision_prune/resnet18_cifar10_full.yaml diff --git a/docs/api_reference.md b/docs/api_reference.md index 71b87e3b..a38c99d6 100644 --- a/docs/api_reference.md +++ b/docs/api_reference.md @@ -7,7 +7,7 @@ Wraps PyTorch models for activation capture and analysis. 
```python -from alignment import ModelWrapper +from nodelens import ModelWrapper wrapper = ModelWrapper( model, # PyTorch model @@ -38,7 +38,7 @@ metric.compute(inputs, weights, outputs, **kwargs) # Returns scores ### Rayleigh Quotient ```python -from alignment.metrics import get_metric +from nodelens.metrics import get_metric rq = get_metric('rayleigh_quotient', relative=True, @@ -61,7 +61,7 @@ scores = redundancy.compute(outputs=layer_outputs) ### Synergy (Continuous Target) ```python -from alignment.metrics.information import SynergyContinuousTarget +from nodelens.metrics.information import SynergyContinuousTarget synergy = SynergyContinuousTarget( target_type='logit_margin', # or 'correct_logit', 'logit_pc1' @@ -80,7 +80,7 @@ scores = synergy.compute(outputs=activations, logits=logits, labels=labels) Clusters channels in (RQ, Redundancy, Synergy) space. ```python -from alignment.analysis.clustering import MetricSpaceClustering, ClusterResult +from nodelens.analysis.clustering import MetricSpaceClustering, ClusterResult clusterer = MetricSpaceClustering(n_clusters=4, seed=42) result = clusterer.fit(rq_scores, redundancy_scores, synergy_scores, layer_name="conv1") @@ -98,7 +98,7 @@ result.type_counts # {'critical': N, ...} Analyzes downstream dependencies via halos. ```python -from alignment.analysis.clustering import CrossLayerHaloAnalysis, HaloResult +from nodelens.analysis.clustering import CrossLayerHaloAnalysis, HaloResult halo_analyzer = CrossLayerHaloAnalysis(percentile=90.0, use_activation_weight=True) @@ -120,7 +120,7 @@ halo_result = halo_analyzer.analyze_halo( Validates importance via channel ablation. 
```python -from alignment.analysis import CascadeAnalysis, DamagePrediction +from nodelens.analysis import CascadeAnalysis, DamagePrediction cascade = CascadeAnalysis(model, test_loader, device="cuda") baseline = cascade.baseline() @@ -142,7 +142,7 @@ results = cascade.by_cluster(layer_name, labels, type_mapping, n_rm=5) General cluster-based analysis for any architecture. ```python -from alignment.experiments import ClusterAnalysisExperiment, ClusterAnalysisConfig +from nodelens.experiments import ClusterAnalysisExperiment, ClusterAnalysisConfig config = ClusterAnalysisConfig( name="resnet18_cifar10_cluster_analysis", @@ -164,7 +164,7 @@ experiment.generate_figures() LLM-specific analysis with SCAR metrics. ```python -from alignment.experiments import LLMAlignmentExperiment +from nodelens.experiments import LLMAlignmentExperiment experiment = LLMAlignmentExperiment(config) experiment.setup() @@ -180,7 +180,7 @@ perplexity = experiment.evaluate_perplexity("wikitext", "test", num_samples=100) Vision model alignment analysis. 
```python -from alignment.experiments import GeneralAlignmentExperiment +from nodelens.experiments import GeneralAlignmentExperiment experiment = GeneralAlignmentExperiment.from_yaml("config.yaml") results = experiment.run() @@ -193,7 +193,7 @@ results = experiment.run() ### Cluster Plots ```python -from alignment.analysis.visualization import ( +from nodelens.analysis.visualization import ( plot_metric_scatter, plot_cluster_evolution, plot_influence_matrix, @@ -218,7 +218,7 @@ plot_cascade_test(cascade_results, save_path) ### UnifiedVisualizer ```python -from alignment.analysis.visualization import UnifiedVisualizer +from nodelens.analysis.visualization import UnifiedVisualizer viz = UnifiedVisualizer() viz.plot_layer_scores(scores, metric_name, plot_type='violin', save_path='plot.png') @@ -234,7 +234,7 @@ viz.plot_heatmap(data, title, cmap, save_path) ### Quick Pruning ```python -from alignment.pruning.orchestrator import prune_with_all_options +from nodelens.pruning.orchestrator import prune_with_all_options result = prune_with_all_options( model, @@ -250,7 +250,7 @@ result = prune_with_all_options( ### Dependency-Aware Pruning ```python -from alignment.pruning.dependency_aware import DependencyAwarePruning +from nodelens.pruning.dependency_aware import DependencyAwarePruning pruner = DependencyAwarePruning(model) result = pruner.prune(layer_scores={'conv1': scores1}, amount=0.5, mode='low') @@ -263,7 +263,7 @@ result = pruner.prune(layer_scores={'conv1': scores1}, amount=0.5, mode='low') ### ActivationCaptureService ```python -from alignment.services import ActivationCaptureService +from nodelens.services import ActivationCaptureService capture = ActivationCaptureService(model_wrapper) data = capture.capture(input_batch, layers=['conv1'], include_weights=True) @@ -272,7 +272,7 @@ data = capture.capture(input_batch, layers=['conv1'], include_weights=True) ### NodeScoringService ```python -from alignment.services import NodeScoringService +from nodelens.services 
import NodeScoringService scorer = NodeScoringService( metrics={'rq': rq_metric, 'redundancy': redundancy_metric}, diff --git a/docs/llm_guide.md b/docs/llm_guide.md index 8588f6d2..18607504 100644 --- a/docs/llm_guide.md +++ b/docs/llm_guide.md @@ -181,7 +181,7 @@ The cross-layer analysis traces how supernodes in layer N influence layer N+1: ## Programmatic Usage ```python -from alignment.experiments import LLMAlignmentExperiment +from nodelens.experiments import LLMAlignmentExperiment experiment = LLMAlignmentExperiment(config) experiment.setup() @@ -202,7 +202,7 @@ perplexity = experiment.evaluate_perplexity("wikitext", "test", num_samples=100) ## Visualization ```python -from alignment.analysis.visualization import UnifiedVisualizer +from nodelens.analysis.visualization import UnifiedVisualizer viz = UnifiedVisualizer() diff --git a/docs/source/api/analysis.rst b/docs/source/api/analysis.rst index 828ce3bd..c57f09a9 100644 --- a/docs/source/api/analysis.rst +++ b/docs/source/api/analysis.rst @@ -1,12 +1,12 @@ Analysis API Reference ====================== -This section documents the analysis and visualization components of the alignment framework. +This section documents the analysis and visualization components of NodeLens. Result Aggregation ------------------ -.. automodule:: alignment.analysis.aggregation +.. automodule:: nodelens.analysis.aggregation :members: :undoc-members: :show-inheritance: @@ -14,7 +14,7 @@ Result Aggregation Results Aggregation ~~~~~~~~~~~~~~~~~~~ -.. automodule:: alignment.analysis.aggregation.results +.. automodule:: nodelens.analysis.aggregation.results :members: :undoc-members: :show-inheritance: @@ -22,7 +22,7 @@ Results Aggregation Metrics Aggregation ~~~~~~~~~~~~~~~~~~~ -.. automodule:: alignment.analysis.aggregation.metrics +.. automodule:: nodelens.analysis.aggregation.metrics :members: :undoc-members: :show-inheritance: @@ -30,7 +30,7 @@ Metrics Aggregation Layer Aggregation ~~~~~~~~~~~~~~~~~ -.. 
automodule:: alignment.analysis.aggregation.layers +.. automodule:: nodelens.analysis.aggregation.layers :members: :undoc-members: :show-inheritance: @@ -38,7 +38,7 @@ Layer Aggregation Reporting --------- -.. automodule:: alignment.analysis.reporting +.. automodule:: nodelens.analysis.reporting :members: :undoc-members: :show-inheritance: @@ -46,7 +46,7 @@ Reporting HTML Reports ~~~~~~~~~~~~ -.. automodule:: alignment.analysis.reporting.html +.. automodule:: nodelens.analysis.reporting.html :members: :undoc-members: :show-inheritance: @@ -54,7 +54,7 @@ HTML Reports Markdown Reports ~~~~~~~~~~~~~~~~ -.. automodule:: alignment.analysis.reporting.markdown +.. automodule:: nodelens.analysis.reporting.markdown :members: :undoc-members: :show-inheritance: @@ -62,7 +62,7 @@ Markdown Reports JSON Reports ~~~~~~~~~~~~ -.. automodule:: alignment.analysis.reporting.json_reporter +.. automodule:: nodelens.analysis.reporting.json_reporter :members: :undoc-members: :show-inheritance: @@ -70,7 +70,7 @@ JSON Reports Visualization ------------- -.. automodule:: alignment.analysis.visualization +.. automodule:: nodelens.analysis.visualization :members: :undoc-members: :show-inheritance: @@ -78,14 +78,14 @@ Visualization Plotting Functions ~~~~~~~~~~~~~~~~~~ -.. autofunction:: alignment.analysis.visualization.plot_metric_vs_sparsity -.. autofunction:: alignment.analysis.visualization.plot_layer_comparison -.. autofunction:: alignment.analysis.visualization.plot_pruning_heatmap +.. autofunction:: nodelens.analysis.visualization.plot_metric_vs_sparsity +.. autofunction:: nodelens.analysis.visualization.plot_layer_comparison +.. autofunction:: nodelens.analysis.visualization.plot_pruning_heatmap Analysis Utilities ------------------ -.. automodule:: alignment.analysis.utils +.. 
automodule:: nodelens.analysis.utils :members: :undoc-members: :show-inheritance: diff --git a/docs/source/api/core.rst b/docs/source/api/core.rst index 32cdb875..5eb57450 100644 --- a/docs/source/api/core.rst +++ b/docs/source/api/core.rst @@ -1,12 +1,12 @@ Core API Reference ================== -This section documents the core components of the alignment framework. +This section documents the core components of NodeLens. Registry System --------------- -.. automodule:: alignment.core.registry +.. automodule:: nodelens.core.registry :members: :undoc-members: :show-inheritance: @@ -14,12 +14,12 @@ Registry System Model Wrapper ------------- -.. automodule:: alignment.core.wrappers +.. automodule:: nodelens.core.wrappers :members: :undoc-members: :show-inheritance: -.. autoclass:: alignment.core.wrappers.ModelWrapper +.. autoclass:: nodelens.core.wrappers.ModelWrapper :members: :special-members: __init__ :undoc-members: @@ -27,7 +27,7 @@ Model Wrapper Base Classes ------------ -.. automodule:: alignment.core.base +.. automodule:: nodelens.core.base :members: :undoc-members: :show-inheritance: @@ -35,7 +35,7 @@ Base Classes Configuration ------------- -.. automodule:: alignment.core.config +.. automodule:: nodelens.core.config :members: :undoc-members: :show-inheritance: @@ -43,7 +43,7 @@ Configuration Utilities --------- -.. automodule:: alignment.core.utils +.. automodule:: nodelens.core.utils :members: :undoc-members: :show-inheritance: diff --git a/docs/source/api/data.rst b/docs/source/api/data.rst index 4f23f59d..dfa9fe87 100644 --- a/docs/source/api/data.rst +++ b/docs/source/api/data.rst @@ -1,17 +1,17 @@ Data API Reference ================== -This section documents the data handling components of the alignment framework. +This section documents the data handling components of NodeLens. Dataset Wrappers ---------------- -.. automodule:: alignment.data.datasets +.. automodule:: nodelens.data.datasets :members: :undoc-members: :show-inheritance: -.. 
autoclass:: alignment.data.datasets.DatasetWrapper +.. autoclass:: nodelens.data.datasets.DatasetWrapper :members: :special-members: __init__ :undoc-members: @@ -19,7 +19,7 @@ Dataset Wrappers Data Processing --------------- -.. automodule:: alignment.data.processing +.. automodule:: nodelens.data.processing :members: :undoc-members: :show-inheritance: @@ -27,23 +27,23 @@ Data Processing Batch Processing ~~~~~~~~~~~~~~~~ -.. automodule:: alignment.data.processing.batch +.. automodule:: nodelens.data.processing.batch :members: :undoc-members: :show-inheritance: -.. autoclass:: alignment.data.processing.batch.BatchMetricProcessor +.. autoclass:: nodelens.data.processing.batch.BatchMetricProcessor :members: :undoc-members: -.. autoclass:: alignment.data.processing.batch.StreamingMetricComputer +.. autoclass:: nodelens.data.processing.batch.StreamingMetricComputer :members: :undoc-members: Data Utilities -------------- -.. automodule:: alignment.data.utils +.. automodule:: nodelens.data.utils :members: :undoc-members: :show-inheritance: diff --git a/docs/source/api/experiments.rst b/docs/source/api/experiments.rst index e5060726..2d21b14d 100644 --- a/docs/source/api/experiments.rst +++ b/docs/source/api/experiments.rst @@ -1,7 +1,7 @@ Experiments API Reference ========================= -This section provides detailed documentation for all experiment types available in the alignment framework. +This section provides detailed documentation for all experiment types available in NodeLens. .. contents:: Table of Contents :local: @@ -10,7 +10,7 @@ This section provides detailed documentation for all experiment types available Base Experiment Classes ----------------------- -.. automodule:: alignment.experiments.base +.. automodule:: nodelens.experiments.base :members: :undoc-members: :show-inheritance: @@ -18,7 +18,7 @@ Base Experiment Classes ExperimentConfig ~~~~~~~~~~~~~~~~ -.. autoclass:: alignment.experiments.base.ExperimentConfig +.. 
autoclass:: nodelens.experiments.base.ExperimentConfig :members: :undoc-members: @@ -69,12 +69,12 @@ ExperimentConfig Progressive Dropout Experiment ------------------------------ -.. automodule:: alignment.experiments.progressive_dropout +.. automodule:: nodelens.experiments.progressive_dropout :members: :undoc-members: :show-inheritance: -.. autoclass:: alignment.experiments.progressive_dropout.ProgressiveDropoutExperiment +.. autoclass:: nodelens.experiments.progressive_dropout.ProgressiveDropoutExperiment :members: :undoc-members: @@ -137,8 +137,8 @@ Progressive Dropout Experiment .. code-block:: python - from alignment.experiments import ProgressiveDropoutExperiment - from alignment.experiments.base import ExperimentConfig + from nodelens.experiments import ProgressiveDropoutExperiment + from nodelens.experiments.base import ExperimentConfig config = ExperimentConfig( name="progressive_dropout_resnet", @@ -163,12 +163,12 @@ Progressive Dropout Experiment Experiment Runner ----------------- -.. automodule:: alignment.experiments.runner +.. automodule:: nodelens.experiments.runner :members: :undoc-members: :show-inheritance: -.. autoclass:: alignment.experiments.runner.ExperimentRunner +.. autoclass:: nodelens.experiments.runner.ExperimentRunner :members: :undoc-members: @@ -189,8 +189,8 @@ Experiment Runner .. code-block:: python - from alignment.experiments import ExperimentRunner - from alignment.experiments.base import ExperimentConfig + from nodelens.experiments import ExperimentRunner + from nodelens.experiments.base import ExperimentConfig # Define multiple experiments configs = [] diff --git a/docs/source/api/index.rst b/docs/source/api/index.rst index 534e9a54..7cbbf12e 100644 --- a/docs/source/api/index.rst +++ b/docs/source/api/index.rst @@ -1,7 +1,7 @@ API Reference ============= -This section contains the complete API reference for the alignment framework. +This section contains the complete API reference for NodeLens. .. 
toctree:: :maxdepth: 2 @@ -34,34 +34,34 @@ Module Overview Core Modules ~~~~~~~~~~~~ -**alignment.core** +**nodelens.core** Base classes, registries, and core functionality -**alignment.models** +**nodelens.models** Model wrappers and architecture definitions -**alignment.metrics** +**nodelens.metrics** 36+ metrics for neural network analysis -**alignment.experiments** +**nodelens.experiments** Experiment runners and configurations -**alignment.pruning** +**nodelens.pruning** Pruning strategies and utilities Supporting Modules ~~~~~~~~~~~~~~~~~~ -**alignment.data** +**nodelens.data** Dataset wrappers and data processing -**alignment.training** +**nodelens.training** Training loops and optimization -**alignment.analysis** +**nodelens.analysis** Result analysis and visualization -**alignment.infrastructure** +**nodelens.infrastructure** Configuration, logging, distributed computing Quick Links @@ -70,17 +70,17 @@ Quick Links Most Common Classes ~~~~~~~~~~~~~~~~~~~ -- :class:`alignment.experiments.base.ExperimentConfig` - Configure experiments -- :class:`alignment.metrics.RayleighQuotient` - Primary alignment metric -- :class:`alignment.models.ModelWrapper` - Wrap models for analysis -- :class:`alignment.experiments.ProgressiveDropoutExperiment` - Main pruning experiment -- :class:`alignment.pruning.strategies.MagnitudePruning` - Standard pruning method +- :class:`nodelens.experiments.base.ExperimentConfig` - Configure experiments +- :class:`nodelens.metrics.RayleighQuotient` - Primary alignment metric +- :class:`nodelens.models.ModelWrapper` - Wrap models for analysis +- :class:`nodelens.experiments.ProgressiveDropoutExperiment` - Main pruning experiment +- :class:`nodelens.pruning.strategies.MagnitudePruning` - Standard pruning method Key Functions ~~~~~~~~~~~~~ -- :func:`alignment.core.get_metric` - Get metric by name -- :func:`alignment.core.get_experiment` - Get experiment by type -- :func:`alignment.core.list_metrics` - List available metrics -- 
:func:`alignment.infrastructure.configuration.load_config` - Load YAML config -- :func:`alignment.analysis.load_results` - Load experiment results +- :func:`nodelens.core.get_metric` - Get metric by name +- :func:`nodelens.core.get_experiment` - Get experiment by type +- :func:`nodelens.core.list_metrics` - List available metrics +- :func:`nodelens.infrastructure.configuration.load_config` - Load YAML config +- :func:`nodelens.analysis.load_results` - Load experiment results diff --git a/docs/source/api/metrics.rst b/docs/source/api/metrics.rst index 4afa87e0..64b2d8e6 100644 --- a/docs/source/api/metrics.rst +++ b/docs/source/api/metrics.rst @@ -1,7 +1,7 @@ Metrics API Reference ===================== -This section provides detailed documentation for all metrics available in the alignment framework. +This section provides detailed documentation for all metrics available in NodeLens. .. contents:: Table of Contents :local: @@ -10,7 +10,7 @@ This section provides detailed documentation for all metrics available in the al Overview -------- -The alignment framework provides 36+ metrics for analyzing neural network behavior, organized into several categories: +NodeLens provides 36+ metrics for analyzing neural network behavior, organized into several categories: - **Alignment Metrics**: Measure how well neurons align with input statistics - **Information Theory Metrics**: Quantify information flow and dependencies @@ -21,12 +21,12 @@ The alignment framework provides 36+ metrics for analyzing neural network behavi Base Metric Classes ------------------- -.. automodule:: alignment.metrics.base +.. automodule:: nodelens.metrics.base :members: :undoc-members: :show-inheritance: -.. autoclass:: alignment.metrics.base.BaseMetric +.. autoclass:: nodelens.metrics.base.BaseMetric :members: :undoc-members: @@ -43,7 +43,7 @@ Alignment Metrics Rayleigh Quotient (RQ) ~~~~~~~~~~~~~~~~~~~~~~ -.. autoclass:: alignment.metrics.rayleigh.RayleighQuotient +.. 
autoclass:: nodelens.metrics.rayleigh.RayleighQuotient :members: :undoc-members: @@ -66,7 +66,7 @@ Rayleigh Quotient (RQ) .. code-block:: python - from alignment.metrics import RayleighQuotient + from nodelens.metrics import RayleighQuotient rq = RayleighQuotient(scale_by_norm=True) scores = rq.compute( @@ -84,7 +84,7 @@ Rayleigh Quotient (RQ) Generalized Rayleigh Quotient ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. autoclass:: alignment.metrics.rayleigh.GeneralizedRayleighQuotient +.. autoclass:: nodelens.metrics.rayleigh.GeneralizedRayleighQuotient :members: :undoc-members: @@ -108,7 +108,7 @@ Information Theory Metrics Mutual Information (MI) ~~~~~~~~~~~~~~~~~~~~~~~ -.. autoclass:: alignment.metrics.information.MutualInformationGaussian +.. autoclass:: nodelens.metrics.information.MutualInformationGaussian :members: :undoc-members: @@ -133,7 +133,7 @@ Mutual Information (MI) .. code-block:: python - from alignment.metrics import MutualInformationGaussian + from nodelens.metrics import MutualInformationGaussian mi = MutualInformationGaussian(estimation_method="knn") scores = mi.compute( @@ -144,7 +144,7 @@ Mutual Information (MI) Conditional Mutual Information ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. autoclass:: alignment.metrics.information.ConditionalMutualInformation +.. autoclass:: nodelens.metrics.information.ConditionalMutualInformation :members: :undoc-members: @@ -163,7 +163,7 @@ Conditional Mutual Information Partial Information Decomposition (PID) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. autoclass:: alignment.metrics.information.PartialInformationDecomposition +.. autoclass:: nodelens.metrics.information.PartialInformationDecomposition :members: :undoc-members: @@ -187,7 +187,7 @@ Partial Information Decomposition (PID) .. 
code-block:: python - from alignment.metrics import PartialInformationDecomposition + from nodelens.metrics import PartialInformationDecomposition pid = PartialInformationDecomposition(method="broja") results = pid.compute( @@ -206,7 +206,7 @@ Similarity Metrics Centered Kernel Alignment (CKA) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. autoclass:: alignment.metrics.similarity.CKA +.. autoclass:: nodelens.metrics.similarity.CKA :members: :undoc-members: @@ -230,7 +230,7 @@ Centered Kernel Alignment (CKA) .. code-block:: python - from alignment.metrics import CKA + from nodelens.metrics import CKA cka = CKA(kernel="rbf", sigma=1.0) similarity = cka.compute( @@ -242,7 +242,7 @@ Centered Kernel Alignment (CKA) Canonical Correlation Analysis (CCA) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. autoclass:: alignment.metrics.similarity.CCA +.. autoclass:: nodelens.metrics.similarity.CCA :members: :undoc-members: @@ -260,7 +260,7 @@ Canonical Correlation Analysis (CCA) Procrustes Distance ~~~~~~~~~~~~~~~~~~~ -.. autoclass:: alignment.metrics.similarity.ProcrustesDistance +.. autoclass:: nodelens.metrics.similarity.ProcrustesDistance :members: :undoc-members: @@ -281,7 +281,7 @@ Spectral Metrics Spectral Analysis ~~~~~~~~~~~~~~~~~ -.. autoclass:: alignment.metrics.spectral.SpectralAnalysis +.. autoclass:: nodelens.metrics.spectral.SpectralAnalysis :members: :undoc-members: @@ -296,7 +296,7 @@ Spectral Analysis .. code-block:: python - from alignment.metrics import SpectralAnalysis + from nodelens.metrics import SpectralAnalysis spectral = SpectralAnalysis() results = spectral.compute(weights=layer_weights) @@ -308,7 +308,7 @@ Spectral Analysis Weight Spectral Norm ~~~~~~~~~~~~~~~~~~~~ -.. autoclass:: alignment.metrics.spectral.WeightSpectralNorm +.. autoclass:: nodelens.metrics.spectral.WeightSpectralNorm :members: :undoc-members: @@ -326,7 +326,7 @@ Task-Specific Metrics Classification Metrics ~~~~~~~~~~~~~~~~~~~~~~ -.. 
autoclass:: alignment.metrics.task_specific.ClassificationAlignment +.. autoclass:: nodelens.metrics.task_specific.ClassificationAlignment :members: :undoc-members: @@ -339,7 +339,7 @@ Classification Metrics Regression Metrics ~~~~~~~~~~~~~~~~~~ -.. autoclass:: alignment.metrics.task_specific.RegressionAlignment +.. autoclass:: nodelens.metrics.task_specific.RegressionAlignment :members: :undoc-members: @@ -355,7 +355,7 @@ Advanced Metric Features Metric Collections ~~~~~~~~~~~~~~~~~~ -.. autoclass:: alignment.metrics.MetricCollection +.. autoclass:: nodelens.metrics.MetricCollection :members: :undoc-members: @@ -363,7 +363,7 @@ Metric Collections .. code-block:: python - from alignment.metrics import MetricCollection + from nodelens.metrics import MetricCollection metrics = MetricCollection([ RayleighQuotient(scale_by_norm=True), @@ -421,8 +421,8 @@ Creating Custom Metrics .. code-block:: python - from alignment.metrics.base import BaseMetric - from alignment.core import register_metric + from nodelens.metrics.base import BaseMetric + from nodelens.core import register_metric @register_metric("my_custom_metric") class MyCustomMetric(BaseMetric): @@ -451,9 +451,9 @@ Creating Custom Metrics Metric Registry ~~~~~~~~~~~~~~~ -.. autofunction:: alignment.core.registry.register_metric -.. autofunction:: alignment.core.registry.get_metric -.. autofunction:: alignment.core.registry.list_metrics +.. autofunction:: nodelens.core.registry.register_metric +.. autofunction:: nodelens.core.registry.get_metric +.. autofunction:: nodelens.core.registry.list_metrics Performance Considerations -------------------------- diff --git a/docs/source/api/models.rst b/docs/source/api/models.rst index 7f949326..9d48bf62 100644 --- a/docs/source/api/models.rst +++ b/docs/source/api/models.rst @@ -1,12 +1,12 @@ Models API Reference ==================== -This section documents the model components of the alignment framework. +This section documents the model components of NodeLens. 
Model Architectures ------------------- -.. automodule:: alignment.models.architectures +.. automodule:: nodelens.models.architectures :members: :undoc-members: :show-inheritance: @@ -16,7 +16,7 @@ Pre-defined Models The framework includes several pre-defined model architectures: -.. autofunction:: alignment.models.architectures.get_model +.. autofunction:: nodelens.models.architectures.get_model Available Models ^^^^^^^^^^^^^^^^ @@ -31,7 +31,7 @@ Available Models Model Registry -------------- -.. automodule:: alignment.models.registry +.. automodule:: nodelens.models.registry :members: :undoc-members: :show-inheritance: @@ -39,7 +39,7 @@ Model Registry Model Utilities --------------- -.. automodule:: alignment.models.utils +.. automodule:: nodelens.models.utils :members: :undoc-members: :show-inheritance: diff --git a/docs/source/api/pruning.rst b/docs/source/api/pruning.rst index b502ff71..8fcca18a 100644 --- a/docs/source/api/pruning.rst +++ b/docs/source/api/pruning.rst @@ -1,7 +1,7 @@ Pruning API Reference ===================== -.. currentmodule:: alignment.pruning +.. currentmodule:: nodelens.pruning Main Interface -------------- @@ -20,12 +20,12 @@ Configuration Base Classes ------------ -.. autoclass:: alignment.pruning.base.BasePruningStrategy +.. autoclass:: nodelens.pruning.base.BasePruningStrategy :members: :undoc-members: :show-inheritance: -.. autoclass:: alignment.pruning.base.IterativePruningStrategy +.. autoclass:: nodelens.pruning.base.IterativePruningStrategy :members: :undoc-members: :show-inheritance: @@ -33,17 +33,17 @@ Base Classes Magnitude-based Strategies -------------------------- -.. autoclass:: alignment.pruning.strategies.MagnitudePruning +.. autoclass:: nodelens.pruning.strategies.MagnitudePruning :members: :undoc-members: :show-inheritance: -.. autoclass:: alignment.pruning.strategies.IterativeMagnitudePruning +.. autoclass:: nodelens.pruning.strategies.IterativeMagnitudePruning :members: :undoc-members: :show-inheritance: -.. 
autoclass:: alignment.pruning.strategies.GlobalMagnitudePruning +.. autoclass:: nodelens.pruning.strategies.GlobalMagnitudePruning :members: :undoc-members: :show-inheritance: @@ -51,17 +51,17 @@ Magnitude-based Strategies Gradient-based Strategies ------------------------- -.. autoclass:: alignment.pruning.strategies.GradientPruning +.. autoclass:: nodelens.pruning.strategies.GradientPruning :members: :undoc-members: :show-inheritance: -.. autoclass:: alignment.pruning.strategies.FisherPruning +.. autoclass:: nodelens.pruning.strategies.FisherPruning :members: :undoc-members: :show-inheritance: -.. autoclass:: alignment.pruning.strategies.MomentumPruning +.. autoclass:: nodelens.pruning.strategies.MomentumPruning :members: :undoc-members: :show-inheritance: @@ -69,17 +69,17 @@ Gradient-based Strategies Random Strategies ----------------- -.. autoclass:: alignment.pruning.strategies.RandomPruning +.. autoclass:: nodelens.pruning.strategies.RandomPruning :members: :undoc-members: :show-inheritance: -.. autoclass:: alignment.pruning.strategies.LayerwiseRandomPruning +.. autoclass:: nodelens.pruning.strategies.LayerwiseRandomPruning :members: :undoc-members: :show-inheritance: -.. autoclass:: alignment.pruning.strategies.BernoulliPruning +.. autoclass:: nodelens.pruning.strategies.BernoulliPruning :members: :undoc-members: :show-inheritance: @@ -87,7 +87,7 @@ Random Strategies Parallel Strategies ------------------- -.. autoclass:: alignment.pruning.strategies.ParallelModePruning +.. autoclass:: nodelens.pruning.strategies.ParallelModePruning :members: :undoc-members: :show-inheritance: @@ -95,7 +95,7 @@ Parallel Strategies .. automethod:: prune_parallel .. automethod:: combine_masks -.. autoclass:: alignment.pruning.strategies.TensorizedPruning +.. autoclass:: nodelens.pruning.strategies.TensorizedPruning :members: :undoc-members: :show-inheritance: @@ -103,14 +103,14 @@ Parallel Strategies .. automethod:: compute_pruning_tensor .. 
automethod:: analyze_pruning_patterns -.. autoclass:: alignment.pruning.strategies.AsyncParallelPruning +.. autoclass:: nodelens.pruning.strategies.AsyncParallelPruning :members: :undoc-members: :show-inheritance: .. automethod:: prune_modules_parallel -.. autoclass:: alignment.pruning.strategies.ParallelPruningResult +.. autoclass:: nodelens.pruning.strategies.ParallelPruningResult :members: :undoc-members: :show-inheritance: @@ -118,22 +118,22 @@ Parallel Strategies Pruning Experiments ------------------- -.. autoclass:: alignment.pruning.experiments.ProgressiveDropoutExperiment +.. autoclass:: nodelens.pruning.experiments.ProgressiveDropoutExperiment :members: :undoc-members: :show-inheritance: -.. autoclass:: alignment.pruning.experiments.CascadingLayerPruningExperiment +.. autoclass:: nodelens.pruning.experiments.CascadingLayerPruningExperiment :members: :undoc-members: :show-inheritance: -.. autoclass:: alignment.pruning.experiments.LayerIsolatedPruningExperiment +.. autoclass:: nodelens.pruning.experiments.LayerIsolatedPruningExperiment :members: :undoc-members: :show-inheritance: -.. autoclass:: alignment.pruning.experiments.EigenvectorDropoutExperiment +.. autoclass:: nodelens.pruning.experiments.EigenvectorDropoutExperiment :members: :undoc-members: :show-inheritance: diff --git a/docs/source/api/training.rst b/docs/source/api/training.rst index 258879cb..302d8617 100644 --- a/docs/source/api/training.rst +++ b/docs/source/api/training.rst @@ -1,17 +1,17 @@ Training API Reference ====================== -This section documents the training components of the alignment framework. +This section documents the training components of NodeLens. Base Training ------------- -.. automodule:: alignment.training.base +.. automodule:: nodelens.training.base :members: :undoc-members: :show-inheritance: -.. autoclass:: alignment.training.base.BaseTrainer +.. 
autoclass:: nodelens.training.base.BaseTrainer :members: :special-members: __init__ :undoc-members: @@ -19,19 +19,19 @@ Base Training Multi-Network Training ---------------------- -.. automodule:: alignment.training.multi_network +.. automodule:: nodelens.training.multi_network :members: :undoc-members: :show-inheritance: -.. autoclass:: alignment.training.multi_network.MultiNetworkTrainer +.. autoclass:: nodelens.training.multi_network.MultiNetworkTrainer :members: :undoc-members: Training Utilities ------------------ -.. automodule:: alignment.training.utils +.. automodule:: nodelens.training.utils :members: :undoc-members: :show-inheritance: @@ -39,11 +39,11 @@ Training Utilities Optimization ~~~~~~~~~~~~ -.. autofunction:: alignment.training.utils.get_optimizer -.. autofunction:: alignment.training.utils.get_scheduler +.. autofunction:: nodelens.training.utils.get_optimizer +.. autofunction:: nodelens.training.utils.get_scheduler Training Loops ~~~~~~~~~~~~~~ -.. autofunction:: alignment.training.utils.train_epoch -.. autofunction:: alignment.training.utils.evaluate +.. autofunction:: nodelens.training.utils.train_epoch +.. autofunction:: nodelens.training.utils.evaluate diff --git a/docs/source/api/utils.rst b/docs/source/api/utils.rst index 39a356f8..b4d5bae2 100644 --- a/docs/source/api/utils.rst +++ b/docs/source/api/utils.rst @@ -1,17 +1,17 @@ Infrastructure API Reference ============================ -This section documents the infrastructure and utility components of the alignment framework. +This section documents the infrastructure and utility components of NodeLens. Configuration ------------- -.. automodule:: alignment.infrastructure.configuration +.. automodule:: nodelens.infrastructure.configuration :members: :undoc-members: :show-inheritance: -.. automodule:: alignment.infrastructure.configuration.config +.. 
automodule:: nodelens.infrastructure.configuration.config :members: :undoc-members: :show-inheritance: @@ -19,7 +19,7 @@ Configuration Distributed Computing --------------------- -.. automodule:: alignment.infrastructure.computing.distributed +.. automodule:: nodelens.infrastructure.computing.distributed :members: :undoc-members: :show-inheritance: @@ -27,7 +27,7 @@ Distributed Computing Storage and Logging ------------------- -.. automodule:: alignment.infrastructure.storage +.. automodule:: nodelens.infrastructure.storage :members: :undoc-members: :show-inheritance: @@ -35,7 +35,7 @@ Storage and Logging Checkpointing ~~~~~~~~~~~~~ -.. automodule:: alignment.infrastructure.storage.checkpoint +.. automodule:: nodelens.infrastructure.storage.checkpoint :members: :undoc-members: :show-inheritance: @@ -43,7 +43,7 @@ Checkpointing Logging ~~~~~~~ -.. automodule:: alignment.infrastructure.storage.logging +.. automodule:: nodelens.infrastructure.storage.logging :members: :undoc-members: :show-inheritance: @@ -51,7 +51,7 @@ Logging Experiment Tracking ------------------- -.. automodule:: alignment.experiments.tracking +.. 
automodule:: nodelens.experiments.tracking :members: :undoc-members: :show-inheritance: diff --git a/docs/source/conf.py b/docs/source/conf.py index 21663407..1c60901f 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -13,10 +13,10 @@ # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information -project = "Neural Network Alignment" +project = "NodeLens" copyright = f"{datetime.now().year}" -author = "" -release = "0.1.0" +author = "Kempner Institute" +release = "0.2.0" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration @@ -39,7 +39,11 @@ } templates_path = ["_templates"] -exclude_patterns = [] +exclude_patterns = [ + "api/**", + "developer_guide/**", + "examples/**", +] # Napoleon settings for Google/NumPy style docstrings napoleon_google_docstring = True @@ -69,7 +73,7 @@ # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output html_theme = "sphinx_rtd_theme" -html_static_path = ["_static"] +html_static_path = [] html_logo = None html_favicon = None html_theme_options = { diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst index f0fea63f..7745d59a 100644 --- a/docs/source/contributing.rst +++ b/docs/source/contributing.rst @@ -1,7 +1,7 @@ Contributing Guide ================== -We welcome contributions to the alignment framework! This guide will help you get started. +We welcome contributions to NodeLens. This guide will help you get started. Getting Started --------------- @@ -11,7 +11,7 @@ Getting Started 3. Create a new branch for your feature or bugfix 4. Make your changes 5. Submit a pull request -6. Check out the kempner computing handbook for more information on how to contribute to the project. +6. Check the project documentation for repository-specific conventions. 
Development Setup ----------------- @@ -19,8 +19,8 @@ Development Setup .. code-block:: bash # Clone the repository - git clone https://github.com/yourusername/alignment.git - cd alignment + git clone https://github.com/KempnerInstitute/nodelens.git + cd nodelens # Install in development mode with all extras pip install -e ".[all]" @@ -33,9 +33,9 @@ Code Style We use the following tools to maintain code quality: +- **ruff**: Linting - **black**: Code formatting - **isort**: Import sorting -- **flake8**: Linting - **mypy**: Type checking Run all checks: @@ -49,7 +49,7 @@ Run all checks: isort src/ tests/ # Run linting - flake8 src/ tests/ + ruff check src tests # Type checking mypy src/ @@ -63,7 +63,7 @@ All new features should include tests: # tests/test_my_feature.py import pytest - from alignment.my_module import my_function + from nodelens.my_module import my_function def test_my_function(): result = my_function(input_data) @@ -80,7 +80,7 @@ Run tests: pytest tests/test_metrics.py # Run with coverage - pytest --cov=alignment + pytest --cov=nodelens Documentation ------------- @@ -104,7 +104,7 @@ Pull Request Guidelines 2. **Description**: Explain what changes you made and why 3. **Tests**: Ensure all tests pass 4. **Documentation**: Update docs if needed -5. **Changelog**: Add entry to CHANGELOG.md +5. **Release notes**: For paper-facing changes, update the relevant file under ``projects/`` Example PR description: @@ -142,4 +142,4 @@ Questions? - Start a discussion for general questions - Check existing issues before creating new ones -Thank you for contributing to the alignment framework! +Thank you for contributing to NodeLens. 
diff --git a/docs/source/developer_guide/extensibility.rst b/docs/source/developer_guide/extensibility.rst index 7eab0b0a..a8634fe6 100644 --- a/docs/source/developer_guide/extensibility.rst +++ b/docs/source/developer_guide/extensibility.rst @@ -1,7 +1,7 @@ Extending the Framework ======================= -This guide explains how to extend the alignment framework with custom components +This guide explains how to extend NodeLens with custom components using the registry system. Overview @@ -53,8 +53,8 @@ Here's how to create and register a custom alignment metric: .. code-block:: python - from alignment.core.registry import register_metric - from alignment.core.protocols import BaseMetric + from nodelens.core.registry import register_metric + from nodelens.core.protocols import BaseMetric import torch @register_metric( @@ -124,8 +124,8 @@ Analyzers perform higher-level analysis on metrics: .. code-block:: python - from alignment.core.registry import register_analyzer - from alignment.core.protocols import BaseAnalyzer + from nodelens.core.registry import register_analyzer + from nodelens.core.protocols import BaseAnalyzer import numpy as np @register_analyzer( @@ -174,8 +174,8 @@ Pruning strategies define how to select neurons for removal: .. code-block:: python - from alignment.core.registry import register_pruner - from alignment.core.protocols import BasePruner + from nodelens.core.registry import register_pruner + from nodelens.core.protocols import BasePruner import torch @register_pruner( @@ -217,7 +217,7 @@ Once registered, custom components can be used by name: .. 
code-block:: python - from alignment.core.registry import get_metric, initialize_registries + from nodelens.core.registry import get_metric, initialize_registries # Initialize (discovers built-in + custom components) initialize_registries() @@ -230,7 +230,7 @@ Once registered, custom components can be used by name: metric = get_metric("kurtosis") # Same as "activation_kurtosis" # Search for metrics - from alignment.core import METRIC_REGISTRY + from nodelens.core import METRIC_REGISTRY statistical_metrics = METRIC_REGISTRY.search(tags=["statistics"]) Plugin Discovery @@ -239,7 +239,7 @@ Plugin Discovery Place your custom components in these locations for auto-discovery: - ``./plugins/`` (project-local) -- ``~/.alignment/plugins/`` (user-global) +- ``~/.nodelens/plugins/`` (user-global) They will be automatically loaded when the framework initializes. @@ -247,7 +247,7 @@ Or manually load from a custom location: .. code-block:: python - from alignment.core.registry import discover_plugins + from nodelens.core.registry import discover_plugins discover_plugins(["./my_custom_plugins/"]) diff --git a/docs/source/developer_guide/index.rst b/docs/source/developer_guide/index.rst index 55f4980c..70cdc431 100644 --- a/docs/source/developer_guide/index.rst +++ b/docs/source/developer_guide/index.rst @@ -2,7 +2,7 @@ Developer Guide =============== This section contains documentation for developers who want to extend or contribute -to the alignment framework. +to NodeLens. .. toctree:: :maxdepth: 2 @@ -13,7 +13,7 @@ to the alignment framework. Overview -------- -The alignment framework is designed to be highly extensible. You can add: +NodeLens is designed to be highly extensible. You can add: - **Custom Metrics**: Define new per-neuron alignment metrics - **Custom Analyzers**: Create new analysis pipelines (clustering, halo, etc.) 
diff --git a/docs/source/examples/index.rst b/docs/source/examples/index.rst index 00e9ef5a..00397b33 100644 --- a/docs/source/examples/index.rst +++ b/docs/source/examples/index.rst @@ -1,7 +1,7 @@ Examples and Tutorials ====================== -This section contains examples and tutorials for using the alignment framework. +This section contains examples and tutorials for using NodeLens. Quick Start Examples -------------------- @@ -109,7 +109,7 @@ Loading and Running Experiments .. code-block:: python - from alignment.experiments import GeneralAlignmentExperiment + from nodelens.experiments import GeneralAlignmentExperiment # From configuration file experiment = GeneralAlignmentExperiment.from_yaml("config.yaml") @@ -120,7 +120,7 @@ Computing Metrics on a Model .. code-block:: python - from alignment import ModelWrapper, get_metric + from nodelens import ModelWrapper, get_metric wrapped_model = ModelWrapper(model) metric = get_metric("rayleigh_quotient")() @@ -139,7 +139,7 @@ Batch Processing Multiple Metrics .. code-block:: python - from alignment.dataops.processing import BatchMetricProcessor + from nodelens.dataops.processing import BatchMetricProcessor processor = BatchMetricProcessor( metrics=["rayleigh_quotient", "mutual_information_gaussian"], diff --git a/docs/source/index.rst b/docs/source/index.rst index 9d5c247a..1ea6e57a 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,36 +1,39 @@ -Alignment Analysis Framework Documentation -========================================== +NodeLens Documentation +====================== -A framework for analyzing neural network alignment, pruning, and information-theoretic properties. +NodeLens is a research codebase for node- and channel-level metrics, +interpretability analysis, and structured interventions. The Python package is +imported as ``nodelens``. 
Overview -------- -The Alignment Analysis Framework provides tools for: +The codebase provides tools for: - Computing alignment metrics between neural representations and task structure - Implementing and testing pruning strategies on neural networks -- Training and analyzing multiple networks with parallel execution +- Estimating channel-level loss sensitivity in LLM feed-forward layers - Evaluating information-theoretic properties of learned representations +- Packaging paper artifacts for public release Key Features ------------ -- 30+ alignment metrics including Rayleigh quotient, mutual information, and spectral methods +- Alignment metrics including Rayleigh quotient, mutual information, and spectral methods - Multiple pruning strategies: magnitude-based, gradient-based, and alignment-based - Support for vision models (ResNet, VGG, EfficientNet, ViT) and language models - Flexible experiment framework with YAML configuration -- GPU-optimized implementations +- Paper-specific release folders under ``projects/`` Quick Start ----------- .. code-block:: python - from alignment.experiments import GeneralAlignmentExperiment - from alignment.configs.config_loader import load_config + from nodelens.experiments import GeneralAlignmentExperiment + from nodelens.configs.config_loader import load_config - config = load_config('configs/examples/resnet18_analysis.yaml') + config = load_config('configs/examples/mnist_basic.yaml') experiment = GeneralAlignmentExperiment(config) results = experiment.run() @@ -43,6 +46,7 @@ Quick Start user_guide/experiments user_guide/metrics user_guide/pruning + user_guide/pruning_strategies user_guide/configuration .. toctree:: @@ -54,23 +58,6 @@ Quick Start reference/models reference/configuration -.. toctree:: - :maxdepth: 2 - :caption: API Documentation - - api/index - api/experiments - api/metrics - api/pruning - api/models - api/data - -.. toctree:: - :maxdepth: 1 - :caption: Examples - - examples/index - .. 
toctree:: :maxdepth: 1 :caption: Contributing diff --git a/docs/source/reference/index.rst b/docs/source/reference/index.rst index 2ed46c5f..9ec54ae1 100644 --- a/docs/source/reference/index.rst +++ b/docs/source/reference/index.rst @@ -1,7 +1,7 @@ Reference Documentation ======================== -Complete reference materials for the alignment framework. +Complete reference materials for NodeLens. .. toctree:: :maxdepth: 2 diff --git a/docs/source/reference/metrics.rst b/docs/source/reference/metrics.rst index e8043e6d..276b4998 100644 --- a/docs/source/reference/metrics.rst +++ b/docs/source/reference/metrics.rst @@ -79,7 +79,7 @@ Spectral Metrics - Distribution of eigenvalues Task-Specific Metrics -~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~ .. list-table:: :header-rows: 1 @@ -99,7 +99,7 @@ Usage Example .. code-block:: python - from alignment.metrics import get_metric + from nodelens.metrics import get_metric # Single metric metric = get_metric("rayleigh_quotient") diff --git a/docs/source/reference/models.rst b/docs/source/reference/models.rst index ba251787..1ed12d81 100644 --- a/docs/source/reference/models.rst +++ b/docs/source/reference/models.rst @@ -193,7 +193,7 @@ Complete Model Examples See ``configs/examples/`` for complete working configurations: -- ``resnet18_analysis.yaml`` - ResNet-18 on CIFAR-10 +- ``mnist_basic.yaml`` - MLP on MNIST - ``alexnet_analysis.yaml`` - AlexNet configuration - ``vit_b16_analysis.yaml`` - Vision Transformer setup - ``vision_networks_master.yaml`` - All models with options diff --git a/docs/source/user_guide/configuration.rst b/docs/source/user_guide/configuration.rst index 80f98b94..cacb2304 100644 --- a/docs/source/user_guide/configuration.rst +++ b/docs/source/user_guide/configuration.rst @@ -1,7 +1,7 @@ Configuration Guide =================== -This guide explains all configuration options available in the alignment framework. +This guide explains all configuration options available in NodeLens. .. 
contents:: Table of Contents :local: @@ -10,7 +10,7 @@ This guide explains all configuration options available in the alignment framewo Overview -------- -The alignment framework uses a hierarchical configuration system that allows fine-grained control over experiments, metrics, training, and analysis. +NodeLens uses a hierarchical configuration system that allows fine-grained control over experiments, metrics, training, and analysis. Configuration can be specified via: @@ -27,7 +27,7 @@ Using Python .. code-block:: python - from alignment.experiments.base import ExperimentConfig + from nodelens.experiments.base import ExperimentConfig config = ExperimentConfig( name="my_experiment", @@ -55,7 +55,7 @@ Loading configuration: .. code-block:: python - from alignment.infrastructure.configuration import load_config + from nodelens.infrastructure.configuration import load_config config = load_config("config.yaml") @@ -391,7 +391,7 @@ Weights & Biases Integration wandb_config = { "use_wandb": True, - "wandb_project": "alignment", + "wandb_project": "nodelens", "wandb_entity": "your-entity", "wandb_tags": ["experiment"], "wandb_notes": "Experiment notes", @@ -500,7 +500,7 @@ The framework validates configurations: .. 
code-block:: python - from alignment.infrastructure.configuration import validate_config + from nodelens.infrastructure.configuration import validate_config # Validate configuration errors = validate_config(config) @@ -508,7 +508,7 @@ The framework validates configurations: print("Configuration errors:", errors) # Auto-fix common issues - from alignment.infrastructure.configuration import fix_config + from nodelens.infrastructure.configuration import fix_config fixed_config = fix_config(config) @@ -525,5 +525,5 @@ Best Practices See Also -------- -- :doc:`/api/experiments` - Experiment API documentation +- ``nodelens.experiments`` - Experiment API entry point - :doc:`experiments` - Experiments user guide diff --git a/docs/source/user_guide/experiments.rst b/docs/source/user_guide/experiments.rst index 2a016140..07d46fa9 100644 --- a/docs/source/user_guide/experiments.rst +++ b/docs/source/user_guide/experiments.rst @@ -1,7 +1,7 @@ Experiments Guide ================= -This guide covers the different types of experiments available in the alignment framework. +This guide covers the different types of experiments available in NodeLens. Overview -------- @@ -26,7 +26,7 @@ The main experiment class that supports: .. code-block:: python - from alignment.experiments import GeneralAlignmentExperiment, GeneralAlignmentConfig + from nodelens.experiments import GeneralAlignmentExperiment, GeneralAlignmentConfig config = GeneralAlignmentConfig( experiment_name="mnist_alignment", @@ -74,7 +74,7 @@ Analyze the effect of pruning individual layers: .. code-block:: python - from alignment.pruning.experiments import LayerIsolatedPruningExperiment, LayerIsolatedConfig + from nodelens.pruning.experiments import LayerIsolatedPruningExperiment, LayerIsolatedConfig config = LayerIsolatedConfig( experiment_name="layer_analysis", @@ -95,7 +95,7 @@ Apply the same pruning rate across all layers: .. 
code-block:: python - from alignment.pruning.experiments import GlobalDropoutExperiment, GlobalDropoutConfig + from nodelens.pruning.experiments import GlobalDropoutExperiment, GlobalDropoutConfig config = GlobalDropoutConfig( experiment_name="global_pruning", @@ -115,7 +115,7 @@ Progressive pruning that cascades through the network: .. code-block:: python - from alignment.pruning.experiments import CascadingLayerPruningExperiment, CascadingConfig + from nodelens.pruning.experiments import CascadingLayerPruningExperiment, CascadingConfig config = CascadingConfig( experiment_name="cascading_analysis", @@ -135,7 +135,7 @@ Use eigendecomposition for pruning decisions: .. code-block:: python - from alignment.pruning.experiments import EigenvectorDropoutExperiment, EigenvectorConfig + from nodelens.pruning.experiments import EigenvectorDropoutExperiment, EigenvectorConfig config = EigenvectorConfig( experiment_name="eigenvector_pruning", @@ -193,7 +193,7 @@ From Python .. code-block:: python - from alignment.experiments import create_experiment_from_config + from nodelens.experiments import create_experiment_from_config import yaml # Load configuration @@ -265,7 +265,7 @@ Add custom alignment metrics: .. code-block:: python - from alignment.metrics import register_metric + from nodelens.metrics import register_metric @register_metric("my_custom_metric") def my_metric(model, dataloader, device): @@ -279,7 +279,7 @@ Implement custom pruning strategies: .. 
code-block:: python - from alignment.pruning.strategies import BasePruningStrategy + from nodelens.pruning.strategies import BasePruningStrategy class MyPruningStrategy(BasePruningStrategy): def compute_importance_scores(self, model, dataloader): diff --git a/docs/source/user_guide/installation.rst b/docs/source/user_guide/installation.rst index 5dec2cfa..31b2476a 100644 --- a/docs/source/user_guide/installation.rst +++ b/docs/source/user_guide/installation.rst @@ -19,10 +19,10 @@ Create and activate the conda environment: .. code-block:: bash git clone - cd alignment + cd nodelens conda env create -f environment.yml - conda activate alignment + conda activate nodelens pip install -e . @@ -34,7 +34,7 @@ Install directly from source: .. code-block:: bash git clone - cd alignment + cd nodelens pip install -e . Verification @@ -44,8 +44,8 @@ Test the installation: .. code-block:: python - import alignment - from alignment.metrics import METRIC_REGISTRY + import nodelens + from nodelens.metrics import METRIC_REGISTRY # List available metrics print(METRIC_REGISTRY.list()) @@ -70,5 +70,5 @@ Next Steps ---------- - See :doc:`quickstart` for basic usage -- Check :doc:`/examples/index` for examples -- Read :doc:`/api/index` for API documentation +- Check the repository ``examples/`` folder for runnable examples +- Read the top-level README for current API entry points diff --git a/docs/source/user_guide/metrics.rst b/docs/source/user_guide/metrics.rst index 4b57801a..1f1228b8 100644 --- a/docs/source/user_guide/metrics.rst +++ b/docs/source/user_guide/metrics.rst @@ -32,7 +32,7 @@ Usage .. code-block:: python - from alignment.metrics import RayleighQuotient + from nodelens.metrics import RayleighQuotient # Create metric instance rq_metric = RayleighQuotient( @@ -70,7 +70,7 @@ Usage .. 
code-block:: python - from alignment.metrics import MutualInformationGaussian + from nodelens.metrics import MutualInformationGaussian # Create metric instance mi_metric = MutualInformationGaussian( @@ -102,7 +102,7 @@ Usage .. code-block:: python - from alignment.metrics import PartialInformationDecomposition + from nodelens.metrics import PartialInformationDecomposition # Create metric instance pid_metric = PartialInformationDecomposition( @@ -139,7 +139,7 @@ Usage .. code-block:: python - from alignment.metrics import CKA + from nodelens.metrics import CKA # Create metric instance cka_metric = CKA( @@ -170,7 +170,7 @@ Usage .. code-block:: python - from alignment.metrics import CCA + from nodelens.metrics import CCA # Create metric instance cca_metric = CCA( @@ -194,7 +194,7 @@ Usage .. code-block:: python - from alignment.metrics import GeneralizedRayleighQuotient + from nodelens.metrics import GeneralizedRayleighQuotient grq_metric = GeneralizedRayleighQuotient() @@ -215,7 +215,7 @@ Usage .. code-block:: python - from alignment.metrics import SharedInformation + from nodelens.metrics import SharedInformation shared_info = SharedInformation( method="correlation" # Method for measuring sharing @@ -234,7 +234,7 @@ Batch Computation .. code-block:: python - from alignment.metrics import MetricCollection + from nodelens.metrics import MetricCollection # Create collection of metrics metrics = MetricCollection([ @@ -308,7 +308,7 @@ Creating a Custom Metric .. code-block:: python - from alignment.metrics.base import BaseMetric + from nodelens.metrics.base import BaseMetric class MyCustomMetric(BaseMetric): def __init__(self, parameter=1.0): @@ -325,7 +325,7 @@ Registering Custom Metrics .. 
code-block:: python - from alignment.core.registry import register_metric + from nodelens.core.registry import register_metric @register_metric("my_metric") class MyMetric(BaseMetric): diff --git a/docs/source/user_guide/pruning.rst b/docs/source/user_guide/pruning.rst index c7f3fbf8..af3a17fa 100644 --- a/docs/source/user_guide/pruning.rst +++ b/docs/source/user_guide/pruning.rst @@ -1,7 +1,7 @@ Pruning Guide ============= -This guide covers the pruning capabilities in the alignment framework, including different strategies and experiment types. +This guide covers the pruning capabilities in NodeLens, including different strategies and experiment types. Overview -------- @@ -15,7 +15,7 @@ The framework provides comprehensive pruning capabilities: Pruning Strategies ------------------ -The framework includes several pruning strategies in ``alignment.pruning.strategies``: +The framework includes several pruning strategies in ``nodelens.pruning.strategies``: Magnitude-based Pruning ^^^^^^^^^^^^^^^^^^^^^^^ @@ -24,7 +24,7 @@ Prunes weights or neurons based on their magnitude: .. code-block:: python - from alignment.pruning.strategies import MagnitudePruning + from nodelens.pruning.strategies import MagnitudePruning strategy = MagnitudePruning() masks = strategy.compute_masks(model, pruning_ratio=0.5) @@ -36,7 +36,7 @@ Uses gradient information to determine importance: .. code-block:: python - from alignment.pruning.strategies import GradientPruning + from nodelens.pruning.strategies import GradientPruning strategy = GradientPruning() masks = strategy.compute_masks(model, dataloader, pruning_ratio=0.5) @@ -48,7 +48,7 @@ Baseline strategy that randomly prunes connections: .. code-block:: python - from alignment.pruning.strategies import RandomPruning + from nodelens.pruning.strategies import RandomPruning strategy = RandomPruning(seed=42) masks = strategy.compute_masks(model, pruning_ratio=0.5) @@ -60,7 +60,7 @@ Uses alignment metrics to guide pruning decisions: .. 
code-block:: python - from alignment.pruning.strategies import AlignmentPruning + from nodelens.pruning.strategies import AlignmentPruning strategy = AlignmentPruning(metric="rayleigh_quotient") masks = strategy.compute_masks(model, dataloader, pruning_ratio=0.5) @@ -75,7 +75,7 @@ Applies the same pruning rate across all layers: .. code-block:: python - from alignment.pruning.experiments import GlobalDropoutExperiment, GlobalDropoutConfig + from nodelens.pruning.experiments import GlobalDropoutExperiment, GlobalDropoutConfig config = GlobalDropoutConfig( experiment_name="global_pruning_mnist", @@ -96,7 +96,7 @@ Analyzes the effect of pruning individual layers: .. code-block:: python - from alignment.pruning.experiments import LayerIsolatedPruningExperiment, LayerIsolatedConfig + from nodelens.pruning.experiments import LayerIsolatedPruningExperiment, LayerIsolatedConfig config = LayerIsolatedConfig( experiment_name="layer_analysis", @@ -117,7 +117,7 @@ Progressive pruning that cascades through network layers: .. code-block:: python - from alignment.pruning.experiments import CascadingLayerPruningExperiment, CascadingConfig + from nodelens.pruning.experiments import CascadingLayerPruningExperiment, CascadingConfig config = CascadingConfig( experiment_name="cascading_analysis", @@ -138,7 +138,7 @@ Uses spectral analysis for pruning: .. code-block:: python - from alignment.pruning.experiments import EigenvectorDropoutExperiment, EigenvectorConfig + from nodelens.pruning.experiments import EigenvectorDropoutExperiment, EigenvectorConfig config = EigenvectorConfig( experiment_name="eigenvector_pruning", @@ -226,7 +226,7 @@ Implement custom strategies by extending the base class: .. 
code-block:: python - from alignment.pruning.strategies import BasePruningStrategy + from nodelens.pruning.strategies import BasePruningStrategy class MyCustomPruning(BasePruningStrategy): def compute_importance_scores(self, model, dataloader=None): diff --git a/docs/source/user_guide/pruning_strategies.rst b/docs/source/user_guide/pruning_strategies.rst index 25409677..10afd852 100644 --- a/docs/source/user_guide/pruning_strategies.rst +++ b/docs/source/user_guide/pruning_strategies.rst @@ -1,12 +1,12 @@ Pruning Strategies Guide ======================== -This guide documents all pruning strategies available in the alignment framework and their use cases. +This guide documents all pruning strategies available in NodeLens and their use cases. Overview -------- -Pruning is a technique for reducing neural network size by removing parameters while maintaining performance. The alignment framework provides several pruning strategies to analyze how network sparsity affects alignment metrics. +Pruning is a technique for reducing neural network size by removing parameters while maintaining performance. NodeLens provides several pruning strategies to analyze how network sparsity affects alignment metrics. Available Pruning Strategies ---------------------------- @@ -14,7 +14,7 @@ Available Pruning Strategies 1. Magnitude-Based Pruning ~~~~~~~~~~~~~~~~~~~~~~~~~~ -**Module**: :mod:`alignment.pruning.strategies.magnitude` +**Module**: :mod:`nodelens.pruning.strategies.magnitude` **Classes**: @@ -30,7 +30,7 @@ Available Pruning Strategies .. code-block:: python - from alignment.pruning import get_pruning_strategy + from nodelens.pruning import get_pruning_strategy # Basic magnitude pruning strategy = get_pruning_strategy("magnitude") @@ -50,7 +50,7 @@ Available Pruning Strategies 2. 
Random Pruning ~~~~~~~~~~~~~~~~~ -**Module**: :mod:`alignment.pruning.strategies.random` +**Module**: :mod:`nodelens.pruning.strategies.random` **Classes**: @@ -72,7 +72,7 @@ Available Pruning Strategies 3. Gradient-Based Pruning ~~~~~~~~~~~~~~~~~~~~~~~~~ -**Module**: :mod:`alignment.pruning.strategies.gradient` +**Module**: :mod:`nodelens.pruning.strategies.gradient` **Classes**: @@ -107,7 +107,7 @@ All strategies support structured pruning by setting ``structured=True``: .. code-block:: python - from alignment.pruning import PruningConfig + from nodelens.pruning import PruningConfig config = PruningConfig( strategy="magnitude", @@ -126,7 +126,7 @@ The framework provides iterative pruning through dedicated strategies: .. code-block:: python - from alignment.pruning.strategies.magnitude import IterativeMagnitudePruning + from nodelens.pruning.strategies.magnitude import IterativeMagnitudePruning strategy = IterativeMagnitudePruning( iterations=10, @@ -150,7 +150,7 @@ Create pruning schedules for gradual sparsification: .. code-block:: python - from alignment.pruning.schedules import PolynomialSchedule, LinearSchedule + from nodelens.pruning.schedules import PolynomialSchedule, LinearSchedule # Polynomial schedule (recommended) schedule = PolynomialSchedule( @@ -219,7 +219,7 @@ Check Sparsity .. code-block:: python - from alignment.pruning.utils import get_sparsity, get_model_sparsity + from nodelens.pruning.utils import get_sparsity, get_model_sparsity # Layer sparsity sparsity = get_sparsity(layer) @@ -232,7 +232,7 @@ Remove Pruning .. code-block:: python - from alignment.pruning.utils import remove_pruning + from nodelens.pruning.utils import remove_pruning # Makes pruning permanent and removes masks remove_pruning(layer) @@ -250,7 +250,7 @@ Example analysis: .. 
code-block:: python - from alignment.experiments import GeneralAlignmentExperiment + from nodelens.experiments import GeneralAlignmentExperiment # Track how metrics change with pruning config = { @@ -293,6 +293,6 @@ Issue: Memory Not Reduced After Pruning See Also -------- -- :doc:`/api/pruning` - Complete API reference +- ``nodelens.pruning`` - Pruning API entry point - :doc:`experiments` - Pruning experiments guide -- :doc:`/examples/pruning_experiments` - Example code +- Repository examples and configs - Example code diff --git a/docs/source/user_guide/quickstart.rst b/docs/source/user_guide/quickstart.rst index bb4f4669..7dab6635 100644 --- a/docs/source/user_guide/quickstart.rst +++ b/docs/source/user_guide/quickstart.rst @@ -1,7 +1,7 @@ Quickstart Guide ================ -This guide will get you up and running with the alignment framework in minutes. +This guide will get you up and running with NodeLens in minutes. .. contents:: Table of Contents :local: @@ -16,8 +16,8 @@ Basic Installation .. code-block:: bash # Clone the repository - git clone https://github.com/KempnerInstitute/alignment.git - cd alignment + git clone https://github.com/KempnerInstitute/nodelens.git + cd nodelens # Install the package pip install -e . @@ -31,8 +31,8 @@ Full Installation pip install -e .[all] # Or install specific extras - pip install -e .[viz] # Visualization tools - pip install -e .[all] # All optional dependencies + pip install -e .[train] # Training and large-model utilities + pip install -e .[all] # Development and training extras pip install -e .[docs] # Documentation building Your First Experiment @@ -44,7 +44,7 @@ Your First Experiment .. code-block:: python import torch - from alignment.metrics import RayleighQuotient + from nodelens.metrics import RayleighQuotient # Create some sample data inputs = torch.randn(100, 512) # 100 samples, 512 features @@ -62,8 +62,8 @@ Your First Experiment .. 
code-block:: python - from alignment.models import ModelWrapper - from alignment.metrics import get_metric + from nodelens.models import ModelWrapper + from nodelens.metrics import get_metric import torchvision.models as models # Load a pre-trained ResNet @@ -94,8 +94,8 @@ Your First Experiment .. code-block:: python - from alignment.experiments import ProgressiveDropoutExperiment - from alignment.experiments.base import ExperimentConfig + from nodelens.experiments import ProgressiveDropoutExperiment + from nodelens.experiments.base import ExperimentConfig # Configure experiment config = ExperimentConfig( @@ -133,8 +133,8 @@ Comparing Network Architectures .. code-block:: python - from alignment.experiments import ExperimentRunner - from alignment.experiments.base import ExperimentConfig + from nodelens.experiments import ExperimentRunner + from nodelens.experiments.base import ExperimentConfig # Define experiments for different architectures architectures = ["resnet18", "vgg16", "efficientnet_b0"] @@ -161,8 +161,8 @@ Analyzing Layer Importance .. code-block:: python - from alignment.experiments import LayerIsolatedPruningExperiment - from alignment.analysis import LayerImportanceAnalyzer + from nodelens.experiments import LayerIsolatedPruningExperiment + from nodelens.analysis import LayerImportanceAnalyzer config = ExperimentConfig( name="layer_importance_analysis", @@ -187,8 +187,8 @@ Custom Metric Implementation .. code-block:: python - from alignment.metrics.base import BaseMetric - from alignment.core import register_metric + from nodelens.metrics.base import BaseMetric + from nodelens.core import register_metric import torch @register_metric("gradient_alignment") @@ -278,8 +278,8 @@ Loading and Running .. 
code-block:: python - from alignment.infrastructure.configuration import load_config - from alignment.experiments import create_experiment + from nodelens.infrastructure.configuration import load_config + from nodelens.experiments import create_experiment # Load configuration config = load_config("experiment_config.yaml") @@ -299,7 +299,7 @@ Plotting Metrics .. code-block:: python - from alignment.analysis import MetricVisualizer + from nodelens.analysis import MetricVisualizer # Load results results = load_results("results/my_experiment/") @@ -326,7 +326,7 @@ Generating Reports .. code-block:: python - from alignment.analysis import ReportGenerator + from nodelens.analysis import ReportGenerator # Generate comprehensive report generator = ReportGenerator(results) @@ -394,8 +394,8 @@ Next Steps - :doc:`/user_guide/experiments` - Detailed experiment guide - :doc:`/user_guide/metrics` - All available metrics - :doc:`/user_guide/configuration` - Configuration options -- :doc:`/examples/pruning_experiments` - Advanced examples -- :doc:`/api/index` - Full API reference +- Repository examples and configs - Advanced examples +- Top-level README - Current API entry points Common Issues ------------- @@ -429,10 +429,10 @@ Common Issues .. code-block:: bash # Ensure you're in the right directory - cd alignment + cd nodelens # Reinstall in development mode pip install -e . 
# Check installation - python -c "import alignment; print(alignment.__version__)" + python -c "import nodelens; print(nodelens.__version__)" diff --git a/docs/usage.md b/docs/usage.md index 500098a9..29a58606 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -2,7 +2,7 @@ ## Running Experiments -Experiments are configured via YAML files: +Experiments are configured with YAML files: ```bash python scripts/run_experiment.py --config configs/examples/mnist_basic.yaml @@ -23,16 +23,30 @@ python scripts/run_experiment.py --config configs/examples/mnist_basic.yaml | Config | Description | |--------|-------------| -| `configs/examples/mnist_basic.yaml` | MLP on MNIST | -| `configs/examples/resnet_pruning.yaml` | ResNet-18 pruning on CIFAR-10 | -| `configs/examples/llm_alignment.yaml` | LLM importance scoring | +| `configs/examples/mnist_basic.yaml` | Fast MNIST smoke test | +| `configs/examples/resnet_pruning.yaml` | Small vision pruning example | +| `configs/vision_prune/resnet18_cifar10_unified.yaml` | Vision clustering, halo, cascade, and pruning | +| `configs/prune_llm/llama3_8b_unified.yaml` | Main LLM supernode and SCAR suite | +| `configs/prune_llm/llama3_70b_scale_mechanism.yaml` | Large-model mechanism check | +| `configs/prune_llm/olmo2_7b_pruning_curves.yaml` | OLMo pruning replication | + +See [the config catalog](../configs/README.md) for a fuller list. 
+ +## Experiment Types + +| Type | What it runs | Typical configs | +|------|--------------|-----------------| +| `alignment_analysis` | General activation/alignment metrics and small-model pruning | `configs/examples/*.yaml` | +| `cluster_analysis` | Vision channel clustering, halo analysis, cascade tests, and structured pruning | `configs/vision_prune/*.yaml` | +| `llm_alignment` | Hugging Face LLM metrics, supernodes, SCAR, and structured FFN pruning | `configs/prune_llm/*.yaml` | +| `vision_synergy` | Older focused vision synergy experiments | `configs/examples/vision_synergy.yaml` | ## Configuration Structure ```yaml experiment: name: "my_experiment" - type: "general_alignment" # or "llm_alignment" + type: "cluster_analysis" # or "alignment_analysis", "llm_alignment" seed: 42 device: "cuda" @@ -65,6 +79,9 @@ visualization: See `configs/template.yaml` for all parameters. +New work should prefer the unified-format configs, especially files ending in +`_unified.yaml`. Older example configs are still supported for compatibility. + ## Pruning Configuration ### Basic Pruning @@ -119,7 +136,7 @@ python scripts/run_analysis.py --config configs/analysis_template.yaml \ ### Programmatic Analysis ```python -from alignment.analysis import AnalysisRunner, AnalysisConfig +from nodelens.analysis import AnalysisRunner, AnalysisConfig config = AnalysisConfig( results_dir="./results", @@ -145,14 +162,14 @@ outputs = runner.run() ``` results/experiment_YYYYMMDD_HHMMSS/ -├── experiment_config.yaml -├── experiment.log -├── results_YYYYMMDD_HHMMSS.json -├── checkpoints/ -└── plots/ - ├── training_loss.png - ├── pruning_accuracy.png - └── ... +|-- experiment_config.yaml +|-- experiment.log +|-- results_YYYYMMDD_HHMMSS.json +|-- checkpoints/ +`-- plots/ + |-- training_loss.png + |-- pruning_accuracy.png + `-- ... 
``` ## Workflow Examples @@ -160,13 +177,13 @@ results/experiment_YYYYMMDD_HHMMSS/ ### Vision Experiment ```bash -python scripts/run_experiment.py --config configs/examples/resnet_pruning.yaml +python scripts/run_experiment.py --config configs/vision_prune/resnet18_cifar10_unified.yaml ``` ### LLM Analysis ```bash -python scripts/run_experiment.py --config configs/examples/llm_alignment.yaml +python scripts/run_experiment.py --config configs/prune_llm/llama3_8b_unified.yaml ``` ## Supernode Analysis (LLM) diff --git a/environment.yml b/environment.yml index 4450488d..c03f6fa2 100644 --- a/environment.yml +++ b/environment.yml @@ -1,4 +1,4 @@ -name: alignment +name: nodelens channels: - pytorch - nvidia diff --git a/projects/README.md b/projects/README.md new file mode 100644 index 00000000..3d28b967 --- /dev/null +++ b/projects/README.md @@ -0,0 +1,10 @@ +# Paper Projects + +This directory contains paper-specific release material layered on top of the +general `nodelens` package. Keep reusable code in `src/nodelens/`; keep +paper-specific commands, artifact manifests, and reproduction notes here. + +## Projects + +- `supernodes_scar/`: release material for "Supernodes and Halos: + Loss-Critical Hubs in LLM Feed-Forward Layers". diff --git a/projects/supernodes_scar/ARTIFACTS.md b/projects/supernodes_scar/ARTIFACTS.md new file mode 100644 index 00000000..0da9ba37 --- /dev/null +++ b/projects/supernodes_scar/ARTIFACTS.md @@ -0,0 +1,67 @@ +# Artifact Plan + +This document defines what should be shared alongside the paper and why. + +## Recommended Public Artifacts + +`paper_artifacts/figures/` +: PNG figures used in the arXiv paper. These are useful for quick inspection +and for checking that regenerated figures match the submitted version. + +`paper_artifacts/tables/` +: LaTeX table fragments used by the paper. + +`paper_artifacts/experiments/` +: Compact JSON summaries used for figure/table generation. These are derived +statistics, not raw datasets. 
+ +`raw_results/` +: Locked result JSON files copied from the runs used by the paper, sanitized +and compressed as `.json.gz`. The public paths are stable; internal cluster +paths are not included. + +`configs/` +: Paper experiment configs needed to rerun metric estimation, pruning, and +evaluation. + +`paper_scripts/` +: Active figure/table aggregation scripts used by the current draft. + +`metadata/` +: Release metadata, checksums, git commit, and manifest files. + +## Large Or Restricted Items + +The public artifact repository should not contain model weights. Users should +download models through their original providers and accept the relevant model +licenses. The artifact repository should also not duplicate raw public +benchmarks; instead, document dataset names and versions in the dataset card. + +## Hugging Face vs Zenodo + +Hugging Face Datasets is a good fit for browsable, versioned ML artifacts that +users may download programmatically. Zenodo is better for a citable archival +snapshot with a DOI. The strongest release pattern is: + +1. GitHub release tag for code. +2. Hugging Face dataset repo for result artifacts. +3. Zenodo archive of the GitHub release, plus optionally the artifact bundle, + for DOI-based citation. + +## Minimal Artifact Schema + +Each generated bundle should include: + +```text +README.md +MANIFEST.json +MANIFEST.sha256 +metadata/release_metadata.json +configs/ +paper_artifacts/ +paper_scripts/ +raw_results/ +``` + +`MANIFEST.json` should record relative path, size, SHA256, and artifact group +for every file. It should not record private absolute paths. 
diff --git a/projects/supernodes_scar/README.md b/projects/supernodes_scar/README.md new file mode 100644 index 00000000..a7ee797b --- /dev/null +++ b/projects/supernodes_scar/README.md @@ -0,0 +1,122 @@ +# Supernodes and Halos Release + +This folder is the public-release entry point for the paper: + +> Supernodes and Halos: Loss-Critical Hubs in LLM Feed-Forward Layers + +The reusable implementation lives in the main `nodelens` package. This project +folder records the paper-specific configs, artifact layout, and release process. + +## What To Release + +The public release should have two parts: + +1. A GitHub release/tag for code, configs, and reproduction scripts. +2. A Hugging Face dataset repository for derived artifacts: result JSON files, + paper figures, LaTeX tables, checksums, and a dataset card. + +This split is intentional. Code belongs in GitHub; generated experiment outputs +and larger derived artifacts are easier to consume and version through the +Hugging Face Hub. A Zenodo DOI can additionally archive the GitHub release for +citation stability. + +## Reproduce The Main Runs + +Install the package: + +```bash +conda env create -f environment.yml +conda activate nodelens +pip install -e . +``` + +Run a paper config: + +```bash +python scripts/run_experiment.py \ + --config configs/prune_llm/llama3_8b_unified.yaml \ + --base-output-dir /path/to/results +``` + +Important paper configs include: + +```text +configs/prune_llm/llama3_8b_unified.yaml +configs/prune_llm/mistral_7b_unified.yaml +configs/prune_llm/llama2_7b_unified.yaml +configs/prune_llm/qwen2_7b_unified.yaml +configs/prune_llm/llama3_70b_scale_pruning_curves.yaml +configs/prune_llm/llama3_70b_scale_mechanism.yaml +configs/prune_llm/llama3_70b_scale_benchmarks_50_papersafe.yaml +``` + +The 7B/8B runs are feasible on one A100/H100-class GPU. The 70B validation is a +targeted large-model check and needs substantially more memory or model +parallelism depending on the environment. 
+ +## Build The Artifact Bundle + +The artifact bundle is prepared locally under `outputs/`, which is ignored by +git: + +```bash +python projects/supernodes_scar/scripts/prepare_hf_artifacts.py \ + --output-dir outputs/supernodes_scar_hf \ + --clean + +python projects/supernodes_scar/scripts/verify_hf_artifacts.py \ + outputs/supernodes_scar_hf +``` + +The script copies only releasable material into a clean directory: + +- paper figures and LaTeX tables +- generated numeric summaries and JSON diagnostics +- selected locked result JSON files, sanitized and compressed as `.json.gz` +- experiment configs used by the paper +- active paper-side figure/table scripts +- checksums and a machine-readable manifest +- a Hugging Face dataset-card README + +It intentionally excludes model weights, raw calibration datasets, logs, +checkpoints, Python caches, LaTeX build files, and internal absolute paths. + +## Upload To Hugging Face + +After inspecting `outputs/supernodes_scar_hf`, upload it as a dataset repo: + +```bash +huggingface-cli login +huggingface-cli repo create supernodes-scar-artifacts --type dataset +huggingface-cli upload hsafaai/supernodes-scar-artifacts \ + outputs/supernodes_scar_hf \ + --repo-type dataset +``` + +For very large bundles, use the `huggingface_hub` large-folder upload workflow +instead of the simple CLI upload. + +## What Not To Upload + +Do not upload: + +- Llama, Mistral, Qwen, or OLMo model weights. +- Raw WikiText-2, C4, MMLU, or LM Evaluation Harness datasets. +- Cluster logs, SLURM stdout/stderr, checkpoints, caches, or private paths. +- Any file containing access tokens, usernames beyond public author metadata, + or absolute Harvard cluster paths. + +## Release Checklist + +- `python -m pip install -e . --no-deps --dry-run` succeeds. +- `PYTHONPATH=src python -c "import nodelens; print(nodelens.__version__)"` + succeeds. +- The artifact bundle has no `.pyc`, `__pycache__`, `.aux`, `.log`, `.out`, + model checkpoint, or raw dataset files. 
+- A private-path scan over both plain text files and compressed `.json.gz` + files returns no internal cluster paths. +- `MANIFEST.sha256` verifies all staged artifacts. +- GitHub release tag, Hugging Face dataset revision, and arXiv version are + recorded together in the dataset card. + +See `REPRODUCIBILITY.md` for the local rerun and figure-regeneration workflow. diff --git a/projects/supernodes_scar/REPRODUCIBILITY.md b/projects/supernodes_scar/REPRODUCIBILITY.md new file mode 100644 index 00000000..da1bedbd --- /dev/null +++ b/projects/supernodes_scar/REPRODUCIBILITY.md @@ -0,0 +1,110 @@ +# Reproducibility Notes + +This page describes the release workflow for the paper. It separates three +tasks: rerunning experiments, rebuilding figures and tables from locked outputs, +and rebuilding the arXiv PDF. + +The public GitHub repository contains the reusable code, configs, project +metadata, and artifact-packaging scripts. The private paper-source checkout may +also contain draft-only LaTeX files and maintainer scripts; those paths are +called out below when they are needed. + +## 1. Rerun Experiments + +Install the code: + +```bash +conda env create -f environment.yml +conda activate nodelens +pip install -e . +``` + +Run the main 8B config: + +```bash +python scripts/run_experiment.py \ + --config configs/prune_llm/llama3_8b_unified.yaml \ + --base-output-dir /path/to/results/Prune_LLM +``` + +The main paper configs are listed in `projects/supernodes_scar/README.md`. +Large runs, especially the 70B validation, require substantial GPU memory and +should usually be launched through the local cluster workflow. + +## 2. Rebuild Figures And Tables From Locked Outputs + +The paper figures and tables are regenerated from locked result JSON files. 
The +release bundle stores those JSON files under `raw_results/` as sanitized +`.json.gz` files and records their public names in: + +```text +metadata/result_sources.json +``` + +For maintainers with the private paper-source checkout, the active paper scripts +can be rerun against the original locked output folders: + +```bash +python drafts/LLM_prune/paper/scripts/regenerate_fig1_overview.py +python drafts/LLM_prune/paper/scripts/regenerate_fig2_halo.py +python drafts/LLM_prune/paper/scripts/generate_70b_scale_figures.py +python drafts/LLM_prune/paper/scripts/generate_lp_vs_activation_overlap_figure.py +python drafts/LLM_prune/paper/scripts/generate_lp_vs_activation_supernode_figure.py +python drafts/LLM_prune/paper/scripts/collect_paper_artifacts.py \ + --results-base /path/to/results/Prune_LLM/PAPER \ + --draft-dir drafts/LLM_prune +``` + +The public artifact bundle also includes the active paper scripts under +`paper_scripts/`, plus compact derived summaries under +`paper_artifacts/experiments/`. Some scripts use path constants because they +were designed for the locked local paper tree; update those constants or run the +script from a checkout that has the original output folders available. + +## 3. Rebuild The Paper + +For maintainers with the private paper-source checkout, the paper has one shared +body file: + +```text +drafts/LLM_prune/paper_body.tex +``` + +Build the arXiv and anonymous versions: + +```bash +cd drafts/LLM_prune +./compile_pdf.sh paper_arxiv.tex +./compile_pdf.sh paper_icml.tex +``` + +## 4. 
Build And Verify The Hugging Face Bundle + +```bash +python projects/supernodes_scar/scripts/prepare_hf_artifacts.py \ + --output-dir outputs/supernodes_scar_hf \ + --clean + +python projects/supernodes_scar/scripts/verify_hf_artifacts.py \ + outputs/supernodes_scar_hf +``` + +The verifier checks: + +- `MANIFEST.sha256` +- absence of Python caches, LaTeX build files, PDFs, checkpoints, model weights, + and raw datasets +- absence of private local paths in plain text and compressed `.json.gz` files + +## 5. Local Storage Policy + +Uploading to Hugging Face is not a replacement for local retention. Maintainers +should keep: + +- the frozen HF bundle under `outputs/supernodes_scar_hf` +- the original locked result folders used to regenerate paper figures +- the arXiv source bundle under `drafts/LLM_prune/arxiv_bundle.tar.gz` +- the Git commit or release tag associated with the upload + +This lets future work continue from the exact paper state while the public HF +repo remains a clean, portable snapshot. diff --git a/projects/supernodes_scar/hf_dataset_card.md b/projects/supernodes_scar/hf_dataset_card.md new file mode 100644 index 00000000..2b11a992 --- /dev/null +++ b/projects/supernodes_scar/hf_dataset_card.md @@ -0,0 +1,75 @@ +--- +license: mit +pretty_name: "Supernodes and Halos Reproducibility Artifacts" +task_categories: +- text-generation +tags: +- large-language-models +- pruning +- structured-pruning +- mechanistic-interpretability +- loss-sensitivity +- reproducibility +size_categories: +- n<1K +--- + +# Supernodes and Halos Reproducibility Artifacts + +This dataset repository contains derived artifacts for the paper +"Supernodes and Halos: Loss-Critical Hubs in LLM Feed-Forward Layers". + +It is not a training dataset and does not include model weights. The files here +are intended to make the paper results inspectable and reproducible: compressed +result JSON files, generated figures, LaTeX tables, experiment configs, active +paper scripts, and checksums. 
+ +## Contents + +```text +MANIFEST.json +MANIFEST.sha256 +metadata/release_metadata.json +configs/ +paper_artifacts/ +paper_scripts/ +raw_results/ +docs/ +``` + +## How To Use + +Download the artifact bundle and inspect the manifest: + +```bash +huggingface-cli download hsafaai/supernodes-scar-artifacts \ + --repo-type dataset \ + --local-dir supernodes_scar_artifacts + +cd supernodes_scar_artifacts +sha256sum -c MANIFEST.sha256 +``` + +The corresponding code release is available at: + +```text +https://github.com/KempnerInstitute/nodelens +``` + +Use the configs in `configs/` with `scripts/run_experiment.py` from the code +repo to rerun the experiments. + +The file `metadata/result_sources.json` lists the public artifact path for each +locked result JSON used in the paper. + +## Data And Model Sources + +The experiments use public model families and public evaluation/calibration +datasets through their original providers and licenses. This artifact repository +does not redistribute those raw assets. + +## Limitations + +Some full reruns require substantial GPU memory and time, especially the 70B +validation. The artifact bundle is meant to support inspection and targeted +reproduction without requiring every reader to rerun all large-model jobs. 
diff --git a/projects/supernodes_scar/release_manifest.yaml b/projects/supernodes_scar/release_manifest.yaml new file mode 100644 index 00000000..7c1cd526 --- /dev/null +++ b/projects/supernodes_scar/release_manifest.yaml @@ -0,0 +1,56 @@ +project: supernodes_scar +paper_title: "Supernodes and Halos: Loss-Critical Hubs in LLM Feed-Forward Layers" +code_package: nodelens +code_license: MIT + +public_release: + code: + location: https://github.com/KempnerInstitute/nodelens + include: + - src/nodelens + - scripts/run_experiment.py + - configs/prune_llm + - projects/supernodes_scar + - tests + - README.md + - LICENSE + - pyproject.toml + exclude: + - drafts + - checkpoints + - raw datasets + - logs + - results + - Python caches + + artifacts: + location: https://huggingface.co/datasets/hsafaai/supernodes-scar-artifacts + include: + - paper figures + - paper LaTeX table fragments + - derived JSON summaries + - sanitized locked result JSON files compressed as .json.gz + - paper configs + - active paper figure/table scripts + - checksums and release metadata + - sanitized result-source index + exclude: + - pretrained model weights + - raw calibration/evaluation datasets + - private cluster paths + - SLURM logs + - checkpoints + - LaTeX build files + + archival: + location: Zenodo + include: + - GitHub release snapshot + - optional artifact bundle snapshot + purpose: DOI and long-term citation stability + +artifact_builder: + command: >- + python projects/supernodes_scar/scripts/prepare_hf_artifacts.py + --output-dir outputs/supernodes_scar_hf + --clean diff --git a/projects/supernodes_scar/scripts/prepare_hf_artifacts.py b/projects/supernodes_scar/scripts/prepare_hf_artifacts.py new file mode 100644 index 00000000..bf4bbd68 --- /dev/null +++ b/projects/supernodes_scar/scripts/prepare_hf_artifacts.py @@ -0,0 +1,346 @@ +#!/usr/bin/env python3 +"""Prepare a sanitized Hugging Face artifact bundle for the supernodes paper.""" + +from __future__ import annotations + +import 
argparse +import datetime as dt +import gzip +import hashlib +import json +import re +import shutil +import subprocess +from pathlib import Path +from typing import Any + +EXCLUDED_SUFFIXES = { + ".aux", + ".blg", + ".fdb_latexmk", + ".fls", + ".log", + ".out", + ".pdf", + ".pyc", + ".synctex.gz", +} + +TEXT_SUFFIXES = { + ".json", + ".md", + ".py", + ".tex", + ".toml", + ".txt", + ".yaml", + ".yml", +} + +PRIVATE_PATH_PATTERNS = ( + (re.compile("/" + r"n/home[0-9]*/[^\s\"',)]+"), "/path/to/user_home"), + (re.compile("/" + r"n/[^\s\"',)]+"), "/path/to/internal_storage"), +) + + +def sha256_file(path: Path) -> str: + digest = hashlib.sha256() + with path.open("rb") as handle: + for chunk in iter(lambda: handle.read(1024 * 1024), b""): + digest.update(chunk) + return digest.hexdigest() + + +def git_commit(repo_root: Path) -> str | None: + try: + return subprocess.check_output( + ["git", "rev-parse", "HEAD"], + cwd=repo_root, + text=True, + stderr=subprocess.DEVNULL, + ).strip() + except Exception: + return None + + +def git_is_dirty(repo_root: Path) -> bool | None: + try: + status = subprocess.check_output( + ["git", "status", "--porcelain"], + cwd=repo_root, + text=True, + stderr=subprocess.DEVNULL, + ) + return bool(status.strip()) + except Exception: + return None + + +def should_skip(path: Path) -> bool: + parts = set(path.parts) + if "__pycache__" in parts: + return True + name = path.name + return any(name.endswith(suffix) for suffix in EXCLUDED_SUFFIXES) + + +def sanitize_text(text: str) -> str: + for pattern, replacement in PRIVATE_PATH_PATTERNS: + text = pattern.sub(replacement, text) + return text + + +def iter_sanitized_lines(src: Path): + with src.open("r", encoding="utf-8") as handle: + for line in handle: + yield sanitize_text(line) + + +def is_text_like(path: Path) -> bool: + return any(path.name.endswith(suffix) for suffix in TEXT_SUFFIXES) + + +def copy_one( + src: Path, + dst: Path, + group: str, + entries: list[dict[str, Any]], + *, + sanitize: 
bool = True, +) -> None: + if not src.exists() or not src.is_file() or should_skip(src): + return + dst.parent.mkdir(parents=True, exist_ok=True) + if sanitize and is_text_like(src): + with dst.open("w", encoding="utf-8") as handle: + handle.writelines(iter_sanitized_lines(src)) + else: + shutil.copy2(src, dst) + entries.append( + { + "path": dst.as_posix(), + "group": group, + "bytes": dst.stat().st_size, + "sha256": sha256_file(dst), + } + ) + + +def copy_text_gzip(src: Path, dst: Path, group: str, entries: list[dict[str, Any]]) -> None: + if not src.exists() or not src.is_file() or should_skip(src): + return + dst.parent.mkdir(parents=True, exist_ok=True) + with gzip.open(dst, "wt", encoding="utf-8", compresslevel=1) as handle: + handle.writelines(iter_sanitized_lines(src)) + entries.append( + { + "path": dst.as_posix(), + "group": group, + "bytes": dst.stat().st_size, + "sha256": sha256_file(dst), + } + ) + + +def copy_tree_files(src_root: Path, dst_root: Path, group: str, entries: list[dict[str, Any]]) -> None: + if not src_root.exists(): + return + for src in sorted(p for p in src_root.rglob("*") if p.is_file()): + if should_skip(src): + continue + copy_one(src, dst_root / src.relative_to(src_root), group, entries) + + +def load_json(path: Path) -> dict[str, Any]: + if not path.exists(): + return {} + with path.open("r", encoding="utf-8") as handle: + data = json.load(handle) + return data if isinstance(data, dict) else {} + + +def copy_named_json_sources( + mapping: dict[str, Any], + dst_root: Path, + group: str, + entries: list[dict[str, Any]], + source_entries: list[dict[str, Any]], + *, + compress: bool = True, +) -> None: + for key, raw_path in sorted(mapping.items()): + if not isinstance(raw_path, str): + continue + src = Path(raw_path) + if not src.exists() or should_skip(src): + continue + suffix = "".join(src.suffixes) or ".json" + safe_name = f"{key}{suffix}.gz" if compress else f"{key}{suffix}" + public_path = (dst_root / 
safe_name).relative_to(dst_root.parents[1]).as_posix() + print(f"Staging result: {public_path}", flush=True) + if compress: + copy_text_gzip(src, dst_root / safe_name, group, entries) + else: + copy_one(src, dst_root / safe_name, group, entries) + source_entries.append( + { + "name": key, + "artifact_path": public_path, + "source_kind": "locked_result_json", + "compressed": compress, + } + ) + + +def write_json(path: Path, payload: dict[str, Any] | list[dict[str, Any]]) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("w", encoding="utf-8") as handle: + json.dump(payload, handle, indent=2, sort_keys=True) + handle.write("\n") + + +def relativize_entries(entries: list[dict[str, Any]], root: Path) -> list[dict[str, Any]]: + clean_entries: list[dict[str, Any]] = [] + for item in entries: + clean_item = dict(item) + path = Path(str(clean_item["path"])) + if path.is_absolute(): + clean_item["path"] = path.relative_to(root).as_posix() + clean_entries.append(clean_item) + return clean_entries + + +def main() -> int: + script_path = Path(__file__).resolve() + default_repo_root = script_path.parents[3] + + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--repo-root", type=Path, default=default_repo_root) + parser.add_argument("--paper-dir", type=Path, default=None) + parser.add_argument("--output-dir", type=Path, default=None) + parser.add_argument("--clean", action="store_true", help="Remove output directory before staging.") + args = parser.parse_args() + + repo_root = args.repo_root.resolve() + paper_dir = (args.paper_dir or repo_root / "drafts" / "LLM_prune").resolve() + output_dir = (args.output_dir or repo_root / "outputs" / "supernodes_scar_hf").resolve() + project_dir = repo_root / "projects" / "supernodes_scar" + + if args.clean and output_dir.exists(): + shutil.rmtree(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + entries: list[dict[str, Any]] = [] + source_entries: list[dict[str, Any]] = [] + 
+ copy_one(project_dir / "hf_dataset_card.md", output_dir / "README.md", "dataset_card", entries) + copy_one(project_dir / "README.md", output_dir / "docs" / "PROJECT_README.md", "docs", entries) + copy_one(project_dir / "ARTIFACTS.md", output_dir / "docs" / "ARTIFACTS.md", "docs", entries) + copy_one(project_dir / "REPRODUCIBILITY.md", output_dir / "docs" / "REPRODUCIBILITY.md", "docs", entries) + copy_one(project_dir / "release_manifest.yaml", output_dir / "docs" / "release_manifest.yaml", "docs", entries) + + copy_one(repo_root / "README.md", output_dir / "code_metadata" / "README.md", "code_metadata", entries) + copy_one(repo_root / "LICENSE", output_dir / "code_metadata" / "LICENSE", "code_metadata", entries) + copy_one(repo_root / "pyproject.toml", output_dir / "code_metadata" / "pyproject.toml", "code_metadata", entries) + + copy_tree_files(repo_root / "configs" / "prune_llm", output_dir / "configs" / "prune_llm", "configs", entries) + copy_tree_files(paper_dir / "paper" / "configs", output_dir / "configs" / "paper_side", "configs", entries) + + copy_tree_files(paper_dir / "figures", output_dir / "paper_artifacts" / "figures", "paper_figures", entries) + copy_tree_files(paper_dir / "paper_artifacts" / "tables", output_dir / "paper_artifacts" / "tables", "paper_tables", entries) + copy_one(paper_dir / "paper_artifacts" / "numbers.tex", output_dir / "paper_artifacts" / "numbers.tex", "paper_tables", entries) + for summary_name in ("olmo_trajectory.json", "olmo_pruning_summary.json"): + copy_one( + paper_dir / "paper_artifacts" / summary_name, + output_dir / "paper_artifacts" / "experiments" / summary_name, + "derived_experiment_summaries", + entries, + ) + copy_tree_files( + paper_dir / "paper_artifacts" / "experiments", + output_dir / "paper_artifacts" / "experiments", + "derived_experiment_summaries", + entries, + ) + + copy_tree_files(paper_dir / "paper" / "scripts", output_dir / "paper_scripts", "paper_scripts", entries) + + expanded_manifest = 
load_json(paper_dir / "paper_artifacts" / "repro_manifest_expanded.json") + collector = expanded_manifest.get("collector_manifest", {}) + if isinstance(collector, dict): + results_files = collector.get("results_files", {}) + if isinstance(results_files, dict): + copy_named_json_sources( + results_files, + output_dir / "raw_results" / "main_runs", + "raw_results", + entries, + source_entries, + ) + for source_group in ("extra_external_runs", "repo_side_aggregated_inputs"): + mapping = expanded_manifest.get(source_group, {}) + if isinstance(mapping, dict): + copy_named_json_sources( + mapping, + output_dir / "raw_results" / source_group, + "raw_results", + entries, + source_entries, + ) + + entries = sorted(relativize_entries(entries, output_dir), key=lambda item: item["path"]) + manifest_path = output_dir / "MANIFEST.json" + sha_path = output_dir / "MANIFEST.sha256" + metadata_path = output_dir / "metadata" / "release_metadata.json" + result_sources_path = output_dir / "metadata" / "result_sources.json" + + metadata = { + "project": "supernodes_scar", + "created_utc": dt.datetime.now(dt.timezone.utc).isoformat(), + "repo_commit": git_commit(repo_root), + "repo_dirty": git_is_dirty(repo_root), + "artifact_count": len(entries), + "artifact_bytes": sum(int(item["bytes"]) for item in entries), + "notes": [ + "Manifest paths are relative to the dataset repository root.", + "Internal source paths are intentionally not included.", + "No model weights, raw datasets, logs, or checkpoints are included.", + ], + } + + # Include manifest files themselves in the checksum set. 
+ write_json(metadata_path, metadata) + write_json(result_sources_path, {"results": sorted(source_entries, key=lambda item: item["artifact_path"])}) + entries.append( + { + "path": metadata_path.relative_to(output_dir).as_posix(), + "group": "metadata", + "bytes": metadata_path.stat().st_size, + "sha256": sha256_file(metadata_path), + } + ) + entries.append( + { + "path": result_sources_path.relative_to(output_dir).as_posix(), + "group": "metadata", + "bytes": result_sources_path.stat().st_size, + "sha256": sha256_file(result_sources_path), + } + ) + entries = sorted(entries, key=lambda item: item["path"]) + write_json(manifest_path, entries) + + with sha_path.open("w", encoding="utf-8") as handle: + for item in entries: + handle.write(f"{item['sha256']} {item['path']}\n") + + print(f"Wrote artifact bundle: {output_dir}") + print(f"Files: {len(entries)}") + print(f"Bytes: {sum(int(item['bytes']) for item in entries):,}") + print(f"Manifest: {manifest_path}") + print(f"Checksums: {sha_path}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/projects/supernodes_scar/scripts/verify_hf_artifacts.py b/projects/supernodes_scar/scripts/verify_hf_artifacts.py new file mode 100644 index 00000000..cacff5c3 --- /dev/null +++ b/projects/supernodes_scar/scripts/verify_hf_artifacts.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +"""Verify a staged supernodes artifact bundle before upload.""" + +from __future__ import annotations + +import argparse +import gzip +import hashlib +from pathlib import Path + +FORBIDDEN_SUFFIXES = ( + ".aux", + ".blg", + ".ckpt", + ".fdb_latexmk", + ".fls", + ".log", + ".out", + ".pdf", + ".pt", + ".pth", + ".pyc", + ".safetensors", + ".synctex.gz", +) + +PRIVATE_PATTERNS = ( + ("/" + "n/").encode(), + ("Users" + "/").encode(), + ("/" + "home").encode(), + b"HF_TOKEN", + b"WANDB_API_KEY", +) + +TEXT_SUFFIXES = ( + ".json", + ".md", + ".py", + ".tex", + ".toml", + ".txt", + ".yaml", + ".yml", +) + + +def 
sha256_file(path: Path) -> str: + digest = hashlib.sha256() + with path.open("rb") as handle: + for chunk in iter(lambda: handle.read(1024 * 1024), b""): + digest.update(chunk) + return digest.hexdigest() + + +def verify_manifest(root: Path) -> list[str]: + errors: list[str] = [] + manifest = root / "MANIFEST.sha256" + if not manifest.exists(): + return ["Missing MANIFEST.sha256"] + for line_no, raw in enumerate(manifest.read_text(encoding="utf-8").splitlines(), start=1): + if not raw.strip(): + continue + try: + expected, rel = raw.split(None, 1) + except ValueError: + errors.append(f"Bad checksum line {line_no}: {raw}") + continue + path = root / rel.strip() + if not path.exists(): + errors.append(f"Missing file from manifest: {rel}") + continue + got = sha256_file(path) + if got != expected: + errors.append(f"Checksum mismatch: {rel}") + return errors + + +def verify_forbidden_files(root: Path) -> list[str]: + errors: list[str] = [] + for path in root.rglob("*"): + if not path.is_file(): + continue + rel = path.relative_to(root).as_posix() + if "__pycache__" in path.parts or any(path.name.endswith(suffix) for suffix in FORBIDDEN_SUFFIXES): + errors.append(f"Forbidden file: {rel}") + return errors + + +def scan_bytes(path: Path, lines) -> str | None: + for line_no, line in lines: + if any(pattern in line for pattern in PRIVATE_PATTERNS): + return f"{path}: private pattern on line {line_no}" + return None + + +def verify_private_paths(root: Path) -> list[str]: + errors: list[str] = [] + for path in root.rglob("*"): + if not path.is_file(): + continue + rel = path.relative_to(root) + if path.name.endswith(".gz"): + with gzip.open(path, "rb") as handle: + hit = scan_bytes(rel, enumerate(handle, start=1)) + elif not path.name.endswith(TEXT_SUFFIXES): + hit = None + else: + try: + with path.open("rb") as handle: + hit = scan_bytes(rel, enumerate(handle, start=1)) + except UnicodeDecodeError: + hit = None + if hit: + errors.append(hit) + return errors + + +def main() 
-> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("bundle_dir", type=Path) + args = parser.parse_args() + + root = args.bundle_dir.resolve() + errors: list[str] = [] + errors.extend(verify_manifest(root)) + errors.extend(verify_forbidden_files(root)) + errors.extend(verify_private_paths(root)) + + if errors: + for err in errors: + print(f"ERROR: {err}") + return 1 + + print(f"Artifact bundle verified: {root}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/pyproject.toml b/pyproject.toml index fbe9b58d..c48e57f2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,15 +3,16 @@ requires = ["setuptools", "wheel"] build-backend = "setuptools.build_meta" [project] -name = "alignment" -version = "0.1.0" -description = "A package for performing alignment analysis on different models." +name = "nodelens" +version = "0.2.0" +description = "Node and channel metrics for neural network interpretability, importance, and intervention studies." 
authors = [ { name = "Research and Engineering at Kempner Institute", email = "kempner-research-engineering@g.harvard.edu" }, ] readme = "README.md" requires-python = ">=3.8" -license = { file = "LICENSE" } +license = "MIT" +license-files = ["LICENSE"] dependencies = [ "numpy", "torch", @@ -26,7 +27,10 @@ dependencies = [ [tool.setuptools] package-dir = { "" = "src" } -packages = ["alignment"] + +[tool.setuptools.packages.find] +where = ["src"] +include = ["nodelens*"] [project.optional-dependencies] @@ -54,7 +58,7 @@ train = [ "msgspec>=0.14.0", ] all = [ - "alignment[dev,train]", + "nodelens[dev,train]", ] docs = [ @@ -72,12 +76,10 @@ docs = [ [project.urls] -Homepage = "https://github.com/KempnerInstitute/alignment" -Repository = "https://github.com/KempnerInstitute/alignment" - -[tool.setuptools.dynamic] -version = { attr = "alignment.__version__" } - +Homepage = "https://github.com/KempnerInstitute/nodelens" +Repository = "https://github.com/KempnerInstitute/nodelens" +Documentation = "https://github.com/KempnerInstitute/nodelens#readme" +Artifacts = "https://huggingface.co/datasets/hsafaai/supernodes-scar-artifacts" [tool.black] line-length = 150 @@ -94,7 +96,7 @@ extend-ignore = ["E402", "E721", "E722"] [tool.ruff.lint.per-file-ignores] "__init__.py" = ["F401"] # Allow unused imports in __init__ files (used for exports) -"src/alignment/external/**" = ["E721", "E722"] # Allow in external code +"src/nodelens/external/**" = ["E721", "E722"] # Allow in external code [tool.pytest.ini_options] testpaths = ["tests"] diff --git a/scripts/README.md b/scripts/README.md index dbfd0147..89bf36c9 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -14,7 +14,7 @@ python scripts/run_experiment.py --config configs/examples/mnist_basic.yaml python scripts/run_experiment.py --config configs/prune_llm/llama3_8b_full.yaml # Cluster-based analysis -python scripts/run_experiment.py --config configs/cluster_analysis/resnet18_cifar10_full.yaml +python 
scripts/run_experiment.py --config configs/vision_prune/resnet18_cifar10_full.yaml ``` Options: diff --git a/scripts/extend_run.py b/scripts/extend_run.py index 4b3e7041..60218432 100644 --- a/scripts/extend_run.py +++ b/scripts/extend_run.py @@ -97,7 +97,7 @@ def _build_cluster_experiment(repo_root: Path, cfg: Dict[str, Any]): sys.path.insert(0, str(repo_root)) sys.path.insert(0, str(repo_root / "src")) - from alignment.experiments.base import ExperimentConfig + from nodelens.experiments.base import ExperimentConfig allowed = {f.name for f in fields(ExperimentConfig)} config = ExperimentConfig(**{k: v for k, v in cfg.items() if k in allowed}) diff --git a/scripts/run_analysis.py b/scripts/run_analysis.py index 15e63724..8792893f 100644 --- a/scripts/run_analysis.py +++ b/scripts/run_analysis.py @@ -25,7 +25,7 @@ python scripts/run_analysis.py --results-dir ./results --quick For more control, use the AnalysisRunner class directly: - from alignment.analysis import AnalysisRunner, AnalysisConfig + from nodelens.analysis import AnalysisRunner, AnalysisConfig config = AnalysisConfig( results_dir="./results", @@ -42,11 +42,11 @@ from pathlib import Path try: - from alignment.analysis import AnalysisConfig, AnalysisRunner + from nodelens.analysis import AnalysisConfig, AnalysisRunner except ImportError: # Add src to path for development (repo-local runs without installing the package) sys.path.insert(0, str(Path(__file__).parent.parent / "src")) - from alignment.analysis import AnalysisConfig, AnalysisRunner + from nodelens.analysis import AnalysisConfig, AnalysisRunner logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") logger = logging.getLogger(__name__) diff --git a/scripts/run_experiment.py b/scripts/run_experiment.py index c8acd24b..0073bfb1 100644 --- a/scripts/run_experiment.py +++ b/scripts/run_experiment.py @@ -34,9 +34,9 @@ import torch try: - from alignment.experiments.cluster_experiments import 
ClusterAnalysisExperiment - from alignment.experiments.general_alignment import GeneralAlignmentExperiment - from alignment.experiments.llm_experiments import LLMAlignmentExperiment + from nodelens.experiments.cluster_experiments import ClusterAnalysisExperiment + from nodelens.experiments.general_alignment import GeneralAlignmentExperiment + from nodelens.experiments.llm_experiments import LLMAlignmentExperiment except ImportError: # Repo-local runs (without installing the package): add project root + src/ to sys.path. current_dir = os.path.dirname(os.path.abspath(__file__)) @@ -44,9 +44,9 @@ sys.path.insert(0, repo_root) sys.path.insert(0, os.path.join(repo_root, "src")) - from alignment.experiments.cluster_experiments import ClusterAnalysisExperiment # backward compat - from alignment.experiments.general_alignment import GeneralAlignmentExperiment - from alignment.experiments.llm_experiments import LLMAlignmentExperiment + from nodelens.experiments.cluster_experiments import ClusterAnalysisExperiment # backward compat + from nodelens.experiments.general_alignment import GeneralAlignmentExperiment + from nodelens.experiments.llm_experiments import LLMAlignmentExperiment # Configure tqdm to avoid ANSI escape codes in log files (common under SLURM). 
try: @@ -275,8 +275,8 @@ def _get_nested(obj, key, default): # --------------------------------------------------------------- # Create model using torchvision + registry-based stem adaptation # --------------------------------------------------------------- - from alignment.dataops.datasets.unified_dataset import DATASET_CONFIGS - from alignment.models.hub import adapt_model_for_dataset + from nodelens.dataops.datasets.unified_dataset import DATASET_CONFIGS + from nodelens.models.hub import adapt_model_for_dataset requested_model_name = str(cluster_config.model_name).lower() dataset_name = str(cluster_config.dataset_name).lower() @@ -358,7 +358,7 @@ def _get_nested(obj, key, default): model.classifier = torch.nn.Linear(model.classifier.in_features, num_classes) # Adapt model stem for dataset resolution (CIFAR, Tiny-ImageNet, etc.) - # This is now handled by a shared utility in src/alignment/models/hub.py + # This is now handled by a shared utility in src/nodelens/models/hub.py adapt_model_for_dataset(model, resolved_model_name, dataset_name, pretrained=pretrained) # Optional: explicit checkpoint @@ -392,7 +392,7 @@ def _get_nested(obj, key, default): if dataset_name not in DATASET_CONFIGS: raise ValueError(f"Unknown dataset: {dataset_name}. 
Available: {list(DATASET_CONFIGS.keys())}") - from alignment.dataops.datasets.unified_dataset import UnifiedDataset + from nodelens.dataops.datasets.unified_dataset import UnifiedDataset train_dataset = UnifiedDataset( dataset_type=dataset_name, @@ -640,7 +640,7 @@ def run_post_analysis(config, results_file: Path, output_dir: Path): logger.info("Running post-experiment analysis...") try: - from alignment.analysis import AnalysisConfig, AnalysisRunner + from nodelens.analysis import AnalysisConfig, AnalysisRunner # Build analysis config from post_analysis block analysis_config = AnalysisConfig( @@ -683,7 +683,7 @@ def _regenerate_llm_visualizations(experiment, results: dict, output_dir: Path): matplotlib.use("Agg") # Non-interactive backend import matplotlib.pyplot as plt - from alignment.analysis.visualization import UnifiedVisualizer + from nodelens.analysis.visualization import UnifiedVisualizer # Determine plots directory if (output_dir / "figures").exists(): @@ -851,7 +851,7 @@ def _create_job_directory(config, args, timestamp: str) -> Path: Returns: Path to the created job directory. """ - from alignment.infrastructure.storage import create_job_directory, get_slurm_job_id + from nodelens.infrastructure.storage import create_job_directory, get_slurm_job_id experiment_name = getattr(config, "name", "experiment") @@ -928,7 +928,7 @@ def main(): # Load config (support key=value overrides passed after args) # Example: # python scripts/run_experiment.py --config ... 
name="llama3_8b_paper_main" supernode.protect_core=false - from alignment.configs.config_loader import load_config_with_overrides as proper_load_config + from nodelens.configs.config_loader import load_config_with_overrides as proper_load_config cli_overrides = [x for x in (unknown or []) if isinstance(x, str) and "=" in x] config = proper_load_config(args.config, overrides=overrides or None, cli_args=cli_overrides or None) diff --git a/src/alignment/__init__.py b/src/nodelens/__init__.py similarity index 88% rename from src/alignment/__init__.py rename to src/nodelens/__init__.py index 359bb2ae..9ebd6b76 100644 --- a/src/alignment/__init__.py +++ b/src/nodelens/__init__.py @@ -1,8 +1,8 @@ """ -Neural Network Alignment Framework +NodeLens package. -A comprehensive framework for analyzing neural network representations -through information-theoretic metrics and alignment measures. +NodeLens provides metrics, analyses, and interventions for studying neural +network channels, neurons, and other node-like components. """ # Core functionality diff --git a/src/alignment/analysis/README.md b/src/nodelens/analysis/README.md similarity index 88% rename from src/alignment/analysis/README.md rename to src/nodelens/analysis/README.md index 47f48083..ddd30eb3 100644 --- a/src/alignment/analysis/README.md +++ b/src/nodelens/analysis/README.md @@ -29,7 +29,7 @@ Result analysis, visualization, clustering, and reporting. 
### Cluster Analysis ```python -from alignment.analysis.clustering import MetricSpaceClustering, CrossLayerHaloAnalysis +from nodelens.analysis.clustering import MetricSpaceClustering, CrossLayerHaloAnalysis # Cluster channels clusterer = MetricSpaceClustering(n_clusters=4) @@ -43,7 +43,7 @@ halo_idx, influence = halo_analyzer.find_halo(weights, cluster_indices) ### Cascade Testing ```python -from alignment.analysis import CascadeAnalysis +from nodelens.analysis import CascadeAnalysis cascade = CascadeAnalysis(model, test_loader, device="cuda") cascade.baseline() @@ -53,7 +53,7 @@ results = cascade.by_cluster(layer_name, labels, type_mapping, n_rm=5) ### Visualization ```python -from alignment.analysis.visualization import plot_metric_scatter, plot_cluster_evolution +from nodelens.analysis.visualization import plot_metric_scatter, plot_cluster_evolution plot_metric_scatter(rq, red, syn, labels, type_map, "layer1", "scatter.png") plot_cluster_evolution(layer_results, "evolution.png") @@ -62,7 +62,7 @@ plot_cluster_evolution(layer_results, "evolution.png") ### General Analysis ```python -from alignment.analysis import AnalysisRunner, AnalysisConfig +from nodelens.analysis import AnalysisRunner, AnalysisConfig config = AnalysisConfig( results_dir="./results", diff --git a/src/alignment/analysis/__init__.py b/src/nodelens/analysis/__init__.py similarity index 100% rename from src/alignment/analysis/__init__.py rename to src/nodelens/analysis/__init__.py diff --git a/src/alignment/analysis/aggregation/__init__.py b/src/nodelens/analysis/aggregation/__init__.py similarity index 100% rename from src/alignment/analysis/aggregation/__init__.py rename to src/nodelens/analysis/aggregation/__init__.py diff --git a/src/alignment/analysis/aggregation/layers.py b/src/nodelens/analysis/aggregation/layers.py similarity index 100% rename from src/alignment/analysis/aggregation/layers.py rename to src/nodelens/analysis/aggregation/layers.py diff --git 
a/src/alignment/analysis/aggregation/metrics.py b/src/nodelens/analysis/aggregation/metrics.py similarity index 100% rename from src/alignment/analysis/aggregation/metrics.py rename to src/nodelens/analysis/aggregation/metrics.py diff --git a/src/alignment/analysis/aggregation/results.py b/src/nodelens/analysis/aggregation/results.py similarity index 100% rename from src/alignment/analysis/aggregation/results.py rename to src/nodelens/analysis/aggregation/results.py diff --git a/src/alignment/analysis/analysis_runner.py b/src/nodelens/analysis/analysis_runner.py similarity index 99% rename from src/alignment/analysis/analysis_runner.py rename to src/nodelens/analysis/analysis_runner.py index dbaed632..0b524189 100644 --- a/src/alignment/analysis/analysis_runner.py +++ b/src/nodelens/analysis/analysis_runner.py @@ -6,13 +6,13 @@ scripts, and other modules into a unified interface. Usage: - from alignment.analysis import AnalysisRunner + from nodelens.analysis import AnalysisRunner runner = AnalysisRunner(config) runner.run() Or via CLI: - python -m alignment.analysis.analysis_runner --config analysis_config.yaml + python -m nodelens.analysis.analysis_runner --config analysis_config.yaml """ import json diff --git a/src/alignment/analysis/cascade_analysis.py b/src/nodelens/analysis/cascade_analysis.py similarity index 100% rename from src/alignment/analysis/cascade_analysis.py rename to src/nodelens/analysis/cascade_analysis.py diff --git a/src/alignment/analysis/clustering/__init__.py b/src/nodelens/analysis/clustering/__init__.py similarity index 100% rename from src/alignment/analysis/clustering/__init__.py rename to src/nodelens/analysis/clustering/__init__.py diff --git a/src/alignment/analysis/clustering/cross_layer_halo.py b/src/nodelens/analysis/clustering/cross_layer_halo.py similarity index 100% rename from src/alignment/analysis/clustering/cross_layer_halo.py rename to src/nodelens/analysis/clustering/cross_layer_halo.py diff --git 
a/src/alignment/analysis/clustering/metric_clustering.py b/src/nodelens/analysis/clustering/metric_clustering.py similarity index 82% rename from src/alignment/analysis/clustering/metric_clustering.py rename to src/nodelens/analysis/clustering/metric_clustering.py index 427477f2..b88bc8b5 100644 --- a/src/alignment/analysis/clustering/metric_clustering.py +++ b/src/nodelens/analysis/clustering/metric_clustering.py @@ -5,20 +5,28 @@ import numpy as np -# Archetype names (formerly: critical, redundant, synergistic, background). -# New names avoid overloading PID terminology. -# Geometric meaning is unchanged: high I_X low R_X, high R_X, moderate I_X low R_X, low both. -TYPE_ESSENTIAL = "essential" # formerly "critical" -TYPE_SUBSTITUTABLE = "substitutable" # formerly "redundant" -TYPE_SPECIALIZED = "specialized" # formerly "synergistic" -TYPE_DORMANT = "dormant" # formerly "background" - -# Backward compatibility: map old names to new -_OLD_TO_NEW = { - "critical": TYPE_ESSENTIAL, - "redundant": TYPE_SUBSTITUTABLE, - "synergistic": TYPE_SPECIALIZED, - "background": TYPE_DORMANT, +# Public archetype names used across configs, pruning logic, plots, and tests. +# Geometric meaning: high I_X low R_X, high R_X, high complementarity, low all. +TYPE_CRITICAL = "critical" +TYPE_REDUNDANT = "redundant" +TYPE_SYNERGISTIC = "synergistic" +TYPE_BACKGROUND = "background" + +# Backward compatibility for an unpublished naming pass. 
+TYPE_ESSENTIAL = TYPE_CRITICAL +TYPE_SUBSTITUTABLE = TYPE_REDUNDANT +TYPE_SPECIALIZED = TYPE_SYNERGISTIC +TYPE_DORMANT = TYPE_BACKGROUND + +_TYPE_ALIASES = { + "essential": TYPE_CRITICAL, + "substitutable": TYPE_REDUNDANT, + "specialized": TYPE_SYNERGISTIC, + "dormant": TYPE_BACKGROUND, + TYPE_CRITICAL: TYPE_CRITICAL, + TYPE_REDUNDANT: TYPE_REDUNDANT, + TYPE_SYNERGISTIC: TYPE_SYNERGISTIC, + TYPE_BACKGROUND: TYPE_BACKGROUND, } try: @@ -200,6 +208,9 @@ def fit( lab = km.fit_predict(X_cluster) cen = km.cluster_centers_ sil = silhouette_score(X_cluster, lab) if n > effective_k else 0.0 + elif n >= effective_k and effective_k >= 2: + lab, cen = self._kmeans_numpy(X_cluster, effective_k) + sil = self._silhouette_numpy(X_cluster, lab) else: lab = np.zeros(n, dtype=int) cen = np.zeros((1, X_cluster.shape[1])) @@ -255,6 +266,73 @@ def _norm01(x: np.ndarray) -> np.ndarray: lo, hi = x.min(), x.max() return (x - lo) / (hi - lo) if hi > lo else np.zeros_like(x) + def _kmeans_numpy(self, x: np.ndarray, k: int, max_iter: int = 100) -> Tuple[np.ndarray, np.ndarray]: + """Small deterministic k-means fallback used when scikit-learn is unavailable.""" + x = np.asarray(x, dtype=np.float64) + n = x.shape[0] + if n == 0 or k <= 1: + return np.zeros(n, dtype=int), np.zeros((1, x.shape[1])) + + rng = np.random.default_rng(self.seed) + centers = [x[int(rng.integers(n))].copy()] + for _ in range(1, k): + dist_sq = np.min(((x[:, None, :] - np.asarray(centers)[None, :, :]) ** 2).sum(axis=2), axis=1) + centers.append(x[int(np.argmax(dist_sq))].copy()) + centers_arr = np.asarray(centers, dtype=np.float64) + + labels = np.zeros(n, dtype=int) + for _ in range(max_iter): + dist_sq = ((x[:, None, :] - centers_arr[None, :, :]) ** 2).sum(axis=2) + new_labels = np.argmin(dist_sq, axis=1) + + new_centers = centers_arr.copy() + for cluster_id in range(k): + mask = new_labels == cluster_id + if mask.any(): + new_centers[cluster_id] = x[mask].mean(axis=0) + else: + # Re-seed empty clusters at the point 
farthest from its assigned center. + nearest_dist = dist_sq[np.arange(n), new_labels] + new_centers[cluster_id] = x[int(np.argmax(nearest_dist))] + + if np.array_equal(labels, new_labels) and np.allclose(centers_arr, new_centers): + centers_arr = new_centers + break + labels = new_labels + centers_arr = new_centers + + return labels, centers_arr + + @staticmethod + def _silhouette_numpy(x: np.ndarray, labels: np.ndarray) -> float: + """Compute mean silhouette without sklearn; returns 0.0 for degenerate labels.""" + x = np.asarray(x, dtype=np.float64) + labels = np.asarray(labels) + unique = np.unique(labels) + n = x.shape[0] + if n <= 1 or len(unique) < 2 or len(unique) >= n: + return 0.0 + + distances = np.linalg.norm(x[:, None, :] - x[None, :, :], axis=2) + values = [] + for i in range(n): + same = labels == labels[i] + same[i] = False + a_i = float(distances[i, same].mean()) if same.any() else 0.0 + + b_i = np.inf + for label in unique: + if label == labels[i]: + continue + other = labels == label + if other.any(): + b_i = min(b_i, float(distances[i, other].mean())) + + denom = max(a_i, b_i) + values.append(0.0 if not np.isfinite(denom) or denom == 0.0 else (b_i - a_i) / denom) + + return float(np.mean(values)) + def _types_by_importance( self, labels: np.ndarray, @@ -264,10 +342,10 @@ def _types_by_importance( """Assign type names by ranking clusters by mean importance score. Higher mean score -> higher-priority type: - rank 3 (highest) = "essential" - rank 2 = "specialized" - rank 1 = "substitutable" - rank 0 (lowest) = "dormant" + rank 3 (highest) = "critical" + rank 2 = "synergistic" + rank 1 = "redundant" + rank 0 (lowest) = "background" """ type_names_ranked = [TYPE_DORMANT, TYPE_SUBSTITUTABLE, TYPE_SPECIALIZED, TYPE_ESSENTIAL] scores = np.asarray(scores).flatten() @@ -434,7 +512,7 @@ def _solve_global_assignment(self, scores: np.ndarray) -> Dict[int, str]: Args: scores: [n_clusters, 4] score matrix for - [essential, substitutable, specialized, dormant]. 
+ [critical, redundant, synergistic, background]. """ import itertools @@ -476,13 +554,13 @@ def _scores_global_penalized( w_syn = 1.0 if use_syn else 0.0 scores = np.zeros((len(c), 4), dtype=np.float64) - # essential: high I_X, low R_X + # critical: high I_X, low R_X scores[:, 0] = (w_rq * c[:, 0]) - (w_red * c[:, 1]) - # substitutable: high R_X (with mild penalty for high I_X) + # redundant: high R_X (with mild penalty for high I_X) scores[:, 1] = (w_red * c[:, 1]) - (0.25 * w_rq * c[:, 0]) - # specialized: high complementarity (with mild penalty for high R_X) + # synergistic: high complementarity (with mild penalty for high R_X) scores[:, 2] = (w_syn * c[:, 2]) - (0.25 * w_red * c[:, 1]) - # dormant: close to origin + # background: close to origin scores[:, 3] = -((w_rq * np.abs(c[:, 0])) + (w_red * np.abs(c[:, 1])) + (w_syn * np.abs(c[:, 2]))) return scores @@ -500,13 +578,13 @@ def _scores_global_simple( w_syn = 1.0 if use_syn else 0.0 scores = np.zeros((len(c), 4), dtype=np.float64) - # essential: high I_X, low R_X + # critical: high I_X, low R_X scores[:, 0] = (w_rq * c[:, 0]) - (w_red * c[:, 1]) - # substitutable: maximize shared information + # redundant: maximize shared information scores[:, 1] = w_red * c[:, 1] - # specialized: maximize complementarity + # synergistic: maximize complementarity scores[:, 2] = w_syn * c[:, 2] - # dormant: low magnitude in active metric dimensions + # background: low magnitude in active metric dimensions scores[:, 3] = -((w_rq * np.abs(c[:, 0])) + (w_red * np.abs(c[:, 1])) + (w_syn * np.abs(c[:, 2]))) return scores diff --git a/src/alignment/analysis/dynamic_scoring.py b/src/nodelens/analysis/dynamic_scoring.py similarity index 99% rename from src/alignment/analysis/dynamic_scoring.py rename to src/nodelens/analysis/dynamic_scoring.py index 22c9ab19..76a5a7be 100644 --- a/src/alignment/analysis/dynamic_scoring.py +++ b/src/nodelens/analysis/dynamic_scoring.py @@ -56,7 +56,7 @@ def __init__(self, weight_final: float = 0.4, 
weight_trend: float = 0.2, weight_ def aggregate( self, score_history: Dict[str, Dict[str, List[float]]], loss_history: List[float], layer_name: str, metric_name: str = "rq" ) -> torch.Tensor: - """ + r""" Aggregate scores for a layer using training dynamics. Args: diff --git a/src/alignment/analysis/mechanism_validation.py b/src/nodelens/analysis/mechanism_validation.py similarity index 100% rename from src/alignment/analysis/mechanism_validation.py rename to src/nodelens/analysis/mechanism_validation.py diff --git a/src/alignment/analysis/read_halo_llm.py b/src/nodelens/analysis/read_halo_llm.py similarity index 100% rename from src/alignment/analysis/read_halo_llm.py rename to src/nodelens/analysis/read_halo_llm.py diff --git a/src/alignment/analysis/reporting/__init__.py b/src/nodelens/analysis/reporting/__init__.py similarity index 100% rename from src/alignment/analysis/reporting/__init__.py rename to src/nodelens/analysis/reporting/__init__.py diff --git a/src/alignment/analysis/reporting/html.py b/src/nodelens/analysis/reporting/html.py similarity index 100% rename from src/alignment/analysis/reporting/html.py rename to src/nodelens/analysis/reporting/html.py diff --git a/src/alignment/analysis/reporting/json_reporter.py b/src/nodelens/analysis/reporting/json_reporter.py similarity index 100% rename from src/alignment/analysis/reporting/json_reporter.py rename to src/nodelens/analysis/reporting/json_reporter.py diff --git a/src/alignment/analysis/reporting/markdown.py b/src/nodelens/analysis/reporting/markdown.py similarity index 100% rename from src/alignment/analysis/reporting/markdown.py rename to src/nodelens/analysis/reporting/markdown.py diff --git a/src/alignment/analysis/semantic_hooks.py b/src/nodelens/analysis/semantic_hooks.py similarity index 100% rename from src/alignment/analysis/semantic_hooks.py rename to src/nodelens/analysis/semantic_hooks.py diff --git a/src/alignment/analysis/unified_reporter.py 
b/src/nodelens/analysis/unified_reporter.py similarity index 100% rename from src/alignment/analysis/unified_reporter.py rename to src/nodelens/analysis/unified_reporter.py diff --git a/src/alignment/analysis/visualization/__init__.py b/src/nodelens/analysis/visualization/__init__.py similarity index 94% rename from src/alignment/analysis/visualization/__init__.py rename to src/nodelens/analysis/visualization/__init__.py index 60bc82ef..e8f3be33 100644 --- a/src/alignment/analysis/visualization/__init__.py +++ b/src/nodelens/analysis/visualization/__init__.py @@ -11,7 +11,7 @@ For most use cases, use UnifiedVisualizer: - from alignment.analysis.visualization import UnifiedVisualizer + from nodelens.analysis.visualization import UnifiedVisualizer viz = UnifiedVisualizer() viz.plot_layer_scores(scores, "Rayleigh Quotient") @@ -19,7 +19,7 @@ For metric histograms and distributions: - from alignment.analysis.visualization import ( + from nodelens.analysis.visualization import ( plot_metric_histogram, plot_metric_violin, plot_multi_metric_histogram, @@ -29,13 +29,13 @@ For pruning comparisons (unified for both vision and LLM): - from alignment.analysis.visualization import plot_unified_pruning_comparison + from nodelens.analysis.visualization import plot_unified_pruning_comparison plot_unified_pruning_comparison(results, baseline_value=0.92, metric='accuracy') For halo redundancy analysis: - from alignment.analysis.visualization import ( + from nodelens.analysis.visualization import ( plot_halo_redundancy_by_depth, plot_halo_redundancy_comprehensive ) diff --git a/src/alignment/analysis/visualization/alignment_plots.py b/src/nodelens/analysis/visualization/alignment_plots.py similarity index 100% rename from src/alignment/analysis/visualization/alignment_plots.py rename to src/nodelens/analysis/visualization/alignment_plots.py diff --git a/src/alignment/analysis/visualization/cluster_plots.py b/src/nodelens/analysis/visualization/cluster_plots.py similarity index 100% 
rename from src/alignment/analysis/visualization/cluster_plots.py rename to src/nodelens/analysis/visualization/cluster_plots.py diff --git a/src/alignment/analysis/visualization/halo_plots.py b/src/nodelens/analysis/visualization/halo_plots.py similarity index 100% rename from src/alignment/analysis/visualization/halo_plots.py rename to src/nodelens/analysis/visualization/halo_plots.py diff --git a/src/alignment/analysis/visualization/llm_mechanism_plots.py b/src/nodelens/analysis/visualization/llm_mechanism_plots.py similarity index 100% rename from src/alignment/analysis/visualization/llm_mechanism_plots.py rename to src/nodelens/analysis/visualization/llm_mechanism_plots.py diff --git a/src/alignment/analysis/visualization/metric_plots.py b/src/nodelens/analysis/visualization/metric_plots.py similarity index 100% rename from src/alignment/analysis/visualization/metric_plots.py rename to src/nodelens/analysis/visualization/metric_plots.py diff --git a/src/alignment/analysis/visualization/pruning_plots.py b/src/nodelens/analysis/visualization/pruning_plots.py similarity index 100% rename from src/alignment/analysis/visualization/pruning_plots.py rename to src/nodelens/analysis/visualization/pruning_plots.py diff --git a/src/alignment/analysis/visualization/unified_visualizer.py b/src/nodelens/analysis/visualization/unified_visualizer.py similarity index 100% rename from src/alignment/analysis/visualization/unified_visualizer.py rename to src/nodelens/analysis/visualization/unified_visualizer.py diff --git a/src/alignment/configs/README.md b/src/nodelens/configs/README.md similarity index 81% rename from src/alignment/configs/README.md rename to src/nodelens/configs/README.md index a7152019..51f3e750 100644 --- a/src/alignment/configs/README.md +++ b/src/nodelens/configs/README.md @@ -10,7 +10,7 @@ Configuration loading and management. 
## Usage ```python -from alignment.configs.config_loader import load_config +from nodelens.configs.config_loader import load_config config = load_config("configs/examples/mnist_basic.yaml") ``` diff --git a/src/alignment/configs/__init__.py b/src/nodelens/configs/__init__.py similarity index 90% rename from src/alignment/configs/__init__.py rename to src/nodelens/configs/__init__.py index 5c1dc631..9ffc7a62 100644 --- a/src/alignment/configs/__init__.py +++ b/src/nodelens/configs/__init__.py @@ -8,15 +8,15 @@ Usage: # Legacy (still works) - from alignment.configs import load_config + from nodelens.configs import load_config config = load_config("path/to/config.yaml") # New unified config (recommended) - from alignment.configs import load_unified_config, UnifiedConfig + from nodelens.configs import load_unified_config, UnifiedConfig config = load_unified_config("path/to/config.yaml") # Programmatic config - from alignment.configs import UnifiedConfig, ExperimentConfig + from nodelens.configs import UnifiedConfig, ExperimentConfig config = UnifiedConfig( experiment=ExperimentConfig(name="my_exp", type="cluster_analysis"), ... diff --git a/src/alignment/configs/config_loader.py b/src/nodelens/configs/config_loader.py similarity index 99% rename from src/alignment/configs/config_loader.py rename to src/nodelens/configs/config_loader.py index c7b00f6e..8e8048f2 100644 --- a/src/alignment/configs/config_loader.py +++ b/src/nodelens/configs/config_loader.py @@ -964,8 +964,8 @@ def _map_nested_to_flat_config(nested_config: Dict[str, Any]) -> Dict[str, Any]: if "num_networks" in nested_config: flat_config["num_networks"] = nested_config["num_networks"] - # Map alignment/metrics settings - # Priority: metrics.enabled > alignment.methods > alignment_methods > default + # Map alignment/metrics settings. + # Priority: metrics.enabled > alignment.methods > alignment_methods > default. 
flat_config["alignment_methods"] = nested_config.get("alignment_methods", ["rayleigh_quotient"]) flat_config["alignment_data_num_samples"] = nested_config.get("alignment_data_num_samples", 1) @@ -1511,7 +1511,7 @@ def _map_nested_to_flat_config(nested_config: Dict[str, Any]) -> Dict[str, Any]: try: import dataclasses as _dc - from alignment.experiments.base import ExperimentConfig as _EC + from nodelens.experiments.base import ExperimentConfig as _EC valid_fields = {f.name for f in _dc.fields(_EC)} for key, value in nested_config.items(): diff --git a/src/alignment/configs/config_validator.py b/src/nodelens/configs/config_validator.py similarity index 96% rename from src/alignment/configs/config_validator.py rename to src/nodelens/configs/config_validator.py index d896ac38..1199d9e3 100644 --- a/src/alignment/configs/config_validator.py +++ b/src/nodelens/configs/config_validator.py @@ -27,7 +27,7 @@ def validate_config(config_dict: Dict[str, Any]) -> List[str]: # Model validation (dynamic from registry if available) if "model_name" in config_dict: try: - from alignment.core.registry import MODEL_REGISTRY + from nodelens.core.registry import MODEL_REGISTRY valid_models = MODEL_REGISTRY.list() except Exception: @@ -38,7 +38,7 @@ def validate_config(config_dict: Dict[str, Any]) -> List[str]: # Dataset validation (dynamic) if "dataset_name" in config_dict: try: - from alignment.core.registry import DATASET_REGISTRY + from nodelens.core.registry import DATASET_REGISTRY valid_datasets = DATASET_REGISTRY.list() except Exception: @@ -75,7 +75,7 @@ def validate_config(config_dict: Dict[str, Any]) -> List[str]: errors.append("'metrics' must be a list") else: try: - from alignment.core.registry import METRIC_REGISTRY + from nodelens.core.registry import METRIC_REGISTRY valid_metrics = METRIC_REGISTRY.list() except Exception: diff --git a/src/alignment/configs/unified_config.py b/src/nodelens/configs/unified_config.py similarity index 99% rename from 
src/alignment/configs/unified_config.py rename to src/nodelens/configs/unified_config.py index d9ad036d..7fabcd6e 100644 --- a/src/alignment/configs/unified_config.py +++ b/src/nodelens/configs/unified_config.py @@ -13,7 +13,7 @@ - Registry-aware: validates that referenced components exist Usage: - from alignment.configs.unified_config import load_unified_config, UnifiedConfig + from nodelens.configs.unified_config import load_unified_config, UnifiedConfig # From YAML file config = load_unified_config("configs/my_experiment.yaml") diff --git a/src/alignment/core/__init__.py b/src/nodelens/core/__init__.py similarity index 97% rename from src/alignment/core/__init__.py rename to src/nodelens/core/__init__.py index e83573db..b1b0373e 100644 --- a/src/alignment/core/__init__.py +++ b/src/nodelens/core/__init__.py @@ -11,7 +11,7 @@ Example - Registering a custom metric: - from alignment.core import register_metric, BaseMetric + from nodelens.core import register_metric, BaseMetric @register_metric("my_metric", category="custom", tags=["experimental"]) class MyMetric(BaseMetric): @@ -22,7 +22,7 @@ def compute(self, outputs, **kwargs): Example - Using registered components: - from alignment.core import get_metric, METRIC_REGISTRY + from nodelens.core import get_metric, METRIC_REGISTRY # By name metric = get_metric("rayleigh_quotient") diff --git a/src/alignment/core/base.py b/src/nodelens/core/base.py similarity index 99% rename from src/alignment/core/base.py rename to src/nodelens/core/base.py index d4778ac2..7bcc4ac0 100644 --- a/src/alignment/core/base.py +++ b/src/nodelens/core/base.py @@ -249,7 +249,7 @@ def model(self) -> nn.Module: @property def tracked_layers(self) -> List[str]: - """List of layer names being tracked for alignment.""" + """List of layer names being tracked by NodeLens.""" return self._tracked_layers def _register_hooks(self) -> None: diff --git a/src/alignment/core/layer_detector.py b/src/nodelens/core/layer_detector.py similarity index 100% 
rename from src/alignment/core/layer_detector.py rename to src/nodelens/core/layer_detector.py diff --git a/src/alignment/core/protocols.py b/src/nodelens/core/protocols.py similarity index 99% rename from src/alignment/core/protocols.py rename to src/nodelens/core/protocols.py index 2e689206..89c905d2 100644 --- a/src/alignment/core/protocols.py +++ b/src/nodelens/core/protocols.py @@ -19,8 +19,8 @@ Example - Creating a custom metric: - from alignment.core.protocols import AlignmentMetric - from alignment.core.registry import register_metric + from nodelens.core.protocols import AlignmentMetric + from nodelens.core.registry import register_metric @register_metric("my_custom_metric", category="custom", tags=["experimental"]) class MyCustomMetric: @@ -108,7 +108,7 @@ def model(self) -> nn.Module: @property def tracked_layers(self) -> List[str]: - """List of layer names being tracked for alignment.""" + """List of layer names being tracked by NodeLens.""" ... def get_layer_activations(self, inputs: torch.Tensor, layers: Optional[List[str]] = None) -> Dict[str, torch.Tensor]: diff --git a/src/alignment/core/registry.py b/src/nodelens/core/registry.py similarity index 98% rename from src/alignment/core/registry.py rename to src/nodelens/core/registry.py index f83db662..e10bebc4 100644 --- a/src/alignment/core/registry.py +++ b/src/nodelens/core/registry.py @@ -597,7 +597,7 @@ def discover_and_register(module_path: str, registry_type: str = "all") -> int: the @register_* decorators. Args: - module_path: Python module path to scan (e.g., "alignment.metrics") + module_path: Python module path to scan (e.g., "nodelens.metrics") registry_type: Type of components to register ("all", "metrics", etc.) 
Returns: @@ -700,12 +700,12 @@ def initialize_registries(discover_builtin: bool = True, discover_plugins_flag: return if discover_builtin: - # Discover built-in components from alignment package + # Discover built-in components from nodelens package builtin_modules = [ - "alignment.metrics", - "alignment.pruning.strategies", - "alignment.analysis", - "alignment.models", + "nodelens.metrics", + "nodelens.pruning.strategies", + "nodelens.analysis", + "nodelens.models", ] for module in builtin_modules: try: diff --git a/src/alignment/core/streaming.py b/src/nodelens/core/streaming.py similarity index 100% rename from src/alignment/core/streaming.py rename to src/nodelens/core/streaming.py diff --git a/src/alignment/data/datasets/__init__.py b/src/nodelens/data/datasets/__init__.py similarity index 77% rename from src/alignment/data/datasets/__init__.py rename to src/nodelens/data/datasets/__init__.py index e0d4b4b7..2e7790d0 100644 --- a/src/alignment/data/datasets/__init__.py +++ b/src/nodelens/data/datasets/__init__.py @@ -1,12 +1,12 @@ """ Dataset implementations for alignment analysis. -NOTE: This module re-exports from alignment.dataops.datasets for backward compatibility. -The canonical location is alignment.dataops.datasets. +NOTE: This module re-exports from nodelens.dataops.datasets for backward compatibility. +The canonical location is nodelens.dataops.datasets. 
""" # Re-export everything from the canonical location -from alignment.dataops.datasets import ( +from nodelens.dataops.datasets import ( DATASET_CONFIGS, CIFAR10Dataset, CIFAR100Dataset, @@ -20,7 +20,7 @@ # Try to import text datasets (optional) try: - from alignment.dataops.datasets import C4Dataset, TextDataset, WikiTextDataset, load_text_dataset + from nodelens.dataops.datasets import C4Dataset, TextDataset, WikiTextDataset, load_text_dataset HAS_TEXT_DATASETS = True except ImportError: diff --git a/src/alignment/dataops/__init__.py b/src/nodelens/dataops/__init__.py similarity index 55% rename from src/alignment/dataops/__init__.py rename to src/nodelens/dataops/__init__.py index 8c1682f4..dea49795 100644 --- a/src/alignment/dataops/__init__.py +++ b/src/nodelens/dataops/__init__.py @@ -5,15 +5,15 @@ for use with alignment analysis experiments. """ -from alignment.dataops.base import BaseDataset, DatasetWrapper -from alignment.dataops.datasets import get_dataset -from alignment.dataops.loaders import DataLoaderConfig, create_data_loader, create_distributed_loader +from nodelens.dataops.base import BaseDataset, DatasetWrapper +from nodelens.dataops.datasets import get_dataset +from nodelens.dataops.loaders import DataLoaderConfig, create_data_loader, create_distributed_loader # Import dataset implementations when they're created try: - from alignment.dataops.datasets.cifar import CIFAR10Dataset, CIFAR100Dataset - from alignment.dataops.datasets.imagenet import ImageNetDataset - from alignment.dataops.datasets.mnist import MNISTDataset + from nodelens.dataops.datasets.cifar import CIFAR10Dataset, CIFAR100Dataset + from nodelens.dataops.datasets.imagenet import ImageNetDataset + from nodelens.dataops.datasets.mnist import MNISTDataset except ImportError: pass # Datasets will be implemented next diff --git a/src/alignment/dataops/base.py b/src/nodelens/dataops/base.py similarity index 99% rename from src/alignment/dataops/base.py rename to 
src/nodelens/dataops/base.py index c745e6a4..959903a5 100644 --- a/src/alignment/dataops/base.py +++ b/src/nodelens/dataops/base.py @@ -13,7 +13,7 @@ import torch from torch.utils.data import DataLoader, Dataset, Sampler -from alignment.core.base import BaseDataset as CoreBaseDataset +from nodelens.core.base import BaseDataset as CoreBaseDataset logger = logging.getLogger(__name__) diff --git a/src/alignment/dataops/datasets/__init__.py b/src/nodelens/dataops/datasets/__init__.py similarity index 91% rename from src/alignment/dataops/datasets/__init__.py rename to src/nodelens/dataops/datasets/__init__.py index edd9c55a..3aceb598 100644 --- a/src/alignment/dataops/datasets/__init__.py +++ b/src/nodelens/dataops/datasets/__init__.py @@ -11,12 +11,12 @@ # Import for backward compatibility - these are now created dynamically # but we import them to make them available at module level -from alignment.core.registry import DATASET_REGISTRY -from alignment.dataops.datasets.unified_dataset import DATASET_CONFIGS, UnifiedDataset +from nodelens.core.registry import DATASET_REGISTRY +from nodelens.dataops.datasets.unified_dataset import DATASET_CONFIGS, UnifiedDataset # Try to import text datasets (optional - requires additional dependencies) try: - from alignment.dataops.datasets.text_datasets import C4Dataset, TextDataset, WikiTextDataset, load_text_dataset + from nodelens.dataops.datasets.text_datasets import C4Dataset, TextDataset, WikiTextDataset, load_text_dataset HAS_TEXT_DATASETS = True except ImportError: diff --git a/src/alignment/dataops/datasets/text_datasets.py b/src/nodelens/dataops/datasets/text_datasets.py similarity index 99% rename from src/alignment/dataops/datasets/text_datasets.py rename to src/nodelens/dataops/datasets/text_datasets.py index af8d54af..3b4a301c 100644 --- a/src/alignment/dataops/datasets/text_datasets.py +++ b/src/nodelens/dataops/datasets/text_datasets.py @@ -11,7 +11,7 @@ import torch from torch.utils.data import Dataset, 
IterableDataset -from alignment.core.registry import register_dataset +from nodelens.core.registry import register_dataset logger = logging.getLogger(__name__) diff --git a/src/alignment/dataops/datasets/unified_dataset.py b/src/nodelens/dataops/datasets/unified_dataset.py similarity index 99% rename from src/alignment/dataops/datasets/unified_dataset.py rename to src/nodelens/dataops/datasets/unified_dataset.py index 28065b15..2ee9ca8c 100644 --- a/src/alignment/dataops/datasets/unified_dataset.py +++ b/src/nodelens/dataops/datasets/unified_dataset.py @@ -12,8 +12,8 @@ import torch from torchvision import datasets, transforms -from alignment.core.registry import register_dataset -from alignment.dataops.base import BaseDataset +from nodelens.core.registry import register_dataset +from nodelens.dataops.base import BaseDataset logger = logging.getLogger(__name__) diff --git a/src/alignment/dataops/loaders.py b/src/nodelens/dataops/loaders.py similarity index 100% rename from src/alignment/dataops/loaders.py rename to src/nodelens/dataops/loaders.py diff --git a/src/alignment/dataops/processing/__init__.py b/src/nodelens/dataops/processing/__init__.py similarity index 100% rename from src/alignment/dataops/processing/__init__.py rename to src/nodelens/dataops/processing/__init__.py diff --git a/src/alignment/dataops/processing/batch.py b/src/nodelens/dataops/processing/batch.py similarity index 100% rename from src/alignment/dataops/processing/batch.py rename to src/nodelens/dataops/processing/batch.py diff --git a/src/alignment/dataops/processing/covariance.py b/src/nodelens/dataops/processing/covariance.py similarity index 95% rename from src/alignment/dataops/processing/covariance.py rename to src/nodelens/dataops/processing/covariance.py index fd8b485a..2c1fd9fb 100644 --- a/src/alignment/dataops/processing/covariance.py +++ b/src/nodelens/dataops/processing/covariance.py @@ -148,13 +148,10 @@ def _oas_shrinkage(self, X_centered: torch.Tensor, cov_sample: 
torch.Tensor) -> trace_S2 = torch.trace(cov_sample @ cov_sample) trace_S = torch.trace(cov_sample) - rho = min( - 1.0, - ((1 - 2 / n_features) * trace_S2 + trace_S**2) / ((n_samples + 1 - 2 / n_features) * (trace_S2 - trace_S**2 / n_features) + 1e-8), - ) - shrinkage = torch.tensor(rho, device=X_centered.device) + rho = ((1 - 2 / n_features) * trace_S2 + trace_S**2) / ((n_samples + 1 - 2 / n_features) * (trace_S2 - trace_S**2 / n_features) + 1e-8) + shrinkage = rho.to(device=X_centered.device, dtype=X_centered.dtype) else: - shrinkage = torch.tensor(1.0, device=X_centered.device) + shrinkage = torch.ones((), device=X_centered.device, dtype=X_centered.dtype) shrinkage = torch.clamp(shrinkage, 0.0, 1.0) diff --git a/src/alignment/dataops/processing/layers.py b/src/nodelens/dataops/processing/layers.py similarity index 100% rename from src/alignment/dataops/processing/layers.py rename to src/nodelens/dataops/processing/layers.py diff --git a/src/alignment/experiments/README.md b/src/nodelens/experiments/README.md similarity index 88% rename from src/alignment/experiments/README.md rename to src/nodelens/experiments/README.md index d91c297a..42a141c7 100644 --- a/src/alignment/experiments/README.md +++ b/src/nodelens/experiments/README.md @@ -9,7 +9,7 @@ Structured experiment framework for alignment analysis. Vision and general model analysis with training, metrics, and pruning. ```python -from alignment.experiments import GeneralAlignmentExperiment +from nodelens.experiments import GeneralAlignmentExperiment experiment = GeneralAlignmentExperiment.from_yaml("config.yaml") results = experiment.run() @@ -20,7 +20,7 @@ results = experiment.run() LLM analysis with SCAR metrics, supernode detection, and structured pruning. 
```python -from alignment.experiments import LLMAlignmentExperiment +from nodelens.experiments import LLMAlignmentExperiment experiment = LLMAlignmentExperiment(config) experiment.setup() diff --git a/src/alignment/experiments/__init__.py b/src/nodelens/experiments/__init__.py similarity index 100% rename from src/alignment/experiments/__init__.py rename to src/nodelens/experiments/__init__.py diff --git a/src/alignment/experiments/base.py b/src/nodelens/experiments/base.py similarity index 98% rename from src/alignment/experiments/base.py rename to src/nodelens/experiments/base.py index f8b456c6..e3b40c07 100644 --- a/src/alignment/experiments/base.py +++ b/src/nodelens/experiments/base.py @@ -15,10 +15,10 @@ import torch -from alignment.core.base import BaseExperiment as CoreBaseExperiment -from alignment.core.registry import DATASET_REGISTRY -from alignment.dataops.loaders import create_distributed_loader -from alignment.models import ModelWrapper +from nodelens.core.base import BaseExperiment as CoreBaseExperiment +from nodelens.core.registry import DATASET_REGISTRY +from nodelens.dataops.loaders import create_distributed_loader +from nodelens.models import ModelWrapper logger = logging.getLogger(__name__) @@ -270,7 +270,7 @@ class ExperimentConfig: # Generalized Taylor pruning (vision) # --------------------------------------------------------------------- # These parameters control the analytically-motivated "generalized Taylor" family - # (see src/alignment/pruning/strategies/generalized_taylor.py). They are exposed + # (see src/nodelens/pruning/strategies/generalized_taylor.py). They are exposed # here so they can be set in YAML and saved into experiment_config.yaml for # reproducibility. 
generalized_taylor_weight_rq: float = 1.0 @@ -557,7 +557,7 @@ def _initialize_model(self): else: # Try to get model from registry first try: - from alignment.core.registry import MODEL_REGISTRY + from nodelens.core.registry import MODEL_REGISTRY # Handle parameter mapping for specific models model_kwargs = self.config.model_config.copy() @@ -718,7 +718,7 @@ def _initialize_metrics(self): """Initialize metrics.""" import inspect - from alignment.core.registry import METRIC_REGISTRY + from nodelens.core.registry import METRIC_REGISTRY # Combine primary metrics and alignment-specific methods so that # alignment-only metrics (e.g., synergy / redundancy) are also diff --git a/src/alignment/experiments/cluster_experiments.py b/src/nodelens/experiments/cluster_experiments.py similarity index 100% rename from src/alignment/experiments/cluster_experiments.py rename to src/nodelens/experiments/cluster_experiments.py diff --git a/src/alignment/experiments/general_alignment.py b/src/nodelens/experiments/general_alignment.py similarity index 98% rename from src/alignment/experiments/general_alignment.py rename to src/nodelens/experiments/general_alignment.py index b768917a..0ae40c2c 100644 --- a/src/alignment/experiments/general_alignment.py +++ b/src/nodelens/experiments/general_alignment.py @@ -23,14 +23,14 @@ import torch.nn as nn from tqdm import tqdm -from alignment.core.registry import register_experiment -from alignment.experiments.base import BaseExperiment, ExperimentConfig -from alignment.metrics.rayleigh.rayleigh_quotient import RayleighQuotient -from alignment.models import ModelWrapper -from alignment.pruning.base import PruningConfig -from alignment.pruning.pipeline import PruningPipelineOptions, run_pruning_pipeline -from alignment.pruning.strategies import MagnitudePruning -from alignment.services import ActivationCaptureService, MaskOperations +from nodelens.core.registry import register_experiment +from nodelens.experiments.base import BaseExperiment, 
ExperimentConfig +from nodelens.metrics.rayleigh.rayleigh_quotient import RayleighQuotient +from nodelens.models import ModelWrapper +from nodelens.pruning.base import PruningConfig +from nodelens.pruning.pipeline import PruningPipelineOptions, run_pruning_pipeline +from nodelens.pruning.strategies import MagnitudePruning +from nodelens.services import ActivationCaptureService, MaskOperations logger = logging.getLogger(__name__) @@ -736,7 +736,7 @@ def _measure_alignment(self) -> Dict[str, Dict[str, List[float]]]: weights = wrapped_model_to_use.get_layer_weights() # Manual preprocessing - from alignment.dataops.processing import preprocess_layer_activations + from nodelens.dataops.processing import preprocess_layer_activations layer_modules = dict(wrapped_model_to_use._model.named_modules()) @@ -884,7 +884,7 @@ def _compute_redundancy_matrices(self) -> Dict[str, Any]: return {} try: - from alignment.metrics.information.pairwise_gaussian import PairwiseRedundancyGaussian + from nodelens.metrics.information.pairwise_gaussian import PairwiseRedundancyGaussian except ImportError: logger.warning("PairwiseRedundancyGaussian not available, skipping redundancy matrices") return {} @@ -1365,14 +1365,14 @@ def _pruning_experiments_single(self) -> Dict[str, Any]: if strategy_name == "magnitude": if pruning_config.global_pruning: - from alignment.pruning.strategies import GlobalMagnitudePruning + from nodelens.pruning.strategies import GlobalMagnitudePruning strategy = GlobalMagnitudePruning(config=pruning_config) else: strategy = MagnitudePruning(config=pruning_config) elif strategy_name == "alignment": # Legacy "alignment" keyword - use pruning_alignment_metric - from alignment.pruning.strategies import AlignmentPruning, CascadingAlignmentPruning, GlobalAlignmentPruning + from nodelens.pruning.strategies import AlignmentPruning, CascadingAlignmentPruning, GlobalAlignmentPruning alignment_metric = getattr(self.config, "pruning_alignment_metric", "rayleigh_quotient") @@ 
-1390,7 +1390,7 @@ def _pruning_experiments_single(self) -> Dict[str, Any]: # Note: in 'cascading' scope we perform the sequential recomputation in this # experiment loop, so we use the standard AlignmentPruning wrapper (which # forwards outputs/targets kwargs to the metric implementation). - from alignment.pruning.strategies import AlignmentPruning, GlobalAlignmentPruning + from nodelens.pruning.strategies import AlignmentPruning, GlobalAlignmentPruning metric_kwargs = {} try: @@ -1407,29 +1407,29 @@ def _pruning_experiments_single(self) -> Dict[str, Any]: elif strategy_name == "cascading_alignment": # Legacy cascading_alignment handling logger.warning("'cascading_alignment' algorithm is deprecated. Use algorithms=['alignment'] with scope='cascading'") - from alignment.pruning.strategies import CascadingAlignmentPruning + from nodelens.pruning.strategies import CascadingAlignmentPruning alignment_metric = getattr(self.config, "pruning_alignment_metric", "rayleigh_quotient") pruning_config.structured = True strategy = CascadingAlignmentPruning(metric=alignment_metric, direction="forward", config=pruning_config) elif strategy_name == "hybrid": - from alignment.pruning.strategies import HybridPruning + from nodelens.pruning.strategies import HybridPruning alignment_metric = getattr(self.config, "pruning_alignment_metric", "rayleigh_quotient") alpha = getattr(self.config, "pruning_hybrid_alpha", 0.5) strategy = HybridPruning(alignment_metric=alignment_metric, alpha=alpha, config=pruning_config) elif strategy_name == "gradient": logger.warning("Gradient pruning is not suitable for post-training pruning on converged models") - from alignment.pruning.strategies import GradientPruning + from nodelens.pruning.strategies import GradientPruning strategy = GradientPruning(config=pruning_config) elif strategy_name == "fisher": logger.warning("Fisher pruning is not suitable for post-training pruning on converged models") - from alignment.pruning.strategies import FisherPruning 
+ from nodelens.pruning.strategies import FisherPruning strategy = FisherPruning(config=pruning_config) elif strategy_name == "random": - from alignment.pruning.strategies import RandomPruning + from nodelens.pruning.strategies import RandomPruning strategy = RandomPruning(config=pruning_config) else: @@ -1465,7 +1465,7 @@ def _pruning_experiments_single(self) -> Dict[str, Any]: supernode_metric = supernode_cfg.get("score_metric", None) if isinstance(supernode_metric, str) and supernode_metric: try: - from alignment.core.registry import get_metric + from nodelens.core.registry import get_metric m = get_metric(supernode_metric) if m is not None: @@ -1755,7 +1755,7 @@ def _should_protect() -> bool: if sn_score_metric == strategy_name: sn_scores = scores.detach().clone() else: - from alignment.pruning.strategies import AlignmentPruning + from nodelens.pruning.strategies import AlignmentPruning metric_kwargs = {} try: @@ -2016,7 +2016,7 @@ def _compute_metric_importance( Returns: Per-neuron importance scores """ - from alignment.metrics import get_metric + from nodelens.metrics import get_metric W = module.weight.detach() @@ -2367,9 +2367,8 @@ def _create_tensorized_masks_optimized( # Use random selection but ensure reproducibility unique_seed = base_seed + net_idx * 10000 + hash(name) % 1000 + amount_idx * 100 torch.manual_seed(unique_seed) - # Pass dummy scores to force random selection - dummy_scores = torch.ones_like(importance_scores) - mask = self._create_pruning_mask_tensor(dummy_scores, amount, "random") + random_tie_scores = torch.ones_like(importance_scores) + mask = self._create_pruning_mask_tensor(random_tie_scores, amount, "random") else: # Use the actual importance scores with the selection mode mask = self._create_pruning_mask_tensor(importance_scores, amount, selection_mode) @@ -2418,8 +2417,8 @@ def _create_tensorized_masks(self, strategy_name: str, selection_mode: str, prun # Random selection for magnitude strategy unique_seed = base_seed + net_idx * 
10000 + hash(name) % 1000 + amount_idx * 100 torch.manual_seed(unique_seed) - dummy_scores = torch.ones_like(weight) - mask = self._create_pruning_mask_tensor(dummy_scores, amount, "random") + random_tie_scores = torch.ones_like(weight) + mask = self._create_pruning_mask_tensor(random_tie_scores, amount, "random") else: # Low/high selection based on magnitude importance_scores = weight.abs() @@ -2434,8 +2433,8 @@ def _create_tensorized_masks(self, strategy_name: str, selection_mode: str, prun torch.manual_seed(unique_seed) # For random strategy, always use random selection - dummy_scores = torch.ones_like(weight) - mask = self._create_pruning_mask_tensor(dummy_scores, amount, "random") + random_tie_scores = torch.ones_like(weight) + mask = self._create_pruning_mask_tensor(random_tie_scores, amount, "random") layer_masks[name] = mask network_masks.append(layer_masks) @@ -3010,10 +3009,7 @@ def _compute_alignment_importance(self, module: nn.Module, layer_inputs: torch.T def _pruning_experiments_tensorized_detailed(self) -> Dict[str, Any]: """Tensorized pruning with detailed per-network results.""" - # TODO: Implement per-network (not aggregated) tensorized pruning results. - # This should return the same keys as `_pruning_experiments_tensorized()`, but with - # per-network curves preserved for later variance/error-bar plots. 
- logger.info("Detailed tensorized pruning not yet implemented, using aggregated results") + logger.info("Detailed tensorized pruning is currently aliased to the aggregated tensorized path") return self._pruning_experiments_tensorized() def run(self) -> Dict[str, Any]: @@ -3131,7 +3127,7 @@ def _generate_visualizations(self): output_dir = Path(getattr(self.config, "plots_dir", Path(self.config.log_dir) / "plots")) output_dir.mkdir(parents=True, exist_ok=True) - from alignment.analysis.visualization import generate_experiment_visualizations + from nodelens.analysis.visualization import generate_experiment_visualizations # Use the centralized visualization function for standard plots # Calculate total_params from model for secondary x-axis in pruning plots @@ -3175,7 +3171,7 @@ def _generate_visualizations_fallback(self, output_dir: Path): - scatter/ - Metric scatter plots - redundancy/ - Redundancy heatmaps """ - from alignment.analysis.visualization import UnifiedVisualizer + from nodelens.analysis.visualization import UnifiedVisualizer visualizer = UnifiedVisualizer() @@ -3504,8 +3500,8 @@ def _create_pruning_strategy(self, strategy_name: str, pruning_config: PruningCo Returns: Initialized pruning strategy or None if creation fails """ - from alignment.pruning import get_pruning_strategy - from alignment.pruning.strategies import GlobalAlignmentPruning, GlobalMagnitudePruning + from nodelens.pruning import get_pruning_strategy + from nodelens.pruning.strategies import GlobalAlignmentPruning, GlobalMagnitudePruning try: # Handle global pruning variants @@ -3524,15 +3520,20 @@ def _create_pruning_strategy(self, strategy_name: str, pruning_config: PruningCo return None def _pruning_experiments_single_network(self, model: nn.Module, wrapped_model: ModelWrapper, network_id: int) -> Dict[str, Any]: - """Perform pruning experiments on a single specific network (fallback for compatibility).""" - logger.info(f"Using single network pruning for network {network_id} (fallback 
mode)") + """Perform pruning experiments on one network in the multi-network fallback path.""" + logger.info(f"Using single network pruning for network {network_id}") - # TODO: Implement a real single-network pruning run (strategy loop + finetune + eval), - # matching the tensorized outputs structure, so callers don't get empty results. - results = {"strategies": {}, "final_model_performance": {}, "network_id": network_id} - - # For now, return empty results - the tensorized version should handle everything - return results + original_model = self.model + original_wrapped_model = self.wrapped_model + try: + self.model = model + self.wrapped_model = wrapped_model + results = self._pruning_experiments_single() + results["network_id"] = network_id + return results + finally: + self.model = original_model + self.wrapped_model = original_wrapped_model def _evaluate_single_model(self, model: nn.Module) -> Tuple[float, float]: """Evaluate a specific model (fallback for compatibility).""" @@ -4301,7 +4302,12 @@ def _pruning_experiments_tensorized_v2(self) -> Dict[str, Any]: for selection_mode in selection_modes: # Use the ultra-parallel evaluation - if strategy_name == "alignment" and hasattr(self.config, "use_ultra_parallel_eval") and self.config.use_ultra_parallel_eval: + use_ultra_parallel = bool(getattr(self.config, "use_ultra_parallel_eval", False)) + if use_ultra_parallel and getattr(self.config, "fine_tune_after_pruning", False): + logger.info("Ultra-parallel pruning evaluation is disabled when fine-tuning is requested") + use_ultra_parallel = False + + if strategy_name == "alignment" and use_ultra_parallel: batch_results = self._tensorized_pruning_ultra_parallel(strategy_name, selection_mode, self.config.pruning_amounts) else: # Existing implementation @@ -4359,13 +4365,13 @@ def _tensorized_pruning_ultra_parallel(self, strategy_name: str, selection_mode: logger.info(" Creating masks for all configurations...") all_masks = 
self._create_alignment_masks_batch(selection_mode, pruning_amounts) - # Calculate sparsities sparsities = torch.zeros(num_networks, num_amounts) for net_idx in range(num_networks): for amount_idx, amount in enumerate(pruning_amounts): - # TODO: Compute actual sparsity from the masks (not the requested pruning amount), - # especially if mask construction has ties/constraints that affect the achieved rate. - sparsities[net_idx, amount_idx] = amount + masks = all_masks[net_idx][amount_idx] + total_units = sum(int(mask.numel()) for mask in masks.values()) + pruned_units = sum(int((mask == 0).sum().item()) for mask in masks.values()) + sparsities[net_idx, amount_idx] = pruned_units / total_units if total_units else amount # TRULY PARALLEL EVALUATION - all configs at once! logger.info(" Starting TRULY PARALLEL evaluation of all configurations...") @@ -4379,10 +4385,7 @@ def _tensorized_pruning_ultra_parallel(self, strategy_name: str, selection_mode: # Fine-tuning phase if self.config.fine_tune_after_pruning: - # TODO: Implement fine-tuning for ultra-parallel mode (e.g., batched finetune or per-config micro-finetune), - # or explicitly disable this mode when fine_tune_after_pruning=True. 
- logger.info(" Fine-tuning is not yet implemented for ultra-parallel mode") - # For now, just copy the before results + logger.info(" Ultra-parallel mode reports pre-finetuning results; caller should use batch mode when fine-tuning is required") accuracies_after = accuracies_before.clone() losses_after = losses_before.clone() else: diff --git a/src/alignment/experiments/llm_experiments.py b/src/nodelens/experiments/llm_experiments.py similarity index 99% rename from src/alignment/experiments/llm_experiments.py rename to src/nodelens/experiments/llm_experiments.py index c4bbe8c3..5e5b4a35 100644 --- a/src/alignment/experiments/llm_experiments.py +++ b/src/nodelens/experiments/llm_experiments.py @@ -6,15 +6,15 @@ import torch import torch.nn as nn -from alignment.analysis.visualization import UnifiedVisualizer -from alignment.core.streaming import StreamingCovariance -from alignment.experiments.base import BaseExperiment -from alignment.metrics import get_metric -from alignment.models.transformers import TransformerWrapperEnhanced as TransformerWrapper -from alignment.pruning import AlignmentPruning, PruningConfig -from alignment.pruning.pipeline import PruningPipelineOptions -from alignment.pruning.strategies.llm_baselines import SparseGPTPruning, WandaPruning -from alignment.services import MaskOperations +from nodelens.analysis.visualization import UnifiedVisualizer +from nodelens.core.streaming import StreamingCovariance +from nodelens.experiments.base import BaseExperiment +from nodelens.metrics import get_metric +from nodelens.models.transformers import TransformerWrapperEnhanced as TransformerWrapper +from nodelens.pruning import AlignmentPruning, PruningConfig +from nodelens.pruning.pipeline import PruningPipelineOptions +from nodelens.pruning.strategies.llm_baselines import SparseGPTPruning, WandaPruning +from nodelens.services import MaskOperations logger = logging.getLogger(__name__) @@ -80,7 +80,7 @@ def setup(self): ) if needs_text_dataset: try: - from 
alignment.dataops.datasets.text_datasets import load_text_dataset + from nodelens.dataops.datasets.text_datasets import load_text_dataset except ImportError as e: logger.error(f"Unable to import text datasets for LLMAlignmentExperiment: {e}") self.dataset = None @@ -238,7 +238,7 @@ def evaluate_perplexity(self, dataset: str = "wikitext", split: str = "test", nu # Legacy per-sample perplexity (kept for backwards compatibility). # WARNING: this is sensitive to padding/truncation and is not a standard protocol for fair perplexity reporting. # ------------------------------------------------------------------ - from alignment.dataops.datasets.text_datasets import load_text_dataset + from nodelens.dataops.datasets.text_datasets import load_text_dataset dataset_obj = load_text_dataset( dataset, @@ -2030,9 +2030,8 @@ def compute_importance_scores(self, num_samples: int = 1, dim="input") -> Dict[s # Pass 1: Compute independent metrics (RQ, OI, Magnitude) for metric_name in metric_names: - # TODO: Add an efficient pairwise-metric path (redundancy/synergy) for LLM layers. - # Current implementation computes independent metrics only. 
if "redundancy" in metric_name or "synergy" in metric_name: + logger.debug("Skipping pairwise metric %s in the independent LLM scoring pass", metric_name) continue try: @@ -2791,7 +2790,7 @@ def compute_baseline_pruning_scores( try: from torch.utils.data import DataLoader - from alignment.dataops.datasets.text_datasets import WikiTextDataset + from nodelens.dataops.datasets.text_datasets import WikiTextDataset tokenizer = getattr(self, "tokenizer", None) if tokenizer is None: @@ -2992,7 +2991,7 @@ def _resolve_mlp_path(layer_idx: int) -> Optional[str]: if "owl" in strategies: logger.info("Calibrating OWL (Outlier-aware Wanda) pruning strategy...") try: - from alignment.pruning.strategies.llm_baselines import OWLPruning + from nodelens.pruning.strategies.llm_baselines import OWLPruning owl = OWLPruning(num_calibration_samples=num_calibration_samples) owl.calibrate(model, calib_dataloader, device=str(device)) @@ -3047,7 +3046,7 @@ def _resolve_mlp_path(layer_idx: int) -> Optional[str]: if "llm_pruner" in strategies: logger.info("Calibrating LLM-Pruner pruning strategy...") try: - from alignment.pruning.strategies.llm_baselines import LLMPrunerChannelMode + from nodelens.pruning.strategies.llm_baselines import LLMPrunerChannelMode llm_pruner = LLMPrunerChannelMode(num_calibration_samples=num_calibration_samples) llm_pruner.calibrate(model, calib_dataloader, device=str(device)) @@ -3102,7 +3101,7 @@ def _resolve_mlp_path(layer_idx: int) -> Optional[str]: if "flap" in strategies: logger.info("Calibrating FLAP pruning strategy...") try: - from alignment.pruning.strategies.llm_baselines import FLAPPruning + from nodelens.pruning.strategies.llm_baselines import FLAPPruning flap = FLAPPruning(num_calibration_samples=num_calibration_samples) flap.calibrate(model, calib_dataloader, device=str(device)) @@ -3140,7 +3139,7 @@ def _resolve_mlp_path(layer_idx: int) -> Optional[str]: if "ria" in strategies: logger.info("Calibrating RIA pruning strategy...") try: - from 
alignment.pruning.strategies.llm_baselines import RIAPruning + from nodelens.pruning.strategies.llm_baselines import RIAPruning ria = RIAPruning(num_calibration_samples=num_calibration_samples) ria.calibrate(model, calib_dataloader, device=str(device)) @@ -3178,7 +3177,7 @@ def _resolve_mlp_path(layer_idx: int) -> Optional[str]: if "slimllm" in strategies: logger.info("Calibrating SlimLLM pruning strategy...") try: - from alignment.pruning.strategies.llm_baselines import SlimLLMPruning + from nodelens.pruning.strategies.llm_baselines import SlimLLMPruning slimllm = SlimLLMPruning(num_calibration_samples=num_calibration_samples) slimllm.calibrate(model, calib_dataloader, device=str(device)) @@ -3797,7 +3796,7 @@ def _get_importance_metric(name: str, metric_name: str) -> Optional[torch.Tensor or {} ) if isinstance(rh_cfg, dict) and bool(rh_cfg.get("enabled", False)): - from alignment.analysis.read_halo_llm import ReadHaloConfig, compute_next_layer_read_halo + from nodelens.analysis.read_halo_llm import ReadHaloConfig, compute_next_layer_read_halo cfg = ReadHaloConfig( enabled=True, @@ -6695,7 +6694,7 @@ def analyze_halo_vs_nonhalo_redundancy( max_length: int = 256, sample_pairs: int = 2000, ) -> Dict[str, Dict[str, Any]]: - """ + r""" Paper-aligned halo redundancy analysis using the loss-relevant contribution signal. 
We compare redundancy between three groups (per layer), then aggregate across layers: @@ -8066,7 +8065,7 @@ def apply_pruning(self, sparsity: float = 0.2, metric: str = "activation_l2_norm "ria", "slimllm", ] - from alignment.pruning.base import PrecomputedScorePruning + from nodelens.pruning.base import PrecomputedScorePruning if metric in precomputed_metrics: pruner = PrecomputedScorePruning(config=config) @@ -8528,7 +8527,7 @@ def apply_minimal_repair(self, dataset_name: str = "wikitext", epochs: int = 1, # Need a dataset loader from torch.utils.data import DataLoader - from alignment.dataops.datasets.text_datasets import load_text_dataset + from nodelens.dataops.datasets.text_datasets import load_text_dataset # Minimal dataset for repair (calibration set) dataset = load_text_dataset(dataset_name, self.config.model_config.get("model_id"), split="train", max_samples=1000) @@ -9722,7 +9721,7 @@ def restore_weights(): # ------------------------------------------------------------------ if getattr(self.config, "generate_plots", True): try: - from alignment.analysis.visualization.llm_mechanism_plots import ( + from nodelens.analysis.visualization.llm_mechanism_plots import ( plot_bus_concentration, plot_conditional_halo_ablation, plot_halo_structure, diff --git a/src/alignment/experiments/runner.py b/src/nodelens/experiments/runner.py similarity index 98% rename from src/alignment/experiments/runner.py rename to src/nodelens/experiments/runner.py index 09759605..fac8185e 100644 --- a/src/alignment/experiments/runner.py +++ b/src/nodelens/experiments/runner.py @@ -10,8 +10,8 @@ from pathlib import Path from typing import Any, Dict, List, Optional, Type, Union -from alignment.core.registry import get_experiment -from alignment.experiments.base import BaseExperiment, ExperimentConfig +from nodelens.core.registry import get_experiment +from nodelens.experiments.base import BaseExperiment, ExperimentConfig logger = logging.getLogger(__name__) diff --git 
a/src/alignment/experiments/tracking/__init__.py b/src/nodelens/experiments/tracking/__init__.py similarity index 100% rename from src/alignment/experiments/tracking/__init__.py rename to src/nodelens/experiments/tracking/__init__.py diff --git a/src/alignment/experiments/tracking/base.py b/src/nodelens/experiments/tracking/base.py similarity index 100% rename from src/alignment/experiments/tracking/base.py rename to src/nodelens/experiments/tracking/base.py diff --git a/src/alignment/infrastructure/README.md b/src/nodelens/infrastructure/README.md similarity index 87% rename from src/alignment/infrastructure/README.md rename to src/nodelens/infrastructure/README.md index 29dac15e..90d62a68 100644 --- a/src/alignment/infrastructure/README.md +++ b/src/nodelens/infrastructure/README.md @@ -9,7 +9,7 @@ System utilities for computing, storage, and configuration. | `storage/checkpoint.py` | ACTIVE | Model checkpoint save/load | | `storage/logging.py` | ACTIVE | Logging setup and MetricLogger | | `storage/job_directory.py` | ACTIVE | SLURM job directory management | -| `configuration/config.py` | AVAILABLE (warning) | Basic config utilities (use `alignment.configs` for main config) | +| `configuration/config.py` | AVAILABLE (warning) | Basic config utilities (use `nodelens.configs` for main config) | | `computing/distributed.py` | AVAILABLE | Multi-GPU distributed computing (not currently integrated) | | `computing/optimized/gpu.py` | INTEGRATED | GPU-accelerated histogram/MI (enable via config) | | `computing/optimized/jit.py` | INTEGRATED | JIT-compiled metrics (enable via config) | @@ -20,7 +20,7 @@ System utilities for computing, storage, and configuration. 
**checkpoint.py** - Model checkpoint utilities ```python -from alignment.infrastructure import save_checkpoint, load_checkpoint +from nodelens.infrastructure import save_checkpoint, load_checkpoint # Save model with optimizer state save_checkpoint(model, optimizer, epoch=10, filepath="checkpoint.pt") @@ -31,7 +31,7 @@ checkpoint = load_checkpoint("checkpoint.pt", model=model, optimizer=optimizer) **logging.py** - Logging utilities ```python -from alignment.infrastructure import setup_logging, get_logger, MetricLogger +from nodelens.infrastructure import setup_logging, get_logger, MetricLogger # Setup logging setup_logging(log_level="INFO", log_file="experiment.log") @@ -47,7 +47,7 @@ metric_logger.write_summary() **job_directory.py** - SLURM job directory management ```python -from alignment.infrastructure.storage import create_job_directory, JobDirectory +from nodelens.infrastructure.storage import create_job_directory, JobDirectory # Create unique job directory (auto-detects SLURM_JOB_ID) job_dir = create_job_directory( @@ -71,7 +71,7 @@ with JobDirectory("/path/to/outputs", "my_experiment") as job: **distributed.py** - Distributed training utilities ```python -from alignment.infrastructure import ( +from nodelens.infrastructure import ( setup_distributed, cleanup_distributed, is_distributed, is_main_process, get_rank, get_world_size @@ -88,7 +88,7 @@ if is_main_process(): **optimized/gpu.py** - GPU-accelerated operations ```python -from alignment.infrastructure.computing.optimized import ( +from nodelens.infrastructure.computing.optimized import ( gpu_histogram1d, gpu_histogram2d, gpu_mutual_information, gpu_entropy, GPUAcceleratedMetrics @@ -106,7 +106,7 @@ cov = GPUAcceleratedMetrics.fast_covariance(X) **optimized/jit.py** - JIT-compiled metrics ```python -from alignment.infrastructure.computing.optimized import ( +from nodelens.infrastructure.computing.optimized import ( JITRayleighQuotient, JITMutualInformation, JITNodeCorrelation ) @@ -118,10 +118,10 @@ 
scores = jit_rq(inputs, weights) # Faster than regular RQ ### configuration/ - Configuration Utilities (AVAILABLE, warning) Basic configuration utilities. For the main experiment configuration system, -use `alignment.configs` instead. +use `nodelens.configs` instead. ```python -from alignment.infrastructure.configuration import load_config, save_config +from nodelens.infrastructure.configuration import load_config, save_config # Load/save config files config = load_config("config.yaml") @@ -145,7 +145,7 @@ metrics: Or programmatically: ```python -from alignment.metrics import get_optimization_status, get_metric_with_optimizations +from nodelens.metrics import get_optimization_status, get_metric_with_optimizations # Check what's available status = get_optimization_status() diff --git a/src/alignment/infrastructure/__init__.py b/src/nodelens/infrastructure/__init__.py similarity index 84% rename from src/alignment/infrastructure/__init__.py rename to src/nodelens/infrastructure/__init__.py index 5407c48b..f9eb534b 100644 --- a/src/alignment/infrastructure/__init__.py +++ b/src/nodelens/infrastructure/__init__.py @@ -9,14 +9,14 @@ USAGE STATUS: - Storage (checkpoint, logging, job_directory): ACTIVELY USED -- Configuration: See alignment.configs for the main config system +- Configuration: See nodelens.configs for the main config system - Computing (distributed, GPU, JIT): AVAILABLE but not currently integrated These are optimized implementations ready for future performance improvements. 
Example: - >>> from alignment.infrastructure import save_checkpoint, load_checkpoint - >>> from alignment.infrastructure import setup_logging, get_logger - >>> from alignment.infrastructure.storage import create_job_directory + >>> from nodelens.infrastructure import save_checkpoint, load_checkpoint + >>> from nodelens.infrastructure import setup_logging, get_logger + >>> from nodelens.infrastructure.storage import create_job_directory """ # Computing infrastructure diff --git a/src/alignment/infrastructure/computing/__init__.py b/src/nodelens/infrastructure/computing/__init__.py similarity index 100% rename from src/alignment/infrastructure/computing/__init__.py rename to src/nodelens/infrastructure/computing/__init__.py diff --git a/src/alignment/infrastructure/computing/distributed.py b/src/nodelens/infrastructure/computing/distributed.py similarity index 100% rename from src/alignment/infrastructure/computing/distributed.py rename to src/nodelens/infrastructure/computing/distributed.py diff --git a/src/alignment/infrastructure/computing/optimized/__init__.py b/src/nodelens/infrastructure/computing/optimized/__init__.py similarity index 100% rename from src/alignment/infrastructure/computing/optimized/__init__.py rename to src/nodelens/infrastructure/computing/optimized/__init__.py diff --git a/src/alignment/infrastructure/computing/optimized/gpu.py b/src/nodelens/infrastructure/computing/optimized/gpu.py similarity index 100% rename from src/alignment/infrastructure/computing/optimized/gpu.py rename to src/nodelens/infrastructure/computing/optimized/gpu.py diff --git a/src/alignment/infrastructure/computing/optimized/jit.py b/src/nodelens/infrastructure/computing/optimized/jit.py similarity index 99% rename from src/alignment/infrastructure/computing/optimized/jit.py rename to src/nodelens/infrastructure/computing/optimized/jit.py index c763a9f0..b5531ce4 100644 --- a/src/alignment/infrastructure/computing/optimized/jit.py +++ 
b/src/nodelens/infrastructure/computing/optimized/jit.py @@ -321,7 +321,7 @@ def benchmark_jit_vs_regular(metric_name: str, input_shape: Tuple[int, ...], n_i """ is_cuda = str(device).startswith("cuda") - # Create dummy data + # Create synthetic benchmark data if metric_name == "rayleigh_quotient": inputs = torch.randn(input_shape[0], input_shape[1], device=device) weights = torch.randn(input_shape[2], input_shape[1], device=device) diff --git a/src/alignment/infrastructure/configuration/__init__.py b/src/nodelens/infrastructure/configuration/__init__.py similarity index 73% rename from src/alignment/infrastructure/configuration/__init__.py rename to src/nodelens/infrastructure/configuration/__init__.py index eb5a7d2d..ace6eb43 100644 --- a/src/alignment/infrastructure/configuration/__init__.py +++ b/src/nodelens/infrastructure/configuration/__init__.py @@ -2,8 +2,8 @@ Configuration infrastructure for the alignment framework. NOTE: This module provides basic configuration utilities. -For the main experiment configuration system, use alignment.configs instead: - from alignment.configs import ExperimentConfig, load_config +For the main experiment configuration system, use nodelens.configs instead: + from nodelens.configs import ExperimentConfig, load_config This module contains simpler utilities that can be used standalone. 
""" diff --git a/src/alignment/infrastructure/configuration/config.py b/src/nodelens/infrastructure/configuration/config.py similarity index 100% rename from src/alignment/infrastructure/configuration/config.py rename to src/nodelens/infrastructure/configuration/config.py diff --git a/src/alignment/infrastructure/storage/__init__.py b/src/nodelens/infrastructure/storage/__init__.py similarity index 100% rename from src/alignment/infrastructure/storage/__init__.py rename to src/nodelens/infrastructure/storage/__init__.py diff --git a/src/alignment/infrastructure/storage/checkpoint.py b/src/nodelens/infrastructure/storage/checkpoint.py similarity index 100% rename from src/alignment/infrastructure/storage/checkpoint.py rename to src/nodelens/infrastructure/storage/checkpoint.py diff --git a/src/alignment/infrastructure/storage/job_directory.py b/src/nodelens/infrastructure/storage/job_directory.py similarity index 98% rename from src/alignment/infrastructure/storage/job_directory.py rename to src/nodelens/infrastructure/storage/job_directory.py index 900d5ff3..44ae9faf 100644 --- a/src/alignment/infrastructure/storage/job_directory.py +++ b/src/nodelens/infrastructure/storage/job_directory.py @@ -88,11 +88,11 @@ def create_job_directory( Example: >>> job_dir = create_job_directory( - ... "/n/holylfs06/LABS/kempner_project_b/Lab/alignment/Prune_LLM", + ... "/path/to/results/Prune_LLM", ... "llama3_8b_pruning" ... 
) >>> print(job_dir) - /n/holylfs06/LABS/kempner_project_b/Lab/alignment/Prune_LLM/llama3_8b_pruning_20241209_143052_12345678 + /path/to/results/Prune_LLM/llama3_8b_pruning_20241209_143052_12345678 """ base_output_dir = Path(base_output_dir) diff --git a/src/alignment/infrastructure/storage/logging.py b/src/nodelens/infrastructure/storage/logging.py similarity index 100% rename from src/alignment/infrastructure/storage/logging.py rename to src/nodelens/infrastructure/storage/logging.py diff --git a/src/alignment/metrics/__init__.py b/src/nodelens/metrics/__init__.py similarity index 100% rename from src/alignment/metrics/__init__.py rename to src/nodelens/metrics/__init__.py diff --git a/src/alignment/metrics/composite.py b/src/nodelens/metrics/composite.py similarity index 100% rename from src/alignment/metrics/composite.py rename to src/nodelens/metrics/composite.py diff --git a/src/alignment/metrics/conditional_metrics.py b/src/nodelens/metrics/conditional_metrics.py similarity index 99% rename from src/alignment/metrics/conditional_metrics.py rename to src/nodelens/metrics/conditional_metrics.py index e1f616f6..5010f3d6 100644 --- a/src/alignment/metrics/conditional_metrics.py +++ b/src/nodelens/metrics/conditional_metrics.py @@ -538,7 +538,7 @@ def compute( outputs_c = outputs[mask] # L2 norm per neuron within this class - norm_c = torch.norm(outputs_c, p=2, dim=0) / np.sqrt(n_c.float()) + norm_c = torch.norm(outputs_c, p=2, dim=0) / torch.sqrt(n_c.float()) class_norms.append(norm_c) if not class_norms: diff --git a/src/alignment/metrics/cross_layer.py b/src/nodelens/metrics/cross_layer.py similarity index 100% rename from src/alignment/metrics/cross_layer.py rename to src/nodelens/metrics/cross_layer.py diff --git a/src/alignment/metrics/gradient_based.py b/src/nodelens/metrics/gradient_based.py similarity index 100% rename from src/alignment/metrics/gradient_based.py rename to src/nodelens/metrics/gradient_based.py diff --git 
a/src/alignment/metrics/halo_redundancy.py b/src/nodelens/metrics/halo_redundancy.py similarity index 100% rename from src/alignment/metrics/halo_redundancy.py rename to src/nodelens/metrics/halo_redundancy.py diff --git a/src/alignment/metrics/information/__init__.py b/src/nodelens/metrics/information/__init__.py similarity index 91% rename from src/alignment/metrics/information/__init__.py rename to src/nodelens/metrics/information/__init__.py index 8114054e..351cf42e 100644 --- a/src/alignment/metrics/information/__init__.py +++ b/src/nodelens/metrics/information/__init__.py @@ -1,5 +1,5 @@ """ -Information-theoretic metrics for neural network alignment. +Information-theoretic metrics for neural network analysis. """ from .conditional_mutual_information import ConditionalMutualInformation @@ -10,7 +10,7 @@ from .pid import SharedInformation from .pid import SynergisticInformation as PIDSynergisticInformation from .pid import UniqueInformationX, UniqueInformationY -from .redundancy import AverageRedundancy +from .redundancy import AverageRedundancy, LayerRedundancy from .synergy_continuous import SynergyContinuousTarget from .synergy_mmi import SynergyGaussianMMI @@ -30,6 +30,7 @@ "FastGaussianMI", # Fast MI variant using GAP for CNNs # Redundancy "AverageRedundancy", + "LayerRedundancy", "PairwiseRedundancyGaussian", # Synergy "SynergyGaussianMMI", diff --git a/src/alignment/metrics/information/conditional_mutual_information.py b/src/nodelens/metrics/information/conditional_mutual_information.py similarity index 89% rename from src/alignment/metrics/information/conditional_mutual_information.py rename to src/nodelens/metrics/information/conditional_mutual_information.py index 08d05f85..8478123e 100644 --- a/src/alignment/metrics/information/conditional_mutual_information.py +++ b/src/nodelens/metrics/information/conditional_mutual_information.py @@ -129,39 +129,9 @@ def _compute_gaussian(self, X: torch.Tensor, Y: torch.Tensor, Z: Optional[torch. 
continue try: - # Compute I(Y;Z|X) = H(Y|X) + H(Z|X) - H(Y,Z|X) - # Under Gaussian assumption, we can compute this from covariances - - # Stack variables x_flat = X.reshape(X.shape[0], -1) - xyz = torch.cat([x_flat, y_i, z], dim=1) - - # Compute covariance matrix - cov = self._covariance(xyz) - - # Indices - n_x = x_flat.shape[1] - idx_x = slice(0, n_x) - idx_y = n_x - slice(n_x + 1, cov.shape[0]) - - # Compute conditional entropies using Schur complement - # H(Y|X) - cov_x = cov[idx_x, idx_x] - cov_y = cov[idx_y, idx_y] - cov_xy = cov[idx_x, idx_y] - - if torch.linalg.matrix_rank(cov_x) == cov_x.shape[0]: - cov_y_given_x = cov_y - cov_xy.T @ torch.linalg.inv(cov_x) @ cov_xy - 0.5 * torch.log(2 * np.pi * np.e * torch.clamp(cov_y_given_x, min=1e-10)) - else: - 0.5 * torch.log(2 * np.pi * np.e * torch.clamp(cov_y, min=1e-10)) - - # Similar for H(Z|X) and H(Y,Z|X) - # Simplified: just use correlation-based approximation corr_yz_given_x = self._partial_correlation(y_i, z, x_flat) - # Approximate CMI if torch.abs(corr_yz_given_x) < 0.999: cmi_scores[i] = -0.5 * torch.log(1 - corr_yz_given_x**2) @@ -260,7 +230,7 @@ def _partial_correlation(self, Y: torch.Tensor, Z: torch.Tensor, X: torch.Tensor if var_y_given_x > 1e-10 and var_z_given_x > 1e-10: partial_corr = partial_corr / torch.sqrt(var_y_given_x * var_z_given_x) else: - partial_corr = torch.tensor(0.0) + partial_corr = torch.zeros((), device=Y.device, dtype=Y.dtype) else: # Fallback to simple correlation partial_corr = r_yz diff --git a/src/alignment/metrics/information/gaussian_mi.py b/src/nodelens/metrics/information/gaussian_mi.py similarity index 100% rename from src/alignment/metrics/information/gaussian_mi.py rename to src/nodelens/metrics/information/gaussian_mi.py diff --git a/src/alignment/metrics/information/gaussian_pid.py b/src/nodelens/metrics/information/gaussian_pid.py similarity index 100% rename from src/alignment/metrics/information/gaussian_pid.py rename to 
src/nodelens/metrics/information/gaussian_pid.py diff --git a/src/alignment/metrics/information/gpu_binning.py b/src/nodelens/metrics/information/gpu_binning.py similarity index 100% rename from src/alignment/metrics/information/gpu_binning.py rename to src/nodelens/metrics/information/gpu_binning.py diff --git a/src/alignment/metrics/information/higher_order.py b/src/nodelens/metrics/information/higher_order.py similarity index 100% rename from src/alignment/metrics/information/higher_order.py rename to src/nodelens/metrics/information/higher_order.py diff --git a/src/alignment/metrics/information/mi_projection.py b/src/nodelens/metrics/information/mi_projection.py similarity index 100% rename from src/alignment/metrics/information/mi_projection.py rename to src/nodelens/metrics/information/mi_projection.py diff --git a/src/alignment/metrics/information/mutual_information.py b/src/nodelens/metrics/information/mutual_information.py similarity index 100% rename from src/alignment/metrics/information/mutual_information.py rename to src/nodelens/metrics/information/mutual_information.py diff --git a/src/alignment/metrics/information/pairwise_gaussian.py b/src/nodelens/metrics/information/pairwise_gaussian.py similarity index 100% rename from src/alignment/metrics/information/pairwise_gaussian.py rename to src/nodelens/metrics/information/pairwise_gaussian.py diff --git a/src/alignment/metrics/information/pid.py b/src/nodelens/metrics/information/pid.py similarity index 97% rename from src/alignment/metrics/information/pid.py rename to src/nodelens/metrics/information/pid.py index 4ef0f982..4d7dbef5 100644 --- a/src/alignment/metrics/information/pid.py +++ b/src/nodelens/metrics/information/pid.py @@ -5,9 +5,11 @@ input features provides about the output into unique, redundant, and synergistic components. -Note: These metrics currently return zeros as a placeholder. 
For practical PID-based -synergy analysis, use `gaussian_pid_synergy_mmi` which provides a fast Gaussian -approximation. A proper BROJA-2PID solver could be integrated here in the future. +The BROJA solver is optional. When `dit` with BROJA support is installed, these +metrics discretize the inputs and compute the requested PID component. Without +that optional dependency, they return zero scores and log a warning. For the +fast Gaussian approximation used in most experiments, use +`gaussian_pid_synergy_mmi`. """ import logging diff --git a/src/alignment/metrics/information/redundancy.py b/src/nodelens/metrics/information/redundancy.py similarity index 77% rename from src/alignment/metrics/information/redundancy.py rename to src/nodelens/metrics/information/redundancy.py index 6729cf95..b4517e82 100644 --- a/src/alignment/metrics/information/redundancy.py +++ b/src/nodelens/metrics/information/redundancy.py @@ -16,6 +16,37 @@ logger = logging.getLogger(__name__) +def _prepare_inputs_and_weights(inputs: torch.Tensor, weights: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + """Flatten inputs and weights to the two-dimensional form used by these metrics.""" + if inputs.ndim != 2: + inputs = inputs.reshape(inputs.shape[0], -1) + if weights.ndim != 2: + weights = weights.reshape(weights.shape[0], -1) + return inputs, weights + + +def _project_neuron_outputs(inputs: torch.Tensor, weights: torch.Tensor) -> torch.Tensor: + """Project layer inputs through neuron weights.""" + return torch.matmul(inputs, weights.T) + + +def _gaussian_mi_matrix_from_projected(projected: torch.Tensor) -> torch.Tensor: + """Compute the Gaussian pairwise mutual-information matrix from neuron outputs.""" + projected = projected.float() + batch_size = projected.shape[0] + + proj_mean = projected.mean(dim=0, keepdim=True) + proj_std = projected.std(dim=0, keepdim=True) + proj_std = torch.where(proj_std > 1e-12, proj_std, torch.ones_like(proj_std)) + projected_norm = (projected - proj_mean) / 
proj_std + + corr_matrix = torch.matmul(projected_norm.T, projected_norm) / (batch_size - 1) + rho_sq = torch.clamp(corr_matrix**2, 0, 0.999999) + mi_matrix = -0.5 * torch.log(1.0 - rho_sq) + mi_matrix.fill_diagonal_(0) + return torch.nan_to_num(mi_matrix) + + @register_metric("average_redundancy", aliases=["redundancy_gaussian"]) class AverageRedundancy(BaseMetric): """ @@ -68,11 +99,7 @@ def compute( if inputs is None or weights is None: raise ValueError("AverageRedundancy requires inputs and weights") - # Flatten if needed - if inputs.ndim != 2: - inputs = inputs.reshape(inputs.shape[0], -1) - if weights.ndim != 2: - weights = weights.reshape(weights.shape[0], -1) + inputs, weights = _prepare_inputs_and_weights(inputs, weights) batch_size = inputs.shape[0] num_neurons = weights.shape[0] @@ -85,8 +112,7 @@ def compute( logger.warning("Redundancy: Need at least 2 neurons") return torch.zeros(num_neurons, device=weights.device, dtype=weights.dtype) - # Compute projected outputs - projected = torch.matmul(inputs, weights.T) # [batch_size, num_neurons] + projected = _project_neuron_outputs(inputs, weights) # [batch_size, num_neurons] # Move to CPU for large computations if self._should_use_cpu(projected) or num_neurons > 4096: @@ -133,12 +159,7 @@ def compute( # Average MI with reference set (approximate redundancy) redundancy_scores = mi_with_refs.mean(dim=1) else: - # Full correlation matrix for smaller layers - corr_matrix = torch.matmul(projected_norm.T, projected_norm) / (batch_size - 1) - rho_sq = corr_matrix**2 - rho_sq = torch.clamp(rho_sq, 0, 0.999999) - mi_matrix = -0.5 * torch.log(1.0 - rho_sq) - mi_matrix.fill_diagonal_(0) + mi_matrix = _gaussian_mi_matrix_from_projected(projected) redundancy_scores = mi_matrix.sum(dim=1) / max(1, num_neurons - 1) return torch.nan_to_num(redundancy_scores.to(weights.device)) @@ -252,16 +273,18 @@ class LayerRedundancy(BaseMetric): the average pairwise mutual information between neurons. 
""" - def __init__(self, return_matrix: bool = False, **config: Any): + def __init__(self, return_matrix: bool = False, max_matrix_neurons: int = 2048, **config: Any): """ Initialize layer redundancy metric. Args: return_matrix: If True, return full redundancy matrix + max_matrix_neurons: Maximum number of neurons for full matrix output **config: Additional configuration """ super().__init__(**config) self.return_matrix = return_matrix + self.max_matrix_neurons = max_matrix_neurons self._avg_redundancy = AverageRedundancy(**config) @property @@ -286,12 +309,31 @@ def compute( If return_matrix=False: scalar redundancy score If return_matrix=True: redundancy matrix [num_neurons, num_neurons] """ - # Get per-neuron redundancy scores - neuron_redundancies = self._avg_redundancy.compute(inputs=inputs, weights=weights, outputs=outputs, **kwargs) - if self.return_matrix: - # Return full redundancy matrix (would need to modify avg_redundancy) - logger.warning("Full matrix return not yet implemented, returning average") + if inputs is None or weights is None: + raise ValueError("LayerRedundancy requires inputs and weights") + + inputs, weights = _prepare_inputs_and_weights(inputs, weights) + batch_size = inputs.shape[0] + num_neurons = weights.shape[0] + + if batch_size < self._avg_redundancy.min_samples: + logger.warning(f"LayerRedundancy: Only {batch_size} samples, returning zero matrix") + return torch.zeros(num_neurons, num_neurons, device=weights.device, dtype=weights.dtype) + + if num_neurons > self.max_matrix_neurons: + raise ValueError( + "LayerRedundancy(return_matrix=True) would allocate a " + f"{num_neurons}x{num_neurons} matrix; set max_matrix_neurons " + "higher if this is intentional." 
+ ) + + projected = _project_neuron_outputs(inputs, weights) + if self._should_use_cpu(projected): + projected = projected.cpu() + return _gaussian_mi_matrix_from_projected(projected).to(weights.device) + + neuron_redundancies = self._avg_redundancy.compute(inputs=inputs, weights=weights, outputs=outputs, **kwargs) # Return average redundancy across all neurons return neuron_redundancies.mean().unsqueeze(0) diff --git a/src/alignment/metrics/information/synergy_continuous.py b/src/nodelens/metrics/information/synergy_continuous.py similarity index 100% rename from src/alignment/metrics/information/synergy_continuous.py rename to src/nodelens/metrics/information/synergy_continuous.py diff --git a/src/alignment/metrics/information/synergy_mmi.py b/src/nodelens/metrics/information/synergy_mmi.py similarity index 100% rename from src/alignment/metrics/information/synergy_mmi.py rename to src/nodelens/metrics/information/synergy_mmi.py diff --git a/src/alignment/metrics/multi_supernode.py b/src/nodelens/metrics/multi_supernode.py similarity index 100% rename from src/alignment/metrics/multi_supernode.py rename to src/nodelens/metrics/multi_supernode.py diff --git a/src/alignment/metrics/pairwise_base.py b/src/nodelens/metrics/pairwise_base.py similarity index 100% rename from src/alignment/metrics/pairwise_base.py rename to src/nodelens/metrics/pairwise_base.py diff --git a/src/alignment/metrics/rayleigh/__init__.py b/src/nodelens/metrics/rayleigh/__init__.py similarity index 100% rename from src/alignment/metrics/rayleigh/__init__.py rename to src/nodelens/metrics/rayleigh/__init__.py diff --git a/src/alignment/metrics/rayleigh/delta_alignment.py b/src/nodelens/metrics/rayleigh/delta_alignment.py similarity index 100% rename from src/alignment/metrics/rayleigh/delta_alignment.py rename to src/nodelens/metrics/rayleigh/delta_alignment.py diff --git a/src/alignment/metrics/rayleigh/rayleigh_quotient.py b/src/nodelens/metrics/rayleigh/rayleigh_quotient.py similarity index 
100% rename from src/alignment/metrics/rayleigh/rayleigh_quotient.py rename to src/nodelens/metrics/rayleigh/rayleigh_quotient.py diff --git a/src/alignment/metrics/rayleigh/rq_alternative.py b/src/nodelens/metrics/rayleigh/rq_alternative.py similarity index 100% rename from src/alignment/metrics/rayleigh/rq_alternative.py rename to src/nodelens/metrics/rayleigh/rq_alternative.py diff --git a/src/alignment/metrics/similarity/__init__.py b/src/nodelens/metrics/similarity/__init__.py similarity index 100% rename from src/alignment/metrics/similarity/__init__.py rename to src/nodelens/metrics/similarity/__init__.py diff --git a/src/alignment/metrics/similarity/cosine_similarity.py b/src/nodelens/metrics/similarity/cosine_similarity.py similarity index 100% rename from src/alignment/metrics/similarity/cosine_similarity.py rename to src/nodelens/metrics/similarity/cosine_similarity.py diff --git a/src/alignment/metrics/similarity/node_correlation.py b/src/nodelens/metrics/similarity/node_correlation.py similarity index 100% rename from src/alignment/metrics/similarity/node_correlation.py rename to src/nodelens/metrics/similarity/node_correlation.py diff --git a/src/alignment/metrics/similarity/node_redundancy.py b/src/nodelens/metrics/similarity/node_redundancy.py similarity index 100% rename from src/alignment/metrics/similarity/node_redundancy.py rename to src/nodelens/metrics/similarity/node_redundancy.py diff --git a/src/alignment/metrics/similarity/weight_similarity.py b/src/nodelens/metrics/similarity/weight_similarity.py similarity index 100% rename from src/alignment/metrics/similarity/weight_similarity.py rename to src/nodelens/metrics/similarity/weight_similarity.py diff --git a/src/alignment/metrics/spectral/__init__.py b/src/nodelens/metrics/spectral/__init__.py similarity index 100% rename from src/alignment/metrics/spectral/__init__.py rename to src/nodelens/metrics/spectral/__init__.py diff --git a/src/alignment/metrics/spectral/spectral_alignment.py 
b/src/nodelens/metrics/spectral/spectral_alignment.py similarity index 100% rename from src/alignment/metrics/spectral/spectral_alignment.py rename to src/nodelens/metrics/spectral/spectral_alignment.py diff --git a/src/alignment/metrics/spectral/spectral_classic.py b/src/nodelens/metrics/spectral/spectral_classic.py similarity index 100% rename from src/alignment/metrics/spectral/spectral_classic.py rename to src/nodelens/metrics/spectral/spectral_classic.py diff --git a/src/alignment/metrics/task_specific/__init__.py b/src/nodelens/metrics/task_specific/__init__.py similarity index 100% rename from src/alignment/metrics/task_specific/__init__.py rename to src/nodelens/metrics/task_specific/__init__.py diff --git a/src/alignment/metrics/task_specific/activation_magnitude.py b/src/nodelens/metrics/task_specific/activation_magnitude.py similarity index 100% rename from src/alignment/metrics/task_specific/activation_magnitude.py rename to src/nodelens/metrics/task_specific/activation_magnitude.py diff --git a/src/alignment/metrics/task_specific/classification.py b/src/nodelens/metrics/task_specific/classification.py similarity index 100% rename from src/alignment/metrics/task_specific/classification.py rename to src/nodelens/metrics/task_specific/classification.py diff --git a/src/alignment/metrics/task_specific/general.py b/src/nodelens/metrics/task_specific/general.py similarity index 99% rename from src/alignment/metrics/task_specific/general.py rename to src/nodelens/metrics/task_specific/general.py index 3ee9e460..5546dd67 100644 --- a/src/alignment/metrics/task_specific/general.py +++ b/src/nodelens/metrics/task_specific/general.py @@ -57,9 +57,8 @@ def compute( if outputs is None: outputs = inputs @ weights.T - # If no targets provided, create dummy targets if targets is None: - # Use outputs themselves as targets (self-supervised) + # Use outputs themselves as self-supervised targets. 
targets = outputs.detach() # Ensure inputs require gradients diff --git a/src/alignment/metrics/task_specific/language_model.py b/src/nodelens/metrics/task_specific/language_model.py similarity index 100% rename from src/alignment/metrics/task_specific/language_model.py rename to src/nodelens/metrics/task_specific/language_model.py diff --git a/src/alignment/metrics/task_specific/reinforcement_learning.py b/src/nodelens/metrics/task_specific/reinforcement_learning.py similarity index 100% rename from src/alignment/metrics/task_specific/reinforcement_learning.py rename to src/nodelens/metrics/task_specific/reinforcement_learning.py diff --git a/src/alignment/metrics/task_specific/vision.py b/src/nodelens/metrics/task_specific/vision.py similarity index 100% rename from src/alignment/metrics/task_specific/vision.py rename to src/nodelens/metrics/task_specific/vision.py diff --git a/src/alignment/models/README.md b/src/nodelens/models/README.md similarity index 92% rename from src/alignment/models/README.md rename to src/nodelens/models/README.md index ae7cb763..83c68230 100644 --- a/src/alignment/models/README.md +++ b/src/nodelens/models/README.md @@ -6,7 +6,7 @@ Model wrappers and loaders for the alignment metrics framework. 
```python # Load a pretrained vision model -from alignment.models import ModelWrapper +from nodelens.models import ModelWrapper import torchvision.models as tvm model = tvm.resnet18(pretrained=True) @@ -61,7 +61,7 @@ model: Simple models for experiments: ```python -from alignment.models import MLP, CNN2P2, create_model +from nodelens.models import MLP, CNN2P2, create_model # Create MLP for MNIST model = create_model('mlp', 'mnist', hidden_dims=[300, 200]) @@ -75,7 +75,7 @@ model = create_model('cnn2p2', 'cifar10') Automatic hook lifecycle management: ```python -from alignment.models.hooks import HookManager +from nodelens.models.hooks import HookManager hook_mgr = HookManager() @@ -87,7 +87,7 @@ with hook_mgr.temporary_hooks(model, ['layer1', 'layer2']) as cache: # Hooks automatically removed after context # Or use PersistentHookManager for long-running tracking -from alignment.models.hooks import PersistentHookManager +from nodelens.models.hooks import PersistentHookManager persistent_mgr = PersistentHookManager() persistent_mgr.register_persistent_hooks(model, ['layer1']) @@ -104,7 +104,7 @@ The experiment runner automatically wraps models: self.wrapped_model = ModelWrapper(self.model, **wrapper_kwargs) # Activation capture via service layer -from alignment.services.activation_capture import ActivationCaptureService +from nodelens.services.activation_capture import ActivationCaptureService service = ActivationCaptureService(wrapped_model) data = service.capture(input_batch) ``` diff --git a/src/alignment/models/__init__.py b/src/nodelens/models/__init__.py similarity index 100% rename from src/alignment/models/__init__.py rename to src/nodelens/models/__init__.py diff --git a/src/alignment/models/architectures/standard_models.py b/src/nodelens/models/architectures/standard_models.py similarity index 98% rename from src/alignment/models/architectures/standard_models.py rename to src/nodelens/models/architectures/standard_models.py index c52365bd..4d13cbba 100644 
--- a/src/alignment/models/architectures/standard_models.py +++ b/src/nodelens/models/architectures/standard_models.py @@ -18,7 +18,7 @@ class MLP(nn.Module): """ Multi-layer perceptron with configurable hidden layers. - This implementation matches the old alignment.models.models.MLP functionality. + This implementation matches the old nodelens.models.models.MLP functionality. Args: input_dim: Dimension of input features (default: 784 for MNIST) @@ -108,7 +108,7 @@ class CNN2P2(nn.Module): """ Convolutional Neural Network with 2 convolutional layers and 2 pooling layers. - This implementation matches the old alignment.models.models.CNN2P2 functionality. + This implementation matches the old nodelens.models.models.CNN2P2 functionality. Architecture: Conv1 -> ReLU -> MaxPool -> Conv2 -> ReLU -> MaxPool -> FC1 -> ReLU -> FC2 diff --git a/src/alignment/models/base.py b/src/nodelens/models/base.py similarity index 97% rename from src/alignment/models/base.py rename to src/nodelens/models/base.py index bb21136c..ab54c7a6 100644 --- a/src/alignment/models/base.py +++ b/src/nodelens/models/base.py @@ -11,13 +11,13 @@ import torch import torch.nn as nn -from alignment.core.base import BaseModel +from nodelens.core.base import BaseModel from .hooks import HookManager # Conditional import for layer detector (graceful fallback) try: - from alignment.core.layer_detector import detect_trackable_layers + from nodelens.core.layer_detector import detect_trackable_layers HAS_LAYER_DETECTOR = True except ImportError: @@ -72,7 +72,7 @@ def __init__( def _discover_layers(self) -> List[str]: """ - Auto-discover layers that can be tracked for alignment. + Auto-discover layers that can be tracked by NodeLens. Uses generic LayerDetector for model-agnostic detection. @@ -158,7 +158,7 @@ def preprocess_activations(self, activations: Dict[str, torch.Tensor], mode: str Basic activation preprocessing. 
For advanced preprocessing (CNN modes, attention, etc.), - use the data processing module: alignment.data.processing.preprocess_layer_activations + use the data processing module: nodelens.data.processing.preprocess_layer_activations Args: activations: Raw activations from hooks diff --git a/src/alignment/models/hooks.py b/src/nodelens/models/hooks.py similarity index 100% rename from src/alignment/models/hooks.py rename to src/nodelens/models/hooks.py diff --git a/src/alignment/models/hub.py b/src/nodelens/models/hub.py similarity index 100% rename from src/alignment/models/hub.py rename to src/nodelens/models/hub.py diff --git a/src/alignment/models/transformers.py b/src/nodelens/models/transformers.py similarity index 100% rename from src/alignment/models/transformers.py rename to src/nodelens/models/transformers.py diff --git a/src/alignment/models/wrappers.py b/src/nodelens/models/wrappers.py similarity index 100% rename from src/alignment/models/wrappers.py rename to src/nodelens/models/wrappers.py diff --git a/src/alignment/preprocessing/__init__.py b/src/nodelens/preprocessing/__init__.py similarity index 100% rename from src/alignment/preprocessing/__init__.py rename to src/nodelens/preprocessing/__init__.py diff --git a/src/alignment/preprocessing/layer_preprocessing.py b/src/nodelens/preprocessing/layer_preprocessing.py similarity index 100% rename from src/alignment/preprocessing/layer_preprocessing.py rename to src/nodelens/preprocessing/layer_preprocessing.py diff --git a/src/alignment/pruning/README.md b/src/nodelens/pruning/README.md similarity index 93% rename from src/alignment/pruning/README.md rename to src/nodelens/pruning/README.md index 1935e4db..4dda2c0e 100644 --- a/src/alignment/pruning/README.md +++ b/src/nodelens/pruning/README.md @@ -52,7 +52,7 @@ Neural network pruning strategies and infrastructure. 
## Usage ```python -from alignment.pruning import MagnitudePruning, PruningConfig +from nodelens.pruning import MagnitudePruning, PruningConfig config = PruningConfig(amount=0.5, structured=True) strategy = MagnitudePruning(config) @@ -62,7 +62,7 @@ mask = strategy.prune(layer, amount=0.5) ### Eigenvector Pruning ```python -from alignment.pruning import EigenvectorPruning, PruningConfig +from nodelens.pruning import EigenvectorPruning, PruningConfig config = PruningConfig(amount=0.5, structured=True, pruning_mode='low') strategy = EigenvectorPruning(config=config) @@ -74,7 +74,7 @@ mask = strategy.prune(layer, inputs=activations) ### Movement Pruning ```python -from alignment.pruning import MovementPruning +from nodelens.pruning import MovementPruning strategy = MovementPruning() @@ -91,7 +91,7 @@ mask = strategy.prune(layer, amount=0.5) ### Adaptive Sensitivity Pruning ```python -from alignment.pruning import AdaptiveSensitivityPruning +from nodelens.pruning import AdaptiveSensitivityPruning strategy = AdaptiveSensitivityPruning( target_sparsity=0.7, @@ -131,7 +131,7 @@ masks = strategy.prune_adaptive(model, layer_names, eval_fn=None, inputs_per_lay ### Cascading Pruning (Progressive) ```python -from alignment.pruning import CascadingAlignmentPruning, PruningConfig +from nodelens.pruning import CascadingAlignmentPruning, PruningConfig config = PruningConfig(amount=0.5, structured=True) strategy = CascadingAlignmentPruning( @@ -147,7 +147,7 @@ masks = strategy.prune_model(model, get_layer_inputs_fn) ## Using the Pipeline ```python -from alignment.pruning import run_pruning_pipeline, PruningPipelineOptions +from nodelens.pruning import run_pruning_pipeline, PruningPipelineOptions options = PruningPipelineOptions( distribution="uniform", # or "global_threshold" diff --git a/src/alignment/pruning/__init__.py b/src/nodelens/pruning/__init__.py similarity index 97% rename from src/alignment/pruning/__init__.py rename to src/nodelens/pruning/__init__.py index 
91fb5e49..8cf8b321 100644 --- a/src/alignment/pruning/__init__.py +++ b/src/nodelens/pruning/__init__.py @@ -17,7 +17,7 @@ Example: Basic pruning:: - from alignment.pruning import get_pruning_strategy, PruningConfig + from nodelens.pruning import get_pruning_strategy, PruningConfig # Prune low-magnitude weights strategy = get_pruning_strategy('magnitude') @@ -29,7 +29,7 @@ Parallel pruning:: - from alignment.pruning.strategies import ParallelModePruning + from nodelens.pruning.strategies import ParallelModePruning # Apply multiple modes simultaneously strategy = ParallelModePruning(modes=['low', 'high', 'random']) diff --git a/src/alignment/pruning/base.py b/src/nodelens/pruning/base.py similarity index 100% rename from src/alignment/pruning/base.py rename to src/nodelens/pruning/base.py diff --git a/src/alignment/pruning/dependency_aware.py b/src/nodelens/pruning/dependency_aware.py similarity index 94% rename from src/alignment/pruning/dependency_aware.py rename to src/nodelens/pruning/dependency_aware.py index d11098ec..c648ffb9 100644 --- a/src/alignment/pruning/dependency_aware.py +++ b/src/nodelens/pruning/dependency_aware.py @@ -85,24 +85,40 @@ def _get_layer_type(self, module: nn.Module) -> str: else: return "other" - def _detect_skip_connections(self): + def _detect_skip_connections(self) -> None: """ - Detect skip/residual connections. + Mark residual-compatible convolution groups. - Heuristic: Look for layers with same in/out dimensions - that might be part of residual blocks. + This graph is built from module names, not a traced forward pass. We therefore + keep the rule conservative: only convolutions that preserve channel count and + spatial size are marked as residual-compatible candidates. The pruning code + still validates tensor shapes before applying masks. 
""" - # This is a simplified heuristic - # For production, would need more sophisticated analysis - # (e.g., tracing actual forward pass) + residual_candidates: Dict[tuple[str, int], List[str]] = {} for name, dep in self.graph.items(): - if dep.layer_type == "conv": - module = dep.module - if hasattr(module, "in_channels") and hasattr(module, "out_channels"): - if module.in_channels == module.out_channels: - # Potential residual connection - dep.skip_connection_with = [] # Placeholder + if dep.layer_type != "conv": + continue + + module = dep.module + if not (hasattr(module, "in_channels") and hasattr(module, "out_channels")): + continue + if module.in_channels != module.out_channels: + continue + + stride = getattr(module, "stride", 1) + stride_values = stride if isinstance(stride, tuple) else (stride,) + if any(int(value) != 1 for value in stride_values): + continue + + parent = name.rsplit(".", 1)[0] if "." in name else "" + key = (parent, int(module.out_channels)) + residual_candidates.setdefault(key, []).append(name) + + for names in residual_candidates.values(): + for name in names: + peers = [candidate for candidate in names if candidate != name] + self.graph[name].skip_connection_with = peers logger.debug("Skip connection detection complete") diff --git a/src/alignment/pruning/distribution.py b/src/nodelens/pruning/distribution.py similarity index 95% rename from src/alignment/pruning/distribution.py rename to src/nodelens/pruning/distribution.py index 6621d2cb..da792872 100644 --- a/src/alignment/pruning/distribution.py +++ b/src/nodelens/pruning/distribution.py @@ -109,6 +109,15 @@ def compute_distribution( else: raise ValueError(f"Unknown strategy: {self.strategy}") + @staticmethod + def _layer_param_count(model: nn.Module, layer_name: str) -> int: + """Return the number of weight parameters for a named layer.""" + modules = dict(model.named_modules()) + layer = modules.get(layer_name) + if layer is None or not hasattr(layer, "weight"): + return 0 + 
return int(layer.weight.numel()) + def _uniform_distribution(self, layer_names: List[str]) -> Dict[str, float]: """Same amount for all layers.""" amount = max(self.min_amount, min(self.max_amount, self.target_sparsity)) @@ -223,8 +232,8 @@ def _global_knapsack_distribution(self, layer_scores: Dict[str, torch.Tensor], m # Removing row i of gate/up and col i of down. # Params = 2 * d_in + d_out (usually d_in=d_model) # We need to inspect the module to be sure - try: - layer_mod = dict(model.named_modules())[name] + layer_mod = dict(model.named_modules()).get(name) + if layer_mod is not None: # Rough heuristic: params / num_channels if hasattr(layer_mod, "weight"): # Simple linear layer @@ -237,7 +246,7 @@ def _global_knapsack_distribution(self, layer_scores: Dict[str, torch.Tensor], m cost_per_channel = 1.0 costs = torch.full_like(scores, cost_per_channel) - except Exception: + else: costs = torch.full_like(scores, 1.0) total_cost_model += costs.sum().item() @@ -317,18 +326,14 @@ def _size_proportional_distribution(self, model: nn.Module, layer_names: List[st Larger layers pruned more (have more params to spare). 
""" - # Get layer sizes layer_sizes = {} total_size = 0 for name in layer_names: - try: - layer = dict(model.named_modules())[name] - size = layer.weight.numel() + size = self._layer_param_count(model, name) + if size > 0: layer_sizes[name] = size total_size += size - except Exception: - continue if total_size == 0: return {name: self.target_sparsity for name in layer_names} @@ -378,14 +383,14 @@ def _importance_weighted_distribution(self, layer_scores: Dict[str, torch.Tensor amount = self.target_sparsity + 0.3 * (1 - norm_importance) - 0.15 amounts[name] = max(self.min_amount, min(self.max_amount, amount)) - # Normalize to hit target - # Need sizes layer_sizes = {} for name in layer_scores.keys(): - try: - layer_sizes[name] = dict(model.named_modules())[name].weight.numel() - except: - layer_sizes[name] = 1000 # dummy + size = self._layer_param_count(model, name) + if size <= 0: + # Scores are per prunable unit, so this fallback preserves relative + # layer weights without inventing a fixed fallback size. 
+ size = int(layer_scores[name].numel()) + layer_sizes[name] = size amounts = self._normalize_to_target(amounts, layer_sizes) @@ -405,13 +410,9 @@ def _cascading_distribution(self, layer_scores: Dict[str, torch.Tensor], model: # Default order sorted_layers = layer_names - # Compute total params and target layer_sizes = {} for name in layer_names: - try: - layer_sizes[name] = dict(model.named_modules())[name].weight.numel() - except: - layer_sizes[name] = 0 + layer_sizes[name] = self._layer_param_count(model, name) total_params = sum(layer_sizes.values()) target_to_remove = int(total_params * self.target_sparsity) diff --git a/src/alignment/pruning/pipeline.py b/src/nodelens/pruning/pipeline.py similarity index 100% rename from src/alignment/pruning/pipeline.py rename to src/nodelens/pruning/pipeline.py diff --git a/src/alignment/pruning/strategies/__init__.py b/src/nodelens/pruning/strategies/__init__.py similarity index 95% rename from src/alignment/pruning/strategies/__init__.py rename to src/nodelens/pruning/strategies/__init__.py index 9b917cab..f4a4c767 100644 --- a/src/alignment/pruning/strategies/__init__.py +++ b/src/nodelens/pruning/strategies/__init__.py @@ -22,13 +22,13 @@ # Provide import-time stability for users who don't have CHIP code vendored. 
class CHIPPruning: # type: ignore def __init__(self, *args, **kwargs): - raise ImportError("CHIPPruning is unavailable: missing `alignment.pruning.strategies.chip`.") + raise ImportError("CHIPPruning is unavailable: missing `nodelens.pruning.strategies.chip`.") def compute_chip_scores(*args, **kwargs): # type: ignore - raise ImportError("compute_chip_scores is unavailable: missing `alignment.pruning.strategies.chip`.") + raise ImportError("compute_chip_scores is unavailable: missing `nodelens.pruning.strategies.chip`.") def chip_score_channels(*args, **kwargs): # type: ignore - raise ImportError("chip_score_channels is unavailable: missing `alignment.pruning.strategies.chip`.") + raise ImportError("chip_score_channels is unavailable: missing `nodelens.pruning.strategies.chip`.") __all__ = [ diff --git a/src/alignment/pruning/strategies/adaptive.py b/src/nodelens/pruning/strategies/adaptive.py similarity index 100% rename from src/alignment/pruning/strategies/adaptive.py rename to src/nodelens/pruning/strategies/adaptive.py diff --git a/src/alignment/pruning/strategies/alignment_based.py b/src/nodelens/pruning/strategies/alignment_based.py similarity index 97% rename from src/alignment/pruning/strategies/alignment_based.py rename to src/nodelens/pruning/strategies/alignment_based.py index 1822810b..859ba035 100644 --- a/src/alignment/pruning/strategies/alignment_based.py +++ b/src/nodelens/pruning/strategies/alignment_based.py @@ -34,8 +34,8 @@ class AlignmentPruning(BasePruningStrategy): - Less meaningful since alignment is a neuron-level property Examples: - >>> from alignment.pruning.strategies import AlignmentPruning - >>> from alignment.pruning import PruningConfig + >>> from nodelens.pruning.strategies import AlignmentPruning + >>> from nodelens.pruning import PruningConfig >>> >>> # Structured pruning - remove entire neurons with low alignment >>> config = PruningConfig( @@ -109,7 +109,7 @@ def compute_importance_scores(self, module: nn.Module, inputs: 
Optional[torch.Te raise ValueError(f"Module {module} does not have weights") if inputs is None: - raise ValueError("AlignmentPruning requires inputs to compute alignment. " "Pass inputs to the prune() method.") + raise ValueError("AlignmentPruning requires inputs to compute alignment scores. " "Pass inputs to the prune() method.") weights = module.weight.data @@ -215,7 +215,7 @@ class HybridPruning(BasePruningStrategy): alignment metrics for more informed pruning decisions. Examples: - >>> from alignment.pruning.strategies import HybridPruning + >>> from nodelens.pruning.strategies import HybridPruning >>> >>> # Combine magnitude and Rayleigh quotient >>> strategy = HybridPruning( @@ -324,8 +324,8 @@ class GlobalAlignmentPruning(AlignmentPruning): independently to achieve the same sparsity level. Examples: - >>> from alignment.pruning.strategies import GlobalAlignmentPruning - >>> from alignment.pruning import PruningConfig + >>> from nodelens.pruning.strategies import GlobalAlignmentPruning + >>> from nodelens.pruning import PruningConfig >>> >>> # Global pruning - removes 50% of neurons globally >>> config = PruningConfig( diff --git a/src/alignment/pruning/strategies/cascading.py b/src/nodelens/pruning/strategies/cascading.py similarity index 98% rename from src/alignment/pruning/strategies/cascading.py rename to src/nodelens/pruning/strategies/cascading.py index d9e6f979..b912713e 100644 --- a/src/alignment/pruning/strategies/cascading.py +++ b/src/nodelens/pruning/strategies/cascading.py @@ -31,8 +31,8 @@ class CascadingAlignmentPruning(BasePruningStrategy): 3. Later layers see the effects of earlier pruning Examples: - >>> from alignment.pruning.strategies import CascadingAlignmentPruning - >>> from alignment.pruning import PruningConfig + >>> from nodelens.pruning.strategies import CascadingAlignmentPruning + >>> from nodelens.pruning import PruningConfig >>> >>> config = PruningConfig( ... 
amount=0.5, diff --git a/src/alignment/pruning/strategies/chip.py b/src/nodelens/pruning/strategies/chip.py similarity index 100% rename from src/alignment/pruning/strategies/chip.py rename to src/nodelens/pruning/strategies/chip.py diff --git a/src/alignment/pruning/strategies/cluster_aware.py b/src/nodelens/pruning/strategies/cluster_aware.py similarity index 100% rename from src/alignment/pruning/strategies/cluster_aware.py rename to src/nodelens/pruning/strategies/cluster_aware.py diff --git a/src/alignment/pruning/strategies/eigenvector.py b/src/nodelens/pruning/strategies/eigenvector.py similarity index 98% rename from src/alignment/pruning/strategies/eigenvector.py rename to src/nodelens/pruning/strategies/eigenvector.py index cccc94b6..aa85f9a2 100644 --- a/src/alignment/pruning/strategies/eigenvector.py +++ b/src/nodelens/pruning/strategies/eigenvector.py @@ -31,8 +31,8 @@ class EigenvectorPruning(BasePruningStrategy): - 'high': Prune neurons aligned with high-variance directions (ablation) Examples: - >>> from alignment.pruning.strategies import EigenvectorPruning - >>> from alignment.pruning import PruningConfig + >>> from nodelens.pruning.strategies import EigenvectorPruning + >>> from nodelens.pruning import PruningConfig >>> >>> config = PruningConfig(amount=0.5, structured=True, pruning_mode='low') >>> strategy = EigenvectorPruning(config=config) diff --git a/src/alignment/pruning/strategies/external/__init__.py b/src/nodelens/pruning/strategies/external/__init__.py similarity index 100% rename from src/alignment/pruning/strategies/external/__init__.py rename to src/nodelens/pruning/strategies/external/__init__.py diff --git a/src/alignment/pruning/strategies/external/wanda/README.md b/src/nodelens/pruning/strategies/external/wanda/README.md similarity index 80% rename from src/alignment/pruning/strategies/external/wanda/README.md rename to src/nodelens/pruning/strategies/external/wanda/README.md index 62485c5f..19f187e6 100644 --- 
a/src/alignment/pruning/strategies/external/wanda/README.md +++ b/src/nodelens/pruning/strategies/external/wanda/README.md @@ -5,16 +5,16 @@ This directory vendors a reference implementation of **Wanda** (Sun et al., 2023 ### Purpose - **Reference-only**: this code is kept to make it easy to audit our internal Wanda baseline against a known implementation. -- Our comparisons use **channel-adapted baselines** implemented in `src/alignment/pruning/strategies/llm_baselines.py`. +- Our comparisons use **channel-adapted baselines** implemented in `src/nodelens/pruning/strategies/llm_baselines.py`. - When we run a *reference-faithful* unstructured Wanda reproduction baseline, we use the internal implementation (for integration/consistency), but keep this reference code for cross-checking. ### Provenance This code was merged via `origin/iss117_acllm_v3` (see merge commit on the target branch) and corresponds to the files: -- `src/alignment/pruning/strategies/external/wanda/data.py` -- `src/alignment/pruning/strategies/external/wanda/layerwrapper.py` -- `src/alignment/pruning/strategies/external/wanda/prune.py` +- `src/nodelens/pruning/strategies/external/wanda/data.py` +- `src/nodelens/pruning/strategies/external/wanda/layerwrapper.py` +- `src/nodelens/pruning/strategies/external/wanda/prune.py` ### Key details to match diff --git a/src/alignment/pruning/strategies/external/wanda/__init__.py b/src/nodelens/pruning/strategies/external/wanda/__init__.py similarity index 100% rename from src/alignment/pruning/strategies/external/wanda/__init__.py rename to src/nodelens/pruning/strategies/external/wanda/__init__.py diff --git a/src/alignment/pruning/strategies/external/wanda/data.py b/src/nodelens/pruning/strategies/external/wanda/data.py similarity index 100% rename from src/alignment/pruning/strategies/external/wanda/data.py rename to src/nodelens/pruning/strategies/external/wanda/data.py diff --git a/src/alignment/pruning/strategies/external/wanda/layerwrapper.py 
b/src/nodelens/pruning/strategies/external/wanda/layerwrapper.py similarity index 100% rename from src/alignment/pruning/strategies/external/wanda/layerwrapper.py rename to src/nodelens/pruning/strategies/external/wanda/layerwrapper.py diff --git a/src/alignment/pruning/strategies/external/wanda/prune.py b/src/nodelens/pruning/strategies/external/wanda/prune.py similarity index 100% rename from src/alignment/pruning/strategies/external/wanda/prune.py rename to src/nodelens/pruning/strategies/external/wanda/prune.py diff --git a/src/alignment/pruning/strategies/generalized_taylor.py b/src/nodelens/pruning/strategies/generalized_taylor.py similarity index 100% rename from src/alignment/pruning/strategies/generalized_taylor.py rename to src/nodelens/pruning/strategies/generalized_taylor.py diff --git a/src/alignment/pruning/strategies/gradient.py b/src/nodelens/pruning/strategies/gradient.py similarity index 97% rename from src/alignment/pruning/strategies/gradient.py rename to src/nodelens/pruning/strategies/gradient.py index 6d6ec38d..0127cf5d 100644 --- a/src/alignment/pruning/strategies/gradient.py +++ b/src/nodelens/pruning/strategies/gradient.py @@ -22,7 +22,7 @@ class GradientPruning(BasePruningStrategy): (Taylor approximation of loss change). Examples: - >>> from alignment.pruning.strategies import GradientPruning + >>> from nodelens.pruning.strategies import GradientPruning >>> >>> # Using gradient magnitude >>> strategy = GradientPruning(mode='gradient') @@ -98,7 +98,7 @@ class FisherPruning(BasePruningStrategy): using gradient squares accumulated over multiple batches. Examples: - >>> from alignment.pruning.strategies import FisherPruning + >>> from nodelens.pruning.strategies import FisherPruning >>> strategy = FisherPruning() >>> >>> # Accumulate Fisher information over multiple batches @@ -201,7 +201,7 @@ class MomentumPruning(BasePruningStrategy): providing more stable pruning decisions over time. 
Examples: - >>> from alignment.pruning.strategies import MomentumPruning + >>> from nodelens.pruning.strategies import MomentumPruning >>> strategy = MomentumPruning(momentum=0.9) >>> >>> # Update importance with momentum over multiple iterations diff --git a/src/alignment/pruning/strategies/llm_baselines.py b/src/nodelens/pruning/strategies/llm_baselines.py similarity index 100% rename from src/alignment/pruning/strategies/llm_baselines.py rename to src/nodelens/pruning/strategies/llm_baselines.py diff --git a/src/alignment/pruning/strategies/magnitude.py b/src/nodelens/pruning/strategies/magnitude.py similarity index 94% rename from src/alignment/pruning/strategies/magnitude.py rename to src/nodelens/pruning/strategies/magnitude.py index f2f042ff..1bf94ee4 100644 --- a/src/alignment/pruning/strategies/magnitude.py +++ b/src/nodelens/pruning/strategies/magnitude.py @@ -21,8 +21,8 @@ class MagnitudePruning(BasePruningStrategy): the assumption that small weights contribute less to the network's output. Examples: - >>> from alignment.pruning.strategies import MagnitudePruning - >>> from alignment.pruning import PruningConfig + >>> from nodelens.pruning.strategies import MagnitudePruning + >>> from nodelens.pruning import PruningConfig >>> >>> # Basic usage >>> strategy = MagnitudePruning() @@ -61,8 +61,8 @@ class IterativeMagnitudePruning(IterativePruningStrategy): allowing the network to adapt between pruning steps. Examples: - >>> from alignment.pruning.strategies import IterativeMagnitudePruning - >>> from alignment.pruning import PruningConfig + >>> from nodelens.pruning.strategies import IterativeMagnitudePruning + >>> from nodelens.pruning import PruningConfig >>> >>> config = PruningConfig( ... amount=0.9, # Final sparsity @@ -106,7 +106,7 @@ class GlobalMagnitudePruning(BasePruningStrategy): the entire network rather than per-layer. 
Examples: - >>> from alignment.pruning.strategies import GlobalMagnitudePruning + >>> from nodelens.pruning.strategies import GlobalMagnitudePruning >>> strategy = GlobalMagnitudePruning() >>> >>> # Prune entire model to 70% sparsity diff --git a/src/alignment/pruning/strategies/metric_based.py b/src/nodelens/pruning/strategies/metric_based.py similarity index 100% rename from src/alignment/pruning/strategies/metric_based.py rename to src/nodelens/pruning/strategies/metric_based.py diff --git a/src/alignment/pruning/strategies/movement.py b/src/nodelens/pruning/strategies/movement.py similarity index 100% rename from src/alignment/pruning/strategies/movement.py rename to src/nodelens/pruning/strategies/movement.py diff --git a/src/alignment/pruning/strategies/parallel.py b/src/nodelens/pruning/strategies/parallel.py similarity index 100% rename from src/alignment/pruning/strategies/parallel.py rename to src/nodelens/pruning/strategies/parallel.py diff --git a/src/alignment/pruning/strategies/parallel_batch.py b/src/nodelens/pruning/strategies/parallel_batch.py similarity index 100% rename from src/alignment/pruning/strategies/parallel_batch.py rename to src/nodelens/pruning/strategies/parallel_batch.py diff --git a/src/alignment/pruning/strategies/random.py b/src/nodelens/pruning/strategies/random.py similarity index 97% rename from src/alignment/pruning/strategies/random.py rename to src/nodelens/pruning/strategies/random.py index c16e2c5f..ba3711fd 100644 --- a/src/alignment/pruning/strategies/random.py +++ b/src/nodelens/pruning/strategies/random.py @@ -22,8 +22,8 @@ class RandomPruning(BasePruningStrategy): pruning strategies. 
Examples: - >>> from alignment.pruning.strategies import RandomPruning - >>> from alignment.pruning import PruningConfig + >>> from nodelens.pruning.strategies import RandomPruning + >>> from nodelens.pruning import PruningConfig >>> >>> # Basic random pruning >>> strategy = RandomPruning() @@ -103,7 +103,7 @@ class LayerwiseRandomPruning(RandomPruning): different layers while still using random selection. Examples: - >>> from alignment.pruning.strategies import LayerwiseRandomPruning + >>> from nodelens.pruning.strategies import LayerwiseRandomPruning >>> >>> # Define per-layer sparsity >>> layer_sparsity = { @@ -168,7 +168,7 @@ class BernoulliPruning(BasePruningStrategy): analysis and stochastic pruning approaches. Examples: - >>> from alignment.pruning.strategies import BernoulliPruning + >>> from nodelens.pruning.strategies import BernoulliPruning >>> >>> # Each weight has 50% chance of being pruned >>> strategy = BernoulliPruning(probability=0.5) diff --git a/src/alignment/quantization/__init__.py b/src/nodelens/quantization/__init__.py similarity index 100% rename from src/alignment/quantization/__init__.py rename to src/nodelens/quantization/__init__.py diff --git a/src/alignment/quantization/analysis.py b/src/nodelens/quantization/analysis.py similarity index 100% rename from src/alignment/quantization/analysis.py rename to src/nodelens/quantization/analysis.py diff --git a/src/alignment/quantization/ptq.py b/src/nodelens/quantization/ptq.py similarity index 100% rename from src/alignment/quantization/ptq.py rename to src/nodelens/quantization/ptq.py diff --git a/src/alignment/services/__init__.py b/src/nodelens/services/__init__.py similarity index 100% rename from src/alignment/services/__init__.py rename to src/nodelens/services/__init__.py diff --git a/src/alignment/services/activation_capture.py b/src/nodelens/services/activation_capture.py similarity index 99% rename from src/alignment/services/activation_capture.py rename to 
src/nodelens/services/activation_capture.py index 7b391e91..5404859f 100644 --- a/src/alignment/services/activation_capture.py +++ b/src/nodelens/services/activation_capture.py @@ -114,7 +114,7 @@ def capture( if mode in {"unfold", "patchwise", "batch_patch_combined"}: try: # Use the canonical preprocessing utilities that distinguish _input vs _output - from alignment.dataops.processing import preprocess_layer_activations + from nodelens.dataops.processing import preprocess_layer_activations model = getattr(self.model_wrapper, "model", None) or getattr(self.model_wrapper, "_model", None) if model is None: diff --git a/src/alignment/services/mask_ops.py b/src/nodelens/services/mask_ops.py similarity index 100% rename from src/alignment/services/mask_ops.py rename to src/nodelens/services/mask_ops.py diff --git a/src/alignment/services/scoring.py b/src/nodelens/services/scoring.py similarity index 100% rename from src/alignment/services/scoring.py rename to src/nodelens/services/scoring.py diff --git a/src/alignment/training/README.md b/src/nodelens/training/README.md similarity index 87% rename from src/alignment/training/README.md rename to src/nodelens/training/README.md index 883c87b5..c3a5e083 100644 --- a/src/alignment/training/README.md +++ b/src/nodelens/training/README.md @@ -24,7 +24,7 @@ Training utilities, trainers, and evaluation functions. 
### Training ```python -from alignment.training import ExperimentTrainer, ExperimentTrainingConfig +from nodelens.training import ExperimentTrainer, ExperimentTrainingConfig config = ExperimentTrainingConfig( epochs=10, @@ -38,7 +38,7 @@ trainer.train(train_loader, val_loader) ### Evaluation ```python -from alignment.training import evaluate_classification, evaluate_perplexity +from nodelens.training import evaluate_classification, evaluate_perplexity # Classification results = evaluate_classification(model, test_loader, device="cuda") @@ -52,7 +52,7 @@ results = evaluate_perplexity(model, text_loader, device="cuda") ### Evaluation Manager ```python -from alignment.training import EvaluationManager +from nodelens.training import EvaluationManager manager = EvaluationManager(task="classification") diff --git a/src/alignment/training/__init__.py b/src/nodelens/training/__init__.py similarity index 100% rename from src/alignment/training/__init__.py rename to src/nodelens/training/__init__.py diff --git a/src/alignment/training/base.py b/src/nodelens/training/base.py similarity index 100% rename from src/alignment/training/base.py rename to src/nodelens/training/base.py diff --git a/src/alignment/training/callbacks/__init__.py b/src/nodelens/training/callbacks/__init__.py similarity index 100% rename from src/alignment/training/callbacks/__init__.py rename to src/nodelens/training/callbacks/__init__.py diff --git a/src/alignment/training/callbacks/alignment_callback.py b/src/nodelens/training/callbacks/alignment_callback.py similarity index 99% rename from src/alignment/training/callbacks/alignment_callback.py rename to src/nodelens/training/callbacks/alignment_callback.py index 5ff83110..f5eb1cd4 100644 --- a/src/alignment/training/callbacks/alignment_callback.py +++ b/src/nodelens/training/callbacks/alignment_callback.py @@ -27,7 +27,7 @@ class AlignmentMetricsCallback: - Supports any tracker (WandB, TensorBoard, etc.) 
Example: - >>> from alignment.training.callbacks import AlignmentMetricsCallback + >>> from nodelens.training.callbacks import AlignmentMetricsCallback >>> callback = AlignmentMetricsCallback( ... metrics={'rq': get_metric('rayleigh_quotient')}, ... layers=['conv1', 'fc1'], diff --git a/src/alignment/training/evaluation.py b/src/nodelens/training/evaluation.py similarity index 100% rename from src/alignment/training/evaluation.py rename to src/nodelens/training/evaluation.py diff --git a/src/alignment/training/experiment_trainer.py b/src/nodelens/training/experiment_trainer.py similarity index 100% rename from src/alignment/training/experiment_trainer.py rename to src/nodelens/training/experiment_trainer.py diff --git a/src/alignment/training/multi_network.py b/src/nodelens/training/multi_network.py similarity index 100% rename from src/alignment/training/multi_network.py rename to src/nodelens/training/multi_network.py diff --git a/tests/README.md b/tests/README.md index 346ca26d..1669c410 100644 --- a/tests/README.md +++ b/tests/README.md @@ -8,7 +8,7 @@ Unit and integration tests. 
pytest tests/ pytest tests/unit/ -v pytest tests/unit/test_models.py -pytest tests/ --cov=alignment +pytest tests/ --cov=nodelens ``` ## Structure diff --git a/tests/integration/test_all_completed.py b/tests/integration/test_all_completed.py index 164f9970..0075c437 100644 --- a/tests/integration/test_all_completed.py +++ b/tests/integration/test_all_completed.py @@ -18,23 +18,23 @@ logger = logging.getLogger(__name__) -def test_imports(): +def _check_imports(): """Test all imports work correctly.""" logger.info("Testing imports...") try: - import alignment + import nodelens # Core / registry - from alignment.core import ModelWrapper # noqa: F401 - from alignment.metrics import METRIC_REGISTRY # noqa: F401 - from alignment.metrics.base import MetricComputer # noqa: F401 + from nodelens.core import METRIC_REGISTRY # noqa: F401 + from nodelens.metrics import get_metric, list_metrics # noqa: F401 + from nodelens.models import ModelWrapper # noqa: F401 # Pruning + services - from alignment.pruning import get_pruning_strategy # noqa: F401 - from alignment.services import MaskOperations # noqa: F401 + from nodelens.pruning import get_pruning_strategy # noqa: F401 + from nodelens.services import MaskOperations # noqa: F401 - logger.info(f"OK alignment imports OK (version={getattr(alignment, '__version__', 'unknown')})") + logger.info(f"OK NodeLens imports OK (version={getattr(nodelens, '__version__', 'unknown')})") logger.info("OK All imports successful") return True @@ -43,41 +43,33 @@ def test_imports(): return False -def test_metric_computer(): +def _check_metric_computer(): """Test MetricComputer is functional.""" logger.info("\nTesting MetricComputer...") try: - from alignment.metrics import METRIC_REGISTRY - from alignment.metrics.base import MetricComputer - - # Create metrics - metrics = { - "rayleigh_quotient": METRIC_REGISTRY.get_metric("rayleigh_quotient"), - "mutual_information": METRIC_REGISTRY.get_metric("mutual_information"), - } - - # Create computer - 
computer = MetricComputer(metrics) + from nodelens.metrics import get_metric - # Test computation weights = torch.randn(10, 20) + inputs = torch.randn(32, 20) outputs = torch.randn(32, 10) - results = computer.compute_all(weights=weights, outputs=outputs) + rq = get_metric("rayleigh_quotient").compute(inputs=inputs, weights=weights) + act = get_metric("activation_l2_norm").compute(outputs=outputs) - assert len(results) == 2 - assert "rayleigh_quotient" in results - assert "mutual_information" in results + assert rq.shape == (weights.shape[0],) + assert act.shape == (outputs.shape[1],) + assert torch.all(torch.isfinite(rq)) + assert torch.all(torch.isfinite(act)) - logger.info("OK MetricComputer is functional") + logger.info("OK metric registry and metric computation are functional") return True except Exception as e: logger.error(f"FAIL MetricComputer test failed: {e}") return False -def test_parallel_processing(): +def _check_parallel_processing(): """Test parallel processing is implemented.""" logger.info("\nTesting parallel processing...") @@ -85,9 +77,9 @@ def test_parallel_processing(): import torch.nn as nn from torch.utils.data import DataLoader, TensorDataset - from alignment.core import ModelWrapper - from alignment.metrics import METRIC_REGISTRY - from alignment.utils.batch_processing import compute_metrics_parallel + from nodelens.dataops.processing.batch import compute_metrics_parallel + from nodelens.metrics import get_metric + from nodelens.models import ModelWrapper # Create simple model and data model = nn.Sequential(nn.Linear(10, 20), nn.ReLU(), nn.Linear(20, 5)) @@ -96,50 +88,40 @@ def test_parallel_processing(): dataloader = DataLoader(dataset, batch_size=10) wrapper = ModelWrapper(model, tracked_layers=["0", "2"]) - metrics = {"rayleigh_quotient": METRIC_REGISTRY["rayleigh_quotient"]()} + metrics = {"activation_l2_norm": get_metric("activation_l2_norm")} - # Test parallel computation (will use single worker if only 1 GPU) - results = 
compute_metrics_parallel(wrapper, dataloader, metrics, num_workers=2) + # Force the single-device path so this remains a lightweight CI smoke test. + results = compute_metrics_parallel(wrapper, dataloader, metrics, num_workers=1, devices=[torch.device("cpu")]) assert isinstance(results, dict) - logger.info("OK Parallel processing is implemented") + assert set(results) == {"0", "2"} + logger.info("OK batch metric processing is functional") return True except Exception as e: logger.error(f"FAIL Parallel processing test failed: {e}") return False -def test_pruning_utilities(): +def _check_pruning_utilities(): """Test pruning utilities are complete.""" logger.info("\nTesting pruning utilities...") try: import torch.nn as nn - from alignment.utils.pruning import PruningUtilities, create_pruning_schedule + from nodelens.pruning import get_pruning_strategy # Create test layer layer = nn.Linear(10, 20) - # Test different pruning methods - methods = [ - ("magnitude", PruningUtilities.get_pruning_mask_magnitude), - ("random", PruningUtilities.get_pruning_mask_random), - ] - - for name, method in methods: - mask = method(layer.weight.data, amount=0.5) + for name in ["magnitude", "random"]: + strategy = get_pruning_strategy(name) + scores = strategy.compute_importance_scores(layer) + mask = strategy.create_pruning_mask(scores, amount=0.5) assert mask.shape == layer.weight.shape assert 0.4 < (mask == 0).float().mean() < 0.6 # Roughly 50% pruned logger.info(f" OK {name} pruning works") - # Test pruning schedule - schedule = create_pruning_schedule(0.0, 0.9, 0, 100, 10, "polynomial") - assert schedule(0) == 0.0 - assert schedule(100) == 0.9 - assert 0.0 < schedule(50) < 0.9 - logger.info(" OK Pruning schedules work") - logger.info("OK All pruning utilities functional") return True except Exception as e: @@ -147,12 +129,12 @@ def test_pruning_utilities(): return False -def test_experiment_tracking(): +def _check_experiment_tracking(): """Test experiment tracking is functional.""" 
logger.info("\nTesting experiment tracking...") try: - from alignment.utils.experiment_tracking import ExperimentTracker, create_tracker + from nodelens.experiments.tracking import ExperimentTracker, create_tracker # Test base tracker (doesn't raise NotImplementedError anymore) tracker = ExperimentTracker("test", {"key": "value"}) @@ -175,11 +157,16 @@ def test_experiment_tracking(): return False -def test_examples_exist(): +def _check_examples_exist(): """Test that comprehensive examples exist.""" logger.info("\nChecking examples...") - example_files = ["examples/quick_demo.py", "examples/advanced_analysis.py", "examples/comprehensive_demo.py", "examples/pruning_demo.py"] + example_files = [ + "configs/examples/alexnet_pruning.yaml", + "configs/examples/resnet_pruning.yaml", + "configs/examples/llama3_extended_analysis.yaml", + "projects/supernodes_scar/README.md", + ] all_exist = True for file in example_files: @@ -192,6 +179,30 @@ def test_examples_exist(): return all_exist +def test_imports(): + assert _check_imports() + + +def test_metric_computer(): + assert _check_metric_computer() + + +def test_parallel_processing(): + assert _check_parallel_processing() + + +def test_pruning_utilities(): + assert _check_pruning_utilities() + + +def test_experiment_tracking(): + assert _check_experiment_tracking() + + +def test_examples_exist(): + assert _check_examples_exist() + + def main(): """Run all tests.""" logger.info("=" * 60) @@ -199,12 +210,12 @@ def main(): logger.info("=" * 60) tests = [ - ("Imports", test_imports), - ("MetricComputer", test_metric_computer), - ("Parallel Processing", test_parallel_processing), - ("Pruning Utilities", test_pruning_utilities), - ("Experiment Tracking", test_experiment_tracking), - ("Examples", test_examples_exist), + ("Imports", _check_imports), + ("MetricComputer", _check_metric_computer), + ("Parallel Processing", _check_parallel_processing), + ("Pruning Utilities", _check_pruning_utilities), + ("Experiment Tracking", 
_check_experiment_tracking), + ("Examples", _check_examples_exist), ] results = {} diff --git a/tests/integration/test_cluster_pipeline.py b/tests/integration/test_cluster_pipeline.py index 110f8974..e5296ce3 100644 --- a/tests/integration/test_cluster_pipeline.py +++ b/tests/integration/test_cluster_pipeline.py @@ -10,10 +10,10 @@ import torch import torch.nn as nn -from alignment.analysis.cascade_analysis import CascadeAnalysis -from alignment.analysis.clustering.cross_layer_halo import CrossLayerHaloAnalysis -from alignment.analysis.clustering.metric_clustering import MetricSpaceClustering -from alignment.pruning.strategies.cluster_aware import ClusterAwarePruning, ClusterAwarePruningConfig +from nodelens.analysis.cascade_analysis import CascadeAnalysis +from nodelens.analysis.clustering.cross_layer_halo import CrossLayerHaloAnalysis +from nodelens.analysis.clustering.metric_clustering import MetricSpaceClustering +from nodelens.pruning.strategies.cluster_aware import ClusterAwarePruning, ClusterAwarePruningConfig # --------------------------------------------------------------------------- # Tiny model diff --git a/tests/unit/metrics/test_class_conditioned_rq.py b/tests/unit/metrics/test_class_conditioned_rq.py index 7c0c512f..b1827569 100644 --- a/tests/unit/metrics/test_class_conditioned_rq.py +++ b/tests/unit/metrics/test_class_conditioned_rq.py @@ -5,7 +5,7 @@ import pytest import torch -from alignment.metrics.rayleigh.rayleigh_quotient import RayleighQuotient +from nodelens.metrics.rayleigh.rayleigh_quotient import RayleighQuotient class TestClassConditionedRQ: diff --git a/tests/unit/metrics/test_information_metrics.py b/tests/unit/metrics/test_information_metrics.py index b55a68e6..babc38c4 100644 --- a/tests/unit/metrics/test_information_metrics.py +++ b/tests/unit/metrics/test_information_metrics.py @@ -5,7 +5,13 @@ import pytest import torch -from alignment.metrics.information import ConditionalMutualInformation, MIProjectionVsMeanInput, 
MutualInformationBinning, MutualInformationGaussian +from nodelens.metrics.information import ( + ConditionalMutualInformation, + LayerRedundancy, + MIProjectionVsMeanInput, + MutualInformationBinning, + MutualInformationGaussian, +) class TestMutualInformation: @@ -179,6 +185,41 @@ def test_force_cpu(self): assert not torch.isnan(scores).any() +class TestLayerRedundancy: + """Test layer-level redundancy aggregation and matrix output.""" + + def test_scalar_redundancy(self): + metric = LayerRedundancy() + inputs = torch.randn(40, 8) + weights = torch.randn(5, 8) + + score = metric.compute(inputs=inputs, weights=weights) + + assert score.shape == (1,) + assert torch.isfinite(score).all() + assert (score >= 0).all() + + def test_return_matrix(self): + metric = LayerRedundancy(return_matrix=True) + inputs = torch.randn(40, 8) + weights = torch.randn(5, 8) + + matrix = metric.compute(inputs=inputs, weights=weights) + + assert matrix.shape == (5, 5) + assert torch.allclose(matrix, matrix.T, atol=1e-5) + assert torch.allclose(matrix.diag(), torch.zeros(5), atol=1e-5) + assert (matrix >= 0).all() + + def test_return_matrix_size_guard(self): + metric = LayerRedundancy(return_matrix=True, max_matrix_neurons=3) + inputs = torch.randn(40, 8) + weights = torch.randn(5, 8) + + with pytest.raises(ValueError, match="would allocate"): + metric.compute(inputs=inputs, weights=weights) + + class TestEdgeCases: """Test edge cases for information metrics.""" diff --git a/tests/unit/metrics/test_neurips_controls.py b/tests/unit/metrics/test_neurips_controls.py index c353066e..43187128 100644 --- a/tests/unit/metrics/test_neurips_controls.py +++ b/tests/unit/metrics/test_neurips_controls.py @@ -4,7 +4,7 @@ import numpy as np -from alignment.experiments.cluster_experiments import _maybe_permute_task_target, _mi_in_proxy_from_signal_power +from nodelens.experiments.cluster_experiments import _maybe_permute_task_target, _mi_in_proxy_from_signal_power def 
test_target_permutation_none_is_identity(): diff --git a/tests/unit/metrics/test_pairwise_redundancy.py b/tests/unit/metrics/test_pairwise_redundancy.py index 51868e54..59c233e9 100644 --- a/tests/unit/metrics/test_pairwise_redundancy.py +++ b/tests/unit/metrics/test_pairwise_redundancy.py @@ -6,7 +6,7 @@ import torch import torch.nn as nn -from alignment.metrics.information.pairwise_gaussian import PairwiseRedundancyGaussian +from nodelens.metrics.information.pairwise_gaussian import PairwiseRedundancyGaussian class TestPairwiseRedundancyGaussian: diff --git a/tests/unit/metrics/test_rayleigh_metrics.py b/tests/unit/metrics/test_rayleigh_metrics.py index 14f4c890..00f9b77a 100644 --- a/tests/unit/metrics/test_rayleigh_metrics.py +++ b/tests/unit/metrics/test_rayleigh_metrics.py @@ -6,7 +6,7 @@ import pytest import torch -from alignment.metrics.rayleigh import RayleighQuotient, RayleighQuotientAlternative +from nodelens.metrics.rayleigh import RayleighQuotient, RayleighQuotientAlternative class TestRayleighQuotient: diff --git a/tests/unit/metrics/test_scientific_correctness.py b/tests/unit/metrics/test_scientific_correctness.py index e2215e1f..6c429184 100644 --- a/tests/unit/metrics/test_scientific_correctness.py +++ b/tests/unit/metrics/test_scientific_correctness.py @@ -10,9 +10,9 @@ import pytest import torch -from alignment.metrics.information.pairwise_gaussian import PairwiseRedundancyGaussian -from alignment.metrics.information.synergy_mmi import SynergyGaussianMMI -from alignment.metrics.rayleigh.rayleigh_quotient import RayleighQuotient +from nodelens.metrics.information.pairwise_gaussian import PairwiseRedundancyGaussian +from nodelens.metrics.information.synergy_mmi import SynergyGaussianMMI +from nodelens.metrics.rayleigh.rayleigh_quotient import RayleighQuotient class TestRedundancyCorrectness: diff --git a/tests/unit/metrics/test_similarity_metrics.py b/tests/unit/metrics/test_similarity_metrics.py index 5e06fcc1..8b72675d 100644 --- 
a/tests/unit/metrics/test_similarity_metrics.py +++ b/tests/unit/metrics/test_similarity_metrics.py @@ -5,7 +5,7 @@ import pytest import torch -from alignment.metrics.similarity import NodeCorrelation, NodeRedundancy, WeightCosineSimilarity, WeightDotSimilarity, WeightEuclideanDistance +from nodelens.metrics.similarity import NodeCorrelation, NodeRedundancy, WeightCosineSimilarity, WeightDotSimilarity, WeightEuclideanDistance class TestNodeRedundancy: diff --git a/tests/unit/metrics/test_synergy_continuous_target.py b/tests/unit/metrics/test_synergy_continuous_target.py index cdde6051..17c868e9 100644 --- a/tests/unit/metrics/test_synergy_continuous_target.py +++ b/tests/unit/metrics/test_synergy_continuous_target.py @@ -1,6 +1,6 @@ import torch -from alignment.metrics.information.synergy_continuous import SynergyContinuousTarget +from nodelens.metrics.information.synergy_continuous import SynergyContinuousTarget def test_synergy_continuous_target_aggregates_unfolded_outputs(device): diff --git a/tests/unit/models/test_hooks.py b/tests/unit/models/test_hooks.py index 88e5709a..755f008e 100644 --- a/tests/unit/models/test_hooks.py +++ b/tests/unit/models/test_hooks.py @@ -6,7 +6,7 @@ import torch import torch.nn as nn -from alignment.models.hooks import HookManager, PersistentHookManager +from nodelens.models.hooks import HookManager, PersistentHookManager class SimpleModel(nn.Module): diff --git a/tests/unit/services/test_activation_capture.py b/tests/unit/services/test_activation_capture.py index b34bea00..944272b3 100644 --- a/tests/unit/services/test_activation_capture.py +++ b/tests/unit/services/test_activation_capture.py @@ -1,8 +1,8 @@ import torch import torch.nn as nn -from alignment.models import ModelWrapper -from alignment.services.activation_capture import ActivationCaptureService +from nodelens.models import ModelWrapper +from nodelens.services.activation_capture import ActivationCaptureService def 
test_activation_capture_conv2d_unfold_matches_conv(device): diff --git a/tests/unit/services/test_mask_ops.py b/tests/unit/services/test_mask_ops.py index 25ad3b1a..b2ab479d 100644 --- a/tests/unit/services/test_mask_ops.py +++ b/tests/unit/services/test_mask_ops.py @@ -5,7 +5,7 @@ import pytest import torch -from alignment.services.mask_ops import MaskOperations +from nodelens.services.mask_ops import MaskOperations class TestMaskOperations: diff --git a/tests/unit/test_adaptive_pruning.py b/tests/unit/test_adaptive_pruning.py index 1a0c5b20..b56cd52b 100644 --- a/tests/unit/test_adaptive_pruning.py +++ b/tests/unit/test_adaptive_pruning.py @@ -6,7 +6,7 @@ import torch import torch.nn as nn -from alignment.pruning.strategies.adaptive import AdaptiveSensitivityPruning, LayerSensitivity +from nodelens.pruning.strategies.adaptive import AdaptiveSensitivityPruning, LayerSensitivity # --------------------------------------------------------------------------- # Helpers diff --git a/tests/unit/test_aggregation.py b/tests/unit/test_aggregation.py index aa94f8c6..cf6dcbde 100644 --- a/tests/unit/test_aggregation.py +++ b/tests/unit/test_aggregation.py @@ -6,9 +6,9 @@ import pytest -from alignment.analysis.aggregation.layers import LayerAggregator -from alignment.analysis.aggregation.metrics import MetricAggregator -from alignment.analysis.aggregation.results import ResultAggregator +from nodelens.analysis.aggregation.layers import LayerAggregator +from nodelens.analysis.aggregation.metrics import MetricAggregator +from nodelens.analysis.aggregation.results import ResultAggregator # ========================================================================= # MetricAggregator diff --git a/tests/unit/test_attention_scar_metrics.py b/tests/unit/test_attention_scar_metrics.py index d325d0ec..27402918 100644 --- a/tests/unit/test_attention_scar_metrics.py +++ b/tests/unit/test_attention_scar_metrics.py @@ -16,8 +16,8 @@ # Skip entire module if transformers not installed 
pytest.importorskip("transformers") -from alignment.experiments.base import BaseExperiment, ExperimentConfig -from alignment.experiments.llm_experiments import LLMAlignmentExperiment +from nodelens.experiments.base import BaseExperiment, ExperimentConfig +from nodelens.experiments.llm_experiments import LLMAlignmentExperiment class _TinySelfAttention(nn.Module): @@ -259,7 +259,7 @@ class TestAttentionSCARVisualization: def test_plot_attention_head_heatmap_import(self): """Test that visualization function exists and is importable.""" - from alignment.analysis.visualization import UnifiedVisualizer + from nodelens.analysis.visualization import UnifiedVisualizer viz = UnifiedVisualizer() assert hasattr(viz, "plot_attention_head_heatmap"), "Missing plot_attention_head_heatmap method" @@ -271,7 +271,7 @@ def test_plot_ffn_vs_attention_concentration(self): import os import tempfile - from alignment.analysis.visualization import UnifiedVisualizer + from nodelens.analysis.visualization import UnifiedVisualizer viz = UnifiedVisualizer() diff --git a/tests/unit/test_cascade_analysis.py b/tests/unit/test_cascade_analysis.py index 10bc01c7..176f8327 100644 --- a/tests/unit/test_cascade_analysis.py +++ b/tests/unit/test_cascade_analysis.py @@ -12,7 +12,7 @@ import torch import torch.nn as nn -from alignment.analysis.cascade_analysis import CascadeAnalysis, CascadeResult +from nodelens.analysis.cascade_analysis import CascadeAnalysis, CascadeResult # --------------------------------------------------------------------------- # Tiny model + dataset diff --git a/tests/unit/test_checkpoint.py b/tests/unit/test_checkpoint.py index e893866c..55b9f5f5 100644 --- a/tests/unit/test_checkpoint.py +++ b/tests/unit/test_checkpoint.py @@ -9,7 +9,7 @@ import torch import torch.nn as nn -from alignment.infrastructure.storage.checkpoint import load_checkpoint, save_checkpoint, save_model_for_inference +from nodelens.infrastructure.storage.checkpoint import load_checkpoint, save_checkpoint, 
save_model_for_inference class DummyModelWithHooks(nn.Module): diff --git a/tests/unit/test_cluster_aware_pruning.py b/tests/unit/test_cluster_aware_pruning.py index d40e1537..38f45796 100644 --- a/tests/unit/test_cluster_aware_pruning.py +++ b/tests/unit/test_cluster_aware_pruning.py @@ -14,7 +14,7 @@ import torch import torch.nn as nn -from alignment.pruning.strategies.cluster_aware import ClusterAwarePruning, ClusterAwarePruningConfig, CompositePruning +from nodelens.pruning.strategies.cluster_aware import ClusterAwarePruning, ClusterAwarePruningConfig, CompositePruning # --------------------------------------------------------------------------- # Helpers diff --git a/tests/unit/test_conditional_metrics.py b/tests/unit/test_conditional_metrics.py index 2f0c9b02..776590de 100644 --- a/tests/unit/test_conditional_metrics.py +++ b/tests/unit/test_conditional_metrics.py @@ -5,7 +5,7 @@ import pytest import torch -from alignment.metrics.conditional_metrics import ConditionalActivationNorm, ConditionalMIGaussian, ConditionalRayleighQuotient, DeltaRQ, MIAboutClass +from nodelens.metrics.conditional_metrics import ConditionalActivationNorm, ConditionalMIGaussian, ConditionalRayleighQuotient, DeltaRQ, MIAboutClass # --------------------------------------------------------------------------- # Helpers diff --git a/tests/unit/test_config_loader.py b/tests/unit/test_config_loader.py index 3955b2cf..6ffe325e 100644 --- a/tests/unit/test_config_loader.py +++ b/tests/unit/test_config_loader.py @@ -7,7 +7,7 @@ import pytest import yaml -from alignment.configs.config_loader import ( +from nodelens.configs.config_loader import ( METRIC_ORIGINAL_TO_UNIFIED, METRIC_UNIFIED_TO_ORIGINAL, _convert_unified_to_original, @@ -251,7 +251,7 @@ def test_load_unsupported_format(self, tmp_path): load_config(fpath) def test_save_yaml(self, tmp_path): - from alignment.experiments.base import ExperimentConfig + from nodelens.experiments.base import ExperimentConfig config = 
ExperimentConfig(name="save_test") fpath = tmp_path / "saved.yaml" @@ -261,7 +261,7 @@ def test_save_yaml(self, tmp_path): assert loaded["name"] == "save_test" def test_save_json(self, tmp_path): - from alignment.experiments.base import ExperimentConfig + from nodelens.experiments.base import ExperimentConfig config = ExperimentConfig(name="save_json") fpath = tmp_path / "saved.json" @@ -270,7 +270,7 @@ def test_save_json(self, tmp_path): assert loaded["name"] == "save_json" def test_save_unsupported_format(self, tmp_path): - from alignment.experiments.base import ExperimentConfig + from nodelens.experiments.base import ExperimentConfig config = ExperimentConfig(name="test") with pytest.raises(ValueError, match="Unsupported"): diff --git a/tests/unit/test_config_validator.py b/tests/unit/test_config_validator.py index 29585164..4ae7efb4 100644 --- a/tests/unit/test_config_validator.py +++ b/tests/unit/test_config_validator.py @@ -3,7 +3,7 @@ check_compatibility). """ -from alignment.configs.config_validator import check_compatibility, validate_config, validate_experiment_config +from nodelens.configs.config_validator import check_compatibility, validate_config, validate_experiment_config # ========================================================================= # validate_config diff --git a/tests/unit/test_cross_layer_halo.py b/tests/unit/test_cross_layer_halo.py index d4583e86..c2953f48 100644 --- a/tests/unit/test_cross_layer_halo.py +++ b/tests/unit/test_cross_layer_halo.py @@ -11,7 +11,7 @@ import numpy as np import pytest -from alignment.analysis.clustering.cross_layer_halo import CrossLayerHaloAnalysis, HaloResult +from nodelens.analysis.clustering.cross_layer_halo import CrossLayerHaloAnalysis, HaloResult # --------------------------------------------------------------------------- # Fixtures diff --git a/tests/unit/test_cross_layer_metrics.py b/tests/unit/test_cross_layer_metrics.py index 75dd815e..855143f9 100644 --- 
a/tests/unit/test_cross_layer_metrics.py +++ b/tests/unit/test_cross_layer_metrics.py @@ -9,7 +9,7 @@ import pytest import torch -from alignment.metrics.cross_layer import compute_downstream_importance, compute_within_layer_redundancy +from nodelens.metrics.cross_layer import compute_downstream_importance, compute_within_layer_redundancy # --------------------------------------------------------------------------- # Tests: compute_downstream_importance diff --git a/tests/unit/test_dependency_aware.py b/tests/unit/test_dependency_aware.py index dd57d1b6..01821bc4 100644 --- a/tests/unit/test_dependency_aware.py +++ b/tests/unit/test_dependency_aware.py @@ -5,7 +5,7 @@ import torch import torch.nn as nn -from alignment.pruning.dependency_aware import DependencyAwarePruning +from nodelens.pruning.dependency_aware import DependencyAwarePruning # --------------------------------------------------------------------------- # Helpers diff --git a/tests/unit/test_dependency_aware_pruning.py b/tests/unit/test_dependency_aware_pruning.py index 43e46a71..0ebe4952 100644 --- a/tests/unit/test_dependency_aware_pruning.py +++ b/tests/unit/test_dependency_aware_pruning.py @@ -5,7 +5,7 @@ import torch import torch.nn as nn -from alignment.pruning.dependency_aware import DependencyAwarePruning +from nodelens.pruning.dependency_aware import DependencyAwarePruning, DependencyGraph class _TinyCNN(nn.Module): @@ -25,6 +25,21 @@ def forward(self, x): return x +class _ResidualLikeBlock(nn.Module): + """Small module with residual-compatible convolutions for graph metadata tests.""" + + def __init__(self): + super().__init__() + self.block = nn.Sequential( + nn.Conv2d(4, 4, kernel_size=3, padding=1, bias=False), + nn.ReLU(), + nn.Conv2d(4, 4, kernel_size=3, padding=1, bias=False), + ) + + def forward(self, x): + return x + self.block(x) + + class TestDependencyAwarePruning: """Tests for DependencyAwarePruning.""" @@ -83,3 +98,13 @@ def fake_validate(self, masks): assert "Invalid pruning 
plan" in str(e) else: assert False, "Expected ValueError due to invalid pruning plan" + + def test_marks_residual_compatible_conv_candidates(self): + """DependencyGraph records same-parent, channel-preserving conv candidates.""" + graph = DependencyGraph(_ResidualLikeBlock()) + + first = graph.graph["block.0"] + second = graph.graph["block.2"] + + assert first.skip_connection_with == ["block.2"] + assert second.skip_connection_with == ["block.0"] diff --git a/tests/unit/test_evaluation_covariance.py b/tests/unit/test_evaluation_covariance.py index 354eb39c..ff787410 100644 --- a/tests/unit/test_evaluation_covariance.py +++ b/tests/unit/test_evaluation_covariance.py @@ -6,8 +6,8 @@ import torch import torch.nn as nn -from alignment.dataops.processing.covariance import CovarianceEstimator, estimate_covariance -from alignment.training.evaluation import EvaluationManager, evaluate_classification, evaluate_model, evaluate_regression +from nodelens.dataops.processing.covariance import CovarianceEstimator, estimate_covariance +from nodelens.training.evaluation import EvaluationManager, evaluate_classification, evaluate_model, evaluate_regression # --------------------------------------------------------------------------- # Helpers diff --git a/tests/unit/test_experiments.py b/tests/unit/test_experiments.py index 263b2da4..1c070a87 100644 --- a/tests/unit/test_experiments.py +++ b/tests/unit/test_experiments.py @@ -2,9 +2,9 @@ Unit tests for experiment classes. 
""" -from alignment.experiments.base import ExperimentConfig -from alignment.experiments.general_alignment import GeneralAlignmentConfig -from alignment.pruning.base import PruningConfig +from nodelens.experiments.base import ExperimentConfig +from nodelens.experiments.general_alignment import GeneralAlignmentConfig +from nodelens.pruning.base import PruningConfig class TestExperimentConfig: diff --git a/tests/unit/test_gradient_based.py b/tests/unit/test_gradient_based.py index 7e3c6b23..7fc5ccfc 100644 --- a/tests/unit/test_gradient_based.py +++ b/tests/unit/test_gradient_based.py @@ -5,7 +5,7 @@ import pytest import torch -from alignment.metrics.gradient_based import ( +from nodelens.metrics.gradient_based import ( GradientAlignment, GradientStatisticsTracker, LocalLearningRuleSearch, diff --git a/tests/unit/test_llm_attention_pruning.py b/tests/unit/test_llm_attention_pruning.py index 9fdd426c..15548b5b 100644 --- a/tests/unit/test_llm_attention_pruning.py +++ b/tests/unit/test_llm_attention_pruning.py @@ -15,8 +15,8 @@ # Skip entire module if transformers not installed pytest.importorskip("transformers") -from alignment.experiments.base import ExperimentConfig -from alignment.experiments.llm_experiments import LLMAlignmentExperiment +from nodelens.experiments.base import ExperimentConfig +from nodelens.experiments.llm_experiments import LLMAlignmentExperiment class _TinySelfAttention(nn.Module): @@ -129,7 +129,7 @@ def hook(mod, inp, out, key=name): def tiny_llm_experiment(monkeypatch): """Create an LLMAlignmentExperiment with a tiny transformer backend.""" # Avoid initializing full metric stack for this tiny synthetic test. 
- from alignment.experiments.base import BaseExperiment + from nodelens.experiments.base import BaseExperiment monkeypatch.setattr(BaseExperiment, "_initialize_components", lambda self: None) monkeypatch.setattr(BaseExperiment, "_setup_directories", lambda self: None) diff --git a/tests/unit/test_mask_ops.py b/tests/unit/test_mask_ops.py index 0c4a7847..7ef28561 100644 --- a/tests/unit/test_mask_ops.py +++ b/tests/unit/test_mask_ops.py @@ -5,7 +5,7 @@ import pytest import torch -from alignment.services.mask_ops import MaskOperations +from nodelens.services.mask_ops import MaskOperations class TestCreateStructuredMask: diff --git a/tests/unit/test_metric_clustering.py b/tests/unit/test_metric_clustering.py index a3882b62..bc3de2f0 100644 --- a/tests/unit/test_metric_clustering.py +++ b/tests/unit/test_metric_clustering.py @@ -11,7 +11,7 @@ import numpy as np import pytest -from alignment.analysis.clustering.metric_clustering import ClusterResult, MetricSpaceClustering +from nodelens.analysis.clustering.metric_clustering import ClusterResult, MetricSpaceClustering # --------------------------------------------------------------------------- # Helpers diff --git a/tests/unit/test_metrics.py b/tests/unit/test_metrics.py index 463a97b6..0b5324db 100644 --- a/tests/unit/test_metrics.py +++ b/tests/unit/test_metrics.py @@ -7,7 +7,7 @@ import pytest import torch -from alignment.metrics import get_metric, list_metrics +from nodelens.metrics import get_metric, list_metrics class TestMetricRegistry: diff --git a/tests/unit/test_misc_modules.py b/tests/unit/test_misc_modules.py index 05f305ef..fe772135 100644 --- a/tests/unit/test_misc_modules.py +++ b/tests/unit/test_misc_modules.py @@ -8,11 +8,11 @@ import pytest import torch -from alignment.analysis.dynamic_scoring import DynamicScoreAggregator, compute_dynamic_importance -from alignment.analysis.reporting.json_reporter import JSONReporter -from alignment.analysis.reporting.markdown import MarkdownReporter -from 
alignment.metrics.pairwise_base import PairwiseMetric -from alignment.metrics.rayleigh.delta_alignment import DeltaAlignment, NormalizedDeltaAlignment +from nodelens.analysis.dynamic_scoring import DynamicScoreAggregator, compute_dynamic_importance +from nodelens.analysis.reporting.json_reporter import JSONReporter +from nodelens.analysis.reporting.markdown import MarkdownReporter +from nodelens.metrics.pairwise_base import PairwiseMetric +from nodelens.metrics.rayleigh.delta_alignment import DeltaAlignment, NormalizedDeltaAlignment # ========================================================================= # DynamicScoreAggregator @@ -181,7 +181,7 @@ def test_add_section(self): assert len(reporter.sections) == 1 def test_add_table(self): - pytest.importorskip("tabulate") + pytest.importorskip("tabulate", exc_type=ImportError) reporter = MarkdownReporter() df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]}) reporter.add_table("Data", df) diff --git a/tests/unit/test_model_wrapper.py b/tests/unit/test_model_wrapper.py index 81455c87..2326286d 100644 --- a/tests/unit/test_model_wrapper.py +++ b/tests/unit/test_model_wrapper.py @@ -5,8 +5,8 @@ import torch import torch.nn as nn -from alignment.models.base import BaseModelWrapper -from alignment.models.hooks import HookManager +from nodelens.models.base import BaseModelWrapper +from nodelens.models.hooks import HookManager # --------------------------------------------------------------------------- # Helpers diff --git a/tests/unit/test_models.py b/tests/unit/test_models.py index d5d232bc..0c06f046 100644 --- a/tests/unit/test_models.py +++ b/tests/unit/test_models.py @@ -6,7 +6,7 @@ import torch import torch.nn as nn -from alignment.models.architectures.standard_models import CNN2P2, MLP +from nodelens.models.architectures.standard_models import CNN2P2, MLP class TestMLP: diff --git a/tests/unit/test_node_scoring_service.py b/tests/unit/test_node_scoring_service.py index f69828cc..76ba0bc1 100644 --- 
a/tests/unit/test_node_scoring_service.py +++ b/tests/unit/test_node_scoring_service.py @@ -10,7 +10,7 @@ import pytest import torch -from alignment.services.scoring import CompositeScores, NodeScoringService, create_scoring_service +from nodelens.services.scoring import CompositeScores, NodeScoringService, create_scoring_service # --------------------------------------------------------------------------- # Mock metric diff --git a/tests/unit/test_parallel_pruning.py b/tests/unit/test_parallel_pruning.py index 62e9bb2e..5f1d5fd7 100644 --- a/tests/unit/test_parallel_pruning.py +++ b/tests/unit/test_parallel_pruning.py @@ -6,7 +6,7 @@ import torch import torch.nn as nn -from alignment.pruning.strategies.parallel import AsyncParallelPruning, ParallelModePruning, ParallelPruningResult, TensorizedPruning +from nodelens.pruning.strategies.parallel import AsyncParallelPruning, ParallelModePruning, ParallelPruningResult, TensorizedPruning # --------------------------------------------------------------------------- # Helpers diff --git a/tests/unit/test_pruning_distribution.py b/tests/unit/test_pruning_distribution.py index 49af3a68..8bfb2308 100644 --- a/tests/unit/test_pruning_distribution.py +++ b/tests/unit/test_pruning_distribution.py @@ -6,7 +6,7 @@ import torch import torch.nn as nn -from alignment.pruning.distribution import DistributionStrategy, PruningDistributionManager +from nodelens.pruning.distribution import DistributionStrategy, PruningDistributionManager # --------------------------------------------------------------------------- # Helpers diff --git a/tests/unit/test_pruning_pipeline.py b/tests/unit/test_pruning_pipeline.py index 724681b7..60af2fb7 100644 --- a/tests/unit/test_pruning_pipeline.py +++ b/tests/unit/test_pruning_pipeline.py @@ -5,7 +5,7 @@ import torch import torch.nn as nn -from alignment.pruning.pipeline import PruningPipelineOptions, _ensure_tensor, run_pruning_pipeline +from nodelens.pruning.pipeline import PruningPipelineOptions, 
_ensure_tensor, run_pruning_pipeline # --------------------------------------------------------------------------- # Helpers diff --git a/tests/unit/test_pruning_strategies.py b/tests/unit/test_pruning_strategies.py index a1ab1c15..bd57796e 100644 --- a/tests/unit/test_pruning_strategies.py +++ b/tests/unit/test_pruning_strategies.py @@ -12,11 +12,11 @@ import torch import torch.nn as nn -from alignment.pruning.base import PrecomputedScorePruning, PruningConfig -from alignment.pruning.strategies.gradient import FisherPruning, GradientPruning, MomentumPruning -from alignment.pruning.strategies.magnitude import GlobalMagnitudePruning, IterativeMagnitudePruning, MagnitudePruning -from alignment.pruning.strategies.movement import AdaptiveMovementPruning, MovementPruning -from alignment.pruning.strategies.random import BernoulliPruning, LayerwiseRandomPruning, RandomPruning +from nodelens.pruning.base import PrecomputedScorePruning, PruningConfig +from nodelens.pruning.strategies.gradient import FisherPruning, GradientPruning, MomentumPruning +from nodelens.pruning.strategies.magnitude import GlobalMagnitudePruning, IterativeMagnitudePruning, MagnitudePruning +from nodelens.pruning.strategies.movement import AdaptiveMovementPruning, MovementPruning +from nodelens.pruning.strategies.random import BernoulliPruning, LayerwiseRandomPruning, RandomPruning # --------------------------------------------------------------------------- # Helpers @@ -522,12 +522,12 @@ def test_init_and_direction(self): """Test init by monkeypatching get_metric to return a class.""" from unittest.mock import MagicMock, patch - from alignment.pruning.strategies.cascading import CascadingAlignmentPruning + from nodelens.pruning.strategies.cascading import CascadingAlignmentPruning mock_metric_cls = MagicMock() mock_metric_cls.return_value = MagicMock() - with patch("alignment.pruning.strategies.cascading.get_metric", return_value=mock_metric_cls): + with 
patch("nodelens.pruning.strategies.cascading.get_metric", return_value=mock_metric_cls): strategy = CascadingAlignmentPruning( metric="rayleigh_quotient", direction="forward", @@ -538,12 +538,12 @@ def test_init_and_direction(self): def test_compute_importance_requires_inputs(self): from unittest.mock import MagicMock, patch - from alignment.pruning.strategies.cascading import CascadingAlignmentPruning + from nodelens.pruning.strategies.cascading import CascadingAlignmentPruning mock_metric_cls = MagicMock() mock_metric_cls.return_value = MagicMock() - with patch("alignment.pruning.strategies.cascading.get_metric", return_value=mock_metric_cls): + with patch("nodelens.pruning.strategies.cascading.get_metric", return_value=mock_metric_cls): strategy = CascadingAlignmentPruning(metric="rayleigh_quotient") layer = _conv2d(3, 8, 3) with pytest.raises(ValueError, match="requires inputs"): diff --git a/tests/unit/test_rayleigh_quotient_extended.py b/tests/unit/test_rayleigh_quotient_extended.py index 4a5d8c84..47cc626b 100644 --- a/tests/unit/test_rayleigh_quotient_extended.py +++ b/tests/unit/test_rayleigh_quotient_extended.py @@ -11,7 +11,7 @@ import pytest import torch -from alignment.metrics.rayleigh.rayleigh_quotient import FastRayleighQuotient, RayleighQuotient +from nodelens.metrics.rayleigh.rayleigh_quotient import FastRayleighQuotient, RayleighQuotient # --------------------------------------------------------------------------- # Tests: _compute_from_covariance diff --git a/tests/unit/test_registry.py b/tests/unit/test_registry.py index 0230cbca..0b307701 100644 --- a/tests/unit/test_registry.py +++ b/tests/unit/test_registry.py @@ -5,7 +5,7 @@ import pytest -from alignment.core.registry import ComponentInfo, Registry, create_component, create_from_config, list_all_components +from nodelens.core.registry import ComponentInfo, Registry, create_component, create_from_config, list_all_components # 
--------------------------------------------------------------------------- # Helpers diff --git a/tests/unit/test_streaming_accumulators.py b/tests/unit/test_streaming_accumulators.py index 76b1ebe5..15a79206 100644 --- a/tests/unit/test_streaming_accumulators.py +++ b/tests/unit/test_streaming_accumulators.py @@ -10,7 +10,7 @@ import numpy as np import pytest -from alignment.experiments.cluster_experiments import _CovAccumulator, _VarAccumulator +from nodelens.experiments.cluster_experiments import _CovAccumulator, _VarAccumulator # --------------------------------------------------------------------------- # Tests: _CovAccumulator diff --git a/tests/unit/test_streaming_covariance.py b/tests/unit/test_streaming_covariance.py index bb2498d1..64e464ac 100644 --- a/tests/unit/test_streaming_covariance.py +++ b/tests/unit/test_streaming_covariance.py @@ -4,7 +4,7 @@ import torch -from alignment.core.streaming import StreamingCovariance +from nodelens.core.streaming import StreamingCovariance class TestStreamingCovariance: diff --git a/tests/unit/test_training_base.py b/tests/unit/test_training_base.py index f9a32bd4..84e73aa0 100644 --- a/tests/unit/test_training_base.py +++ b/tests/unit/test_training_base.py @@ -7,7 +7,7 @@ import torch.nn as nn from torch.utils.data import DataLoader, TensorDataset -from alignment.training.base import BaseTrainer, TrainingConfig +from nodelens.training.base import BaseTrainer, TrainingConfig # --------------------------------------------------------------------------- # Helpers diff --git a/tests/unit/test_unified_config.py b/tests/unit/test_unified_config.py index 0cbf5616..4c562e1e 100644 --- a/tests/unit/test_unified_config.py +++ b/tests/unit/test_unified_config.py @@ -5,7 +5,7 @@ import pytest import yaml -from alignment.configs.unified_config import ( +from nodelens.configs.unified_config import ( CalibrationConfig, CascadeConfig, ClusteringConfig,