From 07459129063dc070f281f3cf46d7126f7bb524b7 Mon Sep 17 00:00:00 2001
From: carlos-alm <127798846+carlos-alm@users.noreply.github.com>
Date: Thu, 30 Apr 2026 16:45:30 -0600
Subject: [PATCH 1/3] ci(release): gate npm publish on benchmark regressions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The regression guard previously ran inside `npm test` against the existing
benchmark history files. Because those files are only updated post-publish
(by the Benchmark workflow's auto-PRs), a regression introduced in vX could
ship to npm before its numbers were ever recorded — and the guard would
then fire on every dev push to main once the docs PR landed, blocking
unrelated work without ever having prevented the bad release.

Restructure so the gate runs where it can actually block:

- New `pre-publish-benchmark` job in publish.yml (release events only):
  measures the just-built native artifact, writes new history entries,
  runs the regression guard, and uploads the modified history files as
  an artifact. The `publish` job depends on it, so a regression fails
  the workflow before npm sees the new version.

- benchmark.yml's three measurement jobs (build/query/incremental) are
  replaced by a single `record-benchmarks` job that downloads the
  pre-publish artifact and opens one PR with the verified numbers.
  `workflow_run.conclusion == 'success'` already gates this, so no PR
  is opened for an aborted publish. The engine-parity gate runs here
  as a soft signal (unchanged semantics). The embedding-benchmark job
  is unchanged: it has no regression guard and is too slow to fit in
  the pre-publish path.

- The regression-guard test is gated on `RUN_REGRESSION_GUARD=1` so the
  default `npm test` run shows it as skipped rather than failing on
  history that already passed gating at release time. CI sets the env
  var in the pre-publish step.
---
 .github/workflows/benchmark.yml           | 469 +++-------------------
 .github/workflows/publish.yml             | 133 +++++-
 package.json                              |   1 +
 tests/benchmarks/regression-guard.test.ts |  10 +-
 4 files changed, 188 insertions(+), 425 deletions(-)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 77f3affb0..a5b48d95c 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -6,19 +6,28 @@ on:
   workflow_dispatch:
     inputs:
       version:
-        description: 'Version to benchmark ("dev" for local, or semver like "2.4.0" for npm)'
+        description: 'Version to benchmark for embedding-only ("dev" for local, or semver like "2.4.0" for npm)'
         required: false
         default: "dev"
 
 permissions: {}
 
 jobs:
-  build-benchmark:
+  # ── Record benchmark history for the just-published release ──
+  #
+  # The build/query/incremental/resolution benchmarks are measured during the
+  # Publish workflow's pre-publish-benchmark gate (against the just-built
+  # native artifact). That job uploads the modified history files as an
+  # artifact only when the regression guard passes — meaning a publish that
+  # would have regressed is aborted before reaching npm, and no PR is opened
+  # for an un-published release. This job consumes that artifact and opens a
+  # single PR with the updates.
+  record-benchmarks:
     runs-on: ubuntu-latest
     if: >-
-      github.event_name == 'workflow_dispatch' ||
-      (github.event.workflow_run.conclusion == 'success' &&
-       github.event.workflow_run.event != 'push')
+      github.event_name == 'workflow_run' &&
+      github.event.workflow_run.conclusion == 'success' &&
+      github.event.workflow_run.event != 'push'
     permissions:
       actions: read
       contents: write
@@ -31,163 +40,52 @@ jobs:
           ref: main
           token: ${{ secrets.GITHUB_TOKEN }}
 
-      - uses: actions/setup-node@v6
-        with:
-          node-version: "22"
-          cache: "npm"
-
-      - name: Install dependencies
-        run: npm install --prefer-offline --no-audit --no-fund
-
-      - name: Determine benchmark mode
-        id: mode
-        run: |
-          if [ "${{ github.event_name }}" = "workflow_run" ]; then
-            # Release — find latest semver tag
-            TAG=$(git tag --sort=-version:refname --list 'v[0-9]*.[0-9]*.[0-9]*' | grep -v dev | head -1)
-            VERSION="${TAG#v}"
-            echo "source=npm" >> "$GITHUB_OUTPUT"
-            echo "version=$VERSION" >> "$GITHUB_OUTPUT"
-          elif [ "${{ inputs.version }}" = "dev" ] || [ -z "${{ inputs.version }}" ]; then
-            echo "source=local" >> "$GITHUB_OUTPUT"
-            echo "version=dev" >> "$GITHUB_OUTPUT"
-          else
-            echo "source=npm" >> "$GITHUB_OUTPUT"
-            echo "version=${{ inputs.version }}" >> "$GITHUB_OUTPUT"
-          fi
-
-      - name: Check for existing benchmark
-        id: existing
-        run: |
-          VERSION="${{ steps.mode.outputs.version }}"
-          VERSION_RE="${VERSION//./\\.}"
-          if [ "$VERSION" = "dev" ]; then
-            echo "skip=false" >> "$GITHUB_OUTPUT"
-          elif grep -qP '"version":\s*"'"$VERSION_RE"'"' generated/benchmarks/BUILD-BENCHMARKS.md 2>/dev/null; then
-            echo "Benchmark for $VERSION already exists in BUILD-BENCHMARKS.md — skipping"
-            echo "skip=true" >> "$GITHUB_OUTPUT"
-          else
-            echo "skip=false" >> "$GITHUB_OUTPUT"
-          fi
-
-      - name: Wait for npm propagation
-        if: steps.existing.outputs.skip != 'true' && steps.mode.outputs.source == 'npm'
-        run: |
-          VERSION="${{ steps.mode.outputs.version }}"
-          echo "Waiting for @optave/codegraph@${VERSION} on npm..."
-          for i in $(seq 1 20); do
-            if npm view "@optave/codegraph@${VERSION}" version 2>/dev/null; then
-              echo "Package available on npm"
-              exit 0
-            fi
-            echo "  Attempt $i/20 — not yet available, waiting 30s..."
-            sleep 30
-          done
-          echo "::error::Package @optave/codegraph@${VERSION} not found on npm after 10 minutes"
-          exit 1
-
-      - name: Run build benchmark
-        if: steps.existing.outputs.skip != 'true'
-        run: |
-          STRIP_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')")
-          ARGS="--version ${{ steps.mode.outputs.version }}"
-          if [ "${{ steps.mode.outputs.source }}" = "npm" ]; then
-            ARGS="$ARGS --npm"
-          fi
-          node $STRIP_FLAG --import ./scripts/ts-resolve-loader.js scripts/benchmark.ts $ARGS > benchmark-result.json
-
-      - name: Run resolution benchmark
-        if: steps.existing.outputs.skip != 'true'
-        run: |
-          STRIP_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')")
-          ARGS="--version ${{ steps.mode.outputs.version }}"
-          if [ "${{ steps.mode.outputs.source }}" = "npm" ]; then
-            ARGS="$ARGS --npm"
-          fi
-          node $STRIP_FLAG --import ./scripts/ts-resolve-loader.js scripts/resolution-benchmark.ts $ARGS > resolution-result.json
-
-      - name: Gate on resolution thresholds
-        if: steps.existing.outputs.skip != 'true'
-        timeout-minutes: 30
-        run: npx vitest run tests/benchmarks/resolution/resolution-benchmark.test.ts --reporter=verbose
-
-      - name: Setup Python (for tracer validation)
-        if: steps.existing.outputs.skip != 'true'
-        uses: actions/setup-python@v6
+      - name: Download benchmark history artifact
+        uses: actions/download-artifact@v8
         with:
-          python-version: "3.12"
+          name: benchmark-files
+          run-id: ${{ github.event.workflow_run.id }}
+          github-token: ${{ secrets.GITHUB_TOKEN }}
 
-      - name: Setup Go (for tracer validation)
-        if: steps.existing.outputs.skip != 'true'
-        uses: actions/setup-go@v6
+      - name: Download benchmark JSON results
+        uses: actions/download-artifact@v8
         with:
-          go-version: "stable"
-          cache: false
+          name: benchmark-results-json
+          run-id: ${{ github.event.workflow_run.id }}
+          github-token: ${{ secrets.GITHUB_TOKEN }}
 
-      - name: Run tracer validation (same-file edge recall)
-        if: steps.existing.outputs.skip != 'true'
-        timeout-minutes: 10
-        run: npx vitest run tests/benchmarks/resolution/tracer/tracer-validation.test.ts --reporter=verbose
-
-      - name: Merge resolution into build result
-        if: steps.existing.outputs.skip != 'true'
-        run: |
-          node -e "
-            const fs = require('fs');
-            const build = JSON.parse(fs.readFileSync('benchmark-result.json', 'utf8'));
-            const resolution = JSON.parse(fs.readFileSync('resolution-result.json', 'utf8'));
-            build.resolution = resolution;
-            fs.writeFileSync('benchmark-result.json', JSON.stringify(build, null, 2));
-          "
-
-      - name: Update build report
-        if: steps.existing.outputs.skip != 'true'
+      - name: Determine release version
+        id: version
         run: |
-          STRIP_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')")
-          node $STRIP_FLAG scripts/update-benchmark-report.ts benchmark-result.json
-
-      - name: Upload build result
-        if: steps.existing.outputs.skip != 'true'
-        uses: actions/upload-artifact@v7
-        with:
-          name: build-benchmark-result
-          path: benchmark-result.json
+          TAG=$(git tag --sort=-version:refname --list 'v[0-9]*.[0-9]*.[0-9]*' | grep -v dev | head -1)
+          VERSION="${TAG#v}"
+          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
 
       - name: Check for changes
-        if: steps.existing.outputs.skip != 'true'
         id: changes
         run: |
           CHANGED=false
-          # Detect modified tracked files
-          if ! git diff --quiet HEAD -- generated/benchmarks/BUILD-BENCHMARKS.md README.md 2>/dev/null; then
-            CHANGED=true
-          fi
-          # Detect newly created (untracked) files
-          if [ -n "$(git ls-files --others --exclude-standard generated/benchmarks/BUILD-BENCHMARKS.md)" ]; then
+          if [ -n "$(git status --porcelain -- generated/benchmarks/ README.md 2>/dev/null)" ]; then  # modified or untracked
             CHANGED=true
           fi
           echo "changed=$CHANGED" >> "$GITHUB_OUTPUT"
 
       - name: Commit and push via PR
-        if: steps.existing.outputs.skip != 'true' && steps.changes.outputs.changed == 'true'
+        if: steps.changes.outputs.changed == 'true'
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          VERSION: ${{ steps.mode.outputs.version }}
+          VERSION: ${{ steps.version.outputs.version }}
         run: |
           git config user.name "github-actions[bot]"
           git config user.email "github-actions[bot]@users.noreply.github.com"
 
-          if [ "$VERSION" = "dev" ]; then
-            BRANCH="benchmark/build-dev-$(date +%Y%m%d-%H%M%S)"
-          else
-            BRANCH="benchmark/build-v${VERSION}-$(date +%Y%m%d-%H%M%S)"
-          fi
+          BRANCH="benchmark/v${VERSION}-$(date +%Y%m%d-%H%M%S)"
           git checkout -b "$BRANCH"
-          git add generated/benchmarks/BUILD-BENCHMARKS.md README.md
-          git commit -m "docs: update build performance benchmarks (${VERSION})"
+          git add generated/benchmarks/BUILD-BENCHMARKS.md generated/benchmarks/QUERY-BENCHMARKS.md generated/benchmarks/INCREMENTAL-BENCHMARKS.md README.md
+          git commit -m "docs: update performance benchmarks (${VERSION})"
           git push origin "$BRANCH"
 
-          TITLE="docs: update build performance benchmarks (${VERSION})"
+          TITLE="docs: update performance benchmarks (${VERSION})"
           if gh pr list --state open --json title --jq ".[].title" | grep -qF "$TITLE"; then
             echo "::notice::PR already open for '$TITLE' — skipping"
           else
@@ -195,16 +93,21 @@ jobs:
               --base main \
               --head "$BRANCH" \
               --title "$TITLE" \
-              --body "Automated build benchmark update for **${VERSION}** from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."
+              --body "Automated benchmark history update for **${VERSION}** from publish run [#${{ github.event.workflow_run.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.event.workflow_run.id }}). These numbers were measured during the pre-publish gate and passed the regression guard before npm publish proceeded."
           fi
 
-      # Engine-parity gate: runs AFTER the doc PR is created so the PR still
-      # records raw benchmark data even when parity regresses. The job status
-      # going red alerts maintainers; the linked issues describe each threshold.
+      # Engine-parity gate: surfaces wasm/native divergence as a red workflow
+      # status (does not block — publish has already completed). Runs after
+      # the doc PR is created so the PR still records data even when parity
+      # regresses.
       - name: Engine parity gate
-        if: steps.existing.outputs.skip != 'true'
         run: node scripts/benchmark-parity-gate.mjs benchmark-result.json
 
+  # ── Embedding benchmark (post-publish, npm-installed package) ──
+  #
+  # Embeddings have no regression guard and take 2.5+ hours to run, so they
+  # cannot fit in the pre-publish path. They run after a successful publish
+  # against the npm-installed package and open their own PR.
   embedding-benchmark:
     runs-on: ubuntu-latest
     # 7 models x 30 min each = 210 min worst-case; symbols are sampled to 1500 so
@@ -362,283 +265,3 @@ jobs:
               --title "$TITLE" \
               --body "Automated embedding benchmark update for **${VERSION}** from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."
           fi
-
-  query-benchmark:
-    runs-on: ubuntu-latest
-    if: >-
-      github.event_name == 'workflow_dispatch' ||
-      (github.event.workflow_run.conclusion == 'success' &&
-       github.event.workflow_run.event != 'push')
-    permissions:
-      actions: read
-      contents: write
-      pull-requests: write
-
-    steps:
-      - uses: actions/checkout@v6
-        with:
-          fetch-depth: 0
-          ref: main
-          token: ${{ secrets.GITHUB_TOKEN }}
-
-      - uses: actions/setup-node@v6
-        with:
-          node-version: "22"
-          cache: "npm"
-
-      - name: Install dependencies
-        run: npm install --prefer-offline --no-audit --no-fund
-
-      - name: Determine benchmark mode
-        id: mode
-        run: |
-          if [ "${{ github.event_name }}" = "workflow_run" ]; then
-            TAG=$(git tag --sort=-version:refname --list 'v[0-9]*.[0-9]*.[0-9]*' | grep -v dev | head -1)
-            VERSION="${TAG#v}"
-            echo "source=npm" >> "$GITHUB_OUTPUT"
-            echo "version=$VERSION" >> "$GITHUB_OUTPUT"
-          elif [ "${{ inputs.version }}" = "dev" ] || [ -z "${{ inputs.version }}" ]; then
-            echo "source=local" >> "$GITHUB_OUTPUT"
-            echo "version=dev" >> "$GITHUB_OUTPUT"
-          else
-            echo "source=npm" >> "$GITHUB_OUTPUT"
-            echo "version=${{ inputs.version }}" >> "$GITHUB_OUTPUT"
-          fi
-
-      - name: Check for existing benchmark
-        id: existing
-        run: |
-          VERSION="${{ steps.mode.outputs.version }}"
-          VERSION_RE="${VERSION//./\\.}"
-          if [ "$VERSION" = "dev" ]; then
-            echo "skip=false" >> "$GITHUB_OUTPUT"
-          elif grep -qP '"version":\s*"'"$VERSION_RE"'"' generated/benchmarks/QUERY-BENCHMARKS.md 2>/dev/null; then
-            echo "Benchmark for $VERSION already exists in QUERY-BENCHMARKS.md — skipping"
-            echo "skip=true" >> "$GITHUB_OUTPUT"
-          else
-            echo "skip=false" >> "$GITHUB_OUTPUT"
-          fi
-
-      - name: Wait for npm propagation
-        if: steps.existing.outputs.skip != 'true' && steps.mode.outputs.source == 'npm'
-        run: |
-          VERSION="${{ steps.mode.outputs.version }}"
-          echo "Waiting for @optave/codegraph@${VERSION} on npm..."
-          for i in $(seq 1 20); do
-            if npm view "@optave/codegraph@${VERSION}" version 2>/dev/null; then
-              echo "Package available on npm"
-              exit 0
-            fi
-            echo "  Attempt $i/20 — not yet available, waiting 30s..."
-            sleep 30
-          done
-          echo "::error::Package @optave/codegraph@${VERSION} not found on npm after 10 minutes"
-          exit 1
-
-      - name: Run query benchmark
-        if: steps.existing.outputs.skip != 'true'
-        run: |
-          STRIP_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')")
-          ARGS="--version ${{ steps.mode.outputs.version }}"
-          if [ "${{ steps.mode.outputs.source }}" = "npm" ]; then
-            ARGS="$ARGS --npm"
-          fi
-          node $STRIP_FLAG --import ./scripts/ts-resolve-loader.js scripts/query-benchmark.ts $ARGS > query-benchmark-result.json
-
-      - name: Update query report
-        if: steps.existing.outputs.skip != 'true'
-        run: |
-          STRIP_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')")
-          node $STRIP_FLAG scripts/update-query-report.ts query-benchmark-result.json
-
-      - name: Upload query result
-        if: steps.existing.outputs.skip != 'true'
-        uses: actions/upload-artifact@v7
-        with:
-          name: query-benchmark-result
-          path: query-benchmark-result.json
-
-      - name: Check for changes
-        if: steps.existing.outputs.skip != 'true'
-        id: changes
-        run: |
-          CHANGED=false
-          # Detect modified tracked files
-          if ! git diff --quiet HEAD -- generated/benchmarks/QUERY-BENCHMARKS.md 2>/dev/null; then
-            CHANGED=true
-          fi
-          # Detect newly created (untracked) files
-          if [ -n "$(git ls-files --others --exclude-standard generated/benchmarks/QUERY-BENCHMARKS.md)" ]; then
-            CHANGED=true
-          fi
-          echo "changed=$CHANGED" >> "$GITHUB_OUTPUT"
-
-      - name: Commit and push via PR
-        if: steps.existing.outputs.skip != 'true' && steps.changes.outputs.changed == 'true'
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          VERSION: ${{ steps.mode.outputs.version }}
-        run: |
-          git config user.name "github-actions[bot]"
-          git config user.email "github-actions[bot]@users.noreply.github.com"
-
-          if [ "$VERSION" = "dev" ]; then
-            BRANCH="benchmark/query-dev-$(date +%Y%m%d-%H%M%S)"
-          else
-            BRANCH="benchmark/query-v${VERSION}-$(date +%Y%m%d-%H%M%S)"
-          fi
-          git checkout -b "$BRANCH"
-          git add generated/benchmarks/QUERY-BENCHMARKS.md
-          git commit -m "docs: update query benchmarks (${VERSION})"
-          git push origin "$BRANCH"
-
-          TITLE="docs: update query benchmarks (${VERSION})"
-          if gh pr list --state open --json title --jq ".[].title" | grep -qF "$TITLE"; then
-            echo "::notice::PR already open for '$TITLE' — skipping"
-          else
-            gh pr create \
-              --base main \
-              --head "$BRANCH" \
-              --title "$TITLE" \
-              --body "Automated query benchmark update for **${VERSION}** from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."
-          fi
-
-  incremental-benchmark:
-    runs-on: ubuntu-latest
-    if: >-
-      github.event_name == 'workflow_dispatch' ||
-      (github.event.workflow_run.conclusion == 'success' &&
-       github.event.workflow_run.event != 'push')
-    permissions:
-      actions: read
-      contents: write
-      pull-requests: write
-
-    steps:
-      - uses: actions/checkout@v6
-        with:
-          fetch-depth: 0
-          ref: main
-          token: ${{ secrets.GITHUB_TOKEN }}
-
-      - uses: actions/setup-node@v6
-        with:
-          node-version: "22"
-          cache: "npm"
-
-      - name: Install dependencies
-        run: npm install --prefer-offline --no-audit --no-fund
-
-      - name: Determine benchmark mode
-        id: mode
-        run: |
-          if [ "${{ github.event_name }}" = "workflow_run" ]; then
-            TAG=$(git tag --sort=-version:refname --list 'v[0-9]*.[0-9]*.[0-9]*' | grep -v dev | head -1)
-            VERSION="${TAG#v}"
-            echo "source=npm" >> "$GITHUB_OUTPUT"
-            echo "version=$VERSION" >> "$GITHUB_OUTPUT"
-          elif [ "${{ inputs.version }}" = "dev" ] || [ -z "${{ inputs.version }}" ]; then
-            echo "source=local" >> "$GITHUB_OUTPUT"
-            echo "version=dev" >> "$GITHUB_OUTPUT"
-          else
-            echo "source=npm" >> "$GITHUB_OUTPUT"
-            echo "version=${{ inputs.version }}" >> "$GITHUB_OUTPUT"
-          fi
-
-      - name: Check for existing benchmark
-        id: existing
-        run: |
-          VERSION="${{ steps.mode.outputs.version }}"
-          VERSION_RE="${VERSION//./\\.}"
-          if [ "$VERSION" = "dev" ]; then
-            echo "skip=false" >> "$GITHUB_OUTPUT"
-          elif grep -qP '"version":\s*"'"$VERSION_RE"'"' generated/benchmarks/INCREMENTAL-BENCHMARKS.md 2>/dev/null; then
-            echo "Benchmark for $VERSION already exists in INCREMENTAL-BENCHMARKS.md — skipping"
-            echo "skip=true" >> "$GITHUB_OUTPUT"
-          else
-            echo "skip=false" >> "$GITHUB_OUTPUT"
-          fi
-
-      - name: Wait for npm propagation
-        if: steps.existing.outputs.skip != 'true' && steps.mode.outputs.source == 'npm'
-        run: |
-          VERSION="${{ steps.mode.outputs.version }}"
-          echo "Waiting for @optave/codegraph@${VERSION} on npm..."
-          for i in $(seq 1 20); do
-            if npm view "@optave/codegraph@${VERSION}" version 2>/dev/null; then
-              echo "Package available on npm"
-              exit 0
-            fi
-            echo "  Attempt $i/20 — not yet available, waiting 30s..."
-            sleep 30
-          done
-          echo "::error::Package @optave/codegraph@${VERSION} not found on npm after 10 minutes"
-          exit 1
-
-      - name: Run incremental benchmark
-        if: steps.existing.outputs.skip != 'true'
-        run: |
-          STRIP_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')")
-          ARGS="--version ${{ steps.mode.outputs.version }}"
-          if [ "${{ steps.mode.outputs.source }}" = "npm" ]; then
-            ARGS="$ARGS --npm"
-          fi
-          node $STRIP_FLAG --import ./scripts/ts-resolve-loader.js scripts/incremental-benchmark.ts $ARGS > incremental-benchmark-result.json
-
-      - name: Update incremental report
-        if: steps.existing.outputs.skip != 'true'
-        run: |
-          STRIP_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')")
-          node $STRIP_FLAG scripts/update-incremental-report.ts incremental-benchmark-result.json
-
-      - name: Upload incremental result
-        if: steps.existing.outputs.skip != 'true'
-        uses: actions/upload-artifact@v7
-        with:
-          name: incremental-benchmark-result
-          path: incremental-benchmark-result.json
-
-      - name: Check for changes
-        if: steps.existing.outputs.skip != 'true'
-        id: changes
-        run: |
-          CHANGED=false
-          # Detect modified tracked files
-          if ! git diff --quiet HEAD -- generated/benchmarks/INCREMENTAL-BENCHMARKS.md 2>/dev/null; then
-            CHANGED=true
-          fi
-          # Detect newly created (untracked) files
-          if [ -n "$(git ls-files --others --exclude-standard generated/benchmarks/INCREMENTAL-BENCHMARKS.md)" ]; then
-            CHANGED=true
-          fi
-          echo "changed=$CHANGED" >> "$GITHUB_OUTPUT"
-
-      - name: Commit and push via PR
-        if: steps.existing.outputs.skip != 'true' && steps.changes.outputs.changed == 'true'
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          VERSION: ${{ steps.mode.outputs.version }}
-        run: |
-          git config user.name "github-actions[bot]"
-          git config user.email "github-actions[bot]@users.noreply.github.com"
-
-          if [ "$VERSION" = "dev" ]; then
-            BRANCH="benchmark/incremental-dev-$(date +%Y%m%d-%H%M%S)"
-          else
-            BRANCH="benchmark/incremental-v${VERSION}-$(date +%Y%m%d-%H%M%S)"
-          fi
-          git checkout -b "$BRANCH"
-          git add generated/benchmarks/INCREMENTAL-BENCHMARKS.md
-          git commit -m "docs: update incremental benchmarks (${VERSION})"
-          git push origin "$BRANCH"
-
-          TITLE="docs: update incremental benchmarks (${VERSION})"
-          if gh pr list --state open --json title --jq ".[].title" | grep -qF "$TITLE"; then
-            echo "::notice::PR already open for '$TITLE' — skipping"
-          else
-            gh pr create \
-              --base main \
-              --head "$BRANCH" \
-              --title "$TITLE" \
-              --body "Automated incremental benchmark update for **${VERSION}** from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."
-          fi
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 758b9b2d1..184529c7f 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -225,6 +225,137 @@ jobs:
           path: crates/codegraph-core/*.node
           if-no-files-found: error
 
+  # ── Pre-publish benchmark gate (stable releases only) ──
+  #
+  # Measures the just-built native artifact against the local source, writes
+  # new entries into the benchmark history files, and runs the regression
+  # guard. If the new version regresses beyond the threshold vs the previous
+  # release, this job fails and the publish job is skipped — preventing the
+  # bad code from reaching npm. The modified history files are uploaded as
+  # an artifact so the post-publish Benchmark workflow can record them via
+  # PR without re-measuring (single source of truth, half the CI minutes).
+
+  pre-publish-benchmark:
+    name: Pre-publish benchmark gate
+    if: github.event_name != 'push'  # same condition as the publish job: never runs for push-triggered builds
+    needs: [compute-version, build-native]  # compute-version supplies the version string passed to each benchmark script
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read  # read-only token; no step in this job pushes commits or opens PRs
+
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          fetch-depth: 0  # full history (depth 0 fetches all branches and tags)
+
+      - uses: actions/setup-node@v6
+        with:
+          node-version: "22"
+
+      - name: Setup Python (for resolution benchmark)
+        uses: actions/setup-python@v6
+        with:
+          python-version: "3.12"
+
+      - name: Setup Go (for resolution benchmark)
+        uses: actions/setup-go@v6
+        with:
+          go-version: "stable"
+          cache: false
+
+      - name: Download native artifact (linux-x64)
+        uses: actions/download-artifact@v8
+        with:
+          name: native-linux-x64
+          path: crates/codegraph-core/  # NOTE(review): presumably where scripts/ci-install-native.mjs looks for the .node file — confirm
+
+      - run: npm install
+
+      - name: Install native addon over published binary
+        run: node scripts/ci-install-native.mjs
+
+      - name: Run build benchmark
+        env:
+          VERSION: ${{ needs.compute-version.outputs.version }}
+        run: |
+          STRIP_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')")
+          node $STRIP_FLAG --import ./scripts/ts-resolve-loader.js scripts/benchmark.ts --version "$VERSION" > benchmark-result.json
+
+      - name: Run resolution benchmark
+        env:
+          VERSION: ${{ needs.compute-version.outputs.version }}
+        run: |
+          STRIP_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')")
+          node $STRIP_FLAG --import ./scripts/ts-resolve-loader.js scripts/resolution-benchmark.ts --version "$VERSION" > resolution-result.json
+
+      - name: Merge resolution into build result
+        run: |
+          node -e "
+            const fs = require('fs');
+            const build = JSON.parse(fs.readFileSync('benchmark-result.json', 'utf8'));
+            const resolution = JSON.parse(fs.readFileSync('resolution-result.json', 'utf8'));
+            build.resolution = resolution;
+            fs.writeFileSync('benchmark-result.json', JSON.stringify(build, null, 2));
+          "
+
+      - name: Run query benchmark
+        env:
+          VERSION: ${{ needs.compute-version.outputs.version }}
+        run: |
+          STRIP_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')")
+          node $STRIP_FLAG --import ./scripts/ts-resolve-loader.js scripts/query-benchmark.ts --version "$VERSION" > query-benchmark-result.json
+
+      - name: Run incremental benchmark
+        env:
+          VERSION: ${{ needs.compute-version.outputs.version }}
+        run: |
+          STRIP_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')")
+          node $STRIP_FLAG --import ./scripts/ts-resolve-loader.js scripts/incremental-benchmark.ts --version "$VERSION" > incremental-benchmark-result.json
+
+      - name: Update build report
+        run: |
+          STRIP_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')")
+          node $STRIP_FLAG scripts/update-benchmark-report.ts benchmark-result.json
+
+      - name: Update query report
+        run: |
+          STRIP_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')")
+          node $STRIP_FLAG scripts/update-query-report.ts query-benchmark-result.json
+
+      - name: Update incremental report
+        run: |
+          STRIP_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')")
+          node $STRIP_FLAG scripts/update-incremental-report.ts incremental-benchmark-result.json
+
+      - name: Regression guard
+        env:
+          RUN_REGRESSION_GUARD: "1"  # regression-guard.test.ts skips its suite unless this is exactly "1"
+        run: npm run test:regression-guard  # a failure fails the job, so the uploads below and the publish job are skipped
+
+      - name: Upload benchmark history files
+        uses: actions/upload-artifact@v7
+        with:
+          name: benchmark-files  # downloaded by the record-benchmarks job in benchmark.yml
+          path: |
+            generated/benchmarks/BUILD-BENCHMARKS.md
+            generated/benchmarks/QUERY-BENCHMARKS.md
+            generated/benchmarks/INCREMENTAL-BENCHMARKS.md
+            README.md
+          if-no-files-found: error  # fail the step if none of the listed files exist
+
+      # Raw JSON used by post-publish soft-signal jobs (e.g. engine-parity
+      # gate in the Benchmark workflow). Separated from the history-files
+      # artifact because consumers read different shapes.
+      - name: Upload benchmark JSON results
+        uses: actions/upload-artifact@v7
+        with:
+          name: benchmark-results-json  # downloaded by record-benchmarks; benchmark-result.json feeds its engine parity gate
+          path: |
+            benchmark-result.json
+            query-benchmark-result.json
+            incremental-benchmark-result.json
+          if-no-files-found: error
+
   # ── Dev builds: GitHub pre-release with tarballs ──
 
   publish-dev:
@@ -399,7 +530,7 @@ jobs:
 
   publish:
     if: github.event_name != 'push'
-    needs: [compute-version, build-native]
+    needs: [compute-version, build-native, pre-publish-benchmark]
     runs-on: ubuntu-latest
     environment: npm-publish
     permissions:
diff --git a/package.json b/package.json
index a1958560c..5935c836f 100644
--- a/package.json
+++ b/package.json
@@ -86,6 +86,7 @@
     "test": "vitest run",
     "test:watch": "vitest",
     "test:coverage": "vitest run --coverage",
+    "test:regression-guard": "vitest run tests/benchmarks/regression-guard.test.ts",
     "lint": "biome check src/ tests/",
     "lint:fix": "biome check --write src/ tests/",
     "format": "biome format --write src/ tests/",
diff --git a/tests/benchmarks/regression-guard.test.ts b/tests/benchmarks/regression-guard.test.ts
index a12a451ca..c16b6363c 100644
--- a/tests/benchmarks/regression-guard.test.ts
+++ b/tests/benchmarks/regression-guard.test.ts
@@ -363,7 +363,15 @@ interface IncrementalEntry {
 
 // ── Tests ────────────────────────────────────────────────────────────────
 
-describe('Benchmark regression guard', () => {
+// Release-blocking gate: runs pre-publish (after fresh benchmark numbers are
+// written by the pre-publish-benchmark job in .github/workflows/publish.yml)
+// and locally only when RUN_REGRESSION_GUARD=1 is set — the bare npm script
+// `test:regression-guard` does not set it. Skipped in the default `npm test`
+// run so docs commits that merge already-recorded regressed history into
+// main don't trigger false failures — by then the release has passed the gate.
+const RUN_REGRESSION_GUARD = process.env.RUN_REGRESSION_GUARD === '1';
+
+describe.runIf(RUN_REGRESSION_GUARD)('Benchmark regression guard', () => {
   const buildHistory = extractJsonData<BuildEntry>(
     path.join(BENCHMARKS_DIR, 'BUILD-BENCHMARKS.md'),
     'BENCHMARK_DATA',

From 1fc92b259a43c8be32414256f95b61aba35d54ff Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Thu, 30 Apr 2026 18:52:38 -0600
Subject: [PATCH 2/3] fix(ci): cache npm in pre-publish-benchmark job (#1040)

Match the cache: "npm" setting used by every other job in publish.yml so
release runs reuse the npm cache instead of re-downloading dependencies.
---
 .github/workflows/publish.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 184529c7f..749395159 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -251,6 +251,7 @@ jobs:
       - uses: actions/setup-node@v6
         with:
           node-version: "22"
+          cache: "npm"
 
       - name: Setup Python (for resolution benchmark)
         uses: actions/setup-python@v6

From b0389af79175522aa9dee37a6d281ab9a072e76a Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Thu, 30 Apr 2026 23:39:58 -0600
Subject: [PATCH 3/3] fix(ci): restore untracked-file detection in
 record-benchmarks (#1040)

Each of the three pre-consolidation jobs paired `git diff --quiet HEAD`
with `git ls-files --others --exclude-standard` so a first-run history
file (or one re-created after deletion) would still be picked up. The
consolidated check dropped that second guard, which would silently skip
the PR step if a benchmark history file was untracked. Restore the
guard so verified numbers always make it back to main.
---
 .github/workflows/benchmark.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index a5b48d95c..4e91dba2c 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -68,6 +68,9 @@ jobs:
           if ! git diff --quiet HEAD -- generated/benchmarks/ README.md 2>/dev/null; then
             CHANGED=true
           fi
+          if [ -n "$(git ls-files --others --exclude-standard generated/benchmarks/)" ]; then
+            CHANGED=true
+          fi
           echo "changed=$CHANGED" >> "$GITHUB_OUTPUT"
 
       - name: Commit and push via PR