diff --git a/scripts/fix-script-registry.json b/scripts/fix-script-registry.json
index 79e72af4..eb085dbf 100644
--- a/scripts/fix-script-registry.json
+++ b/scripts/fix-script-registry.json
@@ -102,7 +102,8 @@
       "codeql-cron-monthly": "fix-codeql-cron-monthly.sh",
       "chapel-manpath-guard": "fix-chapel-manpath-guard.sh",
       "chapel-chpl-llvm-export": "fix-chapel-chpl-llvm-export.sh",
-      "chapel-replace-chpl-about-with-version": "fix-chapel-replace-chpl-about.sh"
+      "chapel-replace-chpl-about-with-version": "fix-chapel-replace-chpl-about.sh",
+      "reusable_workflow_sha_bump_needs_propagation": "propagate-sha-bump.sh"
     }
   }
 }
diff --git a/scripts/propagate-sha-bump.sh b/scripts/propagate-sha-bump.sh
new file mode 100755
index 00000000..af923e89
--- /dev/null
+++ b/scripts/propagate-sha-bump.sh
@@ -0,0 +1,226 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: MPL-2.0
+#
+# propagate-sha-bump.sh — actuation half of the three-system propagation arch.
+#
+#   hypatia (detection) → gitbot-fleet (THIS) → .git-private-farm (propagation)
+#
+# Consumes a hypatia finding with
+#   rule = reusable_workflow_sha_bump_needs_propagation
+# (see hyperpolymath/hypatia#418), pre-filters by title keyword (HARD —
+# per feedback_pr_sweep_title_keyword_exclusion + feedback_no_automated_licence_edits),
+# enumerates estate consumers pinning the old SHA, and fires a
+# repository_dispatch event of type `propagate-sha-bump` into
+# hyperpolymath/.git-private-farm where the receiver workflow runs
+# `scripts/sha-bump-propagate.sh`.
+#
+# Usage (called by dispatch-runner.sh):
+#   propagate-sha-bump.sh <repo_path> <finding_file>
+#
+# `repo_path` is ignored — this script operates on the finding alone, not
+# on the upstream repo's working tree.
+#
+# Required env:
+#   GH_TOKEN                gh CLI auth, repo + workflow scopes
+#
+# Optional env:
+#   FARM_REPO               default "hyperpolymath/.git-private-farm"
+#   DRY_RUN                 "true" prints the payload without dispatching
+#   CONSUMERS_TSV_OVERRIDE  path to a pre-built consumers TSV; when the file
+#                           exists it is used instead of the gh code search
+set -euo pipefail
+
+# Print the calling convention to stderr and exit 64.
+usage() {
+  echo "Usage: $0 <repo_path> <finding_file>" >&2
+  exit 64
+}
+
+[[ $# -ge 2 ]] || usage
+
+# Only $2 is read; $1 (repo_path) is never used by this script.
+FINDING_FILE="$2"
+[[ -f "$FINDING_FILE" ]] || { echo "ERROR: finding file not found: $FINDING_FILE" >&2; exit 1; }
+
+FARM_REPO="${FARM_REPO:-hyperpolymath/.git-private-farm}"
+DRY_RUN="${DRY_RUN:-false}"
+
+# Title-keyword exclusion regex. Keep in sync with:
+#   feedback_pr_sweep_title_keyword_exclusion
+#   farm receiver workflow .github/workflows/sha-bump-propagate.yml
+# Case-insensitive — grep -iE. `licen[cs]` covers the US spelling
+# (license / licensing) as well as the UK spelling (licence) that this
+# project's own policy names use. Matching is by substring, so short tokens
+# also hit inside longer words (e.g. MPL in "example"); over-refusal is the
+# safe direction for this gate.
+FORBIDDEN_KEYWORDS='licen[cs]|SPDX|PMPL|MPL|AGPL|GPL|Apache|copyright|attribution|relicens|secret|vulnerab|CVE-'
+
+# --- 1. Parse finding ----------------------------------------------------------
+
+# Every field falls back to the empty string when absent from the finding.
+rule=$(jq -r '.rule // ""' "$FINDING_FILE")
+source_repo=$(jq -r '.source_repo // ""' "$FINDING_FILE")
+source_workflow=$(jq -r '.source_workflow // ""' "$FINDING_FILE")
+old_sha=$(jq -r '.old_sha // ""' "$FINDING_FILE")
+new_sha=$(jq -r '.new_sha // ""' "$FINDING_FILE")
+pr_title=$(jq -r '.pr_title // ""' "$FINDING_FILE")
+pr_number=$(jq -r '.pr_number // ""' "$FINDING_FILE")
+
+# Hard rule-name gate — refuse to operate on findings of any other shape.
+if [[ "$rule" != "reusable_workflow_sha_bump_needs_propagation" ]]; then
+  echo "ERROR: finding rule mismatch: got '$rule', expected 'reusable_workflow_sha_bump_needs_propagation'" >&2
+  exit 1
+fi
+
+# --- 2. SHA + path validation -------------------------------------------------
+
+# Bash's =~ anchors ^ and $ to the whole string, so a value carrying an
+# embedded newline cannot pass on the strength of one well-formed line (a
+# per-line matcher such as grep would accept it).
+sha_re='^[0-9a-f]{40}$'
+for v in old_sha new_sha; do
+  val="${!v}"
+  if [[ ! "$val" =~ $sha_re ]]; then
+    echo "ERROR: $v is not a 40-char hex SHA: $val" >&2
+    exit 1
+  fi
+done
+
+if [[ "$old_sha" == "$new_sha" ]]; then
+  echo "ERROR: old_sha equals new_sha — nothing to propagate" >&2
+  exit 1
+fi
+
+case "$source_repo" in
+  hyperpolymath/*) ;;
+  *) echo "ERROR: source_repo not in hyperpolymath/* : '$source_repo'" >&2; exit 1 ;;
+esac
+
+case "$source_workflow" in
+  .github/workflows/*.yml|.github/workflows/*.yaml|action.yml|action.yaml) ;;
+  *) echo "ERROR: source_workflow not in expected shape: '$source_workflow'" >&2; exit 1 ;;
+esac
+
+# --- 3. Title-keyword pre-filter (HARD) ---------------------------------------
+
+# Per feedback_no_automated_licence_edits: licence/SPDX changes are MANUAL,
+# even if policy-correct. The receiver workflow re-checks this (belt-and-braces),
+# but the canonical gate lives HERE.
+#
+# The title is fed through a here-string rather than a pipe: under pipefail a
+# producer killed by grep -q's early exit would make the pipeline "fail" and
+# silently turn a match into a pass.
+# NOTE(review): a finding with no pr_title yields "" and passes this gate —
+# confirm whether a missing title should be refused instead.
+if grep -iqE -e "$FORBIDDEN_KEYWORDS" <<<"$pr_title"; then
+  echo "REFUSED: pr_title matched forbidden keyword pattern — routing to manual review." >&2
+  echo " source_repo=$source_repo source_workflow=$source_workflow" >&2
+  echo " pr_title=$pr_title" >&2
+  echo " Owner must approve and apply this bump manually, per-consumer." >&2
+  exit 0 # NOT an error — this is the expected, correct refusal path.
+fi
+
+# --- 4. Build consumer TSV via code search ------------------------------------
+
+# Construct the search pattern the codebases use to pin this workflow.
+# Example: `uses: hyperpolymath/standards/.github/workflows/governance-reusable.yml@<old_sha>`
+# (gh code-search is reportedly whitespace-tolerant — TODO confirm).
+# We search for the path + SHA combination; the TSV emits
+# `<owner>/<repo>\t<path>`, one consumer workflow file per line.
+#
+# The needle keeps the full workflow path (including any `.github/workflows/`
+# prefix), since the full `uses: …` line includes the source repo path.
+needle="${source_repo}/${source_workflow}@${old_sha}"
+
+TMPDIR_RUN=$(mktemp -d -t propagate-sha-bump.XXXXXX)
+trap 'rm -rf "$TMPDIR_RUN"' EXIT
+
+CONSUMERS_TSV="$TMPDIR_RUN/consumers.tsv"
+
+echo "Enumerating consumers pinning: $needle" >&2
+
+# gh code-search has a 100-result cap per query. For larger sweeps the
+# operator should pre-build a TSV manually and supply it via the
+# CONSUMERS_TSV_OVERRIDE env variable.
+#
+# used_override records whether the override file was actually consumed, so
+# that fork filtering below is skipped only for operator-vetted input.
+used_override=false
+if [[ -n "${CONSUMERS_TSV_OVERRIDE:-}" && -f "$CONSUMERS_TSV_OVERRIDE" ]]; then
+  cp "$CONSUMERS_TSV_OVERRIDE" "$CONSUMERS_TSV"
+  used_override=true
+  echo "Using override consumers TSV: $CONSUMERS_TSV_OVERRIDE" >&2
+else
+  if [[ -n "${CONSUMERS_TSV_OVERRIDE:-}" ]]; then
+    echo "WARNING: CONSUMERS_TSV_OVERRIDE is set but is not a file: $CONSUMERS_TSV_OVERRIDE — falling back to code search" >&2
+  fi
+  # A failed search (auth, rate limit, network) must not be mistaken for
+  # "no consumers": stop instead of reporting nothing to propagate.
+  if ! gh search code "$needle" --owner hyperpolymath --limit 100 \
+      --json repository,path \
+      --jq '.[] | select(.path | startswith(".github/workflows/")) | "\(.repository.nameWithOwner)\t\(.path)"' \
+      > "$CONSUMERS_TSV"; then
+    echo "ERROR: gh search code failed — cannot enumerate consumers for $needle" >&2
+    exit 1
+  fi
+fi
+
+# Drop fork repos — per estate license policy, third-party / forked stuff is
+# off-limits. (gh search code does not filter forks; we look up each owner-repo
+# pair and skip forks.) For large sweeps this round-trips N times — cache as
+# needed.
+#
+# Arguments: $1 - path to a `<owner>/<repo>\t<path>` TSV, rewritten in place.
+# Outputs:   one SKIP line on stderr per dropped row.
+# A repo whose fork status cannot be looked up is dropped as well (fail-closed)
+# and logged as such rather than as a fork.
+filter_forks() {
+  local tsv="$1"
+  local out="${tsv}.no-forks"
+  local repo path is_fork
+  : > "$out"
+  # `|| [[ -n "$repo" ]]` keeps a final row that lacks a trailing newline.
+  while IFS=$'\t' read -r repo path || [[ -n "$repo" ]]; do
+    # stdin is redirected so gh can never consume the rows still to be read.
+    if ! is_fork=$(gh repo view "$repo" --json isFork --jq '.isFork' 2>/dev/null < /dev/null); then
+      echo "SKIP (fork status unknown — lookup failed): $repo" >&2
+      continue
+    fi
+    if [[ "$is_fork" == "false" ]]; then
+      printf '%s\t%s\n' "$repo" "$path" >> "$out"
+    else
+      echo "SKIP (fork): $repo" >&2
+    fi
+  done < "$tsv"
+  mv "$out" "$tsv"
+}
+
+# Skip fork-filter only when the operator-supplied override TSV was actually
+# used — they've already vetted it. Search results are always filtered.
+if [[ -s "$CONSUMERS_TSV" && "$used_override" == "false" ]]; then
+  filter_forks "$CONSUMERS_TSV"
+fi
+
+# Count non-blank rows; unlike `wc -l` this also counts a last row without a
+# trailing newline and never pads the number with whitespace.
+n_consumers=$(awk 'NF { n++ } END { print n + 0 }' "$CONSUMERS_TSV")
+echo "Consumers identified: $n_consumers" >&2
+
+if [[ "$n_consumers" -eq 0 ]]; then
+  echo "No estate consumers found for $needle — nothing to propagate." >&2
+  exit 0
+fi
+
+# --- 5. Compose payload + fire repository_dispatch ----------------------------
+
+# Slug the workflow basename for branch name. Both extensions admitted by the
+# source_workflow validation (.yml and .yaml) are stripped; remaining dots
+# become dashes.
+workflow_file="${source_workflow##*/}"
+workflow_stem="${workflow_file%.yml}"
+workflow_stem="${workflow_stem%.yaml}"
+workflow_slug="${workflow_stem//./-}"
+short_new_sha="${new_sha:0:7}"
+
+branch_name="ci/bump-${workflow_slug}-${short_new_sha}"
+
+# title_suffix re-checked against forbidden keywords; we synthesise it from
+# safe metadata only (workflow path + short SHA), NOT from pr_title.
+title_suffix="bump ${source_workflow}@${short_new_sha}"
+
+# NOTE(review): this copy of the diff has lost everything between `cat <` and
+# `&2` on the next line — apparently the body_blurb heredoc, the payload
+# composition and the opening of the DRY_RUN branch. Restore that span from
+# the original commit; the fragment below is kept exactly as found.
+body_blurb=$(cat <&2
+  printf '%s\n' "$payload"
+  exit 0
+fi
+
+echo "Firing repository_dispatch propagate-sha-bump → $FARM_REPO ($n_consumers consumers)" >&2
+
+printf '%s' "$payload" \
+  | gh api -X POST "repos/${FARM_REPO}/dispatches" --input -
+
+echo "OK: dispatch fired. Receiver workflow will run async on $FARM_REPO." >&2
diff --git a/tests/propagate-sha-bump-smoke.sh b/tests/propagate-sha-bump-smoke.sh
new file mode 100755
index 00000000..1ad1b825
--- /dev/null
+++ b/tests/propagate-sha-bump-smoke.sh
@@ -0,0 +1,178 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: MPL-2.0
+#
+# Smoke test for scripts/propagate-sha-bump.sh.
+#
+# Exercises the four validation-failure paths, the title-keyword refusal
+# path, and the DRY_RUN payload-composition path without dispatching
+# anything. Does NOT hit the network — all calls return before the
+# gh-search step (or in DRY_RUN, skip the dispatch).
+set -euo pipefail
+
+cd "$(dirname "$0")/.."
+
+SCRIPT="scripts/propagate-sha-bump.sh"
+
+tmp=$(mktemp -d)
+trap 'rm -rf "$tmp"' EXIT
+
+pass=0
+fail=0
+
+# Compare an observed exit status with the expected one and tally the result.
+# Arguments: $1 - label, $2 - expected exit status, $3 - actual exit status
+# Globals:   pass, fail (incremented)
+assert_exit() {
+  local label="$1" expected_rc="$2"
+  local actual_rc="$3"
+  if [[ "$actual_rc" -eq "$expected_rc" ]]; then
+    echo "ok $label (rc=$actual_rc)"
+    # Not ((pass++)): that returns status 1 while the counter is 0, which
+    # aborts the whole run when called under `set -e`.
+    pass=$((pass + 1))
+  else
+    echo "FAIL $label (expected rc=$expected_rc got rc=$actual_rc)"
+    fail=$((fail + 1))
+  fi
+}
+
+# 1. Wrong rule name → exit 1.
+# Cases 1–5 also run with DRY_RUN=true: should a guard ever regress, the
+# script then stops at the payload print instead of firing a real dispatch.
+cat > "$tmp/wrong-rule.json" <<'EOF'
+{
+  "rule": "some_other_rule",
+  "source_repo": "hyperpolymath/standards",
+  "source_workflow": ".github/workflows/governance-reusable.yml",
+  "old_sha": "0011223344556677889900112233445566778899",
+  "new_sha": "abcdef0123456789abcdef0123456789abcdef01",
+  "pr_title": "x",
+  "pr_number": 1
+}
+EOF
+set +e
+DRY_RUN=true "$SCRIPT" /ignored "$tmp/wrong-rule.json" >/dev/null 2>&1
+assert_exit "wrong rule name → exit 1" 1 $?
+set -e
+
+# 2. Malformed SHA → exit 1.
+cat > "$tmp/bad-sha.json" <<'EOF'
+{
+  "rule": "reusable_workflow_sha_bump_needs_propagation",
+  "source_repo": "hyperpolymath/standards",
+  "source_workflow": ".github/workflows/governance-reusable.yml",
+  "old_sha": "not-a-sha",
+  "new_sha": "abcdef0123456789abcdef0123456789abcdef01",
+  "pr_title": "x",
+  "pr_number": 1
+}
+EOF
+set +e
+DRY_RUN=true "$SCRIPT" /ignored "$tmp/bad-sha.json" >/dev/null 2>&1
+assert_exit "malformed SHA → exit 1" 1 $?
+set -e
+
+# 3. old_sha == new_sha → exit 1.
+cat > "$tmp/same-sha.json" <<'EOF'
+{
+  "rule": "reusable_workflow_sha_bump_needs_propagation",
+  "source_repo": "hyperpolymath/standards",
+  "source_workflow": ".github/workflows/governance-reusable.yml",
+  "old_sha": "abcdef0123456789abcdef0123456789abcdef01",
+  "new_sha": "abcdef0123456789abcdef0123456789abcdef01",
+  "pr_title": "x",
+  "pr_number": 1
+}
+EOF
+set +e
+DRY_RUN=true "$SCRIPT" /ignored "$tmp/same-sha.json" >/dev/null 2>&1
+assert_exit "old_sha == new_sha → exit 1" 1 $?
+set -e
+
+# 4. Bad source_repo → exit 1.
+cat > "$tmp/bad-repo.json" <<'EOF'
+{
+  "rule": "reusable_workflow_sha_bump_needs_propagation",
+  "source_repo": "evil-org/standards",
+  "source_workflow": ".github/workflows/governance-reusable.yml",
+  "old_sha": "0011223344556677889900112233445566778899",
+  "new_sha": "abcdef0123456789abcdef0123456789abcdef01",
+  "pr_title": "x",
+  "pr_number": 1
+}
+EOF
+set +e
+DRY_RUN=true "$SCRIPT" /ignored "$tmp/bad-repo.json" >/dev/null 2>&1
+assert_exit "non-estate source_repo → exit 1" 1 $?
+set -e
+
+# 5. Title keyword → exit 0 (REFUSED, not error — this is the expected refusal path).
+cat > "$tmp/license-title.json" <<'EOF'
+{
+  "rule": "reusable_workflow_sha_bump_needs_propagation",
+  "source_repo": "hyperpolymath/standards",
+  "source_workflow": ".github/workflows/governance-reusable.yml",
+  "old_sha": "0011223344556677889900112233445566778899",
+  "new_sha": "abcdef0123456789abcdef0123456789abcdef01",
+  "pr_title": "ci: bump license header normalisation",
+  "pr_number": 1
+}
+EOF
+set +e
+out=$(DRY_RUN=true "$SCRIPT" /ignored "$tmp/license-title.json" 2>&1)
+rc=$?
+# Tally before errexit is switched back on, as cases 1–4 do.
+assert_exit "license keyword in pr_title → exit 0 (REFUSED)" 0 "$rc"
+set -e
+if grep -q "REFUSED" <<<"$out"; then
+  echo "ok refusal message printed"
+  # Plain assignment: ((pass++)) / ((fail++)) return status 1 while the
+  # counter is 0 and would abort the run under `set -e`.
+  pass=$((pass + 1))
+else
+  echo "FAIL refusal message missing"
+  fail=$((fail + 1))
+fi
+
+# 6. DRY_RUN with valid finding + override TSV → exit 0, payload printed.
+cat > "$tmp/good.json" <<'EOF'
+{
+  "rule": "reusable_workflow_sha_bump_needs_propagation",
+  "source_repo": "hyperpolymath/standards",
+  "source_workflow": ".github/workflows/governance-reusable.yml",
+  "old_sha": "0011223344556677889900112233445566778899",
+  "new_sha": "abcdef0123456789abcdef0123456789abcdef01",
+  "pr_title": "ci(governance): tighten codeql pin set",
+  "pr_number": 999
+}
+EOF
+
+# Rows are `<owner>/<repo>\t<path>`; printf spells the tab out so the
+# separator cannot be lost to whitespace mangling of this file.
+printf '%s\t%s\n' \
+  "hyperpolymath/repo-a" ".github/workflows/governance.yml" \
+  "hyperpolymath/repo-b" ".github/workflows/governance.yml" \
+  > "$tmp/consumers.tsv"
+
+set +e
+out=$(CONSUMERS_TSV_OVERRIDE="$tmp/consumers.tsv" DRY_RUN=true "$SCRIPT" /ignored "$tmp/good.json" 2>/dev/null)
+rc=$?
+# Tally the exit status before errexit is switched back on, as cases 1–4 do.
+assert_exit "DRY_RUN with valid finding → exit 0" 0 "$rc"
+set -e
+
+# Check that the captured DRY_RUN output contains a literal string and tally
+# the result.
+# Arguments: $1 - text printed after "ok " on success
+#            $2 - text printed after "FAIL " on failure
+#            $3 - fixed string (not a regex) that must occur in $out
+# Globals:   out (read), pass, fail (incremented)
+expect_in_payload() {
+  local ok_msg="$1" fail_msg="$2" needle="$3"
+  # -F: the needles contain dots and slashes that must match literally.
+  if grep -qF -e "$needle" <<<"$out"; then
+    echo "ok $ok_msg"
+    # Not ((pass++)): that returns status 1 while the counter is 0, which
+    # aborts the run under `set -e`.
+    pass=$((pass + 1))
+  else
+    echo "FAIL $fail_msg"
+    fail=$((fail + 1))
+  fi
+}
+
+expect_in_payload "payload contains event_type" "payload missing event_type" \
+  '"event_type": "propagate-sha-bump"'
+
+expect_in_payload "branch_name slug correct" "branch_name slug wrong" \
+  '"branch_name": "ci/bump-governance-reusable-abcdef0"'
+
+# Title suffix is synthesised from safe metadata (NOT from pr_title), so it
+# should not echo the upstream PR title verbatim.
+expect_in_payload "title_suffix synthesised from safe metadata" "title_suffix wrong" \
+  '"title_suffix": "bump .github/workflows/governance-reusable.yml@abcdef0"'
+
+echo ""
+echo "passed: $pass failed: $fail"
+[[ "$fail" -eq 0 ]]