diff --git a/CHANGELOG.adoc b/CHANGELOG.adoc index b29b0e1..accb0e8 100644 --- a/CHANGELOG.adoc +++ b/CHANGELOG.adoc @@ -12,6 +12,20 @@ https://semver.org/[Semantic Versioning]. === Added +==== `WorkflowAudit` rule `WF022` unanchored-heading-regex (2026-05-30, #360) + +Flags a markdown-heading-detection regex used *unanchored* inside inline +Python in a workflow `run:` block — e.g. `re.search(r'TypeScript [Ee]xemptions', line)` +with no `^#` anchor. Such a regex also matches prose mentions of the phrase, +so the parser silently walks the wrong section. This was the multi-week +silent failure of affinescript's governance gate (`standards#183` anchored +it). HIGH severity (estate-wide, invisible in logs). Heuristic: gated on +`python3` presence, a Title-Case heading shape, and absence of `^#` +anchoring; single-word matches (`Error: (.+)`) and anchored patterns are +ignored. Threaded into `audit/3`; covered in `test/workflow_audit_test.exs` +(positive + anchored / non-heading / no-python negatives). Zero findings on +hypatia's own workflows. Cohort hypatia#333, pattern 1. 
+ ==== `WorkflowAudit` rule `WF021` concurrency-missing-readonly (2026-05-30, #365) Flags a read-only check workflow (runs on `pull_request`/`push`, with a diff --git a/CHANGELOG.md b/CHANGELOG.md index 1aa192a..085f293 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ this project aims to follow [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- feat(rules): WorkflowAudit WF022 `unanchored_heading_regex` — flag inline-python heading-detection regexes not anchored to `^#` (#360) - feat(rules): WorkflowAudit WF021 `concurrency_missing_readonly` — flag read-only PR/push check workflows lacking a `concurrency:` block (#365) - feat(rules): CicdRules `duplicate_cron_schedule` — flag workflows with redundant cron entries on the same day-of-week / daily-subset (#362) - feat(rules): AffineScript hand-port pitfalls — HANDLE-as-fn-name + OCaml float ops (#332) diff --git a/lib/rules/workflow_audit.ex b/lib/rules/workflow_audit.ex index 46145d5..4ba6c17 100644 --- a/lib/rules/workflow_audit.ex +++ b/lib/rules/workflow_audit.ex @@ -84,6 +84,7 @@ defmodule Hypatia.Rules.WorkflowAudit do workflow_linter_self_ref = check_workflow_linter_self_reference(workflow_contents) codeql_missing_actions = check_codeql_missing_actions_language(workflow_contents) concurrency_missing = check_concurrency_missing_readonly(workflow_contents) + heading_regex_issues = check_unanchored_heading_regex(workflow_contents) %{ findings: @@ -93,7 +94,8 @@ defmodule Hypatia.Rules.WorkflowAudit do reusable_caller_context_self_checkout ++ missing_timeouts ++ scorecard_publish_run ++ nonroot_container_eacces ++ orphan_reusable_pins ++ ungated_secret_action ++ scorecard_wrapper_missing_perms ++ - workflow_linter_self_ref ++ codeql_missing_actions ++ concurrency_missing, + workflow_linter_self_ref ++ codeql_missing_actions ++ concurrency_missing ++ + heading_regex_issues, missing_count: length(missing), unpinned_count: length(unpinned), wrong_pin_count: length(wrong_pins), @@ -114,6 
+116,7 @@ defmodule Hypatia.Rules.WorkflowAudit do workflow_linter_self_ref_count: length(workflow_linter_self_ref), codeql_missing_actions_count: length(codeql_missing_actions), concurrency_missing_count: length(concurrency_missing), + heading_regex_issues_count: length(heading_regex_issues), workflow_count: length(workflow_files), standard_coverage: coverage_percentage(workflow_files) } @@ -1476,4 +1479,62 @@ defmodule Hypatia.Rules.WorkflowAudit do end end) end + + # ─── WF022: Unanchored heading regex in inline-python workflow scripts ─ + # + # An inline-python `run:` step that detects a markdown section heading with + # `re.search(r'TitleCase Phrase', line)` — without anchoring to `^#` / + # `^#{1,4}\s+` — also matches *prose* mentions of the same phrase, so the + # parser silently walks the wrong table. This was the multi-week silent + # gate failure on affinescript's governance check (standards#183 anchored + # the regex). HIGH severity: estate-wide and invisible in logs. + # + # See hyperpolymath/hypatia#360 (cohort hypatia#333, pattern 1). + + @wf022_re_call ~r/re\.(?:search|match)\(\s*r(['"])(.*?)\1/ + @wf022_reason "inline-python heading-detection regex is not anchored to `^#` / `^\#{1,4}\\s+`; it also matches prose mentions of the heading phrase, so the parser can silently walk the wrong section. Anchor the regex to the heading shape (e.g. `^\#{1,4}\\s+.*Phrase`) and name the intended heading in a comment." + + @doc """ + WF022: Detect a heading-detection regex used unanchored inside inline + Python in a workflow `run:` block. + + Heuristic: in a file that uses `python3`, find `re.search`/`re.match` whose + literal pattern has a markdown-heading shape (a Title-Case word followed by + another Title-Case word or a `[Xx]`-style char class) and does NOT start + with `^#`. + + Sensitivity / specificity: + * Specific — gated on `python3` presence, the heading-phrase shape, and + absence of `^#` anchoring; single-word matches (e.g. 
`Error: (.+)`) and
+      already-anchored patterns (`^#`, `^\\#`, `^\\s*#`, `^ {0,3}#`) are ignored.
+    * Sensitive — fires on any such unanchored heading regex in the file.
+  """
+  def check_unanchored_heading_regex(workflow_contents) do
+    Enum.flat_map(workflow_contents, fn {filename, content} ->
+      # Cheap gate: a file that never mentions python3 has no inline Python to scan.
+      if String.contains?(content, "python3") do
+        for [_call, _quote, pat] <- Regex.scan(@wf022_re_call, content),
+            wf022_heading_unanchored?(pat) do
+          %{
+            rule: "WF022",
+            type: :unanchored_heading_regex,
+            file: filename,
+            severity: :high,
+            pattern: pat,
+            reason: @wf022_reason,
+            fix_recipe: :anchor_heading_regex
+          }
+        end
+      else
+        []
+      end
+    end)
+  end
+
+  # Anchored forms: `^#`, escaped `^\#`, or leading whitespace (`^\s*#`, `^ {0,3}#`).
+  defp wf022_heading_unanchored?(pat) do
+    heading_like? = Regex.match?(~r/[A-Z][a-z]+\s+(?:[A-Z][a-z]+|\[[A-Za-z])/, pat)
+    anchored? = Regex.match?(~r/\A\^(?:(?:\\s|[ \t])(?:[*+?]|\{[\d,]*\})?)?\\?#/, pat)
+    heading_like? and not anchored?
+  end
 end
diff --git a/test/workflow_audit_test.exs b/test/workflow_audit_test.exs
index a2a79c6..c04dbba 100644
--- a/test/workflow_audit_test.exs
+++ b/test/workflow_audit_test.exs
@@ -529,4 +529,72 @@ defmodule Hypatia.Rules.WorkflowAuditTest do
       assert [] = WorkflowAudit.check_concurrency_missing_readonly(%{"labeler.yml" => wf})
     end
   end
+
+  describe "check_unanchored_heading_regex/1 (WF022)" do
+    test "flags an unanchored heading regex inside inline python" do
+      wf = """
+      jobs:
+        gate:
+          steps:
+            - run: |
+                python3 << 'PYEOF'
+                if re.search(r'TypeScript [Ee]xemptions', line):
+                    pass
+                PYEOF
+      """
+
+      [f] = WorkflowAudit.check_unanchored_heading_regex(%{"gov.yml" => wf})
+      assert f.rule == "WF022"
+      assert f.severity == :high
+    end
+
+    test "silent when the heading regex is anchored to ^#" do
+      wf = """
+      jobs:
+        gate:
+          steps:
+            - run: |
+                python3 << 'PYEOF'
+                if re.search(r'^# TypeScript Exemptions', line):
+                    pass
+                PYEOF
+      """
+
+      assert [] = WorkflowAudit.check_unanchored_heading_regex(%{"gov.yml" => wf})
+    end
+
+    test "silent when the anchor tolerates leading whitespace before the #" do
+      wf = ~S"""
+      jobs:
+        gate:
+          steps:
+            - run: |
+                python3 << 'PYEOF'
+                if re.search(r'^\s*#+\s+TypeScript Exemptions', line):
+                    pass
+                PYEOF
+      """
+
+      assert [] = WorkflowAudit.check_unanchored_heading_regex(%{"gov.yml" => wf})
+    end
+
+    test "silent for a non-heading regex (single capitalised word)" do
+      wf = """
+      jobs:
+        gate:
+          steps:
+            - run: |
+                python3 << 'PYEOF'
+                m = re.search(r'Error: (.+)', line)
+                PYEOF
+      """
+
+      assert [] = WorkflowAudit.check_unanchored_heading_regex(%{"gov.yml" => wf})
+    end
+
+    test "silent when the workflow has no inline python" do
+      wf = "jobs:\n build:\n steps:\n - run: echo hi\n"
+      assert [] = WorkflowAudit.check_unanchored_heading_regex(%{"ci.yml" => wf})
+    end
+  end
 end