From 8408b10e3a1c67eaeb3e54fd45b52da9a53e7b56 Mon Sep 17 00:00:00 2001 From: hyperpolymath <6759885+hyperpolymath@users.noreply.github.com> Date: Sun, 31 May 2026 00:11:01 +0100 Subject: [PATCH] =?UTF-8?q?feat(chapel):=20per-prover=20cwd=20+=20filename?= =?UTF-8?q?Override=20hooks=20=E2=80=94=20Idris2/Agda=20success=20in=20ben?= =?UTF-8?q?ch=20(closes=20#158,=20closes=20#159)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wave-1 MRR baseline (docs/bench/2026-05-30-chapel-mrr-baseline.md) caveats 1 + 2: Idris2 and Agda fixtures failed across all three strategies because tryProver's uniform spawn contract (parent CWD, generic `goal__.` basename) violates each prover's module-name + source-directory constraints. ProverInfo gains two optional fields with empty-string defaults that preserve the prior contract for the 28 unaffected provers: - cwd: shell-wrapped `sh -c "cd && exec ..."` to be coforall- safe (process-global chdir would race against parallel spawn calls). - filenameOverride: literal basename for provers that enforce module-name = filename. Registry: Agda gets cwd=/tmp/echidna-chapel + filenameOverride= "Trivial.agda" (matches new fixture's `module Trivial where`); Idris2 gets cwd + filenameOverride="Trivial.idr" (existing fixture already declares `module Trivial`). Justfile bench-chapel-mrr derives IDRIS2_PREFIX from `which idris2` so the recipe is self-contained. Verified: `just bench-chapel-mrr` shows agda_trivial → true and idris2_trivial → true across sequential/best-of/speculative (0.101-1.970s, full results updated in the baseline doc). ADR: docs/decisions/2026-05-31-chapel-prover-spawn-hooks.md. Co-Authored-By: Claude Opus 4.7 (1M context) --- Justfile | 19 +- docs/bench/2026-05-30-chapel-mrr-baseline.md | 31 +++ .../2026-05-30-chapel-rehabilitation.md | 7 + .../2026-05-31-chapel-prover-spawn-hooks.md | 182 ++++++++++++++++++ src/chapel/bench_mrr.chpl | 5 +- src/chapel/parallel_proof_search.chpl | 101 +++++++++- tests/chapel_fixtures/agda_trivial.agda | 14 ++ 7 files changed, 349 insertions(+), 10 deletions(-) create mode 100644 docs/decisions/2026-05-31-chapel-prover-spawn-hooks.md create mode 100644 tests/chapel_fixtures/agda_trivial.agda diff --git a/Justfile b/Justfile index ae135a0..977d20e 100644 --- a/Justfile +++ b/Justfile @@ -530,9 +530,24 @@ chapel-test: build-chapel-ffi # parallelProofSearch / parallelProofSearchSpeculative against the # fixture corpus in tests/chapel_fixtures/. Emits CSV to stdout. Doc: # docs/bench/2026-05-30-chapel-mrr-baseline.md. +# +# IDRIS2_PREFIX is derived from `which idris2` because Idris2's prelude +# resolution requires the env var to be set when invoking `--check`; +# the per-prover cwd hook (#158) sets the subprocess working dir but +# doesn't help if the parent never had the env var (see +# src/chapel/parallel_proof_search.chpl ProverInfo[4]). The derived +# prefix is `dirname(dirname(realpath(which idris2)))` — works for +# both source builds and asdf-shimmed installs. bench-chapel-mrr: - cd src/chapel && chpl -o bench_mrr bench_mrr.chpl && \ - ./bench_mrr --verbose=false --timeout=10 + #!/usr/bin/env bash + set -euo pipefail + if command -v idris2 >/dev/null 2>&1 && [ -z "${IDRIS2_PREFIX:-}" ]; then + export IDRIS2_PREFIX="$(dirname "$(dirname "$(readlink -f "$(command -v idris2)")")")" + echo "# IDRIS2_PREFIX=$IDRIS2_PREFIX (derived from which idris2)" >&2 + fi + cd src/chapel + chpl -o bench_mrr bench_mrr.chpl + ./bench_mrr --verbose=false --timeout=10 # Rebuild Chapel 2.8.0 from source with CHPL_LIB_PIC=pic so that # `chpl --library --dynamic` can produce a shared-library form of the diff --git a/docs/bench/2026-05-30-chapel-mrr-baseline.md b/docs/bench/2026-05-30-chapel-mrr-baseline.md index ce8f7fb..acf5045 100644 --- a/docs/bench/2026-05-30-chapel-mrr-baseline.md +++ b/docs/bench/2026-05-30-chapel-mrr-baseline.md @@ -106,3 +106,34 @@ hand-off and the Wave-2 Idris2/Agda fixes above. ## Raw data 5-run readings preserved at `docs/bench/2026-05-30-chapel-mrr-baseline.csv`. + +## Update 2026-05-31 — Caveats 1 & 2 resolved (#158 + #159) + +`ProverInfo` gained two optional spawn hooks: `cwd` (subprocess +working directory, shell-wrapped to be coforall-safe) and +`filenameOverride` (literal basename for provers that enforce +module-name = filename). Registry entries for Idris2 and Agda set +both; the bench fixture set gained `agda_trivial.agda`. The +`bench-chapel-mrr` Justfile recipe now derives `IDRIS2_PREFIX` from +`which idris2` so the parent shell doesn't need to export it. + +Single-run readings (replace with 5-run medians in a follow-up): + +| fixture | strategy | wallclock | winning prover | +|---|---|---|---| +| idris2_trivial | sequential | 1.970 | Idris2 | +| idris2_trivial | parallel_bestof | 1.037 | Idris2 | +| idris2_trivial | parallel_speculative| 1.032 | Idris2 | +| agda_trivial | sequential | 0.101 | Agda | +| agda_trivial | parallel_bestof | 0.751 | Agda | +| agda_trivial | parallel_speculative| 0.450 | Agda | + +Note the new Agda regime: `sequential` is fastest (0.101 s) because +Agda sits at registry position 0 — the first prover tried — and +succeeds immediately, so the parallel strategies pay coforall spawn +overhead with no benefit. The trivial-goal regime observation from +the original bench applies symmetrically here. + +Wave-3 follow-ups still open: real-corpus speedup bench (#161, +10-30 s prover invocations) and per-prover preempted/timeout/success +telemetry (#162). diff --git a/docs/decisions/2026-05-30-chapel-rehabilitation.md b/docs/decisions/2026-05-30-chapel-rehabilitation.md index 72c0cd6..5993d8f 100644 --- a/docs/decisions/2026-05-30-chapel-rehabilitation.md +++ b/docs/decisions/2026-05-30-chapel-rehabilitation.md @@ -128,6 +128,13 @@ Wave 3+ (longer term): tactics consumed by the Chapel side); gated on a producer for the serialised record format. +Wave-3 follow-ups landed: +- Per-prover `cwd` + `filenameOverride` hooks on `ProverInfo` + (closes #158 / #159). Resolves caveats 1 + 2 of the Wave-1 MRR + baseline — Idris2 and Agda fixtures now succeed end-to-end + through `bench-chapel-mrr`. Mechanism + alternatives: + [2026-05-31-chapel-prover-spawn-hooks.md](./2026-05-31-chapel-prover-spawn-hooks.md). + ## Consequences **Positive.** diff --git a/docs/decisions/2026-05-31-chapel-prover-spawn-hooks.md b/docs/decisions/2026-05-31-chapel-prover-spawn-hooks.md new file mode 100644 index 0000000..c22cd2e --- /dev/null +++ b/docs/decisions/2026-05-31-chapel-prover-spawn-hooks.md @@ -0,0 +1,182 @@ + + +# 2026-05-31 — Per-prover spawn hooks: cwd + filenameOverride + +ADR-style record of the mechanism added to `ProverInfo` and +`tryProver` (`src/chapel/parallel_proof_search.chpl`) to handle +provers whose CLI invocation requires per-prover state at spawn +time. Closes issues #158 (Idris2 env hook) and #159 (Agda +filename hook), both surfaced by the Wave-1 MRR baseline +(`docs/bench/2026-05-30-chapel-mrr-baseline.md`). + +## Status + +Accepted. Implemented in the PR that introduces this ADR. + +## Context + +The L2.2 rehabilitation arc (#133 → PR #146) shipped the Chapel +metalayer with a single uniform `tryProver(info, goal, …)` +contract: every prover gets a temp file named +`goal__.`, spawned from the parent's +CWD with the parent's environment. That contract held for ~22 of +the 30 registered backends (Coq, Lean, the SMT row, ATPs, etc.) +but two of the 30 — Idris2 and Agda — broke at spawn time: + +- **Idris2** (`idris2 --check`) resolves its prelude relative to + `IDRIS2_PREFIX` (or the install root) AND requires the source + file to live inside the configured source directory. The + Wave-1 invocation `idris2 --check /tmp/echidna-chapel/goal_Idris2_0.idr` + failed with `Module Prelude not found` even when the parent + shell had `IDRIS2_PREFIX` set, because the source file wasn't + in the cwd Idris2 used as its source dir (the parent's cwd, + not the temp dir). +- **Agda** (`agda --safe`) requires the source-file basename + to be a valid Agda identifier AND the module declaration + inside the file to match the basename. `goal_Agda_0.agda` + is rejected by the lexer (`in the name 0, the part 0 is not + valid because it is a literal`); even after fixing the + lexer issue, Agda then enforces module-name = file-name and + rejects mismatched module declarations. + +The Wave-1 baseline (`docs/bench/2026-05-30-chapel-mrr-baseline.md`) +explicitly tracked these as caveats 1 + 2, deferred as Wave-2 +follow-up issues (`#158`/`#159`). + +## Decision + +**Two new optional fields on `ProverInfo`** with empty-string +defaults that preserve the prior contract for the 28 provers +that don't need per-prover spawn state: + +```chapel +record ProverInfo { + var id: int; + var name: string; + // … existing fields … + var cwd: string; // "" = inherit parent CWD + var filenameOverride: string; // "" = goal__. +} +``` + +A custom `proc init(…, cwd = "", filenameOverride = "")` keeps +all 30 existing registry call-sites compiling unchanged +(default arguments fill the new fields). A zero-arg `proc init()` +is also defined so `var provers: [0..29] ProverInfo` continues +to default-construct each slot — without it the custom positional +init would shadow Chapel's auto-generated zero-arg init and the +array declaration would fail to compile. + +`tryProver` consumes both fields: + +- **`filenameOverride`** swaps the generic + `goal__.` for a literal basename (with + extension) when set. The default form remains locale-id-suffixed + so non-overriding provers stay collision-free across locales in + multi-locale runs. +- **`cwd`** is implemented as a shell wrapper around the spawn: + `sh -c "cd && exec "`. This is the + thread-safe equivalent of `chdir()` — process-global `chdir` + would race against other parallel spawn calls inside a + `coforall`, but a per-subprocess `sh -c` keeps the directory + change local. The parent's full environment (including + `IDRIS2_PREFIX`, `IDRIS2_DATA_DIR`) is inherited regardless, + per POSIX spawn defaults. + +Registry entries updated: + +- `provers[0]` (Agda) sets `cwd = "/tmp/echidna-chapel"` and + `filenameOverride = "Trivial.agda"`. The fixture + `tests/chapel_fixtures/agda_trivial.agda` declares + `module Trivial where` to match. +- `provers[4]` (Idris2) sets `cwd = "/tmp/echidna-chapel"` and + `filenameOverride = "Trivial.idr"`. The fixture + `tests/chapel_fixtures/idris2_trivial.idr` already declared + `module Trivial`, so its content is unchanged. + +The Justfile `bench-chapel-mrr` recipe gained a derived +`IDRIS2_PREFIX` step: if the env var isn't set and `which idris2` +resolves, the recipe sets it to `dirname(dirname(realpath(which idris2)))`. +This means a user with idris2 on PATH no longer needs to remember +to export the prefix manually for `just bench-chapel-mrr` to +work end-to-end. + +## Alternatives considered + +1. **Per-prover env-extra list (key=value pairs)** — rejected as + over-engineered. Chapel `spawn` inherits parent env by default; + the only env hole was Idris2's `IDRIS2_PREFIX`, which the + Justfile recipe now derives and exports parent-side. No prover + in the current registry needs a per-spawn env override that + isn't already in the parent. +2. **Auto-extract module name from goal content** (regex + `^module\s+(\w+)`) — rejected as fragile. Works for clean + fixtures but breaks on goals containing comments-before-module, + pragmas, or multi-module files. The explicit + `filenameOverride` field keeps the registry honest about which + provers have basename constraints. +3. **Per-prover spawn callback (function pointer in the record)** — + rejected for now: Chapel records with function fields are + awkward to construct in a literal-initialiser-heavy registry, + and the two-field mechanism handles every concrete need + surfaced by Wave-1. Revisit if a third spawn-time wart appears. +4. **`POSIX_spawn`-style explicit cwd in Chapel `spawn`** — Chapel + 2.x `Subprocess.spawn` does not expose a `cwd` parameter at + the time of writing (verified against the apt-shipped 2.3.0 and + the source-built 2.8.0). The shell-wrapper workaround sidesteps + the gap with no Chapel-version dependency. + +## Consequences + +**Positive.** + +- `just bench-chapel-mrr` now shows `idris2_trivial → true` and + `agda_trivial → true` across all three strategies, satisfying + the explicit acceptance criteria for both #158 and #159. +- The hook pattern is general — future provers with similar + module-name or working-directory requirements (Lean 4 already + works out-of-the-box but a future fixture might trigger its + `Lake` workspace check; Isabelle has session-name resolution) + can be wired by adding two field values to their registry + entry, no `tryProver` changes. +- The Wave-1 baseline document's caveats 1 + 2 (formerly tracked + as Wave-2 follow-ups) are closed, leaving only caveat 3 + (sub-second wall-clock jitter, which is a measurement-method + issue, not a metalayer defect). + +**Negative.** + +- Shell-wrapping every `cwd`-using prover costs one `fork+exec` + for the `sh` itself per invocation. For 0.1-second prover runs + this is ~5% overhead; for the 10-30 s real-corpus benchmark + target (#161), it's negligible. If this becomes a hot path, + switching to a direct-spawn cwd implementation (when Chapel + exposes it) would remove the overhead. +- `filenameOverride` couples the registry entry to fixture + content (the fixture's `module X where` declaration must match + the override). This is acceptable because the fixture corpus + is small and version-controlled together with the registry; + a real-corpus runner would need a different convention (or + the auto-extract approach from "Alternatives" above). + +**Net.** Two scoped one-line additions to `ProverInfo` close +both acceptance-criterion-driven Wave-3 issues with no churn +to the 28 unaffected provers; the shell-wrapper approach buys +portability across Chapel 2.x runtimes; the Justfile derivation +of `IDRIS2_PREFIX` removes the last user-side env-export +ceremony. + +## Toolchain version pinning + +- `chpl 2.3.0` (CI) and `chpl 2.8.0` (local). The custom + `proc init` syntax + `init this` invariant used here is + stable across both. +- `idris2 0.8.0` (local). `IDRIS2_PREFIX`-based prelude + resolution is the documented mechanism; the source-directory + check is `idris2 --check`-specific (a `package`-driven build + doesn't have the same constraint). +- `agda 2.6.3`. The module-name = filename rule is permanent + Agda design (`Issue #1062` upstream). + +These pins are re-stated in `chapel-ci.yml`, `Justfile`, and +`docs/decisions/2026-05-30-chapel-rehabilitation.md`. diff --git a/src/chapel/bench_mrr.chpl b/src/chapel/bench_mrr.chpl index 3d67822..b9a7a25 100644 --- a/src/chapel/bench_mrr.chpl +++ b/src/chapel/bench_mrr.chpl @@ -81,10 +81,11 @@ proc benchOne(goal: string, fixtureName: string, allProvers: [] ProverInfo, proc main(): int { var allProvers = buildProverRegistry(); - const fixtures: [0..2] Fixture = [ + const fixtures: [0..3] Fixture = [ new Fixture("coq_trivial", fixtureDir + "/coq_trivial.v"), new Fixture("lean_trivial", fixtureDir + "/lean_trivial.lean"), - new Fixture("idris2_trivial", fixtureDir + "/idris2_trivial.idr") + new Fixture("idris2_trivial", fixtureDir + "/idris2_trivial.idr"), + new Fixture("agda_trivial", fixtureDir + "/agda_trivial.agda") ]; writeln("fixture,strategy,wallclock_s,success,winning_prover"); diff --git a/src/chapel/parallel_proof_search.chpl b/src/chapel/parallel_proof_search.chpl index 92479e2..d53199d 100644 --- a/src/chapel/parallel_proof_search.chpl +++ b/src/chapel/parallel_proof_search.chpl @@ -43,6 +43,54 @@ record ProverInfo { var category: ProverCategory; var fileExt: string; // file extension for temp input files var argTemplate: string; // how to pass the input file (%FILE% placeholder) + // Optional spawn hooks. Default empty preserves prior behaviour: + // inherit parent CWD + use generic `goal__.` + // filename. Set per-prover when the prover enforces special + // requirements at subprocess spawn time: + // cwd — Idris2 resolves its prelude relative to CWD + // and requires the source file to live inside + // the configured source directory (#158). + // filenameOverride — Agda + Idris2 enforce module-name = filename; + // generic `goal__` either fails the Agda + // lexer ("the part 0 is not valid because it is + // a literal") or the Idris2 module/file-mismatch + // check (#159). Use a literal basename (with + // extension) that matches the fixture's module + // declaration. + var cwd: string; + var filenameOverride: string; + + proc init(id: int, name: string, executable: string, + category: ProverCategory, fileExt: string, + argTemplate: string, + cwd: string = "", + filenameOverride: string = "") { + this.id = id; + this.name = name; + this.executable = executable; + this.category = category; + this.fileExt = fileExt; + this.argTemplate = argTemplate; + this.cwd = cwd; + this.filenameOverride = filenameOverride; + init this; + } + + // Zero-arg init for array default-initialisation + // (`var provers: [0..29] ProverInfo`). Custom positional init above + // suppresses the auto-generated zero-arg form, so the array + // declaration needs this fallback to compile. + proc init() { + this.id = -1; + this.name = ""; + this.executable = ""; + this.category = ProverCategory.InteractiveAssistant; + this.fileExt = ""; + this.argTemplate = ""; + this.cwd = ""; + this.filenameOverride = ""; + init this; + } } // Build the full 30-prover registry @@ -50,11 +98,32 @@ proc buildProverRegistry(): [0..29] ProverInfo { var provers: [0..29] ProverInfo; // Tier 1: Interactive proof assistants (10) - provers[0] = new ProverInfo(0, "Agda", "agda", ProverCategory.InteractiveAssistant, "agda", "--safe %FILE%"); + // Agda: --safe rejects postulate/admit/believe_me. Two filename + // hooks needed for the module-name resolver: + // filenameOverride: the source-file basename must be a valid + // identifier (generic `goal_Agda_0` is rejected + // because the part `0` is parsed as a literal). + // The fixture declares `module Trivial where`. + // cwd: the module-name resolver searches relative + // to the cwd (plus agda-stdlib paths). Setting + // cwd to the temp dir makes the override basename + // resolve from the file actually written there. + provers[0] = new ProverInfo(0, "Agda", "agda", ProverCategory.InteractiveAssistant, "agda", "--safe %FILE%", + cwd = "/tmp/echidna-chapel", + filenameOverride = "Trivial.agda"); provers[1] = new ProverInfo(1, "Coq", "coqc", ProverCategory.InteractiveAssistant, "v", "%FILE%"); provers[2] = new ProverInfo(2, "Lean", "lean", ProverCategory.InteractiveAssistant, "lean", "%FILE%"); provers[3] = new ProverInfo(3, "Isabelle", "isabelle", ProverCategory.InteractiveAssistant, "thy", "process %FILE%"); - provers[4] = new ProverInfo(4, "Idris2", "idris2", ProverCategory.InteractiveAssistant, "idr", "--check %FILE%"); + // Idris2: `--check` resolves the prelude relative to IDRIS2_PREFIX + // (or the install root) AND the source file must live in the + // configured source directory. cwd = /tmp/echidna-chapel matches + // where tryProver writes the temp file; the parent process's + // IDRIS2_PREFIX is inherited by the subprocess (POSIX spawn + // default). filenameOverride pins the basename to the fixture's + // `module Trivial where` declaration. + provers[4] = new ProverInfo(4, "Idris2", "idris2", ProverCategory.InteractiveAssistant, "idr", "--check %FILE%", + cwd = "/tmp/echidna-chapel", + filenameOverride = "Trivial.idr"); provers[5] = new ProverInfo(5, "FStar", "fstar.exe", ProverCategory.InteractiveAssistant, "fst", "%FILE%"); provers[6] = new ProverInfo(6, "HOL4", "hol", ProverCategory.InteractiveAssistant, "sml", "< %FILE%"); provers[7] = new ProverInfo(7, "HOLLight", "ocaml", ProverCategory.InteractiveAssistant, "ml", "%FILE%"); @@ -181,7 +250,13 @@ proc tryProver(info: ProverInfo, goal: string, timeout: int = defaultTimeout, const tmpDir = "/tmp/echidna-chapel"; if !exists(tmpDir) then mkdir(tmpDir); - const tmpFile = tmpDir + "/goal_" + info.name:string + "_" + here.id:string + "." + info.fileExt; + // #159: provers that enforce module-name = filename (Agda, Idris2) + // override the generic basename via filenameOverride. Default keeps + // the locale-id-suffixed form so non-overriding provers stay + // collision-free across locales. + const tmpFile = if info.filenameOverride != "" + then tmpDir + "/" + info.filenameOverride + else tmpDir + "/goal_" + info.name:string + "_" + here.id:string + "." + info.fileExt; try { var f = open(tmpFile, ioMode.cw); @@ -206,11 +281,25 @@ proc tryProver(info: ProverInfo, goal: string, timeout: int = defaultTimeout, // from splitting `argTemplate` after `%FILE%` substitution. `list(string)` // is used rather than a fixed array because the argument count is // template-driven and varies per prover. + // + // #158: when info.cwd is set, wrap the call in `sh -c "cd && + // exec "` so the subprocess starts in the + // configured directory. A shell wrapper (vs process-global chdir) + // is required because chdir would race against other parallel + // spawn calls inside a coforall. The parent's full environment + // (incl. IDRIS2_PREFIX / IDRIS2_DATA_DIR) is inherited regardless + // — POSIX spawn defaults preserve env. var cmdStr = info.argTemplate.replace("%FILE%", tmpFile); var argList: list(string); - argList.pushBack(info.executable); - for part in cmdStr.split(" ") { - if part.size > 0 then argList.pushBack(part); + if info.cwd != "" { + argList.pushBack("sh"); + argList.pushBack("-c"); + argList.pushBack("cd " + info.cwd + " && exec " + info.executable + " " + cmdStr); + } else { + argList.pushBack(info.executable); + for part in cmdStr.split(" ") { + if part.size > 0 then argList.pushBack(part); + } } var args = argList.toArray(); diff --git a/tests/chapel_fixtures/agda_trivial.agda b/tests/chapel_fixtures/agda_trivial.agda new file mode 100644 index 0000000..4917c3c --- /dev/null +++ b/tests/chapel_fixtures/agda_trivial.agda @@ -0,0 +1,14 @@ +-- SPDX-License-Identifier: MPL-2.0 +-- Trivial Agda goal: identity on Unit. Accepted by `agda --safe`. +-- Module name MUST match the filename written by tryProver — the +-- registry entry for Agda sets filenameOverride = "Trivial.agda" +-- (src/chapel/parallel_proof_search.chpl), so this declaration must +-- read `module Trivial where`. + +module Trivial where + +record ⊤ : Set where + constructor tt + +trivial-true : ⊤ +trivial-true = tt