diff --git a/.gitignore b/.gitignore index b42f65eb3f44..ef56cb633b43 100644 --- a/.gitignore +++ b/.gitignore @@ -39,3 +39,7 @@ __pycache__ # Local opt-in: install nargo from the matching official noir release instead of building from source. noir-from-release.flag + +# Manifests written by scripts/worktrees.sh (link-mode deps store; local-dev only). +.deps-manifest.json +.deps-manifest.linked diff --git a/CLAUDE.md b/CLAUDE.md index 96b74c043987..8f2639065424 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -53,6 +53,10 @@ Follow Conventional Commits: `fix:`, `feat:`, `chore:`, `refactor:`, `docs:`, `t When staging files, prefer `git add -u` or name specific files rather than `git add -A` or `git add .`. The aggregate flags will pick up unrelated untracked working directories (e.g. personal scratch projects at the repo root) and quietly stage them. Subagents must always name specific files in `git add` — never `-u`, `-A`, or `.` — because they lack the main conversation's context for judging which changes belong to the current task. + +To create a git worktree, use `scripts/worktrees.sh create <name> [base-ref]` instead of bare `git worktree add` followed by a full bootstrap: it seeds the worktree from cached build artifacts (shared read-only store + copies of the yarn layer) in minutes. Upstream artifacts in such worktrees are read-only symlinks — run `scripts/worktrees.sh thaw <path>` before rebuilding an upstream component locally. See `scripts/worktrees.sh --help`. + + Never bulk-update lockfiles (`Cargo.lock`, `yarn.lock`). Use targeted updates only: `cargo update --precise <version> --package <name>` for Rust, and `yarn up <package>@<version>` in the relevant workspace for TypeScript. Bulk updates drag in unrelated transitive changes that make review impossible and frequently break reproducibility. 
diff --git a/barretenberg/cpp/bootstrap.sh b/barretenberg/cpp/bootstrap.sh index d1ceff023bce..a5280405119f 100755 --- a/barretenberg/cpp/bootstrap.sh +++ b/barretenberg/cpp/bootstrap.sh @@ -15,6 +15,12 @@ export native_build_dir=$(scripts/preset-build-dir $native_preset) # Uses a sentinel prefix to reliably find the version location, enabling re-injection on cached binaries. function inject_version { local binary=$1 + # Read-only binaries are frozen cached-store artifacts (CACHE_LINK_DIR worktrees); stamping them + # would mutate state shared across checkouts, so leave them with their as-built version. + if [ ! -w "$binary" ]; then + echo "Skipping version injection into read-only cached binary $binary." >&2 + return 0 + fi if semver check "$REF_NAME"; then local version=${REF_NAME#v} else diff --git a/barretenberg/ts/bootstrap.sh b/barretenberg/ts/bootstrap.sh index 7a8fb3be4f5b..95af2895ec7c 100755 --- a/barretenberg/ts/bootstrap.sh +++ b/barretenberg/ts/bootstrap.sh @@ -28,14 +28,20 @@ function build { # We copy snapshot dirs to dest so we can run tests from dest. # This is because web-workers run into issues with transpilation. - for snapshot_dir in src/**/__snapshots__; do - dest_dir="${snapshot_dir/src\//dest\/node\/}" - rm -rf "$dest_dir" - cp -r "$snapshot_dir" "$dest_dir" - for file in $dest_dir/*.test.ts.snap; do - mv "$file" "${file/.test.ts.snap/.test.js.snap}" + # Skipped when dest is a read-only cached-store symlink (CACHE_LINK_DIR worktrees): bb.js's own + # tests can't run from such a checkout anyway, and consumers don't need the snapshots. + if [ -w dest/node ]; then + for snapshot_dir in src/**/__snapshots__; do + dest_dir="${snapshot_dir/src\//dest\/node\/}" + rm -rf "$dest_dir" + cp -r "$snapshot_dir" "$dest_dir" + for file in $dest_dir/*.test.ts.snap; do + mv "$file" "${file/.test.ts.snap/.test.js.snap}" + done done - done + else + echo "Skipping snapshot copy into read-only cached dest." 
>&2 + fi } function test_cmds { diff --git a/ci3/cache_download b/ci3/cache_download index d3cca549febe..3d4285ebeff5 100755 --- a/ci3/cache_download +++ b/ci3/cache_download @@ -20,11 +20,202 @@ fi tar_file="$1" out_dir="${2:-.}" +# Link mode: when CACHE_LINK_DIR points at an extracted, read-only, content-addressed store, +# extract each tarball into the store once and graft symlinks into out_dir instead of extracting +# in place. Never on CI (the store is a local-dev convenience; CI always extracts in place). +# Excluded tarballs always extract in place: +# - yarn-project: contents interleave with tracked src/ files and the generated outputs must stay +# writable for incremental rebuilds. +# - bb.js / noir-packages: their contents are loaded as Node.js modules. Node resolves imports from +# a module's REAL path, so store-resident JS cannot see the checkout's node_modules and runtime +# dependencies (msgpackr, pako, ...) fail to resolve. +link_mode=0 +case "$tar_file" in + yarn-project*|bb.js-*|noir-packages-*) ;; + *) + if [[ -n "${CACHE_LINK_DIR:-}" && "${CI:-0}" -ne 1 ]]; then + link_mode=1 + fi + ;; +esac + +# Strip the compression suffix to derive the store entry name. +entry_name="$tar_file" +entry_name="${entry_name%.tar.gz}" +entry_name="${entry_name%.zst}" +entry_name="${entry_name%.tar}" + function extract_tar { if [[ "$tar_file" == *.zst ]]; then - tar --use-compress-program="zstd -d" -x -C "$out_dir" &>/dev/null + tar --use-compress-program="zstd -d" -x -C "$1" &>/dev/null + else + tar -xzf - -C "$1" &>/dev/null + fi +} + +# List the paths inside the tarball (one per line, dirs have a trailing slash, no leading ./). +function list_tar { + if [[ "$tar_file" == *.zst ]]; then + tar --use-compress-program="zstd -d" -tf "$1" 2>/dev/null + else + tar -tzf "$1" 2>/dev/null + fi +} + +# Ensure the store entry exists and is frozen (read-only). Concurrency-safe via an mkdir lock so +# parallel per-contract downloads of the same entry extract it exactly once. 
+function ensure_store_entry {  # $1: tarball file to extract into the store when the entry is missing
+  local tarfile="$1"
+  local entry_dir="$CACHE_LINK_DIR/$entry_name"
+  [[ -d "$entry_dir" ]] && return 0  # fast path: the entry is already in the store
+
+  mkdir -p "$CACHE_LINK_DIR"
+  local lock="$CACHE_LINK_DIR/.lock.$entry_name"
+  local waited=0
+  while ! mkdir "$lock" 2>/dev/null; do  # mkdir fails while the lock dir exists, so one caller proceeds
+    # Another process is extracting this entry. Wait for it to finish.
+    [[ -d "$entry_dir" ]] && return 0
+    sleep 0.2
+    waited=$((waited + 1))
+    if [[ $waited -gt 600 ]]; then  # give up after more than 600 polls of 0.2s each
+      echo_stderr "Timed out waiting for store extraction lock on $entry_name."
+      return 1
+    fi
+  done
+  # shellcheck disable=SC2064
+  trap "rmdir '$lock' 2>/dev/null || true" RETURN  # double quotes: $lock is expanded now, not when the trap fires
+  # NOTE(review): a RETURN trap does not run if the shell exits or is killed first; confirm a stale lock dir is acceptable.
+  # Recheck under the lock in case another process won the race before we acquired it.
+  if [[ -d "$entry_dir" ]]; then
+    return 0
+  fi
+
+  local tmp_dir="$CACHE_LINK_DIR/.tmp.$entry_name.$$"  # scratch dir beside the final entry; $$ keeps processes apart
+  rm -rf "$tmp_dir"
+  mkdir -p "$tmp_dir"
+  if ! extract_tar "$tmp_dir" < "$tarfile"; then
+    rm -rf "$tmp_dir"
+    echo_stderr "Failed to extract $tar_file into the store."
+    return 1
+  fi
+  if ! mv "$tmp_dir" "$entry_dir" 2>/dev/null; then  # publish the fully extracted dir under its final name
+    # Lost the race: another process created the entry between our recheck and the mv.
+    rm -rf "$tmp_dir"
+    [[ -d "$entry_dir" ]] && return 0
+    echo_stderr "Failed to move store entry into place for $entry_name."
+    return 1
+  fi
+  chmod -R a-w "$entry_dir" 2>/dev/null || true  # freeze the entry; a chmod failure is tolerated
+}
+
+# Graft symlinks from out_dir into the store entry. For every tarball path we descend through
+# directory components that already exist as REAL descendable directories in out_dir (tracked dirs,
+# uninitialised submodule dirs, thawed local copies) and create an absolute symlink at the first
+# component that is not such a directory (the "link root"). Existing symlinks are repointed; an
+# existing real file/dir at the link root is left alone with a warning (a deliberate local override).
+#
+# A directory in out_dir is "descendable" iff it exists, is a real (non-symlink) dir, AND every one
+# of its ancestors is descendable.
The ancestor condition stops us from descending past an existing
+# store symlink: if noir-repo/target is a symlink, paths beneath it resolve through it to real store
+# dirs, but noir-repo/target is not descendable so noir-repo/target stays the single link root.
+#
+# A link root must also be IGNORED by git once created: gitignore patterns with a trailing slash
+# (e.g. barretenberg/cpp's "build*/") match directories only, so a symlink at that path would show
+# up as untracked, dirtying git status and disabling content-hash caching for the whole checkout.
+# When the would-be symlink is not ignored and the store side is a directory, we degrade: create a
+# real directory there and push the link root one level deeper, repeating until the path is ignored
+# (a real dir DOES match dir-only patterns, and everything beneath an ignored dir is ignored).
+#
+# Performance: yarn-project-sized tarballs list tens of thousands of files, but most share a handful
+# of link roots. We walk each path top-down, memoising the descendable decision per directory prefix
+# and recording each link root once, so the work is roughly proportional to the number of distinct
+# directories, not files. The walk derives link roots from the paths themselves and so is correct
+# even when the tarball omits explicit entries for intermediate directories (e.g. a listing that
+# starts at build/bin/ with no bare build/ entry).
+function graft_from_store {  # $1: tarball file whose listing decides what gets linked
+  local entry_dir="$CACHE_LINK_DIR/$entry_name"
+  local listing
+  listing=$(list_tar "$1")
+  [[ -z "$listing" ]] && return 0
+
+  # descendable[prefix]: 1 = real descendable dir, 0 = not (memoised, including negative results).
+  local -A descendable=()
+  # seen_root[prefix]: a link root we have already created a symlink for.
+  local -A seen_root=()
+  local p
+  while IFS= read -r p; do
+    p="${p%/}"
+    [[ -z "$p" ]] && continue
+
+    # Re-walk the same path after each ignore-degradation; bounded by the path's depth.
+    while :; do
+      # Walk components top-down until we hit the first non-descendable prefix (the link root).
+      local acc="" comp link_root="" decided=0 parts=()  # parts is local too, so it cannot leak to the caller
+      IFS='/' read -ra parts <<<"$p"
+      for comp in "${parts[@]}"; do
+        acc="${acc:+$acc/}$comp"
+        local d="${descendable[$acc]:-}"
+        if [[ -z "$d" ]]; then
+          if [[ -d "$out_dir/$acc" && ! -L "$out_dir/$acc" ]]; then
+            d=1
+          else
+            d=0
+          fi
+          descendable["$acc"]=$d
+        fi
+        if [[ "$d" -eq 0 ]]; then
+          link_root="$acc"
+          decided=1
+          break
+        fi
+      done
+      # Whole path is descendable (all components are real dirs already) -> nothing to link.
+      [[ "$decided" -eq 0 ]] && break
+      [[ -n "${seen_root[$link_root]:-}" ]] && break
+
+      local dest="$out_dir/$link_root"
+      local target="$entry_dir/$link_root"
+      if [[ -e "$dest" && ! -L "$dest" ]]; then
+        echo_stderr "Not grafting $link_root: a real file/dir already exists in $out_dir (local override)."
+        seen_root["$link_root"]=1
+        break
+      fi
+
+      # check-ignore exits 0 = ignored, 1 = not ignored, 128 = error (e.g. not in a git repo).
+      # Run from the link root's parent so paths inside an initialised submodule (noir-repo) are
+      # checked against the submodule's own ignore rules, not the parent repo's. A nonexistent path
+      # is evaluated as a file, so dir-only patterns report "not ignored" and we degrade below.
+      local ignore_rc=0
+      git -C "$out_dir/$(dirname "$link_root")" check-ignore -q "$(basename "$link_root")" 2>/dev/null || ignore_rc=$?
+      if [[ "$ignore_rc" -eq 1 && -d "$target" ]]; then
+        # Degrade: real dir at the link root (matches dir-only ignore patterns), link one level deeper.
+        [[ -L "$dest" ]] && rm -f "$dest"
+        mkdir -p "$dest"
+        descendable["$link_root"]=1
+        continue
+      fi
+      if [[ "$ignore_rc" -eq 1 ]]; then
+        echo_stderr "WARNING: grafted $link_root is not gitignored; it will show as untracked."
+      fi
+      mkdir -p "$(dirname "$dest")"
+      ln -sfn "$target" "$dest"
+      seen_root["$link_root"]=1
+      break
+    done
+  done <<<"$listing"
+
+  # Record the linked entry for gc, crash-safe append (one entry name per line).
+  echo "$entry_name" >> "$root/.deps-manifest.linked"
+}
+
+# Place the obtained tarball file: graft symlinks in link mode, otherwise extract in place.
+function place_tar {
+  local tarfile="$1"
+  if [[ "$link_mode" -eq 1 ]]; then
+    ensure_store_entry "$tarfile" || return 1
+    graft_from_store "$tarfile" || return 1
   else
-    tar -xzf - -C "$out_dir" &>/dev/null
+    extract_tar "$out_dir" < "$tarfile" || return 1
   fi
 }
 
@@ -65,7 +256,7 @@ if [[ -n "${CACHE_LOCAL_DIR:-}" ]]; then
 
   if [[ -f "$local_cache_file" ]]; then
     echo_stderr "Local cache hit for $tar_file."
-    extract_tar < "$local_cache_file"
+    place_tar "$local_cache_file"
     echo_stderr "Cache extraction of $tar_file from local cache complete in ${SECONDS}s."
     exit 0
   fi
@@ -77,12 +268,29 @@ if [[ -n "${CACHE_LOCAL_DIR:-}" ]]; then
     exit 1
   fi
 
-  extract_tar < "$local_cache_file"
+  place_tar "$local_cache_file"
+  echo_stderr "Cache download and extraction of $tar_file complete in ${SECONDS}s."
+  exit 0
+fi
+
+# No local tarball cache. Link mode needs the tarball as a file (to extract into the store and to
+# list its paths), so stream into a temp file; otherwise extract straight from the download stream.
+if [[ "$link_mode" -eq 1 ]]; then
+  tmp_tar=$(mktemp)
+  trap 'rm -f "$tmp_tar"' EXIT
+  if ! download_from_remote > "$tmp_tar"; then
+    echo_stderr "Cache download of $tar_file failed."
+    exit 1
+  fi
+  if ! place_tar "$tmp_tar"; then
+    echo_stderr "Cache download of $tar_file failed."
+    exit 1
+  fi
   echo_stderr "Cache download and extraction of $tar_file complete in ${SECONDS}s."
   exit 0
 fi
 
-if ! download_from_remote | extract_tar; then
+if ! download_from_remote | extract_tar "$out_dir"; then
   echo_stderr "Cache download of $tar_file failed."
exit 1 fi diff --git a/ci3/cache_local.test.sh b/ci3/cache_local.test.sh index ce919ffe6768..673533be3261 100755 --- a/ci3/cache_local.test.sh +++ b/ci3/cache_local.test.sh @@ -204,6 +204,36 @@ test_inaccessible_cache_dir_falls_through() { fi } +test_upload_local_save_without_ci() { + log "\nTest 9: cache_upload saves to local cache with CI=0 and no S3_FORCE_UPLOAD" + + export CACHE_LOCAL_DIR="$test_root/local-cache-ci0" + mkdir -p "$CACHE_LOCAL_DIR" + + local stderr_output + stderr_output=$(CI=0 "$script_dir/cache_upload" "test-ci0.tar.gz" "$test_root/source/file1.txt" 2>&1 >/dev/null) || true + + if [[ -f "$CACHE_LOCAL_DIR/test-ci0.tar.gz" ]]; then + pass "Local build populated local cache without CI/S3_FORCE_UPLOAD" + else + fail "Local build did not populate local cache (got: $stderr_output)" + fi + if echo "$stderr_output" | grep -q "Skipping S3 upload"; then + pass "S3 upload still skipped at CI=0" + else + fail "Expected S3 upload skip at CI=0 (got: $stderr_output)" + fi + + # With no CACHE_LOCAL_DIR either, upload is a no-op and skips tarring entirely. 
+ unset CACHE_LOCAL_DIR + stderr_output=$(CI=0 "$script_dir/cache_upload" "test-ci0-noop.tar.gz" "$test_root/source/file1.txt" 2>&1 >/dev/null) || true + if echo "$stderr_output" | grep -q "no CACHE_LOCAL_DIR"; then + pass "No-op exit when there is nowhere to save" + else + fail "Expected no-op exit message (got: $stderr_output)" + fi +} + main() { log "=== Local Cache Test Suite ===\n" @@ -217,6 +247,7 @@ main() { test_roundtrip test_disabled_cache_skips_local test_inaccessible_cache_dir_falls_through + test_upload_local_save_without_ci log "\n=== Results ===" echo -e "\033[32mPassed: $passed\033[0m" diff --git a/ci3/cache_upload b/ci3/cache_upload index 21a206d7c934..a8401425e9a3 100755 --- a/ci3/cache_upload +++ b/ci3/cache_upload @@ -17,28 +17,16 @@ name="$1" # Now $@ = our binary path args shift 1 -if [[ -z "${S3_FORCE_UPLOAD:-}" && "${CI:-0}" -eq 0 ]]; then - echo_stderr "Skipping upload because CI=0 and S3_FORCE_UPLOAD not set." - exit 0 -fi - if [[ "${NO_CACHE_UPLOAD:-0}" -eq 1 ]]; then echo_stderr "Skipping upload because NO_CACHE_UPLOAD=1." exit 0 fi -# In SSM/instance-profile mode, AWS CLI falls back to IMDS for credentials. -if [[ "${CI_SSM_MODE:-0}" -eq 0 ]]; then - if [[ -z "${AWS_ACCESS_KEY_ID:-}" || -z "${AWS_SECRET_ACCESS_KEY:-}" ]] && ! aws configure get aws_access_key_id &>/dev/null; then - echo_stderr "Skipping upload, no AWS credentials found." - exit 0 - fi -fi - -if [ -z "${S3_FORCE_UPLOAD:-}" ] && \ - aws ${S3_BUILD_CACHE_AWS_PARAMS:-} s3 ls "s3://aztec-ci-artifacts/build-cache/$name" &>/dev/null; then - echo_stderr "Skipping upload, already exists: $name" - exit 0 +# Skip tarring entirely when there is nowhere to save: no local cache configured and the S3 upload +# would be skipped anyway. Tarring large artifacts (e.g. bb build dirs) is not free. +if [[ -z "${CACHE_LOCAL_DIR:-}" && -z "${S3_FORCE_UPLOAD:-}" && "${CI:-0}" -eq 0 ]]; then + echo_stderr "Skipping upload because CI=0, S3_FORCE_UPLOAD not set, and no CACHE_LOCAL_DIR." 
+ exit 0 fi out_tar=$(mktemp) @@ -50,16 +38,37 @@ else tar -czf $out_tar "$@" fi -# Save to local cache if enabled. +# Save to local cache if enabled. This happens regardless of CI/S3_FORCE_UPLOAD so that local builds +# (remote cache miss) populate $CACHE_LOCAL_DIR for reuse by other checkouts/worktrees. A local-save +# failure (e.g. read-only dir) must not fail the build, so it only warns. if [[ -n "${CACHE_LOCAL_DIR:-}" ]]; then - if mkdir -p "$CACHE_LOCAL_DIR" 2>/dev/null; then - cp "$out_tar" "$CACHE_LOCAL_DIR/$name" + if mkdir -p "$CACHE_LOCAL_DIR" 2>/dev/null && cp "$out_tar" "$CACHE_LOCAL_DIR/$name" 2>/dev/null; then echo_stderr "Saved $name to local cache." else - echo_stderr "Warning: Cannot create local cache dir $CACHE_LOCAL_DIR, skipping local cache." + echo_stderr "Warning: Cannot create local cache dir $CACHE_LOCAL_DIR or save into it, skipping local cache." fi fi +# Everything below is the S3 upload, gated to CI / S3_FORCE_UPLOAD only. +if [[ -z "${S3_FORCE_UPLOAD:-}" && "${CI:-0}" -eq 0 ]]; then + echo_stderr "Skipping S3 upload because CI=0 and S3_FORCE_UPLOAD not set." + exit 0 +fi + +# In SSM/instance-profile mode, AWS CLI falls back to IMDS for credentials. +if [[ "${CI_SSM_MODE:-0}" -eq 0 ]]; then + if [[ -z "${AWS_ACCESS_KEY_ID:-}" || -z "${AWS_SECRET_ACCESS_KEY:-}" ]] && ! aws configure get aws_access_key_id &>/dev/null; then + echo_stderr "Skipping S3 upload, no AWS credentials found." + exit 0 + fi +fi + +if [ -z "${S3_FORCE_UPLOAD:-}" ] && \ + aws ${S3_BUILD_CACHE_AWS_PARAMS:-} s3 ls "s3://aztec-ci-artifacts/build-cache/$name" &>/dev/null; then + echo_stderr "Skipping S3 upload, already exists: $name" + exit 0 +fi + # Pipe tar directly to AWS S3 cp if aws ${S3_BUILD_CACHE_AWS_PARAMS:-} s3 cp $out_tar "s3://aztec-ci-artifacts/build-cache/$name" &>/dev/null; then echo_stderr "Cache upload of $name complete in ${SECONDS}s." 
diff --git a/noir-projects/noir-contracts/bootstrap.sh b/noir-projects/noir-contracts/bootstrap.sh
index 3d2a9dcedb34..25e657467233 100755
--- a/noir-projects/noir-contracts/bootstrap.sh
+++ b/noir-projects/noir-contracts/bootstrap.sh
@@ -98,10 +98,22 @@ export -f get_contract_path
 # ci3/release_prep_package_json for npm packages, release-image/Dockerfile for the docker image.
 function stamp_dev_aztec_version {
   local json_path=$1
-  local tmp=$(mktemp)
-  jq '.aztec_version = "dev"' "$json_path" > "$tmp"
-  cat "$tmp" > "$json_path"
-  rm "$tmp"
+  # Already stamped: return without touching the file, so a store symlink at this path stays in place.
+  if [ "$(jq -r '.aztec_version // empty' "$json_path")" == "dev" ]; then
+    return 0
+  fi
+  local tmp
+  tmp=$(mktemp) || return 1
+  # Never let a failed jq run replace the artifact with an empty or partial file.
+  if ! jq '.aztec_version = "dev"' "$json_path" > "$tmp"; then
+    rm -f "$tmp"
+    return 1
+  fi
+  chmod 644 "$tmp"
+  # Replace by rename rather than writing through the path: when the artifact is a symlink into a
+  # frozen cached store (CACHE_LINK_DIR worktrees), this swaps the symlink for a real stamped copy
+  # instead of failing to write the read-only store file.
+  mv -f "$tmp" "$json_path"
 }
 export -f stamp_dev_aztec_version
 
@@ -123,10 +135,14 @@ function compile {
   if ! cache_download contract-$contract_hash.tar.gz; then
     $NARGO compile --package $contract --inliner-aggressiveness 0 --deny-warnings
     $BB aztec_process -i $json_path
+    # Stamp before upload so every cached tarball already carries aztec_version "dev". This lets the
+    # post-block stamp below fast-path to a no-op on a cache hit, preserving CACHE_LINK_DIR symlinks.
+    stamp_dev_aztec_version "$json_path"
     cache_upload contract-$contract_hash.tar.gz $json_path
   fi
-  # Stamp the version after the cache block so the field is always present, whether the artifact came from a fresh
-  # compile or a cache hit.
+  # Stamp the version after the cache block so the field is always present. For tarballs predating the
+  # pre-upload stamp the field is absent and this writes it; for newer tarballs (already "dev") it
+  # fast-paths to a no-op, which leaves a grafted store symlink untouched in CACHE_LINK_DIR worktrees.
stamp_dev_aztec_version "$json_path" } export -f compile diff --git a/scripts/worktrees.md b/scripts/worktrees.md new file mode 100644 index 000000000000..4464b182c167 --- /dev/null +++ b/scripts/worktrees.md @@ -0,0 +1,143 @@ +# Fast worktrees via a shared frozen deps store + +`scripts/worktrees.sh create` makes a git worktree of aztec-packages ready to build and test in +minutes instead of the many-minute full `./bootstrap.sh`, by reusing build artifacts that already +exist: the ci3 build cache for upstream components, and the source checkout's yarn layer for +yarn-project. This document explains how the pieces fit together; see `scripts/worktrees.sh --help` +for command-by-command usage. + +## The two pieces + +1. **Link mode in `ci3/cache_download`** (env var `CACHE_LINK_DIR`). Every component bootstrap + already downloads content-addressed tarballs (`-.tar.gz`) from the build + cache, optionally keeping them in a local tarball cache (`CACHE_LOCAL_DIR`). With + `CACHE_LINK_DIR` set, instead of extracting a tarball into the checkout, `cache_download` + extracts it ONCE into a shared store and grafts symlinks into the checkout. CI never uses this + path (hard-guarded on `$CI`). + +2. **`scripts/worktrees.sh`** orchestrates worktree creation on top of it: `git worktree add`, init + the `noir/noir-repo` submodule, copy the writable yarn-project layer from the source checkout, + run each upstream component's bootstrap inside the worktree with `CACHE_LINK_DIR` exported, and + record a manifest. It also provides `status`, `thaw`, and `gc`. 
+ +## The store + +``` +$CACHE_LOCAL_DIR/ # default ~/.cache/aztec-build-cache + .tar.gz | .zst # tarball cache (pre-existing behavior) + extracted/ # $CACHE_LINK_DIR + / # one tarball, extracted once, then chmod -R a-w +``` + +Entries are **content-addressed** (the tarball name embeds the content hash of the component's +inputs), so an entry never changes after creation — every checkout that links `noir-` sees +identical bytes forever. Extraction goes into a temp dir and is atomically renamed into place, with +an mkdir-based lock so concurrent downloads of the same entry (e.g. per-contract tarballs fetched in +parallel) extract exactly once. + +Entries are **frozen** (`chmod -R a-w`). Any accidental write through a worktree symlink — a stray +rebuild, codegen, `yarn install` in the wrong place — fails immediately with `EACCES` instead of +silently corrupting state shared by every other worktree. This is the core safety property: shared +state is immutable by construction *and* enforced by the filesystem. + +## Grafting + +For each path in the tarball listing, the graft walks components top-down through directories that +already exist as real (non-symlink) dirs in the checkout — tracked dirs, the uninitialized-submodule +dir, thawed local copies — and creates one absolute symlink at the first missing component (the +"link root"). A real file/dir already present at a link root is left alone with a warning: it is +treated as a deliberate local override (e.g. a thawed component). + +One subtlety: a link root must be **gitignored once created**, and gitignore patterns with a +trailing slash (`build*/`) match directories only — a symlink at that path would show up as +untracked, dirtying `git status` and, worse, flipping the repo's content hashes to "disabled-cache" +(uncommitted-changes detection), which silently disables caching for everything. 
So after deciding a +link root the graft asks `git check-ignore`: if the would-be symlink is not ignored and the store +side is a directory, it creates a *real* directory there instead (real dirs do match dir-only +patterns) and pushes the link root one level deeper, repeating until the path is ignored. Submodule +paths are checked against the submodule's own ignore rules. + +## What is never linked (extracted in place instead) + +- **`yarn-project-*`**: its outputs interleave with tracked `src/` files and must stay writable for + incremental rebuilds (`yarn build` writes `dest/`). +- **`bb.js-*` and `noir-packages-*`**: their contents are loaded as Node.js modules, and Node + resolves imports from a module's **real path**. Code living in the store cannot see the checkout's + `node_modules`, so runtime dependencies (`msgpackr`, `pako`, …) fail to resolve. They must be real + files inside the checkout tree. (~30M per worktree.) + +Everything else — bb binaries and wasm, `nargo`/`acvm`, transpiler binary, `l1-contracts` build +outputs, per-contract and per-circuit artifacts — is data or executables that nothing resolves +modules from, and stays in the store. + +## Per-checkout stamping on cache hits + +Some bootstraps deliberately mutate cached artifacts after extraction, which conflicts with a frozen +store. Three sites were made store-tolerant: + +- `barretenberg/cpp` `inject_version` patches the version into `bb`/`bb-avm` binaries in place; it + now **skips read-only binaries** (a worktree's `bb --version` reports the unstamped sentinel — + harmless for development). +- `noir-contracts` `stamp_dev_aztec_version` rewrites every contract JSON with + `aztec_version: "dev"`; it now **replaces by rename**, so in a worktree the store symlink is + swapped for a real stamped copy (the store stays pristine), and it is idempotent. 
Freshly-built + contracts are additionally **stamped before `cache_upload`**, so newly-cached tarballs already + carry the field and the post-hit stamp fast-paths to a no-op, leaving the symlink in place; only + tarballs predating that change get materialized as real copies. +- `bb.js` copies test snapshots into `dest/` so its own tests can run from there; it now **skips + when dest is read-only** (moot anyway now that bb.js extracts in place). + +If you add a bootstrap step that writes into a component's cached output directory after a cache +hit, follow one of these patterns or the step will fail with `EACCES` in linked worktrees. + +## Content-hash pitfalls (why your cache might miss) + +- **`noir/noir-repo` must be an initialized submodule** before computing any hash: in an empty + submodule dir, `git -C noir-repo rev-parse HEAD` walks up and returns the *parent repo's* HEAD, + corrupting the noir hash and — through the dependency chain (avm-transpiler → barretenberg → + bb.js) — almost every other hash. `create` inits it first; keep that in mind if you drive + bootstraps manually in a fresh worktree. +- **Editing a component's `bootstrap.sh`** (or anything matched by its `.rebuild_patterns`) changes + its content hash — the recipe is part of the input. Worktrees based on such a branch rebuild that + component locally until CI builds the branch and uploads tarballs at the new hashes. The pain is + one-time per machine: `cache_upload` saves locally-built artifacts into `CACHE_LOCAL_DIR` even + with `CI=0`, so the first local build at a new hash populates the cache and later worktrees link + from it. +- **Untracked, non-ignored files** under a component flip its hash to `disabled-cache`. Keep + checkouts clean of stray scratch files, or expect local rebuilds. + +## Manifests and gc + +Each linked checkout has `.deps-manifest.json` (consolidated at create time) plus +`.deps-manifest.linked` (crash-safe append log written by `cache_download`), both gitignored. 
They +record which store entries the checkout references; living *inside* the worktree, they disappear +with it. `gc` is mark-and-sweep: roots are the manifests of every checkout in `git worktree list`, +unreferenced entries are deleted (`chmod -R u+w` first — they are frozen), with a final +symlink-scan safety net before each deletion, and stale tarballs older than `--keep-days` go too. + +## Day-to-day + +```bash +# create (run from anywhere inside your built checkout; ~2-5 min on cache hits). The worktree lands +# as a sibling of the checkout (<parent>/my-feature) on branch <initials>/my-feature. +scripts/worktrees.sh create my-feature +scripts/worktrees.sh create my-feature origin/next +scripts/worktrees.sh create my-feature --dry-run # print resolved source/path/branch, no changes + +# work: yarn build / yarn test in the worktree's yarn-project is fully isolated (local copies) + +# rebuild an upstream component locally (bb, contracts, ...): thaw first +scripts/worktrees.sh thaw barretenberg/cpp/build + +# after rebasing the worktree across upstream changes: re-run that component's bootstrap in link +# mode to repoint at the new content +(cd <worktree>/noir && CACHE_LINK_DIR=... CACHE_LOCAL_DIR=... ./bootstrap.sh) + +# inspect / clean up +scripts/worktrees.sh status +git worktree remove <worktree> && scripts/worktrees.sh gc +``` + +Set `CACHE_LOCAL_DIR` consistently (e.g. export it from a profile that non-interactive shells also +read): the store derives from it, and a shell that misses the export will look at an empty default +(`~/.cache/aztec-build-cache`) and re-download or rebuild everything. diff --git a/scripts/worktrees.sh b/scripts/worktrees.sh new file mode 100755 index 000000000000..9d80d1f59fa1 --- /dev/null +++ b/scripts/worktrees.sh @@ -0,0 +1,678 @@ +#!/usr/bin/env bash +# Fast worktree setup for aztec-packages backed by a shared, frozen, content-addressed deps store. 
#
# Instead of a full multi-minute ./bootstrap.sh, `create` makes a git worktree, copies the writable
# yarn-project layer (node_modules, .yarn/cache, generated build outputs) from the source checkout,
# and runs each upstream component's bootstrap in link mode so their cached artifacts are symlinked
# from a shared read-only store (CACHE_LINK_DIR) instead of extracted in place.
set -euo pipefail

# SCRIPT_ROOT is the checkout this script lives in (resolved via the script's own path, not CWD,
# which could point at a different — possibly unbuilt — checkout). It is the fallback source for
# `create` and the anchor for the other commands when CWD is not inside a repo.
SCRIPT_ROOT=$(git -C "$(dirname "${BASH_SOURCE[0]}")" rev-parse --show-toplevel)
ROOT=$(git rev-parse --show-toplevel 2>/dev/null || echo "$SCRIPT_ROOT")

# Store locations (overridable via env). CACHE_LOCAL_DIR holds downloaded tarballs (existing ci3
# behavior); CACHE_LINK_DIR holds the extracted, frozen, content-addressed entries we symlink into.
CACHE_LOCAL_DIR=${CACHE_LOCAL_DIR:-$HOME/.cache/aztec-build-cache}
CACHE_LINK_DIR=${CACHE_LINK_DIR:-$CACHE_LOCAL_DIR/extracted}
export CACHE_LOCAL_DIR CACHE_LINK_DIR

# Upstream components bootstrapped in link mode, in dependency order. yarn-project is intentionally
# absent: its layer is copied from the source checkout (its tarball is excluded from link mode).
UPSTREAM_COMPONENTS=(
  "barretenberg/cpp"
  "barretenberg/ts"
  "noir"
  "avm-transpiler"
  "l1-contracts"
  "noir-projects"
)

# Print a diagnostic line to stderr. printf '%s' is used (not `echo -e`) so that backslashes in
# paths are printed literally and a message starting with -n/-e is not taken as an echo option.
function log { printf '%s\n' "$*" >&2; }

# Print an error to stderr and terminate the script with status 1.
function die { log "Error: $*"; exit 1; }

# Print the full help text to stderr. The heredoc delimiter is quoted, so nothing in it is expanded.
function usage {
  cat >&2 <<'EOF'
worktrees.sh — fast git worktrees for aztec-packages backed by a shared frozen deps store.

USAGE
  scripts/worktrees.sh create <name> [base-ref] [--branch <branch>] [--frozen-only] [--dry-run]
  scripts/worktrees.sh status [path]
  scripts/worktrees.sh thaw <path>...
  scripts/worktrees.sh gc [--dry-run] [--keep-days N]
  scripts/worktrees.sh --help

COMMANDS

  create <name> [base-ref]
    Create a worktree as a sibling of the source checkout (<parent>/<name>), on a new
    branch, based on <base-ref> (default: the source checkout's current HEAD).

    SOURCE CHECKOUT
      The source is the aztec-packages checkout containing your current directory (so you can run
      this from anywhere inside your checkout). If CWD is not inside such a checkout, the checkout
      this script lives in is used instead.

    BRANCH NAME (first match wins)
      --branch <branch>     use <branch> verbatim.
      <name> contains a /   <name> IS the branch; the worktree dir is its last segment
                            (e.g. create ab/fix-thing -> branch ab/fix-thing, dir fix-thing).
      otherwise             prefix <name> with your initials from the source checkout's git config:
                            user.initials if set, else initials derived from user.name
                            ("Jane van Doe" -> jvd). With neither set, <name> is used unprefixed.

    --dry-run
      Resolve and print the source checkout, worktree path, branch, and base-ref, then exit
      without fetching, creating the worktree, or touching the store.

    What happens:
      1. git worktree add <parent>/<name> (git fetch first if base-ref looks remote).
      2. Copy the WRITABLE yarn layer from the source checkout (real copies, ext4 has no reflink):
           - yarn-project/.yarn/cache + .yarn/install-state.gz
           - root + per-workspace node_modules (preserves the relative @aztec/* symlinks so they
             resolve inside the worktree)
           - all gitignored yarn-project build outputs (dest/, generated src/, artifacts,
             .tsbuildinfo), enumerated dynamically; node_modules and junk (logs, joblog.txt) excluded.
         Build outputs are only copied when the source and the worktree are at the same yarn-project
         content state (same cache_content_hash, or same HEAD + clean tree when hashes are disabled
         by uncommitted changes); otherwise they are skipped and you run yarn-project/bootstrap.sh or
         yarn build in the worktree. node_modules is still copied when yarn.lock content matches.
      3. Run each upstream component bootstrap inside the worktree in LINK mode. On a store/cache hit
         this is download + extract-once + symlink time only. On a cache MISS the component builds
         locally (correct, but slow) — pass --frozen-only to abort instead of building.
      4. Write .deps-manifest.json and print a summary.

    SYMLINKED vs COPIED
      Symlinked (read-only, shared via the store): everything an upstream component bootstrap pulls
      from cache — barretenberg build/ (bb binary + wasm), noir-repo/target/release binaries,
      l1-contracts out/cache/generated, per-contract and per-circuit artifacts.
      Extracted in place (real files, per-worktree): bb.js dest/build and noir/packages — their
      contents are loaded as Node.js modules, which resolve imports from real paths and so must
      live inside the checkout — plus the copied yarn-project layer above.

    FREEZE SEMANTICS
      Store entries are content-addressed and immutable. After extraction they are chmod -R a-w, so a
      stray rebuild writing through a symlink fails loudly with EACCES instead of silently corrupting
      a shared entry. To rebuild an upstream component locally, thaw it first (see `thaw`).

    DRIFT / REFRESH
      Symlinks point at content-addressed store entries, so they never go stale on their own. After a
      rebase that changes an upstream component, re-run that component's bootstrap in the worktree with
      CACHE_LINK_DIR + CACHE_LOCAL_DIR exported (e.g. `CACHE_LINK_DIR=... CACHE_LOCAL_DIR=...
      ./barretenberg/cpp/bootstrap.sh`) to repoint links at the new content. If the worktree's
      yarn.lock diverges from the copied node_modules, delete node_modules and run `yarn install`.

    CHICKEN-AND-EGG
      Link mode only kicks in if the worktree's checked-out ci3/cache_download honors CACHE_LINK_DIR.
      If your base-ref predates that patch, create warns and deps are extracted in place (correct,
      just more disk).

  status [path]
    For the given checkout (default: current), show linked store entries and whether their store
    paths still exist, the copied-layer provenance (source + commit) from the manifest, and a drift
    hint comparing the worktree yarn.lock hash to the manifest.

  thaw <path>...
    Replace store symlinks at the given paths with writable copies (cp from the store, chmod u+w) and
    drop those entries from the checkout's manifest. Use before rebuilding an upstream component
    locally. Refreeze by re-running that component's bootstrap in link mode.

  gc [--dry-run] [--keep-days N]
    Garbage-collect the store. Roots = the manifests of every checkout in `git worktree list` (a
    removed worktree drops its roots automatically). Extracted entries not referenced by any live
    checkout are chmod -R u+w then rm -rf'd. As a safety net, an entry is kept (with a warning) if any
    registered checkout still has a symlink pointing into it. Tarballs older than --keep-days (default
    30) whose extracted entry is dead are also removed. --dry-run prints what would be removed.

ENVIRONMENT
  CACHE_LOCAL_DIR   tarball cache dir (default ~/.cache/aztec-build-cache)
  CACHE_LINK_DIR    extracted frozen store (default $CACHE_LOCAL_DIR/extracted)

Design docs (store layout, grafting, exclusions, hash pitfalls): scripts/worktrees.md
EOF
}

# ---------------------------------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------------------------------

# Compute the yarn-project content hash for a checkout, or empty if the cache is disabled (uncommitted
# changes) or the helper is unavailable.
function yp_content_hash {
  local checkout="$1"
  local h
  # A missing yarn-project dir or a failing/absent bootstrap.sh both mean "no hash available":
  # print nothing and succeed so callers can fall back.
  h=$(cd "$checkout/yarn-project" 2>/dev/null && ./bootstrap.sh hash 2>/dev/null) || return 0
  if [[ "$h" == *disabled-cache* ]]; then
    return 0
  fi
  echo "$h"
}

# True when source and worktree are at the same yarn-project content state, so build outputs can be
# copied. Prefer content hashes; fall back to "same HEAD commit + clean tracked files".
#   $1 - source checkout root
#   $2 - worktree root
# Returns 0 when the states match, 1 otherwise (including when git cannot answer).
function yp_same_state {
  local src="$1" wt="$2"
  local hs hw
  hs=$(yp_content_hash "$src")
  hw=$(yp_content_hash "$wt")
  if [[ -n "$hs" && -n "$hw" ]]; then
    if [[ "$hs" == "$hw" ]]; then return 0; else return 1; fi
  fi
  # Hashes disabled (uncommitted changes somewhere): fall back to commit + clean tracked files.
  # -uno: untracked scratch files can't change build outputs (nothing tracked references them), and
  # blocking on them would force a full rebuild in every worktree made from a mildly messy checkout.
  #
  # This function runs inside an `if`, where errexit is suspended, so every git call is checked
  # explicitly: a failing git must mean "not the same state", never two empty strings that happen
  # to compare equal.
  local cs cw dirty
  cs=$(git -C "$src" rev-parse --verify HEAD 2>/dev/null) || return 1
  cw=$(git -C "$wt" rev-parse --verify HEAD 2>/dev/null) || return 1
  if [[ -z "$cs" || "$cs" != "$cw" ]]; then
    return 1
  fi
  dirty=$(git -C "$src" status --porcelain -uno -- yarn-project 2>/dev/null) || return 1
  if [[ -n "$dirty" ]]; then
    return 1
  fi
  return 0
}

# Print the SHA-256 of the checkout's yarn-project/yarn.lock, or nothing when the file is absent.
function yarn_lock_hash {
  local checkout="$1"
  local lock="$checkout/yarn-project/yarn.lock"
  [[ -f "$lock" ]] || return 0
  # Hash via stdin so the output is always "<hash>  -", independent of characters in the path.
  sha256sum < "$lock" | cut -d' ' -f1
}

# True if the given directory looks like an aztec-packages checkout root.
function is_aztec_checkout {
  local d="$1"
  [[ -n "$d" && -f "$d/scripts/worktrees.sh" && -d "$d/yarn-project" ]]
}

# Resolve the SOURCE checkout for `create`. Prefer the aztec-packages checkout containing CWD; fall
# back to SCRIPT_ROOT (the checkout this script lives in). If both resolve, differ, and CWD wins,
# note which source is used so a teammate isn't surprised when they invoke the script by an absolute
# path from inside a different checkout.
function resolve_source {
  local cwd_root
  cwd_root=$(git rev-parse --show-toplevel 2>/dev/null) || cwd_root=""
  if [[ -n "$cwd_root" ]] && is_aztec_checkout "$cwd_root"; then
    if [[ "$cwd_root" != "$SCRIPT_ROOT" ]]; then
      log "Using source checkout from CWD: $cwd_root"
    fi
    echo "$cwd_root"
    return 0
  fi
  echo "$SCRIPT_ROOT"
}

# Derive the default branch name for a worktree from the source checkout's git config, following the
# repo convention (yarn-project/CLAUDE.md): user.initials if set, else initials derived from
# user.name (lowercased first letter of each word), else the bare name.
#   $1 - source checkout root (its git config is consulted)
#   $2 - worktree name
function default_branch {
  local source="$1" name="$2"
  local initials fullname
  initials=$(git -C "$source" config user.initials 2>/dev/null || true)
  if [[ -z "$initials" ]]; then
    fullname=$(git -C "$source" config user.name 2>/dev/null || true)
    if [[ -n "$fullname" ]]; then
      initials=$(awk '{ out = ""; for (i = 1; i <= NF; i++) out = out tolower(substr($i, 1, 1)); print out }' <<<"$fullname")
    fi
  fi
  if [[ -n "$initials" ]]; then
    echo "$initials/$name"
  else
    echo "$name"
  fi
}

# ---------------------------------------------------------------------------------------------------
# create
# ---------------------------------------------------------------------------------------------------

# create <name> [base-ref] [--branch <branch>] [--frozen-only] [--dry-run]
# Creates the worktree, seeds the yarn layer from the source checkout, bootstraps every upstream
# component in link mode and writes the manifest. With --frozen-only every component is pre-checked
# BEFORE anything is copied or bootstrapped, so a cache miss aborts without starting a local build.
function cmd_create {
  local name="" base_ref="" branch="" frozen_only=0 dry_run=0
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --branch)
        # Checked explicitly: under `set -u` a bare trailing --branch would otherwise die with an
        # opaque "unbound variable" error.
        [[ $# -ge 2 && -n "$2" ]] || die "--branch requires a branch name"
        branch="$2"
        shift 2
        ;;
      --frozen-only) frozen_only=1; shift ;;
      --dry-run) dry_run=1; shift ;;
      --help|-h) usage; exit 0 ;;
      -*) die "Unknown option: $1" ;;
      *)
        if [[ -z "$name" ]]; then
          name="$1"
        elif [[ -z "$base_ref" ]]; then
          base_ref="$1"
        else
          die "Unexpected argument: $1"
        fi
        shift
        ;;
    esac
  done
  [[ -n "$name" ]] || { usage; die "create requires <name>"; }

  local source
  source=$(resolve_source)

  # A <name> containing a slash IS the full branch name; the worktree dir is its last path segment.
  local dir_name="$name"
  if [[ "$name" == */* ]]; then
    branch=${branch:-$name}
    dir_name="${name##*/}"
  fi
  [[ -n "$dir_name" ]] || die "Invalid worktree name (empty last path segment): $name"
  branch=${branch:-$(default_branch "$source" "$name")}

  local wt_path
  wt_path="$(dirname "$source")/$dir_name"
  base_ref=${base_ref:-HEAD}

  if [[ "$dry_run" -eq 1 ]]; then
    log "Dry run (no changes made):"
    log " source: $source"
    log " path: $wt_path"
    log " branch: $branch"
    log " base-ref: $base_ref"
    return 0
  fi

  [[ -d "$source/yarn-project/node_modules" ]] \
    || die "Source checkout $source has no yarn-project/node_modules — bootstrap it before creating worktrees."
  if [[ -e "$wt_path" ]]; then
    die "Path already exists: $wt_path"
  fi

  # Fetch first if base-ref looks like a remote ref (origin/... or any other <remote>/<branch>).
  if [[ "$base_ref" == */* ]]; then
    log "Fetching to resolve base-ref $base_ref..."
    git -C "$source" fetch || die "git fetch failed"
  fi

  log "Creating worktree $wt_path on branch $branch (base $base_ref)..."
  if git -C "$source" show-ref --verify --quiet "refs/heads/$branch"; then
    git -C "$source" worktree add "$wt_path" "$branch"
  else
    git -C "$source" worktree add -b "$branch" "$wt_path" "$base_ref"
  fi

  # An uninitialized noir-repo makes `git -C noir-repo rev-parse HEAD` resolve to the PARENT repo's
  # HEAD (git walks up from the empty dir), corrupting the noir content hash and, through the
  # dependency chain, every downstream component hash — turning cache hits into misses.
  log "Initializing noir/noir-repo submodule..."
  git -C "$wt_path" submodule update --init noir/noir-repo || die "submodule init failed"

  # Chicken-and-egg: only graft if the worktree's own cache_download honors CACHE_LINK_DIR.
  local link_supported=1
  if ! grep -q CACHE_LINK_DIR "$wt_path/ci3/cache_download" 2>/dev/null; then
    link_supported=0
    log "WARNING: this base-ref's ci3/cache_download has no CACHE_LINK_DIR support."
    log " Upstream deps will be EXTRACTED IN PLACE (correct, just more disk)."
  fi

  # --- --frozen-only: verify every component up front, before copying or bootstrapping anything ---
  if [[ "$frozen_only" -eq 1 ]]; then
    local failed_frozen=()
    mapfile -t failed_frozen < <(_frozen_missing_components "$wt_path")
    if [[ ${#failed_frozen[@]} -gt 0 ]]; then
      log "Worktree left in place at $wt_path (remove with: git -C $source worktree remove --force $wt_path)."
      die "--frozen-only: missing cached artifacts for: ${failed_frozen[*]}. Aborting before any local build."
    fi
  fi

  # --- copy the writable yarn layer ---
  local copied=()
  copy_yarn_layer "$source" "$wt_path" copied

  # --- bootstrap upstream components in link mode ---
  : > "$wt_path/.deps-manifest.linked"
  local comp
  for comp in "${UPSTREAM_COMPONENTS[@]}"; do
    if [[ ! -x "$wt_path/$comp/bootstrap.sh" ]]; then
      log "Skipping $comp (no bootstrap.sh)."
      continue
    fi
    log "Bootstrapping $comp in link mode..."
    # Best effort by design: one failing component should not prevent the others from being linked.
    if ! ( cd "$wt_path/$comp" && CACHE_LINK_DIR="$CACHE_LINK_DIR" CACHE_LOCAL_DIR="$CACHE_LOCAL_DIR" CI=0 ./bootstrap.sh ); then
      log "WARNING: $comp bootstrap returned non-zero; continuing."
    fi
  done

  write_manifest "$wt_path" "$source" copied

  log ""
  log "Worktree ready: $wt_path (branch $branch)"
  log " Linked store entries: $(wc -l < "$wt_path/.deps-manifest.linked" 2>/dev/null || echo 0)"
  log " Copied yarn layer items: ${#copied[@]}"
  log ""
  log "Next steps:"
  log " cd $wt_path/yarn-project"
  if [[ "$link_supported" -eq 0 ]]; then
    log " (deps were extracted in place; this base-ref lacks CACHE_LINK_DIR support)"
  fi
  log " # If build outputs were skipped (hash mismatch), run: ./bootstrap.sh or yarn build"
}

# Copy the writable yarn-project layer. Appends copied item descriptors to the named array.
#   $1 - source checkout root
#   $2 - worktree root
#   $3 - NAME of the caller's array that receives the copied item descriptors
function copy_yarn_layer {
  local source="$1" wt="$2"
  local -n _copied="$3"
  local syp="$source/yarn-project"
  local wyp="$wt/yarn-project"

  log "Copying yarn-project writable layer..."

  # .yarn/cache + install-state (so a fresh worktree doesn't re-download every package zip).
  if [[ -d "$syp/.yarn/cache" ]]; then
    mkdir -p "$wyp/.yarn"
    cp -a --reflink=auto "$syp/.yarn/cache" "$wyp/.yarn/cache"
    _copied+=("yarn-project/.yarn/cache")
  fi
  if [[ -f "$syp/.yarn/install-state.gz" ]]; then
    mkdir -p "$wyp/.yarn"
    cp -a --reflink=auto "$syp/.yarn/install-state.gz" "$wyp/.yarn/install-state.gz"
    _copied+=("yarn-project/.yarn/install-state.gz")
  fi

  # node_modules: copy when yarn.lock content matches; otherwise warn (user runs yarn install).
  if [[ "$(yarn_lock_hash "$source")" == "$(yarn_lock_hash "$wt")" ]]; then
    # Root node_modules + per-workspace node_modules (the @aztec/* relative symlinks survive cp -a).
    local nm nm_dst
    while IFS= read -r nm; do
      nm_dst="$wyp/$nm"
      mkdir -p "$(dirname "$nm_dst")"
      cp -a --reflink=auto "$syp/$nm" "$nm_dst"
      _copied+=("yarn-project/$nm")
    done < <(
      cd "$syp" || exit 0
      if [[ -d node_modules ]]; then echo node_modules; fi
      find . -maxdepth 2 -type d -name node_modules ! -path './node_modules' -printf '%P\n' 2>/dev/null
    )
  else
    log " yarn.lock differs between source and worktree; skipping node_modules copy."
    log " Run 'yarn install' in the worktree's yarn-project."
  fi

  # Build outputs: only when source and worktree are at the same yarn-project content state.
  if yp_same_state "$source" "$wt"; then
    log " Copying yarn-project build outputs (same content state)..."
    local f dst count=0 failed=0
    # NUL-delimited so git never C-quotes unusual file names. .yarn/cache and install-state.gz are
    # filtered out because they were already copied above whenever they exist.
    while IFS= read -r -d '' f; do
      [[ -z "$f" ]] && continue
      dst="$wyp/$f"
      mkdir -p "$(dirname "$dst")"
      if cp -a --reflink=auto "$syp/$f" "$dst" 2>/dev/null; then
        count=$((count + 1))
      else
        failed=$((failed + 1))
      fi
    done < <(git -C "$syp" ls-files -z --others --ignored --exclude-standard \
      | grep -zvE '(^|/)node_modules/' \
      | grep -zvE '(^|/)joblog\.txt$|\.log$' \
      | grep -zvE '^\.yarn/(cache/|install-state\.gz$)')
    _copied+=("yarn-project/")
    log " Copied $count build-output files."
    if [[ "$failed" -gt 0 ]]; then
      log " WARNING: $failed build-output file(s) could not be copied; run 'yarn build' if builds break."
    fi
  else
    log " yarn-project content state differs from source; SKIPPING build outputs."
    log " Run './bootstrap.sh' or 'yarn build' in the worktree's yarn-project."
  fi
}

# Print (one per line) every bootstrappable upstream component whose coarse cached artifact is
# missing according to frozen_precheck. Prints nothing when everything is available.
function _frozen_missing_components {
  local wt="$1" comp
  for comp in "${UPSTREAM_COMPONENTS[@]}"; do
    [[ -x "$wt/$comp/bootstrap.sh" ]] || continue
    frozen_precheck "$wt" "$comp" || printf '%s\n' "$comp"
  done
}

# Best-effort pre-check for --frozen-only: confirm the component's primary cached artifact exists
# before bootstrap would start a local build. Per-contract / per-circuit granularity is not checked
# (documented limitation); we check the coarse top-level artifact per component.
# Returns 0 when the artifact exists, the hash cannot be computed, or the component is not
# pre-checked; 1 when the artifact is missing.
function frozen_precheck {
  local wt="$1" comp="$2"
  local h prefix suffix=".tar.gz"
  case "$comp" in
    barretenberg/cpp) suffix=".zst" ;;
    barretenberg/ts) prefix="bb.js" ;;
    noir) prefix="noir" ;;
    avm-transpiler) prefix="avm-transpiler" ;;
    *)
      # l1-contracts and noir-projects use per-artifact/per-contract granularity; not pre-checked.
      return 0
      ;;
  esac
  h=$(cd "$wt/$comp" && ./bootstrap.sh hash 2>/dev/null) || return 0
  if [[ "$comp" == "barretenberg/cpp" ]]; then
    prefix="barretenberg-$(_bb_native_preset "$wt")"
  fi
  _frozen_check "$prefix-$h$suffix"
}

function _bb_native_preset {
  # Read the native preset the worktree's cpp bootstrap actually uses (e.g. clang20), honoring
  # an explicit NATIVE_PRESET override, so the coarse pre-check looks for the right artifact name.
  local wt="$1"
  if [[ -n "${NATIVE_PRESET:-}" ]]; then
    echo "$NATIVE_PRESET"
    return 0
  fi
  local p
  # `|| p=""`: with pipefail a grep miss (or a missing file) fails the pipeline; that just means
  # "use the default", not an error.
  p=$(grep -oE 'native_preset=\$\{NATIVE_PRESET:-[a-zA-Z0-9_-]+\}' "$wt/barretenberg/cpp/bootstrap.sh" 2>/dev/null \
    | head -1 | sed -E 's/.*:-([a-zA-Z0-9_-]+)\}/\1/') || p=""
  echo "${p:-clang20}"
}

# Return 0 if the named artifact is known to the cache (per ci3/cache_exists) or is already present
# as a tarball in CACHE_LOCAL_DIR; otherwise log the miss and return 1.
function _frozen_check {
  local artifact="$1"
  if CACHE_LINK_DIR="$CACHE_LINK_DIR" CACHE_LOCAL_DIR="$CACHE_LOCAL_DIR" "$ROOT/ci3/cache_exists" "$artifact" 2>/dev/null; then
    return 0
  fi
  if [[ -f "$CACHE_LOCAL_DIR/$artifact" ]]; then
    return 0
  fi
  log " --frozen-only: missing cached artifact $artifact"
  return 1
}

# Write <worktree>/.deps-manifest.json recording provenance, the yarn.lock hash, the linked store
# entries (from .deps-manifest.linked) and the copied yarn-layer items.
#   $1 - worktree root
#   $2 - source checkout root
#   $3 - NAME of the caller's array of copied item descriptors
function write_manifest {
  local wt="$1" source="$2"
  local -n _copied_ref="$3"
  local linked_json='[]' copied_json='[]'
  # Each list starts as [] and is only replaced when there is real input. This avoids two traps of
  # a `pipeline || echo "[]"` fallback: under pipefail the fallback output is APPENDED to whatever
  # jq already printed, and an empty array fed through printf '%s\n' becomes [""].
  if [[ -s "$wt/.deps-manifest.linked" ]]; then
    linked_json=$(sort -u "$wt/.deps-manifest.linked" | jq -R 'select(length > 0)' | jq -s .) || linked_json='[]'
  fi
  if [[ ${#_copied_ref[@]} -gt 0 ]]; then
    copied_json=$(printf '%s\n' "${_copied_ref[@]}" | jq -R . | jq -s .) || copied_json='[]'
  fi
  jq -n \
    --arg source "$source" \
    --arg sourceCommit "$(git -C "$source" rev-parse HEAD)" \
    --arg createdAt "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    --arg yarnLockHash "$(yarn_lock_hash "$wt")" \
    --argjson linked "$linked_json" \
    --argjson copied "$copied_json" \
    '{source: $source, sourceCommit: $sourceCommit, createdAt: $createdAt, yarnLockHash: $yarnLockHash, linked: $linked, copied: $copied}' \
    > "$wt/.deps-manifest.json"
}

# ---------------------------------------------------------------------------------------------------
# status
# ---------------------------------------------------------------------------------------------------

# Read store entry names from stdin (one per line) and report whether each still exists in the store.
function _status_print_entries {
  local e
  while IFS= read -r e; do
    [[ -z "$e" ]] && continue
    if [[ -d "$CACHE_LINK_DIR/$e" ]]; then
      log " [ok] $e"
    else
      log " [MISSING] $e"
    fi
  done
}

# status [path]: show manifest provenance, linked store entries and a yarn.lock drift hint for the
# given checkout (default: the checkout containing CWD, else the script's own checkout).
function cmd_status {
  local checkout="${1:-$ROOT}"
  checkout=$(cd "$checkout" && git rev-parse --show-toplevel)
  local manifest="$checkout/.deps-manifest.json"
  log "Checkout: $checkout"
  if [[ ! -f "$manifest" ]]; then
    log "No .deps-manifest.json found (not set up via worktrees.sh create)."
    if [[ -f "$checkout/.deps-manifest.linked" ]]; then
      log "Linked entries (from .deps-manifest.linked):"
      sort -u "$checkout/.deps-manifest.linked" | _status_print_entries
    fi
    return 0
  fi
  log "Source:        $(jq -r .source "$manifest")"
  log "Source commit: $(jq -r .sourceCommit "$manifest")"
  log "Created:       $(jq -r .createdAt "$manifest")"
  log ""
  log "Linked store entries:"
  # `[]?` tolerates a manifest without a .linked array instead of aborting under pipefail.
  jq -r '.linked[]?' "$manifest" | _status_print_entries
  log ""
  local mh ch
  mh=$(jq -r '.yarnLockHash // ""' "$manifest")
  ch=$(yarn_lock_hash "$checkout")
  if [[ "$mh" == "$ch" ]]; then
    log "yarn.lock: unchanged since setup."
  else
    log "yarn.lock: DRIFTED since setup (delete node_modules + run 'yarn install' if builds break)."
  fi
}

# ---------------------------------------------------------------------------------------------------
# thaw
# ---------------------------------------------------------------------------------------------------

# thaw <path>...: thaw each given path; the manifest that gets updated is the one of the checkout
# containing CWD.
function cmd_thaw {
  [[ $# -gt 0 ]] || die "thaw requires at least one path"
  local checkout
  checkout=$(git rev-parse --show-toplevel 2>/dev/null) \
    || die "thaw must be run from inside a checkout (its manifest is updated)"
  local p abs
  for p in "$@"; do
    # -s: do not follow symlinks, so a path that IS a store symlink stays the symlink (we thaw it),
    # rather than resolving to the store target.
    abs=$(realpath -m -s -- "$p")
    thaw_path "$abs" "$checkout"
  done
}

# Replace store symlinks at or under a path with writable copies; drop thawed entries from the manifest.
#   $1 - absolute path: either a store symlink itself, or a directory to scan for store symlinks
#   $2 - checkout root whose .deps-manifest.linked / .deps-manifest.json are updated
function thaw_path {
  local target="$1" checkout="$2"
  local -A thawed_entries=()

  if [[ -L "$target" ]]; then
    # Only thaw links that really point into the store; any other symlink (e.g. a relative
    # workspace link) must be left alone rather than deleted and "copied".
    if [[ "$(readlink "$target")" != "$CACHE_LINK_DIR"/* ]]; then
      log "thaw: $target is a symlink but does not point into the store ($CACHE_LINK_DIR); skipping."
      return 0
    fi
    _thaw_one "$target" thawed_entries
  elif [[ -d "$target" ]]; then
    local link
    while IFS= read -r -d '' link; do
      _thaw_one "$link" thawed_entries
    done < <(find "$target" -type l -lname "$CACHE_LINK_DIR/*" -print0 2>/dev/null)
  else
    log "thaw: $target is not a symlink or directory; skipping."
    return 0
  fi

  # Drop thawed entries from this checkout's manifests.
  local linked_file="$checkout/.deps-manifest.linked"
  local manifest="$checkout/.deps-manifest.json"
  local e
  if [[ ${#thawed_entries[@]} -gt 0 ]]; then
    for e in "${!thawed_entries[@]}"; do
      if [[ -f "$linked_file" ]]; then
        # grep exits 1 when nothing is left to print; that is a valid (empty) result.
        grep -vxF -- "$e" "$linked_file" > "$linked_file.tmp" || true
        mv -- "$linked_file.tmp" "$linked_file"
      fi
      if [[ -f "$manifest" ]]; then
        if jq --arg e "$e" '.linked |= map(select(. != $e))' "$manifest" > "$manifest.tmp"; then
          mv -- "$manifest.tmp" "$manifest"
        else
          rm -f -- "$manifest.tmp"
          log "WARNING: could not update $manifest (jq failed); entry $e is still listed."
        fi
      fi
    done
  fi
  log "Thawed ${#thawed_entries[@]} store entr$([[ ${#thawed_entries[@]} -eq 1 ]] && echo y || echo ies) under $target."
}

# Replace one store symlink with a writable copy of its target and record the store entry name.
#   $1 - path of the symlink
#   $2 - NAME of the caller's associative array; the entry name is added as a key
# The copy is staged next to the link and swapped in only after it succeeded, so a failed copy
# (dangling link, full disk) never leaves the checkout without the link.
function _thaw_one {
  local link="$1"
  local -n _thawed="$2"
  local store_target rel entry staging
  store_target=$(readlink "$link")
  # entry name = first path component of store_target relative to CACHE_LINK_DIR.
  rel="${store_target#"$CACHE_LINK_DIR"/}"
  entry="${rel%%/*}"
  log " thawing $link -> writable copy"
  staging="$link.thaw.$$"
  rm -rf -- "$staging"
  if ! cp -a --reflink=auto -- "$store_target" "$staging"; then
    # A partial copy may contain read-only directories; make it removable first.
    chmod -R u+w -- "$staging" 2>/dev/null || true
    rm -rf -- "$staging"
    die "thaw: failed to copy $store_target; $link left untouched."
  fi
  chmod -R u+w -- "$staging"
  rm -f -- "$link"
  mv -- "$staging" "$link"
  _thawed["$entry"]=1
}

# ---------------------------------------------------------------------------------------------------
# gc
# ---------------------------------------------------------------------------------------------------

# gc [--dry-run] [--keep-days N]: remove extracted store entries that no registered checkout
# references (by manifest or by symlink), then sweep old tarballs whose entry is gone.
function cmd_gc {
  local dry_run=0 keep_days=30
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --dry-run) dry_run=1; shift ;;
      --keep-days)
        [[ $# -ge 2 && "$2" =~ ^[0-9]+$ ]] || die "--keep-days requires a non-negative integer"
        keep_days="$2"
        shift 2
        ;;
      *) die "Unknown gc option: $1" ;;
    esac
  done

  # Pruning rewrites git's worktree metadata, so a dry run skips it. The result is the same either
  # way: a stale registration points at a missing directory, which has no manifest and no symlinks.
  if [[ "$dry_run" -eq 0 ]]; then
    git -C "$ROOT" worktree prune
  fi

  [[ -d "$CACHE_LINK_DIR" ]] || { log "No store at $CACHE_LINK_DIR; nothing to collect."; return 0; }

  # Registered checkouts. sed strips only the "worktree " prefix, so paths containing spaces
  # survive intact (awk's $2 would truncate them and hide that checkout's roots).
  local checkouts=() checkout
  while IFS= read -r checkout; do
    if [[ -n "$checkout" ]]; then checkouts+=("$checkout"); fi
  done < <(git -C "$ROOT" worktree list --porcelain | sed -n 's/^worktree //p')

  # Live roots = union of linked entries across every registered checkout.
  local -A live=()
  local e manifest linked_file
  for checkout in "${checkouts[@]}"; do
    manifest="$checkout/.deps-manifest.json"
    if [[ -f "$manifest" ]]; then
      while IFS= read -r e; do
        if [[ -n "$e" ]]; then live["$e"]=1; fi
      done < <(jq -r '.linked[]?' "$manifest" 2>/dev/null)
    fi
    linked_file="$checkout/.deps-manifest.linked"
    if [[ -f "$linked_file" ]]; then
      while IFS= read -r e; do
        if [[ -n "$e" ]]; then live["$e"]=1; fi
      done < "$linked_file"
    fi
  done

  log "Live store entries: ${#live[@]}"

  local -A removed=()
  local entry_dir entry co referenced hit collected=0
  for entry_dir in "$CACHE_LINK_DIR"/*/; do
    [[ -d "$entry_dir" ]] || continue
    entry=$(basename "$entry_dir")
    [[ "$entry" == .tmp.* ]] && continue
    if [[ -n "${live[$entry]:-}" ]]; then
      continue
    fi
    # Safety net: keep if any registered checkout still has a symlink into this entry. The find
    # output is captured rather than piped to `grep -q`: under pipefail a find that printed a hit
    # but also hit an unreadable directory would make the pipeline "fail" and the entry look
    # unreferenced.
    referenced=0
    for co in "${checkouts[@]}"; do
      hit=$(find "$co" -maxdepth 6 -type l \
        \( -lname "$CACHE_LINK_DIR/$entry/*" -o -lname "$CACHE_LINK_DIR/$entry" \) \
        -print -quit 2>/dev/null || true)
      if [[ -n "$hit" ]]; then
        referenced=1
        break
      fi
    done
    if [[ "$referenced" -eq 1 ]]; then
      log " KEEP (still symlinked, not in manifest): $entry"
      continue
    fi
    if [[ "$dry_run" -eq 1 ]]; then
      log " would remove entry: $entry"
    else
      log " removing entry: $entry"
      chmod -R u+w "$entry_dir" 2>/dev/null || true
      rm -rf "$entry_dir"
    fi
    removed["$entry"]=1
    collected=$((collected + 1))
  done

  # Sweep dead tarballs older than keep_days whose extracted entry is gone (or, in a dry run,
  # would be gone after the entry sweep above).
  local tarball name base collected_tar=0
  if [[ -d "$CACHE_LOCAL_DIR" ]]; then
    while IFS= read -r tarball; do
      [[ -z "$tarball" ]] && continue
      name=$(basename "$tarball")
      base="$name"; base="${base%.tar.gz}"; base="${base%.zst}"; base="${base%.tar}"
      if [[ -d "$CACHE_LINK_DIR/$base" && -z "${removed[$base]:-}" ]]; then
        continue
      fi
      [[ -n "${live[$base]:-}" ]] && continue
      if [[ "$dry_run" -eq 1 ]]; then
        log " would remove tarball (>$keep_days days, dead): $name"
      else
        log " removing tarball: $name"
        rm -f "$tarball"
      fi
      collected_tar=$((collected_tar + 1))
    done < <(find "$CACHE_LOCAL_DIR" -maxdepth 1 -type f \( -name '*.tar.gz' -o -name '*.zst' -o -name '*.tar' \) -mtime "+$keep_days" 2>/dev/null)
  fi

  log ""
  if [[ "$dry_run" -eq 1 ]]; then
    log "Dry run: $collected store entr$([[ $collected -eq 1 ]] && echo y || echo ies) and $collected_tar tarball(s) would be collected."
  else
    log "Collected $collected store entr$([[ $collected -eq 1 ]] && echo y || echo ies) and $collected_tar tarball(s)."
  fi
}

# ---------------------------------------------------------------------------------------------------
# Dispatch
# ---------------------------------------------------------------------------------------------------

# First argument selects the command; no command prints usage and exits 1, an explicit help
# request prints usage and exits 0.
cmd="${1:-}"
if [[ $# -gt 0 ]]; then shift; fi
case "$cmd" in
  create) cmd_create "$@" ;;
  status) cmd_status "$@" ;;
  thaw) cmd_thaw "$@" ;;
  gc) cmd_gc "$@" ;;
  --help|-h|help|"")
    usage
    if [[ -z "$cmd" ]]; then exit 1; else exit 0; fi
    ;;
  *) usage; die "Unknown command: $cmd" ;;
esac
diff --git a/yarn-project/.claude/skills/worktree-spawn/SKILL.md b/yarn-project/.claude/skills/worktree-spawn/SKILL.md index 7232f1a45115..8561c9368b7c 100644 --- a/yarn-project/.claude/skills/worktree-spawn/SKILL.md +++ b/yarn-project/.claude/skills/worktree-spawn/SKILL.md @@ -16,17 +16,27 @@ Spawn an independent Claude instance in a separate git worktree to work on a tas ## Workflow -1. Determine branch name using author initials (from `git config user.initials` or `git config user.name`) and task description -2. Choose a worktree directory name (typically `../aztec-`) -3. Create the worktree with a new branch -4. Spawn Claude in the worktree with a detailed task prompt +1. Choose a short worktree name from the task description (e.g. `fix-bug-123`) +2. Create the worktree with `scripts/worktrees.sh create` — NOT with bare `git worktree add`. The script + creates the worktree as a sibling of the source checkout (`/`) on a new branch + (`/`, from the checkout's git `user.initials`/`user.name`), initializes the + `noir/noir-repo` submodule, copies the writable yarn layer (`node_modules`, build outputs) from the + source checkout, and links upstream build artifacts (bb, nargo, contract artifacts, l1 out) from the + shared read-only store — leaving the worktree ready to build and test in minutes instead of a full + bootstrap. The source checkout is whichever aztec-packages checkout you run the script from. +3. 
Spawn Claude in the worktree with a detailed task prompt ## Command Template +The worktree path and branch are derived by the script (sibling dir of the checkout, branch prefixed with +your git initials). Use `--dry-run` first to learn the resolved path, then create and spawn in it. Run from +anywhere inside the checkout — no `cd` to the git root needed. + ```bash -cd $(git rev-parse --show-toplevel) && \ -git worktree add -b / ../ && \ -cd ..//yarn-project && \ +# Resolve where the worktree will land (no changes made), then create it and spawn Claude there. +WT_PATH=$(scripts/worktrees.sh create <name> [base-ref] --dry-run 2>&1 | awk '/^ path:/{print $2}') && \ +scripts/worktrees.sh create <name> [base-ref] && \ +cd "$WT_PATH/yarn-project" && \ claude "$(cat <<'EOF' Task: [Brief task description] @@ -39,20 +49,27 @@ IMPORTANT: Read CLAUDE.md first to understand the project structure and workflow [Any additional context or requirements] - Working directory: yarn-project in the worktree -- Branch: / - PR target: next (unless specified otherwise) EOF )" ``` +- `base-ref` defaults to the current checkout's HEAD. Pass `origin/next` (or another CI-built ref) when the + task should start from the latest base instead. +- The default branch is `<initials>/<name>` (initials from the checkout's `user.initials`, else derived from + `user.name`). To set the branch explicitly, either pass `--branch <branch>`, or give `<name>` itself with a + slash — e.g. `create ab/fix-thing` makes branch `ab/fix-thing` with the worktree dir `fix-thing`. +- If the script reports upstream cache misses, the affected components compile locally — slower but correct. + `--frozen-only` aborts instead of building on a miss. 
+ ## Example For a task "Fix bug #123 in the sequencer": ```bash -cd $(git rev-parse --show-toplevel) && \ -git worktree add -b jd/fix-bug-123 ../aztec-fix-bug && \ -cd ../aztec-fix-bug/yarn-project && \ +WT_PATH=$(scripts/worktrees.sh create fix-bug-123 --dry-run 2>&1 | awk '/^ path:/{print $2}') && \ +scripts/worktrees.sh create fix-bug-123 && \ +cd "$WT_PATH/yarn-project" && \ claude "$(cat <<'EOF' Task: Fix bug #123 in the sequencer @@ -70,10 +87,13 @@ EOF ## Key Points -- Always go to git root first before creating worktree -- Use `-b` flag to create a new branch -- Navigate to `yarn-project` within the worktree -- Always include "Read CLAUDE.md first" in the prompt -- Worktree directories are typically named `../aztec-` +- Always use `scripts/worktrees.sh create` — it replaces the full bootstrap with shared cached artifacts. + See `scripts/worktrees.sh --help` for what is symlinked vs copied and how to refresh after a rebase +- Upstream artifacts (bb, nargo, contract artifacts) are read-only symlinks into a shared store: do NOT + rebuild upstream components or run codegen in the worktree without `scripts/worktrees.sh thaw` first +- Rebuilding yarn-project workspaces (`yarn build`, `yarn workspace ... build`) is safe — those are + worktree-local copies +- When done, remove with `git worktree remove <path>` (the sibling dir printed at create time); run + `scripts/worktrees.sh gc` occasionally to clean orphaned store entries - The spawned Claude instance works independently from the current session - PR target is `next` unless specified otherwise