From 07dc9e1c4176b346868f7590387fcf6750509cd5 Mon Sep 17 00:00:00 2001 From: Elizabeth Worstell Date: Tue, 19 May 2026 14:58:21 -0700 Subject: [PATCH] perf(cli): apply bundle via git fetch + git merge --ff-only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces `git pull --ff-only <bundle> <branch>` in cachew git restore's bundle-apply path with an explicit `git fetch` + `git merge --ff-only FETCH_HEAD` pair. `git pull` runs two implicit subcommands that add no value for a one-shot bundle apply: - `git rev-list --objects --stdin --not --exclude-hidden=fetch --all` (connectivity check across all local refs) - `git maintenance run --auto` (background gc/repack heuristic) — can trigger surprise pack rewrites during a hot bootstrap path The new path also emits separate cachew.git_fetch_seconds and cachew.git_merge_seconds span attributes so future work can attribute bundle-apply cost between fetch and working-tree update. Measured on a staging cachew-bench workstation against squareup/cash-server (5.5 GiB snapshot, 290 KiB bundle, 13 commits ahead): bundle-apply wall-clock improves by ~300ms (14.95s -> 14.41s median over 3 runs). The cold `git merge` itself dominates bundle-apply cost on a fresh extract; this PR is mainly about predictability and instrumentation, not raw wall-clock. Amp-Thread-ID: https://ampcode.com/threads/T-019e41af-0a15-718d-a9d8-e26df6071f9b Co-authored-by: Amp --- cmd/cachew/git.go | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/cmd/cachew/git.go b/cmd/cachew/git.go index 4e30e01..589fe59 100644 --- a/cmd/cachew/git.go +++ b/cmd/cachew/git.go @@ -309,15 +309,28 @@ func applyBundle(ctx context.Context, api *client.Client, bundleURL, directory s branch := strings.TrimSpace(string(branchOut)) span.SetAttributes(attribute.String("cachew.branch", branch)) - // Pull the bundle's branch into the working tree via fast-forward. 
- applyStart := time.Now() - cmd := exec.CommandContext(ctx, "git", "-C", directory, "pull", "--ff-only", tmpFile.Name(), branch) //nolint:gosec - if output, err := cmd.CombinedOutput(); err != nil { + // Apply the bundle as two explicit steps instead of `git pull --ff-only`. + // `git pull` runs an implicit connectivity check (`git rev-list --objects + // --stdin --not --exclude-hidden=fetch --all`) and `git maintenance run + // --auto` after the fetch, which together add real wall-clock time on + // large repos and have no value for a one-shot ff-only bundle apply. + fetchStart := time.Now() + fetchCmd := exec.CommandContext(ctx, "git", "-C", directory, "fetch", "--no-tags", tmpFile.Name(), branch) //nolint:gosec + if output, err := fetchCmd.CombinedOutput(); err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, err.Error()) + return errors.Wrapf(err, "git fetch from bundle: %s", string(output)) + } + span.SetAttributes(attribute.Float64("cachew.git_fetch_seconds", time.Since(fetchStart).Seconds())) + + mergeStart := time.Now() + mergeCmd := exec.CommandContext(ctx, "git", "-C", directory, "merge", "--ff-only", "FETCH_HEAD") //nolint:gosec + if output, err := mergeCmd.CombinedOutput(); err != nil { span.RecordError(err) span.SetStatus(codes.Error, err.Error()) - return errors.Wrapf(err, "git pull from bundle: %s", string(output)) + return errors.Wrapf(err, "git merge --ff-only FETCH_HEAD: %s", string(output)) } - span.SetAttributes(attribute.Float64("cachew.git_pull_seconds", time.Since(applyStart).Seconds())) + span.SetAttributes(attribute.Float64("cachew.git_merge_seconds", time.Since(mergeStart).Seconds())) return nil }