From aa8e4f2af738bdec3071b9243d070786f7c937e1 Mon Sep 17 00:00:00 2001 From: Jerry Date: Tue, 5 May 2026 15:02:05 -0700 Subject: [PATCH 1/4] feat(complexity,sizes): delta gating against merge-base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In diff mode, complexity and size gates now compare each finding to its value at the merge-base and drop findings that did not get worse on this branch. A PR that touches a 4000-line legacy file or a complexity=91 function without making it bigger or more complex no longer fails CI on metrics it inherited. How it works: - diff.Result.MergeBase is populated by Parse (empty for refactoring mode via CollectPaths). - diff.ShowAtRef fetches a file's contents at the merge-base, returning (nil, nil) for paths absent from the tree and bubbling up errors for bad refs / non-repos so a misconfigured CI doesn't silently weaken the gate. - internal/baseline writes the base content to a temp file (extension preserved) so existing language analyzers run unchanged with a full-coverage synthetic FileChange. - complexity.Analyze and sizes.Analyze drop findings where base ≥ head (function: complexity / line count; file: line count). New files/functions absent from base are kept so absolute thresholds still gate genuinely new debt. Refactoring mode (--paths) leaves MergeBase empty and falls through to absolute thresholds — no behavior change there. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/baseline/baseline.go | 57 +++++ internal/complexity/complexity.go | 66 ++++++ internal/complexity/complexity_delta_test.go | 221 ++++++++++++++++++ internal/diff/diff.go | 31 ++- internal/diff/show_at_ref_test.go | 76 ++++++ internal/sizes/sizes.go | 111 +++++++++ internal/sizes/sizes_delta_test.go | 231 +++++++++++++++++++ 7 files changed, 792 insertions(+), 1 deletion(-) create mode 100644 internal/baseline/baseline.go create mode 100644 internal/complexity/complexity_delta_test.go create mode 100644 internal/diff/show_at_ref_test.go create mode 100644 internal/sizes/sizes_delta_test.go diff --git a/internal/baseline/baseline.go b/internal/baseline/baseline.go new file mode 100644 index 0000000..e979e15 --- /dev/null +++ b/internal/baseline/baseline.go @@ -0,0 +1,57 @@ +// Package baseline provides helpers for "delta gating": running an analyzer +// against the pre-change version of a file (via `git show :`) +// so that callers can drop findings whose underlying metric did not get worse +// in the diff. +// +// The package keeps language analyzers stateless and unaware of base refs: +// the file content is fetched, written to a temp file preserving the original +// extension, and the existing AnalyzeFile / ExtractFunctions methods run on +// it with a synthetic full-coverage FileChange. +package baseline + +import ( + "fmt" + "math" + "os" + "path/filepath" + + "github.com/0xPolygon/diffguard/internal/diff" +) + +// FullCoverage returns a FileChange whose single region spans the entire file, +// so per-language overlap filters include every function. +func FullCoverage(repoRelPath string) diff.FileChange { + return diff.FileChange{ + Path: repoRelPath, + Regions: []diff.ChangedRegion{{StartLine: 1, EndLine: math.MaxInt32}}, + } +} + +// FetchToTemp fetches repoRelPath at ref and writes it to a temp file whose +// name preserves the original extension (some analyzers branch on path +// extension). 
Returns ("", nil) if the file did not exist at the base ref. +// Caller is responsible for os.Remove(path). +func FetchToTemp(repoPath, ref, repoRelPath string) (string, error) { + content, err := diff.ShowAtRef(repoPath, ref, repoRelPath) + if err != nil { + return "", err + } + if content == nil { + return "", nil + } + ext := filepath.Ext(repoRelPath) + tmp, err := os.CreateTemp("", "diffguard-base-*"+ext) + if err != nil { + return "", fmt.Errorf("temp file: %w", err) + } + if _, err := tmp.Write(content); err != nil { + tmp.Close() + os.Remove(tmp.Name()) + return "", fmt.Errorf("write base content: %w", err) + } + if err := tmp.Close(); err != nil { + os.Remove(tmp.Name()) + return "", err + } + return tmp.Name(), nil +} diff --git a/internal/complexity/complexity.go b/internal/complexity/complexity.go index c3bf9a8..43de3c2 100644 --- a/internal/complexity/complexity.go +++ b/internal/complexity/complexity.go @@ -11,9 +11,11 @@ package complexity import ( "fmt" "math" + "os" "path/filepath" "sort" + "github.com/0xPolygon/diffguard/internal/baseline" "github.com/0xPolygon/diffguard/internal/diff" "github.com/0xPolygon/diffguard/internal/lang" "github.com/0xPolygon/diffguard/internal/report" @@ -34,9 +36,73 @@ func Analyze(repoPath string, d *diff.Result, threshold int, calc lang.Complexit } results = append(results, fnResults...) } + + // Delta gating: in diff mode, drop pre-existing violations so PRs aren't + // blamed for legacy complexity they merely touched. A function over the + // threshold is only flagged if its complexity is greater than its value + // at the merge-base (or the function is brand new on this branch). + // Refactoring mode (CollectPaths) leaves MergeBase empty and falls + // through to absolute thresholds. 
+ if d.MergeBase != "" { + results = applyDeltaFilter(repoPath, d, results, calc) + } + return buildSection(results, threshold), nil } +// applyDeltaFilter drops findings whose complexity at the merge-base was +// already >= the head value — i.e. the diff did not make the function worse. +// New functions (absent at base) are kept as-is so they're still gated by the +// absolute threshold downstream. +func applyDeltaFilter(repoPath string, d *diff.Result, head []lang.FunctionComplexity, calc lang.ComplexityCalculator) []lang.FunctionComplexity { + baseByFile := make(map[string]map[string]int) + for _, fc := range d.Files { + if base, ok := baseComplexity(repoPath, d.MergeBase, fc.Path, calc); ok { + baseByFile[fc.Path] = base + } + } + + var out []lang.FunctionComplexity + for _, h := range head { + if worsened(h, baseByFile[h.File]) { + out = append(out, h) + } + } + return out +} + +// worsened reports whether a head finding represents a genuine regression +// against the base map. A nil/missing entry (function did not exist at base) +// counts as worsened so brand-new over-threshold functions still fail. +func worsened(h lang.FunctionComplexity, baseFuncs map[string]int) bool { + base, exists := baseFuncs[h.Name] + if !exists { + return true + } + return h.Complexity > base +} + +// baseComplexity returns a name->complexity map for repoRelPath at ref, or +// (nil, false) if the file did not exist at ref or could not be analyzed +// (treated as "no baseline" — head findings stay as-is). 
+func baseComplexity(repoPath, ref, repoRelPath string, calc lang.ComplexityCalculator) (map[string]int, bool) { + tmp, err := baseline.FetchToTemp(repoPath, ref, repoRelPath) + if err != nil || tmp == "" { + return nil, false + } + defer os.Remove(tmp) + + baseFuncs, err := calc.AnalyzeFile(tmp, baseline.FullCoverage(repoRelPath)) + if err != nil { + return nil, false + } + m := make(map[string]int, len(baseFuncs)) + for _, f := range baseFuncs { + m[f.Name] = f.Complexity + } + return m, true +} + func collectComplexityFindings(results []lang.FunctionComplexity, threshold int) ([]report.Finding, []float64, int) { var findings []report.Finding var values []float64 diff --git a/internal/complexity/complexity_delta_test.go b/internal/complexity/complexity_delta_test.go new file mode 100644 index 0000000..16b9f03 --- /dev/null +++ b/internal/complexity/complexity_delta_test.go @@ -0,0 +1,221 @@ +package complexity + +import ( + "os" + "os/exec" + "path/filepath" + "slices" + "testing" + + "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" + _ "github.com/0xPolygon/diffguard/internal/lang/goanalyzer" + "github.com/0xPolygon/diffguard/internal/report" +) + +func runGit(t *testing.T, dir string, args ...string) { + t.Helper() + cmd := exec.Command("git", args...) 
+ cmd.Dir = dir + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("git %v: %v\n%s", args, err, out) + } +} + +func initRepo(t *testing.T) string { + t.Helper() + dir := t.TempDir() + runGit(t, dir, "init", "-q", "--initial-branch=main") + runGit(t, dir, "config", "user.email", "test@example.com") + runGit(t, dir, "config", "user.name", "Test") + runGit(t, dir, "config", "commit.gpgsign", "false") + return dir +} + +func writeAndCommit(t *testing.T, dir, path, content, msg string) { + t.Helper() + if err := os.WriteFile(filepath.Join(dir, path), []byte(content), 0644); err != nil { + t.Fatal(err) + } + runGit(t, dir, "add", ".") + runGit(t, dir, "commit", "-q", "-m", msg) +} + +// complexBodyV1 / V2 carry the same cognitive complexity but differ on the +// inner-most line, so the diff lands *inside* the function body. Without +// this, OverlapsRange would skip the function entirely and the delta-gating +// code path wouldn't be exercised. +const complexBodyV1 = ` if x > 0 { + if x > 10 { + if x > 100 { + if x > 1000 { + if x > 10000 { + if x > 100000 { + _ = x // v1 + } + } + } + } + } + }` + +const complexBodyV2 = ` if x > 0 { + if x > 10 { + if x > 100 { + if x > 1000 { + if x > 10000 { + if x > 100000 { + _ = x // v2 + } + } + } + } + } + }` + +// parseAndAnalyze runs the same diff.Parse + complexity.Analyze flow main() +// uses, returning the section so tests can assert on it directly. 
+func parseAndAnalyze(t *testing.T, dir, base string) report.Section { + t.Helper() + calc := goCalc(t) + l, _ := lang.Get("go") + d, err := diff.Parse(dir, base, diff.Filter{ + DiffGlobs: l.FileFilter().DiffGlobs, + Includes: func(p string) bool { + ff := l.FileFilter() + if !slices.Contains(ff.Extensions, filepath.Ext(p)) { + return false + } + return !ff.IsTestFile(p) + }, + }) + if err != nil { + t.Fatalf("diff.Parse: %v", err) + } + section, err := Analyze(dir, d, 10, calc) + if err != nil { + t.Fatalf("Analyze: %v", err) + } + return section +} + +// TestDeltaGating_PreExistingViolationDropped covers the headline use case: +// a PR that touches a legacy complex function without making it worse should +// not be blamed for that function's complexity. +func TestDeltaGating_PreExistingViolationDropped(t *testing.T) { + dir := initRepo(t) + base := "package x\n\nfunc Complex(x int) {\n" + complexBodyV1 + "\n}\n" + writeAndCommit(t, dir, "a.go", base, "base") + + runGit(t, dir, "checkout", "-q", "-b", "feature") + // Diff lands *inside* the function (comment on a body line), so the + // function is analyzed, but its cognitive score is identical to base. + // Delta gating must drop the finding. + feature := "package x\n\nfunc Complex(x int) {\n" + complexBodyV2 + "\n}\n" + writeAndCommit(t, dir, "a.go", feature, "tweak inner comment") + + s := parseAndAnalyze(t, dir, "main") + if s.Severity != report.SeverityPass { + t.Errorf("severity = %v, want PASS (legacy complexity not worsened); findings=%+v", s.Severity, s.Findings) + } + if len(s.Findings) != 0 { + t.Errorf("findings = %d, want 0", len(s.Findings)) + } +} + +// TestDeltaGating_IncreasedComplexityKept covers the regression case: a PR +// that pushes a function's complexity higher must still fail. +func TestDeltaGating_IncreasedComplexityKept(t *testing.T) { + dir := initRepo(t) + // Baseline: complexity right at the threshold (10) — mild but not + // flagged. 
The smaller body lets us add nesting in the feature commit + // without rewriting it from scratch. + baseBody := ` if x > 0 { + if x > 10 { + if x > 100 { + if x > 1000 { + _ = x + } + } + } + }` + base := "package x\n\nfunc Complex(x int) {\n" + baseBody + "\n}\n" + writeAndCommit(t, dir, "a.go", base, "base") + + runGit(t, dir, "checkout", "-q", "-b", "feature") + // Feature: add two more levels of nesting → complexity climbs above + // threshold. Delta gating must keep this finding. + feature := "package x\n\nfunc Complex(x int) {\n" + complexBodyV1 + "\n}\n" + writeAndCommit(t, dir, "a.go", feature, "deepen nesting") + + s := parseAndAnalyze(t, dir, "main") + if s.Severity != report.SeverityFail { + t.Errorf("severity = %v, want FAIL", s.Severity) + } + if len(s.Findings) != 1 || s.Findings[0].Function != "Complex" { + t.Errorf("findings = %+v, want one for Complex", s.Findings) + } +} + +// TestDeltaGating_NewFunctionKept covers the "new debt" case: a brand-new +// over-threshold function in the diff has no baseline, so it stays flagged. 
+func TestDeltaGating_NewFunctionKept(t *testing.T) { + dir := initRepo(t) + writeAndCommit(t, dir, "a.go", "package x\n", "base") + + runGit(t, dir, "checkout", "-q", "-b", "feature") + feature := "package x\n\nfunc Complex(x int) {\n" + complexBodyV1 + "\n}\n" + writeAndCommit(t, dir, "a.go", feature, "add Complex") + + s := parseAndAnalyze(t, dir, "main") + if s.Severity != report.SeverityFail { + t.Errorf("severity = %v, want FAIL", s.Severity) + } + if len(s.Findings) != 1 || s.Findings[0].Function != "Complex" { + t.Errorf("findings = %+v, want one for Complex", s.Findings) + } +} + +func TestWorsened(t *testing.T) { + mk := func(name string, c int) lang.FunctionComplexity { + return lang.FunctionComplexity{ + FunctionInfo: lang.FunctionInfo{Name: name}, + Complexity: c, + } + } + cases := []struct { + name string + h lang.FunctionComplexity + base map[string]int + want bool + }{ + {"absent_at_base", mk("f", 12), map[string]int{}, true}, + {"nil_base_map", mk("f", 12), nil, true}, + {"head_higher", mk("f", 13), map[string]int{"f": 12}, true}, + {"head_equal", mk("f", 12), map[string]int{"f": 12}, false}, + {"head_lower", mk("f", 8), map[string]int{"f": 12}, false}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if got := worsened(tc.h, tc.base); got != tc.want { + t.Errorf("worsened = %v, want %v", got, tc.want) + } + }) + } +} + +// TestDeltaGating_NewFileKept covers the "the whole file is new" case: when +// `git show base:path` returns nothing, fall back to absolute thresholds. 
+func TestDeltaGating_NewFileKept(t *testing.T) { + dir := initRepo(t) + writeAndCommit(t, dir, "other.go", "package x\n", "base") + + runGit(t, dir, "checkout", "-q", "-b", "feature") + feature := "package x\n\nfunc Complex(x int) {\n" + complexBodyV1 + "\n}\n" + writeAndCommit(t, dir, "newfile.go", feature, "add new file") + + s := parseAndAnalyze(t, dir, "main") + if s.Severity != report.SeverityFail { + t.Errorf("severity = %v, want FAIL (new file gated by absolute threshold)", s.Severity) + } +} diff --git a/internal/diff/diff.go b/internal/diff/diff.go index a1c378d..7e8f17a 100644 --- a/internal/diff/diff.go +++ b/internal/diff/diff.go @@ -52,7 +52,11 @@ func (fc FileChange) OverlapsRange(start, end int) bool { // Result holds all changed source files parsed from a git diff. type Result struct { BaseBranch string - Files []FileChange + // MergeBase is the resolved commit SHA of merge-base(BaseBranch, HEAD). + // Populated by Parse; empty in refactoring mode (CollectPaths). Used by + // delta-gating analyzers to fetch pre-change file content via `git show`. + MergeBase string + Files []FileChange } // ChangedPackages returns the unique set of package directories with changes. @@ -136,10 +140,35 @@ func Parse(repoPath, baseBranch string, filter Filter) (*Result, error) { return &Result{ BaseBranch: baseBranch, + MergeBase: mergeBase, Files: files, }, nil } +// ShowAtRef returns the bytes of repoRelPath at the given git ref, or +// (nil, nil) if the path didn't exist there. Other failures (bad ref, not a +// git repo) bubble up as errors. Used by delta-gating analyzers to compare a +// changed file's metric against its pre-change baseline. +// +// `git show ref:path` exits 128 for several distinct conditions: path absent +// in the tree, ref unknown, not a git repo. Only the first should turn into +// the "no baseline" signal — the others must surface as errors so a broken +// CI config doesn't silently weaken the gate. We disambiguate on stderr. 
+func ShowAtRef(repoPath, ref, repoRelPath string) ([]byte, error) { + cmd := exec.Command("git", "show", ref+":"+repoRelPath) + cmd.Dir = repoPath + var stderr strings.Builder + cmd.Stderr = &stderr + out, err := cmd.Output() + if err == nil { + return out, nil + } + if strings.Contains(stderr.String(), "does not exist in") { + return nil, nil + } + return nil, fmt.Errorf("git show %s:%s: %w (%s)", ref, repoRelPath, err, strings.TrimSpace(stderr.String())) +} + // CollectPaths builds a Result by treating each analyzable source file under // the given paths as fully changed. Useful for refactoring mode where you // want to analyze entire files rather than diffed regions only. diff --git a/internal/diff/show_at_ref_test.go b/internal/diff/show_at_ref_test.go new file mode 100644 index 0000000..9d34df2 --- /dev/null +++ b/internal/diff/show_at_ref_test.go @@ -0,0 +1,76 @@ +package diff + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +// TestShowAtRef_ReturnsContent: happy path — file exists at the requested +// ref and the bytes come back verbatim. +func TestShowAtRef_ReturnsContent(t *testing.T) { + dir := t.TempDir() + initGitRepo(t, dir) + mustWrite(t, filepath.Join(dir, "a.txt"), "hello\n") + runGit(t, dir, "add", ".") + runGit(t, dir, "commit", "-q", "-m", "init") + + got, err := ShowAtRef(dir, "HEAD", "a.txt") + if err != nil { + t.Fatalf("ShowAtRef: %v", err) + } + if string(got) != "hello\n" { + t.Errorf("content = %q, want %q", got, "hello\n") + } +} + +// TestShowAtRef_AbsentPath: a path that doesn't exist at ref must come back +// as (nil, nil), not as an error — that's the "no baseline" signal that +// callers (delta gating) rely on. 
+func TestShowAtRef_AbsentPath(t *testing.T) { + dir := t.TempDir() + initGitRepo(t, dir) + mustWrite(t, filepath.Join(dir, "exists.txt"), "x\n") + runGit(t, dir, "add", ".") + runGit(t, dir, "commit", "-q", "-m", "init") + + got, err := ShowAtRef(dir, "HEAD", "missing.txt") + if err != nil { + t.Errorf("err = %v, want nil for absent path", err) + } + if got != nil { + t.Errorf("got = %q, want nil for absent path", got) + } +} + +// TestShowAtRef_BadRef: a malformed/unknown ref must surface as an error so +// callers don't silently treat it as "no baseline" and let regressions slip. +func TestShowAtRef_BadRef(t *testing.T) { + dir := t.TempDir() + initGitRepo(t, dir) + mustWrite(t, filepath.Join(dir, "a.txt"), "x\n") + runGit(t, dir, "add", ".") + runGit(t, dir, "commit", "-q", "-m", "init") + + _, err := ShowAtRef(dir, "no-such-ref", "a.txt") + if err == nil { + t.Fatal("expected error for unknown ref, got nil") + } + if !strings.Contains(err.Error(), "git show") { + t.Errorf("err = %v, expected to mention git show", err) + } +} + +// TestShowAtRef_NotGitRepo: running outside a git repo must surface an +// error, not be silently swallowed as "absent path". 
+func TestShowAtRef_NotGitRepo(t *testing.T) { + dir := t.TempDir() + if _, err := os.Stat(filepath.Join(dir, ".git")); err == nil { + t.Fatalf("temp dir unexpectedly has .git") + } + _, err := ShowAtRef(dir, "HEAD", "a.txt") + if err == nil { + t.Fatal("expected error outside git repo") + } +} diff --git a/internal/sizes/sizes.go b/internal/sizes/sizes.go index bf67a5e..a4cfa6f 100644 --- a/internal/sizes/sizes.go +++ b/internal/sizes/sizes.go @@ -5,9 +5,11 @@ package sizes import ( "fmt" + "os" "path/filepath" "sort" + "github.com/0xPolygon/diffguard/internal/baseline" "github.com/0xPolygon/diffguard/internal/diff" "github.com/0xPolygon/diffguard/internal/lang" "github.com/0xPolygon/diffguard/internal/report" @@ -31,9 +33,118 @@ func Analyze(repoPath string, d *diff.Result, funcThreshold, fileThreshold int, } } + // Delta gating mirrors the complexity gate: in diff mode, a PR is only + // blamed for size violations it caused. Per-function: drop if base lines + // >= head lines. Per-file: drop if base lines >= head lines (touching a + // 4000-line file without growing it is not a regression). + if d.MergeBase != "" { + funcResults, fileResults = applySizeDeltaFilter(repoPath, d, funcResults, fileResults, extractor) + } + return buildSection(funcResults, fileResults, funcThreshold, fileThreshold), nil } +// applySizeDeltaFilter drops per-function and per-file findings whose +// underlying line count did not grow vs. the merge-base. Files/functions that +// did not exist at base are kept as-is so absolute thresholds still gate +// brand-new code. 
+func applySizeDeltaFilter( + repoPath string, + d *diff.Result, + headFuncs []lang.FunctionSize, + headFiles []lang.FileSize, + extractor lang.FunctionExtractor, +) ([]lang.FunctionSize, []lang.FileSize) { + byFile := make(map[string]baseSizes) + for _, fc := range d.Files { + byFile[fc.Path] = baseSizesFor(repoPath, d.MergeBase, fc.Path, extractor) + } + return filterFuncSizes(headFuncs, byFile), filterFileSizes(headFiles, byFile) +} + +// filterFuncSizes drops per-function findings whose base-side line count was +// already >= the head-side count (no growth this PR). +func filterFuncSizes(head []lang.FunctionSize, byFile map[string]baseSizes) []lang.FunctionSize { + var out []lang.FunctionSize + for _, h := range head { + if !grewFunc(h, byFile[h.File]) { + continue + } + out = append(out, h) + } + return out +} + +// filterFileSizes drops per-file findings whose whole-file line count was +// already >= the head-side count. +func filterFileSizes(head []lang.FileSize, byFile map[string]baseSizes) []lang.FileSize { + var out []lang.FileSize + for _, h := range head { + if !grewFile(h, byFile[h.Path]) { + continue + } + out = append(out, h) + } + return out +} + +// grewFunc reports whether a function-size finding represents real growth on +// this branch. Missing baseline (file absent at base, or function not present +// at base) counts as growth so absolute thresholds still apply. +func grewFunc(h lang.FunctionSize, b baseSizes) bool { + if !b.ok { + return true + } + base, exists := b.funcs[h.Name] + if !exists { + return true + } + return h.Lines > base +} + +// grewFile mirrors grewFunc for whole-file size: missing baseline → growth; +// otherwise growth iff head exceeds base. +func grewFile(h lang.FileSize, b baseSizes) bool { + if !b.ok { + return true + } + return h.Lines > b.file +} + +// baseSizes captures the per-function and whole-file line counts of a single +// repo-relative path at the merge-base. 
ok=false means "no baseline" (file +// absent at base, or extractor failed) — callers should treat that as +// "compare nothing" so head findings stay in place. +type baseSizes struct { + funcs map[string]int + file int + ok bool +} + +func baseSizesFor(repoPath, ref, repoRelPath string, extractor lang.FunctionExtractor) baseSizes { + tmp, err := baseline.FetchToTemp(repoPath, ref, repoRelPath) + if err != nil || tmp == "" { + return baseSizes{} + } + defer os.Remove(tmp) + + baseFuncs, baseFile, err := extractor.ExtractFunctions(tmp, baseline.FullCoverage(repoRelPath)) + if err != nil { + return baseSizes{} + } + out := baseSizes{ + funcs: make(map[string]int, len(baseFuncs)), + ok: true, + } + for _, f := range baseFuncs { + out.funcs[f.Name] = f.Lines + } + if baseFile != nil { + out.file = baseFile.Lines + } + return out +} + func checkFuncSizes(funcs []lang.FunctionSize, threshold int) []report.Finding { var findings []report.Finding for _, f := range funcs { diff --git a/internal/sizes/sizes_delta_test.go b/internal/sizes/sizes_delta_test.go new file mode 100644 index 0000000..2bd6979 --- /dev/null +++ b/internal/sizes/sizes_delta_test.go @@ -0,0 +1,231 @@ +package sizes + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "slices" + "strings" + "testing" + + "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" + _ "github.com/0xPolygon/diffguard/internal/lang/goanalyzer" + "github.com/0xPolygon/diffguard/internal/report" +) + +func runGit(t *testing.T, dir string, args ...string) { + t.Helper() + cmd := exec.Command("git", args...) 
+ cmd.Dir = dir + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("git %v: %v\n%s", args, err, out) + } +} + +func initRepo(t *testing.T) string { + t.Helper() + dir := t.TempDir() + runGit(t, dir, "init", "-q", "--initial-branch=main") + runGit(t, dir, "config", "user.email", "test@example.com") + runGit(t, dir, "config", "user.name", "Test") + runGit(t, dir, "config", "commit.gpgsign", "false") + return dir +} + +func writeAndCommit(t *testing.T, dir, path, content, msg string) { + t.Helper() + if err := os.WriteFile(filepath.Join(dir, path), []byte(content), 0644); err != nil { + t.Fatal(err) + } + runGit(t, dir, "add", ".") + runGit(t, dir, "commit", "-q", "-m", msg) +} + +// bigFunc returns a Go function whose body has `lines` no-op statements, +// landing the FunctionExtractor's line count well above any threshold the +// tests use. +func bigFunc(name string, lines int) string { + var sb strings.Builder + fmt.Fprintf(&sb, "func %s() {\n", name) + for i := range lines { + fmt.Fprintf(&sb, "\t_ = %d\n", i) + } + sb.WriteString("}\n") + return sb.String() +} + +func parseAndAnalyze(t *testing.T, dir, base string, funcThreshold, fileThreshold int) report.Section { + t.Helper() + l, _ := lang.Get("go") + d, err := diff.Parse(dir, base, diff.Filter{ + DiffGlobs: l.FileFilter().DiffGlobs, + Includes: func(p string) bool { + ff := l.FileFilter() + if !slices.Contains(ff.Extensions, filepath.Ext(p)) { + return false + } + return !ff.IsTestFile(p) + }, + }) + if err != nil { + t.Fatalf("diff.Parse: %v", err) + } + section, err := Analyze(dir, d, funcThreshold, fileThreshold, goExtractor(t)) + if err != nil { + t.Fatalf("Analyze: %v", err) + } + return section +} + +func TestGrewFunc(t *testing.T) { + mk := func(name string, lines int) lang.FunctionSize { + return lang.FunctionSize{ + FunctionInfo: lang.FunctionInfo{Name: name}, + Lines: lines, + } + } + withBase := baseSizes{funcs: map[string]int{"f": 60}, ok: true} + noBase := baseSizes{} + + cases := 
[]struct { + name string + h lang.FunctionSize + b baseSizes + want bool + }{ + {"no_baseline_treated_as_grew", mk("f", 80), noBase, true}, + {"absent_at_base", mk("g", 80), withBase, true}, + {"head_higher", mk("f", 80), withBase, true}, + {"head_equal", mk("f", 60), withBase, false}, + {"head_lower", mk("f", 40), withBase, false}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if got := grewFunc(tc.h, tc.b); got != tc.want { + t.Errorf("grewFunc = %v, want %v", got, tc.want) + } + }) + } +} + +func TestGrewFile(t *testing.T) { + withBase := baseSizes{file: 600, ok: true} + noBase := baseSizes{} + + cases := []struct { + name string + h lang.FileSize + b baseSizes + want bool + }{ + {"no_baseline_treated_as_grew", lang.FileSize{Path: "x.go", Lines: 700}, noBase, true}, + {"head_higher", lang.FileSize{Path: "x.go", Lines: 700}, withBase, true}, + {"head_equal", lang.FileSize{Path: "x.go", Lines: 600}, withBase, false}, + {"head_lower", lang.FileSize{Path: "x.go", Lines: 500}, withBase, false}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if got := grewFile(tc.h, tc.b); got != tc.want { + t.Errorf("grewFile = %v, want %v", got, tc.want) + } + }) + } +} + +// TestSizesDelta_FunctionUnchangedDropped: legacy oversized function, only a +// comment inside is touched by the PR. Line count unchanged → drop. +func TestSizesDelta_FunctionUnchangedDropped(t *testing.T) { + dir := initRepo(t) + body := bigFunc("Big", 80) // 80+2 = ~82 lines, well over funcThreshold=50. 
+ base := "package x\n\n// v1\n" + body + writeAndCommit(t, dir, "a.go", base, "base") + + runGit(t, dir, "checkout", "-q", "-b", "feature") + feature := strings.Replace(base, "_ = 0\n", "_ = 0 // touched\n", 1) + writeAndCommit(t, dir, "a.go", feature, "comment tweak") + + s := parseAndAnalyze(t, dir, "main", 50, 500) + for _, f := range s.Findings { + if f.Function == "Big" { + t.Errorf("legacy function size finding leaked through delta gate: %+v", f) + } + } +} + +// TestSizesDelta_FunctionGrewKept: function grew past the threshold during +// this PR — must still be flagged. +func TestSizesDelta_FunctionGrewKept(t *testing.T) { + dir := initRepo(t) + base := "package x\n\n" + bigFunc("Grow", 30) + writeAndCommit(t, dir, "a.go", base, "base") + + runGit(t, dir, "checkout", "-q", "-b", "feature") + feature := "package x\n\n" + bigFunc("Grow", 80) + writeAndCommit(t, dir, "a.go", feature, "grow Grow") + + s := parseAndAnalyze(t, dir, "main", 50, 500) + found := false + for _, f := range s.Findings { + if f.Function == "Grow" { + found = true + } + } + if !found { + t.Errorf("expected finding for Grow, got %+v", s.Findings) + } +} + +// TestSizesDelta_FileTouchedNotGrownDropped: 4000-line legacy file, PR +// changes one line without growing the file. File-size finding must be +// dropped. +func TestSizesDelta_FileTouchedNotGrownDropped(t *testing.T) { + dir := initRepo(t) + var sb strings.Builder + sb.WriteString("package x\n\n") + for i := range 600 { + fmt.Fprintf(&sb, "var V%d = %d\n", i, i) + } + base := sb.String() + writeAndCommit(t, dir, "big.go", base, "base") + + runGit(t, dir, "checkout", "-q", "-b", "feature") + // Same line count, different content on a single line. 
+ feature := strings.Replace(base, "var V0 = 0", "var V0 = 1", 1) + writeAndCommit(t, dir, "big.go", feature, "tweak V0") + + s := parseAndAnalyze(t, dir, "main", 50, 500) + for _, f := range s.Findings { + if f.File == "big.go" && f.Function == "" { + t.Errorf("legacy file size finding leaked: %+v", f) + } + } +} + +// TestSizesDelta_FileGrewKept: oversized file grew further. Must still flag. +func TestSizesDelta_FileGrewKept(t *testing.T) { + dir := initRepo(t) + var sb strings.Builder + sb.WriteString("package x\n\n") + for i := range 600 { + fmt.Fprintf(&sb, "var V%d = %d\n", i, i) + } + base := sb.String() + writeAndCommit(t, dir, "big.go", base, "base") + + runGit(t, dir, "checkout", "-q", "-b", "feature") + feature := base + "var Extra = 1\nvar Extra2 = 2\n" + writeAndCommit(t, dir, "big.go", feature, "grow") + + s := parseAndAnalyze(t, dir, "main", 50, 500) + found := false + for _, f := range s.Findings { + if f.File == "big.go" && f.Function == "" { + found = true + } + } + if !found { + t.Errorf("expected file-size finding for big.go, got %+v", s.Findings) + } +} From b31e8920ee2d6062884d288c84dbe3d28436f4c6 Mon Sep 17 00:00:00 2001 From: Jerry Date: Tue, 5 May 2026 15:13:40 -0700 Subject: [PATCH 2/4] feat(complexity,sizes): show delta in messages, tolerance flag for complexity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Findings now render '(+N vs base)' when the function/file existed at the merge-base, so PR authors can tell "added new hot code" from "made existing hot code hotter". Brand-new code stays with the bare message. - New --complexity-delta-tolerance flag (default 3) — head must exceed base by *more than* tolerance to count as worsened. Forgives the +1/+2/+3 noise that's typical when threading a parameter through a legacy hot function. Size deltas remain strict (tolerance=0); cheap to add later if needed. 
- The orchestrators thread per-file/per-function delta maps to the formatters; helper extraction (recordDelta, recordFuncDelta) keeps applyDeltaFilter / filterFuncSizes under the cognitive-complexity threshold. - New tests cover formatComplexityMsg / formatFuncSizeMsg / formatFileSizeMsg (lock subtraction direction so the head-base operand order doesn't silently flip), and the previously-untested internal/baseline package. On the bor blockstm_redesign branch (vs origin/develop), tolerance=3 drops three +1..+3 regressions (ProcessBlock, subfetcher.loop, stateTransition.execute) while keeping legitimate +5/+4 worsenings (IntermediateRoot, getStateObject) and the brand-new block-stm v2 functions. Co-Authored-By: Claude Opus 4.7 (1M context) --- cmd/diffguard/main.go | 10 +- internal/baseline/baseline_test.go | 122 +++++++++++++++ internal/complexity/complexity.go | 80 +++++++--- internal/complexity/complexity_delta_test.go | 150 +++++++++++++++++-- internal/complexity/complexity_test.go | 14 +- internal/sizes/sizes.go | 93 +++++++++--- internal/sizes/sizes_delta_test.go | 86 +++++++++++ internal/sizes/sizes_test.go | 16 +- 8 files changed, 502 insertions(+), 69 deletions(-) create mode 100644 internal/baseline/baseline_test.go diff --git a/cmd/diffguard/main.go b/cmd/diffguard/main.go index 699baff..2c1a5a4 100644 --- a/cmd/diffguard/main.go +++ b/cmd/diffguard/main.go @@ -27,6 +27,7 @@ import ( func main() { var cfg Config flag.IntVar(&cfg.ComplexityThreshold, "complexity-threshold", 10, "Maximum cognitive complexity per function") + flag.IntVar(&cfg.ComplexityDeltaTolerance, "complexity-delta-tolerance", 3, "In diff mode, ignore complexity regressions where head exceeds base by this much or less; brand-new functions still gated by --complexity-threshold") flag.IntVar(&cfg.FunctionSizeThreshold, "function-size-threshold", 50, "Maximum lines per function") flag.IntVar(&cfg.FileSizeThreshold, "file-size-threshold", 500, "Maximum lines per file") 
flag.BoolVar(&cfg.SkipMutation, "skip-mutation", false, "Skip mutation testing") @@ -69,9 +70,10 @@ func main() { // Config holds CLI configuration. type Config struct { - ComplexityThreshold int - FunctionSizeThreshold int - FileSizeThreshold int + ComplexityThreshold int + ComplexityDeltaTolerance int + FunctionSizeThreshold int + FileSizeThreshold int SkipMutation bool SkipDeadCode bool SkipGenerated bool @@ -272,7 +274,7 @@ func announceRun(d *diff.Result, cfg Config, l lang.Language, numLanguages int) func runAnalyses(repoPath string, d *diff.Result, cfg Config, l lang.Language) ([]report.Section, error) { var sections []report.Section - complexitySection, err := complexity.Analyze(repoPath, d, cfg.ComplexityThreshold, l.ComplexityCalculator()) + complexitySection, err := complexity.Analyze(repoPath, d, cfg.ComplexityThreshold, cfg.ComplexityDeltaTolerance, l.ComplexityCalculator()) if err != nil { return nil, fmt.Errorf("complexity analysis: %w", err) } diff --git a/internal/baseline/baseline_test.go b/internal/baseline/baseline_test.go new file mode 100644 index 0000000..82413b6 --- /dev/null +++ b/internal/baseline/baseline_test.go @@ -0,0 +1,122 @@ +package baseline + +import ( + "math" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" +) + +func runGit(t *testing.T, dir string, args ...string) { + t.Helper() + cmd := exec.Command("git", args...) 
+ cmd.Dir = dir + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("git %v: %v\n%s", args, err, out) + } +} + +func initRepoWith(t *testing.T, files map[string]string) string { + t.Helper() + dir := t.TempDir() + runGit(t, dir, "init", "-q", "--initial-branch=main") + runGit(t, dir, "config", "user.email", "test@example.com") + runGit(t, dir, "config", "user.name", "Test") + runGit(t, dir, "config", "commit.gpgsign", "false") + for name, content := range files { + if err := os.WriteFile(filepath.Join(dir, name), []byte(content), 0644); err != nil { + t.Fatal(err) + } + } + runGit(t, dir, "add", ".") + runGit(t, dir, "commit", "-q", "-m", "init") + return dir +} + +func TestFullCoverage(t *testing.T) { + fc := FullCoverage("foo/bar.go") + if fc.Path != "foo/bar.go" { + t.Errorf("Path = %q, want foo/bar.go", fc.Path) + } + if len(fc.Regions) != 1 { + t.Fatalf("Regions = %d, want 1", len(fc.Regions)) + } + r := fc.Regions[0] + if r.StartLine != 1 { + t.Errorf("StartLine = %d, want 1", r.StartLine) + } + if r.EndLine != math.MaxInt32 { + t.Errorf("EndLine = %d, want math.MaxInt32 (so OverlapsRange always matches)", r.EndLine) + } +} + +func TestFetchToTemp_HappyPath(t *testing.T) { + dir := initRepoWith(t, map[string]string{"a.go": "package x\nfunc F() {}\n"}) + + tmp, err := FetchToTemp(dir, "HEAD", "a.go") + if err != nil { + t.Fatalf("FetchToTemp: %v", err) + } + if tmp == "" { + t.Fatal("tmp = \"\", want a path for an existing file") + } + defer os.Remove(tmp) + + // Extension preservation matters: some analyzers branch on path ext. + if filepath.Ext(tmp) != ".go" { + t.Errorf("temp ext = %q, want .go (must be preserved)", filepath.Ext(tmp)) + } + + // Bytes round-trip. 
+ got, err := os.ReadFile(tmp) + if err != nil { + t.Fatal(err) + } + if !strings.Contains(string(got), "func F()") { + t.Errorf("temp content missing original; got %q", got) + } +} + +func TestFetchToTemp_AbsentReturnsEmptyPath(t *testing.T) { + dir := initRepoWith(t, map[string]string{"a.go": "package x\n"}) + + tmp, err := FetchToTemp(dir, "HEAD", "missing.go") + if err != nil { + t.Errorf("err = %v, want nil for absent path", err) + } + if tmp != "" { + os.Remove(tmp) + t.Errorf("tmp = %q, want \"\" for absent path", tmp) + } +} + +func TestFetchToTemp_BadRefSurfacesError(t *testing.T) { + dir := initRepoWith(t, map[string]string{"a.go": "package x\n"}) + + _, err := FetchToTemp(dir, "no-such-ref", "a.go") + if err == nil { + t.Fatal("expected error for unknown ref") + } +} + +func TestFetchToTemp_PreservesExtension(t *testing.T) { + dir := initRepoWith(t, map[string]string{ + "a.ts": "export const x = 1\n", + "b.rs": "fn main() {}\n", + "c.txt": "hi\n", + }) + + for _, name := range []string{"a.ts", "b.rs", "c.txt"} { + tmp, err := FetchToTemp(dir, "HEAD", name) + if err != nil { + t.Fatalf("FetchToTemp(%s): %v", name, err) + } + want := filepath.Ext(name) + if got := filepath.Ext(tmp); got != want { + t.Errorf("%s: tmp ext = %q, want %q", name, got, want) + } + os.Remove(tmp) + } +} diff --git a/internal/complexity/complexity.go b/internal/complexity/complexity.go index 43de3c2..1d04b65 100644 --- a/internal/complexity/complexity.go +++ b/internal/complexity/complexity.go @@ -26,7 +26,7 @@ import ( // "Cognitive Complexity" report section. Parse errors are swallowed at the // calculator layer (returning nil) so a single malformed file doesn't fail // the whole run. 
-func Analyze(repoPath string, d *diff.Result, threshold int, calc lang.ComplexityCalculator) (report.Section, error) { +func Analyze(repoPath string, d *diff.Result, threshold, deltaTolerance int, calc lang.ComplexityCalculator) (report.Section, error) { var results []lang.FunctionComplexity for _, fc := range d.Files { absPath := filepath.Join(repoPath, fc.Path) @@ -39,22 +39,30 @@ func Analyze(repoPath string, d *diff.Result, threshold int, calc lang.Complexit // Delta gating: in diff mode, drop pre-existing violations so PRs aren't // blamed for legacy complexity they merely touched. A function over the - // threshold is only flagged if its complexity is greater than its value - // at the merge-base (or the function is brand new on this branch). - // Refactoring mode (CollectPaths) leaves MergeBase empty and falls - // through to absolute thresholds. + // threshold is only flagged if its complexity grew by more than + // deltaTolerance vs. the merge-base (or the function is brand new on + // this branch). Refactoring mode (CollectPaths) leaves MergeBase empty + // and falls through to absolute thresholds. + // + // deltas tracks (file, name) -> base complexity for findings that survive + // the filter, so the section can render head/base/Δ in the message. + var deltas map[string]map[string]int if d.MergeBase != "" { - results = applyDeltaFilter(repoPath, d, results, calc) + results, deltas = applyDeltaFilter(repoPath, d, results, calc, deltaTolerance) } - return buildSection(results, threshold), nil + return buildSection(results, threshold, deltas), nil } // applyDeltaFilter drops findings whose complexity at the merge-base was -// already >= the head value — i.e. the diff did not make the function worse. -// New functions (absent at base) are kept as-is so they're still gated by the -// absolute threshold downstream. 
-func applyDeltaFilter(repoPath string, d *diff.Result, head []lang.FunctionComplexity, calc lang.ComplexityCalculator) []lang.FunctionComplexity { +// within tolerance of the head value — i.e. the diff did not measurably +// worsen the function. New functions (absent at base) are kept as-is so +// they're still gated by the absolute threshold downstream. +// +// Returned `deltas` is a (file -> name -> base complexity) map covering only +// kept findings whose function existed at base, so the report can show the +// delta alongside the head value. +func applyDeltaFilter(repoPath string, d *diff.Result, head []lang.FunctionComplexity, calc lang.ComplexityCalculator, tolerance int) ([]lang.FunctionComplexity, map[string]map[string]int) { baseByFile := make(map[string]map[string]int) for _, fc := range d.Files { if base, ok := baseComplexity(repoPath, d.MergeBase, fc.Path, calc); ok { @@ -63,23 +71,42 @@ func applyDeltaFilter(repoPath string, d *diff.Result, head []lang.FunctionCompl } var out []lang.FunctionComplexity + deltas := make(map[string]map[string]int) for _, h := range head { - if worsened(h, baseByFile[h.File]) { - out = append(out, h) + baseFuncs := baseByFile[h.File] + if !worsened(h, baseFuncs, tolerance) { + continue } + out = append(out, h) + recordDelta(deltas, h, baseFuncs) + } + return out, deltas +} + +// recordDelta stores the base complexity for h in deltas[h.File][h.Name] when +// the function existed at base. New functions (no entry) are skipped so the +// report can distinguish them from regressions. +func recordDelta(deltas map[string]map[string]int, h lang.FunctionComplexity, baseFuncs map[string]int) { + base, exists := baseFuncs[h.Name] + if !exists { + return + } + if deltas[h.File] == nil { + deltas[h.File] = make(map[string]int) } - return out + deltas[h.File][h.Name] = base } // worsened reports whether a head finding represents a genuine regression // against the base map. 
A nil/missing entry (function did not exist at base) // counts as worsened so brand-new over-threshold functions still fail. -func worsened(h lang.FunctionComplexity, baseFuncs map[string]int) bool { +// Otherwise the head value must exceed base by more than tolerance. +func worsened(h lang.FunctionComplexity, baseFuncs map[string]int, tolerance int) bool { base, exists := baseFuncs[h.Name] if !exists { return true } - return h.Complexity > base + return h.Complexity > base+tolerance } // baseComplexity returns a name->complexity map for repoRelPath at ref, or @@ -103,7 +130,7 @@ func baseComplexity(repoPath, ref, repoRelPath string, calc lang.ComplexityCalcu return m, true } -func collectComplexityFindings(results []lang.FunctionComplexity, threshold int) ([]report.Finding, []float64, int) { +func collectComplexityFindings(results []lang.FunctionComplexity, threshold int, deltas map[string]map[string]int) ([]report.Finding, []float64, int) { var findings []report.Finding var values []float64 failCount := 0 @@ -118,7 +145,7 @@ func collectComplexityFindings(results []lang.FunctionComplexity, threshold int) File: r.File, Line: r.Line, Function: r.Name, - Message: fmt.Sprintf("complexity=%d", r.Complexity), + Message: formatComplexityMsg(r, deltas), Value: float64(r.Complexity), Limit: float64(threshold), Severity: report.SeverityFail, @@ -132,7 +159,20 @@ func collectComplexityFindings(results []lang.FunctionComplexity, threshold int) return findings, values, failCount } -func buildSection(results []lang.FunctionComplexity, threshold int) report.Section { +// formatComplexityMsg renders the per-finding message, appending a +// "(+Δ vs base)" suffix when delta-gating recorded a baseline value for the +// function. Brand-new functions (no entry in deltas) get the bare form so +// PR authors can tell "I added this hot function" from "I made an existing +// hot function hotter". 
+func formatComplexityMsg(r lang.FunctionComplexity, deltas map[string]map[string]int) string { + base, ok := deltas[r.File][r.Name] + if !ok { + return fmt.Sprintf("complexity=%d", r.Complexity) + } + return fmt.Sprintf("complexity=%d (+%d vs base)", r.Complexity, r.Complexity-base) +} + +func buildSection(results []lang.FunctionComplexity, threshold int, deltas map[string]map[string]int) report.Section { if len(results) == 0 { return report.Section{ Name: "Cognitive Complexity", @@ -141,7 +181,7 @@ func buildSection(results []lang.FunctionComplexity, threshold int) report.Secti } } - findings, values, failCount := collectComplexityFindings(results, threshold) + findings, values, failCount := collectComplexityFindings(results, threshold, deltas) sev := report.SeverityPass if failCount > 0 { diff --git a/internal/complexity/complexity_delta_test.go b/internal/complexity/complexity_delta_test.go index 16b9f03..10e1f7f 100644 --- a/internal/complexity/complexity_delta_test.go +++ b/internal/complexity/complexity_delta_test.go @@ -74,7 +74,8 @@ const complexBodyV2 = ` if x > 0 { }` // parseAndAnalyze runs the same diff.Parse + complexity.Analyze flow main() -// uses, returning the section so tests can assert on it directly. +// uses, with delta-tolerance=0 so tests can pin down exact head/base +// boundaries without the production tolerance hiding small regressions. 
func parseAndAnalyze(t *testing.T, dir, base string) report.Section { t.Helper() calc := goCalc(t) @@ -92,7 +93,7 @@ func parseAndAnalyze(t *testing.T, dir, base string) report.Section { if err != nil { t.Fatalf("diff.Parse: %v", err) } - section, err := Analyze(dir, d, 10, calc) + section, err := Analyze(dir, d, 10, 0, calc) if err != nil { t.Fatalf("Analyze: %v", err) } @@ -184,26 +185,151 @@ func TestWorsened(t *testing.T) { } } cases := []struct { - name string - h lang.FunctionComplexity - base map[string]int - want bool + name string + h lang.FunctionComplexity + base map[string]int + tolerance int + want bool }{ - {"absent_at_base", mk("f", 12), map[string]int{}, true}, - {"nil_base_map", mk("f", 12), nil, true}, - {"head_higher", mk("f", 13), map[string]int{"f": 12}, true}, - {"head_equal", mk("f", 12), map[string]int{"f": 12}, false}, - {"head_lower", mk("f", 8), map[string]int{"f": 12}, false}, + {"absent_at_base", mk("f", 12), map[string]int{}, 0, true}, + {"nil_base_map", mk("f", 12), nil, 0, true}, + {"head_higher_no_tolerance", mk("f", 13), map[string]int{"f": 12}, 0, true}, + {"head_equal_no_tolerance", mk("f", 12), map[string]int{"f": 12}, 0, false}, + {"head_lower", mk("f", 8), map[string]int{"f": 12}, 0, false}, + // Tolerance-aware cases: head must exceed base by *more than* + // tolerance to count as worsened. tolerance=3 means +1, +2, +3 are + // all forgiven; +4 is the first regression that fails. 
+ {"within_tolerance_lower_bound", mk("f", 13), map[string]int{"f": 12}, 3, false}, + {"within_tolerance_at_bound", mk("f", 15), map[string]int{"f": 12}, 3, false}, + {"just_over_tolerance", mk("f", 16), map[string]int{"f": 12}, 3, true}, + {"new_function_tolerance_irrelevant", mk("g", 8), map[string]int{"f": 12}, 100, true}, } for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { - if got := worsened(tc.h, tc.base); got != tc.want { + if got := worsened(tc.h, tc.base, tc.tolerance); got != tc.want { t.Errorf("worsened = %v, want %v", got, tc.want) } }) } } +// TestDeltaGating_WithinToleranceDropped covers the production default: a +// small (+3 or less) regression on a legacy hot function should not block +// the PR. The function is over the absolute threshold both before and after, +// but the delta is within tolerance. +func TestDeltaGating_WithinToleranceDropped(t *testing.T) { + dir := initRepo(t) + // Base: complexity=21 (six nested ifs). + base := "package x\n\nfunc Complex(x int) {\n" + complexBodyV1 + "\n}\n" + writeAndCommit(t, dir, "a.go", base, "base") + + runGit(t, dir, "checkout", "-q", "-b", "feature") + // Feature: tack one logical operator onto the outermost condition. + // Cognitive complexity goes up by exactly 1 (op-type-change counter) + // without adding a new branch — well within the production tolerance. 
+ feature := `package x + +func Complex(x int) { + if x > 0 && x < 1<<31 { + if x > 10 { + if x > 100 { + if x > 1000 { + if x > 10000 { + if x > 100000 { + _ = x + } + } + } + } + } + } +} +` + writeAndCommit(t, dir, "a.go", feature, "tiny bump") + + calc := goCalc(t) + l, _ := lang.Get("go") + d, err := diff.Parse(dir, "main", diff.Filter{ + DiffGlobs: l.FileFilter().DiffGlobs, + Includes: func(p string) bool { + ff := l.FileFilter() + if !slices.Contains(ff.Extensions, filepath.Ext(p)) { + return false + } + return !ff.IsTestFile(p) + }, + }) + if err != nil { + t.Fatalf("diff.Parse: %v", err) + } + // With tolerance=3: +1 regression is forgiven → PASS. + s, err := Analyze(dir, d, 10, 3, calc) + if err != nil { + t.Fatalf("Analyze: %v", err) + } + if s.Severity != report.SeverityPass { + t.Errorf("severity = %v, want PASS (within tolerance); findings=%+v", s.Severity, s.Findings) + } + // Same data, tolerance=0: same +1 regression now fails. Locks in the + // invariant that tolerance-3 is the only thing softening the gate here. 
+ s, err = Analyze(dir, d, 10, 0, calc) + if err != nil { + t.Fatalf("Analyze (tol=0): %v", err) + } + if s.Severity != report.SeverityFail { + t.Errorf("tol=0: severity = %v, want FAIL", s.Severity) + } +} + +func TestFormatComplexityMsg(t *testing.T) { + mk := func(file, name string, c int) lang.FunctionComplexity { + return lang.FunctionComplexity{ + FunctionInfo: lang.FunctionInfo{File: file, Name: name}, + Complexity: c, + } + } + cases := []struct { + name string + fn lang.FunctionComplexity + deltas map[string]map[string]int + want string + }{ + { + name: "no_baseline_bare_message", + fn: mk("a.go", "f", 12), + deltas: nil, + want: "complexity=12", + }, + { + name: "no_entry_for_function", + fn: mk("a.go", "f", 12), + deltas: map[string]map[string]int{"a.go": {"other": 5}}, + want: "complexity=12", + }, + { + name: "renders_positive_delta", + fn: mk("a.go", "f", 17), + deltas: map[string]map[string]int{"a.go": {"f": 12}}, + want: "complexity=17 (+5 vs base)", + }, + { + // Locks the subtraction direction: head - base, not the other + // way around. (Mutation testing flips operands; this catches it.) + name: "subtraction_not_addition", + fn: mk("a.go", "f", 30), + deltas: map[string]map[string]int{"a.go": {"f": 28}}, + want: "complexity=30 (+2 vs base)", + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if got := formatComplexityMsg(tc.fn, tc.deltas); got != tc.want { + t.Errorf("formatComplexityMsg = %q, want %q", got, tc.want) + } + }) + } +} + // TestDeltaGating_NewFileKept covers the "the whole file is new" case: when // `git show base:path` returns nothing, fall back to absolute thresholds. 
func TestDeltaGating_NewFileKept(t *testing.T) { diff --git a/internal/complexity/complexity_test.go b/internal/complexity/complexity_test.go index 1f8dcfb..b75503d 100644 --- a/internal/complexity/complexity_test.go +++ b/internal/complexity/complexity_test.go @@ -62,7 +62,7 @@ func complex_fn(x int) { }, } - section, err := Analyze(dir, d, 10, goCalc(t)) + section, err := Analyze(dir, d, 10, 0, goCalc(t)) if err != nil { t.Fatalf("Analyze: %v", err) } @@ -80,7 +80,7 @@ func complex_fn(x int) { func TestAnalyze_EmptyResult(t *testing.T) { d := &diff.Result{} // no files - section, err := Analyze(t.TempDir(), d, 10, goCalc(t)) + section, err := Analyze(t.TempDir(), d, 10, 0, goCalc(t)) if err != nil { t.Fatalf("Analyze: %v", err) } @@ -99,7 +99,7 @@ func TestBuildSection_StatsValues(t *testing.T) { {FunctionInfo: lang.FunctionInfo{File: "c.go", Line: 1, Name: "f3"}, Complexity: 12}, } - s := buildSection(results, 10) + s := buildSection(results, 10, nil) stats := s.Stats.(map[string]any) if stats["total_functions"] != 3 { @@ -120,7 +120,7 @@ func TestBuildSection_StatsValues(t *testing.T) { } func TestBuildSection_Empty(t *testing.T) { - s := buildSection(nil, 10) + s := buildSection(nil, 10, nil) if s.Severity != report.SeverityPass { t.Errorf("severity = %v, want PASS", s.Severity) } @@ -132,7 +132,7 @@ func TestBuildSection_WithViolations(t *testing.T) { {FunctionInfo: lang.FunctionInfo{File: "b.go", Line: 1, Name: "simple"}, Complexity: 3}, } - s := buildSection(results, 10) + s := buildSection(results, 10, nil) if s.Severity != report.SeverityFail { t.Errorf("severity = %v, want FAIL", s.Severity) } @@ -148,7 +148,7 @@ func TestCollectComplexityFindings(t *testing.T) { {FunctionInfo: lang.FunctionInfo{File: "c.go", Line: 1, Name: "medium"}, Complexity: 10}, } - findings, values, failCount := collectComplexityFindings(results, 10) + findings, values, failCount := collectComplexityFindings(results, 10, nil) if failCount != 1 { t.Errorf("failCount = %d, want 1", 
failCount) @@ -167,7 +167,7 @@ func TestCollectComplexityFindings_AtBoundary(t *testing.T) { {FunctionInfo: lang.FunctionInfo{File: "b.go", Line: 1, Name: "over"}, Complexity: 11}, } - _, _, failCount := collectComplexityFindings(results, 10) + _, _, failCount := collectComplexityFindings(results, 10, nil) if failCount != 1 { t.Errorf("failCount = %d, want 1 (11 > 10, 10 is not > 10)", failCount) } diff --git a/internal/sizes/sizes.go b/internal/sizes/sizes.go index a4cfa6f..33a6a5a 100644 --- a/internal/sizes/sizes.go +++ b/internal/sizes/sizes.go @@ -37,11 +37,17 @@ func Analyze(repoPath string, d *diff.Result, funcThreshold, fileThreshold int, // blamed for size violations it caused. Per-function: drop if base lines // >= head lines. Per-file: drop if base lines >= head lines (touching a // 4000-line file without growing it is not a regression). + // + // The returned delta maps (head only retains entries whose function/file + // existed at base) feed the message formatter so PR authors can see + // "+10 vs base" alongside the head line count. 
+ var funcDeltas map[string]map[string]int + var fileDeltas map[string]int if d.MergeBase != "" { - funcResults, fileResults = applySizeDeltaFilter(repoPath, d, funcResults, fileResults, extractor) + funcResults, fileResults, funcDeltas, fileDeltas = applySizeDeltaFilter(repoPath, d, funcResults, fileResults, extractor) } - return buildSection(funcResults, fileResults, funcThreshold, fileThreshold), nil + return buildSection(funcResults, fileResults, funcThreshold, fileThreshold, funcDeltas, fileDeltas), nil } // applySizeDeltaFilter drops per-function and per-file findings whose @@ -54,38 +60,68 @@ func applySizeDeltaFilter( headFuncs []lang.FunctionSize, headFiles []lang.FileSize, extractor lang.FunctionExtractor, -) ([]lang.FunctionSize, []lang.FileSize) { +) ([]lang.FunctionSize, []lang.FileSize, map[string]map[string]int, map[string]int) { byFile := make(map[string]baseSizes) for _, fc := range d.Files { byFile[fc.Path] = baseSizesFor(repoPath, d.MergeBase, fc.Path, extractor) } - return filterFuncSizes(headFuncs, byFile), filterFileSizes(headFiles, byFile) + keptFuncs, funcDeltas := filterFuncSizes(headFuncs, byFile) + keptFiles, fileDeltas := filterFileSizes(headFiles, byFile) + return keptFuncs, keptFiles, funcDeltas, fileDeltas } // filterFuncSizes drops per-function findings whose base-side line count was -// already >= the head-side count (no growth this PR). -func filterFuncSizes(head []lang.FunctionSize, byFile map[string]baseSizes) []lang.FunctionSize { +// already >= the head-side count (no growth this PR). Returns both the kept +// findings and a delta map (file -> name -> base lines) so the message +// formatter can render "+N vs base" alongside the head value. 
+func filterFuncSizes(head []lang.FunctionSize, byFile map[string]baseSizes) ([]lang.FunctionSize, map[string]map[string]int) { var out []lang.FunctionSize + deltas := make(map[string]map[string]int) for _, h := range head { - if !grewFunc(h, byFile[h.File]) { + b := byFile[h.File] + if !grewFunc(h, b) { continue } out = append(out, h) + recordFuncDelta(deltas, h, b) } - return out + return out, deltas +} + +// recordFuncDelta records the base line count for a kept finding so the +// report can render "+N vs base". Brand-new functions (no base entry) skip +// the record so the message stays bare. +func recordFuncDelta(deltas map[string]map[string]int, h lang.FunctionSize, b baseSizes) { + if !b.ok { + return + } + base, exists := b.funcs[h.Name] + if !exists { + return + } + if deltas[h.File] == nil { + deltas[h.File] = make(map[string]int) + } + deltas[h.File][h.Name] = base } // filterFileSizes drops per-file findings whose whole-file line count was -// already >= the head-side count. -func filterFileSizes(head []lang.FileSize, byFile map[string]baseSizes) []lang.FileSize { +// already >= the head-side count. Returns kept findings + a path -> base +// lines map for delta-aware message formatting. 
+func filterFileSizes(head []lang.FileSize, byFile map[string]baseSizes) ([]lang.FileSize, map[string]int) { var out []lang.FileSize + deltas := make(map[string]int) for _, h := range head { - if !grewFile(h, byFile[h.Path]) { + b := byFile[h.Path] + if !grewFile(h, b) { continue } out = append(out, h) + if b.ok { + deltas[h.Path] = b.file + } } - return out + return out, deltas } // grewFunc reports whether a function-size finding represents real growth on @@ -145,7 +181,7 @@ func baseSizesFor(repoPath, ref, repoRelPath string, extractor lang.FunctionExtr return out } -func checkFuncSizes(funcs []lang.FunctionSize, threshold int) []report.Finding { +func checkFuncSizes(funcs []lang.FunctionSize, threshold int, deltas map[string]map[string]int) []report.Finding { var findings []report.Finding for _, f := range funcs { if f.Lines > threshold { @@ -153,7 +189,7 @@ func checkFuncSizes(funcs []lang.FunctionSize, threshold int) []report.Finding { File: f.File, Line: f.Line, Function: f.Name, - Message: fmt.Sprintf("function=%d lines", f.Lines), + Message: formatFuncSizeMsg(f, deltas), Value: float64(f.Lines), Limit: float64(threshold), Severity: report.SeverityFail, @@ -163,13 +199,13 @@ func checkFuncSizes(funcs []lang.FunctionSize, threshold int) []report.Finding { return findings } -func checkFileSizes(files []lang.FileSize, threshold int) []report.Finding { +func checkFileSizes(files []lang.FileSize, threshold int, deltas map[string]int) []report.Finding { var findings []report.Finding for _, f := range files { if f.Lines > threshold { findings = append(findings, report.Finding{ File: f.Path, - Message: fmt.Sprintf("file=%d lines", f.Lines), + Message: formatFileSizeMsg(f, deltas), Value: float64(f.Lines), Limit: float64(threshold), Severity: report.SeverityFail, @@ -179,7 +215,28 @@ func checkFileSizes(files []lang.FileSize, threshold int) []report.Finding { return findings } -func buildSection(funcs []lang.FunctionSize, files []lang.FileSize, funcThreshold, 
fileThreshold int) report.Section { +// formatFuncSizeMsg renders the per-function size message with a "+Δ vs +// base" suffix when the function existed at base. New functions get the +// bare form so the report distinguishes "added a 200-line function" from +// "made an existing 195-line function 200 lines". +func formatFuncSizeMsg(f lang.FunctionSize, deltas map[string]map[string]int) string { + base, ok := deltas[f.File][f.Name] + if !ok { + return fmt.Sprintf("function=%d lines", f.Lines) + } + return fmt.Sprintf("function=%d lines (+%d vs base)", f.Lines, f.Lines-base) +} + +// formatFileSizeMsg mirrors formatFuncSizeMsg for whole-file size. +func formatFileSizeMsg(f lang.FileSize, deltas map[string]int) string { + base, ok := deltas[f.Path] + if !ok { + return fmt.Sprintf("file=%d lines", f.Lines) + } + return fmt.Sprintf("file=%d lines (+%d vs base)", f.Lines, f.Lines-base) +} + +func buildSection(funcs []lang.FunctionSize, files []lang.FileSize, funcThreshold, fileThreshold int, funcDeltas map[string]map[string]int, fileDeltas map[string]int) report.Section { if len(funcs) == 0 && len(files) == 0 { return report.Section{ Name: "Code Sizes", @@ -188,7 +245,7 @@ func buildSection(funcs []lang.FunctionSize, files []lang.FileSize, funcThreshol } } - findings := append(checkFuncSizes(funcs, funcThreshold), checkFileSizes(files, fileThreshold)...) + findings := append(checkFuncSizes(funcs, funcThreshold, funcDeltas), checkFileSizes(files, fileThreshold, fileDeltas)...) 
sort.Slice(findings, func(i, j int) bool { return findings[i].Value > findings[j].Value diff --git a/internal/sizes/sizes_delta_test.go b/internal/sizes/sizes_delta_test.go index 2bd6979..1d0431f 100644 --- a/internal/sizes/sizes_delta_test.go +++ b/internal/sizes/sizes_delta_test.go @@ -134,6 +134,92 @@ func TestGrewFile(t *testing.T) { } } +func TestFormatFuncSizeMsg(t *testing.T) { + mk := func(file, name string, lines int) lang.FunctionSize { + return lang.FunctionSize{ + FunctionInfo: lang.FunctionInfo{File: file, Name: name}, + Lines: lines, + } + } + cases := []struct { + name string + fn lang.FunctionSize + deltas map[string]map[string]int + want string + }{ + { + name: "no_baseline_bare_message", + fn: mk("a.go", "f", 80), + deltas: nil, + want: "function=80 lines", + }, + { + name: "no_entry_for_function", + fn: mk("a.go", "f", 80), + deltas: map[string]map[string]int{"a.go": {"other": 5}}, + want: "function=80 lines", + }, + { + name: "renders_positive_delta", + fn: mk("a.go", "f", 80), + deltas: map[string]map[string]int{"a.go": {"f": 70}}, + want: "function=80 lines (+10 vs base)", + }, + { + // Lock subtraction direction (head - base). 
+ name: "subtraction_not_addition", + fn: mk("a.go", "f", 100), + deltas: map[string]map[string]int{"a.go": {"f": 95}}, + want: "function=100 lines (+5 vs base)", + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if got := formatFuncSizeMsg(tc.fn, tc.deltas); got != tc.want { + t.Errorf("formatFuncSizeMsg = %q, want %q", got, tc.want) + } + }) + } +} + +func TestFormatFileSizeMsg(t *testing.T) { + mk := func(path string, lines int) lang.FileSize { + return lang.FileSize{Path: path, Lines: lines} + } + cases := []struct { + name string + f lang.FileSize + deltas map[string]int + want string + }{ + { + name: "no_baseline_bare_message", + f: mk("big.go", 600), + deltas: nil, + want: "file=600 lines", + }, + { + name: "no_entry_for_path", + f: mk("big.go", 600), + deltas: map[string]int{"other.go": 100}, + want: "file=600 lines", + }, + { + name: "renders_positive_delta", + f: mk("big.go", 600), + deltas: map[string]int{"big.go": 580}, + want: "file=600 lines (+20 vs base)", + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if got := formatFileSizeMsg(tc.f, tc.deltas); got != tc.want { + t.Errorf("formatFileSizeMsg = %q, want %q", got, tc.want) + } + }) + } +} + // TestSizesDelta_FunctionUnchangedDropped: legacy oversized function, only a // comment inside is touched by the PR. Line count unchanged → drop. 
func TestSizesDelta_FunctionUnchangedDropped(t *testing.T) { diff --git a/internal/sizes/sizes_test.go b/internal/sizes/sizes_test.go index d53f11d..9aad557 100644 --- a/internal/sizes/sizes_test.go +++ b/internal/sizes/sizes_test.go @@ -27,7 +27,7 @@ func TestCheckFuncSizes(t *testing.T) { {FunctionInfo: lang.FunctionInfo{File: "c.go", Line: 1, Name: "huge"}, Lines: 100}, } - findings := checkFuncSizes(funcs, 50) + findings := checkFuncSizes(funcs, 50, nil) if len(findings) != 2 { t.Errorf("expected 2 violations, got %d", len(findings)) } @@ -44,7 +44,7 @@ func TestCheckFuncSizes_AtBoundary(t *testing.T) { {FunctionInfo: lang.FunctionInfo{File: "b.go", Line: 1, Name: "over"}, Lines: 51}, } - findings := checkFuncSizes(funcs, 50) + findings := checkFuncSizes(funcs, 50, nil) if len(findings) != 1 { t.Errorf("expected 1 violation (51 > 50), got %d", len(findings)) } @@ -56,7 +56,7 @@ func TestCheckFileSizes(t *testing.T) { {Path: "big.go", Lines: 600}, } - findings := checkFileSizes(files, 500) + findings := checkFileSizes(files, 500, nil) if len(findings) != 1 { t.Errorf("expected 1 violation, got %d", len(findings)) } @@ -68,14 +68,14 @@ func TestCheckFileSizes_AtBoundary(t *testing.T) { {Path: "over.go", Lines: 501}, } - findings := checkFileSizes(files, 500) + findings := checkFileSizes(files, 500, nil) if len(findings) != 1 { t.Errorf("expected 1 violation (501 > 500), got %d", len(findings)) } } func TestBuildSection_Empty(t *testing.T) { - s := buildSection(nil, nil, 50, 500) + s := buildSection(nil, nil, 50, 500, nil, nil) if s.Severity != report.SeverityPass { t.Errorf("empty section severity = %v, want PASS", s.Severity) } @@ -86,7 +86,7 @@ func TestBuildSection_Empty(t *testing.T) { func TestBuildSection_WithViolations(t *testing.T) { funcs := []lang.FunctionSize{{FunctionInfo: lang.FunctionInfo{File: "a.go", Line: 1, Name: "big"}, Lines: 100}} - s := buildSection(funcs, nil, 50, 500) + s := buildSection(funcs, nil, 50, 500, nil, nil) if s.Severity != 
report.SeverityFail { t.Errorf("section severity = %v, want FAIL", s.Severity) } @@ -98,7 +98,7 @@ func TestBuildSection_WithViolations(t *testing.T) { func TestBuildSection_NoViolations(t *testing.T) { funcs := []lang.FunctionSize{{FunctionInfo: lang.FunctionInfo{File: "a.go", Line: 1, Name: "small"}, Lines: 10}} files := []lang.FileSize{{Path: "a.go", Lines: 100}} - s := buildSection(funcs, files, 50, 500) + s := buildSection(funcs, files, 50, 500, nil, nil) if s.Severity != report.SeverityPass { t.Errorf("severity = %v, want PASS", s.Severity) } @@ -110,7 +110,7 @@ func TestBuildSection_SortedByValue(t *testing.T) { {FunctionInfo: lang.FunctionInfo{File: "b.go", Line: 1, Name: "huge"}, Lines: 200}, {FunctionInfo: lang.FunctionInfo{File: "c.go", Line: 1, Name: "big"}, Lines: 80}, } - s := buildSection(funcs, nil, 50, 500) + s := buildSection(funcs, nil, 50, 500, nil, nil) if len(s.Findings) != 3 { t.Fatalf("expected 3 findings, got %d", len(s.Findings)) } From cbc2481c5c379cc724041617e8875ae4e3961013 Mon Sep 17 00:00:00 2001 From: Jerry Date: Tue, 5 May 2026 15:21:51 -0700 Subject: [PATCH 3/4] fix(complexity,sizes): keep all-functions stats separate from filtered findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous commit reassigned the analyzer's full result list to the post-delta-filter subset, which corrupted the section's mean / median / max / total-functions stats. Worse: when the filter dropped *every* finding (e.g. the only complex function in the diff was untouched), the section reported "No changed functions to analyze" — which is the empty-input case, not the all-clear case. Surfaced via TS/Rust smoke tests where a single-file diff has exactly one analyzable function. Both Analyze functions now thread two lists: `analyzed` (everything the language calculator returned, used for stats) and `candidates` (the post-filter subset eligible to become findings). 
buildSection stays a single function, taking both. Also adds multi-language regression tests: - internal/complexity/multilang_delta_test.go covers TypeScript and Rust delta gating end-to-end. - internal/sizes/multilang_delta_test.go does the same for sizes. Both confirm that the orchestrator drives whichever ComplexityCalculator / FunctionExtractor each language registers, and that the temp-file-with-preserved-extension trick works for the tree-sitter analyzers as well as Go's go/parser. On the bor blockstm_redesign branch (vs origin/develop), the stats now read "285 functions analyzed | Max: 91 | 7 over threshold" — the full diff is described, with 7 violations vs the original 17. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/complexity/complexity.go | 40 +++-- internal/complexity/complexity_test.go | 6 +- internal/complexity/multilang_delta_test.go | 153 ++++++++++++++++++++ internal/sizes/multilang_delta_test.go | 121 ++++++++++++++++ internal/sizes/sizes.go | 31 +++- internal/sizes/sizes_test.go | 8 +- 6 files changed, 334 insertions(+), 25 deletions(-) create mode 100644 internal/complexity/multilang_delta_test.go create mode 100644 internal/sizes/multilang_delta_test.go diff --git a/internal/complexity/complexity.go b/internal/complexity/complexity.go index 1d04b65..362d05c 100644 --- a/internal/complexity/complexity.go +++ b/internal/complexity/complexity.go @@ -27,14 +27,14 @@ import ( // calculator layer (returning nil) so a single malformed file doesn't fail // the whole run. func Analyze(repoPath string, d *diff.Result, threshold, deltaTolerance int, calc lang.ComplexityCalculator) (report.Section, error) { - var results []lang.FunctionComplexity + var analyzed []lang.FunctionComplexity for _, fc := range d.Files { absPath := filepath.Join(repoPath, fc.Path) fnResults, err := calc.AnalyzeFile(absPath, fc) if err != nil { return report.Section{}, fmt.Errorf("analyzing %s: %w", fc.Path, err) } - results = append(results, fnResults...) 
+ analyzed = append(analyzed, fnResults...) } // Delta gating: in diff mode, drop pre-existing violations so PRs aren't @@ -44,14 +44,18 @@ func Analyze(repoPath string, d *diff.Result, threshold, deltaTolerance int, cal // this branch). Refactoring mode (CollectPaths) leaves MergeBase empty // and falls through to absolute thresholds. // - // deltas tracks (file, name) -> base complexity for findings that survive - // the filter, so the section can render head/base/Δ in the message. + // `candidates` is the subset of analyzed functions eligible to become + // findings; `analyzed` stays full so the section's mean / median / max / + // total stats describe everything in the diff, not just the worsened + // subset. deltas tracks (file, name) -> base complexity for those + // candidates that existed at base, so the message shows head/base/Δ. + candidates := analyzed var deltas map[string]map[string]int if d.MergeBase != "" { - results, deltas = applyDeltaFilter(repoPath, d, results, calc, deltaTolerance) + candidates, deltas = applyDeltaFilter(repoPath, d, analyzed, calc, deltaTolerance) } - return buildSection(results, threshold, deltas), nil + return buildSection(analyzed, candidates, threshold, deltas), nil } // applyDeltaFilter drops findings whose complexity at the merge-base was @@ -172,8 +176,13 @@ func formatComplexityMsg(r lang.FunctionComplexity, deltas map[string]map[string return fmt.Sprintf("complexity=%d (+%d vs base)", r.Complexity, r.Complexity-base) } -func buildSection(results []lang.FunctionComplexity, threshold int, deltas map[string]map[string]int) report.Section { - if len(results) == 0 { +// buildSection renders the section. `analyzed` is every function the +// per-language calculator returned (used for stats / summary so the report +// describes the whole diff, not just the worsened subset). 
`candidates` is +// the subset eligible to become findings — in refactoring mode the two are +// the same; in diff mode candidates is the post-delta-filter list. +func buildSection(analyzed, candidates []lang.FunctionComplexity, threshold int, deltas map[string]map[string]int) report.Section { + if len(analyzed) == 0 { return report.Section{ Name: "Cognitive Complexity", Summary: "No changed functions to analyze", @@ -181,7 +190,8 @@ func buildSection(results []lang.FunctionComplexity, threshold int, deltas map[s } } - findings, values, failCount := collectComplexityFindings(results, threshold, deltas) + findings, _, failCount := collectComplexityFindings(candidates, threshold, deltas) + values := complexityValues(analyzed) sev := report.SeverityPass if failCount > 0 { @@ -190,7 +200,7 @@ func buildSection(results []lang.FunctionComplexity, threshold int, deltas map[s m, med := mean(values), median(values) summary := fmt.Sprintf("%d functions analyzed | Mean: %.1f | Median: %.0f | Max: %.0f | %d over threshold (%d)", - len(results), m, med, max(values), failCount, threshold) + len(analyzed), m, med, max(values), failCount, threshold) return report.Section{ Name: "Cognitive Complexity", @@ -198,7 +208,7 @@ func buildSection(results []lang.FunctionComplexity, threshold int, deltas map[s Severity: sev, Findings: findings, Stats: map[string]any{ - "total_functions": len(results), + "total_functions": len(analyzed), "mean": math.Round(m*10) / 10, "median": med, "max": max(values), @@ -209,6 +219,14 @@ func buildSection(results []lang.FunctionComplexity, threshold int, deltas map[s } } +func complexityValues(results []lang.FunctionComplexity) []float64 { + values := make([]float64, len(results)) + for i, r := range results { + values[i] = float64(r.Complexity) + } + return values +} + func mean(vals []float64) float64 { if len(vals) == 0 { return 0 diff --git a/internal/complexity/complexity_test.go b/internal/complexity/complexity_test.go index b75503d..c4e7e19 100644 
--- a/internal/complexity/complexity_test.go +++ b/internal/complexity/complexity_test.go @@ -99,7 +99,7 @@ func TestBuildSection_StatsValues(t *testing.T) { {FunctionInfo: lang.FunctionInfo{File: "c.go", Line: 1, Name: "f3"}, Complexity: 12}, } - s := buildSection(results, 10, nil) + s := buildSection(results, results, 10, nil) stats := s.Stats.(map[string]any) if stats["total_functions"] != 3 { @@ -120,7 +120,7 @@ func TestBuildSection_StatsValues(t *testing.T) { } func TestBuildSection_Empty(t *testing.T) { - s := buildSection(nil, 10, nil) + s := buildSection(nil, nil, 10, nil) if s.Severity != report.SeverityPass { t.Errorf("severity = %v, want PASS", s.Severity) } @@ -132,7 +132,7 @@ func TestBuildSection_WithViolations(t *testing.T) { {FunctionInfo: lang.FunctionInfo{File: "b.go", Line: 1, Name: "simple"}, Complexity: 3}, } - s := buildSection(results, 10, nil) + s := buildSection(results, results, 10, nil) if s.Severity != report.SeverityFail { t.Errorf("severity = %v, want FAIL", s.Severity) } diff --git a/internal/complexity/multilang_delta_test.go b/internal/complexity/multilang_delta_test.go new file mode 100644 index 0000000..0a47c97 --- /dev/null +++ b/internal/complexity/multilang_delta_test.go @@ -0,0 +1,153 @@ +package complexity + +import ( + "path/filepath" + "slices" + "testing" + + "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" + _ "github.com/0xPolygon/diffguard/internal/lang/rustanalyzer" + _ "github.com/0xPolygon/diffguard/internal/lang/tsanalyzer" + "github.com/0xPolygon/diffguard/internal/report" +) + +// TestDeltaGating_AcrossLanguages locks in that delta gating is +// language-agnostic: the orchestrator drives whichever ComplexityCalculator +// the language registers, and the temp-file-with-preserved-extension trick +// works for the tree-sitter-based TypeScript and Rust analyzers as well as +// for Go. Without this test, a regression in the TS/Rust path (e.g. 
an +// analyzer that rejects unknown paths or that produces non-deterministic +// names) could land silently. +func TestDeltaGating_AcrossLanguages(t *testing.T) { + cases := []struct { + name string + lang string + ext string + manifest map[string]string + // baseV / featV differ on a single comment line *inside* the + // function body, so OverlapsRange picks the function up but + // cognitive complexity is identical → delta gating must drop it. + baseV string + featV string + }{ + { + name: "typescript", + lang: "typescript", + ext: ".ts", + manifest: map[string]string{ + "package.json": `{"name":"x","version":"1.0.0"}` + "\n", + }, + baseV: tsBody("v1"), + featV: tsBody("v2"), + }, + { + name: "rust", + lang: "rust", + ext: ".rs", + manifest: map[string]string{ + "Cargo.toml": "[package]\nname = \"x\"\nversion = \"0.1.0\"\nedition = \"2021\"\n", + }, + baseV: rustBody("v1"), + featV: rustBody("v2"), + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + dir := initRepo(t) + for name, content := range tc.manifest { + writeAndCommit(t, dir, name, content, "manifest") + } + // Tree-sitter front-ends parse the file directly — Cargo's + // usual src/ layout isn't required for complexity analysis. + path := "lib" + tc.ext + writeAndCommit(t, dir, path, tc.baseV, "base") + + runGit(t, dir, "checkout", "-q", "-b", "feature") + writeAndCommit(t, dir, path, tc.featV, "tweak inner comment") + + s := analyzeMultiLang(t, dir, "main", tc.lang) + if s.Severity != report.SeverityPass { + t.Errorf("severity = %v, want PASS (legacy complexity not worsened); findings=%+v", s.Severity, s.Findings) + } + if len(s.Findings) != 0 { + t.Errorf("findings = %d, want 0", len(s.Findings)) + } + }) + } +} + +// analyzeMultiLang mirrors parseAndAnalyze but takes a language name so the +// TS / Rust analyzers can be exercised without dragging Go-specific helpers +// into the test setup. 
+func analyzeMultiLang(t *testing.T, dir, base, langName string) report.Section { + t.Helper() + l, ok := lang.Get(langName) + if !ok { + t.Fatalf("language %q not registered", langName) + } + d, err := diff.Parse(dir, base, diff.Filter{ + DiffGlobs: l.FileFilter().DiffGlobs, + Includes: func(p string) bool { + ff := l.FileFilter() + if !slices.Contains(ff.Extensions, filepath.Ext(p)) { + return false + } + return !ff.IsTestFile(p) + }, + }) + if err != nil { + t.Fatalf("diff.Parse: %v", err) + } + section, err := Analyze(dir, d, 10, 0, l.ComplexityCalculator()) + if err != nil { + t.Fatalf("Analyze: %v", err) + } + return section +} + +// tsBody returns a TypeScript function body whose 6-deep nested ifs land +// cognitive complexity well above the threshold. tag goes inside the +// inner-most block so the diff between v1 and v2 lands inside the function. +func tsBody(tag string) string { + return `export function complex(x: number) { + if (x > 0) { + if (x > 10) { + if (x > 100) { + if (x > 1000) { + if (x > 10000) { + if (x > 100000) { + return x // ` + tag + ` + } + } + } + } + } + } + return 0 +} +` +} + +// rustBody mirrors tsBody for Rust. Same nesting shape so cognitive scores +// stay comparable across languages. 
+func rustBody(tag string) string { + return `pub fn complex(x: i32) -> i32 { + if x > 0 { + if x > 10 { + if x > 100 { + if x > 1000 { + if x > 10000 { + if x > 100000 { + return x; // ` + tag + ` + } + } + } + } + } + } + 0 +} +` +} diff --git a/internal/sizes/multilang_delta_test.go b/internal/sizes/multilang_delta_test.go new file mode 100644 index 0000000..eb39849 --- /dev/null +++ b/internal/sizes/multilang_delta_test.go @@ -0,0 +1,121 @@ +package sizes + +import ( + "fmt" + "path/filepath" + "slices" + "strings" + "testing" + + "github.com/0xPolygon/diffguard/internal/diff" + "github.com/0xPolygon/diffguard/internal/lang" + _ "github.com/0xPolygon/diffguard/internal/lang/rustanalyzer" + _ "github.com/0xPolygon/diffguard/internal/lang/tsanalyzer" + "github.com/0xPolygon/diffguard/internal/report" +) + +// TestSizesDelta_AcrossLanguages mirrors the complexity multi-language test: +// confirms that delta-gating drops oversized-but-untouched-size functions +// and files for TypeScript and Rust as well as Go. The orchestrator is +// language-agnostic, but tree-sitter analyzers can fail in subtle ways on +// temp paths if extension preservation is wrong, so this test guards the +// integration. +func TestSizesDelta_AcrossLanguages(t *testing.T) { + cases := []struct { + name string + lang string + ext string + manifest map[string]string + // header / footer wrap a language-appropriate "no-op statement". + // The body has 80 such statements so the function lands above + // funcThreshold=50; a single-line tweak in the diff overlaps the + // function but doesn't change line count → delta gate must drop. 
+ header string + footer string + stmt func(i int) string + }{ + { + name: "typescript", + lang: "typescript", + ext: ".ts", + manifest: map[string]string{ + "package.json": `{"name":"x","version":"1.0.0"}` + "\n", + }, + header: "export function big() {\n", + footer: "}\n", + stmt: func(i int) string { return fmt.Sprintf(" let x%d = %d\n", i, i) }, + }, + { + name: "rust", + lang: "rust", + ext: ".rs", + manifest: map[string]string{ + "Cargo.toml": "[package]\nname = \"x\"\nversion = \"0.1.0\"\nedition = \"2021\"\n", + }, + header: "pub fn big() {\n", + footer: "}\n", + stmt: func(i int) string { return fmt.Sprintf(" let _x%d = %d;\n", i, i) }, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + dir := initRepo(t) + for name, content := range tc.manifest { + writeAndCommit(t, dir, name, content, "manifest") + } + path := "lib" + tc.ext + writeAndCommit(t, dir, path, buildBigFile(tc.header, tc.footer, tc.stmt, "v1"), "base") + + runGit(t, dir, "checkout", "-q", "-b", "feature") + writeAndCommit(t, dir, path, buildBigFile(tc.header, tc.footer, tc.stmt, "v2"), "tweak inner comment") + + s := analyzeMultiLangSizes(t, dir, "main", tc.lang) + for _, f := range s.Findings { + t.Errorf("%s: legacy size finding leaked through delta gate: %+v", tc.name, f) + } + }) + } +} + +func analyzeMultiLangSizes(t *testing.T, dir, base, langName string) report.Section { + t.Helper() + l, ok := lang.Get(langName) + if !ok { + t.Fatalf("language %q not registered", langName) + } + d, err := diff.Parse(dir, base, diff.Filter{ + DiffGlobs: l.FileFilter().DiffGlobs, + Includes: func(p string) bool { + ff := l.FileFilter() + if !slices.Contains(ff.Extensions, filepath.Ext(p)) { + return false + } + return !ff.IsTestFile(p) + }, + }) + if err != nil { + t.Fatalf("diff.Parse: %v", err) + } + section, err := Analyze(dir, d, 50, 500, l.FunctionExtractor()) + if err != nil { + t.Fatalf("Analyze: %v", err) + } + return section +} + +// buildBigFile assembles a >50-line 
function whose only diff between +// versions is a single comment marker inside the body, so the diff overlaps +// the function but the line count is unchanged. +func buildBigFile(header, footer string, stmt func(int) string, marker string) string { + var sb strings.Builder + sb.WriteString(header) + for i := range 80 { + if i == 0 { + sb.WriteString(" // " + marker + "\n") + } + sb.WriteString(stmt(i)) + } + sb.WriteString(footer) + return sb.String() +} diff --git a/internal/sizes/sizes.go b/internal/sizes/sizes.go index 33a6a5a..58db9fc 100644 --- a/internal/sizes/sizes.go +++ b/internal/sizes/sizes.go @@ -38,16 +38,21 @@ func Analyze(repoPath string, d *diff.Result, funcThreshold, fileThreshold int, // >= head lines. Per-file: drop if base lines >= head lines (touching a // 4000-line file without growing it is not a regression). // - // The returned delta maps (head only retains entries whose function/file - // existed at base) feed the message formatter so PR authors can see - // "+10 vs base" alongside the head line count. + // candidate{Funcs,Files} is the post-filter subset eligible to become + // findings; the original {func,file}Results stay in scope so the section's + // summary describes the whole diff, not just the worsened subset. Without + // this split, "0 over threshold" tests against an empty list and the + // section misreports "No changed functions or files to analyze" when + // every legacy violation got correctly filtered out. 
+ candidateFuncs := funcResults + candidateFiles := fileResults var funcDeltas map[string]map[string]int var fileDeltas map[string]int if d.MergeBase != "" { - funcResults, fileResults, funcDeltas, fileDeltas = applySizeDeltaFilter(repoPath, d, funcResults, fileResults, extractor) + candidateFuncs, candidateFiles, funcDeltas, fileDeltas = applySizeDeltaFilter(repoPath, d, funcResults, fileResults, extractor) } - return buildSection(funcResults, fileResults, funcThreshold, fileThreshold, funcDeltas, fileDeltas), nil + return buildSection(funcResults, fileResults, candidateFuncs, candidateFiles, funcThreshold, fileThreshold, funcDeltas, fileDeltas), nil } // applySizeDeltaFilter drops per-function and per-file findings whose @@ -236,7 +241,19 @@ func formatFileSizeMsg(f lang.FileSize, deltas map[string]int) string { return fmt.Sprintf("file=%d lines (+%d vs base)", f.Lines, f.Lines-base) } -func buildSection(funcs []lang.FunctionSize, files []lang.FileSize, funcThreshold, fileThreshold int, funcDeltas map[string]map[string]int, fileDeltas map[string]int) report.Section { +// buildSection renders the section. {func,file}s are everything the language +// extractor returned (used for stats); candidate{Funcs,Files} is the subset +// eligible to become findings. In refactoring mode they're identical; in +// diff mode candidate{Funcs,Files} is the post-delta-filter list. +func buildSection( + funcs []lang.FunctionSize, + files []lang.FileSize, + candidateFuncs []lang.FunctionSize, + candidateFiles []lang.FileSize, + funcThreshold, fileThreshold int, + funcDeltas map[string]map[string]int, + fileDeltas map[string]int, +) report.Section { if len(funcs) == 0 && len(files) == 0 { return report.Section{ Name: "Code Sizes", @@ -245,7 +262,7 @@ func buildSection(funcs []lang.FunctionSize, files []lang.FileSize, funcThreshol } } - findings := append(checkFuncSizes(funcs, funcThreshold, funcDeltas), checkFileSizes(files, fileThreshold, fileDeltas)...) 
+ findings := append(checkFuncSizes(candidateFuncs, funcThreshold, funcDeltas), checkFileSizes(candidateFiles, fileThreshold, fileDeltas)...) sort.Slice(findings, func(i, j int) bool { return findings[i].Value > findings[j].Value diff --git a/internal/sizes/sizes_test.go b/internal/sizes/sizes_test.go index 9aad557..0ffb57d 100644 --- a/internal/sizes/sizes_test.go +++ b/internal/sizes/sizes_test.go @@ -75,7 +75,7 @@ func TestCheckFileSizes_AtBoundary(t *testing.T) { } func TestBuildSection_Empty(t *testing.T) { - s := buildSection(nil, nil, 50, 500, nil, nil) + s := buildSection(nil, nil, nil, nil, 50, 500, nil, nil) if s.Severity != report.SeverityPass { t.Errorf("empty section severity = %v, want PASS", s.Severity) } @@ -86,7 +86,7 @@ func TestBuildSection_Empty(t *testing.T) { func TestBuildSection_WithViolations(t *testing.T) { funcs := []lang.FunctionSize{{FunctionInfo: lang.FunctionInfo{File: "a.go", Line: 1, Name: "big"}, Lines: 100}} - s := buildSection(funcs, nil, 50, 500, nil, nil) + s := buildSection(funcs, nil, funcs, nil, 50, 500, nil, nil) if s.Severity != report.SeverityFail { t.Errorf("section severity = %v, want FAIL", s.Severity) } @@ -98,7 +98,7 @@ func TestBuildSection_WithViolations(t *testing.T) { func TestBuildSection_NoViolations(t *testing.T) { funcs := []lang.FunctionSize{{FunctionInfo: lang.FunctionInfo{File: "a.go", Line: 1, Name: "small"}, Lines: 10}} files := []lang.FileSize{{Path: "a.go", Lines: 100}} - s := buildSection(funcs, files, 50, 500, nil, nil) + s := buildSection(funcs, files, funcs, files, 50, 500, nil, nil) if s.Severity != report.SeverityPass { t.Errorf("severity = %v, want PASS", s.Severity) } @@ -110,7 +110,7 @@ func TestBuildSection_SortedByValue(t *testing.T) { {FunctionInfo: lang.FunctionInfo{File: "b.go", Line: 1, Name: "huge"}, Lines: 200}, {FunctionInfo: lang.FunctionInfo{File: "c.go", Line: 1, Name: "big"}, Lines: 80}, } - s := buildSection(funcs, nil, 50, 500, nil, nil) + s := buildSection(funcs, nil, funcs, 
nil, 50, 500, nil, nil) if len(s.Findings) != 3 { t.Fatalf("expected 3 findings, got %d", len(s.Findings)) } From 04617aad1d65b7960b7569eb56c031bd24b79dca Mon Sep 17 00:00:00 2001 From: Jerry Date: Tue, 5 May 2026 15:46:56 -0700 Subject: [PATCH 4/4] feat(sizes): tolerances for per-function and per-file size deltas MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Function size: --function-size-delta-tolerance flag, default 5 lines. Threading a parameter through a 60-line function or adding a couple of defensive guards no longer fails on a function that was already over threshold. File size: --file-size-delta-tolerance-pct (default 5) + --file-size-delta-tolerance-floor (default 10). The drop rule is "growth ≤ max(floor, base × pct%)". Percentage scales with file size: +99 lines on a 4382-line file (2.3%) is forgiven, but +298 on a 2031-line file (14.7%) still flags. The floor stops "+8 to a 600-line file" (which is 1.3%) from over-firing on small files where %-of-base collapses below the noise level. Tolerances live behind a new sizes.DeltaTolerances struct so adding language-specific or per-rule overrides later is local. Zero value = strict (preserves previous behavior for any caller that doesn't populate it). On the bor blockstm_redesign branch, this drops 7 further findings: files dropped: blockchain.go +99 (2.3%), instructions.go +16 (1.5%), state_transition.go +11 (1.4%), trie_prefetcher.go +18 (3.4%) funcs dropped: stateTransition.execute +1, NewEVM +4, updateTrie (no growth), preloadAddressAsync +4 kept legitimately: statedb.go +298 (14.7%), pps.go +705 (145%), trie.go +59 (6.8%), reader.go +76 (10.8%), evm.go +57 (7.7%), plus brand-new files / functions. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- cmd/diffguard/main.go | 20 ++++-- internal/sizes/multilang_delta_test.go | 2 +- internal/sizes/sizes.go | 82 +++++++++++++++++------- internal/sizes/sizes_delta_test.go | 88 ++++++++++++++++++++------ internal/sizes/sizes_test.go | 2 +- 5 files changed, 144 insertions(+), 50 deletions(-) diff --git a/cmd/diffguard/main.go b/cmd/diffguard/main.go index 2c1a5a4..8cd7b6f 100644 --- a/cmd/diffguard/main.go +++ b/cmd/diffguard/main.go @@ -29,7 +29,10 @@ func main() { flag.IntVar(&cfg.ComplexityThreshold, "complexity-threshold", 10, "Maximum cognitive complexity per function") flag.IntVar(&cfg.ComplexityDeltaTolerance, "complexity-delta-tolerance", 3, "In diff mode, ignore complexity regressions where head exceeds base by this much or less; brand-new functions still gated by --complexity-threshold") flag.IntVar(&cfg.FunctionSizeThreshold, "function-size-threshold", 50, "Maximum lines per function") + flag.IntVar(&cfg.FunctionSizeDeltaTolerance, "function-size-delta-tolerance", 5, "In diff mode, ignore per-function size regressions where head grows by this many lines or fewer") flag.IntVar(&cfg.FileSizeThreshold, "file-size-threshold", 500, "Maximum lines per file") + flag.IntVar(&cfg.FileSizeDeltaTolerancePct, "file-size-delta-tolerance-pct", 5, "In diff mode, ignore per-file size regressions where head grows by no more than this % of base lines (subject to --file-size-delta-tolerance-floor)") + flag.IntVar(&cfg.FileSizeDeltaToleranceFloor, "file-size-delta-tolerance-floor", 10, "Minimum absolute line growth tolerated regardless of --file-size-delta-tolerance-pct, so tiny absolute additions to small files don't fail") flag.BoolVar(&cfg.SkipMutation, "skip-mutation", false, "Skip mutation testing") flag.BoolVar(&cfg.SkipDeadCode, "skip-deadcode", false, "Skip dead code (unused symbol) detection") flag.BoolVar(&cfg.SkipGenerated, "skip-generated", true, "Skip files marked as generated (for example `Code generated ... 
DO NOT EDIT`)") @@ -70,10 +73,13 @@ func main() { // Config holds CLI configuration. type Config struct { - ComplexityThreshold int - ComplexityDeltaTolerance int - FunctionSizeThreshold int - FileSizeThreshold int + ComplexityThreshold int + ComplexityDeltaTolerance int + FunctionSizeThreshold int + FunctionSizeDeltaTolerance int + FileSizeThreshold int + FileSizeDeltaTolerancePct int + FileSizeDeltaToleranceFloor int SkipMutation bool SkipDeadCode bool SkipGenerated bool @@ -280,7 +286,11 @@ func runAnalyses(repoPath string, d *diff.Result, cfg Config, l lang.Language) ( } sections = append(sections, complexitySection) - sizesSection, err := sizes.Analyze(repoPath, d, cfg.FunctionSizeThreshold, cfg.FileSizeThreshold, l.FunctionExtractor()) + sizesSection, err := sizes.Analyze(repoPath, d, cfg.FunctionSizeThreshold, cfg.FileSizeThreshold, sizes.DeltaTolerances{ + FuncLines: cfg.FunctionSizeDeltaTolerance, + FilePct: cfg.FileSizeDeltaTolerancePct, + FileFloorLines: cfg.FileSizeDeltaToleranceFloor, + }, l.FunctionExtractor()) if err != nil { return nil, fmt.Errorf("size analysis: %w", err) } diff --git a/internal/sizes/multilang_delta_test.go b/internal/sizes/multilang_delta_test.go index eb39849..a558fa0 100644 --- a/internal/sizes/multilang_delta_test.go +++ b/internal/sizes/multilang_delta_test.go @@ -97,7 +97,7 @@ func analyzeMultiLangSizes(t *testing.T, dir, base, langName string) report.Sect if err != nil { t.Fatalf("diff.Parse: %v", err) } - section, err := Analyze(dir, d, 50, 500, l.FunctionExtractor()) + section, err := Analyze(dir, d, 50, 500, DeltaTolerances{}, l.FunctionExtractor()) if err != nil { t.Fatalf("Analyze: %v", err) } diff --git a/internal/sizes/sizes.go b/internal/sizes/sizes.go index 58db9fc..b9d0c37 100644 --- a/internal/sizes/sizes.go +++ b/internal/sizes/sizes.go @@ -15,9 +15,22 @@ import ( "github.com/0xPolygon/diffguard/internal/report" ) +// DeltaTolerances captures the size deltas that diff mode is willing to +// forgive. 
A function whose line count grew by FuncLines or fewer is +// dropped. A file is dropped when its line growth is within +// max(FileFloorLines, base * FilePct%) — percentage scales with file size +// so adding 50 lines to a 5000-line file is forgiven while the same growth +// on a 500-line file is flagged. A zero-value DeltaTolerances acts as +// strict gating (all growth flagged) to preserve the pre-tolerance default. +type DeltaTolerances struct { + FuncLines int + FilePct int + FileFloorLines int +} + // Analyze measures lines of code for changed functions and files using the // supplied language extractor. -func Analyze(repoPath string, d *diff.Result, funcThreshold, fileThreshold int, extractor lang.FunctionExtractor) (report.Section, error) { +func Analyze(repoPath string, d *diff.Result, funcThreshold, fileThreshold int, tol DeltaTolerances, extractor lang.FunctionExtractor) (report.Section, error) { var funcResults []lang.FunctionSize var fileResults []lang.FileSize @@ -49,7 +62,7 @@ func Analyze(repoPath string, d *diff.Result, funcThreshold, fileThreshold int, var funcDeltas map[string]map[string]int var fileDeltas map[string]int if d.MergeBase != "" { - candidateFuncs, candidateFiles, funcDeltas, fileDeltas = applySizeDeltaFilter(repoPath, d, funcResults, fileResults, extractor) + candidateFuncs, candidateFiles, funcDeltas, fileDeltas = applySizeDeltaFilter(repoPath, d, funcResults, fileResults, tol, extractor) } return buildSection(funcResults, fileResults, candidateFuncs, candidateFiles, funcThreshold, fileThreshold, funcDeltas, fileDeltas), nil @@ -64,27 +77,28 @@ func applySizeDeltaFilter( d *diff.Result, headFuncs []lang.FunctionSize, headFiles []lang.FileSize, + tol DeltaTolerances, extractor lang.FunctionExtractor, ) ([]lang.FunctionSize, []lang.FileSize, map[string]map[string]int, map[string]int) { byFile := make(map[string]baseSizes) for _, fc := range d.Files { byFile[fc.Path] = baseSizesFor(repoPath, d.MergeBase, fc.Path, extractor) } - 
keptFuncs, funcDeltas := filterFuncSizes(headFuncs, byFile) - keptFiles, fileDeltas := filterFileSizes(headFiles, byFile) + keptFuncs, funcDeltas := filterFuncSizes(headFuncs, byFile, tol.FuncLines) + keptFiles, fileDeltas := filterFileSizes(headFiles, byFile, tol.FilePct, tol.FileFloorLines) return keptFuncs, keptFiles, funcDeltas, fileDeltas } -// filterFuncSizes drops per-function findings whose base-side line count was -// already >= the head-side count (no growth this PR). Returns both the kept -// findings and a delta map (file -> name -> base lines) so the message -// formatter can render "+N vs base" alongside the head value. -func filterFuncSizes(head []lang.FunctionSize, byFile map[string]baseSizes) ([]lang.FunctionSize, map[string]map[string]int) { +// filterFuncSizes drops per-function findings whose growth from base is +// within tolerance lines. Returns both the kept findings and a delta map +// (file -> name -> base lines) so the message formatter can render "+N vs +// base" alongside the head value. +func filterFuncSizes(head []lang.FunctionSize, byFile map[string]baseSizes, tolerance int) ([]lang.FunctionSize, map[string]map[string]int) { var out []lang.FunctionSize deltas := make(map[string]map[string]int) for _, h := range head { b := byFile[h.File] - if !grewFunc(h, b) { + if !grewFunc(h, b, tolerance) { continue } out = append(out, h) @@ -110,15 +124,15 @@ func recordFuncDelta(deltas map[string]map[string]int, h lang.FunctionSize, b ba deltas[h.File][h.Name] = base } -// filterFileSizes drops per-file findings whose whole-file line count was -// already >= the head-side count. Returns kept findings + a path -> base +// filterFileSizes drops per-file findings whose growth is within +// max(floorLines, base * pct%). Returns kept findings + a path -> base // lines map for delta-aware message formatting. 
-func filterFileSizes(head []lang.FileSize, byFile map[string]baseSizes) ([]lang.FileSize, map[string]int) { +func filterFileSizes(head []lang.FileSize, byFile map[string]baseSizes, pct, floorLines int) ([]lang.FileSize, map[string]int) { var out []lang.FileSize deltas := make(map[string]int) for _, h := range head { b := byFile[h.Path] - if !grewFile(h, b) { + if !grewFile(h, b, pct, floorLines) { continue } out = append(out, h) @@ -129,10 +143,11 @@ func filterFileSizes(head []lang.FileSize, byFile map[string]baseSizes) ([]lang. return out, deltas } -// grewFunc reports whether a function-size finding represents real growth on -// this branch. Missing baseline (file absent at base, or function not present -// at base) counts as growth so absolute thresholds still apply. -func grewFunc(h lang.FunctionSize, b baseSizes) bool { +// grewFunc reports whether a function's line growth is large enough to +// flag. Missing baseline (file absent at base, or function not present at +// base) counts as growth so absolute thresholds still apply to brand-new +// code; otherwise the head must exceed base by more than tolerance. +func grewFunc(h lang.FunctionSize, b baseSizes, tolerance int) bool { if !b.ok { return true } @@ -140,16 +155,37 @@ func grewFunc(h lang.FunctionSize, b baseSizes) bool { if !exists { return true } - return h.Lines > base + return h.Lines > base+tolerance } -// grewFile mirrors grewFunc for whole-file size: missing baseline → growth; -// otherwise growth iff head exceeds base. -func grewFile(h lang.FileSize, b baseSizes) bool { +// grewFile mirrors grewFunc with a percentage-based tolerance: drop if +// growth ≤ max(floorLines, base * pct%). The floor stops "added 3 lines to +// a 100-line file" from over-firing on small files; the percentage stops +// "+50 to a 5000-line file" from over-firing on giant ones.
+func grewFile(h lang.FileSize, b baseSizes, pct, floorLines int) bool { if !b.ok { return true } - return h.Lines > b.file + delta := h.Lines - b.file + if delta <= 0 { + return false + } + return delta > fileGrowthTolerance(b.file, pct, floorLines) +} + +// fileGrowthTolerance returns the larger of floorLines and base * pct / 100 +// (integer arithmetic — pct is already %, so 5% of 1000 = 50). Negative or +// zero pct collapses to floorLines so callers can disable the percentage +// component without disabling the absolute floor. +func fileGrowthTolerance(base, pct, floorLines int) int { + pctTolerance := 0 + if pct > 0 && base > 0 { + pctTolerance = base * pct / 100 + } + if pctTolerance > floorLines { + return pctTolerance + } + return floorLines } // baseSizes captures the per-function and whole-file line counts of a single diff --git a/internal/sizes/sizes_delta_test.go b/internal/sizes/sizes_delta_test.go index 1d0431f..e337936 100644 --- a/internal/sizes/sizes_delta_test.go +++ b/internal/sizes/sizes_delta_test.go @@ -72,7 +72,7 @@ func parseAndAnalyze(t *testing.T, dir, base string, funcThreshold, fileThreshol if err != nil { t.Fatalf("diff.Parse: %v", err) } - section, err := Analyze(dir, d, funcThreshold, fileThreshold, goExtractor(t)) + section, err := Analyze(dir, d, funcThreshold, fileThreshold, DeltaTolerances{}, goExtractor(t)) if err != nil { t.Fatalf("Analyze: %v", err) } @@ -90,20 +90,28 @@ func TestGrewFunc(t *testing.T) { noBase := baseSizes{} cases := []struct { - name string - h lang.FunctionSize - b baseSizes - want bool + name string + h lang.FunctionSize + b baseSizes + tolerance int + want bool }{ - {"no_baseline_treated_as_grew", mk("f", 80), noBase, true}, - {"absent_at_base", mk("g", 80), withBase, true}, - {"head_higher", mk("f", 80), withBase, true}, - {"head_equal", mk("f", 60), withBase, false}, - {"head_lower", mk("f", 40), withBase, false}, + {"no_baseline_treated_as_grew", mk("f", 80), noBase, 0, true}, + {"absent_at_base", 
mk("g", 80), withBase, 0, true}, + {"head_higher_no_tolerance", mk("f", 80), withBase, 0, true}, + {"head_equal_no_tolerance", mk("f", 60), withBase, 0, false}, + {"head_lower", mk("f", 40), withBase, 0, false}, + // Tolerance cases: head must exceed base by *more than* tolerance. + // Default production setting is 5, so +1..+5 line bumps on a legacy + // function get forgiven; +6 is the first regression. + {"within_tolerance_lower_bound", mk("f", 61), withBase, 5, false}, + {"within_tolerance_at_bound", mk("f", 65), withBase, 5, false}, + {"just_over_tolerance", mk("f", 66), withBase, 5, true}, + {"new_function_tolerance_irrelevant", mk("g", 80), withBase, 100, true}, } for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { - if got := grewFunc(tc.h, tc.b); got != tc.want { + if got := grewFunc(tc.h, tc.b, tc.tolerance); got != tc.want { t.Errorf("grewFunc = %v, want %v", got, tc.want) } }) @@ -111,29 +119,69 @@ func TestGrewFunc(t *testing.T) { } func TestGrewFile(t *testing.T) { + mk := func(path string, lines int) lang.FileSize { + return lang.FileSize{Path: path, Lines: lines} + } withBase := baseSizes{file: 600, ok: true} noBase := baseSizes{} cases := []struct { - name string - h lang.FileSize - b baseSizes - want bool + name string + h lang.FileSize + b baseSizes + pct, floor int + want bool }{ - {"no_baseline_treated_as_grew", lang.FileSize{Path: "x.go", Lines: 700}, noBase, true}, - {"head_higher", lang.FileSize{Path: "x.go", Lines: 700}, withBase, true}, - {"head_equal", lang.FileSize{Path: "x.go", Lines: 600}, withBase, false}, - {"head_lower", lang.FileSize{Path: "x.go", Lines: 500}, withBase, false}, + {"no_baseline_treated_as_grew", mk("x.go", 700), noBase, 0, 0, true}, + {"head_higher_no_tolerance", mk("x.go", 700), withBase, 0, 0, true}, + {"head_equal", mk("x.go", 600), withBase, 0, 0, false}, + {"head_lower", mk("x.go", 500), withBase, 0, 0, false}, + // 5% of 600 = 30. Floor 10. max(30, 10) = 30. Growth must exceed 30. 
+ {"within_pct_below_floor", mk("x.go", 605), withBase, 5, 10, false}, + {"within_pct", mk("x.go", 625), withBase, 5, 10, false}, + {"at_pct_boundary", mk("x.go", 630), withBase, 5, 10, false}, + {"just_over_pct", mk("x.go", 631), withBase, 5, 10, true}, + // Floor dominates on small files. 5% of 100 = 5; floor=10 wins. + // Growth ≤10 → drop; >10 → flag. + {"floor_dominates_drops_at_floor", mk("y.go", 110), baseSizes{file: 100, ok: true}, 5, 10, false}, + {"floor_dominates_flags_just_over", mk("y.go", 111), baseSizes{file: 100, ok: true}, 5, 10, true}, + // Big files: pct dominates so a fixed flat tolerance doesn't + // rubber-stamp huge additions. + {"large_file_pct_dominates", mk("z.go", 5050), baseSizes{file: 5000, ok: true}, 5, 10, false}, + {"large_file_just_over_pct", mk("z.go", 5251), baseSizes{file: 5000, ok: true}, 5, 10, true}, } for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { - if got := grewFile(tc.h, tc.b); got != tc.want { + if got := grewFile(tc.h, tc.b, tc.pct, tc.floor); got != tc.want { t.Errorf("grewFile = %v, want %v", got, tc.want) } }) } } +func TestFileGrowthTolerance(t *testing.T) { + cases := []struct { + name string + base int + pct, floor int + want int + }{ + {"floor_only_when_pct_zero", 1000, 0, 50, 50}, + {"pct_dominates_large_base", 1000, 5, 10, 50}, + {"floor_dominates_small_base", 100, 5, 10, 10}, + {"pct_at_exact_match_to_floor", 200, 5, 10, 10}, + {"zero_base_collapses_to_floor", 0, 5, 10, 10}, + {"negative_pct_treated_as_zero", 1000, -1, 25, 25}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if got := fileGrowthTolerance(tc.base, tc.pct, tc.floor); got != tc.want { + t.Errorf("fileGrowthTolerance(%d, %d, %d) = %d, want %d", tc.base, tc.pct, tc.floor, got, tc.want) + } + }) + } +} + func TestFormatFuncSizeMsg(t *testing.T) { mk := func(file, name string, lines int) lang.FunctionSize { return lang.FunctionSize{ diff --git a/internal/sizes/sizes_test.go b/internal/sizes/sizes_test.go index 
0ffb57d..7ba9de0 100644 --- a/internal/sizes/sizes_test.go +++ b/internal/sizes/sizes_test.go @@ -142,7 +142,7 @@ func small() { }, } - section, err := Analyze(dir, d, 50, 500, goExtractor(t)) + section, err := Analyze(dir, d, 50, 500, DeltaTolerances{}, goExtractor(t)) if err != nil { t.Fatalf("Analyze error: %v", err) }