From af1fef61a241006b034926ccebe639e4f24338b4 Mon Sep 17 00:00:00 2001 From: Daniel McIlvaney Date: Tue, 12 May 2026 18:26:28 -0700 Subject: [PATCH 1/2] feat(ci): add check for fingerprint stability --- .../__snapshots__/exhaustive_test.snap | 31 ++ internal/fingerprint/exhaustive_test.go | 459 ++++++++++++++++++ 2 files changed, 490 insertions(+) create mode 100755 internal/fingerprint/__snapshots__/exhaustive_test.snap create mode 100644 internal/fingerprint/exhaustive_test.go diff --git a/internal/fingerprint/__snapshots__/exhaustive_test.snap b/internal/fingerprint/__snapshots__/exhaustive_test.snap new file mode 100755 index 00000000..0dacc4bd --- /dev/null +++ b/internal/fingerprint/__snapshots__/exhaustive_test.snap @@ -0,0 +1,31 @@ + +[TestComputeIdentity_PerFieldTable - 1] +<baseline>: sha256:da85ac19338964f253e355924e4e6f9c40db64bd5765fffae46f8f7f879d200d +ComponentConfig.Build.Check.Skip: sha256:5b08c9e1f1160b1f3076f335951ae4a338f98c3bd9ccf55d404ca4a957153325 +ComponentConfig.Build.Defines.<key>: sha256:d54e83fda98dbfd1a5d726f6463dcc178ca42b26cc695da76929e2739de8c879 +ComponentConfig.Build.Defines.<value>: sha256:5a558ccc71ffca9622ba590357d47e3a55fe5f9ea8156c414356a2c688775553 +ComponentConfig.Build.Undefines[0]: sha256:f1df85441b43bcfd196c14f3745b4246c6b5d2dfad2a305c9a5c29daab1b8b4e +ComponentConfig.Build.With[0]: sha256:2844b2c05673d2a04ed54394be05ea0faec4ddf3304c4b05ee8d4c7b67830b75 +ComponentConfig.Build.Without[0]: sha256:3f7f83c176faef53a0006e613668e7dfc5662cde81525836894b70253a11bb40 +ComponentConfig.Overlays[0].Filename: sha256:b96b0519b1d356e9fafbbaeccaf19cd634dec7fc924e88f50c438e7c6050f190 +ComponentConfig.Overlays[0].Lines[0]: sha256:f7afa95c9d646bee07b34f96104fbd5560d7ae034d8eab8a6f81c3e5dd89eb35 +ComponentConfig.Overlays[0].PackageName: sha256:1a3cc5b94c6e8322c0400bdfad78d8307c5819686a8cc56a959c623f7356bd47 +ComponentConfig.Overlays[0].Regex: sha256:61a876c503f2a620ce8cc9f227a9568b7adb9220a162a921ed83fdf6d0c1a610 +ComponentConfig.Overlays[0].Replacement: 
sha256:2ed6803b9778d0635dab57da13c9f1eb010485f76b0a6e2a8a01b939662e99f3 +ComponentConfig.Overlays[0].SectionName: sha256:e31f869ed0400efd5d790dd962565c7cde56208f7636f998bfe4285810d2d6a7 +ComponentConfig.Overlays[0].Tag: sha256:df1c8f3edb512722182b01df3f7abcef72bdbb1c5b4f49a105ee2d80e2d0afdb +ComponentConfig.Overlays[0].Type: sha256:26328597023fa68c27bf7a095335c9765b28707e5187e37ab675de58e642dc44 +ComponentConfig.Overlays[0].Value: sha256:7ffddc9591f9cb14a9c209ad972021e4462ffbac2b0ed0172fbb2951a7b178a3 +ComponentConfig.Packages.<key>: sha256:2d191b70c95a44b034a8679321d73471b2d838fbbc8865f3f54bb2c4beeb5027 +ComponentConfig.Release.Calculation: sha256:57dfdc126214241f8f4134a3fb2e1d1456e672a6838bcf99583cf99d6cb869d9 +ComponentConfig.Render.SkipFileFilter: sha256:6414d5913fb752619bfc72fadef4e58bdbbdd46d8d8b5c589afffac496260f41 +ComponentConfig.SourceFiles[0].Filename: sha256:26b3a797f963dd9a3e23281e77bfa39b6732b429567eb8a849744e505d17d3c5 +ComponentConfig.SourceFiles[0].Hash: sha256:087d24af9c966c19894ecaec013c8641b17de7f602e3e350a7e43998769f5dfb +ComponentConfig.SourceFiles[0].HashType: sha256:35abe305577e6ad5378ac06089c12c99adc5f2d29e7f584eb75aefcf32739926 +ComponentConfig.SourceFiles[0].ReplaceUpstream: sha256:030a400c892cd77a2feeaef020dd5787151114be93e5e10e078c2149b0fba2ca +ComponentConfig.Spec.SourceType: sha256:e5004986ce636e8500028910f1a9d784f4ee9f5f52c5d704614681e1ffd4f956 +ComponentConfig.Spec.UpstreamCommit: sha256:82494177e9d0200063200f2d96dfd93783b67a65395a1876bd0934546f8f1d71 +ComponentConfig.Spec.UpstreamDistro.Name: sha256:b7cc574a1b87d6acd08070147307286f3530698dec9d8f1ca0f5085a403e0bc3 +ComponentConfig.Spec.UpstreamDistro.Version: sha256:f8d7d24aec6ad81d2f729ee0cd24f17da676c5aa31a610087c31e50462f637ec +ComponentConfig.Spec.UpstreamName: sha256:ca5ce6f45cb243b3d50a9ec3ef5080c695a3d8e83b0e281b75f673179949fd7c +--- diff --git a/internal/fingerprint/exhaustive_test.go b/internal/fingerprint/exhaustive_test.go new file mode 100644 index 00000000..5c1ebbaf --- 
/dev/null +++ b/internal/fingerprint/exhaustive_test.go @@ -0,0 +1,459 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package fingerprint_test + +import ( + "crypto/sha256" + "encoding/binary" + "encoding/hex" + "fmt" + "reflect" + "sort" + "strings" + "testing" + + "github.com/gkampitakis/go-snaps/snaps" + "github.com/microsoft/azure-linux-dev-tools/internal/fingerprint" + "github.com/microsoft/azure-linux-dev-tools/internal/global/opctx" + "github.com/microsoft/azure-linux-dev-tools/internal/global/testctx" + "github.com/microsoft/azure-linux-dev-tools/internal/projectconfig" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileperms" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// canonicalSeed is the deterministic seed used to derive per-field values +// for the per-field table and as the canonical entry in the fuzz corpus. +const canonicalSeed = int64(0) + +// baselineRowKey is the table-row key for the all-zero ComponentConfig +// baseline. Sorted as the first row by virtue of leading "<". +const baselineRowKey = "<baseline>" + +// TestComputeIdentity_PerFieldTable is the canonical regression guard for +// [fingerprint.ComputeIdentity] stability across schema changes. +// +// The test enumerates every fingerprinted leaf field reachable from +// [projectconfig.ComponentConfig] (recursing through structs, slices, maps, +// and pointers, skipping anything tagged `fingerprint:"-"`). For each leaf +// path P it builds a ComponentConfig in which: +// +// - Containers along the path from root to P are allocated (slices get +// one element; maps get one entry; pointers are non-nil). +// - The leaf at P is set to a deterministic non-zero value derived from +// (path, seed). Every other leaf is left at its zero value. +// +// It then computes the fingerprint and records `path: fingerprint` as a +// row in a table. 
The full table — plus a baseline row for an all-zero +// ComponentConfig — is asserted against a snapshot via go-snaps. +// +// Properties this guarantees: +// +// - Adding a fingerprinted field appears as a NEW row. Reviewer sees +// exactly which field changed and what its single-field hash is. +// - Adding an excluded field (`fingerprint:"-"`) does NOT add a row; +// the snapshot is unchanged. +// - Removing or renaming a fingerprinted field removes / renames a row. +// - Retyping a fingerprinted field changes that row's hash (different +// value type produces a different hash) but leaves siblings alone. +// - A change to ComputeIdentity that only affects the empty-collection +// branch shifts the baseline row. +// - A change that only affects the non-empty-collection branch (the +// original bug class — fingerprint regression hidden because most +// components had empty SourceFiles) shifts the rows for any path +// inside that collection. +// +// Companion: TestAllFingerprintedFieldsHaveDecision in package +// projectconfig forces every field to carry an explicit include/exclude +// decision so no field ever defaults silently into the fingerprint. +// +// To accept legitimate changes, run with UPDATE_SNAPS=true. +func TestComputeIdentity_PerFieldTable(t *testing.T) { + paths := collectFingerprintedLeafPaths( + reflect.TypeFor[projectconfig.ComponentConfig](), + "ComponentConfig", + ) + + rows := make([]string, 0, len(paths)+1) + rows = append(rows, fmt.Sprintf("%s: %s", baselineRowKey, computeFingerprintWithOnly(t, "", canonicalSeed))) + + for _, p := range paths { + fp := computeFingerprintWithOnly(t, p, canonicalSeed) + rows = append(rows, fmt.Sprintf("%s: %s", p, fp)) + } + + // Snapshot rows are pre-sorted by row key; the baseline sorts first. + sort.Strings(rows[1:]) + + snaps.MatchSnapshot(t, strings.Join(rows, "\n")) +} + +// FuzzComputeIdentity_Exhaustive runs property checks against fingerprints +// computed from exhaustively-populated components. 
+// +// Seed corpus mode (default `go test`): +// - Runs the explicit corpus entries below. Cheap; runs on every CI build. +// +// Fuzz mode (`go test -fuzz=FuzzComputeIdentity_Exhaustive`): +// - The fuzz engine explores random seed values. We verify the properties +// below hold for every seed. Useful for catching non-determinism +// (e.g., accidental dependence on map iteration order). +// +// Properties checked for every seed: +// +// - Determinism: computing the fingerprint twice from the same populated +// component yields the same value. +// - Sensitivity: a fully populated component has a fingerprint different +// from the all-zero component (sanity check that population is reaching +// fingerprinted fields). +func FuzzComputeIdentity_Exhaustive(f *testing.F) { + f.Add(canonicalSeed) + f.Add(int64(1)) + f.Add(int64(-1)) + f.Add(int64(0x0123456789ABCDEF)) + + zeroFP := computeFingerprintWithOnly(f, "", canonicalSeed) + + f.Fuzz(func(t *testing.T, seed int64) { + fp1 := computeFingerprintExhaustive(t, seed) + fp2 := computeFingerprintExhaustive(t, seed) + + assert.Equal(t, fp1, fp2, + "determinism: same seed must produce same fingerprint (seed=%d)", seed) + + assert.NotEqual(t, zeroFP, fp1, + "sensitivity: populated component must differ from zero component (seed=%d)", seed) + }) +} + +// computeFingerprintWithOnly builds a [projectconfig.ComponentConfig] in +// which only the leaf at targetPath is set (containers along the way are +// allocated; every other leaf is zero), then computes its fingerprint. +// An empty targetPath returns the fingerprint of an all-zero ComponentConfig. 
+func computeFingerprintWithOnly(tb testing.TB, targetPath string, seed int64) string { + tb.Helper() + + var comp projectconfig.ComponentConfig + + if targetPath != "" { + found := setLeafAtPath( + tb, reflect.ValueOf(&comp).Elem(), + "ComponentConfig", targetPath, seed, + ) + require.Truef(tb, found, "target path %q not reachable from ComponentConfig", targetPath) + } + + ctx := testctx.NewCtx() + normalizeForCompute(tb, &comp, ctx.FS()) + + identity, err := fingerprint.ComputeIdentity( + ctx.FS(), comp, "release-ver-value", + fingerprint.IdentityOptions{ + ManualBump: 7, + SourceIdentity: "test-source-identity", + }) + require.NoError(tb, err) + + return identity.Fingerprint +} + +// computeFingerprintExhaustive populates every fingerprinted leaf and +// returns the fingerprint. Used by [FuzzComputeIdentity_Exhaustive]. +func computeFingerprintExhaustive(tb testing.TB, seed int64) string { + tb.Helper() + + var comp projectconfig.ComponentConfig + populateExhaustively(tb, reflect.ValueOf(&comp).Elem(), "ComponentConfig", seed) + + ctx := testctx.NewCtx() + normalizeForCompute(tb, &comp, ctx.FS()) + + identity, err := fingerprint.ComputeIdentity( + ctx.FS(), comp, "release-ver-value", + fingerprint.IdentityOptions{ + ManualBump: 7, + SourceIdentity: "test-source-identity", + }) + require.NoError(tb, err) + + return identity.Fingerprint +} + +// normalizeForCompute fills in fields that [fingerprint.ComputeIdentity] +// requires but that the populator skips because they are tagged +// `fingerprint:"-"` or because the target leaf did not happen to be them: +// +// - Every [projectconfig.SourceFileReference] needs a non-empty Hash, +// otherwise ComputeIdentity refuses to produce a fingerprint. +// - Every [projectconfig.ComponentOverlay] whose effective source name +// is non-empty needs a backing file on disk so SourceContentIdentity +// can hash its content. Source itself is `fingerprint:"-"`. 
+// +// Required fields receive a fixed placeholder value (not derived from +// seed/path) so the per-field table only varies in the column under test. +func normalizeForCompute(tb testing.TB, comp *projectconfig.ComponentConfig, fs opctx.FS) { + tb.Helper() + + for i := range comp.SourceFiles { + if comp.SourceFiles[i].Hash == "" { + comp.SourceFiles[i].Hash = "placeholder-hash" + } + } + + for i := range comp.Overlays { + src := fmt.Sprintf("/exhaustive/overlay-%d.src", i) + comp.Overlays[i].Source = src + require.NoError(tb, fileutils.WriteFile( + fs, src, + []byte(fmt.Sprintf("overlay-content-%d", i)), + fileperms.PublicFile, + )) + } +} + +// collectFingerprintedLeafPaths returns every leaf path reachable from typ +// that would be populated by [populateExhaustively]. Leaves are scalar +// (string / bool / numeric) fields; container types (struct / slice / map +// / pointer) are recursed into. Fields tagged `fingerprint:"-"` are +// skipped. Map types yield `<key>` and `<value>` synthetic path segments. +func collectFingerprintedLeafPaths(typ reflect.Type, path string) []string { + //nolint:exhaustive // reflect.Kind has many kinds we never expect in fingerprinted configs. + switch typ.Kind() { + case reflect.Struct: + var out []string + + for fldIdx := range typ.NumField() { + fld := typ.Field(fldIdx) + if !fld.IsExported() { + continue + } + + if fld.Tag.Get("fingerprint") == "-" { + continue + } + + out = append(out, collectFingerprintedLeafPaths(fld.Type, path+"."+fld.Name)...) + } + + return out + + case reflect.Pointer: + return collectFingerprintedLeafPaths(typ.Elem(), path) + + case reflect.Slice: + return collectFingerprintedLeafPaths(typ.Elem(), path+"[0]") + + case reflect.Map: + var out []string + + out = append(out, collectFingerprintedLeafPaths(typ.Key(), path+".<key>")...) + out = append(out, collectFingerprintedLeafPaths(typ.Elem(), path+".<value>")...) 
+ + return out + + case reflect.String, reflect.Bool, + reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, + reflect.Float32, reflect.Float64: + return []string{path} + + default: + panic(fmt.Sprintf( + "collectFingerprintedLeafPaths: unhandled kind %v at %q", typ.Kind(), path)) + } +} + +// setLeafAtPath descends val from currentPath toward targetPath, allocating +// containers along the way, and sets the leaf at targetPath to a seeded +// non-zero value. Other leaves are left zero. Returns true if the target +// was found in this subtree. +// +// Container handling: +// - Struct: recurse into the single field whose path is a prefix of target. +// - Pointer: allocate and recurse. +// - Slice: allocate one-element slice and recurse into element [0]. +// - Map: allocate one-entry map; populate key, value, or both depending +// on which contains target. +func setLeafAtPath(tb testing.TB, val reflect.Value, currentPath, targetPath string, seed int64) bool { + tb.Helper() + + if !val.CanSet() { + return false + } + + if currentPath == targetPath { + setLeafValue(tb, val, currentPath, seed) + + return true + } + + // Target must be a descendant of currentPath. + if !strings.HasPrefix(targetPath, currentPath+".") && + !strings.HasPrefix(targetPath, currentPath+"[") { + return false + } + + //nolint:exhaustive // Only container kinds reachable here; leaves are handled above. 
+ switch val.Kind() { + case reflect.Struct: + for fldIdx := range val.NumField() { + fld := val.Type().Field(fldIdx) + if !fld.IsExported() || fld.Tag.Get("fingerprint") == "-" { + continue + } + + if setLeafAtPath(tb, val.Field(fldIdx), currentPath+"."+fld.Name, targetPath, seed) { + return true + } + } + + return false + + case reflect.Pointer: + val.Set(reflect.New(val.Type().Elem())) + + return setLeafAtPath(tb, val.Elem(), currentPath, targetPath, seed) + + case reflect.Slice: + slc := reflect.MakeSlice(val.Type(), 1, 1) + if setLeafAtPath(tb, slc.Index(0), currentPath+"[0]", targetPath, seed) { + val.Set(slc) + + return true + } + + return false + + case reflect.Map: + // Allocate placeholders for key and value; only the side that contains + // targetPath actually gets non-zero. Map keys are not addressable, so + // we build into a settable copy and insert. + keyVal := reflect.New(val.Type().Key()).Elem() + valVal := reflect.New(val.Type().Elem()).Elem() + + keyFound := setLeafAtPath(tb, keyVal, currentPath+".<key>", targetPath, seed) + valFound := setLeafAtPath(tb, valVal, currentPath+".<value>", targetPath, seed) + + if !keyFound && !valFound { + return false + } + + mapVal := reflect.MakeMap(val.Type()) + mapVal.SetMapIndex(keyVal, valVal) + val.Set(mapVal) + + return true + + default: + tb.Fatalf("setLeafAtPath: unexpected non-container kind %v at %q "+ + "(target=%q)", val.Kind(), currentPath, targetPath) + + return false + } +} + +// setLeafValue assigns a deterministic seeded non-zero value to a scalar +// reflect.Value. The kind switch mirrors the leaf kinds enumerated by +// [collectFingerprintedLeafPaths]. +func setLeafValue(tb testing.TB, val reflect.Value, path string, seed int64) { + tb.Helper() + + //nolint:exhaustive // Container kinds are handled by callers; only leaves reach here. 
+ switch val.Kind() { + case reflect.String: + val.SetString(seededString(path, seed)) + case reflect.Bool: + val.SetBool(true) + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + val.SetInt(seededInt(path, seed)) + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + // #nosec G115 -- value is bounded to 24 bits by seededInt, never negative. + val.SetUint(uint64(seededInt(path, seed))) + case reflect.Float32, reflect.Float64: + val.SetFloat(float64(seededInt(path, seed))) + default: + tb.Fatalf("setLeafValue: unhandled kind %v at %q", val.Kind(), path) + } +} + +// populateExhaustively recursively assigns a deterministic non-zero value +// to every fingerprinted, exported, settable field reachable from val. +// Used by [FuzzComputeIdentity_Exhaustive] to stress every fingerprinted +// field simultaneously. +func populateExhaustively(tb testing.TB, val reflect.Value, path string, seed int64) { + tb.Helper() + + if !val.CanSet() { + return + } + + //nolint:exhaustive // Default case delegates remaining kinds to setLeafValue. 
+ switch val.Kind() { + case reflect.Struct: + for fldIdx := range val.NumField() { + fld := val.Type().Field(fldIdx) + if !fld.IsExported() || fld.Tag.Get("fingerprint") == "-" { + continue + } + + populateExhaustively(tb, val.Field(fldIdx), path+"."+fld.Name, seed) + } + + case reflect.Pointer: + val.Set(reflect.New(val.Type().Elem())) + populateExhaustively(tb, val.Elem(), path, seed) + + case reflect.Slice: + slc := reflect.MakeSlice(val.Type(), 1, 1) + populateExhaustively(tb, slc.Index(0), path+"[0]", seed) + val.Set(slc) + + case reflect.Map: + mapVal := reflect.MakeMap(val.Type()) + keyVal := reflect.New(val.Type().Key()).Elem() + innerVal := reflect.New(val.Type().Elem()).Elem() + + populateExhaustively(tb, keyVal, path+".<key>", seed) + populateExhaustively(tb, innerVal, path+".<value>", seed) + mapVal.SetMapIndex(keyVal, innerVal) + val.Set(mapVal) + + default: + setLeafValue(tb, val, path, seed) + } +} + +// seededHash returns a SHA256 digest of (path | seed). Used to derive +// per-field, per-seed values without bleeding between fields. +func seededHash(path string, seed int64) [sha256.Size]byte { + var seedBytes [8]byte + // #nosec G115 -- bit-pattern reinterpretation; signed/unsigned irrelevant for hash input. + binary.LittleEndian.PutUint64(seedBytes[:], uint64(seed)) + + hash := sha256.New() + hash.Write([]byte(path)) + hash.Write(seedBytes[:]) + + var out [sha256.Size]byte + copy(out[:], hash.Sum(nil)) + + return out +} + +// seededString returns a short deterministic non-empty string for (path, seed). +func seededString(path string, seed int64) string { + sum := seededHash(path, seed) + + return "v:" + hex.EncodeToString(sum[:8]) +} + +// seededInt returns a deterministic non-zero positive int64 for (path, seed). +// Bounded to the low 24 bits so it fits inside any integer kind without +// surprising sign-extension when SetInt truncates. 
+func seededInt(path string, seed int64) int64 { + sum := seededHash(path, seed) + // #nosec G115 -- bounded to 0x1000000, never exceeds int64. + return int64(uint64(sum[0])|uint64(sum[1])<<8|uint64(sum[2])<<16) + 1 +} From 1dda89ef48bed07aceffb06157bf149a310bcd29 Mon Sep 17 00:00:00 2001 From: Daniel McIlvaney Date: Thu, 14 May 2026 15:41:24 -0700 Subject: [PATCH 2/2] add more fuzz invariants --- internal/fingerprint/exhaustive_test.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/internal/fingerprint/exhaustive_test.go b/internal/fingerprint/exhaustive_test.go index 5c1ebbaf..0dcdbda9 100644 --- a/internal/fingerprint/exhaustive_test.go +++ b/internal/fingerprint/exhaustive_test.go @@ -108,6 +108,9 @@ func TestComputeIdentity_PerFieldTable(t *testing.T) { // - Sensitivity: a fully populated component has a fingerprint different // from the all-zero component (sanity check that population is reaching // fingerprinted fields). +// - Format: the fingerprint is "sha256:" followed by 64 hex characters. +// - Uniqueness: seed and seed+1 produce different fingerprints (a +// collision between adjacent seeds would be highly suspicious). func FuzzComputeIdentity_Exhaustive(f *testing.F) { f.Add(canonicalSeed) f.Add(int64(1)) @@ -125,6 +128,15 @@ func FuzzComputeIdentity_Exhaustive(f *testing.F) { assert.NotEqual(t, zeroFP, fp1, "sensitivity: populated component must differ from zero component (seed=%d)", seed) + + assert.Len(t, fp1, 71, + "format: fingerprint should be 'sha256:' + 64 hex chars (seed=%d)", seed) + assert.True(t, strings.HasPrefix(fp1, "sha256:"), + "format: fingerprint should have 'sha256:' prefix (seed=%d)", seed) + + fp3 := computeFingerprintExhaustive(t, seed+1) + assert.NotEqual(t, fp1, fp3, + "uniqueness: different seeds should produce different fingerprints (seed=%d)", seed) }) }