From 46230272823ce28f4bf17455b80a9e67be01483b Mon Sep 17 00:00:00 2001 From: Maxwell Elliott Date: Fri, 22 May 2026 10:50:11 -0400 Subject: [PATCH] Fix #359: configuration-aware cquery hashing Under --useCquery, BazelRule.ruleInputList() previously folded each ConfiguredRuleInput to its bare label via `.map { it.label }`, dropping `configurationChecksum`. Two configured graphs that shared dep labels but differed in per-edge configuration -- a `cfg = "exec"` transition flip, a --platforms swap, a --config=... change, or a `--define` toggling a select() inside a dep -- collapsed to identical ruleInputList() output, so RuleHasher's transitive walk produced identical digests and bazel-diff returned an empty impacted set under cquery mode. This change folds each dep edge's `configurationChecksum` into the rule-input string using `|` -- a character that is invalid in a Bazel label, so the encoding can never collide with a real input and survives the existing `List` return type. The full encoded string is what gets mixed into RuleHasher's transitive hash, so two configured graphs differing only by per-edge configuration now produce distinct parent digests. RuleHasher splits on the separator to recover the bare label for `allRulesMap` / `sourceDigests` lookups and dep tracking, so the user-visible deps in the generated JSON stay clean. The empty-checksum fallback (a known Bazel quirk where cquery's `transitions=lite` output sometimes omits the checksum for an edge) returns the bare label unchanged, preserving pre-#359 behaviour for that path. This is a hash-format change: hashes generated by this version are not comparable to hashes from earlier versions under --useCquery, which is the intended behaviour -- the old hashes were missing the configuration signal. Tests: * Unignore the PR #361 reproducer (`testCqueryRuleInputListDistinguishesConfigurationChecksum_issue359`) and drop the two `_currentBehaviourBuggy_*` companions. * Add `testCqueryRuleInputListEncodesLabelAndChecksum_issue359` to pin the on-the-wire encoding. * Add `testCqueryRuleInputListEmptyChecksumFallsBackToBareLabel_issue359` to cover the Bazel-quirk fallback. * Add `testNonCqueryRuleInputListIgnoresConfiguredRuleInput_issue359` to confirm the encoding does not leak into the non-cquery path. * Add `testDecodeConfiguredRuleInputLabelRoundTrip_issue359` to pin the round-trip behaviour RuleHasher relies on. This is the minimal symptom-fixing slice of #359 -- the parent target's hash now reflects per-edge configuration changes, so the empty-impacted set under cquery is gone. Full target-determinator parity (per-(label, configuration) digest entries, `bazel config --output=json` enumeration, empty-checksum inheritance from the depending target) is intentionally out of scope here; those would change the on-disk JSON schema and warrant a separate design discussion on the issue. Refs Tinder/bazel-diff#359, Tinder/bazel-diff#361 Co-Authored-By: Claude Opus 4.7 (1M context) --- .../kotlin/com/bazel_diff/bazel/BazelRule.kt | 29 ++++- .../kotlin/com/bazel_diff/hash/RuleHasher.kt | 23 ++-- .../com/bazel_diff/bazel/BazelRuleTest.kt | 114 +++++++++++------- 3 files changed, 115 insertions(+), 51 deletions(-) diff --git a/cli/src/main/kotlin/com/bazel_diff/bazel/BazelRule.kt b/cli/src/main/kotlin/com/bazel_diff/bazel/BazelRule.kt index da2f8723..998f932e 100644 --- a/cli/src/main/kotlin/com/bazel_diff/bazel/BazelRule.kt +++ b/cli/src/main/kotlin/com/bazel_diff/bazel/BazelRule.kt @@ -11,6 +11,33 @@ import com.google.devtools.build.lib.query2.proto.proto2api.Build // https://github.com/bazelbuild/bazel/blob/6971b016f1e258e3bb567a0f9fe7a88ad565d8f2/src/main/java/com/google/devtools/build/lib/query2/query/output/SyntheticAttributeHashCalculator.java#L78-L81 private val DEFAULT_IGNORED_ATTRS = arrayOf("generator_location") +// Separator used to fold a cquery dep-edge's `configurationChecksum` into a rule-input string. +// Bazel target/package names are restricted to `[A-Za-z0-9/._+=,@~-]*` plus `:` between package +// and target, so `|` is invalid in a label and is a safe in-band encoding that survives the +// existing `List` return type of `ruleInputList()`. `RuleHasher` splits on this character +// to recover the bare label for `allRulesMap` / `sourceDigests` lookups and dep tracking; the +// full encoded string is what gets mixed into the transitive hash, so two configured graphs that +// share dep labels but differ in per-edge configuration produce distinct rule digests. See #359. +const val CONFIGURED_RULE_INPUT_SEPARATOR: Char = '|' + +fun encodeConfiguredRuleInput(input: Build.ConfiguredRuleInput): String { + // Fall back to a bare label when the checksum is empty -- this matches the pre-#359 behaviour + // for the known Bazel quirk where cquery's `transitions=lite` output sometimes omits the + // configurationChecksum for an edge, and avoids appending a dangling separator. + val checksum = input.configurationChecksum + return if (checksum.isNullOrEmpty()) { + input.label + } else { + "${input.label}${CONFIGURED_RULE_INPUT_SEPARATOR}${checksum}" + } +} + +fun decodeConfiguredRuleInputLabel(encoded: String): String { + // No separator present -> the input is a bare label (non-cquery callers, empty-checksum + // fallback, or //external:* synthetic inputs), so this is a no-op. + return encoded.substringBefore(CONFIGURED_RULE_INPUT_SEPARATOR) +} + class BazelRule(private val rule: Build.Rule) { fun digest(ignoredAttrs: Set): ByteArray { return sha256 { @@ -27,7 +54,7 @@ class BazelRule(private val rule: Build.Rule) { fun ruleInputList(useCquery: Boolean, fineGrainedHashExternalRepos: Set): List { return if (useCquery) { - rule.configuredRuleInputList.map { it.label } + + rule.configuredRuleInputList.map { encodeConfiguredRuleInput(it) } + rule.ruleInputList .map { ruleInput: String -> transformRuleInput(fineGrainedHashExternalRepos, ruleInput) diff --git a/cli/src/main/kotlin/com/bazel_diff/hash/RuleHasher.kt b/cli/src/main/kotlin/com/bazel_diff/hash/RuleHasher.kt index dc47b450..faa917b6 100644 --- a/cli/src/main/kotlin/com/bazel_diff/hash/RuleHasher.kt +++ b/cli/src/main/kotlin/com/bazel_diff/hash/RuleHasher.kt @@ -2,6 +2,7 @@ package com.bazel_diff.hash import com.bazel_diff.bazel.BazelRule import com.bazel_diff.bazel.BazelSourceFileTarget +import com.bazel_diff.bazel.decodeConfiguredRuleInputLabel import com.bazel_diff.log.Logger import com.google.common.annotations.VisibleForTesting import java.nio.file.Path @@ -56,12 +57,18 @@ class RuleHasher( putDirectBytes(seedHash) for (ruleInput in rule.ruleInputList(useCquery, fineGrainedHashExternalRepos)) { + // Under --useCquery, `ruleInput` may carry an embedded configurationChecksum (see + // BazelRule.CONFIGURED_RULE_INPUT_SEPARATOR / #359). The full encoded string is what + // we mix into the hash so two configured graphs differing only by per-edge + // configuration produce distinct digests; the bare label is what we use to look up + // the input in `allRulesMap` / `sourceDigests` and to track in `deps`. putDirectBytes(ruleInput.toByteArray()) + val inputLabel = decodeConfiguredRuleInputLabel(ruleInput) - val inputRule = allRulesMap[ruleInput] + val inputRule = allRulesMap[inputLabel] when { - inputRule == null && sourceDigests.containsKey(ruleInput) -> { - putDirectBytes(sourceDigests[ruleInput]) + inputRule == null && sourceDigests.containsKey(inputLabel) -> { + putDirectBytes(sourceDigests[inputLabel]) } inputRule?.name != null && inputRule.name != rule.name -> { val ruleInputHash = @@ -74,23 +81,23 @@ class RuleHasher( depPathClone, ignoredAttrs, modifiedFilepaths) - putTransitiveBytes(ruleInput, ruleInputHash.overallDigest) + putTransitiveBytes(inputLabel, ruleInputHash.overallDigest) } else -> { val heuristicDigest = sourceFileHasher.softDigest( - BazelSourceFileTarget(ruleInput, ByteArray(0)), modifiedFilepaths) + BazelSourceFileTarget(inputLabel, ByteArray(0)), modifiedFilepaths) when { heuristicDigest != null -> { logger.i { - "Source file $ruleInput picked up as an input for rule ${rule.name}" + "Source file $inputLabel picked up as an input for rule ${rule.name}" } - sourceDigests[ruleInput] = heuristicDigest + sourceDigests[inputLabel] = heuristicDigest putDirectBytes(heuristicDigest) } else -> logger.w { - "Unable to calculate digest for input $ruleInput for rule ${rule.name}" + "Unable to calculate digest for input $inputLabel for rule ${rule.name}" } } } diff --git a/cli/src/test/kotlin/com/bazel_diff/bazel/BazelRuleTest.kt b/cli/src/test/kotlin/com/bazel_diff/bazel/BazelRuleTest.kt index 393d647b..c6610a60 100644 --- a/cli/src/test/kotlin/com/bazel_diff/bazel/BazelRuleTest.kt +++ b/cli/src/test/kotlin/com/bazel_diff/bazel/BazelRuleTest.kt @@ -6,7 +6,6 @@ import assertk.assertions.isNotEqualTo import com.google.devtools.build.lib.query2.proto.proto2api.Build import com.google.devtools.build.lib.query2.proto.proto2api.Build.Attribute import com.google.devtools.build.lib.query2.proto.proto2api.Build.Rule -import org.junit.Ignore import org.junit.Test class BazelRuleTest { @@ -51,26 +50,18 @@ class BazelRuleTest { .isEqualTo(BazelRule(rule2Pb).digest(emptySet())) } - // Reproducer for https://github.com/Tinder/bazel-diff/issues/359 + // Fix for https://github.com/Tinder/bazel-diff/issues/359 // - // Under --useCquery, BazelRule.ruleInputList() reads each entry from configured_rule_input but - // only takes `.label`, discarding `.configurationChecksum`. Two configured graphs that share - // dep labels but differ in per-edge configuration -- a `cfg = "exec"` transition flip, a - // --platforms swap, a --config=... change, or a `--define` toggling a select() inside a dep -- - // produce identical ruleInputList() output, so RuleHasher's transitive walk visits the same - // labels and yields identical digests. The user-visible symptom is that bazel-diff under - // cquery returns an empty impacted-targets set despite a real change in the configured graph, - // diverging from bazel-contrib/target-determinator which keys hashes by - // (label, configurationChecksum) and mixes the dep-edge configuration into the rule digest. - // - // @Ignore on the assertion below pins the current (buggy) behaviour as a reproducer: the - // companion `_currentBehaviourBuggy` test asserts the lists are equal, so the suite stays - // green; the `@Ignore`d test documents the desired post-fix invariant and can be flipped on - // (or merged with `_currentBehaviourBuggy`) when the fix lands. + // Under --useCquery, BazelRule.ruleInputList() now folds each ConfiguredRuleInput's + // configurationChecksum into the rule-input string via CONFIGURED_RULE_INPUT_SEPARATOR (`|`). + // Two configured graphs that share dep labels but differ in per-edge configuration -- a + // `cfg = "exec"` transition flip, a --platforms swap, a --config=... change, or a `--define` + // toggling a select() inside a dep -- produce distinct ruleInputList() output, so RuleHasher's + // transitive walk mixes those bytes into the digest and the parent target's hash changes. + // RuleHasher decodes back to the bare label when looking the input up in `allRulesMap` / + // `sourceDigests` and when tracking deps, so the user-visible deps output stays clean. @Test - @Ignore( - "Reproducer for #359 -- expected to pass once cquery rule inputs are keyed by (label, configurationChecksum). Today the lists collapse and this assertion fails. The companion *_currentBehaviourBuggy_issue359 tests pin down the current behaviour so CI stays green; flipping this @Ignore off should be the second-to-last step of the fix PR.") - fun testCqueryRuleInputListDistinguishesConfigurationChecksum_reproducerForIssue359() { + fun testCqueryRuleInputListDistinguishesConfigurationChecksum_issue359() { val ruleA = configuredGenrule(depLabel = "//:dep", configurationChecksum = "cfg-checksum-A") val ruleB = configuredGenrule(depLabel = "//:dep", configurationChecksum = "cfg-checksum-B") @@ -79,39 +70,78 @@ class BazelRuleTest { val inputsB = BazelRule(ruleB).ruleInputList(useCquery = true, fineGrainedHashExternalRepos = emptySet()) - // Today: both `[//:dep]` -- the test fails. After the fix it should pass. assertThat(inputsA).isNotEqualTo(inputsB) } - // Companion test that locks in the *current* buggy behaviour so we have a regression signal: - // if this test ever starts failing, it means the collapse has been (partially) fixed and the - // `@Ignore`d assertion above should be revisited. This keeps the reproducer visible in source - // without breaking CI. + // Pins the on-the-wire encoding so the format is observable and future changes are intentional. + // The `|` separator is invalid in a Bazel label, so the encoded string cannot collide with any + // real label and can be safely round-tripped by RuleHasher's `decodeConfiguredRuleInputLabel`. @Test - fun testCqueryRuleInputListCollapsesConfigurationChecksum_currentBehaviourBuggy_issue359() { - val ruleA = configuredGenrule(depLabel = "//:dep", configurationChecksum = "cfg-checksum-A") - val ruleB = configuredGenrule(depLabel = "//:dep", configurationChecksum = "cfg-checksum-B") + fun testCqueryRuleInputListEncodesLabelAndChecksum_issue359() { + val rule = configuredGenrule(depLabel = "//:dep", configurationChecksum = "cfg-checksum-A") - val inputsA = - BazelRule(ruleA).ruleInputList(useCquery = true, fineGrainedHashExternalRepos = emptySet()) - val inputsB = - BazelRule(ruleB).ruleInputList(useCquery = true, fineGrainedHashExternalRepos = emptySet()) + val inputs = + BazelRule(rule).ruleInputList(useCquery = true, fineGrainedHashExternalRepos = emptySet()) - // Current (buggy) behaviour: both lists are exactly `[//:dep]`. - assertThat(inputsA).isEqualTo(listOf("//:dep")) - assertThat(inputsB).isEqualTo(listOf("//:dep")) - assertThat(inputsA).isEqualTo(inputsB) + assertThat(inputs).isEqualTo(listOf("//:dep|cfg-checksum-A")) } - // Also pins the buggy behaviour at the digest level: two BazelRules that differ ONLY in the - // dep-edge configuration checksum currently produce identical `digest(...)` output, because - // configured_rule_input is not part of BazelRule.digest() at all (see BazelRule.digest()). + // The empty-checksum fallback (a known Bazel cquery quirk) must NOT append a dangling separator + // -- we drop back to a bare label so RuleHasher's lookup keeps working unchanged. @Test - fun testBazelRuleDigestIgnoresConfigurationChecksum_currentBehaviourBuggy_issue359() { - val ruleA = configuredGenrule(depLabel = "//:dep", configurationChecksum = "cfg-checksum-A") - val ruleB = configuredGenrule(depLabel = "//:dep", configurationChecksum = "cfg-checksum-B") + fun testCqueryRuleInputListEmptyChecksumFallsBackToBareLabel_issue359() { + val rule = configuredGenrule(depLabel = "//:dep", configurationChecksum = "") + + val inputs = + BazelRule(rule).ruleInputList(useCquery = true, fineGrainedHashExternalRepos = emptySet()) + + assertThat(inputs).isEqualTo(listOf("//:dep")) + } + + // Without --useCquery, configured_rule_input is irrelevant: the legacy ruleInputList field is + // the source of truth, so the encoding must not leak into the non-cquery path. + @Test + fun testNonCqueryRuleInputListIgnoresConfiguredRuleInput_issue359() { + val rule = + Rule.newBuilder() + .setRuleClass("genrule") + .setName("//:gen") + .addRuleInput("//:legacy_dep") + .addConfiguredRuleInput( + Build.ConfiguredRuleInput.newBuilder() + .setLabel("//:dep") + .setConfigurationChecksum("cfg-checksum-A") + .build()) + .build() + + val inputs = + BazelRule(rule).ruleInputList(useCquery = false, fineGrainedHashExternalRepos = emptySet()) + + assertThat(inputs).isEqualTo(listOf("//:legacy_dep")) + } + + // Pins the round-trip behaviour `RuleHasher` relies on: the full encoded string lives in the + // hash, and the bare label is what gets looked up in `allRulesMap` / `sourceDigests` and + // tracked in `deps`. If the round-trip ever drifts the user-facing JSON would start emitting + // dep labels with `|` suffixes. + @Test + fun testDecodeConfiguredRuleInputLabelRoundTrip_issue359() { + val withChecksum = + encodeConfiguredRuleInput( + Build.ConfiguredRuleInput.newBuilder() + .setLabel("//:dep") + .setConfigurationChecksum("cfg-A") + .build()) + assertThat(decodeConfiguredRuleInputLabel(withChecksum)).isEqualTo("//:dep") + + val withoutChecksum = + encodeConfiguredRuleInput(Build.ConfiguredRuleInput.newBuilder().setLabel("//:dep").build()) + assertThat(decodeConfiguredRuleInputLabel(withoutChecksum)).isEqualTo("//:dep") - assertThat(BazelRule(ruleA).digest(emptySet())).isEqualTo(BazelRule(ruleB).digest(emptySet())) + // Non-encoded labels (the non-cquery path or //external:* synthetic inputs) must round-trip + // unchanged so the decode is safe to call unconditionally in RuleHasher. + assertThat(decodeConfiguredRuleInputLabel("//:bare")).isEqualTo("//:bare") + assertThat(decodeConfiguredRuleInputLabel("//external:foo")).isEqualTo("//external:foo") } private fun configuredGenrule(depLabel: String, configurationChecksum: String): Rule {