From 9481e4ea5d66679a46bbb97b3ea1bbdebce48fb6 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Sun, 24 May 2026 09:08:43 +0400 Subject: [PATCH] fix(prometheus-rules): escape Prometheus $labels templates for Helm rendering PrometheusRule annotations use {{ $labels.X }}, which Prometheus evaluates at alert-firing time. When this file is rendered through Helm (via chart: ./base in helmfile.yaml), Helm's Go-template engine tries to evaluate $labels at chart-render time and fails with: Error: UPGRADE FAILED: parse error at (base-infra/templates/x402-prometheus-rules.yaml:N): undefined variable "$labels" Wrap each templated brace pair as {{ "{{" }}...{{ "}}" }} so Helm emits literal Prometheus template syntax verbatim into the YAML output, where Prometheus picks it up at alert-eval time. Also rename the recording rule x402:revenue:lifetime_by_offer to x402:revenue:total_by_offer_current and correct its comment: the sum covers only the currently-running verifier replicas and resets on rollout, so it is not a true lifetime total. Anything that queries the old rule name must be updated. The bug surfaced during a full stack-up of the integration branch; it was not caught by `go test ./...` (unit tests don't render Helm) nor by the agent worktree validation (which only checked Go-side compilation). Recommend adding a CI smoke test that renders the embedded *.yaml templates with `helm template ./base` to catch this class of error going forward. Stacks on PR #513 (which introduced the file in commit 27e1ac5). --- .../base/templates/x402-prometheus-rules.yaml | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/internal/embed/infrastructure/base/templates/x402-prometheus-rules.yaml b/internal/embed/infrastructure/base/templates/x402-prometheus-rules.yaml index 73b10f9..4dbbbea 100644 --- a/internal/embed/infrastructure/base/templates/x402-prometheus-rules.yaml +++ b/internal/embed/infrastructure/base/templates/x402-prometheus-rules.yaml @@ -47,10 +47,11 @@ spec: increase(obol_x402_verifier_charged_requests_total[7d]) ) - # Lifetime charged-request count per offer (sum across replicas - # + chains). Used in the My Listings "today · X earned" header - # text and the Browse catalog usage badge.
- - record: x402:revenue:lifetime_by_offer + # Sum of currently-running verifier replicas' counters — resets + # on rollout; for true lifetime, query against a long-retention + # store or use `increase(...[Nd])`. Used in the My Listings + # "today · X earned" header text and the Browse catalog usage badge. + - record: x402:revenue:total_by_offer_current expr: | sum by (offer_namespace, offer_name) ( obol_x402_verifier_charged_requests_total @@ -101,11 +102,11 @@ spec: labels: severity: warning annotations: - summary: "x402 payment failures > 10% on {{ $labels.offer_namespace }}/{{ $labels.offer_name }} ({{ $labels.chain }})" + summary: "x402 payment failures > 10% on {{ "{{" }} $labels.offer_namespace {{ "}}" }}/{{ "{{" }} $labels.offer_name {{ "}}" }} ({{ "{{" }} $labels.chain {{ "}}" }})" description: | More than 10% of paid requests to - {{ $labels.offer_namespace }}/{{ $labels.offer_name }} on - {{ $labels.chain }} have failed verification over the last + {{ "{{" }} $labels.offer_namespace {{ "}}" }}/{{ "{{" }} $labels.offer_name {{ "}}" }} on + {{ "{{" }} $labels.chain {{ "}}" }} have failed verification over the last hour. Check the verifier logs for x509/facilitator errors and the seller's `ca-certificates` ConfigMap. @@ -130,10 +131,10 @@ spec: labels: severity: warning annotations: - summary: "{{ $labels.offer_namespace }}/{{ $labels.offer_name }} returns 402 but never settles" + summary: "{{ "{{" }} $labels.offer_namespace {{ "}}" }}/{{ "{{" }} $labels.offer_name {{ "}}" }} returns 402 but never settles" description: | The x402 verifier issued 402 responses for - {{ $labels.offer_namespace }}/{{ $labels.offer_name }} in the + {{ "{{" }} $labels.offer_namespace {{ "}}" }}/{{ "{{" }} $labels.offer_name {{ "}}" }} in the last hour but observed no settled requests. Check the buyer sidecar's auth pool (/status) and the facilitator's settlement endpoint.