diff --git a/internal/embed/infrastructure/base/templates/erpc.yaml b/internal/embed/infrastructure/base/templates/erpc.yaml new file mode 100644 index 00000000..635665d3 --- /dev/null +++ b/internal/embed/infrastructure/base/templates/erpc.yaml @@ -0,0 +1,100 @@ +# Relocated from helmfile.yaml `erpc-httproute`, `erpc-x402-middleware`, +# and `erpc-metadata` bedag/raw releases. These resources live alongside +# their workload (eRPC in the `erpc` namespace) instead of inlined in +# helmfile so the chart layout is the single source of truth for what +# ships in the erpc namespace. +# +# CRD prerequisites: +# - HTTPRoute -> gateway.networking.k8s.io/v1 (shipped by the Traefik +# v38+ chart's bundled CRDs) +# - Middleware -> traefik.io/v1alpha1 (shipped by the Traefik chart) +# `base` now declares `needs: [traefik/traefik]` in helmfile.yaml to +# guarantee CRDs are present before these templates apply. +# +# The eRPC Deployment + Service themselves still come from the upstream +# `ethereum/erpc` Helm chart (separate release in helmfile.yaml); only +# the routing + discovery metadata is owned here. + +--- +# eRPC namespace. Pre-created here so resources in this file (HTTPRoute, +# Middleware, ConfigMap) can apply during the `base` release without +# waiting for the `erpc` upstream chart release to create it. The `erpc` +# release still sets `createNamespace: true` — Helm's +# `--create-namespace` tolerates a namespace that already exists. +apiVersion: v1 +kind: Namespace +metadata: + name: erpc + +--- +# eRPC HTTPRoute — gates /rpc through the x402-payment Middleware and +# restricts the route to the obol.stack hostname so it cannot be reached +# via the public cloudflared tunnel (see CLAUDE.md "Security: Tunnel +# Exposure"). Removing the hostnames restriction is a critical security +# regression.
+apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: erpc + namespace: erpc +spec: + hostnames: + - "obol.stack" + parentRefs: + - name: traefik-gateway + namespace: traefik + sectionName: web + rules: + - matches: + - path: + type: PathPrefix + value: /rpc + filters: + - type: ExtensionRef + extensionRef: + group: traefik.io + kind: Middleware + name: x402-payment + backendRefs: + - name: erpc + port: 80 + +--- +# x402 Middleware for the eRPC namespace (ForwardAuth -> central +# verifier). Always deployed; the verifier returns 200 for routes with +# no pricing rules. +apiVersion: traefik.io/v1alpha1 +kind: Middleware +metadata: + name: x402-payment + namespace: erpc +spec: + forwardAuth: + address: http://x402-verifier.x402.svc.cluster.local:8080/verify + authResponseHeaders: + - X-Payment-Response + +--- +# eRPC metadata ConfigMap for frontend discovery. `.Values.network` +# resolves against the `network` value passed to the `base` release +# (default "mainnet", overridable via helmfile state values). +apiVersion: v1 +kind: ConfigMap +metadata: + name: erpc-metadata + namespace: erpc + labels: + app.kubernetes.io/part-of: obol.stack + obol.stack/id: default + obol.stack/app: erpc +data: + metadata.json: | + { + "network": "{{ .Values.network }}", + "endpoints": { + "rpc": { + "external": "http://obol.stack/rpc/{{ .Values.network }}", + "internal": "http://erpc.erpc.svc.cluster.local/rpc/{{ .Values.network }}" + } + } + } diff --git a/internal/embed/infrastructure/base/templates/llm.yaml b/internal/embed/infrastructure/base/templates/llm.yaml index cf34841f..d2bc7b90 100644 --- a/internal/embed/infrastructure/base/templates/llm.yaml +++ b/internal/embed/infrastructure/base/templates/llm.yaml @@ -298,3 +298,27 @@ spec: port: 4000 targetPort: http protocol: TCP + +--- +# Relocated from helmfile.yaml `llm-buyer-podmonitor` bedag/raw release. 
+# Lives alongside its workload (litellm + x402-buyer sidecar) instead of +# inlined in helmfile so the chart layout is the single source of truth +# for what ships in the llm namespace. The PodMonitor CRD comes from the +# monitoring release (kube-prometheus-stack), so `base` now declares a +# `needs: [monitoring/monitoring]` in helmfile.yaml to guarantee CRD +# presence before this template applies. +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: litellm-x402-buyer + namespace: llm + labels: + release: monitoring +spec: + selector: + matchLabels: + app: litellm + podMetricsEndpoints: + - port: buyer-http + path: /metrics + interval: 30s diff --git a/internal/embed/infrastructure/base/templates/obol-frontend.yaml b/internal/embed/infrastructure/base/templates/obol-frontend.yaml new file mode 100644 index 00000000..397a192e --- /dev/null +++ b/internal/embed/infrastructure/base/templates/obol-frontend.yaml @@ -0,0 +1,95 @@ +# Relocated from helmfile.yaml `obol-frontend-httproute` and +# `obol-frontend-rbac` bedag/raw releases. These resources live +# alongside their workload (the obol-frontend Helm release in the +# `obol-frontend` namespace) instead of inlined in helmfile so the +# chart layout is the single source of truth for what ships in the +# obol-frontend namespace. +# +# The obol-frontend Deployment + Service themselves still come from +# the `obol/obol-app` upstream chart (separate release in +# helmfile.yaml); only the HTTPRoute and discovery RBAC are owned +# here. +# +# CRD prerequisite: HTTPRoute -> gateway.networking.k8s.io/v1 +# (shipped by the Traefik v38+ chart's bundled CRDs). `base` now +# declares `needs: [traefik/traefik]` in helmfile.yaml to guarantee +# the CRDs are present before this template applies. + +--- +# obol-frontend namespace. 
Pre-created here so the HTTPRoute and +# ClusterRoleBinding subject reference can resolve during the `base` +# release without waiting for the `obol-frontend` upstream chart +# release to create it. The chart release still sets +# `createNamespace: true` — Helm's `--create-namespace` tolerates a +# namespace that already exists. +apiVersion: v1 +kind: Namespace +metadata: + name: obol-frontend + +--- +# obol-frontend HTTPRoute. The `hostnames: ["obol.stack"]` restriction +# keeps the frontend UI off the public cloudflared tunnel — removing +# it is a critical security regression (see CLAUDE.md "Security: +# Tunnel Exposure"). +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: obol-frontend + namespace: obol-frontend +spec: + hostnames: + - "obol.stack" + parentRefs: + - name: traefik-gateway + namespace: traefik + sectionName: web + rules: + - matches: + - path: + type: PathPrefix + value: / + backendRefs: + - name: obol-frontend-obol-app + port: 3000 + +--- +# obol-frontend RBAC for OpenClaw instance discovery and ServiceOffer +# CRUD from the frontend sell modal. The ClusterRoleBinding subject +# references the `obol-frontend` ServiceAccount that the upstream +# `obol/obol-app` chart creates — the binding applies fine even if +# the SA does not exist yet, and starts granting permissions the +# moment the SA appears.
+apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: obol-frontend-openclaw-discovery + labels: + app.kubernetes.io/name: obol-frontend +rules: + - apiGroups: [""] + resources: ["namespaces"] + verbs: ["get", "list"] + - apiGroups: [""] + resources: ["pods", "configmaps", "secrets"] + verbs: ["get", "list"] + # ServiceOffer CRD — frontend sell modal creates offers + - apiGroups: ["obol.org"] + resources: ["serviceoffers", "serviceoffers/status"] + verbs: ["get", "list", "create", "update", "patch", "delete"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: obol-frontend-openclaw-discovery + labels: + app.kubernetes.io/name: obol-frontend +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: obol-frontend-openclaw-discovery +subjects: + - kind: ServiceAccount + name: obol-frontend + namespace: obol-frontend diff --git a/internal/embed/infrastructure/helmfile.yaml b/internal/embed/infrastructure/helmfile.yaml index aa7fc052..84ddc538 100644 --- a/internal/embed/infrastructure/helmfile.yaml +++ b/internal/embed/infrastructure/helmfile.yaml @@ -11,8 +11,6 @@ repositories: url: https://obolnetwork.github.io/helm-charts/ - name: ethereum url: https://ethpandaops.github.io/ethereum-helm-charts - - name: bedag - url: https://bedag.github.io/helm-charts/ - name: stakater url: https://stakater.github.io/stakater-charts @@ -27,15 +25,9 @@ values: enabled: true releases: - # Local storage provisioner (raw manifests wrapped as chart) - - name: base - namespace: kube-system - chart: ./base - values: - - dataDir: /data - - network: "{{ .Values.network }}" - - # Monitoring stack (Prometheus operator + Prometheus) + # Monitoring stack (Prometheus operator + Prometheus). Must run before + # `base` so the PodMonitor CRD exists when base/templates/llm.yaml + # applies the litellm-x402-buyer PodMonitor. 
- name: monitoring namespace: monitoring createNamespace: true @@ -44,34 +36,12 @@ releases: values: - ./values/monitoring.yaml.gotmpl - - name: llm-buyer-podmonitor - namespace: llm - createNamespace: true - chart: bedag/raw - version: 2.0.2 - needs: - - monitoring/monitoring - - kube-system/base - values: - - resources: - - apiVersion: monitoring.coreos.com/v1 - kind: PodMonitor - metadata: - name: litellm-x402-buyer - namespace: llm - labels: - release: monitoring - spec: - selector: - matchLabels: - app: litellm - podMetricsEndpoints: - - port: buyer-http - path: /metrics - interval: 30s - # Traefik ingress controller with Gateway API support - # Traefik v38+ bundles Gateway API CRDs in its crds/ directory + # Traefik v38+ bundles Gateway API CRDs in its crds/ directory. + # Declared before `base` so the Traefik CRDs (Middleware, + # IngressRoute, …) and Gateway API CRDs are available when base + # templates that depend on them (erpc.yaml, obol-frontend.yaml) + # apply. - name: traefik namespace: traefik createNamespace: true @@ -135,6 +105,24 @@ releases: dashboard: enabled: false + # Local storage provisioner + co-located cluster-wide manifests: + # CRDs, agent RBAC, x402 controller + verifier, LiteLLM + buyer + # PodMonitor, eRPC HTTPRoute + Middleware + metadata ConfigMap, and + # the obol-frontend HTTPRoute + discovery RBAC. The `needs` on + # traefik + monitoring guarantee the Traefik / Gateway API and + # monitoring CRDs are present before the relocated routing / + # PodMonitor templates (previously shipped as separate bedag/raw + # helmfile releases) apply. + - name: base + namespace: kube-system + chart: ./base + needs: + - traefik/traefik + - monitoring/monitoring + values: + - dataDir: /data + - network: "{{ .Values.network }}" + # Cloudflare Tunnel (dormant until configured via obol tunnel login/provision). 
# `condition: cloudflared.enabled` lets `obol stack up` flip this off when an # active quick tunnel is already serving — re-syncing the chart kills the @@ -168,75 +156,16 @@ releases: - ./values/erpc.yaml.gotmpl # The chart exposes port 4000 (container) via Service port 4000. # In-cluster callers use erpc.erpc.svc.cluster.local:4000. + # + # The eRPC HTTPRoute, x402-payment Middleware, and erpc-metadata + # ConfigMap previously shipped as separate bedag/raw helmfile + # releases now live in base/templates/erpc.yaml. - # eRPC HTTPRoute - - name: erpc-httproute - namespace: erpc - chart: bedag/raw - version: 2.0.2 - needs: - - traefik/traefik - - erpc/erpc - values: - - resources: - - apiVersion: gateway.networking.k8s.io/v1 - kind: HTTPRoute - metadata: - name: erpc - namespace: erpc - spec: - hostnames: - - "obol.stack" - parentRefs: - - name: traefik-gateway - namespace: traefik - sectionName: web - rules: - - matches: - - path: - type: PathPrefix - value: /rpc - filters: - - type: ExtensionRef - extensionRef: - group: traefik.io - kind: Middleware - name: x402-payment - backendRefs: - - name: erpc - port: 80 - - # x402 Middleware for eRPC namespace (ForwardAuth -> central verifier). - # Always deployed; the verifier returns 200 for routes with no pricing rules. - - name: erpc-x402-middleware - namespace: erpc - chart: bedag/raw - version: 2.0.2 - needs: - - traefik/traefik - values: - - resources: - - apiVersion: traefik.io/v1alpha1 - kind: Middleware - metadata: - name: x402-payment - namespace: erpc - spec: - forwardAuth: - address: http://x402-verifier.x402.svc.cluster.local:8080/verify - authResponseHeaders: - - X-Payment-Response - - # eRPC metadata ConfigMap for frontend discovery - - name: erpc-metadata - namespace: erpc - chart: bedag/raw - needs: - - erpc/erpc - values: - - ./values/erpc-metadata.yaml.gotmpl - - # Obol Stack frontend + # Obol Stack frontend. 
+ # + # The frontend HTTPRoute and discovery RBAC (ClusterRole + + # ClusterRoleBinding) previously shipped as separate bedag/raw + # helmfile releases now live in base/templates/obol-frontend.yaml. - name: obol-frontend namespace: obol-frontend createNamespace: true @@ -247,75 +176,3 @@ releases: - erpc/erpc values: - ./values/obol-frontend.yaml.gotmpl - - # Obol Frontend HTTPRoute - - name: obol-frontend-httproute - namespace: obol-frontend - chart: bedag/raw - version: 2.0.2 - needs: - - traefik/traefik - - obol-frontend/obol-frontend - values: - - resources: - - apiVersion: gateway.networking.k8s.io/v1 - kind: HTTPRoute - metadata: - name: obol-frontend - namespace: obol-frontend - spec: - hostnames: - - "obol.stack" - parentRefs: - - name: traefik-gateway - namespace: traefik - sectionName: web - rules: - - matches: - - path: - type: PathPrefix - value: / - backendRefs: - - name: obol-frontend-obol-app - port: 3000 - - # Obol Frontend RBAC (OpenClaw instance discovery via Kubernetes API) - - name: obol-frontend-rbac - namespace: obol-frontend - chart: bedag/raw - version: 2.0.2 - needs: - - obol-frontend/obol-frontend - values: - - resources: - - apiVersion: rbac.authorization.k8s.io/v1 - kind: ClusterRole - metadata: - name: obol-frontend-openclaw-discovery - labels: - app.kubernetes.io/name: obol-frontend - rules: - - apiGroups: [""] - resources: ["namespaces"] - verbs: ["get", "list"] - - apiGroups: [""] - resources: ["pods", "configmaps", "secrets"] - verbs: ["get", "list"] - # ServiceOffer CRD — frontend sell modal creates offers - - apiGroups: ["obol.org"] - resources: ["serviceoffers", "serviceoffers/status"] - verbs: ["get", "list", "create", "update", "patch", "delete"] - - apiVersion: rbac.authorization.k8s.io/v1 - kind: ClusterRoleBinding - metadata: - name: obol-frontend-openclaw-discovery - labels: - app.kubernetes.io/name: obol-frontend - roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: obol-frontend-openclaw-discovery - 
subjects: - - kind: ServiceAccount - name: obol-frontend - namespace: obol-frontend diff --git a/internal/embed/infrastructure/values/erpc-metadata.yaml.gotmpl b/internal/embed/infrastructure/values/erpc-metadata.yaml.gotmpl deleted file mode 100644 index fe94d8ef..00000000 --- a/internal/embed/infrastructure/values/erpc-metadata.yaml.gotmpl +++ /dev/null @@ -1,21 +0,0 @@ -resources: - - apiVersion: v1 - kind: ConfigMap - metadata: - name: erpc-metadata - namespace: erpc - labels: - app.kubernetes.io/part-of: obol.stack - obol.stack/id: default - obol.stack/app: erpc - data: - metadata.json: | - { - "network": "{{ .Values.network }}", - "endpoints": { - "rpc": { - "external": "http://obol.stack/rpc/{{ .Values.network }}", - "internal": "http://erpc.erpc.svc.cluster.local/rpc/{{ .Values.network }}" - } - } - } diff --git a/internal/stack/stack_test.go b/internal/stack/stack_test.go index 0d7cca31..a4c032ea 100644 --- a/internal/stack/stack_test.go +++ b/internal/stack/stack_test.go @@ -434,33 +434,38 @@ func TestDockerBridgeGatewayIP(t *testing.T) { t.Logf("docker0 gateway IP: %s", ip) } +// TestHelmfile_IncludesBuyerPodMonitor asserts the litellm-x402-buyer +// PodMonitor is shipped with the stack. The PodMonitor previously lived +// as an inline `bedag/raw` release in helmfile.yaml; it now lives next +// to its workload in base/templates/llm.yaml. The chart layout (the +// `base` Helm release) renders it during `obol stack up`. 
func TestHelmfile_IncludesBuyerPodMonitor(t *testing.T) { projectRoot := findProjectRoot() if projectRoot == "" { t.Fatal("project root not found") } - data, err := os.ReadFile(filepath.Join(projectRoot, "internal/embed/infrastructure/helmfile.yaml")) + data, err := os.ReadFile(filepath.Join(projectRoot, "internal/embed/infrastructure/base/templates/llm.yaml")) if err != nil { - t.Fatalf("read helmfile: %v", err) + t.Fatalf("read llm template: %v", err) } out := string(data) if !strings.Contains(out, "kind: PodMonitor") { - t.Fatalf("helmfile missing PodMonitor:\n%s", out) + t.Fatalf("llm template missing PodMonitor:\n%s", out) } if !strings.Contains(out, "name: litellm-x402-buyer") { - t.Fatalf("helmfile missing buyer PodMonitor name:\n%s", out) + t.Fatalf("llm template missing buyer PodMonitor name:\n%s", out) } if !strings.Contains(out, "release: monitoring") { - t.Fatalf("helmfile missing monitoring label:\n%s", out) + t.Fatalf("llm template missing monitoring label:\n%s", out) } if !strings.Contains(out, "port: buyer-http") || !strings.Contains(out, "path: /metrics") { - t.Fatalf("helmfile missing buyer metrics endpoint:\n%s", out) + t.Fatalf("llm template missing buyer metrics endpoint:\n%s", out) } }