From 04b9a6e1b43b0dbb8e70a82fda8c39c9c88de3cb Mon Sep 17 00:00:00 2001 From: bussyjd Date: Sat, 23 May 2026 23:03:23 +0400 Subject: [PATCH] feat(security): Restricted Pod Security Standard across embedded workloads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Brings the embedded x402 and llm Deployments shipped by obol-stack up to PSS Restricted: - runAsNonRoot: true with fixed non-zero UID/GID (65532) - allowPrivilegeEscalation: false - capabilities.drop: [ALL] - seccompProfile: RuntimeDefault - readOnlyRootFilesystem: true (extra hardening beyond the Restricted profile; named emptyDir mounts where Python needs writable /tmp and $HOME/.cache) PSS labels (enforce=restricted, audit/warn=restricted) added to the x402 and llm namespaces so Pods created from future Deployment edits that omit the required securityContext are rejected at admission (the Deployment object itself is still accepted, with a warning, and its rollout stalls). Also switches the serviceoffer-controller Dockerfile from gcr.io/distroless/static-debian12 (UID 0) to gcr.io/distroless/static-debian12:nonroot (UID 65532). Container escape via a Go runtime CVE on a UID-0 / no-seccomp / no-cap-drop / RW-rootfs container was the easiest path to host pivot on a single-node k3s cluster; this closes it. Files touched: - Dockerfile.serviceoffer-controller (:nonroot base) - internal/embed/infrastructure/base/templates/x402.yaml (verifier + controller securityContext blocks, x402 ns PSS label) - internal/embed/infrastructure/base/templates/llm.yaml (litellm + x402-buyer securityContext, litellm-tmp + litellm-home emptyDir mounts with HOME/XDG_CACHE_HOME/HF_HOME redirection, llm ns PSS label) Scope notes: - local-path-provisioner lives in kube-system (k3d-managed); not relabeled per PSS guidance to skip system namespaces. - hermes-obol-agent runtime is generated dynamically by serviceoffer-controller (internal/serviceoffercontroller/agent_render.go and internal/hermes/hermes.go), not from the embedded templates; its init-hermes-perms initContainer legitimately runs as UID 0 for /data chown and is intentionally left out of this PR's scope. 
- cloudflared chart (internal/embed/infrastructure/cloudflared/...) is a separate Helm chart and not in this PR's file list. What may break: - LiteLLM with readOnlyRootFilesystem may fail if it writes outside /tmp or $HOME — watch the next release-smoke for permission-denied errors and add named emptyDir mounts for any new write paths. --- Dockerfile.serviceoffer-controller | 2 +- .../infrastructure/base/templates/llm.yaml | 57 +++++++++++++++++++ .../infrastructure/base/templates/x402.yaml | 42 ++++++++++++++ 3 files changed, 100 insertions(+), 1 deletion(-) diff --git a/Dockerfile.serviceoffer-controller b/Dockerfile.serviceoffer-controller index 5214a93a..09f6935b 100644 --- a/Dockerfile.serviceoffer-controller +++ b/Dockerfile.serviceoffer-controller @@ -5,6 +5,6 @@ RUN go mod download COPY . . RUN CGO_ENABLED=0 go build -o /serviceoffer-controller ./cmd/serviceoffer-controller -FROM gcr.io/distroless/static-debian12 +FROM gcr.io/distroless/static-debian12:nonroot COPY --from=builder /serviceoffer-controller /serviceoffer-controller ENTRYPOINT ["/serviceoffer-controller"] diff --git a/internal/embed/infrastructure/base/templates/llm.yaml b/internal/embed/infrastructure/base/templates/llm.yaml index cf34841f..956f59d4 100644 --- a/internal/embed/infrastructure/base/templates/llm.yaml +++ b/internal/embed/infrastructure/base/templates/llm.yaml @@ -21,6 +21,15 @@ apiVersion: v1 kind: Namespace metadata: name: llm + labels: + # Pod Security Standards: Restricted profile enforced at admission. + # The litellm pod (litellm + x402-buyer sidecar) runs as non-root with + # all caps dropped, seccomp=RuntimeDefault, and readOnlyRootFilesystem; + # write paths are routed to named emptyDir mounts. 
+ pod-security.kubernetes.io/enforce: restricted + pod-security.kubernetes.io/enforce-version: latest + pod-security.kubernetes.io/audit: restricted + pod-security.kubernetes.io/warn: restricted --- # ClusterIP Service + Endpoints: routes ollama.llm.svc.cluster.local → host Ollama. @@ -142,6 +151,17 @@ spec: secret.reloader.stakater.com/reload: "litellm-secrets" spec: terminationGracePeriodSeconds: 60 + # PSS Restricted: pod-level identity. UID/GID 65532 is the nonroot + # distroless convention; the Obol LiteLLM fork's working dirs are + # routed onto emptyDir mounts below so readOnlyRootFilesystem can + # stay on without breaking Python's tempfile / cache writes. + securityContext: + runAsNonRoot: true + runAsUser: 65532 + runAsGroup: 65532 + fsGroup: 65532 + seccompProfile: + type: RuntimeDefault containers: - name: litellm # Obol fork of LiteLLM with config-only model management API. @@ -150,6 +170,13 @@ spec: # Source: https://github.com/ObolNetwork/litellm image: ghcr.io/obolnetwork/litellm:sha-c16b156 imagePullPolicy: IfNotPresent + # PSS Restricted: drop all caps, no privilege escalation, RO rootfs. + # Python writes are funneled to the emptyDir mounts below. + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: ["ALL"] args: - --config - /etc/litellm/config.yaml @@ -167,10 +194,22 @@ spec: value: "false" - name: DISABLE_SCHEMA_UPDATE value: "true" + # Redirect Python / HF / pip cache lookups onto the writeable + # emptyDir at /home/litellm so readOnlyRootFilesystem=true holds. 
+ - name: HOME + value: /home/litellm + - name: XDG_CACHE_HOME + value: /home/litellm/.cache + - name: HF_HOME + value: /home/litellm/.cache/huggingface volumeMounts: - name: litellm-config mountPath: /etc/litellm/config.yaml subPath: config.yaml + - name: litellm-tmp + mountPath: /tmp + - name: litellm-home + mountPath: /home/litellm startupProbe: httpGet: path: /health/readiness @@ -214,6 +253,14 @@ spec: # across flow-08/11/14/13. See internal/embed/embed_image_pin_test.go. image: ghcr.io/obolnetwork/x402-buyer:b13254e@sha256:446d730fefbe1860e8b3245289aa8979d765ae977b7f0eaa053543e2468313cb imagePullPolicy: IfNotPresent + # PSS Restricted: Go distroless:nonroot image already runs as + # UID 65532; only the state dir under /state needs to be writeable + # and it's already an emptyDir mount. + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: ["ALL"] args: - --config-dir=/config/buyer-config - --auths-dir=/config/buyer-auths @@ -258,6 +305,16 @@ spec: items: - key: config.yaml path: config.yaml + # Writable /tmp for Python tempfile / multipart uploads. Sized + # modestly — LiteLLM streams responses rather than buffering them. + - name: litellm-tmp + emptyDir: + sizeLimit: 128Mi + # Writable HOME for LiteLLM's pip/HF/XDG cache lookups so the + # container can run with readOnlyRootFilesystem=true. + - name: litellm-home + emptyDir: + sizeLimit: 256Mi - name: buyer-config configMap: name: x402-buyer-config diff --git a/internal/embed/infrastructure/base/templates/x402.yaml b/internal/embed/infrastructure/base/templates/x402.yaml index 9dcc933e..11fdfba3 100644 --- a/internal/embed/infrastructure/base/templates/x402.yaml +++ b/internal/embed/infrastructure/base/templates/x402.yaml @@ -6,6 +6,16 @@ apiVersion: v1 kind: Namespace metadata: name: x402 + labels: + # Pod Security Standards: Restricted profile enforced at admission. 
+ # Pods from future Deployment edits that omit the required securityContext + # are rejected; the Deployment itself is still admitted, with a warning. + # x402-verifier and serviceoffer-controller run as non-root with all caps + # dropped, seccomp=RuntimeDefault, and readOnlyRootFilesystem. + pod-security.kubernetes.io/enforce: restricted + pod-security.kubernetes.io/enforce-version: latest + pod-security.kubernetes.io/audit: restricted + pod-security.kubernetes.io/warn: restricted --- # Static gateway settings plus optional manual routes. In cluster mode the @@ -210,10 +220,25 @@ spec: app: x402-verifier spec: serviceAccountName: x402-verifier + # PSS Restricted: pod-level identity. + securityContext: + runAsNonRoot: true + runAsUser: 65532 + runAsGroup: 65532 + fsGroup: 65532 + seccompProfile: + type: RuntimeDefault containers: - name: verifier image: ghcr.io/obolnetwork/x402-verifier:b13254e imagePullPolicy: IfNotPresent + # PSS Restricted: per-container hardening. Verifier is a Go binary + # reading two RO ConfigMaps; no writeable rootfs paths required. + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: ["ALL"] ports: - name: http containerPort: 8080 @@ -281,10 +306,27 @@ spec: app: serviceoffer-controller spec: serviceAccountName: serviceoffer-controller + # PSS Restricted: pod-level identity. Paired with Dockerfile + # FROM gcr.io/distroless/static-debian12:nonroot which default-runs + # as UID/GID 65532. Container escape via a Go-runtime CVE on a + # UID-0 / no-seccomp / no-cap-drop / RW-rootfs container was the + # easiest path to host pivot on k3s single-node; this closes it. 
+ securityContext: + runAsNonRoot: true + runAsUser: 65532 + runAsGroup: 65532 + fsGroup: 65532 + seccompProfile: + type: RuntimeDefault containers: - name: controller image: ghcr.io/obolnetwork/serviceoffer-controller:b13254e imagePullPolicy: IfNotPresent + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: ["ALL"] env: - name: POD_NAMESPACE valueFrom: