From b9ff17261f634649612beb67797aa682938bc7cc Mon Sep 17 00:00:00 2001 From: bussyjd Date: Thu, 21 May 2026 14:07:46 +0400 Subject: [PATCH 1/4] chore(model): remove inert --name flag from `obol model setup custom` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `--name` flag on `obol model setup custom` was documented as informational only and never participated in any routing or persistence: - ModelEntry has only `model_name` (route key) + `litellm_params`; the CLI `--name` value was never written to either. - `detectProvider` (used by `obol model list/status`) inspects `entry.ModelName` + `entry.LiteLLMParams.Model` prefixes; the `--name` string never reached it. - It was only echoed back in two log lines and passed as a UI label to `RestartLiteLLM` on the hot-add fallback path. This caused confusion in QA: an operator running obol model setup custom --name foo --model my/model ... would later call the route as `foo` and get LiteLLM's BadRequestError: ... There are no healthy deployments for this model. (This is the same error message that the operator at #v0.10.0-rc1-upgrade-report attributed to a cache-survives-stack-up bug. Five fresh-cluster probes on rc3 could not reproduce the cache bug — the consistent reproducer turned out to be calling the route by the user-given `--name` rather than the actual registered `--model` value.) 
Changes: - cmd/obol/model.go: drop --name flag from modelSetupCustomCommand - internal/model/model.go: drop name parameter from AddCustomEndpoint; fallback RestartLiteLLM label now uses modelName - flows/lib.sh: route_llm_via_obol_cli no longer reads OBOL_LLM_NAME or passes --name - flows/buy-external.sh: OBOL_LLM_NAME env var removed (orphan) - CLAUDE.md / monetize-guide SKILL.md / llm-routing.md: example commands and env-var lists drop --name / OBOL_LLM_NAME --- .../obol-stack-dev/references/llm-routing.md | 3 +-- CLAUDE.md | 3 +-- cmd/obol/model.go | 4 +-- flows/buy-external.sh | 4 +-- flows/lib.sh | 9 +++---- internal/embed/skills/monetize-guide/SKILL.md | 2 +- internal/model/model.go | 25 ++++++------------- 7 files changed, 16 insertions(+), 34 deletions(-) diff --git a/.agents/skills/obol-stack-dev/references/llm-routing.md b/.agents/skills/obol-stack-dev/references/llm-routing.md index 49e014d1..2e7c04d8 100644 --- a/.agents/skills/obol-stack-dev/references/llm-routing.md +++ b/.agents/skills/obol-stack-dev/references/llm-routing.md @@ -53,7 +53,6 @@ obol model remove qwen3.5:9b obol model remove qwen3.5:4b obol model setup custom \ - --name spark1-vllm \ --endpoint http://192.168.18.23:8000/v1 \ --model qwen36-deep # `setup custom` validates the endpoint, patches LiteLLM, and internally calls @@ -64,7 +63,7 @@ obol model list # confirm the custom entry is the only local model obol model status # provider state ``` -The flow scripts (`flows/lib.sh::route_llm_via_obol_cli`) wrap this exact sequence behind `OBOL_LLM_ENDPOINT` / `OBOL_LLM_MODEL` / `OBOL_LLM_NAME` / `OBOL_LLM_API_KEY` env vars so smoke tests target a GPU host without burning host CPU on local Ollama. +The flow scripts (`flows/lib.sh::route_llm_via_obol_cli`) wrap this exact sequence behind `OBOL_LLM_ENDPOINT` / `OBOL_LLM_MODEL` / `OBOL_LLM_API_KEY` env vars so smoke tests target a GPU host without burning host CPU on local Ollama. 
## Paid Routing (`paid/`) diff --git a/CLAUDE.md b/CLAUDE.md index 04a71396..447496ea 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -244,7 +244,6 @@ obol model remove qwen3.5:9b obol model remove qwen3.5:4b obol model setup custom \ - --name spark1-vllm \ --endpoint http://192.168.18.23:8000/v1 \ --model qwen36-deep # `setup custom` validates the endpoint, patches LiteLLM, and internally calls @@ -259,7 +258,7 @@ obol model list # confirm head of obol model status # show provider state ``` -The flow scripts (`flows/lib.sh:route_llm_via_obol_cli`) wrap this exact sequence behind `OBOL_LLM_ENDPOINT` / `OBOL_LLM_MODEL` / `OBOL_LLM_NAME` / `OBOL_LLM_API_KEY` env vars, so smoke tests can target a GPU host without burning host CPU on local Ollama. +The flow scripts (`flows/lib.sh:route_llm_via_obol_cli`) wrap this exact sequence behind `OBOL_LLM_ENDPOINT` / `OBOL_LLM_MODEL` / `OBOL_LLM_API_KEY` env vars, so smoke tests can target a GPU host without burning host CPU on local Ollama. **Per-instance overlay**: `buildLiteLLMRoutedOverlay()` reuses "ollama" provider slot pointing at `litellm.llm.svc:4000/v1` with `api: openai-completions`. App → litellm:4000 → routes by model name → actual API. diff --git a/cmd/obol/model.go b/cmd/obol/model.go index f7c402e8..c47568ee 100644 --- a/cmd/obol/model.go +++ b/cmd/obol/model.go @@ -264,7 +264,6 @@ func modelSetupCustomCommand(cfg *config.Config) *cli.Command { Name: "custom", Usage: "Add a custom OpenAI-compatible endpoint (validates before adding)", Flags: []cli.Flag{ - &cli.StringFlag{Name: "name", Usage: "Short label for the endpoint (informational only — LiteLLM keys the route by --model, not --name)", Required: true}, &cli.StringFlag{Name: "endpoint", Usage: "Full base URL (e.g. 
http://host:8000/v1)", Required: true}, &cli.StringFlag{Name: "model", Usage: "Model identifier at the endpoint — this is also the LiteLLM model_name the agent will call", Required: true}, &cli.StringFlag{Name: "api-key", Usage: "API key (optional, some endpoints don't require it)"}, @@ -272,12 +271,11 @@ func modelSetupCustomCommand(cfg *config.Config) *cli.Command { }, Action: func(ctx context.Context, cmd *cli.Command) error { u := getUI(cmd) - name := cmd.String("name") endpoint := cmd.String("endpoint") modelName := cmd.String("model") apiKey := cmd.String("api-key") - if err := model.AddCustomEndpoint(cfg, u, name, endpoint, modelName, apiKey); err != nil { + if err := model.AddCustomEndpoint(cfg, u, endpoint, modelName, apiKey); err != nil { return err } diff --git a/flows/buy-external.sh b/flows/buy-external.sh index a386312e..705ad26d 100755 --- a/flows/buy-external.sh +++ b/flows/buy-external.sh @@ -61,7 +61,6 @@ # EXTERNAL_LOG_BLOCKS_BACK default: 30 (~6 min on Base Sepolia at 2s/blk) # OBOL_LLM_ENDPOINT default: http://127.0.0.1:8000/v1 # OBOL_LLM_MODEL default: qwen36-deep (27B-class) -# OBOL_LLM_NAME default: external-llm # # Exit code: 0 on PASS (every step pass), 1 on any FAIL. @@ -107,7 +106,6 @@ EXTERNAL_LOG_BLOCKS_BACK="${EXTERNAL_LOG_BLOCKS_BACK:-30}" OBOL_LLM_ENDPOINT="${OBOL_LLM_ENDPOINT:-http://127.0.0.1:8000/v1}" OBOL_LLM_MODEL="${OBOL_LLM_MODEL:-qwen36-deep}" -OBOL_LLM_NAME="${OBOL_LLM_NAME:-external-llm}" # Resolve OBOL_ROOT before sourcing helpers — lib.sh re-derives it but # operating on the canonical path simplifies later relative paths. 
@@ -449,7 +447,7 @@ detect_buyer_runtime bob # ───────────────────────────────────────────────────────────────── # STEP 5: Repoint LiteLLM at OBOL_LLM_ENDPOINT and add the live RPC route # ───────────────────────────────────────────────────────────────── -step "Bob: route LiteLLM via $OBOL_LLM_NAME ($OBOL_LLM_MODEL)" +step "Bob: route LiteLLM via $OBOL_LLM_MODEL ($OBOL_LLM_ENDPOINT)" if route_llm_via_obol_cli bob; then pass "LiteLLM routed via $OBOL_LLM_ENDPOINT" else diff --git a/flows/lib.sh b/flows/lib.sh index a9972f13..e1121b48 100755 --- a/flows/lib.sh +++ b/flows/lib.sh @@ -572,12 +572,10 @@ bootstrap_flow_workspace() { # OBOL_LLM_MODEL is the upstream model id (default qwen36-deep, 27B-class). # qwen36-fast (4B) is faster but flakes on long single-shot agent prompts; see # the flow-13/14 step 46 retry-wrapper rationale in lib-dual-stack.sh. -# OBOL_LLM_NAME is the LiteLLM short name registered for the endpoint (default -# external-llm). # # Sequence (all model edits use --no-sync so we trigger only one Hermes # helmfile rollout at the end): -# 1. obol model setup custom --name … --endpoint … --model … --no-sync +# 1. obol model setup custom --endpoint … --model … --no-sync # (validates the endpoint, patches LiteLLM, hot-adds the model.) # 2. obol model prefer --no-sync # (configured LiteLLM order is the primary-model contract.) @@ -587,13 +585,12 @@ bootstrap_flow_workspace() { # Each peer (alice/bob) routes independently — caller passes the runner. 
route_llm_via_obol_cli() { local runner=$1 - local model name + local model if [ -n "${OBOL_LLM_ENDPOINT:-}" ]; then model="${OBOL_LLM_MODEL:-qwen36-deep}" - name="${OBOL_LLM_NAME:-external-llm}" - local args=(model setup custom --no-sync --name "$name" --endpoint "$OBOL_LLM_ENDPOINT" --model "$model") + local args=(model setup custom --no-sync --endpoint "$OBOL_LLM_ENDPOINT" --model "$model") if [ -n "${OBOL_LLM_API_KEY:-}" ]; then args+=(--api-key "$OBOL_LLM_API_KEY") fi diff --git a/internal/embed/skills/monetize-guide/SKILL.md b/internal/embed/skills/monetize-guide/SKILL.md index bb342107..14b11d56 100644 --- a/internal/embed/skills/monetize-guide/SKILL.md +++ b/internal/embed/skills/monetize-guide/SKILL.md @@ -143,7 +143,7 @@ Two steps: first bridge the endpoint into LiteLLM, then sell LiteLLM. ```bash # Step A: Add the external endpoint to LiteLLM -obol model setup custom --name \ +obol model setup custom \ --endpoint \ --model "" diff --git a/internal/model/model.go b/internal/model/model.go index 3239a73b..1b8042ea 100644 --- a/internal/model/model.go +++ b/internal/model/model.go @@ -804,13 +804,11 @@ func RemoveModel(cfg *config.Config, u *ui.UI, modelName string) error { // because the agent then strips it and calls LiteLLM with a key that doesn't // match. // -// The `name` arg is informational only. It is surfaced via -// `obol model status` / `list` for human reference but does NOT participate -// in the LiteLLM route key. Two custom endpoints that publish the same -// `modelName` will overwrite each other in the LiteLLM ConfigMap; that is -// the natural "repoint my model" behavior an operator running -// `obol model setup custom` wants when they re-run the command. 
-func AddCustomEndpoint(cfg *config.Config, u *ui.UI, name, endpoint, modelName, apiKey string) error { +// Two custom endpoints that publish the same `modelName` will overwrite +// each other in the LiteLLM ConfigMap; that is the natural "repoint my +// model" behavior an operator running `obol model setup custom` wants when +// they re-run the command. +func AddCustomEndpoint(cfg *config.Config, u *ui.UI, endpoint, modelName, apiKey string) error { kubectlBinary := filepath.Join(cfg.BinDir, "kubectl") kubeconfigPath := filepath.Join(cfg.ConfigDir, "kubeconfig.yaml") @@ -840,14 +838,7 @@ func AddCustomEndpoint(cfg *config.Config, u *ui.UI, name, endpoint, modelName, entry := buildCustomEndpointEntry(modelName, clusterEndpoint, apiKey) - // Patch ConfigMap for persistence. The display label is logged so an - // operator can correlate the call with their `--name` arg, but it isn't - // part of the route key. - if name != "" { - u.Infof("Adding custom endpoint %q (model: %s) to LiteLLM config", name, modelName) - } else { - u.Infof("Adding custom endpoint (model: %s) to LiteLLM config", modelName) - } + u.Infof("Adding custom endpoint (model: %s) to LiteLLM config", modelName) if err := patchLiteLLMConfig(kubectlBinary, kubeconfigPath, []ModelEntry{entry}); err != nil { return fmt.Errorf("failed to update LiteLLM config: %w", err) @@ -856,10 +847,10 @@ func AddCustomEndpoint(cfg *config.Config, u *ui.UI, name, endpoint, modelName, // Hot-add via API (no restart needed). 
if err := hotAddModels(cfg, u, []ModelEntry{entry}); err != nil { u.Warnf("Hot-add failed, falling back to restart: %v", err) - return RestartLiteLLM(cfg, u, name) + return RestartLiteLLM(cfg, u, modelName) } - u.Successf("Custom endpoint %q added (model: %s)", name, modelName) + u.Successf("Custom endpoint added (model: %s)", modelName) return nil } From ffd2d8e97278be0117f1e71d15f58783333b06c6 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Sun, 24 May 2026 17:40:26 +0400 Subject: [PATCH 2/4] chore(frontend): bump to v0.1.25-rc2 --- .../embed/infrastructure/values/obol-frontend.yaml.gotmpl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl b/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl index 2e76ac75..1f7373bd 100644 --- a/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl +++ b/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl @@ -46,8 +46,8 @@ image: pullPolicy: IfNotPresent # Digest-pinned: tag is informational, sha256 is authoritative. Eliminates # the mutable-tag attack surface called out by the v0.10.0-rc2 supply-chain - # review. Multi-arch index digest for v0.1.25-rc1 (linux/amd64 + linux/arm64). - tag: "v0.1.25-rc1@sha256:e7b38ca43771c29475d6831dbee53adb5d2685137ecb7d5878c82e4ecebee92a" + # review. Multi-arch index digest for v0.1.25-rc2 (linux/amd64 + linux/arm64). 
+ tag: "v0.1.25-rc2@sha256:0a54d01401256c70a21d03ea348d4f2a449c30e4ee2e8a530b3e1f3a4c0cf327" service: type: ClusterIP From 3a8ef4747b34dfa36b8ecff38dd4ab3979cdbd0d Mon Sep 17 00:00:00 2001 From: bussyjd Date: Sun, 24 May 2026 18:11:00 +0400 Subject: [PATCH 3/4] chore(frontend): bump to v0.1.25-rc3 --- .../embed/infrastructure/values/obol-frontend.yaml.gotmpl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl b/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl index 1f7373bd..a4d6328b 100644 --- a/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl +++ b/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl @@ -46,8 +46,8 @@ image: pullPolicy: IfNotPresent # Digest-pinned: tag is informational, sha256 is authoritative. Eliminates # the mutable-tag attack surface called out by the v0.10.0-rc2 supply-chain - # review. Multi-arch index digest for v0.1.25-rc2 (linux/amd64 + linux/arm64). - tag: "v0.1.25-rc2@sha256:0a54d01401256c70a21d03ea348d4f2a449c30e4ee2e8a530b3e1f3a4c0cf327" + # review. Multi-arch index digest for v0.1.25-rc3 (linux/amd64 + linux/arm64). 
+ tag: "v0.1.25-rc3@sha256:6b7cde94dc73e877d7a3888b055914343e2237ad282652734260554c7eeb8db3" service: type: ClusterIP From b28b169f2e1619da90685e14a97bf24c702aa1f9 Mon Sep 17 00:00:00 2001 From: bussyjd Date: Sun, 24 May 2026 18:27:58 +0400 Subject: [PATCH 4/4] chore(frontend): bump to v0.1.25-rc4 --- .../embed/infrastructure/values/obol-frontend.yaml.gotmpl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl b/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl index a4d6328b..5eb9915e 100644 --- a/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl +++ b/internal/embed/infrastructure/values/obol-frontend.yaml.gotmpl @@ -46,8 +46,8 @@ image: pullPolicy: IfNotPresent # Digest-pinned: tag is informational, sha256 is authoritative. Eliminates # the mutable-tag attack surface called out by the v0.10.0-rc2 supply-chain - # review. Multi-arch index digest for v0.1.25-rc3 (linux/amd64 + linux/arm64). - tag: "v0.1.25-rc3@sha256:6b7cde94dc73e877d7a3888b055914343e2237ad282652734260554c7eeb8db3" + # review. Multi-arch index digest for v0.1.25-rc4 (linux/amd64 + linux/arm64). + tag: "v0.1.25-rc4@sha256:143633300757bec467a8818aa8aa99ec30d70f5096ffe4a075e66b6adc6014a0" service: type: ClusterIP