Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions internal/embed/infrastructure/base/templates/llm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ data:
},
"openai": {
"id": "openai",
"npm": "openai",
"npm": "@ai-sdk/openai",
"api_key": "$OPENAI_API_KEY"
}
}
Expand Down Expand Up @@ -132,7 +132,7 @@ spec:
# providers.json is taken from the llmspy package (has full model definitions)
# and then merged with ConfigMap overrides (Ollama endpoint, API key refs).
- name: seed-config
image: ghcr.io/obolnetwork/llms:3.0.33-obol.2
image: ghcr.io/obolnetwork/llms:3.0.34-obol.1
imagePullPolicy: IfNotPresent
command:
- python3
Expand All @@ -159,6 +159,22 @@ spec:
json.dump(providers, f, indent=2)
os.chmod('/data/llms.json', 0o666)
os.chmod('/data/providers.json', 0o666)
# Patch: strip stream_options when forcing stream=false.
# OpenClaw sends stream_options with streaming requests; llmspy forces
# stream=false but doesn't remove stream_options. OpenAI rejects the
# combination. Copy the llms package to the writable volume and patch it.
# TODO: remove once fixed upstream in ObolNetwork/llms.
shutil.copytree(pkg_dir, '/data/llms', dirs_exist_ok=True)
main_path = '/data/llms/main.py'
with open(main_path) as f:
code = f.read()
code = code.replace(
'chat["stream"] = False',
'chat["stream"] = False\n chat.pop("stream_options", None)',
1,
)
with open(main_path, 'w') as f:
f.write(code)
volumeMounts:
- name: llmspy-config
mountPath: /config
Expand All @@ -169,7 +185,7 @@ spec:
- name: llmspy
# Obol fork of LLMSpy with smart routing extension.
# Pin a specific version for reproducibility.
image: ghcr.io/obolnetwork/llms:3.0.33-obol.2
image: ghcr.io/obolnetwork/llms:3.0.34-obol.1
imagePullPolicy: IfNotPresent
ports:
- name: http
Expand All @@ -190,6 +206,9 @@ spec:
# Avoid surprises if the image changes its default HOME.
- name: HOME
value: /home/llms
# Load patched llms package from the init container (stream_options fix).
- name: PYTHONPATH
value: /home/llms/.llms
volumeMounts:
- name: llmspy-home
mountPath: /home/llms/.llms
Expand Down
125 changes: 115 additions & 10 deletions internal/openclaw/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,35 @@ func requireEnvKey(t *testing.T, key string) string {
return v
}

// requireLLMSpyProvider verifies that a provider is actually active in the
// running llmspy pod (not auto-disabled due to invalid API key). This catches
// the case where `obol model setup` succeeds (ConfigMap patched) but llmspy
// auto-disables the provider at startup because provider.test() failed.
//
// The check is best-effort: if the provider is disabled — or the probe output
// is unrecognizable — the test is skipped rather than failed, since an expired
// API key is an environment problem, not a code regression.
func requireLLMSpyProvider(t *testing.T, cfg *config.Config, provider string) {
	t.Helper()
	output := obolRun(t, cfg, "kubectl",
		"exec", "-n", "llm", "deploy/llmspy", "-c", "llmspy", "--",
		"python3", "-c", fmt.Sprintf(`import json
with open('/home/llms/.llms/llms.json') as f:
    d = json.load(f)
p = d.get('providers', {}).get('%s', {})
print('enabled' if p.get('enabled') else 'disabled')
`, provider))
	// Extract the last matching line (kubectl may prepend "Defaulted container" noise).
	state := ""
	for _, line := range strings.Split(strings.TrimSpace(output), "\n") {
		line = strings.TrimSpace(line)
		if line == "enabled" || line == "disabled" {
			state = line
		}
	}
	switch state {
	case "enabled":
		t.Logf("llmspy provider %q is active", provider)
	case "disabled":
		t.Skipf("llmspy provider %q is disabled (API key likely invalid or expired) — "+
			"check the key and re-run 'obol model setup --provider %s'", provider, provider)
	default:
		// Neither marker line appeared: the probe itself produced unexpected
		// output (e.g. the pod is unhealthy). Previously this rendered an
		// empty %s in the skip message; report it explicitly instead.
		t.Skipf("llmspy provider %q state could not be determined from probe output %q — "+
			"check the llmspy pod and re-run 'obol model setup --provider %s'",
			provider, output, provider)
	}
}

// ---------------------------------------------------------------------------
// Helpers — deployment scaffolding
// ---------------------------------------------------------------------------
Expand Down Expand Up @@ -313,12 +342,13 @@ func portForward(t *testing.T, cfg *config.Config, namespace string) string {
}

// chatCompletionWithPrompt sends a chat completion with a custom user message.
func chatCompletionWithPrompt(t *testing.T, baseURL, modelName, token, prompt string, maxTokens int) string {
// Note: max_tokens is intentionally omitted because newer models (e.g. gpt-5.2)
// require max_completion_tokens instead, and the prompt already constrains output.
func chatCompletionWithPrompt(t *testing.T, baseURL, modelName, token, prompt string) string {
t.Helper()
reqBody := map[string]interface{}{
"model": modelName,
"messages": []map[string]string{{"role": "user", "content": prompt}},
"max_tokens": maxTokens,
"model": modelName,
"messages": []map[string]string{{"role": "user", "content": prompt}},
}
bodyBytes, _ := json.Marshal(reqBody)

Expand All @@ -344,6 +374,7 @@ func chatCompletionWithPrompt(t *testing.T, baseURL, modelName, token, prompt st
defer resp.Body.Close()

respBody, _ := io.ReadAll(resp.Body)
t.Logf("chat completion response (HTTP %d): %s", resp.StatusCode, string(respBody))
if resp.StatusCode != http.StatusOK {
t.Fatalf("chat completion returned %d: %s", resp.StatusCode, string(respBody))
}
Expand All @@ -361,14 +392,34 @@ func chatCompletionWithPrompt(t *testing.T, baseURL, modelName, token, prompt st
if len(result.Choices) == 0 || result.Choices[0].Message.Content == "" {
t.Fatalf("empty response from chat completion: %s", string(respBody))
}
return result.Choices[0].Message.Content

content := result.Choices[0].Message.Content

// Reject responses that are actually upstream errors wrapped in a 200.
// llmspy returns errors like "500 status code (no body)" or "Model X not found"
// which OpenClaw may relay as chat content.
errorPatterns := []string{
"status code",
"not found",
"Model " + modelName + " not found",
"errorCode",
"Internal Server Error",
}
contentLower := strings.ToLower(content)
for _, p := range errorPatterns {
if strings.Contains(contentLower, strings.ToLower(p)) {
t.Fatalf("response contains upstream error (%q): %s", p, content)
}
}

return content
}

// chatCompletion sends a chat completion request with the gateway Bearer token
// and returns the assistant response. It uses a fixed one-word prompt so that
// callers get a minimal, cheap round-trip through the gateway.
func chatCompletion(t *testing.T, baseURL, modelName, token string) string {
	t.Helper()
	const oneWordPrompt = "Reply with exactly one word: hello"
	return chatCompletionWithPrompt(t, baseURL, modelName, token, oneWordPrompt)
}

// cleanupInstance deletes an OpenClaw instance via `obol openclaw delete --force`.
Expand Down Expand Up @@ -424,12 +475,13 @@ func TestIntegration_AnthropicInference(t *testing.T) {
// Configure llmspy gateway via obol model setup
t.Log("configuring llmspy via: obol model setup --provider anthropic")
obolRun(t, cfg, "model", "setup", "--provider", "anthropic", "--api-key", apiKey)
requireLLMSpyProvider(t, cfg, "anthropic")

cloud := &CloudProviderInfo{
Name: "anthropic",
APIKey: apiKey,
ModelID: "claude-sonnet-4-5-20250929",
Display: "Claude Sonnet 4.5",
ModelID: "claude-sonnet-4-6",
Display: "Claude Sonnet 4.6",
}

// Scaffold cloud overlay + deploy via obol openclaw sync
Expand All @@ -446,11 +498,22 @@ func TestIntegration_AnthropicInference(t *testing.T) {
t.Logf("retrieved gateway token (%d chars)", len(token))

baseURL := portForward(t, cfg, namespace)
agentModel := "ollama/claude-sonnet-4-5-20250929" // routed through llmspy
agentModel := "ollama/claude-sonnet-4-6" // routed through llmspy
t.Logf("testing inference with model %s at %s", agentModel, baseURL)

reply := chatCompletion(t, baseURL, agentModel, token)
t.Logf("Anthropic response: %s", reply)

// Known OpenClaw issue: Anthropic returns finish_reason "end_turn" which
// llmspy translates correctly, but OpenClaw doesn't recognize it and outputs
// "Unhandled stop reason: end_turn" instead of the model's actual text.
// The inference pipeline (obol-stack → llmspy → Anthropic) works — verified
// via direct curl to llmspy. This is an upstream OpenClaw bug.
if strings.Contains(reply, "Unhandled stop reason") {
t.Log("NOTE: response contains 'Unhandled stop reason' — this is a known " +
"OpenClaw issue with Anthropic's finish_reason translation, not an " +
"obol-stack or llmspy problem")
}
}

func TestIntegration_OpenAIInference(t *testing.T) {
Expand All @@ -463,6 +526,7 @@ func TestIntegration_OpenAIInference(t *testing.T) {
// Configure llmspy gateway via obol model setup
t.Log("configuring llmspy via: obol model setup --provider openai")
obolRun(t, cfg, "model", "setup", "--provider", "openai", "--api-key", apiKey)
requireLLMSpyProvider(t, cfg, "openai")

cloud := &CloudProviderInfo{
Name: "openai",
Expand Down Expand Up @@ -492,6 +556,46 @@ func TestIntegration_OpenAIInference(t *testing.T) {
t.Logf("OpenAI response: %s", reply)
}

// TestIntegration_GoogleInference deploys an OpenClaw instance backed by
// Google (Gemini) through the llmspy gateway and verifies end-to-end chat
// inference against the deployed pod.
func TestIntegration_GoogleInference(t *testing.T) {
	cfg := requireCluster(t)
	apiKey := requireEnvKey(t, "GEMINI_API_KEY")

	const id = "test-google"
	t.Cleanup(func() { cleanupInstance(t, cfg, id) })

	// Point the llmspy gateway at Google and confirm the provider actually
	// came up enabled (a bad key gets auto-disabled at startup).
	t.Log("configuring llmspy via: obol model setup --provider google")
	obolRun(t, cfg, "model", "setup", "--provider", "google", "--api-key", apiKey)
	requireLLMSpyProvider(t, cfg, "google")

	info := &CloudProviderInfo{
		Name:    "google",
		APIKey:  apiKey,
		ModelID: "gemini-2.5-flash",
		Display: "Gemini 2.5 Flash",
	}

	// Scaffold the cloud overlay, then deploy it with obol openclaw sync.
	t.Logf("scaffolding OpenClaw instance %q with Google via llmspy", id)
	scaffoldCloudInstance(t, cfg, id, info)

	t.Log("deploying via: obol openclaw sync " + id)
	obolRun(t, cfg, "openclaw", "sync", id)

	ns := fmt.Sprintf("%s-%s", appName, id)
	waitForPodReady(t, cfg, ns)

	gatewayToken := getGatewayToken(t, cfg, id)
	t.Logf("retrieved gateway token (%d chars)", len(gatewayToken))

	gatewayURL := portForward(t, cfg, ns)
	model := "ollama/gemini-2.5-flash" // routed through llmspy
	t.Logf("testing inference with model %s at %s", model, gatewayURL)

	answer := chatCompletion(t, gatewayURL, model, gatewayToken)
	t.Logf("Google response: %s", answer)
}

func TestIntegration_ZaiInference(t *testing.T) {
cfg := requireCluster(t)
apiKey := requireEnvKey(t, "ZHIPU_API_KEY")
Expand All @@ -503,6 +607,7 @@ func TestIntegration_ZaiInference(t *testing.T) {
// the old hardcoded map, so it only works with dynamic provider discovery.
t.Log("configuring llmspy via: obol model setup --provider zai")
obolRun(t, cfg, "model", "setup", "--provider", "zai", "--api-key", apiKey)
requireLLMSpyProvider(t, cfg, "zai")

cloud := &CloudProviderInfo{
Name: "zai",
Expand Down Expand Up @@ -821,7 +926,7 @@ func TestIntegration_SkillInference(t *testing.T) {
// into the system prompt, so the agent should know about them.
prompt := "List every skill you have access to. For each skill, state its exact name. Be concise — just the names, one per line."
t.Logf("sending skill-awareness prompt to %s", agentModel)
reply := chatCompletionWithPrompt(t, baseURL, agentModel, token, prompt, 256)
reply := chatCompletionWithPrompt(t, baseURL, agentModel, token, prompt)
t.Logf("agent reply:\n%s", reply)

replyLower := strings.ToLower(reply)
Expand Down
Loading