From 717ebeadf037696380ab6ce2f9b938d1d41c1a86 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stephan=20A=C3=9Fmus?= <stephan.assmus@gmx.de>
Date: Fri, 26 Dec 2025 20:35:52 +0100
Subject: [PATCH 1/2] OpenAI Responses: More fine-grained request config

---
 crates/llm/src/openai_responses.rs | 364 ++++++++++++++++++++++++++++-
 1 file changed, 354 insertions(+), 10 deletions(-)
diff --git a/crates/llm/src/openai_responses.rs b/crates/llm/src/openai_responses.rs
index 1d61eced..5bccc28d 100644
--- a/crates/llm/src/openai_responses.rs
+++ b/crates/llm/src/openai_responses.rs
@@ -143,13 +143,140 @@ struct ResponsesRequest {
     include: Vec<String>,
     #[serde(skip_serializing_if = "Option::is_none")]
     prompt_cache_key: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    text: Option<TextControls>,
+}
+
+/// Text controls for the Responses API (verbosity and output format)
+#[derive(Debug, Serialize, Clone)]
+struct TextControls {
+    #[serde(skip_serializing_if = "Option::is_none")]
+    verbosity: Option<Verbosity>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    format: Option<TextFormat>,
+}
+
+/// Verbosity level for model output
+#[derive(Debug, Serialize, Clone, Copy, PartialEq, Eq)]
+#[serde(rename_all = "lowercase")]
+pub enum Verbosity {
+    Low,
+    Medium,
+    High,
+}
+
+/// Text format configuration for structured output
+#[derive(Debug, Serialize, Clone)]
+struct TextFormat {
+    #[serde(rename = "type")]
+    format_type: TextFormatType,
+    strict: bool,
+    schema: serde_json::Value,
+    name: String,
+}
+
+/// Format type for text output
+#[derive(Debug, Serialize, Clone)]
+#[serde(rename_all = "snake_case")]
+#[allow(dead_code)] // JsonSchema variant reserved for future structured output support
+enum TextFormatType {
+    JsonSchema,
 }
 
 /// Reasoning configuration for the request
-#[derive(Debug, Serialize)]
+#[derive(Debug, Serialize, Clone)]
 struct ReasoningConfig {
-    effort: String,
-    summary: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    effort: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    summary: Option<String>,
+}
+
+/// Model capability information for reasoning and verbosity
+#[derive(Debug, Clone)]
+struct ModelCapabilities {
+    /// Whether the model supports reasoning
+    supports_reasoning: bool,
+    /// Default reasoning effort for the model
+    default_effort: Option<String>,
+    /// Default reasoning summary setting
+    default_summary: Option<String>,
+    /// Whether the model supports verbosity control
+    supports_verbosity: bool,
+    /// Default verbosity for the model
+    default_verbosity: Option<Verbosity>,
+}
+
+impl ModelCapabilities {
+    /// Get model capabilities based on model ID
+    fn for_model(model: &str) -> Self {
+        let model_lower = model.to_lowercase();
+
+        // GPT-5 and variants - full reasoning and verbosity support
+        if model_lower.contains("gpt-5") || model_lower.starts_with("gpt5") {
+            return Self {
+                supports_reasoning: true,
+                default_effort: Some("medium".to_string()),
+                default_summary: Some("auto".to_string()),
+                supports_verbosity: true,
+                default_verbosity: Some(Verbosity::Medium),
+            };
+        }
+
+        // o3/o4 reasoning models
+        if model_lower.starts_with("o3") || model_lower.starts_with("o4") {
+            return Self {
+                supports_reasoning: true,
+                default_effort: Some("medium".to_string()),
+                default_summary: Some("auto".to_string()),
+                supports_verbosity: false,
+                default_verbosity: None,
+            };
+        }
+
+        // o1 models - reasoning but typically lower effort, no verbosity
+        if model_lower.starts_with("o1") {
+            return Self {
+                supports_reasoning: true,
+                default_effort: Some("low".to_string()),
+                default_summary: Some("auto".to_string()),
+                supports_verbosity: false,
+                default_verbosity: None,
+            };
+        }
+
+        // GPT-4o and variants - no reasoning, no verbosity
+        if model_lower.contains("gpt-4o") || model_lower.contains("gpt4o") {
+            return Self {
+                supports_reasoning: false,
+                default_effort: None,
+                default_summary: None,
+                supports_verbosity: false,
+                default_verbosity: None,
+            };
+        }
+
+        // GPT-4 variants (non-4o) - no reasoning, no verbosity
+        if model_lower.contains("gpt-4") || model_lower.contains("gpt4") {
+            return Self {
+                supports_reasoning: false,
+                default_effort: None,
+                default_summary: None,
+                supports_verbosity: false,
+                default_verbosity: None,
+            };
+        }
+
+        // Default: assume reasoning support with conservative defaults, no verbosity
+        // This allows new models to work out of the box
+        Self {
+            supports_reasoning: true,
+            default_effort: Some("low".to_string()),
+            default_summary: Some("auto".to_string()),
+            supports_verbosity: false,
+            default_verbosity: None,
+        }
+    }
 }
 
 /// Input item for the Responses API
@@ -601,7 +728,7 @@ impl OpenAIResponsesClient {
                     ..
                 } => {
                     let input = serde_json::from_str(&arguments)
-                        .unwrap_or_else(|_| serde_json::Value::String(arguments));
+                        .unwrap_or(serde_json::Value::String(arguments));
 
                     blocks.push(ContentBlock::ToolUse {
                         id: call_id,
@@ -1234,13 +1361,37 @@ impl LLMProvider for OpenAIResponsesClient {
 
         // Configure for stateless mode with encrypted reasoning
         let store = false;
-        // Always request encrypted reasoning content when operating statelessly
-        let include = if !store {
+
+        // Get model capabilities and build reasoning config
+        let capabilities = ModelCapabilities::for_model(&self.model);
+        let reasoning = if capabilities.supports_reasoning {
+            Some(ReasoningConfig {
+                effort: capabilities.default_effort,
+                summary: capabilities.default_summary,
+            })
+        } else {
+            None
+        };
+
+        // Request encrypted reasoning content when operating statelessly and reasoning is enabled
+        let include = if !store && reasoning.is_some() {
             vec!["reasoning.encrypted_content".to_string()]
         } else {
             vec![]
         };
 
+        // Build text controls for verbosity (if supported by model)
+        let text = if capabilities.supports_verbosity {
+            capabilities
+                .default_verbosity
+                .map(|verbosity| TextControls {
+                    verbosity: Some(verbosity),
+                    format: None,
+                })
+        } else {
+            None
+        };
+
         let responses_request = ResponsesRequest {
             model: self.model.clone(),
             instructions,
@@ -1248,14 +1399,12 @@ impl LLMProvider for OpenAIResponsesClient {
             tools,
             tool_choice: Some("auto".to_string()),
             parallel_tool_calls: false,
-            reasoning: Some(ReasoningConfig {
-                effort: "low".to_string(),
-                summary: "detailed".to_string(),
-            }),
+            reasoning,
             store,
             stream: streaming_callback.is_some(),
             include,
             prompt_cache_key: Some(request.session_id),
+            text,
         };
 
         let request_start = std::time::SystemTime::now();
@@ -1645,4 +1794,199 @@ mod tests {
             _ => panic!("Expected Message"),
         }
     }
+
+    #[test]
+    fn test_model_capabilities_gpt5() {
+        let caps = ModelCapabilities::for_model("gpt-5");
+        assert!(caps.supports_reasoning);
+        assert_eq!(caps.default_effort, Some("medium".to_string()));
+        assert_eq!(caps.default_summary, Some("auto".to_string()));
+
+        // Variants
+        let caps = ModelCapabilities::for_model("gpt-5-turbo");
+        assert!(caps.supports_reasoning);
+
+        let caps = ModelCapabilities::for_model("GPT-5");
+        assert!(caps.supports_reasoning);
+    }
+
+    #[test]
+    fn test_model_capabilities_o_series() {
+        // o3 models
+        let caps = ModelCapabilities::for_model("o3");
+        assert!(caps.supports_reasoning);
+        assert_eq!(caps.default_effort, Some("medium".to_string()));
+
+        let caps = ModelCapabilities::for_model("o3-mini");
+        assert!(caps.supports_reasoning);
+
+        // o4 models
+        let caps = ModelCapabilities::for_model("o4-mini");
+        assert!(caps.supports_reasoning);
+
+        // o1 models - lower default effort
+        let caps = ModelCapabilities::for_model("o1");
+        assert!(caps.supports_reasoning);
+        assert_eq!(caps.default_effort, Some("low".to_string()));
+
+        let caps = ModelCapabilities::for_model("o1-preview");
+        assert!(caps.supports_reasoning);
+    }
+
+    #[test]
+    fn test_model_capabilities_gpt4() {
+        // GPT-4o - no reasoning, no verbosity
+        let caps = ModelCapabilities::for_model("gpt-4o");
+        assert!(!caps.supports_reasoning);
+        assert_eq!(caps.default_effort, None);
+        assert!(!caps.supports_verbosity);
+        assert_eq!(caps.default_verbosity, None);
+
+        let caps = ModelCapabilities::for_model("gpt-4o-mini");
+        assert!(!caps.supports_reasoning);
+        assert!(!caps.supports_verbosity);
+
+        // GPT-4 variants - no reasoning, no verbosity
+        let caps = ModelCapabilities::for_model("gpt-4-turbo");
+        assert!(!caps.supports_reasoning);
+        assert!(!caps.supports_verbosity);
+
+        let caps = ModelCapabilities::for_model("gpt-4");
+        assert!(!caps.supports_reasoning);
+        assert!(!caps.supports_verbosity);
+    }
+
+    #[test]
+    fn test_model_capabilities_unknown() {
+        // Unknown models default to reasoning enabled with conservative settings, no verbosity
+        let caps = ModelCapabilities::for_model("some-future-model");
+        assert!(caps.supports_reasoning);
+        assert_eq!(caps.default_effort, Some("low".to_string()));
+        assert_eq!(caps.default_summary, Some("auto".to_string()));
+        assert!(!caps.supports_verbosity);
+        assert_eq!(caps.default_verbosity, None);
+    }
+
+    #[test]
+    fn test_model_capabilities_verbosity() {
+        // GPT-5 supports verbosity
+        let caps = ModelCapabilities::for_model("gpt-5");
+        assert!(caps.supports_verbosity);
+        assert_eq!(caps.default_verbosity, Some(Verbosity::Medium));
+
+        // o3/o4 don't support verbosity
+        let caps = ModelCapabilities::for_model("o3");
+        assert!(!caps.supports_verbosity);
+        assert_eq!(caps.default_verbosity, None);
+
+        let caps = ModelCapabilities::for_model("o4-mini");
+        assert!(!caps.supports_verbosity);
+    }
+
+    #[test]
+    fn test_reasoning_config_serialization() {
+        // Full config
+        let config = ReasoningConfig {
+            effort: Some("medium".to_string()),
+            summary: Some("auto".to_string()),
+        };
+        let json = serde_json::to_value(&config).unwrap();
+        assert_eq!(json["effort"], "medium");
+        assert_eq!(json["summary"], "auto");
+
+        // Partial config - effort only
+        let config = ReasoningConfig {
+            effort: Some("low".to_string()),
+            summary: None,
+        };
+        let json = serde_json::to_value(&config).unwrap();
+        assert_eq!(json["effort"], "low");
+        assert!(json.get("summary").is_none());
+
+        // Partial config - summary only
+        let config = ReasoningConfig {
+            effort: None,
+            summary: Some("detailed".to_string()),
+        };
+        let json = serde_json::to_value(&config).unwrap();
+        assert!(json.get("effort").is_none());
+        assert_eq!(json["summary"], "detailed");
+    }
+
+    #[test]
+    fn test_request_without_reasoning() {
+        // For GPT-4o, reasoning and text should not be included
+        let request = ResponsesRequest {
+            model: "gpt-4o".to_string(),
+            instructions: Some("Test".to_string()),
+            input: vec![],
+            tools: None,
+            tool_choice: Some("auto".to_string()),
+            parallel_tool_calls: false,
+            reasoning: None,
+            store: false,
+            stream: false,
+            include: vec![],
+            prompt_cache_key: None,
+            text: None,
+        };
+        let json = serde_json::to_value(&request).unwrap();
+        assert!(json.get("reasoning").is_none());
+        assert!(json.get("text").is_none());
+    }
+
+    #[test]
+    fn test_request_with_reasoning_and_verbosity() {
+        // For GPT-5, reasoning and text (verbosity) should be included
+        let request = ResponsesRequest {
+            model: "gpt-5".to_string(),
+            instructions: Some("Test".to_string()),
+            input: vec![],
+            tools: None,
+            tool_choice: Some("auto".to_string()),
+            parallel_tool_calls: false,
+            reasoning: Some(ReasoningConfig {
+                effort: Some("medium".to_string()),
+                summary: Some("auto".to_string()),
+            }),
+            store: false,
+            stream: false,
+            include: vec!["reasoning.encrypted_content".to_string()],
+            prompt_cache_key: None,
+            text: Some(TextControls {
+                verbosity: Some(Verbosity::Medium),
+                format: None,
+            }),
+        };
+        let json = serde_json::to_value(&request).unwrap();
+
+        // Check reasoning
+        assert!(json.get("reasoning").is_some());
+        assert_eq!(json["reasoning"]["effort"], "medium");
+        assert_eq!(json["reasoning"]["summary"], "auto");
+        assert_eq!(json["include"][0], "reasoning.encrypted_content");
+
+        // Check text/verbosity
+        assert!(json.get("text").is_some());
+        assert_eq!(json["text"]["verbosity"], "medium");
+        assert!(json["text"].get("format").is_none());
+    }
+
+    #[test]
+    fn test_verbosity_serialization() {
+        // Verbosity enum serializes to lowercase
+        let text = TextControls {
+            verbosity: Some(Verbosity::Low),
+            format: None,
+        };
+        let json = serde_json::to_value(&text).unwrap();
+        assert_eq!(json["verbosity"], "low");
+
+        let text = TextControls {
+            verbosity: Some(Verbosity::High),
+            format: None,
+        };
+        let json = serde_json::to_value(&text).unwrap();
+        assert_eq!(json["verbosity"], "high");
+    }
 }

From bc7cc0b11e6c8d8e9f4d22f308e8c4ccbf521b23 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stephan=20A=C3=9Fmus?= <stephan.assmus@gmx.de>
Date: Thu, 1 Jan 2026 16:44:38 +0100
Subject: [PATCH 2/2] Let model config replace top-level fields in requests

... instead of merging, which can create invalid requests.
---
 crates/llm/src/config_merge.rs | 81 +++++++++++++++++++++++-----------
 1 file changed, 55 insertions(+), 26 deletions(-)

diff --git a/crates/llm/src/config_merge.rs b/crates/llm/src/config_merge.rs
index 3167c5fa..28cb5af2 100644
--- a/crates/llm/src/config_merge.rs
+++ b/crates/llm/src/config_merge.rs
@@ -1,16 +1,19 @@
 /// Utilities for merging JSON configurations
 ///
-/// This module provides functionality to recursively merge custom model configurations
-/// with base API request payloads in a non-destructive way.
+/// This module provides functionality to merge custom model configurations
+/// with base API request payloads using shallow (top-level only) merging.
 use serde_json::Value;
 
-/// Recursively merge two JSON values.
+/// Shallow merge two JSON values at the top level only.
 ///
 /// The merge behavior is:
-/// - For objects: recursively merge keys, with `custom` values overriding `base` values
+/// - For objects: merge only at top level - custom values completely replace base values
 /// - For arrays and primitives: `custom` replaces `base`
 /// - Keys that only exist in `base` are preserved
 ///
+/// This approach requires custom configs to specify complete sub-objects when overriding
+/// nested structures, which is more explicit and avoids unexpected partial merges.
+///
 /// # Examples
 ///
 /// ```
@@ -25,12 +28,12 @@ use serde_json::Value;
 ///     }
 /// });
 ///
+/// // To disable thinking, provide the complete thinking object
 /// let custom = json!({
 ///     "temperature": 0.9,
 ///     "thinking": {
-///         "budget_tokens": 16384
-///     },
-///     "max_tokens": 4096
+///         "type": "disabled"
+///     }
 /// });
 ///
 /// let result = merge_json(base, custom);
@@ -38,23 +41,17 @@ use serde_json::Value;
 /// assert_eq!(result, json!({
 ///     "temperature": 0.9,  // overridden
 ///     "thinking": {
-///         "type": "enabled",        // preserved from base
-///         "budget_tokens": 16384    // overridden
-///     },
-///     "max_tokens": 4096   // added from custom
+///         "type": "disabled"  // completely replaced (no budget_tokens)
+///     }
 /// }));
 /// ```
 pub fn merge_json(mut base: Value, custom: Value) -> Value {
     match (&mut base, custom) {
-        // Both are objects: recursively merge keys
+        // Both are objects: shallow merge at top level only
         (Value::Object(base_map), Value::Object(custom_map)) => {
             for (key, custom_value) in custom_map {
-                base_map
-                    .entry(key)
-                    .and_modify(|base_value| {
-                        *base_value = merge_json(base_value.clone(), custom_value.clone());
-                    })
-                    .or_insert(custom_value);
+                // Always replace - no recursive merging
+                base_map.insert(key, custom_value);
             }
             base
         }
@@ -100,7 +97,8 @@ mod tests {
     }
 
     #[test]
-    fn test_merge_nested_objects() {
+    fn test_shallow_merge_nested_objects() {
+        // Shallow merge: nested objects are completely replaced, not recursively merged
         let base = json!({
             "outer": {
                 "inner1": "value1",
@@ -116,17 +114,18 @@ mod tests {
         });
         let expected = json!({
             "outer": {
-                "inner1": "value1",      // preserved
-                "inner2": "overridden",  // overridden
-                "inner3": "new"          // added
+                // inner1 is NOT preserved - entire "outer" object was replaced
+                "inner2": "overridden",
+                "inner3": "new"
             },
-            "other": "data"  // preserved
+            "other": "data"  // preserved (not in custom)
         });
         assert_eq!(merge_json(base, custom), expected);
     }
 
     #[test]
-    fn test_anthropic_thinking_example() {
+    fn test_anthropic_thinking_disabled() {
+        // Key use case: disabling thinking without leftover budget_tokens
         let base = json!({
             "model": "claude-sonnet-4",
             "temperature": 0.7,
@@ -137,6 +136,34 @@ mod tests {
         });
         let custom = json!({
             "thinking": {
+                "type": "disabled"
+            }
+        });
+        let expected = json!({
+            "model": "claude-sonnet-4",
+            "temperature": 0.7,
+            "thinking": {
+                "type": "disabled"
+                // No budget_tokens - entire thinking object was replaced
+            }
+        });
+        assert_eq!(merge_json(base, custom), expected);
+    }
+
+    #[test]
+    fn test_anthropic_thinking_custom_budget() {
+        // To customize budget, provide the complete thinking object
+        let base = json!({
+            "model": "claude-sonnet-4",
+            "temperature": 0.7,
+            "thinking": {
+                "type": "enabled",
+                "budget_tokens": 8192
+            }
+        });
+        let custom = json!({
+            "thinking": {
+                "type": "enabled",
                 "budget_tokens": 16384
             }
         });
@@ -166,7 +193,8 @@ mod tests {
     }
 
     #[test]
-    fn test_deeply_nested_merge() {
+    fn test_shallow_replaces_deeply_nested() {
+        // Shallow merge: entire top-level key is replaced
         let base = json!({
             "level1": {
                 "level2": {
@@ -187,11 +215,12 @@ mod tests {
                 }
             }
         });
+        // The entire "level1" object is replaced
         let expected = json!({
             "level1": {
                 "level2": {
                     "level3": {
-                        "keep": "this",
+                        // "keep" is NOT preserved
                         "override": "new",
                         "add": "value"
                     }