diff --git a/crates/goose-cli/src/commands/configure.rs b/crates/goose-cli/src/commands/configure.rs
index 256f8b963968..1c5e7f09e93c 100644
--- a/crates/goose-cli/src/commands/configure.rs
+++ b/crates/goose-cli/src/commands/configure.rs
@@ -23,8 +23,6 @@ use goose::model::ModelConfig;
 #[cfg(feature = "telemetry")]
 use goose::posthog::{get_telemetry_choice, TELEMETRY_ENABLED_KEY};
 use goose::providers::base::ConfigKey;
-use goose::providers::chatgpt_codex::reasoning_levels_for_model;
-use goose::providers::formats::anthropic::supports_adaptive_thinking;
 use goose::providers::provider_test::test_provider_configuration;
 use goose::providers::{create, providers, retry_operation, RetryConfig};
 use goose::session::SessionType;
@@ -738,15 +736,13 @@ pub async fn configure_provider_dialog() -> anyhow::Result<bool> {
     let spin = spinner();
     spin.start("Attempting to fetch supported models...");
 
-    let models_res = {
-        let temp_model_config =
-            ModelConfig::new(&provider_meta.default_model)?.with_canonical_limits(provider_name);
-        let temp_provider = create(provider_name, temp_model_config, Vec::new()).await?;
-        retry_operation(&RetryConfig::default(), || async {
-            temp_provider.fetch_recommended_models().await
-        })
-        .await
-    };
+    let temp_model_config =
+        ModelConfig::new(&provider_meta.default_model)?.with_canonical_limits(provider_name);
+    let temp_provider = create(provider_name, temp_model_config, Vec::new()).await?;
+    let models_res = retry_operation(&RetryConfig::default(), || async {
+        temp_provider.fetch_recommended_models().await
+    })
+    .await;
 
     spin.stop(style("Model fetch complete").green());
 
     // Select a model: on fetch error show styled error and abort; if models available, show list; otherwise free-text input
@@ -766,78 +762,24 @@ pub async fn configure_provider_dialog() -> anyhow::Result<bool> {
         }
     };
 
-    if model.to_lowercase().starts_with("gemini-3") {
-        let thinking_level: &str = cliclack::select("Select thinking level for Gemini 3:")
-            .item("low", "Low - Better latency, lighter reasoning", "")
-            .item("high", "High - Deeper reasoning, higher latency", "")
-            .interact()?;
-        config.set_gemini3_thinking_level(thinking_level)?;
-    }
-
-    if model.to_lowercase().starts_with("claude-") {
-        let supports_adaptive = supports_adaptive_thinking(&model);
-
-        let mut thinking_select = cliclack::select("Select extended thinking mode for Claude:");
-        if supports_adaptive {
-            thinking_select = thinking_select.item(
-                "adaptive",
-                "Adaptive - Claude decides when and how much to think (recommended)",
-                "",
-            );
-        }
-        thinking_select = thinking_select
-            .item("enabled", "Enabled - Fixed token budget for thinking", "")
-            .item("disabled", "Disabled - No extended thinking", "");
-        if supports_adaptive {
-            thinking_select = thinking_select.initial_value("adaptive");
-        } else {
-            thinking_select = thinking_select.initial_value("disabled");
-        }
-        let thinking_type: &str = thinking_select.interact()?;
-        config.set_claude_thinking_type(thinking_type)?;
+    {
+        let supports_thinking = match temp_provider.fetch_model_info(&model).await {
+            Ok(model_info) => model_info.reasoning,
+            Err(_) => goose::model::ModelConfig::new(&model)
+                .map(|c| c.is_reasoning_model())
+                .unwrap_or(false),
+        };
 
-        if thinking_type == "adaptive" {
-            let effort: &str = cliclack::select("Select adaptive thinking effort level:")
-                .item("low", "Low - Minimal thinking, fastest responses", "")
+        if supports_thinking {
+            let effort: &str = cliclack::select("Select thinking effort:")
+                .item("off", "Off - No extended thinking", "")
+                .item("low", "Low - Better latency, lighter reasoning", "")
                 .item("medium", "Medium - Moderate thinking", "")
-                .item("high", "High - Deep reasoning (default)", "")
-                .item(
-                    "max",
-                    "Max - No constraints on thinking depth (Opus 4.6 only)",
-                    "",
-                )
-                .initial_value("high")
+                .item("high", "High - Deep reasoning", "")
+                .item("max", "Max - No constraints on thinking depth", "")
+                .initial_value("off")
                 .interact()?;
-            config.set_claude_thinking_effort(effort)?;
-        } else if thinking_type == "enabled" {
-            let budget: String = cliclack::input("Enter thinking budget (tokens):")
-                .default_input("16000")
-                .validate(|input: &String| match input.parse::<i32>() {
-                    Ok(n) if n > 0 => Ok(()),
-                    _ => Err("Please enter a valid positive number"),
-                })
-                .interact()?;
-            config.set_claude_thinking_budget(budget.parse::<i32>()?)?;
-        }
-    }
-
-    if provider_name == "chatgpt_codex" {
-        let valid_levels = reasoning_levels_for_model(&model);
-        if !valid_levels.is_empty() {
-            let mut select = cliclack::select("Select reasoning effort level:");
-            for &level in valid_levels {
-                let description = match level {
-                    "low" => "Low - Fast responses with lighter reasoning",
-                    "medium" => "Medium - Balances speed and reasoning depth for everyday tasks",
-                    "high" => "High - Greater reasoning depth for complex problems",
-                    "xhigh" => "Extra High - Extra high reasoning depth for complex problems",
-                    _ => "",
-                };
-                select = select.item(level, description, "");
-            }
-            select = select.initial_value("medium");
-            let effort: &str = select.interact()?;
-            config.set_chatgpt_codex_reasoning_effort(effort.to_string())?;
+            config.set_goose_thinking_effort(effort)?;
         }
     }
diff --git a/crates/goose-cli/src/session/builder.rs b/crates/goose-cli/src/session/builder.rs
index 0cd8e3a47e1a..a0139520816d 100644
--- a/crates/goose-cli/src/session/builder.rs
+++ b/crates/goose-cli/src/session/builder.rs
@@ -268,6 +268,7 @@ fn resolve_provider_and_model(
         .is_some_and(|mc| mc.model_name == model_name)
     {
         let mut config = saved_model_config.unwrap();
+        config.normalize_effort_suffix();
         if let Some(temp) = recipe_settings.and_then(|s| s.temperature) {
             config = config.with_temperature(Some(temp));
         }
diff --git a/crates/goose-server/src/openapi.rs b/crates/goose-server/src/openapi.rs
index ae33e32b697f..a2c5b2f99ba8 100644
--- a/crates/goose-server/src/openapi.rs
+++ b/crates/goose-server/src/openapi.rs
@@ -397,6 +397,7 @@ derive_utoipa!(IconTheme as IconThemeSchema);
         super::routes::config_management::read_all_config,
         super::routes::config_management::providers,
         super::routes::config_management::get_provider_models,
+        super::routes::config_management::get_provider_model_info,
         super::routes::config_management::get_slash_commands,
         super::routes::config_management::upsert_permissions,
         super::routes::config_management::create_custom_provider,
@@ -573,6 +574,7 @@ derive_utoipa!(IconTheme as IconThemeSchema);
             PrincipalType,
             ModelInfo,
             ModelConfig,
+            super::routes::config_management::ProviderModelInfoQuery,
             Session,
             goose::config::goose_mode::GooseMode,
             SessionInsights,
diff --git a/crates/goose-server/src/routes/agent.rs b/crates/goose-server/src/routes/agent.rs
index 3576de0f1124..7bdc7971800a 100644
--- a/crates/goose-server/src/routes/agent.rs
+++ b/crates/goose-server/src/routes/agent.rs
@@ -48,6 +48,7 @@ pub struct UpdateProviderRequest {
     model: Option<String>,
     session_id: String,
    context_limit: Option<usize>,
+    reasoning: Option<bool>,
    request_params: Option<HashMap<String, Value>>,
 }
 
@@ -595,7 +596,7 @@ async fn update_agent_provider(
         }
     };
 
-    let model_config = ModelConfig::new(&model)
+    let mut model_config = ModelConfig::new(&model)
         .map_err(|e| {
             (
                 StatusCode::BAD_REQUEST,
@@ -603,8 +604,12 @@ async fn update_agent_provider(
             )
         })?
         .with_canonical_limits(&payload.provider)
-        .with_context_limit(payload.context_limit)
-        .with_request_params(payload.request_params);
+        .with_context_limit(payload.context_limit);
+
+    if let Some(request_params) = payload.request_params {
+        model_config = model_config.with_merged_request_params(request_params);
+    }
+    model_config.reasoning = payload.reasoning;
 
     let extensions =
         EnabledExtensionsState::for_session(state.session_manager(), &payload.session_id, config)
diff --git a/crates/goose-server/src/routes/config_management.rs b/crates/goose-server/src/routes/config_management.rs
index e5203fe419f9..9d94a170b8f6 100644
--- a/crates/goose-server/src/routes/config_management.rs
+++ b/crates/goose-server/src/routes/config_management.rs
@@ -13,7 +13,7 @@ use goose::config::ExtensionEntry;
 use goose::config::{Config, ConfigError};
 use goose::custom_requests::SourceType;
 use goose::model::ModelConfig;
-use goose::providers::base::{ProviderMetadata, ProviderType};
+use goose::providers::base::{ModelInfo, ProviderMetadata, ProviderType};
 use goose::providers::canonical::maybe_get_canonical_model;
 use goose::providers::catalog::{
     get_provider_template, get_providers_by_format, ProviderCatalogEntry, ProviderFormat,
@@ -366,7 +366,7 @@ pub async fn providers() -> Result<Json<Vec<ProviderDetails>>, ErrorResponse> {
         ("name" = String, Path, description = "Provider name (e.g., openai)")
     ),
     responses(
-        (status = 200, description = "Models fetched successfully", body = [String]),
+        (status = 200, description = "Models fetched successfully", body = [ModelInfo]),
         (status = 400, description = "Unknown provider, provider not configured, or authentication error"),
         (status = 429, description = "Rate limit exceeded"),
         (status = 500, description = "Internal server error")
     )
@@ -374,7 +374,7 @@ pub async fn providers() -> Result<Json<Vec<ProviderDetails>>, ErrorResponse> {
 )]
 pub async fn get_provider_models(
     Path(name): Path<String>,
-) -> Result<Json<Vec<String>>, ErrorResponse> {
+) -> Result<Json<Vec<ModelInfo>>, ErrorResponse> {
     let all = get_providers().await.into_iter().collect::<Vec<_>>();
     let Some((metadata, provider_type)) = all.into_iter().find(|(m, _)| m.name == name) else {
         return Err(ErrorResponse::bad_request(format!(
@@ -392,7 +392,7 @@ pub async fn get_provider_models(
     let model_config = ModelConfig::new(&metadata.default_model)?.with_canonical_limits(&name);
     let provider = goose::providers::create(&name, model_config, Vec::new()).await?;
 
-    let models_result = provider.fetch_recommended_models().await;
+    let models_result = provider.fetch_recommended_model_info().await;
 
     match models_result {
         Ok(models) => Ok(Json(models)),
@@ -400,6 +400,52 @@ pub async fn get_provider_models(
     }
 }
 
+#[derive(Deserialize, ToSchema)]
+pub struct ProviderModelInfoQuery {
+    pub model: String,
+}
+
+#[utoipa::path(
+    post,
+    path = "/config/providers/{name}/model-info",
+    params(
+        ("name" = String, Path, description = "Provider name (e.g., openai)")
+    ),
+    request_body = ProviderModelInfoQuery,
+    responses(
+        (status = 200, description = "Model metadata fetched successfully", body = ModelInfo),
+        (status = 400, description = "Unknown provider, provider not configured, or authentication error"),
+        (status = 429, description = "Rate limit exceeded"),
+        (status = 500, description = "Internal server error")
+    )
+)]
+pub async fn get_provider_model_info(
+    Path(name): Path<String>,
+    Json(query): Json<ProviderModelInfoQuery>,
+) -> Result<Json<ModelInfo>, ErrorResponse> {
+    let all = get_providers().await.into_iter().collect::<Vec<_>>();
+    let Some((metadata, provider_type)) = all.into_iter().find(|(m, _)| m.name == name) else {
+        return Err(ErrorResponse::bad_request(format!(
+            "Unknown provider: {}",
+            name
+        )));
+    };
+    if !check_provider_configured(&metadata, provider_type) {
+        return Err(ErrorResponse::bad_request(format!(
+            "Provider '{}' is not configured",
+            name
+        )));
+    }
+
+    let model_config = ModelConfig::new(&query.model)?.with_canonical_limits(&name);
+    let provider = goose::providers::create(&name, model_config, Vec::new()).await?;
+    provider
+        .fetch_model_info(&query.model)
+        .await
+        .map(Json)
+        .map_err(Into::into)
+}
+
 #[derive(Deserialize, utoipa::IntoParams)]
 pub struct SlashCommandsQuery {
     /// Optional working directory to discover local skills from
@@ -471,6 +517,7 @@ pub struct ModelInfoData {
     pub model: String,
     pub context_limit: usize,
     pub max_output_tokens: Option<usize>,
+    pub reasoning: bool,
     pub input_token_cost: Option<f64>,
     pub output_token_cost: Option<f64>,
     pub cache_read_token_cost: Option<f64>,
@@ -508,6 +555,9 @@ pub async fn get_canonical_model_info(
         model: query.model.clone(),
         context_limit: canonical_model.limit.context,
         max_output_tokens: canonical_model.limit.output,
+        reasoning: canonical_model
+            .reasoning
+            .unwrap_or_else(|| ModelConfig::new_or_fail(&query.model).is_reasoning_model()),
         // Costs are per million tokens - client handles division for display
         input_token_cost: canonical_model.cost.input,
         output_token_cost: canonical_model.cost.output,
@@ -857,6 +907,10 @@ pub fn routes(state: Arc<AppState>) -> Router {
         .route("/config/extensions/{name}", delete(remove_extension))
         .route("/config/providers", get(providers))
         .route("/config/providers/{name}/models", get(get_provider_models))
+        .route(
+            "/config/providers/{name}/model-info",
+            post(get_provider_model_info),
+        )
         .route("/config/provider-catalog", get(get_provider_catalog))
         .route(
             "/config/provider-catalog/{id}",
diff --git a/crates/goose/src/acp/server.rs b/crates/goose/src/acp/server.rs
index 5597d52f8e4d..c8ad7f404689 100644
--- a/crates/goose/src/acp/server.rs
+++ b/crates/goose/src/acp/server.rs
@@ -3256,11 +3256,14 @@ impl GooseAcpAgent {
             current_model
         };
         let model = model_name.unwrap_or(&default_model);
-        let model_config = crate::model::ModelConfig::new(model)
+        let mut model_config = crate::model::ModelConfig::new(model)
            .invalid_params_err_ctx("Invalid model config")?
            .with_canonical_limits(&resolved_provider_name)
-            .with_context_limit(context_limit)
-            .with_request_params(request_params);
+            .with_context_limit(context_limit);
+
+        if let Some(request_params) = request_params {
+            model_config = model_config.with_merged_request_params(request_params);
+        }
 
         let extensions =
             EnabledExtensionsState::for_session(&self.session_manager, session_id, &config).await;
diff --git a/crates/goose/src/config/base.rs b/crates/goose/src/config/base.rs
index acead899de11..267ef7a8da26 100644
--- a/crates/goose/src/config/base.rs
+++ b/crates/goose/src/config/base.rs
@@ -1024,7 +1024,6 @@ config_value!(CLAUDE_CODE_COMMAND, String, "claude");
 config_value!(GEMINI_CLI_COMMAND, String, "gemini");
 config_value!(CURSOR_AGENT_COMMAND, String, "cursor-agent");
 config_value!(CODEX_COMMAND, String, "codex");
-config_value!(CODEX_REASONING_EFFORT, String, "high");
 config_value!(CODEX_ENABLE_SKILLS, String, "true");
 config_value!(CODEX_SKIP_GIT_CHECK, String, "false");
 config_value!(CHATGPT_CODEX_REASONING_EFFORT, String, "medium");
@@ -1038,12 +1037,48 @@ config_value!(GOOSE_PROMPT_EDITOR_ALWAYS, Option<bool>);
 config_value!(GOOSE_MAX_ACTIVE_AGENTS, usize);
 config_value!(GOOSE_DISABLE_SESSION_NAMING, bool);
 config_value!(GOOSE_DISABLE_TOOL_CALL_SUMMARY, bool);
-config_value!(GEMINI3_THINKING_LEVEL, String);
-config_value!(CLAUDE_THINKING_TYPE, String);
-config_value!(CLAUDE_THINKING_EFFORT, String);
-config_value!(CLAUDE_THINKING_BUDGET, i32);
+config_value!(GOOSE_THINKING_EFFORT, String);
 config_value!(GOOSE_DEFAULT_EXTENSION_TIMEOUT, u64);
 
+fn find_workspace_or_exe_root() -> Option<PathBuf> {
+    let exe = std::env::current_exe().ok()?;
+    let exe_dir = exe.parent()?.to_path_buf();
+
+    let mut path = exe;
+    while let Some(parent) = path.parent() {
+        let cargo_toml = parent.join("Cargo.toml");
+        if cargo_toml.exists() {
+            if let Ok(content) = std::fs::read_to_string(&cargo_toml) {
+                if content.contains("[workspace]") {
+                    return Some(parent.to_path_buf());
+                }
+            }
+        }
+        path = parent.to_path_buf();
+    }
+
+    Some(exe_dir)
+}
+
+pub fn load_init_config_from_workspace() -> Result<Value, ConfigError> {
+    let root = find_workspace_or_exe_root().ok_or_else(|| {
+        ConfigError::FileError(std::io::Error::new(
+            std::io::ErrorKind::NotFound,
+            "Could not determine executable path",
+        ))
+    })?;
+
+    let init_config_path = root.join("init-config.yaml");
+    if !init_config_path.exists() {
+        return Err(ConfigError::NotFound(
+            "init-config.yaml not found".to_string(),
+        ));
+    }
+
+    let init_content = std::fs::read_to_string(&init_config_path)?;
+    parse_yaml_content(&init_content)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/crates/goose/src/model.rs b/crates/goose/src/model.rs
index eebace1aa8d0..b8765b0b4a44 100644
--- a/crates/goose/src/model.rs
+++ b/crates/goose/src/model.rs
@@ -1,12 +1,51 @@
 use once_cell::sync::Lazy;
+use serde::de::Deserializer;
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
 use std::collections::HashMap;
+use std::fmt;
+use std::str::FromStr;
 use thiserror::Error;
 use utoipa::ToSchema;
 
 pub const DEFAULT_CONTEXT_LIMIT: usize = 128_000;
 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum ThinkingEffort {
+    Off,
+    Low,
+    Medium,
+    High,
+    Max,
+}
+
+impl FromStr for ThinkingEffort {
+    type Err = String;
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s.to_lowercase().as_str() {
+            "off" | "disabled" | "none" => Ok(Self::Off),
+            "low" => Ok(Self::Low),
+            "medium" | "med" => Ok(Self::Medium),
+            "high" => Ok(Self::High),
+            "max" | "xhigh" => Ok(Self::Max),
+            other => Err(format!("unknown thinking effort: '{other}'")),
+        }
+    }
+}
+
+impl fmt::Display for ThinkingEffort {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::Off => write!(f, "off"),
+            Self::Low => write!(f, "low"),
+            Self::Medium => write!(f, "medium"),
+            Self::High => write!(f, "high"),
+            Self::Max => write!(f, "max"),
+        }
+    }
+}
+
 #[derive(Debug, Clone, Deserialize)]
 struct PredefinedModel {
     name: String,
@@ -44,7 +83,7 @@ pub enum ConfigError {
     InvalidRange(String, String),
 }
 
-#[derive(Debug, Clone, Default, Serialize, Deserialize, ToSchema)]
+#[derive(Debug, Clone, Default, Serialize, ToSchema)]
 pub struct ModelConfig {
     pub model_name: String,
     pub context_limit: Option<usize>,
@@ -61,6 +100,44 @@ pub struct ModelConfig {
     pub reasoning: Option<bool>,
 }
 
+impl<'de> Deserialize<'de> for ModelConfig {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        #[derive(Deserialize)]
+        struct RawModelConfig {
+            model_name: String,
+            context_limit: Option<usize>,
+            temperature: Option<f32>,
+            max_tokens: Option<i32>,
+            toolshim: bool,
+            toolshim_model: Option<String>,
+            #[serde(default)]
+            fast_model_config: Option<Box<ModelConfig>>,
+            #[serde(default, skip_serializing_if = "Option::is_none")]
+            request_params: Option<HashMap<String, Value>>,
+            #[serde(default, skip_serializing_if = "Option::is_none")]
+            reasoning: Option<bool>,
+        }
+
+        let raw = RawModelConfig::deserialize(deserializer)?;
+        let mut config = Self {
+            model_name: raw.model_name,
+            context_limit: raw.context_limit,
+            temperature: raw.temperature,
+            max_tokens: raw.max_tokens,
+            toolshim: raw.toolshim,
+            toolshim_model: raw.toolshim_model,
+            fast_model_config: raw.fast_model_config,
+            request_params: raw.request_params,
+            reasoning: raw.reasoning,
+        };
+        config.normalize_effort_suffix();
+        Ok(config)
+    }
+}
+
 impl ModelConfig {
     pub fn new(model_name: &str) -> Result<Self, ConfigError> {
         Self::new_base(model_name.to_string(), None)
@@ -114,13 +191,14 @@ impl ModelConfig {
         let toolshim = Self::parse_toolshim()?;
         let toolshim_model = Self::parse_toolshim_model()?;
 
-        // Pick up request_params from predefined models (always applies)
+        // Pick up predefined model settings before legacy suffix normalization.
         let predefined = find_predefined_model(&model_name);
+        let predefined_context_limit = predefined.as_ref().and_then(|pm| pm.context_limit);
         let request_params = predefined.and_then(|pm| pm.request_params);
 
-        Ok(Self {
+        let mut config = Self {
             model_name,
-            context_limit,
+            context_limit: context_limit.or(predefined_context_limit),
             temperature,
             max_tokens,
             toolshim,
@@ -128,7 +206,9 @@ impl ModelConfig {
             fast_model_config: None,
             request_params,
             reasoning: None,
-        })
+        };
+        config.normalize_effort_suffix();
+        Ok(config)
     }
 
     pub fn with_canonical_limits(mut self, provider_name: &str) -> Self {
@@ -298,8 +378,17 @@ impl ModelConfig {
         Ok(self)
     }
 
-    pub fn with_request_params(mut self, params: Option<HashMap<String, Value>>) -> Self {
-        self.request_params = params;
+    pub fn with_merged_request_params(mut self, params: HashMap<String, Value>) -> Self {
+        match self.request_params.as_mut() {
+            Some(existing) => {
+                for (k, v) in params {
+                    existing.insert(k, v);
+                }
+            }
+            None => {
+                self.request_params = Some(params);
+            }
+        }
         self
     }
 
@@ -319,6 +408,21 @@ impl ModelConfig {
         crate::providers::utils::is_openai_responses_model(&self.model_name)
     }
 
+    pub fn is_reasoning_model(&self) -> bool {
+        if let Some(reasoning) = self.reasoning {
+            return reasoning;
+        }
+
+        self.is_openai_reasoning_model()
+            || self.model_name.to_lowercase().contains("claude")
+            || Self::is_gemini3_reasoning_model_name(&self.model_name)
+    }
+
+    fn is_gemini3_reasoning_model_name(model_name: &str) -> bool {
+        let lower = model_name.to_lowercase();
+        lower.starts_with("gemini-3") || lower.contains("/gemini-3") || lower.contains("-gemini-3")
+    }
+
     pub fn max_output_tokens(&self) -> i32 {
         if let Some(tokens) = self.max_tokens {
             return tokens;
@@ -327,6 +431,82 @@ impl ModelConfig {
         4_096
     }
 
+    pub fn normalize_effort_suffix(&mut self) {
+        if !self.is_openai_reasoning_model() {
+            return;
+        }
+        let parts: Vec<&str> = self.model_name.split('-').collect();
+        let last = match parts.last() {
+            Some(l) => *l,
+            None => return,
+        };
+        let effort = match last {
+            "none" => ThinkingEffort::Off,
+            "low" => ThinkingEffort::Low,
+            "medium" => ThinkingEffort::Medium,
+            "high" => ThinkingEffort::High,
+            "xhigh" => ThinkingEffort::Max,
+            _ => return,
+        };
+        self.model_name = parts[..parts.len() - 1].join("-");
+        let has_explicit_effort = self
+            .request_params
+            .as_ref()
+            .and_then(|p| p.get("thinking_effort"))
+            .is_some();
+        if !has_explicit_effort {
+            let params = self.request_params.get_or_insert_with(HashMap::new);
+            params.insert(
+                "thinking_effort".to_string(),
+                serde_json::json!(effort.to_string()),
+            );
+        }
+    }
+
+    pub fn thinking_effort(&self) -> Option<ThinkingEffort> {
+        self.get_config_param::<String>("thinking_effort", "GOOSE_THINKING_EFFORT")
+            .and_then(|s| s.parse::<ThinkingEffort>().ok())
+            .or_else(Self::legacy_thinking_effort)
+    }
+
+    fn legacy_thinking_effort() -> Option<ThinkingEffort> {
+        let config = crate::config::Config::global();
+
+        if let Ok(value) = config.get_param::<String>("CLAUDE_THINKING_TYPE") {
+            if let Some(effort) = match value.to_lowercase().as_str() {
+                "adaptive" | "enabled" => Some(ThinkingEffort::High),
+                "disabled" => Some(ThinkingEffort::Off),
+                _ => None,
+            } {
+                return Some(effort);
+            }
+        }
+
+        if let Ok(enabled) = config.get_param::<bool>("CLAUDE_THINKING_ENABLED") {
+            return Some(if enabled {
+                ThinkingEffort::High
+            } else {
+                ThinkingEffort::Off
+            });
+        }
+
+        if let Ok(value) = config.get_param::<String>("GEMINI3_THINKING_LEVEL") {
+            if let Some(effort) = Self::legacy_gemini3_thinking_effort(&value) {
+                return Some(effort);
+            }
+        }
+
+        None
+    }
+
+    fn legacy_gemini3_thinking_effort(value: &str) -> Option<ThinkingEffort> {
+        match value.to_lowercase().as_str() {
+            "low" => Some(ThinkingEffort::Low),
+            "high" => Some(ThinkingEffort::High),
+            _ => None,
+        }
+    }
+
     pub fn get_config_param<T: for<'de> serde::Deserialize<'de>>(
         &self,
         request_key: &str,
@@ -419,13 +599,10 @@ mod tests {
 
     #[test]
     fn test_get_config_param() {
-        let _guard = env_lock::lock_env([
-            ("CLAUDE_THINKING_EFFORT", Some("high")),
-            ("CLAUDE_THINKING_TYPE", None::<&str>),
-        ]);
+        let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", Some("high"))]);
 
         let mut params = HashMap::new();
-        params.insert("effort".to_string(), serde_json::json!("low"));
+        params.insert("thinking_effort".to_string(), serde_json::json!("low"));
 
         let config_with_params = ModelConfig {
             model_name: "test".to_string(),
@@ -439,11 +616,13 @@ mod tests {
         };
 
         assert_eq!(
-            config_with_params.get_config_param::<String>("effort", "CLAUDE_THINKING_EFFORT"),
+            config_with_params
+                .get_config_param::<String>("thinking_effort", "GOOSE_THINKING_EFFORT"),
             Some("low".to_string())
         );
         assert_eq!(
-            config_without_params.get_config_param::<String>("effort", "CLAUDE_THINKING_EFFORT"),
+            config_without_params
+                .get_config_param::<String>("thinking_effort", "GOOSE_THINKING_EFFORT"),
             Some("high".to_string())
         );
         assert_eq!(
@@ -453,6 +632,236 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_deserialize_preserves_fast_model_config() {
+        let config: ModelConfig = serde_json::from_value(serde_json::json!({
+            "model_name": "primary-model",
+            "context_limit": null,
+            "temperature": null,
+            "max_tokens": null,
+            "toolshim": false,
+            "toolshim_model": null,
+            "fast_model_config": {
+                "model_name": "fast-model",
+                "context_limit": 4096,
+                "temperature": null,
+                "max_tokens": 1024,
+                "toolshim": false,
+                "toolshim_model": null
+            }
+        }))
+        .unwrap();
+
+        let fast_config = config.fast_model_config.as_ref().unwrap();
+        assert_eq!(fast_config.model_name, "fast-model");
+        assert_eq!(fast_config.context_limit, Some(4096));
+        assert_eq!(fast_config.max_tokens, Some(1024));
+        assert_eq!(config.use_fast_model().model_name, "fast-model");
+    }
+
+    mod thinking_effort_tests {
+        use super::*;
+
+        #[test]
+        fn from_request_params() {
+            let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", None::<&str>)]);
+            let mut params = HashMap::new();
+            params.insert("thinking_effort".to_string(), serde_json::json!("medium"));
+            let config = ModelConfig {
+                model_name: "test".to_string(),
+                request_params: Some(params),
+                ..Default::default()
+            };
+            assert_eq!(config.thinking_effort(), Some(ThinkingEffort::Medium));
+        }
+
+        #[test]
+        fn from_env_var() {
+            let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", Some("high"))]);
+            let config = ModelConfig {
+                model_name: "test".to_string(),
+                ..Default::default()
+            };
+            assert_eq!(config.thinking_effort(), Some(ThinkingEffort::High));
+        }
+
+        #[test]
+        fn request_params_override_env() {
+            let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", Some("high"))]);
+            let mut params = HashMap::new();
+            params.insert("thinking_effort".to_string(), serde_json::json!("low"));
+            let config = ModelConfig {
+                model_name: "test".to_string(),
+                request_params: Some(params),
+                ..Default::default()
+            };
+            assert_eq!(config.thinking_effort(), Some(ThinkingEffort::Low));
+        }
+
+        #[test]
+        fn legacy_claude_thinking_type_fallback() {
+            for value in ["enabled", "adaptive"] {
+                let _guard = env_lock::lock_env([
+                    ("GOOSE_THINKING_EFFORT", None::<&str>),
+                    ("CLAUDE_THINKING_TYPE", Some(value)),
+                    ("CLAUDE_THINKING_ENABLED", None::<&str>),
+                    ("GEMINI3_THINKING_LEVEL", None::<&str>),
+                    ("ANTHROPIC_THINKING_BUDGET", None::<&str>),
("CLAUDE_THINKING_BUDGET", None::<&str>), + ("GEMINI25_THINKING_BUDGET", None::<&str>), + ]); + let config = ModelConfig { + model_name: "test".to_string(), + ..Default::default() + }; + assert_eq!(config.thinking_effort(), Some(ThinkingEffort::High)); + } + } + + #[test] + fn legacy_gemini3_thinking_level_mapping() { + assert_eq!( + ModelConfig::legacy_gemini3_thinking_effort("low"), + Some(ThinkingEffort::Low) + ); + assert_eq!( + ModelConfig::legacy_gemini3_thinking_effort("high"), + Some(ThinkingEffort::High) + ); + assert_eq!(ModelConfig::legacy_gemini3_thinking_effort("auto"), None); + } + + #[test] + fn legacy_gemini3_thinking_level_fallback() { + let temp_dir = tempfile::tempdir().unwrap(); + let temp_root = temp_dir.path().to_string_lossy().to_string(); + let _guard = env_lock::lock_env([ + ("GOOSE_PATH_ROOT", Some(temp_root.as_str())), + ("GOOSE_THINKING_EFFORT", None::<&str>), + ("CLAUDE_THINKING_TYPE", None::<&str>), + ("CLAUDE_THINKING_ENABLED", None::<&str>), + ("GEMINI3_THINKING_LEVEL", Some("high")), + ("ANTHROPIC_THINKING_BUDGET", None::<&str>), + ("CLAUDE_THINKING_BUDGET", None::<&str>), + ("GEMINI25_THINKING_BUDGET", None::<&str>), + ]); + let config = ModelConfig { + model_name: "gemini-3-pro".to_string(), + ..Default::default() + }; + assert_eq!(config.thinking_effort(), Some(ThinkingEffort::High)); + } + + #[test] + fn effort_suffix_stripped_from_model_name() { + let _guard = env_lock::lock_env([ + ("GOOSE_THINKING_EFFORT", None::<&str>), + ("GOOSE_MAX_TOKENS", None::<&str>), + ("GOOSE_TEMPERATURE", None::<&str>), + ("GOOSE_CONTEXT_LIMIT", None::<&str>), + ("GOOSE_TOOLSHIM", None::<&str>), + ("GOOSE_TOOLSHIM_OLLAMA_MODEL", None::<&str>), + ]); + let config = ModelConfig::new("o3-mini-high").unwrap(); + assert_eq!(config.model_name, "o3-mini"); + assert_eq!(config.thinking_effort(), Some(ThinkingEffort::High)); + } + + #[test] + fn none_suffix_stripped_from_model_name() { + let _guard = env_lock::lock_env([ + ("GOOSE_THINKING_EFFORT", Some("high")), + ("GOOSE_MAX_TOKENS", None::<&str>), + ("GOOSE_TEMPERATURE", None::<&str>), + ("GOOSE_CONTEXT_LIMIT", None::<&str>), + ("GOOSE_TOOLSHIM", None::<&str>), + ("GOOSE_TOOLSHIM_OLLAMA_MODEL", None::<&str>), + ]); + let config = ModelConfig::new("o3-mini-none").unwrap(); + assert_eq!(config.model_name, "o3-mini"); + assert_eq!(config.thinking_effort(), Some(ThinkingEffort::Off)); + } + + #[test] + fn xhigh_suffix_stripped_from_model_name() { + let _guard = env_lock::lock_env([ + ("GOOSE_THINKING_EFFORT", Some("low")), + ("GOOSE_MAX_TOKENS", None::<&str>), + ("GOOSE_TEMPERATURE", None::<&str>), + ("GOOSE_CONTEXT_LIMIT", None::<&str>), + ("GOOSE_TOOLSHIM", None::<&str>), + ("GOOSE_TOOLSHIM_OLLAMA_MODEL", None::<&str>), + ]); + let config = ModelConfig::new("gpt-5.4-xhigh").unwrap(); + assert_eq!(config.model_name, "gpt-5.4"); + assert_eq!(config.thinking_effort(), Some(ThinkingEffort::Max)); + } + + #[test] + fn effort_suffix_not_stripped_when_thinking_effort_set() { + let _guard = env_lock::lock_env([ + ("GOOSE_THINKING_EFFORT", None::<&str>), + ("GOOSE_MAX_TOKENS", None::<&str>), + ("GOOSE_TEMPERATURE", None::<&str>), + ("GOOSE_CONTEXT_LIMIT", None::<&str>), + ("GOOSE_TOOLSHIM", None::<&str>), + ("GOOSE_TOOLSHIM_OLLAMA_MODEL", None::<&str>), + ]); + let mut params = HashMap::new(); + params.insert("thinking_effort".to_string(), serde_json::json!("low")); + let mut config = ModelConfig::new("o3-mini-high").unwrap(); + // Suffix was already normalized during new(), but if request_params + // were set before construction, the 
suffix would not be stripped. + // Verify the normalized state: + assert_eq!(config.model_name, "o3-mini"); + + // Now simulate setting explicit effort after construction + config.request_params = Some(params); + assert_eq!(config.thinking_effort(), Some(ThinkingEffort::Low)); + } + + #[test] + fn no_suffix_no_change() { + let _guard = env_lock::lock_env([ + ("GOOSE_THINKING_EFFORT", None::<&str>), + ("GOOSE_MAX_TOKENS", None::<&str>), + ("GOOSE_TEMPERATURE", None::<&str>), + ("GOOSE_CONTEXT_LIMIT", None::<&str>), + ("GOOSE_TOOLSHIM", None::<&str>), + ("GOOSE_TOOLSHIM_OLLAMA_MODEL", None::<&str>), + ]); + let config = ModelConfig::new("o3-mini").unwrap(); + assert_eq!(config.model_name, "o3-mini"); + } + + #[test] + fn non_reasoning_model_suffix_not_stripped() { + let _guard = env_lock::lock_env([ + ("GOOSE_THINKING_EFFORT", None::<&str>), + ("GOOSE_MAX_TOKENS", None::<&str>), + ("GOOSE_TEMPERATURE", None::<&str>), + ("GOOSE_CONTEXT_LIMIT", None::<&str>), + ("GOOSE_TOOLSHIM", None::<&str>), + ("GOOSE_TOOLSHIM_OLLAMA_MODEL", None::<&str>), + ]); + let config = ModelConfig::new("claude-sonnet-4-high").unwrap(); + assert_eq!(config.model_name, "claude-sonnet-4-high"); + } + + #[test] + fn parse_aliases() { + assert_eq!("off".parse::(), Ok(ThinkingEffort::Off)); + assert_eq!( + "disabled".parse::(), + Ok(ThinkingEffort::Off) + ); + assert_eq!("med".parse::(), Ok(ThinkingEffort::Medium)); + assert_eq!("max".parse::(), Ok(ThinkingEffort::Max)); + assert_eq!("xhigh".parse::(), Ok(ThinkingEffort::Max)); + assert!("invalid".parse::().is_err()); + } + } + mod with_canonical_limits { use super::*; @@ -597,4 +1006,36 @@ mod tests { assert!(!ModelConfig::new_or_fail("llama-3-70b").is_openai_reasoning_model()); } } + + mod is_reasoning_model { + use super::*; + + const ENV_LOCK_KEYS: [(&str, Option<&str>); 5] = [ + ("GOOSE_MAX_TOKENS", None), + ("GOOSE_TEMPERATURE", None), + ("GOOSE_CONTEXT_LIMIT", None), + ("GOOSE_TOOLSHIM", None), + ("GOOSE_TOOLSHIM_OLLAMA_MODEL", None), + ]; + + #[test] + fn includes_reasoning_model_families() { + let _guard = env_lock::lock_env(ENV_LOCK_KEYS); + assert!(ModelConfig::new_or_fail("o3-mini").is_reasoning_model()); + assert!(ModelConfig::new_or_fail("claude-sonnet-4").is_reasoning_model()); + assert!(ModelConfig::new_or_fail("gemini-3-pro").is_reasoning_model()); + } + + #[test] + fn uses_explicit_metadata_first() { + let _guard = env_lock::lock_env(ENV_LOCK_KEYS); + let mut config = ModelConfig::new_or_fail("provider-alias"); + config.reasoning = Some(true); + assert!(config.is_reasoning_model()); + + let mut config = ModelConfig::new_or_fail("claude-sonnet-4"); + config.reasoning = Some(false); + assert!(!config.is_reasoning_model()); + } + } } diff --git a/crates/goose/src/providers/base.rs b/crates/goose/src/providers/base.rs index f7a61090f1b9..246532f6977f 100644 --- a/crates/goose/src/providers/base.rs +++ b/crates/goose/src/providers/base.rs @@ -394,6 +394,9 @@ pub struct ModelInfo { pub currency: Option, /// Whether this model supports cache control pub supports_cache_control: Option, + /// Whether this model supports reasoning/thinking controls + #[serde(default)] + pub reasoning: bool, } impl ModelInfo { @@ -406,6 +409,7 @@ impl ModelInfo { output_token_cost: None, currency: None, supports_cache_control: None, + reasoning: false, } } @@ -423,10 +427,37 @@ impl ModelInfo { output_token_cost: Some(output_cost), currency: Some("$".to_string()), supports_cache_control: None, + reasoning: false, } } } +fn model_info_for_provider_model(provider_name: &str, 
model_name: &str) -> ModelInfo { + let registry = CanonicalModelRegistry::bundled().ok(); + let canonical = registry.as_ref().and_then(|registry| { + let canonical_id = map_to_canonical_model(provider_name, model_name, registry)?; + let (provider, model) = canonical_id.split_once('/')?; + registry.get(provider, model) + }); + + let reasoning = canonical + .as_ref() + .and_then(|model| model.reasoning) + .unwrap_or_else(|| ModelConfig::new_or_fail(model_name).is_reasoning_model()); + + ModelInfo { + name: model_name.to_string(), + context_limit: ModelConfig::new_or_fail(model_name) + .with_canonical_limits(provider_name) + .context_limit(), + input_token_cost: None, + output_token_cost: None, + currency: None, + supports_cache_control: None, + reasoning, + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ToSchema)] pub enum ProviderType { Preferred, @@ -477,16 +508,7 @@ impl ProviderMetadata { default_model: default_model.to_string(), known_models: model_names .iter() - .map(|&model_name| ModelInfo { - name: model_name.to_string(), - context_limit: ModelConfig::new_or_fail(model_name) - .with_canonical_limits(name) - .context_limit(), - input_token_cost: None, - output_token_cost: None, - currency: None, - supports_cache_control: None, - }) + .map(|&model_name| model_info_for_provider_model(name, model_name)) .collect(), model_doc_link: model_doc_link.to_string(), config_keys, @@ -899,6 +921,19 @@ pub trait Provider: Send + Sync { Ok(vec![]) } + async fn fetch_supported_model_info(&self) -> Result, ProviderError> { + Ok(self + .fetch_supported_models() + .await? + .iter() + .map(|model_name| model_info_for_provider_model(self.get_name(), model_name)) + .collect()) + } + + async fn fetch_model_info(&self, model_name: &str) -> Result { + Ok(model_info_for_provider_model(self.get_name(), model_name)) + } + fn skip_canonical_filtering(&self) -> bool { false } @@ -964,6 +999,15 @@ pub trait Provider: Send + Sync { } } + async fn fetch_recommended_model_info(&self) -> Result, ProviderError> { + Ok(self + .fetch_recommended_models() + .await? 
+            .iter()
+            .map(|model_name| model_info_for_provider_model(self.get_name(), model_name))
+            .collect())
+    }
+
     async fn map_to_canonical_model(
         &self,
         provider_model: &str,
@@ -1721,6 +1765,7 @@ mod tests {
             output_token_cost: None,
             currency: None,
             supports_cache_control: None,
+            reasoning: false,
         };
 
         assert_eq!(info.context_limit, 1000);
@@ -1732,6 +1777,7 @@ mod tests {
             output_token_cost: None,
             currency: None,
             supports_cache_control: None,
+            reasoning: false,
         };
 
         assert_eq!(info, info2);
@@ -1743,6 +1789,7 @@ mod tests {
             output_token_cost: None,
             currency: None,
             supports_cache_control: None,
+            reasoning: false,
         };
 
         assert_ne!(info, info3);
     }
diff --git a/crates/goose/src/providers/chatgpt_codex.rs b/crates/goose/src/providers/chatgpt_codex.rs
index 4bdc8c58024d..1cc68abc7342 100644
--- a/crates/goose/src/providers/chatgpt_codex.rs
+++ b/crates/goose/src/providers/chatgpt_codex.rs
@@ -225,6 +225,29 @@ fn get_reasoning_effort(model_name: &str) -> String {
     }
 }
 
+fn reasoning_effort_for_config(model_config: &ModelConfig) -> Option<String> {
+    use crate::model::ThinkingEffort;
+
+    model_config
+        .thinking_effort()
+        .map(|effort| {
+            let valid_levels = reasoning_levels_for_model(&model_config.model_name);
+            let preferred_levels: &[&str] = match effort {
+                ThinkingEffort::Off => return None,
+                ThinkingEffort::Low => &["low", "medium", "high", "xhigh"],
+                ThinkingEffort::Medium => &["medium", "high", "low", "xhigh"],
+                ThinkingEffort::High => &["high", "medium", "xhigh", "low"],
+                ThinkingEffort::Max => &["xhigh", "high", "medium", "low"],
+            };
+
+            preferred_levels
+                .iter()
+                .find(|level| valid_levels.contains(level))
+                .map(|level| (*level).to_string())
+        })
+        .unwrap_or_else(|| Some(get_reasoning_effort(&model_config.model_name)))
+}
+
 fn create_codex_request(
     model_config: &ModelConfig,
     system: &str,
@@ -232,7 +255,7 @@ fn create_codex_request(
     tools: &[Tool],
 ) -> Result<Value, ProviderError> {
     let input_items = build_input_items(messages)?;
-    let reasoning_effort = get_reasoning_effort(&model_config.model_name);
+    let reasoning_effort = reasoning_effort_for_config(model_config);
 
     let instructions = match model_config.model_name.as_str() {
         "gpt-5.3-codex" => format!("{GPT_53_CODEX_TOOL_PREAMBLE}\n\n{system}"),
@@ -243,7 +266,6 @@ fn create_codex_request(
         "model": model_config.model_name,
         "input": input_items,
         "store": false,
-        "reasoning": {"effort": reasoning_effort},
         "instructions": instructions,
     });
 
@@ -273,6 +295,13 @@ fn create_codex_request(
         payload_obj.insert("temperature".to_string(), json!(temp));
     }
 
+    if let Some(reasoning_effort) = reasoning_effort {
+        payload_obj.insert(
+            "reasoning".to_string(),
+            json!({ "effort": reasoning_effort }),
+        );
+    }
+
     Ok(payload)
 }
 
@@ -1173,6 +1202,42 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_create_codex_request_reasoning_effort_from_unified_thinking() {
+        let mut params = std::collections::HashMap::new();
+        params.insert("thinking_effort".to_string(), json!("max"));
+        let mut config = ModelConfig::new("gpt-5.3-codex").unwrap();
+        config.request_params = Some(params);
+
+        let payload = create_codex_request(&config, "sys", &[], &[]).unwrap();
+        assert_eq!(payload["reasoning"]["effort"], "xhigh");
+        assert!(payload.get("reasoning_effort").is_none());
+    }
+
+    #[test]
+    fn test_create_codex_request_caps_unified_thinking_to_supported_level() {
+        let mut params = std::collections::HashMap::new();
+        params.insert("thinking_effort".to_string(), json!("max"));
+        let mut config = ModelConfig::new("unknown-model").unwrap();
+        config.request_params = Some(params);
+
+        let payload = create_codex_request(&config, "sys", &[], &[]).unwrap();
+        assert_eq!(payload["reasoning"]["effort"], "high");
+        assert!(payload.get("reasoning_effort").is_none());
+    }
+
+    #[test]
+    fn test_create_codex_request_off_omits_reasoning_for_codex_models() {
+        let mut params = std::collections::HashMap::new();
+        params.insert("thinking_effort".to_string(), json!("off"));
+        let mut config = ModelConfig::new("gpt-5.2-codex").unwrap();
+        config.request_params = Some(params);
+
+        let payload = create_codex_request(&config, "sys", &[], &[]).unwrap();
+        assert!(payload.get("reasoning").is_none());
+        assert!(payload.get("reasoning_effort").is_none());
+    }
+
     #[test_case(
         JwtClaims {
             chatgpt_account_id: Some("account-1".to_string()),
diff --git a/crates/goose/src/providers/codex.rs b/crates/goose/src/providers/codex.rs
index a2075f52aa34..8b420d4146e3 100644
--- a/crates/goose/src/providers/codex.rs
+++ b/crates/goose/src/providers/codex.rs
@@ -16,7 +16,7 @@ use super::base::{
 };
 use super::errors::ProviderError;
 use super::utils::{filter_extensions_from_system_prompt, RequestLog};
-use crate::config::base::{CodexCommand, CodexReasoningEffort, CodexSkipGitCheck};
+use crate::config::base::{CodexCommand, CodexSkipGitCheck};
 use crate::config::paths::Paths;
 use crate::config::search_path::SearchPaths;
 use crate::config::{Config, ExtensionConfig, GooseMode};
@@ -50,7 +50,7 @@ pub struct CodexProvider {
     #[serde(skip)]
     name: String,
     /// Reasoning effort level (none, low, medium, high, xhigh)
-    reasoning_effort: String,
+    reasoning_effort: Option<String>,
     /// Whether to skip git repo check
     skip_git_check: bool,
     /// CLI config overrides for MCP servers
@@ -60,12 +60,40 @@ pub struct CodexProvider {
 }
 
 impl CodexProvider {
-    fn supports_reasoning_effort(model_name: &str, reasoning_effort: &str) -> bool {
-        if !CODEX_REASONING_LEVELS.contains(&reasoning_effort) {
-            return false;
+    fn legacy_reasoning_effort() -> Option<crate::model::ThinkingEffort> {
+        Config::global()
+            .get_param::<String>("CODEX_REASONING_EFFORT")
+            .ok()
+            .and_then(|effort| match effort.to_lowercase().as_str() {
+                "none" => Some(crate::model::ThinkingEffort::Off),
+                "low" => Some(crate::model::ThinkingEffort::Low),
+                "medium" => Some(crate::model::ThinkingEffort::Medium),
+                "high" => Some(crate::model::ThinkingEffort::High),
+                "xhigh" => Some(crate::model::ThinkingEffort::Max),
+                _ => None,
+            })
+    }
+
+    fn map_thinking_effort(
+        _model_name: &str,
+        effort: Option<crate::model::ThinkingEffort>,
+    ) -> Option<String> {
+        use crate::model::ThinkingEffort;
+        match effort
+            .or_else(Self::legacy_reasoning_effort)
+            .unwrap_or(ThinkingEffort::High)
+        {
+            ThinkingEffort::Off => Some("none".to_string()),
+            ThinkingEffort::Low => Some("low".to_string()),
+            ThinkingEffort::Medium => Some("medium".to_string()),
+            ThinkingEffort::High => Some("high".to_string()),
+            ThinkingEffort::Max => Some("xhigh".to_string()),
         }
+    }
 
-        if reasoning_effort == "none" && model_name.contains("codex") {
+    #[cfg(test)]
+    fn supports_reasoning_effort(_model_name: &str, reasoning_effort: &str) -> bool {
+        if !CODEX_REASONING_LEVELS.contains(&reasoning_effort) {
             return false;
         }
@@ -115,7 +143,7 @@ impl CodexProvider {
         println!("=== CODEX PROVIDER DEBUG ===");
         println!("Command: {:?}", self.command);
         println!("Model: {}", self.model.model_name);
-        println!("Reasoning effort: {}", self.reasoning_effort);
+        println!("Reasoning effort: {:?}", self.reasoning_effort);
         println!("Skip git check: {}", self.skip_git_check);
         println!("Prompt length: {} chars", prompt.len());
         println!("Prompt: {}", prompt);
@@ -142,11 +170,10 @@
cmd.arg("-m").arg(&self.model.model_name); } - // Reasoning effort configuration - cmd.arg("-c").arg(format!( - "model_reasoning_effort=\"{}\"", - self.reasoning_effort - )); + if let Some(reasoning_effort) = &self.reasoning_effort { + cmd.arg("-c") + .arg(format!("model_reasoning_effort=\"{}\"", reasoning_effort)); + } for override_config in &self.mcp_config_overrides { cmd.arg("-c").arg(override_config); @@ -604,7 +631,6 @@ impl ProviderDef for CodexProvider { CODEX_DOC_URL, vec![ ConfigKey::from_value_type::(true, false, true), - ConfigKey::from_value_type::(false, false, true), ConfigKey::from_value_type::(false, false, true), ], ) @@ -619,24 +645,8 @@ impl ProviderDef for CodexProvider { let command: String = config.get_codex_command().unwrap_or_default().into(); let resolved_command = SearchPaths::builder().with_npm().resolve(command)?; - // Get reasoning effort from config, default to "high" - let reasoning_effort = config - .get_codex_reasoning_effort() - .map(String::from) - .unwrap_or_else(|_| "high".to_string()); - - // Validate reasoning effort let reasoning_effort = - if Self::supports_reasoning_effort(&model.model_name, &reasoning_effort) { - reasoning_effort - } else { - tracing::warn!( - "Invalid CODEX_REASONING_EFFORT '{}' for model '{}', using 'high'", - reasoning_effort, - model.model_name - ); - "high".to_string() - }; + Self::map_thinking_effort(&model.model_name, model.thinking_effort()); // Get skip_git_check from config, default to false let skip_git_check = config @@ -925,7 +935,7 @@ mod tests { command: PathBuf::from("codex"), model: ModelConfig::new("gpt-5.2-codex").unwrap(), name: "codex".to_string(), - reasoning_effort: "high".to_string(), + reasoning_effort: Some("high".to_string()), skip_git_check: false, mcp_config_overrides: Vec::new(), mode_by_session: tokio::sync::RwLock::new(HashMap::new()), @@ -946,7 +956,7 @@ mod tests { command: PathBuf::from("codex"), model: ModelConfig::new("gpt-5.2-codex").unwrap(), name: "codex".to_string(), - reasoning_effort: "high".to_string(), + reasoning_effort: Some("high".to_string()), skip_git_check: false, mcp_config_overrides: Vec::new(), mode_by_session: tokio::sync::RwLock::new(HashMap::new()), @@ -980,7 +990,7 @@ mod tests { command: PathBuf::from("codex"), model: ModelConfig::new("gpt-5.2-codex").unwrap(), name: "codex".to_string(), - reasoning_effort: "high".to_string(), + reasoning_effort: Some("high".to_string()), skip_git_check: false, mcp_config_overrides: Vec::new(), mode_by_session: tokio::sync::RwLock::new(HashMap::new()), @@ -1005,7 +1015,7 @@ mod tests { #[test] fn test_reasoning_effort_support_by_model() { assert!(CodexProvider::supports_reasoning_effort("gpt-5.2", "none")); - assert!(!CodexProvider::supports_reasoning_effort( + assert!(CodexProvider::supports_reasoning_effort( "gpt-5.2-codex", "none" )); @@ -1029,7 +1039,7 @@ mod tests { command: PathBuf::from("codex"), model: ModelConfig::new("gpt-5.2-codex").unwrap(), name: "codex".to_string(), - reasoning_effort: "high".to_string(), + reasoning_effort: Some("high".to_string()), skip_git_check: false, mcp_config_overrides: Vec::new(), mode_by_session: tokio::sync::RwLock::new(HashMap::new()), @@ -1055,7 +1065,7 @@ mod tests { command: PathBuf::from("codex"), model: ModelConfig::new("gpt-5.2-codex").unwrap(), name: "codex".to_string(), - reasoning_effort: "high".to_string(), + reasoning_effort: Some("high".to_string()), skip_git_check: false, mcp_config_overrides: Vec::new(), mode_by_session: tokio::sync::RwLock::new(HashMap::new()), @@ -1128,7 +1138,7 
@@ mod tests { command: PathBuf::from("codex"), model: ModelConfig::new("gpt-5.2-codex").unwrap(), name: "codex".to_string(), - reasoning_effort: "high".to_string(), + reasoning_effort: Some("high".to_string()), skip_git_check: false, mcp_config_overrides: Vec::new(), mode_by_session: tokio::sync::RwLock::new(HashMap::new()), @@ -1145,7 +1155,7 @@ mod tests { command: PathBuf::from("codex"), model: ModelConfig::new("gpt-5.2-codex").unwrap(), name: "codex".to_string(), - reasoning_effort: "high".to_string(), + reasoning_effort: Some("high".to_string()), skip_git_check: false, mcp_config_overrides: Vec::new(), mode_by_session: tokio::sync::RwLock::new(HashMap::new()), @@ -1214,20 +1224,56 @@ mod tests { #[test] fn test_config_keys() { let metadata = CodexProvider::metadata(); - assert_eq!(metadata.config_keys.len(), 3); + assert_eq!(metadata.config_keys.len(), 2); // First key should be CODEX_COMMAND (required) assert_eq!(metadata.config_keys[0].name, "CODEX_COMMAND"); assert!(metadata.config_keys[0].required); assert!(!metadata.config_keys[0].secret); - // Second key should be CODEX_REASONING_EFFORT (optional) - assert_eq!(metadata.config_keys[1].name, "CODEX_REASONING_EFFORT"); + // Second key should be CODEX_SKIP_GIT_CHECK (optional) + assert_eq!(metadata.config_keys[1].name, "CODEX_SKIP_GIT_CHECK"); assert!(!metadata.config_keys[1].required); + } - // Third key should be CODEX_SKIP_GIT_CHECK (optional) - assert_eq!(metadata.config_keys[2].name, "CODEX_SKIP_GIT_CHECK"); - assert!(!metadata.config_keys[2].required); + #[test] + fn test_map_thinking_effort() { + use crate::model::ThinkingEffort; + + let _guard = env_lock::lock_env([ + ("CODEX_REASONING_EFFORT", None::<&str>), + ("GOOSE_THINKING_EFFORT", None::<&str>), + ]); + + assert_eq!( + CodexProvider::map_thinking_effort("gpt-5.2-codex", Some(ThinkingEffort::Off)), + Some("none".to_string()) + ); + assert_eq!( + CodexProvider::map_thinking_effort("gpt-5.2", Some(ThinkingEffort::Off)), + Some("none".to_string()) + ); + assert_eq!( + CodexProvider::map_thinking_effort("gpt-5.2-codex", Some(ThinkingEffort::Max)), + Some("xhigh".to_string()) + ); + assert_eq!( + CodexProvider::map_thinking_effort("gpt-5.2-codex", None), + Some("high".to_string()) + ); + } + + #[test] + fn test_map_thinking_effort_uses_legacy_codex_env() { + let _guard = env_lock::lock_env([ + ("CODEX_REASONING_EFFORT", Some("low")), + ("GOOSE_THINKING_EFFORT", None::<&str>), + ]); + + assert_eq!( + CodexProvider::map_thinking_effort("gpt-5.2-codex", None), + Some("low".to_string()) + ); } #[test] @@ -1236,7 +1282,7 @@ mod tests { command: PathBuf::from("codex"), model: ModelConfig::new("gpt-5.2-codex").unwrap(), name: "codex".to_string(), - reasoning_effort: "high".to_string(), + reasoning_effort: Some("high".to_string()), skip_git_check: false, mcp_config_overrides: Vec::new(), mode_by_session: tokio::sync::RwLock::new(HashMap::new()), diff --git a/crates/goose/src/providers/databricks.rs b/crates/goose/src/providers/databricks.rs index 2695871beb55..a7f512e16f1f 100644 --- a/crates/goose/src/providers/databricks.rs +++ b/crates/goose/src/providers/databricks.rs @@ -3,12 +3,14 @@ use async_trait::async_trait; use futures::future::BoxFuture; use serde::{Deserialize, Serialize}; use serde_json::Value; +use std::collections::HashSet; +use std::sync::LazyLock; use std::sync::{Arc, Mutex}; -use std::time::Duration; +use std::time::{Duration, Instant}; use super::api_client::{ApiClient, AuthMethod, AuthProvider}; use super::base::{ - ConfigKey, MessageStream, Provider, 
ProviderDef, ProviderMetadata, + ConfigKey, MessageStream, ModelInfo, Provider, ProviderDef, ProviderMetadata, DEFAULT_PROVIDER_TIMEOUT_SECS, }; use super::embedding::EmbeddingCapable; @@ -21,7 +23,7 @@ use super::openai_compatible::{ stream_openai_compat, stream_responses_compat, }; use super::retry::ProviderRetry; -use super::utils::{ImageFormat, RequestLog}; +use super::utils::{is_openai_responses_model, ImageFormat, RequestLog}; use crate::config::ConfigError; use crate::conversation::message::Message; use crate::instance_id::get_instance_id; @@ -33,11 +35,35 @@ use crate::providers::retry::{ use rmcp::model::Tool; use serde_json::json; +#[derive(Debug, Clone)] +struct DatabricksEndpointInfo { + name: String, + upstream_model_name: Option, + upstream_model_provider: Option, + reasoning: Option, +} + +#[derive(Debug, Clone)] +struct DatabricksUpstreamModel { + name: String, + provider: Option, +} + +#[derive(Debug, Clone)] +struct CachedDatabricksEndpointInfo { + info: DatabricksEndpointInfo, + fetched_at: Instant, +} + const DEFAULT_CLIENT_ID: &str = "databricks-cli"; const DEFAULT_REDIRECT_URL: &str = "http://localhost"; const DEFAULT_SCOPES: &[&str] = &["all-apis", "offline_access"]; const DATABRICKS_PROVIDER_NAME: &str = "databricks"; +const DATABRICKS_ENDPOINT_METADATA_TTL_SECS: u64 = 60; +static DATABRICKS_ENDPOINT_INFO_CACHE: LazyLock< + Mutex>, +> = LazyLock::new(|| Mutex::new(std::collections::HashMap::new())); pub const DATABRICKS_DEFAULT_MODEL: &str = "databricks-claude-sonnet-4"; const DATABRICKS_DEFAULT_FAST_MODEL: &str = "databricks-claude-haiku-4-5"; pub const DATABRICKS_KNOWN_MODELS: &[&str] = &[ @@ -116,6 +142,8 @@ impl AuthProvider for DatabricksAuthProvider { pub struct DatabricksProvider { #[serde(skip)] api_client: ApiClient, + #[serde(skip)] + host: String, auth: DatabricksAuth, model: ModelConfig, image_format: ImageFormat, @@ -172,13 +200,14 @@ impl DatabricksProvider { })); let api_client = ApiClient::with_timeout( - host, + host.clone(), auth_method, Duration::from_secs(DEFAULT_PROVIDER_TIMEOUT_SECS), )?; let mut provider = Self { api_client, + host, auth, model: model.clone(), image_format: ImageFormat::OpenAi, @@ -240,13 +269,14 @@ impl DatabricksProvider { })); let api_client = ApiClient::with_timeout( - host, + host.clone(), auth_method, Duration::from_secs(DEFAULT_PROVIDER_TIMEOUT_SECS), )?; Ok(Self { api_client, + host, auth, model, image_format: ImageFormat::OpenAi, @@ -270,7 +300,252 @@ impl DatabricksProvider { } fn is_responses_model(model_name: &str) -> bool { - super::utils::is_openai_responses_model(model_name) + is_openai_responses_model(model_name) + } + + fn is_claude_model(model_name: &str) -> bool { + model_name.to_lowercase().contains("claude") + } + + fn is_reasoning_capable_model_name(model_name: &str) -> bool { + Self::is_claude_model(model_name) || Self::is_responses_model(model_name) + } + + fn endpoint_model_candidates(value: &Value) -> Vec { + let mut candidates: Vec = Vec::new(); + + fn get_string_at(value: &Value, path: &[&str]) -> Option { + path.iter() + .try_fold(value, |current, key| current.get(*key)) + .and_then(|v| v.as_str()) + .filter(|s| !s.is_empty()) + .map(ToString::to_string) + } + + fn push_candidate( + name: Option, + provider: Option, + candidates: &mut Vec, + ) { + if let Some(name) = name { + if !candidates.iter().any(|candidate| candidate.name == name) { + candidates.push(DatabricksUpstreamModel { name, provider }); + } + } + } + + for config_key in ["config", "pending_config"] { + let Some(config) = 
value.get(config_key) else { + continue; + }; + + for collection_key in ["served_entities", "served_models"] { + let Some(entities) = config.get(collection_key).and_then(|v| v.as_array()) else { + continue; + }; + + for entity in entities { + push_candidate( + get_string_at(entity, &["external_model", "name"]), + get_string_at(entity, &["external_model", "provider"]), + &mut candidates, + ); + push_candidate( + get_string_at(entity, &["foundation_model", "name"]), + get_string_at(entity, &["foundation_model", "provider"]), + &mut candidates, + ); + push_candidate( + get_string_at(entity, &["entity_name"]), + None, + &mut candidates, + ); + } + } + } + + candidates + } + + fn endpoint_info_from_value(endpoint: &Value) -> Option { + let name = endpoint.get("name")?.as_str()?.to_string(); + let upstream_model = Self::endpoint_model_candidates(endpoint) + .into_iter() + .find(|candidate| candidate.name != name); + let upstream_model_name = upstream_model.as_ref().map(|model| model.name.clone()); + let upstream_model_provider = upstream_model.and_then(|model| model.provider); + + let reasoning = upstream_model_name + .as_deref() + .map(Self::is_reasoning_capable_model_name) + .or_else(|| Some(Self::is_reasoning_capable_model_name(&name))); + + Some(DatabricksEndpointInfo { + name, + upstream_model_name, + upstream_model_provider, + reasoning, + }) + } + + async fn fetch_endpoint_info( + &self, + endpoint_name: &str, + ) -> Result { + let response = self + .api_client + .request( + None, + &format!( + "api/2.0/serving-endpoints/{}", + urlencoding::encode(endpoint_name) + ), + ) + .response_get() + .await + .map_err(|e| { + ProviderError::RequestFailed(format!( + "Failed to fetch Databricks endpoint metadata: {}", + e + )) + })?; + + if !response.status().is_success() { + let status = response.status(); + let detail = response.text().await.unwrap_or_default(); + return Err(ProviderError::RequestFailed(format!( + "Failed to fetch Databricks endpoint metadata: {} {}", + status, detail + ))); + } + + let json: Value = response.json().await.map_err(|e| { + ProviderError::RequestFailed(format!( + "Failed to parse Databricks endpoint metadata: {}", + e + )) + })?; + + Self::endpoint_info_from_value(&json).ok_or_else(|| { + ProviderError::RequestFailed( + "Unexpected response format from Databricks endpoint metadata".to_string(), + ) + }) + } + + async fn resolve_endpoint_info( + &self, + endpoint_name: &str, + ) -> Result { + const MAX_MODEL_SERVING_HOPS: usize = 4; + + let original_endpoint_name = endpoint_name.to_string(); + let mut current_endpoint_name = endpoint_name.to_string(); + let mut visited = HashSet::new(); + let mut last_info: Option = None; + + for _ in 0..MAX_MODEL_SERVING_HOPS { + if !visited.insert(current_endpoint_name.clone()) { + break; + } + + let info = self.fetch_endpoint_info(¤t_endpoint_name).await?; + let next_endpoint_name = match ( + info.upstream_model_provider.as_deref(), + info.upstream_model_name.as_deref(), + ) { + (Some("databricks-model-serving"), Some(next_endpoint_name)) + if !visited.contains(next_endpoint_name) => + { + Some(next_endpoint_name.to_string()) + } + _ => None, + }; + + if let Some(next_endpoint_name) = next_endpoint_name { + last_info = Some(info); + current_endpoint_name = next_endpoint_name; + continue; + } + + return Ok(if info.name == original_endpoint_name { + info + } else { + let upstream_model_name = info + .upstream_model_name + .clone() + .or_else(|| Some(info.name.clone())); + DatabricksEndpointInfo { + name: original_endpoint_name, + 
upstream_model_name, + upstream_model_provider: info.upstream_model_provider.clone(), + reasoning: info.reasoning, + } + }); + } + + last_info + .map(|info| DatabricksEndpointInfo { + name: original_endpoint_name, + upstream_model_name: info.upstream_model_name, + upstream_model_provider: info.upstream_model_provider, + reasoning: info.reasoning, + }) + .ok_or_else(|| { + ProviderError::RequestFailed( + "Failed to resolve Databricks endpoint metadata".to_string(), + ) + }) + } + + async fn resolve_endpoint_info_cached( + &self, + endpoint_name: &str, + ) -> Result { + let cache_key = format!("{}:{}", self.host, endpoint_name); + let cached = DATABRICKS_ENDPOINT_INFO_CACHE + .lock() + .unwrap() + .get(&cache_key) + .cloned(); + + if let Some(cached) = cached { + if cached.fetched_at.elapsed() + < Duration::from_secs(DATABRICKS_ENDPOINT_METADATA_TTL_SECS) + { + return Ok(cached.info); + } + } + + let info = self.resolve_endpoint_info(endpoint_name).await?; + DATABRICKS_ENDPOINT_INFO_CACHE.lock().unwrap().insert( + cache_key, + CachedDatabricksEndpointInfo { + info: info.clone(), + fetched_at: Instant::now(), + }, + ); + Ok(info) + } + + fn model_info_from_endpoint(info: DatabricksEndpointInfo) -> ModelInfo { + let context_model = info.upstream_model_name.as_deref().unwrap_or(&info.name); + let context_limit = ModelConfig::new_or_fail(context_model) + .with_canonical_limits(DATABRICKS_PROVIDER_NAME) + .context_limit(); + let reasoning = info + .reasoning + .unwrap_or_else(|| ModelConfig::new_or_fail(context_model).is_reasoning_model()); + + ModelInfo { + name: info.name, + context_limit, + input_token_cost: None, + output_token_cost: None, + currency: None, + supports_cache_control: None, + reasoning, + } } fn get_endpoint_path(&self, model_name: &str, is_embedding: bool) -> String { @@ -378,11 +653,49 @@ impl Provider for DatabricksProvider { messages: &[Message], tools: &[Tool], ) -> Result { - let path = self.get_endpoint_path(&model_config.model_name, false); + let (endpoint_name, _) = super::utils::extract_reasoning_effort(&model_config.model_name); + let endpoint_info = self.resolve_endpoint_info_cached(&endpoint_name).await.ok(); + let effective_model_name = endpoint_info + .as_ref() + .and_then(|info| info.upstream_model_name.as_deref()) + .unwrap_or(&model_config.model_name); + let is_responses_model = Self::is_responses_model(&model_config.model_name) + || Self::is_responses_model(effective_model_name); + let path = if is_responses_model { + "serving-endpoints/responses".to_string() + } else { + self.get_endpoint_path(&model_config.model_name, false) + }; let client_request_id = self.build_client_request_id(session_id); - if Self::is_responses_model(&model_config.model_name) { - let mut payload = create_responses_request(model_config, system, messages, tools)?; + if is_responses_model { + let responses_model_config; + let request_model_config = if effective_model_name != model_config.model_name { + responses_model_config = { + let mut config = model_config.clone(); + config.model_name = effective_model_name.to_string(); + config + }; + &responses_model_config + } else { + model_config + }; + let mut payload = + create_responses_request(request_model_config, system, messages, tools)?; + payload["model"] = Value::String(endpoint_name.clone()); + if payload.get("reasoning").is_none() { + if let Some(effort) = model_config.thinking_effort().and_then(|effort| { + super::utils::openai_reasoning_effort_for_thinking(effective_model_name, effort) + }) { + 
+                    payload.as_object_mut().unwrap().insert(
+                        "reasoning".to_string(),
+                        json!({
+                            "effort": effort,
+                            "summary": "auto",
+                        }),
+                    );
+                }
+            }
             payload["stream"] = Value::Bool(true);
             if let Some(ref client_request_id) = client_request_id {
                 payload["client_request_id"] = Value::String(client_request_id.clone());
             }
@@ -406,8 +719,27 @@ impl Provider for DatabricksProvider {
 
             stream_responses_compat(response, log)
         } else {
-            let mut payload =
-                create_request(model_config, system, messages, tools, &self.image_format)?;
+            let format_model_config;
+            let request_model_config = if Self::is_claude_model(effective_model_name)
+                && !Self::is_claude_model(&model_config.model_name)
+            {
+                format_model_config = {
+                    let mut config = model_config.clone();
+                    config.model_name = effective_model_name.to_string();
+                    config
+                };
+                &format_model_config
+            } else {
+                model_config
+            };
+
+            let mut payload = create_request(
+                request_model_config,
+                system,
+                messages,
+                tools,
+                &self.image_format,
+            )?;
             payload
                 .as_object_mut()
                 .expect("payload should have model key")
@@ -498,6 +830,15 @@ impl Provider for DatabricksProvider {
     }
 
     async fn fetch_supported_models(&self) -> Result<Vec<String>, ProviderError> {
+        Ok(self
+            .fetch_supported_model_info()
+            .await?
+            .into_iter()
+            .map(|model| model.name)
+            .collect())
+    }
+
+    async fn fetch_supported_model_info(&self) -> Result<Vec<ModelInfo>, ProviderError> {
         let response = self
             .api_client
             .request(None, "api/2.0/serving-endpoints")
@@ -530,18 +871,25 @@
             )
         })?;
 
-        let models: Vec<String> = endpoints
-            .iter()
-            .filter_map(|endpoint| {
-                endpoint
-                    .get("name")
-                    .and_then(|v| v.as_str())
-                    .map(|name| name.to_string())
-            })
-            .collect();
+        let mut models = Vec::new();
+        for endpoint in endpoints {
+            if let Some(endpoint_info) = Self::endpoint_info_from_value(endpoint) {
+                models.push(Self::model_info_from_endpoint(endpoint_info));
+            }
+        }
 
         Ok(models)
     }
+
+    async fn fetch_model_info(&self, model_name: &str) -> Result<ModelInfo, ProviderError> {
+        let (endpoint_name, _) = super::utils::extract_reasoning_effort(model_name);
+        let endpoint_info = self.resolve_endpoint_info_cached(&endpoint_name).await?;
+        Ok(Self::model_info_from_endpoint(endpoint_info))
+    }
+
+    async fn fetch_recommended_model_info(&self) -> Result<Vec<ModelInfo>, ProviderError> {
+        self.fetch_supported_model_info().await
+    }
 }
 
 #[async_trait]
@@ -596,6 +944,7 @@ mod tests {
                 super::super::api_client::AuthMethod::NoAuth,
             )
             .unwrap(),
+            host: "https://example.com".to_string(),
             auth: DatabricksAuth::Token("fake".into()),
             model: ModelConfig::new_or_fail("databricks-gpt-5.4"),
             image_format: ImageFormat::OpenAi,
@@ -628,4 +977,91 @@
         );
     }
 }
+
+    #[test]
+    fn endpoint_metadata_marks_reasoning_alias_from_external_model() {
+        let endpoint = json!({
+            "name": "goose",
+            "config": {
+                "served_entities": [{
+                    "name": "current",
+                    "external_model": {
+                        "name": "claude-opus-4.6",
+                        "provider": "anthropic",
+                        "task": "llm/v1/chat"
+                    }
+                }]
+            }
+        });
+
+        let info = DatabricksProvider::endpoint_info_from_value(&endpoint).unwrap();
+
+        assert_eq!(info.name, "goose");
+        assert_eq!(info.upstream_model_name.as_deref(), Some("claude-opus-4.6"));
+        assert_eq!(info.reasoning, Some(true));
+    }
+
+    #[test]
+    fn endpoint_metadata_captures_databricks_model_serving_hop() {
+        let endpoint = json!({
+            "name": "goose",
+            "config": {
+                "served_entities": [{
+                    "external_model": {
+                        "name": "databricks-claude-opus-4-6",
+                        "provider": "databricks-model-serving",
+                        "task": "llm/v1/chat"
+                    }
+                }]
+            }
+        });
+
+        let info =
DatabricksProvider::endpoint_info_from_value(&endpoint).unwrap(); + + assert_eq!(info.name, "goose"); + assert_eq!( + info.upstream_model_name.as_deref(), + Some("databricks-claude-opus-4-6") + ); + assert_eq!( + info.upstream_model_provider.as_deref(), + Some("databricks-model-serving") + ); + assert_eq!(info.reasoning, Some(true)); + } + + #[test] + fn endpoint_metadata_marks_reasoning_alias_from_pending_gpt_model() { + let endpoint = json!({ + "name": "goose", + "pending_config": { + "served_entities": [{ + "external_model": { + "name": "gpt-5.5", + "provider": "openai", + "task": "llm/v1/chat" + } + }] + } + }); + + let info = DatabricksProvider::endpoint_info_from_value(&endpoint).unwrap(); + + assert_eq!(info.name, "goose"); + assert_eq!(info.upstream_model_name.as_deref(), Some("gpt-5.5")); + assert_eq!(info.reasoning, Some(true)); + } + + #[test] + fn endpoint_metadata_uses_endpoint_name_when_no_upstream_model_exists() { + let endpoint = json!({ + "name": "goose-gpt-5-5" + }); + + let info = DatabricksProvider::endpoint_info_from_value(&endpoint).unwrap(); + + assert_eq!(info.name, "goose-gpt-5-5"); + assert_eq!(info.upstream_model_name, None); + assert_eq!(info.reasoning, Some(true)); + } } diff --git a/crates/goose/src/providers/formats/anthropic.rs b/crates/goose/src/providers/formats/anthropic.rs index 72450770bbd0..92803dea4b9b 100644 --- a/crates/goose/src/providers/formats/anthropic.rs +++ b/crates/goose/src/providers/formats/anthropic.rs @@ -1,6 +1,6 @@ use crate::conversation::message::{Message, MessageContent}; use crate::mcp_utils::extract_text_from_resource; -use crate::model::ModelConfig; +use crate::model::{ModelConfig, ThinkingEffort}; use crate::providers::base::Usage; use crate::providers::errors::ProviderError; use crate::providers::utils::{convert_image, ImageFormat}; @@ -37,7 +37,6 @@ macro_rules! 
string_enum {
 }
 
 string_enum!(ThinkingType { Adaptive => "adaptive", Enabled => "enabled", Disabled => "disabled" });
-string_enum!(ThinkingEffort { Low => "low", Medium => "medium", High => "high", Max => "max" });
 
 #[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
 pub struct AnthropicFormatOptions {
@@ -80,33 +79,16 @@
     }
 
     let is_adaptive_model = supports_adaptive_thinking(&model_config.model_name);
+    let effort = model_config.thinking_effort();
 
-    if let Some(s) =
-        model_config.get_config_param::<String>("thinking_type", "CLAUDE_THINKING_TYPE")
-    {
-        let tt = s.parse::<ThinkingType>().unwrap_or_else(|e| {
-            tracing::warn!("{e}");
-            ThinkingType::Disabled
-        });
-        if tt == ThinkingType::Adaptive && !is_adaptive_model {
-            tracing::warn!(
-                "Adaptive thinking not supported for {}, disabling thinking",
-                model_config.model_name
-            );
-            return ThinkingType::Disabled;
-        }
-        return tt;
+    if effort.is_none() && legacy_thinking_budget_tokens().is_some() {
+        return ThinkingType::Enabled;
     }
 
-    if is_adaptive_model {
-        ThinkingType::Adaptive
-    } else if std::env::var("CLAUDE_THINKING_ENABLED").is_ok() {
-        tracing::warn!(
-            "CLAUDE_THINKING_ENABLED is deprecated, use CLAUDE_THINKING_TYPE=enabled instead"
-        );
-        ThinkingType::Enabled
-    } else {
-        ThinkingType::Disabled
+    match effort.unwrap_or(ThinkingEffort::Off) {
+        ThinkingEffort::Off => ThinkingType::Disabled,
+        _ if is_adaptive_model => ThinkingType::Adaptive,
+        _ => ThinkingType::Enabled,
     }
 }
 
@@ -510,35 +492,45 @@ pub fn get_usage(data: &Value) -> Result<Usage, ProviderError> {
 }
 
 pub fn thinking_effort(model_config: &ModelConfig) -> ThinkingEffort {
-    match model_config.get_config_param::<String>("effort", "CLAUDE_THINKING_EFFORT") {
-        Some(s) => s.parse().unwrap_or_else(|e| {
-            tracing::warn!("{e}, defaulting to 'high'");
-            ThinkingEffort::High
-        }),
-        None => ThinkingEffort::High,
-    }
+    model_config
+        .thinking_effort()
+        .unwrap_or(ThinkingEffort::High)
 }
 
-fn thinking_budget_tokens(model_config: &ModelConfig) -> i32 {
-    let request_param = model_config
+pub fn thinking_budget_tokens(model_config: &ModelConfig) -> i32 {
+    if let Some(request_param) = model_config
         .request_params
         .as_ref()
         .and_then(|params| params.get("budget_tokens"))
-        .and_then(|v| serde_json::from_value(v.clone()).ok());
-
-    request_param
-        .or_else(|| {
-            crate::config::Config::global()
-                .get_param::<i32>("ANTHROPIC_THINKING_BUDGET")
-                .ok()
-        })
-        .or_else(|| {
-            crate::config::Config::global()
-                .get_param::<i32>("CLAUDE_THINKING_BUDGET")
-                .ok()
-        })
-        .unwrap_or(16000)
-        .max(1024)
+        .and_then(|v| serde_json::from_value::<i32>(v.clone()).ok())
+    {
+        return request_param.max(1024);
+    }
+
+    if let Some(budget) = legacy_thinking_budget_tokens() {
+        return budget;
+    }
+
+    let effort = model_config
+        .thinking_effort()
+        .unwrap_or(ThinkingEffort::High);
+    match effort {
+        ThinkingEffort::Off => 1024,
+        ThinkingEffort::Low => 4000,
+        ThinkingEffort::Medium => 10000,
+        ThinkingEffort::High => 16000,
+        ThinkingEffort::Max => 32000,
+    }
+}
+
+fn legacy_thinking_budget_tokens() -> Option<i32> {
+    let config = crate::config::Config::global();
+    for key in ["ANTHROPIC_THINKING_BUDGET", "CLAUDE_THINKING_BUDGET"] {
+        if let Ok(budget) = config.get_param::<i32>(key) {
+            return Some(budget.max(1024));
+        }
+    }
+    None
 }
 
 fn apply_thinking_config(
@@ -1181,14 +1173,14 @@
     #[test]
     fn test_create_request_adaptive_thinking_for_46_models() -> Result<()> {
-        let _guard = env_lock::lock_env([
-            ("CLAUDE_THINKING_TYPE", Some("adaptive")),
-            ("CLAUDE_THINKING_EFFORT", Some("high")),
-
("CLAUDE_THINKING_ENABLED", None::<&str>), - ]); + let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", None::<&str>)]); + + let mut params = std::collections::HashMap::new(); + params.insert("thinking_effort".to_string(), json!("high")); let mut config = cfg("claude-opus-4-6"); config.max_tokens = Some(4096); + config.request_params = Some(params); let messages = vec![Message::user().with_text("Hello")]; let payload = create_request(&config, "system", &messages, &[])?; @@ -1202,27 +1194,20 @@ mod tests { #[test] fn test_create_request_enabled_thinking_with_budget() -> Result<()> { let _guard = env_lock::lock_env([ - ("CLAUDE_THINKING_TYPE", None::<&str>), - ("CLAUDE_THINKING_EFFORT", None::<&str>), - ("CLAUDE_THINKING_ENABLED", None::<&str>), - ("ANTHROPIC_THINKING_BUDGET", None::<&str>), - ("CLAUDE_THINKING_BUDGET", None::<&str>), + ("GOOSE_THINKING_EFFORT", None::<&str>), + ("ANTHROPIC_PRESERVE_THINKING_CONTEXT", None::<&str>), ]); - let mut params = std::collections::HashMap::new(); - params.insert("thinking_type".to_string(), json!("enabled")); - params.insert("budget_tokens".to_string(), json!(10000)); - - let mut config = cfg("claude-3-7-sonnet-20250219"); + let mut config = cfg_with_effort("claude-3-7-sonnet-20250219", "high"); config.max_tokens = Some(4096); - config.request_params = Some(params); let messages = vec![Message::user().with_text("Hello")]; let payload = create_request(&config, "system", &messages, &[])?; assert_eq!(payload["thinking"]["type"], "enabled"); - assert_eq!(payload["thinking"]["budget_tokens"], 10000); - assert_eq!(payload["max_tokens"], 4096 + 10000); + let budget = payload["thinking"]["budget_tokens"].as_i64().unwrap(); + assert!(budget > 0); + assert_eq!(payload["max_tokens"], 4096 + budget); Ok(()) } @@ -1230,12 +1215,11 @@ mod tests { #[test] fn test_create_request_disabled_thinking_no_thinking_field() -> Result<()> { let _guard = env_lock::lock_env([ - ("CLAUDE_THINKING_TYPE", None::<&str>), - ("CLAUDE_THINKING_ENABLED", None::<&str>), + ("GOOSE_THINKING_EFFORT", None::<&str>), ("ANTHROPIC_PRESERVE_THINKING_CONTEXT", None::<&str>), ]); - let config = cfg("claude-sonnet-4-20250514"); + let config = cfg_with_effort("claude-sonnet-4-20250514", "off"); let messages = vec![Message::user().with_text("Hello")]; let payload = create_request(&config, "system", &messages, &[])?; @@ -1449,9 +1433,9 @@ mod tests { } } - fn cfg_with_thinking(name: &str, tt: &str) -> ModelConfig { + fn cfg_with_effort(name: &str, effort: &str) -> ModelConfig { let mut params = std::collections::HashMap::new(); - params.insert("thinking_type".to_string(), json!(tt)); + params.insert("thinking_effort".to_string(), json!(effort)); ModelConfig { model_name: name.to_string(), request_params: Some(params), @@ -1460,50 +1444,61 @@ mod tests { } #[test] - fn test_thinking_type_explicit_params() { + fn test_thinking_type_from_effort() { + let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", None::<&str>)]); + // Adaptive model with effort → adaptive assert_eq!( - thinking_type(&cfg_with_thinking("claude-opus-4-6", "adaptive")), + thinking_type(&cfg_with_effort("claude-opus-4-6", "high")), ThinkingType::Adaptive ); + // Adaptive model with off → disabled assert_eq!( - thinking_type(&cfg_with_thinking("claude-opus-4-6", "disabled")), + thinking_type(&cfg_with_effort("claude-opus-4-6", "off")), ThinkingType::Disabled ); + // Non-adaptive Claude with effort → enabled assert_eq!( - thinking_type(&cfg_with_thinking("claude-3-7-sonnet-20250219", "enabled")), + 
thinking_type(&cfg_with_effort("claude-3-7-sonnet-20250219", "high")), ThinkingType::Enabled ); + // Non-adaptive Claude with off → disabled assert_eq!( - thinking_type(&cfg_with_thinking("claude-3-7-sonnet-20250219", "adaptive")), - ThinkingType::Disabled - ); - assert_eq!( - thinking_type(&cfg_with_thinking("claude-opus-4-6", "adapttive")), + thinking_type(&cfg_with_effort("claude-3-7-sonnet-20250219", "off")), ThinkingType::Disabled ); } + #[test] + fn test_thinking_budget_uses_legacy_env() { + let _guard = env_lock::lock_env([ + ("GOOSE_THINKING_EFFORT", None::<&str>), + ("ANTHROPIC_THINKING_BUDGET", Some("8192")), + ("CLAUDE_THINKING_BUDGET", None::<&str>), + ]); + let config = cfg_with_effort("claude-3-7-sonnet-20250219", "high"); + assert_eq!(thinking_budget_tokens(&config), 8192); + } + #[test] fn test_thinking_type_non_claude_always_disabled() { - assert_eq!(thinking_type(&cfg("gpt-4o")), ThinkingType::Disabled); assert_eq!( - thinking_type(&cfg_with_thinking("gpt-4o", "enabled")), + thinking_type(&cfg_with_effort("gpt-4o", "off")), + ThinkingType::Disabled + ); + assert_eq!( + thinking_type(&cfg_with_effort("gpt-4o", "high")), ThinkingType::Disabled ); } #[test] - fn test_thinking_type_env_var_override() { - let _guard = env_lock::lock_env([ - ("CLAUDE_THINKING_TYPE", Some("adaptive")), - ("CLAUDE_THINKING_ENABLED", None::<&str>), - ]); + fn test_thinking_type_off_means_disabled() { assert_eq!( - thinking_type(&cfg("claude-opus-4-6")), - ThinkingType::Adaptive + thinking_type(&cfg_with_effort("claude-opus-4-6", "off")), + ThinkingType::Disabled ); assert_eq!( - thinking_type(&cfg("claude-3-7-sonnet-20250219")), + thinking_type(&cfg_with_effort("claude-3-7-sonnet-20250219", "off")), ThinkingType::Disabled ); } diff --git a/crates/goose/src/providers/formats/databricks.rs b/crates/goose/src/providers/formats/databricks.rs index 584bbdf8234c..7918c75004eb 100644 --- a/crates/goose/src/providers/formats/databricks.rs +++ b/crates/goose/src/providers/formats/databricks.rs @@ -1,10 +1,12 @@ use crate::conversation::message::{Message, MessageContent}; use crate::model::ModelConfig; -use crate::providers::formats::anthropic::{thinking_effort, thinking_type, ThinkingType}; +use crate::providers::formats::anthropic::{ + thinking_budget_tokens, thinking_effort, thinking_type, ThinkingType, +}; use crate::providers::utils::{ convert_image, detect_image_path, extract_reasoning_effort, is_openai_responses_model, - is_valid_function_name, load_image_file, safely_parse_json, sanitize_function_name, - ImageFormat, + is_valid_function_name, load_image_file, openai_reasoning_effort_for_thinking, + safely_parse_json, sanitize_function_name, ImageFormat, }; use anyhow::{anyhow, Error}; use rmcp::model::{ @@ -245,11 +247,7 @@ fn apply_claude_thinking_config(payload: &mut Value, model_config: &ModelConfig) ); } ThinkingType::Enabled => { - let budget_tokens = model_config - .get_config_param::("budget_tokens", "CLAUDE_THINKING_BUDGET") - .unwrap_or(16000) - .max(1024); - + let budget_tokens = thinking_budget_tokens(model_config); let max_tokens = model_config.max_output_tokens() + budget_tokens; obj.insert("max_tokens".to_string(), json!(max_tokens)); obj.insert( @@ -582,8 +580,17 @@ pub fn create_request( )); } - let (model_name, reasoning_effort) = extract_reasoning_effort(&model_config.model_name); + let (model_name, legacy_reasoning_effort) = extract_reasoning_effort(&model_config.model_name); let is_openai_reasoning_model = is_openai_responses_model(&model_name); + let reasoning_effort = if 
is_openai_reasoning_model { + model_config + .thinking_effort() + .map_or(legacy_reasoning_effort, |effort| { + openai_reasoning_effort_for_thinking(&model_name, effort) + }) + } else { + None + }; let system_message = DatabricksMessage { role: "system".to_string(), @@ -652,6 +659,9 @@ pub fn create_request( if let Some(params) = &model_config.request_params { if let Some(obj) = payload.as_object_mut() { for (key, value) in params { + if key == "thinking_effort" { + continue; + } obj.insert(key.clone(), value.clone()); } } @@ -1042,19 +1052,63 @@ mod tests { #[test] fn test_create_request_reasoning_effort() -> anyhow::Result<()> { + let mut params = std::collections::HashMap::new(); + params.insert("thinking_effort".to_string(), serde_json::json!("high")); let model_config = ModelConfig { - model_name: "o3-mini-high".to_string(), + model_name: "o3-mini".to_string(), context_limit: Some(4096), temperature: None, max_tokens: Some(1024), toolshim: false, toolshim_model: None, fast_model_config: None, - request_params: None, + request_params: Some(params), + reasoning: None, + }; + let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?; + assert_eq!(request["reasoning_effort"], "high"); + Ok(()) + } + + #[test] + fn test_create_request_off_effort_preserves_none() -> anyhow::Result<()> { + let mut params = std::collections::HashMap::new(); + params.insert("thinking_effort".to_string(), serde_json::json!("off")); + let model_config = ModelConfig { + model_name: "databricks-o3-mini".to_string(), + context_limit: Some(4096), + temperature: None, + max_tokens: Some(1024), + toolshim: false, + toolshim_model: None, + fast_model_config: None, + request_params: Some(params), + reasoning: None, + }; + let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?; + assert_eq!(request["reasoning_effort"], "none"); + assert!(request.get("thinking_effort").is_none()); + Ok(()) + } + + #[test] + fn test_create_request_max_effort_uses_supported_level() -> anyhow::Result<()> { + let mut params = std::collections::HashMap::new(); + params.insert("thinking_effort".to_string(), serde_json::json!("max")); + let model_config = ModelConfig { + model_name: "databricks-gpt-5.2-pro".to_string(), + context_limit: Some(4096), + temperature: None, + max_tokens: Some(1024), + toolshim: false, + toolshim_model: None, + fast_model_config: None, + request_params: Some(params), reasoning: None, }; let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?; assert_eq!(request["reasoning_effort"], "high"); + assert!(request.get("thinking_effort").is_none()); Ok(()) } @@ -1117,15 +1171,11 @@ mod tests { #[test] fn test_create_request_adaptive_thinking_for_46_models() -> anyhow::Result<()> { - let _guard = env_lock::lock_env([ - ("CLAUDE_THINKING_TYPE", Some("adaptive")), - ("CLAUDE_THINKING_EFFORT", Some("low")), - ("CLAUDE_THINKING_ENABLED", None::<&str>), - ("CLAUDE_THINKING_BUDGET", None::<&str>), - ]); - let mut model_config = ModelConfig::new_or_fail("databricks-claude-opus-4-6"); model_config.max_tokens = Some(4096); + let mut params = std::collections::HashMap::new(); + params.insert("thinking_effort".to_string(), serde_json::json!("low")); + model_config.request_params = Some(params); let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?; @@ -1140,30 +1190,47 @@ mod tests { #[test] fn test_create_request_enabled_thinking_with_budget() -> anyhow::Result<()> { - let _guard = env_lock::lock_env([ - 
("CLAUDE_THINKING_TYPE", None::<&str>), - ("CLAUDE_THINKING_ENABLED", None::<&str>), - ("CLAUDE_THINKING_BUDGET", Some("10000")), - ]); - let mut model_config = ModelConfig::new_or_fail("databricks-claude-3-7-sonnet"); model_config.max_tokens = Some(4096); - model_config = model_config.with_request_params(Some(std::collections::HashMap::from([( - "thinking_type".to_string(), - json!("enabled"), - )]))); + let mut params = std::collections::HashMap::new(); + params.insert("thinking_effort".to_string(), serde_json::json!("high")); + model_config.request_params = Some(params); let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?; assert_eq!(request["thinking"]["type"], "enabled"); - assert_eq!(request["thinking"]["budget_tokens"], 10000); - assert_eq!(request["max_tokens"], 14096); + assert_eq!(request["thinking"]["budget_tokens"], 16000); + assert_eq!(request["max_tokens"], 20096); assert_eq!(request["temperature"], 2); assert!(request.get("max_completion_tokens").is_none()); Ok(()) } + #[test] + fn test_create_request_enabled_thinking_budget_tracks_effort() -> anyhow::Result<()> { + for (effort, expected_budget) in [ + ("low", 4000), + ("medium", 10000), + ("high", 16000), + ("max", 32000), + ] { + let mut model_config = ModelConfig::new_or_fail("databricks-claude-3-7-sonnet"); + model_config.max_tokens = Some(4096); + let mut params = std::collections::HashMap::new(); + params.insert("thinking_effort".to_string(), serde_json::json!(effort)); + model_config.request_params = Some(params); + + let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?; + + assert_eq!(request["thinking"]["type"], "enabled"); + assert_eq!(request["thinking"]["budget_tokens"], expected_budget); + assert_eq!(request["max_tokens"], 4096 + expected_budget); + } + + Ok(()) + } + #[test] fn test_response_to_message_claude_thinking() -> anyhow::Result<()> { let response = json!({ diff --git a/crates/goose/src/providers/formats/google.rs b/crates/goose/src/providers/formats/google.rs index 298efb93fbfd..b35c2db504a0 100644 --- a/crates/goose/src/providers/formats/google.rs +++ b/crates/goose/src/providers/formats/google.rs @@ -542,22 +542,18 @@ fn get_thinking_config(model_config: &ModelConfig) -> Option { } if is_gemini_3 { - let thinking_level_str = model_config - .get_config_param::("thinking_level", "GEMINI3_THINKING_LEVEL") - .map(|s| s.to_lowercase()) - .unwrap_or_else(|| "low".to_string()); - - let thinking_level = match thinking_level_str.as_str() { - "high" => ThinkingLevel::High, - "low" => ThinkingLevel::Low, - invalid => { - tracing::warn!( - "Invalid thinking level '{}' for model '{}'. Valid levels: low, high. 
Using 'low'.", - invalid, - model_config.model_name, - ); + use crate::model::ThinkingEffort; + let effort = model_config + .thinking_effort() + .unwrap_or(ThinkingEffort::Off); + if effort == ThinkingEffort::Off { + return None; + } + let thinking_level = match effort { + ThinkingEffort::Off | ThinkingEffort::Low | ThinkingEffort::Medium => { ThinkingLevel::Low } + ThinkingEffort::High | ThinkingEffort::Max => ThinkingLevel::High, }; Some(ThinkingConfig { @@ -1378,7 +1374,11 @@ data: [DONE]"#; fn test_get_thinking_config() { use crate::model::ModelConfig; - let config = ModelConfig::new("gemini-3-pro").unwrap(); + // Test 1: Gemini 3 model with low thinking effort + let mut params = std::collections::HashMap::new(); + params.insert("thinking_effort".to_string(), serde_json::json!("low")); + let mut config = ModelConfig::new("gemini-3-pro").unwrap(); + config.request_params = Some(params); let result = get_thinking_config(&config); assert!(result.is_some()); let thinking_config = result.unwrap(); @@ -1386,9 +1386,18 @@ data: [DONE]"#; assert!(thinking_config.thinking_budget.is_none()); assert!(thinking_config.include_thoughts); - let config = ModelConfig::new("Gemini-3-Flash").unwrap(); + // Test 2: Gemini 3 model with high thinking effort + let mut params = std::collections::HashMap::new(); + params.insert("thinking_effort".to_string(), serde_json::json!("high")); + let mut config = ModelConfig::new("Gemini-3-Flash").unwrap(); + config.request_params = Some(params); let result = get_thinking_config(&config); assert!(result.is_some()); + let thinking_config = result.unwrap(); + assert!(matches!( + thinking_config.thinking_level, + Some(ThinkingLevel::High) + )); let config = ModelConfig::new("gemini-2.5-flash").unwrap(); let result = get_thinking_config(&config); @@ -1405,7 +1414,7 @@ data: [DONE]"#; params.insert("thinking_budget".to_string(), json!(4096)); let config = ModelConfig::new("gemini-2.5-flash") .unwrap() - .with_request_params(Some(params)); + .with_merged_request_params(params); let result = get_thinking_config(&config); assert!(result.is_some()); let thinking_config = result.unwrap(); @@ -1415,7 +1424,7 @@ data: [DONE]"#; params.insert("thinking_budget".to_string(), json!(-1)); let config = ModelConfig::new("gemini-2.5-flash") .unwrap() - .with_request_params(Some(params)); + .with_merged_request_params(params); let result = get_thinking_config(&config); assert!(result.is_some()); let thinking_config = result.unwrap(); diff --git a/crates/goose/src/providers/formats/openai.rs b/crates/goose/src/providers/formats/openai.rs index 66e9acaba574..1ec5aa1308f0 100644 --- a/crates/goose/src/providers/formats/openai.rs +++ b/crates/goose/src/providers/formats/openai.rs @@ -5,8 +5,8 @@ use crate::providers::base::{split_think_blocks, ProviderUsage, ThinkFilter, Usa use crate::providers::errors::ProviderError; use crate::providers::utils::{ convert_image, detect_image_path, extract_reasoning_effort, is_openai_responses_model, - is_valid_function_name, load_image_file, safely_parse_json, sanitize_function_name, - ImageFormat, + is_valid_function_name, load_image_file, openai_reasoning_effort_for_thinking, + safely_parse_json, sanitize_function_name, ImageFormat, }; use anyhow::{anyhow, Error}; use async_stream::try_stream; @@ -1239,8 +1239,17 @@ pub fn create_request_with_options( )); } - let (model_name, reasoning_effort) = extract_reasoning_effort(&model_config.model_name); + let (model_name, legacy_reasoning_effort) = extract_reasoning_effort(&model_config.model_name); let 
is_reasoning_model = is_openai_responses_model(&model_name); + let reasoning_effort = if is_reasoning_model { + model_config + .thinking_effort() + .map_or(legacy_reasoning_effort, |effort| { + openai_reasoning_effort_for_thinking(&model_name, effort) + }) + } else { + None + }; let system_message = json!({ "role": if is_reasoning_model { "developer" } else { "system" }, @@ -1299,7 +1308,7 @@ pub fn create_request_with_options( if let Some(params) = &model_config.request_params { if let Some(obj) = payload.as_object_mut() { for (key, value) in params { - if !is_reserved_request_param_key(key) { + if key != "thinking_effort" && !is_reserved_request_param_key(key) { obj.insert(key.clone(), value.clone()); } } @@ -2070,8 +2079,7 @@ mod tests { fn test_create_request_omits_max_tokens_when_unset() -> anyhow::Result<()> { // Unknown models on OpenAI-compatible local providers (llama_swap, // lmstudio) have no canonical record and no GOOSE_MAX_TOKENS, so the - // request must not pin the legacy 4096 default — the server should - // pick its own ceiling. See issue #9007. + // request must not pin the legacy 4096 default. See issue #9007. let model_config = ModelConfig { model_name: "some-unknown-local-model".to_string(), context_limit: None, @@ -2164,8 +2172,6 @@ mod tests { #[test] fn test_create_request_o1_default() -> anyhow::Result<()> { - // Without an explicit effort suffix the API picks its own default; - // we should omit reasoning_effort entirely but still use "developer" role. let model_config = ModelConfig { model_name: "o1".to_string(), context_limit: Some(4096), @@ -2208,18 +2214,112 @@ mod tests { Ok(()) } + #[test] + fn test_create_request_o1_medium_effort() -> anyhow::Result<()> { + let mut params = std::collections::HashMap::new(); + params.insert("thinking_effort".to_string(), json!("medium")); + let model_config = ModelConfig { + model_name: "o1".to_string(), + context_limit: Some(4096), + temperature: None, + max_tokens: Some(1024), + toolshim: false, + toolshim_model: None, + fast_model_config: None, + request_params: Some(params), + reasoning: None, + }; + let request = create_request( + &model_config, + "system", + &[], + &[], + &ImageFormat::OpenAi, + false, + )?; + let obj = request.as_object().unwrap(); + + assert_eq!(obj.get("reasoning_effort"), Some(&json!("medium"))); + assert!(obj.get("thinking_effort").is_none()); + + Ok(()) + } + + #[test] + fn test_create_request_o3_off_effort_preserves_none() -> anyhow::Result<()> { + let mut params = std::collections::HashMap::new(); + params.insert("thinking_effort".to_string(), json!("off")); + let model_config = ModelConfig { + model_name: "o3".to_string(), + context_limit: Some(4096), + temperature: None, + max_tokens: Some(1024), + toolshim: false, + toolshim_model: None, + fast_model_config: None, + request_params: Some(params), + reasoning: None, + }; + let request = create_request( + &model_config, + "system", + &[], + &[], + &ImageFormat::OpenAi, + false, + )?; + let obj = request.as_object().unwrap(); + + assert_eq!(obj.get("reasoning_effort"), Some(&json!("none"))); + assert!(obj.get("thinking_effort").is_none()); + + Ok(()) + } + + #[test] + fn test_create_request_gpt5_pro_max_effort_uses_supported_level() -> anyhow::Result<()> { + let mut params = std::collections::HashMap::new(); + params.insert("thinking_effort".to_string(), json!("max")); + let model_config = ModelConfig { + model_name: "gpt-5.2-pro-2025-12-11".to_string(), + context_limit: Some(4096), + temperature: None, + max_tokens: Some(1024), + toolshim: 
false, + toolshim_model: None, + fast_model_config: None, + request_params: Some(params), + reasoning: None, + }; + let request = create_request( + &model_config, + "system", + &[], + &[], + &ImageFormat::OpenAi, + false, + )?; + let obj = request.as_object().unwrap(); + + assert_eq!(obj.get("reasoning_effort"), Some(&json!("high"))); + assert!(obj.get("thinking_effort").is_none()); + + Ok(()) + } + #[test] fn test_create_request_o3_custom_reasoning_effort() -> anyhow::Result<()> { - // Test custom reasoning effort for O3 model + let mut params = std::collections::HashMap::new(); + params.insert("thinking_effort".to_string(), json!("high")); let model_config = ModelConfig { - model_name: "o3-mini-high".to_string(), + model_name: "o3-mini".to_string(), context_limit: Some(4096), temperature: None, max_tokens: Some(1024), toolshim: false, toolshim_model: None, fast_model_config: None, - request_params: None, + request_params: Some(params), reasoning: None, }; let request = create_request( @@ -2246,6 +2346,7 @@ mod tests { for (key, value) in expected.as_object().unwrap() { assert_eq!(obj.get(key).unwrap(), value); } + assert!(obj.get("thinking_effort").is_none()); Ok(()) } diff --git a/crates/goose/src/providers/formats/openai_responses.rs b/crates/goose/src/providers/formats/openai_responses.rs index b328b0c9df4e..ef39fb8b20e9 100644 --- a/crates/goose/src/providers/formats/openai_responses.rs +++ b/crates/goose/src/providers/formats/openai_responses.rs @@ -2,7 +2,9 @@ use crate::conversation::message::{Message, MessageContent}; use crate::mcp_utils::extract_text_from_resource; use crate::model::ModelConfig; use crate::providers::base::{ProviderUsage, Usage}; -use crate::providers::utils::{extract_reasoning_effort, is_openai_responses_model}; +use crate::providers::utils::{ + extract_reasoning_effort, is_openai_responses_model, openai_reasoning_effort_for_thinking, +}; use anyhow::{anyhow, Error}; use async_stream::try_stream; use chrono; @@ -541,11 +543,26 @@ pub fn create_responses_request( add_message_items(&mut input_items, messages); - let (model_name, reasoning_effort) = extract_reasoning_effort(&model_config.model_name); + let (model_name, legacy_reasoning_effort) = extract_reasoning_effort(&model_config.model_name); // All models routed here are responses-capable; temperature is rejected // by the API for reasoning models regardless of whether an explicit // effort suffix was provided. 
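+    // Reviewer note (added commentary): the legacy "-low/-medium/-high" suffix
+    // parsed off the model name still takes precedence below; the unified
+    // thinking effort is consulted only when no suffix is present, so e.g.
+    // "o3-mini-high" keeps "high" even if GOOSE_THINKING_EFFORT is "low"
+    // (see test_responses_request_with_normalized_effort_suffix).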
let is_reasoning_model = is_openai_responses_model(&model_name); + let reasoning_effort = if is_reasoning_model { + if let Some(effort) = legacy_reasoning_effort.as_deref() { + effort + .parse() + .ok() + .and_then(|effort| openai_reasoning_effort_for_thinking(&model_name, effort)) + .or(legacy_reasoning_effort) + } else { + model_config + .thinking_effort() + .and_then(|effort| openai_reasoning_effort_for_thinking(&model_name, effort)) + } + } else { + None + }; let mut payload = json!({ "model": model_name, @@ -1268,6 +1285,17 @@ mod tests { } } + #[test] + fn test_responses_request_with_normalized_effort_suffix() { + let model_config = ModelConfig::new("o3-mini-high").unwrap(); + + let result = create_responses_request(&model_config, "You are helpful.", &[], &[]).unwrap(); + + assert_eq!(result["model"], "o3-mini"); + assert_eq!(result["reasoning"]["effort"], "high"); + assert_eq!(result["reasoning"]["summary"], "auto"); + } + #[test] fn test_responses_request_without_effort_suffix_omits_reasoning() { for model_name in ["gpt-5.4", "o3", "gpt-5-nano"] { @@ -1294,6 +1322,30 @@ mod tests { } } + #[test] + fn test_responses_request_non_reasoning_model_ignores_global_thinking_effort() { + let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", Some("high"))]); + let model_config = ModelConfig { + model_name: "gpt-4o".to_string(), + context_limit: None, + temperature: None, + max_tokens: None, + toolshim: false, + toolshim_model: None, + fast_model_config: None, + request_params: None, + reasoning: None, + }; + + let result = create_responses_request(&model_config, "You are helpful.", &[], &[]).unwrap(); + + assert_eq!(result["model"], "gpt-4o"); + assert!( + result.get("reasoning").is_none(), + "non-reasoning models should not receive reasoning config" + ); + } + #[test] fn test_user_image_serialized_in_responses_request() { use crate::conversation::message::Message; diff --git a/crates/goose/src/providers/formats/openrouter.rs b/crates/goose/src/providers/formats/openrouter.rs index f20d613cc075..22ac7465b252 100644 --- a/crates/goose/src/providers/formats/openrouter.rs +++ b/crates/goose/src/providers/formats/openrouter.rs @@ -1,4 +1,5 @@ use crate::conversation::message::{Message, MessageContent, ProviderMetadata}; +use crate::model::{ModelConfig, ThinkingEffort}; use crate::providers::formats::openai; use rmcp::model::Role; use serde_json::{json, Value}; @@ -87,9 +88,40 @@ pub fn add_reasoning_details_to_request(payload: &mut Value, messages: &[Message } } +fn reasoning_effort_for_openrouter(effort: ThinkingEffort) -> &'static str { + match effort { + ThinkingEffort::Off => "none", + ThinkingEffort::Low => "low", + ThinkingEffort::Medium => "medium", + ThinkingEffort::High => "high", + ThinkingEffort::Max => "xhigh", + } +} + +pub fn apply_reasoning_config(payload: &mut Value, model_config: &ModelConfig) { + let Some(effort) = model_config.thinking_effort() else { + return; + }; + + if let Some(obj) = payload.as_object_mut() { + let clamped_effort = obj + .remove("reasoning_effort") + .and_then(|value| value.as_str().map(str::to_owned)); + if clamped_effort.is_none() && !model_config.is_reasoning_model() { + return; + } + + obj.insert( + "reasoning".to_string(), + json!({ "effort": clamped_effort.as_deref().unwrap_or_else(|| reasoning_effort_for_openrouter(effort)) }), + ); + } +} + #[cfg(test)] mod tests { use super::*; + use std::collections::HashMap; #[test] fn test_extract_reasoning_details() { @@ -149,4 +181,89 @@ mod tests { let details = 
get_reasoning_details(&tool_request.metadata).unwrap(); assert_eq!(details.len(), 1); } + + #[test] + fn test_apply_reasoning_config_uses_openrouter_reasoning_object() { + let mut payload = json!({ + "model": "openai/gpt-5", + "messages": [], + "reasoning_effort": "high" + }); + let mut model_config = ModelConfig::new_or_fail("openai/gpt-5"); + let mut params = HashMap::new(); + params.insert("thinking_effort".to_string(), json!("max")); + model_config.request_params = Some(params); + + apply_reasoning_config(&mut payload, &model_config); + + assert_eq!(payload["reasoning"], json!({ "effort": "high" })); + assert!(payload.get("reasoning_effort").is_none()); + } + + #[test] + fn test_apply_reasoning_config_uses_reasoning_metadata() { + let mut payload = json!({ + "model": "x-ai/grok-4", + "messages": [] + }); + let mut model_config = ModelConfig::new_or_fail("x-ai/grok-4"); + let mut params = HashMap::new(); + params.insert("thinking_effort".to_string(), json!("high")); + model_config.request_params = Some(params); + model_config.reasoning = Some(true); + + apply_reasoning_config(&mut payload, &model_config); + + assert_eq!(payload["reasoning"], json!({ "effort": "high" })); + } + + #[test] + fn test_apply_reasoning_config_uses_model_detection() { + let mut payload = json!({ + "model": "anthropic/claude-sonnet-4", + "messages": [] + }); + let mut model_config = ModelConfig::new_or_fail("anthropic/claude-sonnet-4"); + let mut params = HashMap::new(); + params.insert("thinking_effort".to_string(), json!("high")); + model_config.request_params = Some(params); + + apply_reasoning_config(&mut payload, &model_config); + + assert_eq!(payload["reasoning"], json!({ "effort": "high" })); + } + + #[test] + fn test_apply_reasoning_config_skips_non_reasoning_models() { + let mut payload = json!({ + "model": "openai/gpt-4o", + "messages": [] + }); + let mut model_config = ModelConfig::new_or_fail("openai/gpt-4o"); + let mut params = HashMap::new(); + params.insert("thinking_effort".to_string(), json!("high")); + model_config.request_params = Some(params); + model_config.reasoning = Some(false); + + apply_reasoning_config(&mut payload, &model_config); + + assert!(payload.get("reasoning").is_none()); + } + + #[test] + fn test_apply_reasoning_config_off_disables_reasoning() { + let mut payload = json!({ + "model": "x-ai/grok-4", + "messages": [] + }); + let mut model_config = ModelConfig::new_or_fail("x-ai/grok-4"); + let mut params = HashMap::new(); + params.insert("thinking_effort".to_string(), json!("off")); + model_config.request_params = Some(params); + model_config.reasoning = Some(true); + + apply_reasoning_config(&mut payload, &model_config); + + assert_eq!(payload["reasoning"], json!({ "effort": "none" })); + } } diff --git a/crates/goose/src/providers/openrouter.rs b/crates/goose/src/providers/openrouter.rs index 08b8689b99fd..ac2a476befbe 100644 --- a/crates/goose/src/providers/openrouter.rs +++ b/crates/goose/src/providers/openrouter.rs @@ -278,6 +278,7 @@ impl Provider for OpenRouterProvider { if is_gemini_model(&model_config.model_name) { openrouter_format::add_reasoning_details_to_request(&mut payload, messages); } + openrouter_format::apply_reasoning_config(&mut payload, model_config); if let Some(obj) = payload.as_object_mut() { obj.insert("transforms".to_string(), json!(["middle-out"])); diff --git a/crates/goose/src/providers/provider_registry.rs b/crates/goose/src/providers/provider_registry.rs index 2684bfd5f04c..c4c869ebeeed 100644 --- a/crates/goose/src/providers/provider_registry.rs 
+++ b/crates/goose/src/providers/provider_registry.rs
@@ -162,6 +162,7 @@ impl ProviderRegistry {
                     output_token_cost: m.output_token_cost,
                     currency: m.currency.clone(),
                     supports_cache_control: Some(m.supports_cache_control.unwrap_or(false)),
+                    reasoning: m.reasoning || ModelConfig::new_or_fail(&m.name).is_reasoning_model(),
                 })
                 .collect();
 
diff --git a/crates/goose/src/providers/utils.rs b/crates/goose/src/providers/utils.rs
index 81d15f5fc082..87be4af7515f 100644
--- a/crates/goose/src/providers/utils.rs
+++ b/crates/goose/src/providers/utils.rs
@@ -1,7 +1,7 @@
 use super::base::Usage;
 use super::errors::GoogleErrorCode;
 use crate::config::paths::Paths;
-use crate::model::ModelConfig;
+use crate::model::{ModelConfig, ThinkingEffort};
 use crate::providers::errors::ProviderError;
 use anyhow::{anyhow, Result};
 use base64::Engine;
@@ -237,6 +237,49 @@ pub fn extract_reasoning_effort(model_name: &str) -> (String, Option<String>) {
     (model_name.to_string(), None)
 }
 
+pub fn openai_reasoning_effort_for_thinking(
+    model_name: &str,
+    effort: ThinkingEffort,
+) -> Option<String> {
+    if effort == ThinkingEffort::Off {
+        return Some("none".to_string());
+    }
+
+    let supported = openai_reasoning_efforts_for_model(model_name);
+    let preferred: &[&str] = match effort {
+        ThinkingEffort::Off => unreachable!(),
+        ThinkingEffort::Low => &["low", "medium", "high", "xhigh"],
+        ThinkingEffort::Medium => &["medium", "high", "low", "xhigh"],
+        ThinkingEffort::High => &["high", "medium", "xhigh", "low"],
+        ThinkingEffort::Max => &["xhigh", "high", "medium", "low"],
+    };
+
+    preferred
+        .iter()
+        .find(|level| supported.contains(level))
+        .map(|level| (*level).to_string())
+}
+
+fn openai_reasoning_efforts_for_model(model_name: &str) -> &'static [&'static str] {
+    let normalized = model_name.to_ascii_lowercase();
+
+    if normalized.contains("gpt-5") {
+        if normalized.contains("-pro") || normalized.contains("/pro") {
+            &["high"]
+        } else if normalized.contains("gpt-5.4")
+            || normalized.contains("gpt-5-4")
+            || normalized.contains("gpt-5.5")
+            || normalized.contains("gpt-5-5")
+        {
+            &["low", "medium", "high", "xhigh"]
+        } else {
+            &["low", "medium", "high"]
+        }
+    } else {
+        &["low", "medium", "high"]
+    }
+}
+
 pub fn sanitize_function_name(name: &str) -> String {
     static RE: OnceLock<Regex> = OnceLock::new();
     let re = RE.get_or_init(|| Regex::new(r"[^a-zA-Z0-9_-]").unwrap());
diff --git a/documentation/docs/guides/cli-providers.md b/documentation/docs/guides/cli-providers.md
index 269c58b173e0..bcbf4fc032e5 100644
--- a/documentation/docs/guides/cli-providers.md
+++ b/documentation/docs/guides/cli-providers.md
@@ -330,7 +330,7 @@ GOOSE_PROVIDER=claude-code GOOSE_MODE=approve goose session
 | `GOOSE_PROVIDER` | Set to `codex` to use this provider | None |
 | `GOOSE_MODEL` | Model to use (only known models are passed to CLI) | `gpt-5.2-codex` |
 | `CODEX_COMMAND` | Path to the Codex CLI command | `codex` |
-| `CODEX_REASONING_EFFORT` | Reasoning effort level: `low`, `medium`, `high`, or `xhigh` (`none` is only supported on non-codex models like `gpt-5.2`) | `high` |
+| `GOOSE_THINKING_EFFORT` | Unified thinking effort (`off`, `low`, `medium`, `high`, `max`). Mapped to Codex CLI effort levels (`none/low/medium/high/xhigh`). | `high` |
 | `CODEX_ENABLE_SKILLS` | Enable Codex skills: `true` or `false` | `true` |
 | `CODEX_SKIP_GIT_CHECK` | Skip git repository requirement: `true` or `false` | `false` |
 
diff --git a/ui/desktop/openapi.json b/ui/desktop/openapi.json
index e0b73df4cad3..1919dfcb04bf 100644
--- a/ui/desktop/openapi.json
+++ b/ui/desktop/openapi.json
@@ -1369,6 +1369,56 @@
         }
       }
     },
+    "/config/providers/{name}/model-info": {
+      "post": {
+        "tags": [
+          "super::routes::config_management"
+        ],
+        "operationId": "get_provider_model_info",
+        "parameters": [
+          {
+            "name": "name",
+            "in": "path",
+            "description": "Provider name (e.g., openai)",
+            "required": true,
+            "schema": {
+              "type": "string"
+            }
+          }
+        ],
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/ProviderModelInfoQuery"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "description": "Model metadata fetched successfully",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ModelInfo"
+                }
+              }
+            }
+          },
+          "400": {
+            "description": "Unknown provider, provider not configured, or authentication error"
+          },
+          "429": {
+            "description": "Rate limit exceeded"
+          },
+          "500": {
+            "description": "Internal server error"
+          }
+        }
+      }
+    },
     "/config/providers/{name}/models": {
       "get": {
         "tags": [
@@ -1394,7 +1444,7 @@
               "schema": {
                 "type": "array",
                 "items": {
-                  "type": "string"
+                  "$ref": "#/components/schemas/ModelInfo"
                 }
               }
             }
@@ -6533,6 +6583,10 @@
           "description": "Cost per token for output in USD (optional)",
           "nullable": true
         },
+        "reasoning": {
+          "type": "boolean",
+          "description": "Whether this model supports reasoning/thinking controls"
+        },
         "supports_cache_control": {
           "type": "boolean",
           "description": "Whether this model supports cache control",
@@ -6546,6 +6600,7 @@
           "provider",
           "model",
           "context_limit",
+          "reasoning",
           "currency"
         ],
         "properties": {
@@ -6586,6 +6641,9 @@
           },
           "provider": {
             "type": "string"
+          },
+          "reasoning": {
+            "type": "boolean"
           }
         }
       },
@@ -6969,6 +7027,17 @@
           }
         }
       },
+      "ProviderModelInfoQuery": {
+        "type": "object",
+        "required": [
+          "model"
+        ],
+        "properties": {
+          "model": {
+            "type": "string"
+          }
+        }
+      },
       "ProviderTemplate": {
         "type": "object",
         "required": [
@@ -8987,6 +9056,10 @@
           "provider": {
             "type": "string"
           },
+          "reasoning": {
+            "type": "boolean",
+            "nullable": true
+          },
           "request_params": {
             "type": "object",
             "additionalProperties": {},
diff --git a/ui/desktop/src/api/index.ts b/ui/desktop/src/api/index.ts
index fd1811a2c985..b4254714ffef 100644
--- a/ui/desktop/src/api/index.ts
+++ b/ui/desktop/src/api/index.ts
@@ -1,4 +1,4 @@
 // This file is auto-generated by @hey-api/openapi-ts
-export { addExtension, agentAddExtension, agentRemoveExtension, callTool, cancelDownload, cancelLocalModelDownload, checkProvider, cleanupProviderCache, configureProviderOauth, confirmToolAction, createCustomProvider, createRecipe, createSchedule, decodeRecipe, deleteLocalModel, deleteModel, deleteRecipe, deleteSchedule, deleteSession, diagnostics, downloadHfModel, downloadModel, encodeRecipe, exportApp, exportSession, forkSession, getCanonicalModelInfo, getCustomProvider, getDictationConfig, getDownloadProgress, getExtensions, getFeatures, getLocalModelDownloadProgress, getModelSettings, getPrompt, getPrompts, getProviderCatalog, getProviderCatalogTemplate, getProviderModels, getRepoFiles, getSession,
getSessionExtensions, getSessionInsights, getSlashCommands, getTools, getTunnelStatus, importApp, importSession, importSessionNostr, inspectRunningJob, killRunningJob, listApps, listLocalModels, listModels, listRecipes, listSchedules, listSessions, mcpUiProxy, type Options, parseRecipe, pauseSchedule, providers, readAllConfig, readConfig, readResource, recipeToYaml, removeConfig, removeCustomProvider, removeExtension, reply, resetPrompt, restartAgent, resumeAgent, runNowHandler, savePrompt, saveRecipe, scanRecipe, scheduleRecipe, searchHfModels, searchSessions, sendTelemetryEvent, sessionCancel, sessionEvents, sessionReply, sessionsHandler, setConfigProvider, setRecipeSlashCommand, shareSessionNostr, startAgent, startNanogptSetup, startOpenrouterSetup, startTetrateSetup, startTunnel, status, stopAgent, stopTunnel, syncFeaturedModels, systemInfo, transcribeDictation, unpauseSchedule, updateAgentProvider, updateCustomProvider, updateFromSession, updateModelSettings, updateSchedule, updateSession, updateSessionName, updateSessionUserRecipeValues, updateWorkingDir, upsertConfig, upsertPermissions, validateConfig } from './sdk.gen'; -export type { ActionRequired, ActionRequiredData, AddExtensionData, AddExtensionErrors, AddExtensionRequest, AddExtensionResponse, AddExtensionResponses, AgentAddExtensionData, AgentAddExtensionErrors, AgentAddExtensionResponse, AgentAddExtensionResponses, AgentRemoveExtensionData, AgentRemoveExtensionErrors, AgentRemoveExtensionResponse, AgentRemoveExtensionResponses, Annotations, Author, AuthorRequest, CallToolData, CallToolError, CallToolErrors, CallToolRequest, CallToolResponse, CallToolResponse2, CallToolResponses, CancelDownloadData, CancelDownloadErrors, CancelDownloadResponses, CancelLocalModelDownloadData, CancelLocalModelDownloadErrors, CancelLocalModelDownloadResponses, CancelRequest, ChatRequest, CheckProviderData, CheckProviderRequest, CleanupProviderCacheData, CleanupProviderCacheErrors, CleanupProviderCacheResponse, CleanupProviderCacheResponses, ClientOptions, CommandType, ConfigKey, ConfigKeyQuery, ConfigResponse, ConfigureProviderOauthData, ConfigureProviderOauthErrors, ConfigureProviderOauthResponses, ConfirmToolActionData, ConfirmToolActionErrors, ConfirmToolActionRequest, ConfirmToolActionResponses, Content, ContentBlock, Conversation, CreateCustomProviderData, CreateCustomProviderErrors, CreateCustomProviderResponse, CreateCustomProviderResponse2, CreateCustomProviderResponses, CreateRecipeData, CreateRecipeErrors, CreateRecipeRequest, CreateRecipeResponse, CreateRecipeResponse2, CreateRecipeResponses, CreateScheduleData, CreateScheduleErrors, CreateScheduleRequest, CreateScheduleResponse, CreateScheduleResponses, CspMetadata, DeclarativeProviderConfig, DecodeRecipeData, DecodeRecipeErrors, DecodeRecipeRequest, DecodeRecipeResponse, DecodeRecipeResponse2, DecodeRecipeResponses, DeleteLocalModelData, DeleteLocalModelErrors, DeleteLocalModelResponses, DeleteModelData, DeleteModelErrors, DeleteModelResponses, DeleteRecipeData, DeleteRecipeErrors, DeleteRecipeRequest, DeleteRecipeResponse, DeleteRecipeResponses, DeleteScheduleData, DeleteScheduleErrors, DeleteScheduleResponse, DeleteScheduleResponses, DeleteSessionData, DeleteSessionErrors, DeleteSessionResponses, DiagnosticsData, DiagnosticsErrors, DiagnosticsResponse, DiagnosticsResponses, DictationProvider, DictationProviderStatus, DownloadHfModelData, DownloadHfModelErrors, DownloadHfModelResponse, DownloadHfModelResponses, DownloadModelData, DownloadModelErrors, DownloadModelRequest, 
DownloadModelResponses, DownloadProgress, DownloadStatus, EmbeddedResource, EncodeRecipeData, EncodeRecipeErrors, EncodeRecipeRequest, EncodeRecipeResponse, EncodeRecipeResponse2, EncodeRecipeResponses, Envs, EnvVarConfig, ErrorResponse, ExportAppData, ExportAppError, ExportAppErrors, ExportAppResponse, ExportAppResponses, ExportSessionData, ExportSessionErrors, ExportSessionResponse, ExportSessionResponses, ExtensionConfig, ExtensionData, ExtensionEntry, ExtensionLoadResult, ExtensionQuery, ExtensionResponse, FeaturesResponse, ForkRequest, ForkResponse, ForkSessionData, ForkSessionErrors, ForkSessionResponse, ForkSessionResponses, FrontendToolRequest, GetCanonicalModelInfoData, GetCanonicalModelInfoResponse, GetCanonicalModelInfoResponses, GetCustomProviderData, GetCustomProviderErrors, GetCustomProviderResponse, GetCustomProviderResponses, GetDictationConfigData, GetDictationConfigResponse, GetDictationConfigResponses, GetDownloadProgressData, GetDownloadProgressErrors, GetDownloadProgressResponse, GetDownloadProgressResponses, GetExtensionsData, GetExtensionsErrors, GetExtensionsResponse, GetExtensionsResponses, GetFeaturesData, GetFeaturesResponse, GetFeaturesResponses, GetLocalModelDownloadProgressData, GetLocalModelDownloadProgressErrors, GetLocalModelDownloadProgressResponse, GetLocalModelDownloadProgressResponses, GetModelSettingsData, GetModelSettingsErrors, GetModelSettingsResponse, GetModelSettingsResponses, GetPromptData, GetPromptErrors, GetPromptResponse, GetPromptResponses, GetPromptsData, GetPromptsResponse, GetPromptsResponses, GetProviderCatalogData, GetProviderCatalogErrors, GetProviderCatalogResponse, GetProviderCatalogResponses, GetProviderCatalogTemplateData, GetProviderCatalogTemplateErrors, GetProviderCatalogTemplateResponse, GetProviderCatalogTemplateResponses, GetProviderModelsData, GetProviderModelsErrors, GetProviderModelsResponse, GetProviderModelsResponses, GetRepoFilesData, GetRepoFilesResponse, GetRepoFilesResponses, GetSessionData, GetSessionErrors, GetSessionExtensionsData, GetSessionExtensionsErrors, GetSessionExtensionsResponse, GetSessionExtensionsResponses, GetSessionInsightsData, GetSessionInsightsErrors, GetSessionInsightsResponse, GetSessionInsightsResponses, GetSessionResponse, GetSessionResponses, GetSlashCommandsData, GetSlashCommandsResponse, GetSlashCommandsResponses, GetToolsData, GetToolsErrors, GetToolsQuery, GetToolsResponse, GetToolsResponses, GetTunnelStatusData, GetTunnelStatusResponse, GetTunnelStatusResponses, GooseApp, GooseMode, HfGgufFile, HfModelInfo, HfQuantVariant, Icon, IconTheme, ImageContent, ImportAppData, ImportAppError, ImportAppErrors, ImportAppRequest, ImportAppResponse, ImportAppResponse2, ImportAppResponses, ImportSessionData, ImportSessionErrors, ImportSessionNostrData, ImportSessionNostrErrors, ImportSessionNostrRequest, ImportSessionNostrResponse, ImportSessionNostrResponses, ImportSessionRequest, ImportSessionResponse, ImportSessionResponses, InspectJobResponse, InspectRunningJobData, InspectRunningJobErrors, InspectRunningJobResponse, InspectRunningJobResponses, JsonObject, KillJobResponse, KillRunningJobData, KillRunningJobResponses, ListAppsData, ListAppsError, ListAppsErrors, ListAppsRequest, ListAppsResponse, ListAppsResponse2, ListAppsResponses, ListLocalModelsData, ListLocalModelsResponse, ListLocalModelsResponses, ListModelsData, ListModelsResponse, ListModelsResponses, ListRecipeResponse, ListRecipesData, ListRecipesErrors, ListRecipesResponse, ListRecipesResponses, ListSchedulesData, ListSchedulesErrors, 
ListSchedulesResponse, ListSchedulesResponse2, ListSchedulesResponses, ListSessionsData, ListSessionsErrors, ListSessionsResponse, ListSessionsResponses, LoadedProvider, LocalModelResponse, McpAppResource, McpUiProxyData, McpUiProxyErrors, McpUiProxyResponses, Message, MessageContent, MessageEvent, MessageMetadata, ModelCapabilities, ModelConfig, ModelDownloadStatus, ModelInfo, ModelInfoData, ModelInfoQuery, ModelInfoResponse, ModelSettings, ModelTemplate, ParseRecipeData, ParseRecipeError, ParseRecipeErrors, ParseRecipeRequest, ParseRecipeResponse, ParseRecipeResponse2, ParseRecipeResponses, PauseScheduleData, PauseScheduleErrors, PauseScheduleResponse, PauseScheduleResponses, Permission, PermissionLevel, PermissionsMetadata, PrincipalType, PromptContentResponse, PromptsListResponse, ProviderCatalogEntry, ProviderDetails, ProviderEngine, ProviderMetadata, ProvidersData, ProvidersResponse, ProvidersResponse2, ProvidersResponses, ProviderTemplate, ProviderType, RawAudioContent, RawEmbeddedResource, RawImageContent, RawResource, RawTextContent, ReadAllConfigData, ReadAllConfigResponse, ReadAllConfigResponses, ReadConfigData, ReadConfigErrors, ReadConfigResponses, ReadResourceData, ReadResourceErrors, ReadResourceRequest, ReadResourceResponse, ReadResourceResponse2, ReadResourceResponses, Recipe, RecipeManifest, RecipeParameter, RecipeParameterInputType, RecipeParameterRequirement, RecipeToYamlData, RecipeToYamlError, RecipeToYamlErrors, RecipeToYamlRequest, RecipeToYamlResponse, RecipeToYamlResponse2, RecipeToYamlResponses, RedactedThinkingContent, RemoveConfigData, RemoveConfigErrors, RemoveConfigResponse, RemoveConfigResponses, RemoveCustomProviderData, RemoveCustomProviderErrors, RemoveCustomProviderResponse, RemoveCustomProviderResponses, RemoveExtensionData, RemoveExtensionErrors, RemoveExtensionRequest, RemoveExtensionResponse, RemoveExtensionResponses, ReplyData, ReplyErrors, ReplyResponse, ReplyResponses, RepoVariantsResponse, ResetPromptData, ResetPromptErrors, ResetPromptResponse, ResetPromptResponses, ResourceContents, ResourceMetadata, Response, RestartAgentData, RestartAgentErrors, RestartAgentRequest, RestartAgentResponse, RestartAgentResponse2, RestartAgentResponses, ResumeAgentData, ResumeAgentErrors, ResumeAgentRequest, ResumeAgentResponse, ResumeAgentResponse2, ResumeAgentResponses, RetryConfig, Role, RunNowHandlerData, RunNowHandlerErrors, RunNowHandlerResponse, RunNowHandlerResponses, RunNowResponse, SamplingConfig, SavePromptData, SavePromptErrors, SavePromptRequest, SavePromptResponse, SavePromptResponses, SaveRecipeData, SaveRecipeError, SaveRecipeErrors, SaveRecipeRequest, SaveRecipeResponse, SaveRecipeResponse2, SaveRecipeResponses, ScanRecipeData, ScanRecipeRequest, ScanRecipeResponse, ScanRecipeResponse2, ScanRecipeResponses, ScheduledJob, ScheduleRecipeData, ScheduleRecipeErrors, ScheduleRecipeRequest, ScheduleRecipeResponses, SearchHfModelsData, SearchHfModelsErrors, SearchHfModelsResponse, SearchHfModelsResponses, SearchSessionsData, SearchSessionsErrors, SearchSessionsResponse, SearchSessionsResponses, SendTelemetryEventData, SendTelemetryEventResponses, Session, SessionCancelData, SessionCancelResponses, SessionDisplayInfo, SessionEventsData, SessionEventsErrors, SessionEventsResponse, SessionEventsResponses, SessionExtensionsResponse, SessionInsights, SessionListResponse, SessionReplyData, SessionReplyErrors, SessionReplyRequest, SessionReplyResponse, SessionReplyResponse2, SessionReplyResponses, SessionsHandlerData, SessionsHandlerErrors, 
SessionsHandlerResponse, SessionsHandlerResponses, SessionsQuery, SessionType, SetConfigProviderData, SetProviderRequest, SetRecipeSlashCommandData, SetRecipeSlashCommandErrors, SetRecipeSlashCommandResponses, SetSlashCommandRequest, Settings, SetupResponse, ShareSessionNostrData, ShareSessionNostrErrors, ShareSessionNostrRequest, ShareSessionNostrResponse, ShareSessionNostrResponse2, ShareSessionNostrResponses, SlashCommand, SlashCommandsResponse, StartAgentData, StartAgentError, StartAgentErrors, StartAgentRequest, StartAgentResponse, StartAgentResponses, StartNanogptSetupData, StartNanogptSetupResponse, StartNanogptSetupResponses, StartOpenrouterSetupData, StartOpenrouterSetupResponse, StartOpenrouterSetupResponses, StartTetrateSetupData, StartTetrateSetupResponse, StartTetrateSetupResponses, StartTunnelData, StartTunnelError, StartTunnelErrors, StartTunnelResponse, StartTunnelResponses, StatusData, StatusResponse, StatusResponses, StopAgentData, StopAgentErrors, StopAgentRequest, StopAgentResponse, StopAgentResponses, StopTunnelData, StopTunnelError, StopTunnelErrors, StopTunnelResponses, SubRecipe, SuccessCheck, SyncFeaturedModelsData, SyncFeaturedModelsResponses, SystemInfo, SystemInfoData, SystemInfoResponse, SystemInfoResponses, SystemNotificationContent, SystemNotificationType, TaskSupport, TelemetryEventRequest, Template, TextContent, ThinkingContent, TokenState, Tool, ToolAnnotations, ToolConfirmationRequest, ToolExecution, ToolInfo, ToolPermission, ToolRequest, ToolResponse, TranscribeDictationData, TranscribeDictationErrors, TranscribeDictationResponse, TranscribeDictationResponses, TranscribeRequest, TranscribeResponse, TunnelInfo, TunnelState, UiMetadata, UnpauseScheduleData, UnpauseScheduleErrors, UnpauseScheduleResponse, UnpauseScheduleResponses, UpdateAgentProviderData, UpdateAgentProviderErrors, UpdateAgentProviderResponses, UpdateCustomProviderData, UpdateCustomProviderErrors, UpdateCustomProviderRequest, UpdateCustomProviderResponse, UpdateCustomProviderResponses, UpdateFromSessionData, UpdateFromSessionErrors, UpdateFromSessionRequest, UpdateFromSessionResponses, UpdateModelSettingsData, UpdateModelSettingsErrors, UpdateModelSettingsResponse, UpdateModelSettingsResponses, UpdateProviderRequest, UpdateScheduleData, UpdateScheduleErrors, UpdateScheduleRequest, UpdateScheduleResponse, UpdateScheduleResponses, UpdateSessionData, UpdateSessionErrors, UpdateSessionNameData, UpdateSessionNameErrors, UpdateSessionNameRequest, UpdateSessionNameResponses, UpdateSessionRequest, UpdateSessionResponses, UpdateSessionUserRecipeValuesData, UpdateSessionUserRecipeValuesError, UpdateSessionUserRecipeValuesErrors, UpdateSessionUserRecipeValuesRequest, UpdateSessionUserRecipeValuesResponse, UpdateSessionUserRecipeValuesResponse2, UpdateSessionUserRecipeValuesResponses, UpdateWorkingDirData, UpdateWorkingDirErrors, UpdateWorkingDirRequest, UpdateWorkingDirResponses, UpsertConfigData, UpsertConfigErrors, UpsertConfigQuery, UpsertConfigResponse, UpsertConfigResponses, UpsertPermissionsData, UpsertPermissionsErrors, UpsertPermissionsQuery, UpsertPermissionsResponse, UpsertPermissionsResponses, ValidateConfigData, ValidateConfigErrors, ValidateConfigResponse, ValidateConfigResponses, WhisperModelResponse, WindowProps } from './types.gen'; +export { addExtension, agentAddExtension, agentRemoveExtension, callTool, cancelDownload, cancelLocalModelDownload, checkProvider, cleanupProviderCache, configureProviderOauth, confirmToolAction, createCustomProvider, createRecipe, createSchedule, 
decodeRecipe, deleteLocalModel, deleteModel, deleteRecipe, deleteSchedule, deleteSession, diagnostics, downloadHfModel, downloadModel, encodeRecipe, exportApp, exportSession, forkSession, getCanonicalModelInfo, getCustomProvider, getDictationConfig, getDownloadProgress, getExtensions, getFeatures, getLocalModelDownloadProgress, getModelSettings, getPrompt, getPrompts, getProviderCatalog, getProviderCatalogTemplate, getProviderModelInfo, getProviderModels, getRepoFiles, getSession, getSessionExtensions, getSessionInsights, getSlashCommands, getTools, getTunnelStatus, importApp, importSession, importSessionNostr, inspectRunningJob, killRunningJob, listApps, listLocalModels, listModels, listRecipes, listSchedules, listSessions, mcpUiProxy, type Options, parseRecipe, pauseSchedule, providers, readAllConfig, readConfig, readResource, recipeToYaml, removeConfig, removeCustomProvider, removeExtension, reply, resetPrompt, restartAgent, resumeAgent, runNowHandler, savePrompt, saveRecipe, scanRecipe, scheduleRecipe, searchHfModels, searchSessions, sendTelemetryEvent, sessionCancel, sessionEvents, sessionReply, sessionsHandler, setConfigProvider, setRecipeSlashCommand, shareSessionNostr, startAgent, startNanogptSetup, startOpenrouterSetup, startTetrateSetup, startTunnel, status, stopAgent, stopTunnel, syncFeaturedModels, systemInfo, transcribeDictation, unpauseSchedule, updateAgentProvider, updateCustomProvider, updateFromSession, updateModelSettings, updateSchedule, updateSession, updateSessionName, updateSessionUserRecipeValues, updateWorkingDir, upsertConfig, upsertPermissions, validateConfig } from './sdk.gen'; +export type { ActionRequired, ActionRequiredData, AddExtensionData, AddExtensionErrors, AddExtensionRequest, AddExtensionResponse, AddExtensionResponses, AgentAddExtensionData, AgentAddExtensionErrors, AgentAddExtensionResponse, AgentAddExtensionResponses, AgentRemoveExtensionData, AgentRemoveExtensionErrors, AgentRemoveExtensionResponse, AgentRemoveExtensionResponses, Annotations, Author, AuthorRequest, CallToolData, CallToolError, CallToolErrors, CallToolRequest, CallToolResponse, CallToolResponse2, CallToolResponses, CancelDownloadData, CancelDownloadErrors, CancelDownloadResponses, CancelLocalModelDownloadData, CancelLocalModelDownloadErrors, CancelLocalModelDownloadResponses, CancelRequest, ChatRequest, CheckProviderData, CheckProviderRequest, CleanupProviderCacheData, CleanupProviderCacheErrors, CleanupProviderCacheResponse, CleanupProviderCacheResponses, ClientOptions, CommandType, ConfigKey, ConfigKeyQuery, ConfigResponse, ConfigureProviderOauthData, ConfigureProviderOauthErrors, ConfigureProviderOauthResponses, ConfirmToolActionData, ConfirmToolActionErrors, ConfirmToolActionRequest, ConfirmToolActionResponses, Content, ContentBlock, Conversation, CreateCustomProviderData, CreateCustomProviderErrors, CreateCustomProviderResponse, CreateCustomProviderResponse2, CreateCustomProviderResponses, CreateRecipeData, CreateRecipeErrors, CreateRecipeRequest, CreateRecipeResponse, CreateRecipeResponse2, CreateRecipeResponses, CreateScheduleData, CreateScheduleErrors, CreateScheduleRequest, CreateScheduleResponse, CreateScheduleResponses, CspMetadata, DeclarativeProviderConfig, DecodeRecipeData, DecodeRecipeErrors, DecodeRecipeRequest, DecodeRecipeResponse, DecodeRecipeResponse2, DecodeRecipeResponses, DeleteLocalModelData, DeleteLocalModelErrors, DeleteLocalModelResponses, DeleteModelData, DeleteModelErrors, DeleteModelResponses, DeleteRecipeData, DeleteRecipeErrors, DeleteRecipeRequest, 
DeleteRecipeResponse, DeleteRecipeResponses, DeleteScheduleData, DeleteScheduleErrors, DeleteScheduleResponse, DeleteScheduleResponses, DeleteSessionData, DeleteSessionErrors, DeleteSessionResponses, DiagnosticsData, DiagnosticsErrors, DiagnosticsResponse, DiagnosticsResponses, DictationProvider, DictationProviderStatus, DownloadHfModelData, DownloadHfModelErrors, DownloadHfModelResponse, DownloadHfModelResponses, DownloadModelData, DownloadModelErrors, DownloadModelRequest, DownloadModelResponses, DownloadProgress, DownloadStatus, EmbeddedResource, EncodeRecipeData, EncodeRecipeErrors, EncodeRecipeRequest, EncodeRecipeResponse, EncodeRecipeResponse2, EncodeRecipeResponses, Envs, EnvVarConfig, ErrorResponse, ExportAppData, ExportAppError, ExportAppErrors, ExportAppResponse, ExportAppResponses, ExportSessionData, ExportSessionErrors, ExportSessionResponse, ExportSessionResponses, ExtensionConfig, ExtensionData, ExtensionEntry, ExtensionLoadResult, ExtensionQuery, ExtensionResponse, FeaturesResponse, ForkRequest, ForkResponse, ForkSessionData, ForkSessionErrors, ForkSessionResponse, ForkSessionResponses, FrontendToolRequest, GetCanonicalModelInfoData, GetCanonicalModelInfoResponse, GetCanonicalModelInfoResponses, GetCustomProviderData, GetCustomProviderErrors, GetCustomProviderResponse, GetCustomProviderResponses, GetDictationConfigData, GetDictationConfigResponse, GetDictationConfigResponses, GetDownloadProgressData, GetDownloadProgressErrors, GetDownloadProgressResponse, GetDownloadProgressResponses, GetExtensionsData, GetExtensionsErrors, GetExtensionsResponse, GetExtensionsResponses, GetFeaturesData, GetFeaturesResponse, GetFeaturesResponses, GetLocalModelDownloadProgressData, GetLocalModelDownloadProgressErrors, GetLocalModelDownloadProgressResponse, GetLocalModelDownloadProgressResponses, GetModelSettingsData, GetModelSettingsErrors, GetModelSettingsResponse, GetModelSettingsResponses, GetPromptData, GetPromptErrors, GetPromptResponse, GetPromptResponses, GetPromptsData, GetPromptsResponse, GetPromptsResponses, GetProviderCatalogData, GetProviderCatalogErrors, GetProviderCatalogResponse, GetProviderCatalogResponses, GetProviderCatalogTemplateData, GetProviderCatalogTemplateErrors, GetProviderCatalogTemplateResponse, GetProviderCatalogTemplateResponses, GetProviderModelInfoData, GetProviderModelInfoErrors, GetProviderModelInfoResponse, GetProviderModelInfoResponses, GetProviderModelsData, GetProviderModelsErrors, GetProviderModelsResponse, GetProviderModelsResponses, GetRepoFilesData, GetRepoFilesResponse, GetRepoFilesResponses, GetSessionData, GetSessionErrors, GetSessionExtensionsData, GetSessionExtensionsErrors, GetSessionExtensionsResponse, GetSessionExtensionsResponses, GetSessionInsightsData, GetSessionInsightsErrors, GetSessionInsightsResponse, GetSessionInsightsResponses, GetSessionResponse, GetSessionResponses, GetSlashCommandsData, GetSlashCommandsResponse, GetSlashCommandsResponses, GetToolsData, GetToolsErrors, GetToolsQuery, GetToolsResponse, GetToolsResponses, GetTunnelStatusData, GetTunnelStatusResponse, GetTunnelStatusResponses, GooseApp, GooseMode, HfGgufFile, HfModelInfo, HfQuantVariant, Icon, IconTheme, ImageContent, ImportAppData, ImportAppError, ImportAppErrors, ImportAppRequest, ImportAppResponse, ImportAppResponse2, ImportAppResponses, ImportSessionData, ImportSessionErrors, ImportSessionNostrData, ImportSessionNostrErrors, ImportSessionNostrRequest, ImportSessionNostrResponse, ImportSessionNostrResponses, ImportSessionRequest, ImportSessionResponse, 
ImportSessionResponses, InspectJobResponse, InspectRunningJobData, InspectRunningJobErrors, InspectRunningJobResponse, InspectRunningJobResponses, JsonObject, KillJobResponse, KillRunningJobData, KillRunningJobResponses, ListAppsData, ListAppsError, ListAppsErrors, ListAppsRequest, ListAppsResponse, ListAppsResponse2, ListAppsResponses, ListLocalModelsData, ListLocalModelsResponse, ListLocalModelsResponses, ListModelsData, ListModelsResponse, ListModelsResponses, ListRecipeResponse, ListRecipesData, ListRecipesErrors, ListRecipesResponse, ListRecipesResponses, ListSchedulesData, ListSchedulesErrors, ListSchedulesResponse, ListSchedulesResponse2, ListSchedulesResponses, ListSessionsData, ListSessionsErrors, ListSessionsResponse, ListSessionsResponses, LoadedProvider, LocalModelResponse, McpAppResource, McpUiProxyData, McpUiProxyErrors, McpUiProxyResponses, Message, MessageContent, MessageEvent, MessageMetadata, ModelCapabilities, ModelConfig, ModelDownloadStatus, ModelInfo, ModelInfoData, ModelInfoQuery, ModelInfoResponse, ModelSettings, ModelTemplate, ParseRecipeData, ParseRecipeError, ParseRecipeErrors, ParseRecipeRequest, ParseRecipeResponse, ParseRecipeResponse2, ParseRecipeResponses, PauseScheduleData, PauseScheduleErrors, PauseScheduleResponse, PauseScheduleResponses, Permission, PermissionLevel, PermissionsMetadata, PrincipalType, PromptContentResponse, PromptsListResponse, ProviderCatalogEntry, ProviderDetails, ProviderEngine, ProviderMetadata, ProviderModelInfoQuery, ProvidersData, ProvidersResponse, ProvidersResponse2, ProvidersResponses, ProviderTemplate, ProviderType, RawAudioContent, RawEmbeddedResource, RawImageContent, RawResource, RawTextContent, ReadAllConfigData, ReadAllConfigResponse, ReadAllConfigResponses, ReadConfigData, ReadConfigErrors, ReadConfigResponses, ReadResourceData, ReadResourceErrors, ReadResourceRequest, ReadResourceResponse, ReadResourceResponse2, ReadResourceResponses, Recipe, RecipeManifest, RecipeParameter, RecipeParameterInputType, RecipeParameterRequirement, RecipeToYamlData, RecipeToYamlError, RecipeToYamlErrors, RecipeToYamlRequest, RecipeToYamlResponse, RecipeToYamlResponse2, RecipeToYamlResponses, RedactedThinkingContent, RemoveConfigData, RemoveConfigErrors, RemoveConfigResponse, RemoveConfigResponses, RemoveCustomProviderData, RemoveCustomProviderErrors, RemoveCustomProviderResponse, RemoveCustomProviderResponses, RemoveExtensionData, RemoveExtensionErrors, RemoveExtensionRequest, RemoveExtensionResponse, RemoveExtensionResponses, ReplyData, ReplyErrors, ReplyResponse, ReplyResponses, RepoVariantsResponse, ResetPromptData, ResetPromptErrors, ResetPromptResponse, ResetPromptResponses, ResourceContents, ResourceMetadata, Response, RestartAgentData, RestartAgentErrors, RestartAgentRequest, RestartAgentResponse, RestartAgentResponse2, RestartAgentResponses, ResumeAgentData, ResumeAgentErrors, ResumeAgentRequest, ResumeAgentResponse, ResumeAgentResponse2, ResumeAgentResponses, RetryConfig, Role, RunNowHandlerData, RunNowHandlerErrors, RunNowHandlerResponse, RunNowHandlerResponses, RunNowResponse, SamplingConfig, SavePromptData, SavePromptErrors, SavePromptRequest, SavePromptResponse, SavePromptResponses, SaveRecipeData, SaveRecipeError, SaveRecipeErrors, SaveRecipeRequest, SaveRecipeResponse, SaveRecipeResponse2, SaveRecipeResponses, ScanRecipeData, ScanRecipeRequest, ScanRecipeResponse, ScanRecipeResponse2, ScanRecipeResponses, ScheduledJob, ScheduleRecipeData, ScheduleRecipeErrors, ScheduleRecipeRequest, ScheduleRecipeResponses, 
SearchHfModelsData, SearchHfModelsErrors, SearchHfModelsResponse, SearchHfModelsResponses, SearchSessionsData, SearchSessionsErrors, SearchSessionsResponse, SearchSessionsResponses, SendTelemetryEventData, SendTelemetryEventResponses, Session, SessionCancelData, SessionCancelResponses, SessionDisplayInfo, SessionEventsData, SessionEventsErrors, SessionEventsResponse, SessionEventsResponses, SessionExtensionsResponse, SessionInsights, SessionListResponse, SessionReplyData, SessionReplyErrors, SessionReplyRequest, SessionReplyResponse, SessionReplyResponse2, SessionReplyResponses, SessionsHandlerData, SessionsHandlerErrors, SessionsHandlerResponse, SessionsHandlerResponses, SessionsQuery, SessionType, SetConfigProviderData, SetProviderRequest, SetRecipeSlashCommandData, SetRecipeSlashCommandErrors, SetRecipeSlashCommandResponses, SetSlashCommandRequest, Settings, SetupResponse, ShareSessionNostrData, ShareSessionNostrErrors, ShareSessionNostrRequest, ShareSessionNostrResponse, ShareSessionNostrResponse2, ShareSessionNostrResponses, SlashCommand, SlashCommandsResponse, StartAgentData, StartAgentError, StartAgentErrors, StartAgentRequest, StartAgentResponse, StartAgentResponses, StartNanogptSetupData, StartNanogptSetupResponse, StartNanogptSetupResponses, StartOpenrouterSetupData, StartOpenrouterSetupResponse, StartOpenrouterSetupResponses, StartTetrateSetupData, StartTetrateSetupResponse, StartTetrateSetupResponses, StartTunnelData, StartTunnelError, StartTunnelErrors, StartTunnelResponse, StartTunnelResponses, StatusData, StatusResponse, StatusResponses, StopAgentData, StopAgentErrors, StopAgentRequest, StopAgentResponse, StopAgentResponses, StopTunnelData, StopTunnelError, StopTunnelErrors, StopTunnelResponses, SubRecipe, SuccessCheck, SyncFeaturedModelsData, SyncFeaturedModelsResponses, SystemInfo, SystemInfoData, SystemInfoResponse, SystemInfoResponses, SystemNotificationContent, SystemNotificationType, TaskSupport, TelemetryEventRequest, Template, TextContent, ThinkingContent, TokenState, Tool, ToolAnnotations, ToolConfirmationRequest, ToolExecution, ToolInfo, ToolPermission, ToolRequest, ToolResponse, TranscribeDictationData, TranscribeDictationErrors, TranscribeDictationResponse, TranscribeDictationResponses, TranscribeRequest, TranscribeResponse, TunnelInfo, TunnelState, UiMetadata, UnpauseScheduleData, UnpauseScheduleErrors, UnpauseScheduleResponse, UnpauseScheduleResponses, UpdateAgentProviderData, UpdateAgentProviderErrors, UpdateAgentProviderResponses, UpdateCustomProviderData, UpdateCustomProviderErrors, UpdateCustomProviderRequest, UpdateCustomProviderResponse, UpdateCustomProviderResponses, UpdateFromSessionData, UpdateFromSessionErrors, UpdateFromSessionRequest, UpdateFromSessionResponses, UpdateModelSettingsData, UpdateModelSettingsErrors, UpdateModelSettingsResponse, UpdateModelSettingsResponses, UpdateProviderRequest, UpdateScheduleData, UpdateScheduleErrors, UpdateScheduleRequest, UpdateScheduleResponse, UpdateScheduleResponses, UpdateSessionData, UpdateSessionErrors, UpdateSessionNameData, UpdateSessionNameErrors, UpdateSessionNameRequest, UpdateSessionNameResponses, UpdateSessionRequest, UpdateSessionResponses, UpdateSessionUserRecipeValuesData, UpdateSessionUserRecipeValuesError, UpdateSessionUserRecipeValuesErrors, UpdateSessionUserRecipeValuesRequest, UpdateSessionUserRecipeValuesResponse, UpdateSessionUserRecipeValuesResponse2, UpdateSessionUserRecipeValuesResponses, UpdateWorkingDirData, UpdateWorkingDirErrors, UpdateWorkingDirRequest, UpdateWorkingDirResponses, 
UpsertConfigData, UpsertConfigErrors, UpsertConfigQuery, UpsertConfigResponse, UpsertConfigResponses, UpsertPermissionsData, UpsertPermissionsErrors, UpsertPermissionsQuery, UpsertPermissionsResponse, UpsertPermissionsResponses, ValidateConfigData, ValidateConfigErrors, ValidateConfigResponse, ValidateConfigResponses, WhisperModelResponse, WindowProps } from './types.gen'; diff --git a/ui/desktop/src/api/sdk.gen.ts b/ui/desktop/src/api/sdk.gen.ts index 2870da539dd6..c98c91640854 100644 --- a/ui/desktop/src/api/sdk.gen.ts +++ b/ui/desktop/src/api/sdk.gen.ts @@ -2,7 +2,7 @@ import type { Client, Options as Options2, TDataShape } from './client'; import { client } from './client.gen'; -import type { AddExtensionData, AddExtensionErrors, AddExtensionResponses, AgentAddExtensionData, AgentAddExtensionErrors, AgentAddExtensionResponses, AgentRemoveExtensionData, AgentRemoveExtensionErrors, AgentRemoveExtensionResponses, CallToolData, CallToolErrors, CallToolResponses, CancelDownloadData, CancelDownloadErrors, CancelDownloadResponses, CancelLocalModelDownloadData, CancelLocalModelDownloadErrors, CancelLocalModelDownloadResponses, CheckProviderData, CleanupProviderCacheData, CleanupProviderCacheErrors, CleanupProviderCacheResponses, ConfigureProviderOauthData, ConfigureProviderOauthErrors, ConfigureProviderOauthResponses, ConfirmToolActionData, ConfirmToolActionErrors, ConfirmToolActionResponses, CreateCustomProviderData, CreateCustomProviderErrors, CreateCustomProviderResponses, CreateRecipeData, CreateRecipeErrors, CreateRecipeResponses, CreateScheduleData, CreateScheduleErrors, CreateScheduleResponses, DecodeRecipeData, DecodeRecipeErrors, DecodeRecipeResponses, DeleteLocalModelData, DeleteLocalModelErrors, DeleteLocalModelResponses, DeleteModelData, DeleteModelErrors, DeleteModelResponses, DeleteRecipeData, DeleteRecipeErrors, DeleteRecipeResponses, DeleteScheduleData, DeleteScheduleErrors, DeleteScheduleResponses, DeleteSessionData, DeleteSessionErrors, DeleteSessionResponses, DiagnosticsData, DiagnosticsErrors, DiagnosticsResponses, DownloadHfModelData, DownloadHfModelErrors, DownloadHfModelResponses, DownloadModelData, DownloadModelErrors, DownloadModelResponses, EncodeRecipeData, EncodeRecipeErrors, EncodeRecipeResponses, ExportAppData, ExportAppErrors, ExportAppResponses, ExportSessionData, ExportSessionErrors, ExportSessionResponses, ForkSessionData, ForkSessionErrors, ForkSessionResponses, GetCanonicalModelInfoData, GetCanonicalModelInfoResponses, GetCustomProviderData, GetCustomProviderErrors, GetCustomProviderResponses, GetDictationConfigData, GetDictationConfigResponses, GetDownloadProgressData, GetDownloadProgressErrors, GetDownloadProgressResponses, GetExtensionsData, GetExtensionsErrors, GetExtensionsResponses, GetFeaturesData, GetFeaturesResponses, GetLocalModelDownloadProgressData, GetLocalModelDownloadProgressErrors, GetLocalModelDownloadProgressResponses, GetModelSettingsData, GetModelSettingsErrors, GetModelSettingsResponses, GetPromptData, GetPromptErrors, GetPromptResponses, GetPromptsData, GetPromptsResponses, GetProviderCatalogData, GetProviderCatalogErrors, GetProviderCatalogResponses, GetProviderCatalogTemplateData, GetProviderCatalogTemplateErrors, GetProviderCatalogTemplateResponses, GetProviderModelsData, GetProviderModelsErrors, GetProviderModelsResponses, GetRepoFilesData, GetRepoFilesResponses, GetSessionData, GetSessionErrors, GetSessionExtensionsData, GetSessionExtensionsErrors, GetSessionExtensionsResponses, GetSessionInsightsData, GetSessionInsightsErrors, 
GetSessionInsightsResponses, GetSessionResponses, GetSlashCommandsData, GetSlashCommandsResponses, GetToolsData, GetToolsErrors, GetToolsResponses, GetTunnelStatusData, GetTunnelStatusResponses, ImportAppData, ImportAppErrors, ImportAppResponses, ImportSessionData, ImportSessionErrors, ImportSessionNostrData, ImportSessionNostrErrors, ImportSessionNostrResponses, ImportSessionResponses, InspectRunningJobData, InspectRunningJobErrors, InspectRunningJobResponses, KillRunningJobData, KillRunningJobResponses, ListAppsData, ListAppsErrors, ListAppsResponses, ListLocalModelsData, ListLocalModelsResponses, ListModelsData, ListModelsResponses, ListRecipesData, ListRecipesErrors, ListRecipesResponses, ListSchedulesData, ListSchedulesErrors, ListSchedulesResponses, ListSessionsData, ListSessionsErrors, ListSessionsResponses, McpUiProxyData, McpUiProxyErrors, McpUiProxyResponses, ParseRecipeData, ParseRecipeErrors, ParseRecipeResponses, PauseScheduleData, PauseScheduleErrors, PauseScheduleResponses, ProvidersData, ProvidersResponses, ReadAllConfigData, ReadAllConfigResponses, ReadConfigData, ReadConfigErrors, ReadConfigResponses, ReadResourceData, ReadResourceErrors, ReadResourceResponses, RecipeToYamlData, RecipeToYamlErrors, RecipeToYamlResponses, RemoveConfigData, RemoveConfigErrors, RemoveConfigResponses, RemoveCustomProviderData, RemoveCustomProviderErrors, RemoveCustomProviderResponses, RemoveExtensionData, RemoveExtensionErrors, RemoveExtensionResponses, ReplyData, ReplyErrors, ReplyResponses, ResetPromptData, ResetPromptErrors, ResetPromptResponses, RestartAgentData, RestartAgentErrors, RestartAgentResponses, ResumeAgentData, ResumeAgentErrors, ResumeAgentResponses, RunNowHandlerData, RunNowHandlerErrors, RunNowHandlerResponses, SavePromptData, SavePromptErrors, SavePromptResponses, SaveRecipeData, SaveRecipeErrors, SaveRecipeResponses, ScanRecipeData, ScanRecipeResponses, ScheduleRecipeData, ScheduleRecipeErrors, ScheduleRecipeResponses, SearchHfModelsData, SearchHfModelsErrors, SearchHfModelsResponses, SearchSessionsData, SearchSessionsErrors, SearchSessionsResponses, SendTelemetryEventData, SendTelemetryEventResponses, SessionCancelData, SessionCancelResponses, SessionEventsData, SessionEventsErrors, SessionEventsResponses, SessionReplyData, SessionReplyErrors, SessionReplyResponses, SessionsHandlerData, SessionsHandlerErrors, SessionsHandlerResponses, SetConfigProviderData, SetRecipeSlashCommandData, SetRecipeSlashCommandErrors, SetRecipeSlashCommandResponses, ShareSessionNostrData, ShareSessionNostrErrors, ShareSessionNostrResponses, StartAgentData, StartAgentErrors, StartAgentResponses, StartNanogptSetupData, StartNanogptSetupResponses, StartOpenrouterSetupData, StartOpenrouterSetupResponses, StartTetrateSetupData, StartTetrateSetupResponses, StartTunnelData, StartTunnelErrors, StartTunnelResponses, StatusData, StatusResponses, StopAgentData, StopAgentErrors, StopAgentResponses, StopTunnelData, StopTunnelErrors, StopTunnelResponses, SyncFeaturedModelsData, SyncFeaturedModelsResponses, SystemInfoData, SystemInfoResponses, TranscribeDictationData, TranscribeDictationErrors, TranscribeDictationResponses, UnpauseScheduleData, UnpauseScheduleErrors, UnpauseScheduleResponses, UpdateAgentProviderData, UpdateAgentProviderErrors, UpdateAgentProviderResponses, UpdateCustomProviderData, UpdateCustomProviderErrors, UpdateCustomProviderResponses, UpdateFromSessionData, UpdateFromSessionErrors, UpdateFromSessionResponses, UpdateModelSettingsData, UpdateModelSettingsErrors, 
UpdateModelSettingsResponses, UpdateScheduleData, UpdateScheduleErrors, UpdateScheduleResponses, UpdateSessionData, UpdateSessionErrors, UpdateSessionNameData, UpdateSessionNameErrors, UpdateSessionNameResponses, UpdateSessionResponses, UpdateSessionUserRecipeValuesData, UpdateSessionUserRecipeValuesErrors, UpdateSessionUserRecipeValuesResponses, UpdateWorkingDirData, UpdateWorkingDirErrors, UpdateWorkingDirResponses, UpsertConfigData, UpsertConfigErrors, UpsertConfigResponses, UpsertPermissionsData, UpsertPermissionsErrors, UpsertPermissionsResponses, ValidateConfigData, ValidateConfigErrors, ValidateConfigResponses } from './types.gen'; +import type { AddExtensionData, AddExtensionErrors, AddExtensionResponses, AgentAddExtensionData, AgentAddExtensionErrors, AgentAddExtensionResponses, AgentRemoveExtensionData, AgentRemoveExtensionErrors, AgentRemoveExtensionResponses, CallToolData, CallToolErrors, CallToolResponses, CancelDownloadData, CancelDownloadErrors, CancelDownloadResponses, CancelLocalModelDownloadData, CancelLocalModelDownloadErrors, CancelLocalModelDownloadResponses, CheckProviderData, CleanupProviderCacheData, CleanupProviderCacheErrors, CleanupProviderCacheResponses, ConfigureProviderOauthData, ConfigureProviderOauthErrors, ConfigureProviderOauthResponses, ConfirmToolActionData, ConfirmToolActionErrors, ConfirmToolActionResponses, CreateCustomProviderData, CreateCustomProviderErrors, CreateCustomProviderResponses, CreateRecipeData, CreateRecipeErrors, CreateRecipeResponses, CreateScheduleData, CreateScheduleErrors, CreateScheduleResponses, DecodeRecipeData, DecodeRecipeErrors, DecodeRecipeResponses, DeleteLocalModelData, DeleteLocalModelErrors, DeleteLocalModelResponses, DeleteModelData, DeleteModelErrors, DeleteModelResponses, DeleteRecipeData, DeleteRecipeErrors, DeleteRecipeResponses, DeleteScheduleData, DeleteScheduleErrors, DeleteScheduleResponses, DeleteSessionData, DeleteSessionErrors, DeleteSessionResponses, DiagnosticsData, DiagnosticsErrors, DiagnosticsResponses, DownloadHfModelData, DownloadHfModelErrors, DownloadHfModelResponses, DownloadModelData, DownloadModelErrors, DownloadModelResponses, EncodeRecipeData, EncodeRecipeErrors, EncodeRecipeResponses, ExportAppData, ExportAppErrors, ExportAppResponses, ExportSessionData, ExportSessionErrors, ExportSessionResponses, ForkSessionData, ForkSessionErrors, ForkSessionResponses, GetCanonicalModelInfoData, GetCanonicalModelInfoResponses, GetCustomProviderData, GetCustomProviderErrors, GetCustomProviderResponses, GetDictationConfigData, GetDictationConfigResponses, GetDownloadProgressData, GetDownloadProgressErrors, GetDownloadProgressResponses, GetExtensionsData, GetExtensionsErrors, GetExtensionsResponses, GetFeaturesData, GetFeaturesResponses, GetLocalModelDownloadProgressData, GetLocalModelDownloadProgressErrors, GetLocalModelDownloadProgressResponses, GetModelSettingsData, GetModelSettingsErrors, GetModelSettingsResponses, GetPromptData, GetPromptErrors, GetPromptResponses, GetPromptsData, GetPromptsResponses, GetProviderCatalogData, GetProviderCatalogErrors, GetProviderCatalogResponses, GetProviderCatalogTemplateData, GetProviderCatalogTemplateErrors, GetProviderCatalogTemplateResponses, GetProviderModelInfoData, GetProviderModelInfoErrors, GetProviderModelInfoResponses, GetProviderModelsData, GetProviderModelsErrors, GetProviderModelsResponses, GetRepoFilesData, GetRepoFilesResponses, GetSessionData, GetSessionErrors, GetSessionExtensionsData, GetSessionExtensionsErrors, GetSessionExtensionsResponses, 
GetSessionInsightsData, GetSessionInsightsErrors, GetSessionInsightsResponses, GetSessionResponses, GetSlashCommandsData, GetSlashCommandsResponses, GetToolsData, GetToolsErrors, GetToolsResponses, GetTunnelStatusData, GetTunnelStatusResponses, ImportAppData, ImportAppErrors, ImportAppResponses, ImportSessionData, ImportSessionErrors, ImportSessionNostrData, ImportSessionNostrErrors, ImportSessionNostrResponses, ImportSessionResponses, InspectRunningJobData, InspectRunningJobErrors, InspectRunningJobResponses, KillRunningJobData, KillRunningJobResponses, ListAppsData, ListAppsErrors, ListAppsResponses, ListLocalModelsData, ListLocalModelsResponses, ListModelsData, ListModelsResponses, ListRecipesData, ListRecipesErrors, ListRecipesResponses, ListSchedulesData, ListSchedulesErrors, ListSchedulesResponses, ListSessionsData, ListSessionsErrors, ListSessionsResponses, McpUiProxyData, McpUiProxyErrors, McpUiProxyResponses, ParseRecipeData, ParseRecipeErrors, ParseRecipeResponses, PauseScheduleData, PauseScheduleErrors, PauseScheduleResponses, ProvidersData, ProvidersResponses, ReadAllConfigData, ReadAllConfigResponses, ReadConfigData, ReadConfigErrors, ReadConfigResponses, ReadResourceData, ReadResourceErrors, ReadResourceResponses, RecipeToYamlData, RecipeToYamlErrors, RecipeToYamlResponses, RemoveConfigData, RemoveConfigErrors, RemoveConfigResponses, RemoveCustomProviderData, RemoveCustomProviderErrors, RemoveCustomProviderResponses, RemoveExtensionData, RemoveExtensionErrors, RemoveExtensionResponses, ReplyData, ReplyErrors, ReplyResponses, ResetPromptData, ResetPromptErrors, ResetPromptResponses, RestartAgentData, RestartAgentErrors, RestartAgentResponses, ResumeAgentData, ResumeAgentErrors, ResumeAgentResponses, RunNowHandlerData, RunNowHandlerErrors, RunNowHandlerResponses, SavePromptData, SavePromptErrors, SavePromptResponses, SaveRecipeData, SaveRecipeErrors, SaveRecipeResponses, ScanRecipeData, ScanRecipeResponses, ScheduleRecipeData, ScheduleRecipeErrors, ScheduleRecipeResponses, SearchHfModelsData, SearchHfModelsErrors, SearchHfModelsResponses, SearchSessionsData, SearchSessionsErrors, SearchSessionsResponses, SendTelemetryEventData, SendTelemetryEventResponses, SessionCancelData, SessionCancelResponses, SessionEventsData, SessionEventsErrors, SessionEventsResponses, SessionReplyData, SessionReplyErrors, SessionReplyResponses, SessionsHandlerData, SessionsHandlerErrors, SessionsHandlerResponses, SetConfigProviderData, SetRecipeSlashCommandData, SetRecipeSlashCommandErrors, SetRecipeSlashCommandResponses, ShareSessionNostrData, ShareSessionNostrErrors, ShareSessionNostrResponses, StartAgentData, StartAgentErrors, StartAgentResponses, StartNanogptSetupData, StartNanogptSetupResponses, StartOpenrouterSetupData, StartOpenrouterSetupResponses, StartTetrateSetupData, StartTetrateSetupResponses, StartTunnelData, StartTunnelErrors, StartTunnelResponses, StatusData, StatusResponses, StopAgentData, StopAgentErrors, StopAgentResponses, StopTunnelData, StopTunnelErrors, StopTunnelResponses, SyncFeaturedModelsData, SyncFeaturedModelsResponses, SystemInfoData, SystemInfoResponses, TranscribeDictationData, TranscribeDictationErrors, TranscribeDictationResponses, UnpauseScheduleData, UnpauseScheduleErrors, UnpauseScheduleResponses, UpdateAgentProviderData, UpdateAgentProviderErrors, UpdateAgentProviderResponses, UpdateCustomProviderData, UpdateCustomProviderErrors, UpdateCustomProviderResponses, UpdateFromSessionData, UpdateFromSessionErrors, UpdateFromSessionResponses, UpdateModelSettingsData, 
UpdateModelSettingsErrors, UpdateModelSettingsResponses, UpdateScheduleData, UpdateScheduleErrors, UpdateScheduleResponses, UpdateSessionData, UpdateSessionErrors, UpdateSessionNameData, UpdateSessionNameErrors, UpdateSessionNameResponses, UpdateSessionResponses, UpdateSessionUserRecipeValuesData, UpdateSessionUserRecipeValuesErrors, UpdateSessionUserRecipeValuesResponses, UpdateWorkingDirData, UpdateWorkingDirErrors, UpdateWorkingDirResponses, UpsertConfigData, UpsertConfigErrors, UpsertConfigResponses, UpsertPermissionsData, UpsertPermissionsErrors, UpsertPermissionsResponses, ValidateConfigData, ValidateConfigErrors, ValidateConfigResponses } from './types.gen'; export type Options = Options2 & { /** @@ -237,6 +237,15 @@ export const providers = (options?: Option export const cleanupProviderCache = (options: Options) => (options.client ?? client).post({ url: '/config/providers/{name}/cleanup', ...options }); +export const getProviderModelInfo = (options: Options) => (options.client ?? client).post({ + url: '/config/providers/{name}/model-info', + ...options, + headers: { + 'Content-Type': 'application/json', + ...options.headers + } +}); + export const getProviderModels = (options: Options) => (options.client ?? client).get({ url: '/config/providers/{name}/models', ...options }); export const configureProviderOauth = (options: Options) => (options.client ?? client).post({ url: '/config/providers/{name}/oauth', ...options }); diff --git a/ui/desktop/src/api/types.gen.ts b/ui/desktop/src/api/types.gen.ts index da88661b8f36..b172229d6dcc 100644 --- a/ui/desktop/src/api/types.gen.ts +++ b/ui/desktop/src/api/types.gen.ts @@ -818,6 +818,10 @@ export type ModelInfo = { * Cost per token for output in USD (optional) */ output_token_cost?: number | null; + /** + * Whether this model supports reasoning/thinking controls + */ + reasoning?: boolean; /** * Whether this model supports cache control */ @@ -834,6 +838,7 @@ export type ModelInfoData = { model: string; output_token_cost?: number | null; provider: string; + reasoning: boolean; }; export type ModelInfoQuery = { @@ -999,6 +1004,10 @@ export type ProviderMetadata = { setup_steps?: Array; }; +export type ProviderModelInfoQuery = { + model: string; +}; + export type ProviderTemplate = { api_url: string; doc_url: string; @@ -1643,6 +1652,7 @@ export type UpdateProviderRequest = { context_limit?: number | null; model?: string | null; provider: string; + reasoning?: boolean | null; request_params?: { [key: string]: unknown; } | null; @@ -2727,6 +2737,42 @@ export type CleanupProviderCacheResponses = { export type CleanupProviderCacheResponse = CleanupProviderCacheResponses[keyof CleanupProviderCacheResponses]; +export type GetProviderModelInfoData = { + body: ProviderModelInfoQuery; + path: { + /** + * Provider name (e.g., openai) + */ + name: string; + }; + query?: never; + url: '/config/providers/{name}/model-info'; +}; + +export type GetProviderModelInfoErrors = { + /** + * Unknown provider, provider not configured, or authentication error + */ + 400: unknown; + /** + * Rate limit exceeded + */ + 429: unknown; + /** + * Internal server error + */ + 500: unknown; +}; + +export type GetProviderModelInfoResponses = { + /** + * Model metadata fetched successfully + */ + 200: ModelInfo; +}; + +export type GetProviderModelInfoResponse = GetProviderModelInfoResponses[keyof GetProviderModelInfoResponses]; + export type GetProviderModelsData = { body?: never; path: { @@ -2758,7 +2804,7 @@ export type GetProviderModelsResponses = { /** * Models fetched 
successfully
   */
-  200: Array<string>;
+  200: Array<ModelInfo>;
};

export type GetProviderModelsResponse = GetProviderModelsResponses[keyof GetProviderModelsResponses];
diff --git a/ui/desktop/src/components/ModelAndProviderContext.tsx b/ui/desktop/src/components/ModelAndProviderContext.tsx
index 8b5f19d6249c..cacae4d7f496 100644
--- a/ui/desktop/src/components/ModelAndProviderContext.tsx
+++ b/ui/desktop/src/components/ModelAndProviderContext.tsx
@@ -77,6 +77,7 @@ export const ModelAndProviderProvider: React.FC =
           provider: providerName,
           model: modelName,
           context_limit: model.context_limit,
+          reasoning: model.reasoning,
           request_params: model.request_params,
         },
       });
diff --git a/ui/desktop/src/components/recipes/shared/RecipeModelSelector.tsx b/ui/desktop/src/components/recipes/shared/RecipeModelSelector.tsx
index 2a0c310ef6bc..ad616bfdb389 100644
--- a/ui/desktop/src/components/recipes/shared/RecipeModelSelector.tsx
+++ b/ui/desktop/src/components/recipes/shared/RecipeModelSelector.tsx
@@ -108,8 +108,8 @@ export const RecipeModelSelector = ({
       const modelList = models || [];
       const options = modelList.map((m) => ({
-        value: m,
-        label: m,
+        value: m.name,
+        label: m.name,
         provider: p.name,
       }));
diff --git a/ui/desktop/src/components/settings/models/modelInterface.ts b/ui/desktop/src/components/settings/models/modelInterface.ts
index a8ee79bec60c..a572ed498585 100644
--- a/ui/desktop/src/components/settings/models/modelInterface.ts
+++ b/ui/desktop/src/components/settings/models/modelInterface.ts
@@ -9,6 +9,7 @@ export default interface Model {
   alias?: string; // optional model display name
   subtext?: string; // goes below model name if not the provider
   context_limit?: number; // optional context limit override
+  reasoning?: boolean; // optional reasoning/thinking support metadata
   request_params?: Record<string, unknown>; // provider-specific request parameters
 }

@@ -45,7 +46,7 @@ export async function getProviderMetadata(
 export interface ProviderModelsResult {
   provider: ProviderDetails;
-  models: string[] | null;
+  models: Model[] | null;
   error: string | null;
   warning: string | null;
 }

@@ -61,7 +62,7 @@ export async function fetchModelsForProviders(
       const allModels = response.data || [];
       const downloadedModels = allModels
         .filter((m) => m.status.state === 'Downloaded')
-        .map((m) => m.id);
+        .map((m) => ({ name: m.id, provider: p.name }) as Model);
       return { provider: p, models: downloadedModels, error: null, warning: null };
     }

@@ -69,12 +70,28 @@
         path: { name: p.name },
         throwOnError: true,
       });
-      const models = response.data || [];
+      const models = (response.data || []).map(
+        (m) =>
+          ({
+            name: m.name,
+            provider: p.name,
+            context_limit: m.context_limit,
+            reasoning: m.reasoning ?? undefined,
+          }) as Model
+      );
       return { provider: p, models, error: null, warning: null };
     } catch (e: unknown) {
       // For custom providers, fall back to the configured model list
       if (p.provider_type === 'Custom') {
-        const fallbackModels = p.metadata.known_models.map((m) => m.name);
+        const fallbackModels = p.metadata.known_models.map(
+          (m) =>
+            ({
+              name: m.name,
+              provider: p.name,
+              context_limit: m.context_limit,
+              reasoning: m.reasoning ??
undefined, + }) as Model + ); if (fallbackModels.length > 0) { console.warn(`Failed to fetch models for ${p.name}:`, getErrorMessage(e)); return { diff --git a/ui/desktop/src/components/settings/models/subcomponents/SwitchModelModal.tsx b/ui/desktop/src/components/settings/models/subcomponents/SwitchModelModal.tsx index 0bbda0cfdcdd..d12c3877a89a 100644 --- a/ui/desktop/src/components/settings/models/subcomponents/SwitchModelModal.tsx +++ b/ui/desktop/src/components/settings/models/subcomponents/SwitchModelModal.tsx @@ -19,10 +19,14 @@ import { useModelAndProvider } from '../../../ModelAndProviderContext'; import type { View } from '../../../../utils/navigationUtils'; import Model, { getProviderMetadata, fetchModelsForProviders } from '../modelInterface'; import { getPredefinedModelsFromEnv, shouldShowPredefinedModels } from '../predefinedModelsUtils'; -import { ProviderType } from '../../../../api'; +import { getProviderModelInfo, ProviderType } from '../../../../api'; import { trackModelChanged } from '../../../../utils/analytics'; const i18n = defineMessages({ + thinkingEffortOff: { + id: 'switchModelModal.thinkingEffortOff', + defaultMessage: 'Off - No extended thinking', + }, thinkingLevelLow: { id: 'switchModelModal.thinkingLevelLow', defaultMessage: 'Low - Better latency, lighter reasoning', @@ -185,16 +189,7 @@ const i18n = defineMessages({ }, }); -// THINKING_LEVEL_OPTIONS and CLAUDE_THINKING_EFFORT_OPTIONS are created inside the component to support i18n. - -function isClaudeModel(name: string | null | undefined): boolean { - return !!name && name.toLowerCase().startsWith('claude-'); -} - -function supportsAdaptiveThinking(name: string): boolean { - const lower = name.toLowerCase(); - return lower.includes('claude-opus-4-6') || lower.includes('claude-sonnet-4-6'); -} +// Thinking effort options are created inside the component to support i18n. const PREFERRED_MODEL_PATTERNS = [ /claude-sonnet-4/i, @@ -256,12 +251,8 @@ export const SwitchModelModal = ({ }: SwitchModelModalProps) => { const intl = useIntl(); - const THINKING_LEVEL_OPTIONS = [ - { value: 'low', label: intl.formatMessage(i18n.thinkingLevelLow) }, - { value: 'high', label: intl.formatMessage(i18n.thinkingLevelHigh) }, - ]; - - const CLAUDE_THINKING_EFFORT_OPTIONS = [ + const THINKING_EFFORT_OPTIONS = [ + { value: 'off', label: intl.formatMessage(i18n.thinkingEffortOff) }, { value: 'low', label: intl.formatMessage(i18n.claudeEffortLow) }, { value: 'medium', label: intl.formatMessage(i18n.claudeEffortMedium) }, { value: 'high', label: intl.formatMessage(i18n.claudeEffortHigh) }, @@ -278,7 +269,13 @@ export const SwitchModelModal = ({ const currentModel = sessionModel ?? configModel; const currentProvider = sessionProvider ?? 
configProvider; const [providerOptions, setProviderOptions] = useState<{ value: string; label: string }[]>([]); - type ModelOption = { value: string; label: string; provider: string; isDisabled?: boolean }; + type ModelOption = { + value: string; + label: string; + provider: string; + isDisabled?: boolean; + reasoning?: boolean; + }; const [modelOptions, setModelOptions] = useState<{ options: ModelOption[] }[]>([]); const [provider, setProvider] = useState( initialProvider || currentProvider || null @@ -304,43 +301,66 @@ export const SwitchModelModal = ({ import('../../../../api').ProviderDetails[] >([]); const fetchedProviders = useRef>(new Set()); - const [thinkingLevel, setThinkingLevel] = useState('low'); - const [claudeThinkingType, setClaudeThinkingType] = useState('disabled'); - const [claudeThinkingEffort, setClaudeThinkingEffort] = useState('high'); - const [claudeThinkingBudget, setClaudeThinkingBudget] = useState('16000'); + const [thinkingEffort, setThinkingEffort] = useState(null); + const [selectedModelReasoning, setSelectedModelReasoning] = useState(null); const modelName = usePredefinedModels ? selectedPredefinedModel?.name : model; - const isGemini3Model = modelName?.toLowerCase().startsWith('gemini-3') ?? false; - const showClaudeThinking = isClaudeModel(modelName); - const modelSupportsAdaptive = modelName ? supportsAdaptiveThinking(modelName) : false; - - useEffect(() => { - if (!showClaudeThinking) return; - if (claudeThinkingType === 'adaptive' && !modelSupportsAdaptive) { - setClaudeThinkingType('disabled'); - } - }, [modelName, showClaudeThinking, modelSupportsAdaptive, claudeThinkingType]); + const effectiveProvider = usePredefinedModels ? selectedPredefinedModel?.provider : provider; + const modelReasoning = selectedModelReasoning ?? selectedPredefinedModel?.reasoning; + const showThinkingControl = modelReasoning === true; useEffect(() => { - const readConfig = async (key: string): Promise => { + (async () => { try { - const val = (await read(key, false)) as string; - return val || null; + const effort = (await read('GOOSE_THINKING_EFFORT', false)) as string; + if (effort) setThinkingEffort(effort); } catch (e) { - console.warn(`Could not read ${key}, using default:`, e); - return null; + console.warn('Could not read GOOSE_THINKING_EFFORT, using default:', e); } - }; - (async () => { - const tt = await readConfig('CLAUDE_THINKING_TYPE'); - if (tt) setClaudeThinkingType(tt); - const effort = await readConfig('CLAUDE_THINKING_EFFORT'); - if (effort) setClaudeThinkingEffort(effort); - const budget = await readConfig('CLAUDE_THINKING_BUDGET'); - if (budget) setClaudeThinkingBudget(budget); })(); }, [read]); + useEffect(() => { + if (!effectiveProvider || !modelName || modelName === 'custom') { + return; + } + + let cancelled = false; + setSelectedModelReasoning(selectedPredefinedModel?.reasoning ?? null); + getProviderModelInfo({ + path: { name: effectiveProvider }, + body: { model: modelName }, + }) + .then((response) => { + if (!cancelled) { + setSelectedModelReasoning(response.data?.reasoning ?? 
null); + } + }) + .catch(() => { + if (!cancelled) { + setSelectedModelReasoning(null); + } + }); + + return () => { + cancelled = true; + }; + }, [effectiveProvider, modelName, selectedPredefinedModel?.reasoning]); + + useEffect(() => { + if (!provider || !model) return; + + const selectedOption = modelOptions + .flatMap((group) => group.options) + .find((option) => option.provider === provider && option.value === model); + + if (selectedOption?.reasoning !== undefined) { + setSelectedModelReasoning(selectedOption.reasoning); + } else { + setSelectedModelReasoning(null); + } + }, [model, provider, modelOptions]); + // Validate form data const validateForm = useCallback(() => { const errors = { @@ -393,36 +413,18 @@ export const SwitchModelModal = ({ subtext: providerDisplayName, } as Model; } + modelObj = { + ...modelObj, + reasoning: selectedModelReasoning ?? modelObj.reasoning, + }; - if (isGemini3Model) { + if (showThinkingControl) { + const effort = thinkingEffort ?? modelObj.request_params?.thinking_effort ?? 'off'; modelObj = { ...modelObj, - request_params: { ...modelObj.request_params, thinking_level: thinkingLevel }, + request_params: { ...modelObj.request_params, thinking_effort: effort }, }; - } - - if (showClaudeThinking) { - const params: Record = { - ...modelObj.request_params, - thinking_type: claudeThinkingType, - }; - if (claudeThinkingType === 'adaptive') { - params.effort = claudeThinkingEffort; - } else if (claudeThinkingType === 'enabled') { - params.budget_tokens = parseInt(claudeThinkingBudget, 10) || 16000; - } - modelObj = { ...modelObj, request_params: params }; - - upsert('CLAUDE_THINKING_TYPE', claudeThinkingType, false).catch(console.warn); - if (claudeThinkingType === 'adaptive') { - upsert('CLAUDE_THINKING_EFFORT', claudeThinkingEffort, false).catch(console.warn); - } else if (claudeThinkingType === 'enabled') { - upsert( - 'CLAUDE_THINKING_BUDGET', - parseInt(claudeThinkingBudget, 10) || 16000, - false - ).catch(console.warn); - } + upsert('GOOSE_THINKING_EFFORT', effort, false).catch(console.warn); } const success = await changeModel(sessionId, modelObj); @@ -515,7 +517,7 @@ export const SwitchModelModal = ({ if (cancelled) return; const newGroupedOptions: { - options: { value: string; label: string; provider: string; providerType: ProviderType }[]; + options: (ModelOption & { providerType: ProviderType })[]; }[] = []; const newErrors: Record = {}; const newWarnings: Record = {}; @@ -536,11 +538,13 @@ export const SwitchModelModal = ({ label: string; provider: string; providerType: ProviderType; + reasoning?: boolean; }[] = modelList.map((m) => ({ - value: m, - label: m, + value: m.name, + label: m.name, provider: p.name, providerType: p.provider_type, + reasoning: m.reasoning, })); if (p.provider_type !== 'Custom') { @@ -608,28 +612,36 @@ export const SwitchModelModal = ({ // Handle model selection change const handleModelChange = (newValue: unknown) => { - const selectedOption = newValue as { value: string; label: string; provider: string } | null; + const selectedOption = newValue as { + value: string; + label: string; + provider: string; + reasoning?: boolean; + } | null; if (selectedOption?.value === 'custom') { setIsCustomModel(true); setModel(''); setProvider(selectedOption.provider); + setSelectedModelReasoning(null); setUserClearedModel(false); } else if (selectedOption === null) { // User cleared the selection setIsCustomModel(false); setModel(''); + setSelectedModelReasoning(null); setUserClearedModel(true); } else { setIsCustomModel(false); 
setModel(selectedOption?.value || '');
       setProvider(selectedOption?.provider || '');
+      setSelectedModelReasoning(selectedOption?.reasoning ?? null);
       setUserClearedModel(false);
     }
   };

   // Store the original model options in state, initialized from modelOptions
   const [originalModelOptions, setOriginalModelOptions] =
-    useState<{ options: { value: string; label: string; provider: string }[] }[]>(modelOptions);
+    useState<{ options: ModelOption[] }[]>(modelOptions);

   const handleInputChange = (inputValue: string) => {
     if (!provider) return;
@@ -673,54 +685,20 @@ export const SwitchModelModal = ({
     }
   };

-  const claudeThinkingTypeOptions = [
-    ...(modelSupportsAdaptive
-      ? [{ value: 'adaptive', label: intl.formatMessage(i18n.claudeAdaptive) }]
-      : []),
-    { value: 'enabled', label: intl.formatMessage(i18n.claudeEnabled) },
-    { value: 'disabled', label: intl.formatMessage(i18n.claudeDisabled) },
-  ];
-
-  const claudeThinkingControls = showClaudeThinking && (
-    <div className="mt-4">
-      <Select
-        options={claudeThinkingTypeOptions}
-        value={claudeThinkingTypeOptions.find((o) => o.value === claudeThinkingType)}
-        onChange={(newValue: unknown) => {
-          const option = newValue as { value: string; label: string } | null;
-          setClaudeThinkingType(option?.value || 'disabled');
-        }}
-      />
-      {claudeThinkingType === 'adaptive' && (
-        <div>
-          <Select
-            options={CLAUDE_THINKING_EFFORT_OPTIONS}
-            value={CLAUDE_THINKING_EFFORT_OPTIONS.find((o) => o.value === claudeThinkingEffort)}
-            onChange={(newValue: unknown) => {
-              const option = newValue as { value: string; label: string } | null;
-              setClaudeThinkingEffort(option?.value || 'high');
-            }}
-            placeholder={intl.formatMessage(i18n.selectEffortLevel)}
-          />
-        </div>
-      )}
-      {claudeThinkingType === 'enabled' && (
-        <div>
-          <Input
-            value={claudeThinkingBudget}
-            onChange={(e) => setClaudeThinkingBudget(e.target.value)}
-          />
-        </div>
-      )}
-    </div>
-  );
+  const thinkingEffortControl = showThinkingControl && (
+    <div className="mt-4">
+      <Select
+        options={THINKING_EFFORT_OPTIONS}
+        value={THINKING_EFFORT_OPTIONS.find((o) => o.value === (thinkingEffort ?? 'off'))}
+        onChange={(newValue: unknown) => {
+          const option = newValue as { value: string; label: string } | null;
+          setThinkingEffort(option?.value || 'off');
+        }}
+        placeholder={intl.formatMessage(i18n.selectEffortLevel)}
+      />
+    </div>
+  );
-          {isGemini3Model && (
-            <div>
-              <Select
-                options={THINKING_LEVEL_OPTIONS}
-                value={THINKING_LEVEL_OPTIONS.find((o) => o.value === thinkingLevel)}
-                onChange={(newValue: unknown) => {
-                  const option = newValue as { value: string; label: string } | null;
-                  setThinkingLevel(option?.value || 'low');
-                }}
-                placeholder={intl.formatMessage(i18n.selectThinkingLevel)}
-              />
-            </div>
-          )}
-
-          {claudeThinkingControls}
+          {thinkingEffortControl}
      ) : (
        /* Manual Provider/Model Selection */
@@ -963,25 +923,7 @@
            )}
-            {isGemini3Model && (
-              <div>
-                <Select
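
---
Note for reviewers: below is a minimal sketch of how the pieces introduced in this
change compose on the client side, assuming only what the patch itself defines —
the generated getProviderModelInfo call, the optional ModelInfo.reasoning flag,
and the off/low/medium/high/max effort values. pickThinkingEffort is an
illustrative helper (and the import path is abbreviated), not part of the patch.

    import { getProviderModelInfo } from './api';

    // Effort values accepted by the unified thinking control in this patch.
    type ThinkingEffort = 'off' | 'low' | 'medium' | 'high' | 'max';

    // Ask the server whether a model supports reasoning, then decide which
    // thinking_effort to request. Falls back to 'off' when the lookup fails
    // or the model reports no reasoning support, mirroring SwitchModelModal.
    async function pickThinkingEffort(
      provider: string,
      model: string,
      requested: ThinkingEffort
    ): Promise<ThinkingEffort> {
      try {
        // POST /config/providers/{name}/model-info with { model } in the body.
        const response = await getProviderModelInfo({
          path: { name: provider },
          body: { model },
        });
        // ModelInfo.reasoning is optional; treat a missing flag as unsupported.
        return response.data?.reasoning ? requested : 'off';
      } catch {
        return 'off';
      }
    }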