getprobo · aureliensibiril · Apr 2, 2026 · Apr 2, 2026 · Apr 6, 2026 · Apr 6, 2026
diff --git a/e2e/console/testdata/config.yaml b/e2e/console/testdata/config.yaml
@@ -61,12 +61,12 @@ probod:
     slack:
       sender-interval: 60
 
-  llm:
+  agents:
     providers:
       openai:
         type: "openai"
         api-key: "thisisnotasecret"
-    defaults:
+    default:
       provider: "openai"
       model-name: "gpt-4o"
       temperature: 0.1

diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go
@@ -255,6 +255,15 @@ func WithParallelToolCalls(enabled bool) Option {
 	}
 }
 
+// WithThinking returns an Option that enables thinking in the agent's
+// model settings with a budget of budgetTokens tokens.
+func WithThinking(budgetTokens int) Option {
+	return func(a *Agent) {
+		cfg := llm.ThinkingConfig{Enabled: true, BudgetTokens: budgetTokens}
+		a.modelSettings.Thinking = &cfg
+	}
+}
+
 func WithLogger(l *log.Logger) Option {
 	return func(a *Agent) {
 		a.logger = l

diff --git a/pkg/agent/agent_tool.go b/pkg/agent/agent_tool.go
@@ -116,5 +116,20 @@ func (t *agentTool) Execute(ctx context.Context, arguments string) (ToolResult,
 		return ToolResult{}, err
 	}
 
-	return ToolResult{Content: result.FinalMessage().Text()}, nil
+	text := result.FinalMessage().Text()
+
+	if t.agent.outputType != nil {
+		if !json.Valid([]byte(text)) {
+			preview := text
+			if len(preview) > 500 {
+				preview = preview[:500] + "... (truncated)"
+			}
+			return ToolResult{
+				Content: fmt.Sprintf("Sub-agent %q returned invalid JSON. Raw output:\n%s", t.agent.name, preview),
+				IsError: true,
+			}, nil
+		}
+	}
+
+	return ToolResult{Content: text}, nil
 }
diff --git a/pkg/agent/model_settings.go b/pkg/agent/model_settings.go
@@ -24,4 +24,5 @@ type ModelSettings struct {
 	MaxTokens         *int
 	ToolChoice        *llm.ToolChoice
 	ParallelToolCalls *bool
+	Thinking          *llm.ThinkingConfig
 }
diff --git a/pkg/agent/progress.go b/pkg/agent/progress.go
@@ -0,0 +1,36 @@
+// Copyright (c) 2026 Probo Inc <hello@getprobo.com>.
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+// AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+// OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+// PERFORMANCE OF THIS SOFTWARE.
+
+package agent
+
+import "context"
+
+type (
+	ProgressEventType string // discriminator stored in ProgressEvent.Type
+	// ProgressEvent carries a type, step names and a message; its fields are tagged for JSON encoding.
+	ProgressEvent struct {
+		Type       ProgressEventType `json:"type"`
+		Step       string            `json:"step"`
+		ParentStep string            `json:"parent_step,omitempty"` // omitted from the JSON when empty
+		Message    string            `json:"message"`
+	}
+	// ProgressReporter is a function type that receives a context and a ProgressEvent.
+	ProgressReporter func(ctx context.Context, event ProgressEvent)
+)
+
+const ( // ProgressEventType values declared by this package for ProgressEvent.Type
+	ProgressEventStepStarted   ProgressEventType = "step_started"
+	ProgressEventStepCompleted ProgressEventType = "step_completed"
+	ProgressEventStepFailed    ProgressEventType = "step_failed"
+)
diff --git a/pkg/agent/run.go b/pkg/agent/run.go
@@ -28,7 +28,19 @@ import (
 	"go.probo.inc/probo/pkg/llm"
 )
 
-const tracerName = "go.probo.inc/probo/pkg/agent"
+const (
+	tracerName = "go.probo.inc/probo/pkg/agent"
+
+	// maxEmptyOutputRetries bounds the number of times the core loop
+	// will re-ask the model to produce a structured output after it
+	// returned a thinking-only empty response.
+	maxEmptyOutputRetries = 2
+
+	// synthesisNudge is the static user message appended after tool
+	// exploration completes, asking the model to produce the final
+	// structured output on the next (synthesis) turn.
+	synthesisNudge = "Based on everything you have gathered, produce the final structured output now."
+)
 
 type (
 	CallLLMFunc func(ctx context.Context, agent *Agent, req *llm.ChatCompletionRequest) (*llm.ChatCompletionResponse, error)
@@ -68,7 +80,32 @@ type (
 func noopEvent(_ context.Context, _ StreamEvent) {}
 
 func blockingCallLLM(ctx context.Context, agent *Agent, req *llm.ChatCompletionRequest) (*llm.ChatCompletionResponse, error) {
-	return agent.client.ChatCompletion(ctx, req)
+	resp, err := agent.client.ChatCompletion(ctx, req)
+	if err == nil {
+		return resp, nil
+	}
+
+	// Some providers (e.g. Anthropic) require streaming for large
+	// max_tokens or when thinking is enabled. Fall back to streaming
+	// transparently when the blocking call returns ErrStreamingRequired.
+	var streamRequired *llm.ErrStreamingRequired
+	if !errors.As(err, &streamRequired) {
+		return nil, err
+	}
+
+	stream, sErr := agent.client.ChatCompletionStream(ctx, req)
+	if sErr != nil {
+		return nil, err // surface the original ErrStreamingRequired; the stream-open error sErr is discarded
+	}
+	defer stream.Close()
+	// Drain the stream to completion; the accumulated response is read only after Next returns false.
+	acc := llm.NewStreamAccumulator(stream)
+	for acc.Next() {
+	}
+	if sErr := acc.Err(); sErr != nil {
+		return nil, sErr
+	}
+	return acc.Response(), nil
 }
 
 func (a *Agent) Run(ctx context.Context, messages []llm.Message) (*Result, error) {
@@ -273,6 +310,24 @@ func coreLoop(ctx context.Context, startAgent *Agent, inputMessages []llm.Messag
 		log.Int("tool_count", len(s.toolDefs)),
 	)
 
+	emptyOutputRetries := 0
+
+	structuredFormat := resolveStructuredFormat(s.agent)
+
+	// When the agent has both tools and a structured output request,
+	// we delay structured output enforcement until a dedicated
+	// synthesis turn. Enforcing the schema during tool exploration
+	// causes models with extended thinking to stuff planning prose
+	// into the first text field of the schema as a scratchpad,
+	// burning the entire max_tokens budget on thinking-inside-JSON
+	// before ever producing a valid object. Instead, we let the
+	// model freely call tools without a schema, then force one final
+	// synthesis turn with ToolChoice=none + schema enforced once the
+	// model stops requesting tools (finish_reason=stop or length).
+	// Agents without tools or without a structured output request
+	// do not need this dance and enforce the schema immediately.
+	exploring := structuredFormat != nil && len(s.toolDefs) > 0
+
 	for {
 		if err := ctx.Err(); err != nil {
 			return s.finishRun(ctx, nil, fmt.Errorf("cannot complete: %w", err))
@@ -284,15 +339,21 @@ func coreLoop(ctx context.Context, startAgent *Agent, inputMessages []llm.Messag
 
 		fullMessages := buildFullMessages(s.systemPrompt, s.messages)
 
-		responseFormat := s.agent.responseFormat
-		if responseFormat == nil && s.agent.outputType != nil {
-			responseFormat = s.agent.outputType.responseFormat()
+		var responseFormat *llm.ResponseFormat
+		if !exploring {
+			responseFormat = structuredFormat
 		}
 
 		toolChoice := s.agent.modelSettings.ToolChoice
 		if s.toolUsedInRun && s.agent.resetToolChoice && toolChoice != nil {
 			toolChoice = nil
 		}
+		if !exploring && structuredFormat != nil && len(s.toolDefs) > 0 {
+			// On the synthesis turn, forbid further tool calls so the
+			// model is forced to convert what it has into JSON.
+			none := llm.ToolChoice{Type: llm.ToolChoiceNone}
+			toolChoice = &none
+		}
 
 		req := &llm.ChatCompletionRequest{
 			Model:             s.agent.model,
@@ -306,6 +367,7 @@ func coreLoop(ctx context.Context, startAgent *Agent, inputMessages []llm.Messag
 			ToolChoice:        toolChoice,
 			ParallelToolCalls: s.agent.modelSettings.ParallelToolCalls,
 			ResponseFormat:    responseFormat,
+			Thinking:          s.agent.modelSettings.Thinking,
 		}
 
 		s.logger.InfoCtx(
@@ -336,6 +398,58 @@ func coreLoop(ctx context.Context, startAgent *Agent, inputMessages []llm.Messag
 
 		switch resp.FinishReason {
 		case llm.FinishReasonStop, llm.FinishReasonLength:
+			// Model ended the turn without tool calls (stop or length).
+			// If we have a structured output request but haven't
+			// enforced the schema yet, promote this turn to the
+			// synthesis turn: the next iteration runs with
+			// ToolChoice=none and the schema enforced, so the model
+			// converts what it has gathered into JSON in one shot.
+			//
+			// Anthropic requires the last message in the conversation
+			// to be a user message, so we cannot simply continue after
+			// an assistant stop turn. Drop empty (thinking-only) turns
+			// from history and append a user nudge that asks for the
+			// final structured output. Non-empty assistant turns stay
+			// in history so the model can reference its own
+			// conclusions during synthesis.
+			if exploring && s.turns < s.agent.maxTurns {
+				exploring = false
+				if resp.Message.Text() == "" {
+					s.messages = s.messages[:len(s.messages)-1]
+				}
+				s.messages = append(s.messages, llm.Message{
+					Role:  llm.RoleUser,
+					Parts: []llm.Part{llm.TextPart{Text: synthesisNudge}},
+				})
+				s.logger.InfoCtx(
+					ctx,
+					"entering synthesis turn: forcing structured output with tool_choice=none",
+					log.Int("turn", s.turns),
+					log.Int("output_tokens", resp.Usage.OutputTokens),
+				)
+				continue
+			}
+
+			// Synthesis turn ran but produced no text. Retry the same
+			// turn a bounded number of times so the model gets another
+			// chance to emit the required JSON output. The empty
+			// assistant turn must be dropped from history because
+			// Anthropic rejects requests where the last message is a
+			// thinking-only assistant turn.
+			if structuredFormat != nil && resp.Message.Text() == "" && emptyOutputRetries < maxEmptyOutputRetries && s.turns < s.agent.maxTurns {
+				emptyOutputRetries++
+				s.messages = s.messages[:len(s.messages)-1]
+				s.logger.InfoCtx(
+					ctx,
+					"retrying turn: structured output expected but got empty text",
+					log.Int("turn", s.turns),
+					log.Int("retry", emptyOutputRetries),
+					log.Int("output_tokens", resp.Usage.OutputTokens),
+				)
+				continue
+			}
+			emptyOutputRetries = 0
+
 			if err := runOutputGuardrails(ctx, s.agent, resp.Message); err != nil {
 				return s.finishRun(ctx, nil, err)
 			}
@@ -354,6 +468,7 @@ func coreLoop(ctx context.Context, startAgent *Agent, inputMessages []llm.Messag
 
 		case llm.FinishReasonToolCalls:
 			s.toolUsedInRun = true
+			emptyOutputRetries = 0
 
 			s.logger.InfoCtx(
 				ctx,
@@ -852,12 +967,24 @@ func executeSingleTool(
 	emitHook(agent, func(h RunHooks) { h.OnToolEnd(ctx, agent, tool, result, nil) })
 	emitAgentHook(agent, func(h AgentHooks) { h.OnToolEnd(ctx, agent, tool, result) })
 
-	logger.InfoCtx(
-		ctx,
-		"tool execution completed",
-		log.String("tool", tool.Name()),
-		log.Bool("is_error", result.IsError),
-	)
+	if result.IsError {
+		content := result.Content
+		if len(content) > 200 {
+			content = content[:200] + "... (truncated)"
+		}
+		logger.WarnCtx(
+			ctx,
+			"tool returned error",
+			log.String("tool", tool.Name()),
+			log.String("content", content),
+		)
+	} else {
+		logger.InfoCtx(
+			ctx,
+			"tool execution completed",
+			log.String("tool", tool.Name()),
+		)
+	}
 
 	return result, nil
 }
@@ -1178,3 +1305,18 @@ func emitAgentHook(agent *Agent, fn func(AgentHooks)) {
 		fn(agent.agentHooks)
 	}
 }
+
+// resolveStructuredFormat picks the structured output request to
+// enforce on the agent's final turn. An explicitly set responseFormat
+// takes precedence; otherwise the format is derived from outputType.
+// It returns nil when the agent declares neither.
+func resolveStructuredFormat(a *Agent) *llm.ResponseFormat {
+	switch {
+	case a.responseFormat != nil:
+		return a.responseFormat
+	case a.outputType != nil:
+		return a.outputType.responseFormat()
+	default:
+		return nil
+	}
+}
diff --git a/pkg/agent/tool.go b/pkg/agent/tool.go
@@ -29,6 +29,7 @@ type (
 		IsError bool
 	}
 
+	// ToolDescriptor describes a tool's name and LLM definition.
 	ToolDescriptor interface {
 		Name() string
 		Definition() llm.Tool
@@ -38,7 +39,31 @@ type (
 		ToolDescriptor
 		Execute(ctx context.Context, arguments string) (ToolResult, error)
 	}
+)
 
+// ResultJSON marshals v to JSON and returns it as the Content of a
+// ToolResult. When marshaling fails it returns an error ToolResult
+// (IsError set) whose Content describes the failure.
+func ResultJSON(v any) ToolResult {
+	encoded, err := json.Marshal(v)
+	if err != nil {
+		msg := fmt.Sprintf("cannot marshal tool result: %s", err)
+		return ToolResult{Content: msg, IsError: true}
+	}
+	return ToolResult{Content: string(encoded)}
+}
+
+// ResultError wraps msg in a ToolResult flagged as an error.
+func ResultError(msg string) ToolResult {
+	return ToolResult{IsError: true, Content: msg}
+}
+
+// ResultErrorf formats a message as fmt.Sprintf does and returns it as an error ToolResult.
+func ResultErrorf(format string, args ...any) ToolResult {
+	return ResultError(fmt.Sprintf(format, args...))
+}
+
+type (
 	functionTool[P any] struct {
 		name           string
 		description    string