Skip to content
Open
Show file tree
Hide file tree
Changes from 36 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
70e7bbd
Wire vendor assessment and LLM file parts
aureliensibiril Apr 2, 2026
df390cb
Add vendor assessment agent with composable tool framework
aureliensibiril Apr 2, 2026
d09fd74
Add structured output types for vetting agents
aureliensibiril Apr 6, 2026
3623c65
Wire output types into vetting sub-agents
aureliensibiril Apr 6, 2026
8b69799
Update agent prompts for JSON output format
aureliensibiril Apr 6, 2026
eb386bf
Add JSON schema support to Anthropic provider
aureliensibiril Apr 6, 2026
4b110b6
Validate JSON output in agent-as-tool results
aureliensibiril Apr 6, 2026
52c8358
Fix WithTx callback signature in vendor assessment
aureliensibiril Apr 6, 2026
f59dcd2
Fix e2e config for agents key rename
aureliensibiril Apr 6, 2026
1168a5e
Fix Anthropic streaming for thinking and tool use
aureliensibiril Apr 6, 2026
cf53c93
Fix Anthropic streaming for thinking and tool use
aureliensibiril Apr 6, 2026
3c1e194
Retry on empty structured output response
aureliensibiril Apr 6, 2026
73b98f1
Use typed ErrStreamingRequired for Anthropic fallback
aureliensibiril Apr 7, 2026
2f9b43d
Factor probod LLM client provisioning
aureliensibiril Apr 7, 2026
ca3f28e
Factor vetting sub-agent constructors
aureliensibiril Apr 7, 2026
aa3489a
Tighten vetting assessment validation and config
aureliensibiril Apr 7, 2026
4425bd7
Add assessVendor MCP tool
aureliensibiril Apr 7, 2026
35b8be0
Simplify vetting orchestrator entries
aureliensibiril Apr 7, 2026
2b2c609
Detect Anthropic SDK client-side streaming requirement
aureliensibiril Apr 7, 2026
c0314cc
Fix gofmt: trailing blank line in vendor_service.go
aureliensibiril Apr 7, 2026
e44ace5
Move vetting prompts into prompts/ subdirectory
aureliensibiril Apr 7, 2026
4e12dc8
Remove redundant JSON examples from sub-agent prompts
aureliensibiril Apr 7, 2026
8862748
Describe VendorInfo fields with jsonschema tags
aureliensibiril Apr 7, 2026
5381fb0
Reduce vetting extraction prompt to a schema stub
aureliensibiril Apr 7, 2026
0b9d908
Rewrite vetting orchestrator prompts in XML
aureliensibiril Apr 7, 2026
0f09422
Rewrite vetting sub-agent prompts in XML
aureliensibiril Apr 7, 2026
f99abb6
Defer structured output until synthesis turn
aureliensibiril Apr 7, 2026
02a2f6a
Hoist agent loop constants to package level
aureliensibiril Apr 7, 2026
f4289da
Enforce VendorInfo enums via schema decoration
aureliensibiril Apr 7, 2026
03ba157
Tighten vetting output type schema tests
aureliensibiril Apr 7, 2026
b8a536e
Merge progress hooks into a single parametrised struct
aureliensibiril Apr 7, 2026
c7ba96e
Drop unused agent toolset indirection
aureliensibiril Apr 8, 2026
0a34bb1
Harden browser SSRF guards against internal hosts
aureliensibiril Apr 8, 2026
484d2c4
Fix correctness bugs in browser and diff tools
aureliensibiril Apr 8, 2026
e1c1f4b
Document SSL inspector InsecureSkipVerify intent
aureliensibiril Apr 8, 2026
0546c09
Rewrite vetting prompt examples as valid JSON
aureliensibiril Apr 8, 2026
a9325bc
Give vendor info extractor its own timeout budget
aureliensibiril Apr 8, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions e2e/console/testdata/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,12 @@ probod:
slack:
sender-interval: 60

llm:
agents:
providers:
openai:
type: "openai"
api-key: "thisisnotasecret"
defaults:
default:
provider: "openai"
model-name: "gpt-4o"
temperature: 0.1
Expand Down
9 changes: 9 additions & 0 deletions pkg/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,15 @@ func WithParallelToolCalls(enabled bool) Option {
}
}

// WithThinking returns an Option that turns on model thinking for the agent
// and sets its token budget to budgetTokens.
//
// Every time the returned Option is applied it installs a freshly allocated
// llm.ThinkingConfig, replacing whatever Thinking value the agent's model
// settings held before.
func WithThinking(budgetTokens int) Option {
	return func(a *Agent) {
		// Allocate inside the closure so agents never share one config.
		cfg := new(llm.ThinkingConfig)
		cfg.Enabled = true
		cfg.BudgetTokens = budgetTokens

		a.modelSettings.Thinking = cfg
	}
}

func WithLogger(l *log.Logger) Option {
return func(a *Agent) {
a.logger = l
Expand Down
17 changes: 16 additions & 1 deletion pkg/agent/agent_tool.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,5 +116,20 @@ func (t *agentTool) Execute(ctx context.Context, arguments string) (ToolResult,
return ToolResult{}, err
}

return ToolResult{Content: result.FinalMessage().Text()}, nil
text := result.FinalMessage().Text()

if t.agent.outputType != nil {
if !json.Valid([]byte(text)) {
preview := text
if len(preview) > 500 {
preview = preview[:500] + "... (truncated)"
}
return ToolResult{
Content: fmt.Sprintf("Sub-agent %q returned invalid JSON. Raw output:\n%s", t.agent.name, preview),
IsError: true,
}, nil
}
}

return ToolResult{Content: text}, nil
}
1 change: 1 addition & 0 deletions pkg/agent/model_settings.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,5 @@ type ModelSettings struct {
MaxTokens *int
ToolChoice *llm.ToolChoice
ParallelToolCalls *bool
Thinking *llm.ThinkingConfig
}
36 changes: 36 additions & 0 deletions pkg/agent/progress.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Copyright (c) 2026 Probo Inc <hello@getprobo.com>.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
// AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
// OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
// PERFORMANCE OF THIS SOFTWARE.

package agent

import "context"

type (
	// ProgressEventType names the kind of update carried by a ProgressEvent.
	ProgressEventType string

	// ProgressReporter is a callback that receives one ProgressEvent along
	// with the context it was reported under.
	ProgressReporter func(ctx context.Context, event ProgressEvent)

	// ProgressEvent is a single progress update. It serialises to JSON with
	// the keys "type", "step", "parent_step" and "message"; "parent_step" is
	// left out when ParentStep is empty.
	ProgressEvent struct {
		Type       ProgressEventType `json:"type"`
		Step       string            `json:"step"`
		ParentStep string            `json:"parent_step,omitempty"`
		Message    string            `json:"message"`
	}
)

// Values a ProgressEvent's Type field can take.
const (
	// ProgressEventStepStarted is the "step_started" event type.
	ProgressEventStepStarted = ProgressEventType("step_started")
	// ProgressEventStepCompleted is the "step_completed" event type.
	ProgressEventStepCompleted = ProgressEventType("step_completed")
	// ProgressEventStepFailed is the "step_failed" event type.
	ProgressEventStepFailed = ProgressEventType("step_failed")
)
164 changes: 153 additions & 11 deletions pkg/agent/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,19 @@ import (
"go.probo.inc/probo/pkg/llm"
)

const tracerName = "go.probo.inc/probo/pkg/agent"
const (
tracerName = "go.probo.inc/probo/pkg/agent"

// maxEmptyOutputRetries bounds the number of times the core loop
// will re-ask the model to produce a structured output after it
// returned a thinking-only empty response.
maxEmptyOutputRetries = 2
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Expose this as a WithX option.


// synthesisNudge is the static user message appended after tool
// exploration completes, asking the model to produce the final
// structured output on the next (synthesis) turn.
synthesisNudge = "Based on everything you have gathered, produce the final structured output now."
)

type (
CallLLMFunc func(ctx context.Context, agent *Agent, req *llm.ChatCompletionRequest) (*llm.ChatCompletionResponse, error)
Expand Down Expand Up @@ -68,7 +80,32 @@ type (
func noopEvent(_ context.Context, _ StreamEvent) {}

// blockingCallLLM performs a non-streaming chat completion through the
// agent's client. When that call fails with an error matching
// *llm.ErrStreamingRequired, the same request is re-issued over the
// streaming API and the accumulated response is returned instead.
//
// Errors that do not match ErrStreamingRequired are returned unchanged. If
// the stream cannot be opened, the original blocking-call error is returned
// and the stream-open error is dropped. An error reported by the accumulator
// once the stream has been consumed is returned as-is.
func blockingCallLLM(ctx context.Context, agent *Agent, req *llm.ChatCompletionRequest) (*llm.ChatCompletionResponse, error) {
// NOTE(review): the next line looks like the pre-change body left over from
// the diff rendering. As written it returns before any of the fallback logic
// below can run — confirm it is absent from the real file.
return agent.client.ChatCompletion(ctx, req)
resp, err := agent.client.ChatCompletion(ctx, req)
if err == nil {
return resp, nil
}

// Some providers (e.g. Anthropic) require streaming for large
// max_tokens or when thinking is enabled. Fall back to streaming
// transparently when the blocking call returns ErrStreamingRequired.
var streamRequired *llm.ErrStreamingRequired
if !errors.As(err, &streamRequired) {
return nil, err
}

stream, sErr := agent.client.ChatCompletionStream(ctx, req)
if sErr != nil {
return nil, err // return the original error
}
defer stream.Close()

// Consume the whole stream. The accumulator is expected to collect each
// chunk as Next advances, so the loop body is intentionally empty.
acc := llm.NewStreamAccumulator(stream)
for acc.Next() {
}
if sErr := acc.Err(); sErr != nil {
return nil, sErr
}
return acc.Response(), nil
}

func (a *Agent) Run(ctx context.Context, messages []llm.Message) (*Result, error) {
Expand Down Expand Up @@ -273,6 +310,24 @@ func coreLoop(ctx context.Context, startAgent *Agent, inputMessages []llm.Messag
log.Int("tool_count", len(s.toolDefs)),
)

emptyOutputRetries := 0

structuredFormat := resolveStructuredFormat(s.agent)

// When the agent has both tools and a structured output request,
// we delay structured output enforcement until a dedicated
// synthesis turn. Enforcing the schema during tool exploration
// causes models with extended thinking to stuff planning prose
// into the first text field of the schema as a scratchpad,
// burning the entire max_tokens budget on thinking-inside-JSON
// before ever producing a valid object. Instead, we let the
// model freely call tools without a schema, then force one final
// synthesis turn with ToolChoice=none + schema enforced once the
// model signals it has enough information (finish_reason=stop).
// Agents without tools or without a structured output request
// do not need this dance and enforce the schema immediately.
exploring := structuredFormat != nil && len(s.toolDefs) > 0

for {
if err := ctx.Err(); err != nil {
return s.finishRun(ctx, nil, fmt.Errorf("cannot complete: %w", err))
Expand All @@ -284,15 +339,21 @@ func coreLoop(ctx context.Context, startAgent *Agent, inputMessages []llm.Messag

fullMessages := buildFullMessages(s.systemPrompt, s.messages)

responseFormat := s.agent.responseFormat
if responseFormat == nil && s.agent.outputType != nil {
responseFormat = s.agent.outputType.responseFormat()
var responseFormat *llm.ResponseFormat
if !exploring {
responseFormat = structuredFormat
}

toolChoice := s.agent.modelSettings.ToolChoice
if s.toolUsedInRun && s.agent.resetToolChoice && toolChoice != nil {
toolChoice = nil
}
if !exploring && structuredFormat != nil && len(s.toolDefs) > 0 {
// On the synthesis turn, forbid further tool calls so the
// model is forced to convert what it has into JSON.
none := llm.ToolChoice{Type: llm.ToolChoiceNone}
toolChoice = &none
}

req := &llm.ChatCompletionRequest{
Model: s.agent.model,
Expand All @@ -306,6 +367,7 @@ func coreLoop(ctx context.Context, startAgent *Agent, inputMessages []llm.Messag
ToolChoice: toolChoice,
ParallelToolCalls: s.agent.modelSettings.ParallelToolCalls,
ResponseFormat: responseFormat,
Thinking: s.agent.modelSettings.Thinking,
}

s.logger.InfoCtx(
Expand Down Expand Up @@ -336,6 +398,58 @@ func coreLoop(ctx context.Context, startAgent *Agent, inputMessages []llm.Messag

switch resp.FinishReason {
case llm.FinishReasonStop, llm.FinishReasonLength:
// Model signalled it has nothing more to do with tools.
// If we have a structured output request but haven't
// enforced the schema yet, promote this turn to the
// synthesis turn: the next iteration runs with
// ToolChoice=none and the schema enforced, so the model
// converts what it has gathered into JSON in one shot.
//
// Anthropic requires the last message in the conversation
// to be a user message, so we cannot simply continue after
// an assistant stop turn. Drop empty (thinking-only) turns
// from history and append a user nudge that asks for the
// final structured output. Non-empty assistant turns stay
// in history so the model can reference its own
// conclusions during synthesis.
if exploring && s.turns < s.agent.maxTurns {
exploring = false
if resp.Message.Text() == "" {
s.messages = s.messages[:len(s.messages)-1]
}
s.messages = append(s.messages, llm.Message{
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

style

Role: llm.RoleUser,
Parts: []llm.Part{llm.TextPart{Text: synthesisNudge}},
})
s.logger.InfoCtx(
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Warn

ctx,
"entering synthesis turn: forcing structured output with tool_choice=none",
log.Int("turn", s.turns),
log.Int("output_tokens", resp.Usage.OutputTokens),
)
continue
}

// Synthesis turn ran but produced no text. Retry the same
// turn a bounded number of times so the model gets another
// chance to emit the required JSON output. The empty
// assistant turn must be dropped from history because
// Anthropic rejects requests where the last message is a
// thinking-only assistant turn.
if structuredFormat != nil && resp.Message.Text() == "" && emptyOutputRetries < maxEmptyOutputRetries && s.turns < s.agent.maxTurns {
emptyOutputRetries++
s.messages = s.messages[:len(s.messages)-1]
s.logger.InfoCtx(
ctx,
"retrying turn: structured output expected but got empty text",
log.Int("turn", s.turns),
log.Int("retry", emptyOutputRetries),
log.Int("output_tokens", resp.Usage.OutputTokens),
)
continue
}
emptyOutputRetries = 0

if err := runOutputGuardrails(ctx, s.agent, resp.Message); err != nil {
return s.finishRun(ctx, nil, err)
}
Expand All @@ -354,6 +468,7 @@ func coreLoop(ctx context.Context, startAgent *Agent, inputMessages []llm.Messag

case llm.FinishReasonToolCalls:
s.toolUsedInRun = true
emptyOutputRetries = 0
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When does this happen?


s.logger.InfoCtx(
ctx,
Expand Down Expand Up @@ -852,12 +967,24 @@ func executeSingleTool(
emitHook(agent, func(h RunHooks) { h.OnToolEnd(ctx, agent, tool, result, nil) })
emitAgentHook(agent, func(h AgentHooks) { h.OnToolEnd(ctx, agent, tool, result) })

logger.InfoCtx(
ctx,
"tool execution completed",
log.String("tool", tool.Name()),
log.Bool("is_error", result.IsError),
)
if result.IsError {
content := result.Content
if len(content) > 200 {
content = content[:200] + "... (truncated)"
}
logger.WarnCtx(
ctx,
"tool returned error",
log.String("tool", tool.Name()),
log.String("content", content),
)
} else {
logger.InfoCtx(
ctx,
"tool execution completed",
log.String("tool", tool.Name()),
)
}

return result, nil
}
Expand Down Expand Up @@ -1178,3 +1305,18 @@ func emitAgentHook(agent *Agent, fn func(AgentHooks)) {
fn(agent.agentHooks)
}
}

// resolveStructuredFormat picks the structured output request that should be
// enforced on the agent's final turn.
//
// An explicitly set responseFormat (the path used by the RunTyped convenience
// wrapper) takes precedence. Otherwise the format is derived from the agent's
// outputType (set through WithOutputType for typed sub-agents). It returns
// nil when the agent declares neither.
func resolveStructuredFormat(a *Agent) *llm.ResponseFormat {
	switch {
	case a.responseFormat != nil:
		return a.responseFormat
	case a.outputType != nil:
		return a.outputType.responseFormat()
	default:
		return nil
	}
}
25 changes: 25 additions & 0 deletions pkg/agent/tool.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ type (
IsError bool
}

// ToolDescriptor describes a tool's name and LLM definition.
ToolDescriptor interface {
Name() string
Definition() llm.Tool
Expand All @@ -38,7 +39,31 @@ type (
ToolDescriptor
Execute(ctx context.Context, arguments string) (ToolResult, error)
}
)

// ResultJSON encodes v as JSON and wraps the encoded text in a successful
// ToolResult. If v cannot be marshalled, it returns an error ToolResult
// (IsError set) whose content describes the marshalling failure.
func ResultJSON(v any) ToolResult {
	encoded, marshalErr := json.Marshal(v)
	if marshalErr == nil {
		return ToolResult{Content: string(encoded)}
	}

	return ToolResult{
		IsError: true,
		Content: fmt.Sprintf("cannot marshal tool result: %s", marshalErr),
	}
}

// ResultError builds a ToolResult flagged as an error, using msg verbatim as
// its content.
func ResultError(msg string) ToolResult {
	var res ToolResult
	res.IsError = true
	res.Content = msg
	return res
}

// ResultErrorf builds a ToolResult flagged as an error whose content is
// format expanded with args, following fmt.Sprintf rules.
func ResultErrorf(format string, args ...any) ToolResult {
	msg := fmt.Sprintf(format, args...)
	return ToolResult{IsError: true, Content: msg}
}

type (
functionTool[P any] struct {
name string
description string
Expand Down
Loading
Loading