-
Notifications
You must be signed in to change notification settings - Fork 151
Add vendor assessment agent with structured output #982
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 32 commits
70e7bbd
df390cb
d09fd74
3623c65
8b69799
eb386bf
4b110b6
52c8358
f59dcd2
1168a5e
cf53c93
3c1e194
73b98f1
2f9b43d
ca3f28e
aa3489a
4425bd7
35b8be0
2b2c609
c0314cc
e44ace5
4e12dc8
8862748
5381fb0
0b9d908
0f09422
f99abb6
02a2f6a
f4289da
03ba157
b8a536e
c7ba96e
0a34bb1
484d2c4
e1c1f4b
0546c09
a9325bc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,36 @@ | ||
| // Copyright (c) 2026 Probo Inc <hello@getprobo.com>. | ||
| // | ||
| // Permission to use, copy, modify, and/or distribute this software for any | ||
| // purpose with or without fee is hereby granted, provided that the above | ||
| // copyright notice and this permission notice appear in all copies. | ||
| // | ||
| // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH | ||
| // REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY | ||
| // AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, | ||
| // INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM | ||
| // LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR | ||
| // OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | ||
| // PERFORMANCE OF THIS SOFTWARE. | ||
|
|
||
| package agent | ||
|
|
||
| import "context" | ||
|
|
||
| type ( | ||
| ProgressEventType string | ||
|
|
||
| ProgressEvent struct { | ||
| Type ProgressEventType `json:"type"` | ||
| Step string `json:"step"` | ||
| ParentStep string `json:"parent_step,omitempty"` | ||
| Message string `json:"message"` | ||
| } | ||
|
|
||
| ProgressReporter func(ctx context.Context, event ProgressEvent) | ||
| ) | ||
|
|
||
| const ( | ||
| ProgressEventStepStarted ProgressEventType = "step_started" | ||
| ProgressEventStepCompleted ProgressEventType = "step_completed" | ||
| ProgressEventStepFailed ProgressEventType = "step_failed" | ||
| ) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,7 +28,19 @@ import ( | |
| "go.probo.inc/probo/pkg/llm" | ||
| ) | ||
|
|
||
| const tracerName = "go.probo.inc/probo/pkg/agent" | ||
| const ( | ||
| tracerName = "go.probo.inc/probo/pkg/agent" | ||
|
|
||
| // maxEmptyOutputRetries bounds the number of times the core loop | ||
| // will re-ask the model to produce a structured output after it | ||
| // returned a thinking-only empty response. | ||
| maxEmptyOutputRetries = 2 | ||
|
|
||
| // synthesisNudge is the static user message appended after tool | ||
| // exploration completes, asking the model to produce the final | ||
| // structured output on the next (synthesis) turn. | ||
| synthesisNudge = "Based on everything you have gathered, produce the final structured output now." | ||
| ) | ||
|
|
||
| type ( | ||
| CallLLMFunc func(ctx context.Context, agent *Agent, req *llm.ChatCompletionRequest) (*llm.ChatCompletionResponse, error) | ||
|
|
@@ -68,7 +80,32 @@ type ( | |
| func noopEvent(_ context.Context, _ StreamEvent) {} | ||
|
|
||
| func blockingCallLLM(ctx context.Context, agent *Agent, req *llm.ChatCompletionRequest) (*llm.ChatCompletionResponse, error) { | ||
| return agent.client.ChatCompletion(ctx, req) | ||
| resp, err := agent.client.ChatCompletion(ctx, req) | ||
| if err == nil { | ||
| return resp, nil | ||
| } | ||
|
|
||
| // Some providers (e.g. Anthropic) require streaming for large | ||
| // max_tokens or when thinking is enabled. Fall back to streaming | ||
| // transparently when the blocking call returns ErrStreamingRequired. | ||
| var streamRequired *llm.ErrStreamingRequired | ||
| if !errors.As(err, &streamRequired) { | ||
| return nil, err | ||
| } | ||
|
|
||
| stream, sErr := agent.client.ChatCompletionStream(ctx, req) | ||
| if sErr != nil { | ||
| return nil, err // return the original error | ||
| } | ||
| defer stream.Close() | ||
|
|
||
| acc := llm.NewStreamAccumulator(stream) | ||
| for acc.Next() { | ||
| } | ||
| if sErr := acc.Err(); sErr != nil { | ||
| return nil, sErr | ||
| } | ||
| return acc.Response(), nil | ||
| } | ||
|
|
||
| func (a *Agent) Run(ctx context.Context, messages []llm.Message) (*Result, error) { | ||
|
|
@@ -273,6 +310,24 @@ func coreLoop(ctx context.Context, startAgent *Agent, inputMessages []llm.Messag | |
| log.Int("tool_count", len(s.toolDefs)), | ||
| ) | ||
|
|
||
| emptyOutputRetries := 0 | ||
|
|
||
| structuredFormat := resolveStructuredFormat(s.agent) | ||
|
|
||
| // When the agent has both tools and a structured output request, | ||
| // we delay structured output enforcement until a dedicated | ||
| // synthesis turn. Enforcing the schema during tool exploration | ||
| // causes models with extended thinking to stuff planning prose | ||
| // into the first text field of the schema as a scratchpad, | ||
| // burning the entire max_tokens budget on thinking-inside-JSON | ||
| // before ever producing a valid object. Instead, we let the | ||
| // model freely call tools without a schema, then force one final | ||
| // synthesis turn with ToolChoice=none + schema enforced once the | ||
| // model signals it has enough information (finish_reason=stop). | ||
| // Agents without tools or without a structured output request | ||
| // do not need this dance and enforce the schema immediately. | ||
| exploring := structuredFormat != nil && len(s.toolDefs) > 0 | ||
|
|
||
| for { | ||
| if err := ctx.Err(); err != nil { | ||
| return s.finishRun(ctx, nil, fmt.Errorf("cannot complete: %w", err)) | ||
|
|
@@ -284,15 +339,21 @@ func coreLoop(ctx context.Context, startAgent *Agent, inputMessages []llm.Messag | |
|
|
||
| fullMessages := buildFullMessages(s.systemPrompt, s.messages) | ||
|
|
||
| responseFormat := s.agent.responseFormat | ||
| if responseFormat == nil && s.agent.outputType != nil { | ||
| responseFormat = s.agent.outputType.responseFormat() | ||
| var responseFormat *llm.ResponseFormat | ||
| if !exploring { | ||
| responseFormat = structuredFormat | ||
| } | ||
|
|
||
| toolChoice := s.agent.modelSettings.ToolChoice | ||
| if s.toolUsedInRun && s.agent.resetToolChoice && toolChoice != nil { | ||
| toolChoice = nil | ||
| } | ||
| if !exploring && structuredFormat != nil && len(s.toolDefs) > 0 { | ||
| // On the synthesis turn, forbid further tool calls so the | ||
| // model is forced to convert what it has into JSON. | ||
| none := llm.ToolChoice{Type: llm.ToolChoiceNone} | ||
| toolChoice = &none | ||
| } | ||
|
|
||
| req := &llm.ChatCompletionRequest{ | ||
| Model: s.agent.model, | ||
|
|
@@ -306,6 +367,7 @@ func coreLoop(ctx context.Context, startAgent *Agent, inputMessages []llm.Messag | |
| ToolChoice: toolChoice, | ||
| ParallelToolCalls: s.agent.modelSettings.ParallelToolCalls, | ||
| ResponseFormat: responseFormat, | ||
| Thinking: s.agent.modelSettings.Thinking, | ||
| } | ||
|
|
||
| s.logger.InfoCtx( | ||
|
|
@@ -336,6 +398,58 @@ func coreLoop(ctx context.Context, startAgent *Agent, inputMessages []llm.Messag | |
|
|
||
| switch resp.FinishReason { | ||
| case llm.FinishReasonStop, llm.FinishReasonLength: | ||
| // Model signalled it has nothing more to do with tools. | ||
| // If we have a structured output request but haven't | ||
| // enforced the schema yet, promote this turn to the | ||
| // synthesis turn: the next iteration runs with | ||
| // ToolChoice=none and the schema enforced, so the model | ||
| // converts what it has gathered into JSON in one shot. | ||
| // | ||
| // Anthropic requires the last message in the conversation | ||
| // to be a user message, so we cannot simply continue after | ||
| // an assistant stop turn. Drop empty (thinking-only) turns | ||
| // from history and append a user nudge that asks for the | ||
| // final structured output. Non-empty assistant turns stay | ||
| // in history so the model can reference its own | ||
| // conclusions during synthesis. | ||
| if exploring && s.turns < s.agent.maxTurns { | ||
| exploring = false | ||
| if resp.Message.Text() == "" { | ||
| s.messages = s.messages[:len(s.messages)-1] | ||
| } | ||
| s.messages = append(s.messages, llm.Message{ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. style |
||
| Role: llm.RoleUser, | ||
| Parts: []llm.Part{llm.TextPart{Text: synthesisNudge}}, | ||
| }) | ||
| s.logger.InfoCtx( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Warn |
||
| ctx, | ||
| "entering synthesis turn: forcing structured output with tool_choice=none", | ||
| log.Int("turn", s.turns), | ||
| log.Int("output_tokens", resp.Usage.OutputTokens), | ||
| ) | ||
| continue | ||
| } | ||
|
|
||
| // Synthesis turn ran but produced no text. Retry the same | ||
| // turn a bounded number of times so the model gets another | ||
| // chance to emit the required JSON output. The empty | ||
| // assistant turn must be dropped from history because | ||
| // Anthropic rejects requests where the last message is a | ||
| // thinking-only assistant turn. | ||
| if structuredFormat != nil && resp.Message.Text() == "" && emptyOutputRetries < maxEmptyOutputRetries && s.turns < s.agent.maxTurns { | ||
| emptyOutputRetries++ | ||
| s.messages = s.messages[:len(s.messages)-1] | ||
| s.logger.InfoCtx( | ||
| ctx, | ||
| "retrying turn: structured output expected but got empty text", | ||
| log.Int("turn", s.turns), | ||
| log.Int("retry", emptyOutputRetries), | ||
| log.Int("output_tokens", resp.Usage.OutputTokens), | ||
| ) | ||
| continue | ||
| } | ||
| emptyOutputRetries = 0 | ||
|
|
||
| if err := runOutputGuardrails(ctx, s.agent, resp.Message); err != nil { | ||
| return s.finishRun(ctx, nil, err) | ||
| } | ||
|
|
@@ -354,6 +468,7 @@ func coreLoop(ctx context.Context, startAgent *Agent, inputMessages []llm.Messag | |
|
|
||
| case llm.FinishReasonToolCalls: | ||
| s.toolUsedInRun = true | ||
| emptyOutputRetries = 0 | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. when it's happen? |
||
|
|
||
| s.logger.InfoCtx( | ||
| ctx, | ||
|
|
@@ -852,12 +967,24 @@ func executeSingleTool( | |
| emitHook(agent, func(h RunHooks) { h.OnToolEnd(ctx, agent, tool, result, nil) }) | ||
| emitAgentHook(agent, func(h AgentHooks) { h.OnToolEnd(ctx, agent, tool, result) }) | ||
|
|
||
| logger.InfoCtx( | ||
| ctx, | ||
| "tool execution completed", | ||
| log.String("tool", tool.Name()), | ||
| log.Bool("is_error", result.IsError), | ||
| ) | ||
| if result.IsError { | ||
| content := result.Content | ||
| if len(content) > 200 { | ||
| content = content[:200] + "... (truncated)" | ||
| } | ||
| logger.WarnCtx( | ||
| ctx, | ||
| "tool returned error", | ||
| log.String("tool", tool.Name()), | ||
| log.String("content", content), | ||
| ) | ||
| } else { | ||
| logger.InfoCtx( | ||
| ctx, | ||
| "tool execution completed", | ||
| log.String("tool", tool.Name()), | ||
| ) | ||
| } | ||
|
|
||
| return result, nil | ||
| } | ||
|
|
@@ -1178,3 +1305,18 @@ func emitAgentHook(agent *Agent, fn func(AgentHooks)) { | |
| fn(agent.agentHooks) | ||
| } | ||
| } | ||
|
|
||
| // resolveStructuredFormat returns the structured output request the | ||
| // agent wants enforced on its final turn, or nil if none. An agent can | ||
| // declare structured output through either WithOutputType (typed | ||
| // sub-agents) or a directly-set responseFormat (the RunTyped | ||
| // convenience wrapper). | ||
| func resolveStructuredFormat(a *Agent) *llm.ResponseFormat { | ||
| if a.responseFormat != nil { | ||
| return a.responseFormat | ||
| } | ||
| if a.outputType != nil { | ||
| return a.outputType.responseFormat() | ||
| } | ||
| return nil | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
put it as WithX