Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
63f778d
feat(beta): implement server-side tool event handling and testing
vvlrff Apr 16, 2026
0ba0ba3
Merge branch 'main' into fix/anthropic-builtin-tool-history
vvlrff Apr 16, 2026
8058b1a
feat: add event handling for tool execution across multiple built-in …
vvlrff Apr 18, 2026
a378550
Merge remote-tracking branch 'upstream/main' into fix/anthropic-built…
vvlrff Apr 18, 2026
7afec09
feat: normalize tool names for built-in tool events and update tests
vvlrff Apr 18, 2026
564a02e
refactor: streamline tool name handling and improve test coverage for…
vvlrff Apr 19, 2026
79e5e5b
Merge branch 'main' into fix/anthropic-builtin-tool-history
vvlrff Apr 20, 2026
7cafda1
feat(events): introduce Anthropic server tool events and update relat…
vvlrff Apr 21, 2026
7dd08b5
Merge remote-tracking branch 'upstream/main' into fix/anthropic-built…
vvlrff Apr 21, 2026
e79f824
add OpenAI event classes and integrate into response processing
vvlrff Apr 21, 2026
b7946ee
refactor: clean up import statements in search tools
vvlrff Apr 21, 2026
5d74854
Merge remote-tracking branch 'upstream/main' into fix/anthropic-built…
vvlrff Apr 26, 2026
a8ce14d
Merge remote-tracking branch 'upstream/main' into fix/openai-builtin-…
vvlrff Apr 26, 2026
1b62764
Merge branch 'fix/openai-builtin-history' into fix/anthropic-builtin-…
vvlrff Apr 26, 2026
bfbad87
Refactor and enhance tool event handling for Anthropic and Gemini int…
vvlrff Apr 26, 2026
3ee7176
fix: update event handling for Anthropic server tool results
vvlrff Apr 27, 2026
fceec56
Merge branch 'main' into fix/anthropic-builtin-tool-history
Lancetnik Apr 27, 2026
863a88b
fix: enhance OpenAI reasoning event handling and ensure unique event …
vvlrff Apr 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 33 additions & 1 deletion autogen/beta/config/anthropic/anthropic_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from anthropic import NOT_GIVEN, AsyncAnthropic
from anthropic.types import (
Message,
ServerToolUseBlock,
TextBlock,
ThinkingBlock,
ToolUseBlock,
Expand All @@ -33,6 +34,7 @@
from autogen.beta.tools.builtin.skills import SkillsToolSchema
from autogen.beta.tools.schemas import ToolSchema

from .events import AnthropicServerToolCallEvent, AnthropicServerToolResultBlockType, AnthropicServerToolResultEvent
from .mappers import (
convert_messages,
extract_mcp_servers,
Expand Down Expand Up @@ -173,12 +175,27 @@ async def __call__(
for _ in range(max_continuations):
if response.stop_reason != "pause_turn":
break
await self._emit_builtin_tool_events(response.content, context)
anthropic_messages.append({"role": "assistant", "content": response.content})
create_kwargs["messages"] = anthropic_messages
response = await self._client.messages.create(**create_kwargs)

return await self._process_response(response, context)

async def _emit_builtin_tool_events(
self,
content_blocks: list[Any],
context: "ConversationContext",
) -> None:
"""Emit typed server-tool events for server-side tool blocks."""
for block in content_blocks:
if isinstance(block, ServerToolUseBlock):
if call_event := AnthropicServerToolCallEvent.from_block(block):
await context.send(call_event)
elif isinstance(block, AnthropicServerToolResultBlockType):
if result_event := AnthropicServerToolResultEvent.from_block(block):
await context.send(result_event)

def _build_system(self, prompt: Iterable[str]) -> Any:
text = "\n".join(prompt)
if self._prompt_caching:
Expand Down Expand Up @@ -222,6 +239,14 @@ async def _process_response(
)
)

elif isinstance(block, ServerToolUseBlock):
if call_event := AnthropicServerToolCallEvent.from_block(block):
await context.send(call_event)

elif isinstance(block, AnthropicServerToolResultBlockType):
if result_event := AnthropicServerToolResultEvent.from_block(block):
await context.send(result_event)

usage = normalize_usage(response.usage.model_dump() if response.usage else {})

return ModelResponse(
Expand All @@ -248,12 +273,19 @@ async def _process_stream(

if event_type == "content_block_start":
block = event.content_block
if getattr(block, "type", None) == "tool_use":
block_type = getattr(block, "type", None)
if block_type == "tool_use":
current_tool = {
"id": block.id,
"name": block.name,
"arguments": "",
}
elif block_type == "server_tool_use":
if call_event := AnthropicServerToolCallEvent.from_block(block):
await context.send(call_event)
elif isinstance(block, AnthropicServerToolResultBlockType):
if result_event := AnthropicServerToolResultEvent.from_block(block):
await context.send(result_event)

elif event_type == "content_block_delta":
delta = event.delta
Expand Down
80 changes: 80 additions & 0 deletions autogen/beta/config/anthropic/events.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Copyright (c) 2026, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
#
# SPDX-License-Identifier: Apache-2.0

import json
from typing import TypeAlias

from anthropic.types import (
BashCodeExecutionToolResultBlock,
CodeExecutionToolResultBlock,
ServerToolUseBlock,
TextEditorCodeExecutionToolResultBlock,
WebFetchToolResultBlock,
WebSearchToolResultBlock,
)

from autogen.beta.events import BuiltinToolCallEvent, BuiltinToolResultEvent
from autogen.beta.events.base import Field
from autogen.beta.events.tool_events import ToolResult
from autogen.beta.tools.builtin.code_execution import CODE_EXECUTION_TOOL_NAME
from autogen.beta.tools.builtin.web_fetch import WEB_FETCH_TOOL_NAME
from autogen.beta.tools.builtin.web_search import WEB_SEARCH_TOOL_NAME

# Union of every server-side tool *result* block type the Anthropic API can
# return in message content. Used for isinstance checks when emitting
# AnthropicServerToolResultEvent and when replaying blocks into history.
AnthropicServerToolResultBlockType: TypeAlias = (
    WebSearchToolResultBlock
    | WebFetchToolResultBlock
    | CodeExecutionToolResultBlock
    | BashCodeExecutionToolResultBlock
    | TextEditorCodeExecutionToolResultBlock
)


class AnthropicServerToolCallEvent(BuiltinToolCallEvent):
    """Call event for a server-side tool invoked by the Anthropic API.

    Wraps a ``ServerToolUseBlock`` and normalizes the provider tool name to
    the corresponding autogen builtin tool name.
    """

    # Raw provider block; excluded from repr to keep event logs readable.
    block: ServerToolUseBlock = Field(repr=False)

    @classmethod
    def from_block(cls, block: ServerToolUseBlock) -> "AnthropicServerToolCallEvent | None":
        """Build an event from a provider block, or None for unknown tools."""
        provider_to_builtin = {
            "web_search": WEB_SEARCH_TOOL_NAME,
            "web_fetch": WEB_FETCH_TOOL_NAME,
            # All three code-execution variants map to the same builtin name.
            "code_execution": CODE_EXECUTION_TOOL_NAME,
            "bash_code_execution": CODE_EXECUTION_TOOL_NAME,
            "text_editor_code_execution": CODE_EXECUTION_TOOL_NAME,
        }
        normalized_name = provider_to_builtin.get(block.name)
        if normalized_name is None:
            return None
        return cls(
            id=block.id,
            name=normalized_name,
            arguments=json.dumps(block.input),
            block=block,
        )


class AnthropicServerToolResultEvent(BuiltinToolResultEvent):
    """Result event for a server-side tool executed by the Anthropic API.

    Wraps the provider result block and links back to the originating call
    via ``parent_id`` (the block's ``tool_use_id``).
    """

    # Raw provider block; excluded from repr to keep event logs readable.
    block: AnthropicServerToolResultBlockType = Field(repr=False)

    @classmethod
    def from_block(cls, block: object) -> "AnthropicServerToolResultEvent | None":
        """Build an event from a provider result block, or None if unrecognized."""
        # Ordered (block type(s), builtin tool name) pairs; first match wins.
        dispatch: tuple[tuple[type | tuple[type, ...], str], ...] = (
            (WebSearchToolResultBlock, WEB_SEARCH_TOOL_NAME),
            (WebFetchToolResultBlock, WEB_FETCH_TOOL_NAME),
            (
                (
                    CodeExecutionToolResultBlock,
                    BashCodeExecutionToolResultBlock,
                    TextEditorCodeExecutionToolResultBlock,
                ),
                CODE_EXECUTION_TOOL_NAME,
            ),
        )
        for block_types, tool_name in dispatch:
            if isinstance(block, block_types):
                return cls(
                    parent_id=block.tool_use_id,
                    name=tool_name,
                    result=ToolResult(),
                    block=block,
                )
        return None
16 changes: 14 additions & 2 deletions autogen/beta/config/anthropic/mappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from fast_depends.library.serializer import SerializerProto

from autogen.beta.config.anthropic.events import AnthropicServerToolCallEvent, AnthropicServerToolResultEvent
from autogen.beta.events import (
BaseEvent,
BinaryInput,
Expand Down Expand Up @@ -144,8 +145,12 @@ def tool_to_api(t: ToolSchema) -> dict[str, Any]:
return {"type": t.version, "name": "memory"}

elif isinstance(t, ShellToolSchema):
# https://platform.claude.com/docs/en/agents-and-tools/tool-use/bash-tool
return {"type": t.version, "name": "bash"}
# Anthropic's bash tool is client-side — it ships a typed schema but the
# application must execute the command itself and return a tool_result.
# autogen/beta does not provide a default executor for this here.
# Use LocalShellTool (tools/shell/) instead, which runs commands via subprocess
# and works with any provider.
raise UnsupportedToolError(t.type, "anthropic")

elif isinstance(t, SkillsToolSchema):
# Skills are handled via the container parameter, not the tools[] array.
Expand Down Expand Up @@ -253,6 +258,13 @@ def convert_messages(
if content:
result.append({"role": "assistant", "content": content})

elif isinstance(message, (AnthropicServerToolCallEvent, AnthropicServerToolResultEvent)):
block = message.block.model_dump(exclude_none=True, mode="json")
if result and result[-1]["role"] == "assistant":
result[-1]["content"].append(block)
else:
result.append({"role": "assistant", "content": [block]})

elif isinstance(message, ToolResultsEvent):
tool_results = []
for r in message.results:
Expand Down
66 changes: 66 additions & 0 deletions autogen/beta/config/gemini/events.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Copyright (c) 2026, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
#
# SPDX-License-Identifier: Apache-2.0

import json
from uuid import uuid4

from google.genai import types

from autogen.beta.events import BuiltinToolCallEvent, BuiltinToolResultEvent
from autogen.beta.events.base import Field
from autogen.beta.events.tool_events import ToolResult
from autogen.beta.tools.builtin.code_execution import CODE_EXECUTION_TOOL_NAME


class GeminiServerToolCallEvent(BuiltinToolCallEvent):
    """Call event for a server-side tool executed by the Gemini API.

    Two construction paths exist: ``from_executable_code`` for server-side
    code execution parts, and ``from_grounding`` for grounding (search)
    metadata attached to a candidate.
    """

    # Raw provider part / metadata; excluded from repr to keep logs readable.
    part: types.Part | None = Field(default=None, repr=False)
    grounding_metadata: types.GroundingMetadata | None = Field(default=None, repr=False)

    @classmethod
    def from_executable_code(cls, part: types.Part) -> "GeminiServerToolCallEvent | None":
        """Build a code-execution call event from a part, or None if absent.

        NOTE(review): unlike ``from_grounding``, no explicit ``id`` is set
        here — presumably the base event supplies a default; confirm.
        """
        if part.executable_code is None:
            return None
        # FIX: `language` is Optional in the SDK types; the previous code
        # dereferenced `language.name` unconditionally and crashed with
        # AttributeError when the model omitted the language.
        language = part.executable_code.language
        if language is None:
            language_str = ""
        elif getattr(language, "name", None):
            language_str = language.name
        else:
            language_str = str(language)
        return cls(
            name=CODE_EXECUTION_TOOL_NAME,
            arguments=json.dumps({
                "code": part.executable_code.code or "",
                "language": language_str,
            }),
            part=part,
        )

    @classmethod
    def from_grounding(cls, gm: types.GroundingMetadata, *, name: str) -> "GeminiServerToolCallEvent":
        """Build a grounding (search) call event carrying the issued queries."""
        return cls(
            id=str(uuid4()),
            name=name,
            arguments=json.dumps({"queries": list(gm.web_search_queries or [])}),
            grounding_metadata=gm,
        )


class GeminiServerToolResultEvent(BuiltinToolResultEvent):
    """Result event for a server-side tool executed by the Gemini API.

    Mirrors ``GeminiServerToolCallEvent``: one constructor per source
    (code-execution result part, grounding metadata), each linking back to
    the originating call through ``parent_id``.
    """

    # Raw provider part / metadata; excluded from repr to keep logs readable.
    part: types.Part | None = Field(default=None, repr=False)
    grounding_metadata: types.GroundingMetadata | None = Field(default=None, repr=False)

    @classmethod
    def from_code_execution_result(cls, part: types.Part, *, parent_id: str) -> "GeminiServerToolResultEvent | None":
        """Build a code-execution result event from a part, or None if absent."""
        execution_result = part.code_execution_result
        if execution_result is None:
            return None
        return cls(
            name=CODE_EXECUTION_TOOL_NAME,
            parent_id=parent_id,
            result=ToolResult(),
            part=part,
        )

    @classmethod
    def from_grounding(cls, gm: types.GroundingMetadata, *, parent_id: str, name: str) -> "GeminiServerToolResultEvent":
        """Build a grounding (search) result event tied to the given call id."""
        return cls(
            name=name,
            parent_id=parent_id,
            result=ToolResult(),
            grounding_metadata=gm,
        )
62 changes: 61 additions & 1 deletion autogen/beta/config/gemini/gemini_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,15 @@
from autogen.beta.response import ResponseProto
from autogen.beta.tools.schemas import ToolSchema

from .mappers import build_system_instruction, build_tools, convert_messages, normalize_usage, response_proto_to_config
from .events import GeminiServerToolCallEvent, GeminiServerToolResultEvent
from .mappers import (
build_system_instruction,
build_tools,
convert_messages,
grounding_tool_name,
normalize_usage,
response_proto_to_config,
)


class CreateConfig(TypedDict, total=False):
Expand Down Expand Up @@ -126,6 +134,7 @@ async def _process_response(
calls: list[ToolCallEvent] = []

for candidate in response.candidates or ():
pending_code_call_id: str | None = None
if candidate.content:
for part in candidate.content.parts or ():
if part.thought and part.text:
Expand All @@ -146,6 +155,28 @@ async def _process_response(
provider_data=pdata,
)
)
elif part.executable_code and (call_event := GeminiServerToolCallEvent.from_executable_code(part)):
pending_code_call_id = call_event.id
await context.send(call_event)
elif (
part.code_execution_result
and pending_code_call_id is not None
and (
result_event := GeminiServerToolResultEvent.from_code_execution_result(
part, parent_id=pending_code_call_id
)
)
):
await context.send(result_event)
pending_code_call_id = None
grounding = candidate.grounding_metadata if candidate.grounding_metadata else None
if grounding:
name = grounding_tool_name(grounding)
gnd_call = GeminiServerToolCallEvent.from_grounding(grounding, name=name)
await context.send(gnd_call)
await context.send(
GeminiServerToolResultEvent.from_grounding(grounding, parent_id=gnd_call.id, name=name)
)

usage = Usage()
if response.usage_metadata:
Expand Down Expand Up @@ -175,6 +206,8 @@ async def _process_stream(
calls: list[ToolCallEvent] = []
usage = Usage()
finish_reason: str | None = None
pending_code_call_id: str | None = None
last_grounding_metadata: types.GroundingMetadata | None = None

async for chunk in stream:
for candidate in chunk.candidates or ():
Expand All @@ -198,6 +231,25 @@ async def _process_stream(
provider_data=pdata,
)
)
elif part.executable_code and (
call_event := GeminiServerToolCallEvent.from_executable_code(part)
):
pending_code_call_id = call_event.id
await context.send(call_event)
elif (
part.code_execution_result
and pending_code_call_id is not None
and (
result_event := GeminiServerToolResultEvent.from_code_execution_result(
part, parent_id=pending_code_call_id
)
)
):
await context.send(result_event)
pending_code_call_id = None
grounding = candidate.grounding_metadata if candidate.grounding_metadata else None
if grounding:
last_grounding_metadata = grounding

if chunk.usage_metadata:
usage = normalize_usage(chunk.usage_metadata)
Expand All @@ -207,6 +259,14 @@ async def _process_stream(
if fr is not None:
finish_reason = fr.name.lower() if hasattr(fr, "name") else str(fr)

if last_grounding_metadata is not None:
name = grounding_tool_name(last_grounding_metadata)
gnd_call = GeminiServerToolCallEvent.from_grounding(last_grounding_metadata, name=name)
await context.send(gnd_call)
await context.send(
GeminiServerToolResultEvent.from_grounding(last_grounding_metadata, parent_id=gnd_call.id, name=name)
)

message: ModelMessage | None = None
if full_content:
message = ModelMessage(full_content)
Expand Down
Loading
Loading