diff --git a/autogen/beta/config/openai/__init__.py b/autogen/beta/config/openai/__init__.py index ac7a538b3b4..5a301c3c79b 100644 --- a/autogen/beta/config/openai/__init__.py +++ b/autogen/beta/config/openai/__init__.py @@ -4,6 +4,12 @@ from .config import OpenAIConfig, OpenAIResponsesConfig from .containers import ContainerInfo, ContainerManager, ExpiresAfter +from .events import ( + OpenAIReasoningEvent, + OpenAIServerToolCallEvent, + OpenAIServerToolItem, + OpenAIServerToolResultEvent, +) from .openai_client import OpenAIClient from .openai_responses_client import OpenAIResponsesClient @@ -13,6 +19,10 @@ "ExpiresAfter", "OpenAIClient", "OpenAIConfig", + "OpenAIReasoningEvent", "OpenAIResponsesClient", "OpenAIResponsesConfig", + "OpenAIServerToolCallEvent", + "OpenAIServerToolItem", + "OpenAIServerToolResultEvent", ) diff --git a/autogen/beta/config/openai/events.py b/autogen/beta/config/openai/events.py new file mode 100644 index 00000000000..9da8be2463b --- /dev/null +++ b/autogen/beta/config/openai/events.py @@ -0,0 +1,29 @@ +# Copyright (c) 2026, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors +# +# SPDX-License-Identifier: Apache-2.0 + +from typing import TypeAlias + +from openai.types.responses import ( + ResponseCodeInterpreterToolCall, + ResponseFunctionWebSearch, + ResponseReasoningItem, +) +from openai.types.responses.response_output_item import ImageGenerationCall + +from autogen.beta.events import BuiltinToolCallEvent, BuiltinToolResultEvent, ModelReasoning +from autogen.beta.events.base import Field + +OpenAIServerToolItem: TypeAlias = ResponseFunctionWebSearch | ResponseCodeInterpreterToolCall | ImageGenerationCall + + +class OpenAIServerToolCallEvent(BuiltinToolCallEvent): + item: OpenAIServerToolItem = Field(repr=False) + + +class OpenAIServerToolResultEvent(BuiltinToolResultEvent): + """Observability-only companion to :class:`OpenAIServerToolCallEvent`.""" + + +class OpenAIReasoningEvent(ModelReasoning): + item: ResponseReasoningItem = Field(repr=False) diff --git a/autogen/beta/config/openai/mappers.py b/autogen/beta/config/openai/mappers.py index 59cde6eeeba..63a9fd10a18 100644 --- a/autogen/beta/config/openai/mappers.py +++ b/autogen/beta/config/openai/mappers.py @@ -10,6 +10,7 @@ from openai.types import CompletionUsage from openai.types.responses import ResponseUsage +from autogen.beta.config.openai.events import OpenAIReasoningEvent, OpenAIServerToolCallEvent from autogen.beta.events import ( BaseEvent, BinaryInput, @@ -191,6 +192,15 @@ def events_to_responses_input( "output": blocks, }) + elif isinstance(message, (OpenAIReasoningEvent, OpenAIServerToolCallEvent)): + # The Responses API requires reasoning items to accompany their + # paired server-side tool calls on replay. Both event types wrap + # the original SDK object, so we re-emit it verbatim as an input + # item; ``OpenAIServerToolResultEvent`` carries no payload because + # the Responses API represents a server-side tool as a single + # combined item, already covered by the call event above. + result.append(message.item.model_dump(exclude_none=True, mode="json")) + elif isinstance(message, ModelRequest): for inp in message.parts: if isinstance(inp, TextInput): diff --git a/autogen/beta/config/openai/openai_responses_client.py b/autogen/beta/config/openai/openai_responses_client.py index 27321608473..c357f6b5011 100644 --- a/autogen/beta/config/openai/openai_responses_client.py +++ b/autogen/beta/config/openai/openai_responses_client.py @@ -19,7 +19,6 @@ ResponseCompletedEvent, ResponseFunctionToolCall, ResponseFunctionWebSearch, - ResponseOutputItemAddedEvent, ResponseOutputItemDoneEvent, ResponseOutputMessage, ResponseReasoningItem, @@ -34,11 +33,8 @@ from autogen.beta.events import ( BaseEvent, BinaryResult, - BuiltinToolCallEvent, - BuiltinToolResultEvent, ModelMessage, ModelMessageChunk, - ModelReasoning, ModelResponse, ToolCallEvent, ToolCallsEvent, @@ -51,6 +47,7 @@ from autogen.beta.tools.builtin.web_search import WEB_SEARCH_TOOL_NAME from autogen.beta.tools.schemas import ToolSchema +from .events import OpenAIReasoningEvent, OpenAIServerToolCallEvent, OpenAIServerToolResultEvent from .mappers import ( events_to_responses_input, normalize_responses_usage, @@ -154,9 +151,8 @@ async def _process_response( for item in response.output: if isinstance(item, ResponseReasoningItem): - for summary in item.summary or []: - if hasattr(summary, "text") and summary.text: - await context.send(ModelReasoning(summary.text)) + text = "\n\n".join(s.text for s in (item.summary or []) if getattr(s, "text", None)) + await context.send(OpenAIReasoningEvent(text, item=item)) elif isinstance(item, ResponseOutputMessage): for part in item.content: @@ -165,39 +161,36 @@ async def _process_response( await context.send(model_msg) elif isinstance(item, ResponseFunctionWebSearch): - args = item.action.model_dump_json() await context.send( - BuiltinToolCallEvent( + OpenAIServerToolCallEvent( id=item.id, name=WEB_SEARCH_TOOL_NAME, - arguments=args, + arguments=item.action.model_dump_json(), + item=item, ) ) await context.send( - BuiltinToolResultEvent( + OpenAIServerToolResultEvent( parent_id=item.id, name=WEB_SEARCH_TOOL_NAME, - result=ToolResult(args), + result=ToolResult(), ) ) elif isinstance(item, ResponseCodeInterpreterToolCall): await context.send( - BuiltinToolCallEvent( + OpenAIServerToolCallEvent( id=item.id, name=CODE_EXECUTION_TOOL_NAME, arguments=json.dumps({"code": item.code}) if item.code is not None else "{}", + item=item, ) ) await context.send( - BuiltinToolResultEvent( + OpenAIServerToolResultEvent( parent_id=item.id, name=CODE_EXECUTION_TOOL_NAME, - result=ToolResult({ - "status": item.status, - "container_id": item.container_id, - "outputs": [output.model_dump() for output in item.outputs] if item.outputs else [], - }), + result=ToolResult(), ) ) @@ -216,17 +209,18 @@ async def _process_response( metadata=item.model_dump(exclude={"result", "status", "type"}), ) await context.send( - BuiltinToolCallEvent( + OpenAIServerToolCallEvent( id=item.id, name=IMAGE_GENERATION_TOOL_NAME, arguments="", + item=item, ) ) await context.send( - BuiltinToolResultEvent( + OpenAIServerToolResultEvent( parent_id=item.id, name=IMAGE_GENERATION_TOOL_NAME, - result=ToolResult(item.result), + result=ToolResult(), ) ) files.append(result) @@ -260,97 +254,79 @@ async def _process_stream( full_content += event.delta await context.send(ModelMessageChunk(event.delta)) - elif isinstance(event, ResponseOutputItemAddedEvent): - # call image generation tool - if isinstance(event.item, ImageGenerationCall): - await context.send( - BuiltinToolCallEvent( - id=event.item.id, - name=IMAGE_GENERATION_TOOL_NAME, - arguments="", + elif isinstance(event, ResponseOutputItemDoneEvent): + # Builtin and reasoning events are emitted on Done so the typed + # SDK object carried by the event is fully populated (Added fires + # before the server-side tool has executed — code/outputs missing). + + if isinstance(event.item, ResponseReasoningItem): + text = "\n\n".join(s.text for s in (event.item.summary or []) if getattr(s, "text", None)) + await context.send(OpenAIReasoningEvent(text, item=event.item)) + + elif isinstance(event.item, ResponseFunctionToolCall): + calls.append( + ToolCallEvent( + id=event.item.call_id, + name=event.item.name, + arguments=event.item.arguments, ) ) - # call web search tool elif isinstance(event.item, ResponseFunctionWebSearch): - ( - await context.send( - BuiltinToolCallEvent( - id=event.item.id, - name=WEB_SEARCH_TOOL_NAME, - arguments=event.item.action.model_dump_json(), - ) - ), + await context.send( + OpenAIServerToolCallEvent( + id=event.item.id, + name=WEB_SEARCH_TOOL_NAME, + arguments=event.item.action.model_dump_json(), + item=event.item, + ) + ) + await context.send( + OpenAIServerToolResultEvent( + parent_id=event.item.id, + name=WEB_SEARCH_TOOL_NAME, + result=ToolResult(), + ) ) - # call code execution tool elif isinstance(event.item, ResponseCodeInterpreterToolCall): await context.send( - BuiltinToolCallEvent( + OpenAIServerToolCallEvent( id=event.item.id, name=CODE_EXECUTION_TOOL_NAME, arguments=json.dumps({"code": event.item.code}) if event.item.code is not None else "{}", - provider_data={ - "status": event.item.status, - "container_id": event.item.container_id, - }, + item=event.item, ) ) - - else: - pass - - elif isinstance(event, ResponseOutputItemDoneEvent): - # call regular function tool - if isinstance(event.item, ResponseFunctionToolCall): - calls.append( - ToolCallEvent( - id=event.item.call_id, - name=event.item.name, - arguments=event.item.arguments, + await context.send( + OpenAIServerToolResultEvent( + parent_id=event.item.id, + name=CODE_EXECUTION_TOOL_NAME, + result=ToolResult(), ) ) - # image generation tool call result elif isinstance(event.item, ImageGenerationCall) and event.item.result: result = BinaryResult( base64.b64decode(event.item.result), metadata=event.item.model_dump(exclude={"result", "status", "type"}), ) await context.send( - BuiltinToolResultEvent( - parent_id=event.item.id, + OpenAIServerToolCallEvent( + id=event.item.id, name=IMAGE_GENERATION_TOOL_NAME, - result=ToolResult(event.item.result), - ) - ) - files.append(result) - - # web search tool call result - elif isinstance(event.item, ResponseFunctionWebSearch): - await context.send( - BuiltinToolResultEvent( - parent_id=event.item.id, - name=WEB_SEARCH_TOOL_NAME, - result=ToolResult(event.item.action.model_dump_json()), + arguments="", + item=event.item, ) ) - - # code execution tool call result - elif isinstance(event.item, ResponseCodeInterpreterToolCall): await context.send( - BuiltinToolResultEvent( + OpenAIServerToolResultEvent( parent_id=event.item.id, - name=CODE_EXECUTION_TOOL_NAME, - result=ToolResult({ - "status": event.item.status, - "container_id": event.item.container_id, - "outputs": [output.model_dump() for output in event.item.outputs] - if event.item.outputs - else [], - }), + name=IMAGE_GENERATION_TOOL_NAME, + result=ToolResult(), ) ) + files.append(result) elif isinstance(event, ResponseCompletedEvent): # Stream finished diff --git a/test/beta/config/openai/test_builtin_tool_events.py b/test/beta/config/openai/test_builtin_tool_events.py new file mode 100644 index 00000000000..5a0a1ddac7f --- /dev/null +++ b/test/beta/config/openai/test_builtin_tool_events.py @@ -0,0 +1,108 @@ +# Copyright (c) 2026, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors +# +# SPDX-License-Identifier: Apache-2.0 + +import json +from collections.abc import Iterable +from typing import Any + +import pytest +from openai.types.responses import ( + Response, + ResponseCodeInterpreterToolCall, + ResponseFunctionToolCall, + ResponseFunctionWebSearch, + ResponseOutputMessage, +) +from openai.types.responses.response_function_web_search import ActionSearch +from openai.types.responses.response_output_item import ImageGenerationCall +from openai.types.responses.response_output_text import ResponseOutputText + +from autogen.beta import MemoryStream +from autogen.beta.config.openai import ( + OpenAIResponsesClient, + OpenAIServerToolCallEvent, + OpenAIServerToolResultEvent, +) +from autogen.beta.context import ConversationContext +from autogen.beta.events import BaseEvent, ModelMessage, ModelResponse, ToolCallEvent, ToolCallsEvent +from autogen.beta.events.tool_events import ToolResult +from autogen.beta.tools.builtin.code_execution import CODE_EXECUTION_TOOL_NAME +from autogen.beta.tools.builtin.image_generation import IMAGE_GENERATION_TOOL_NAME +from autogen.beta.tools.builtin.web_search import WEB_SEARCH_TOOL_NAME + + +async def _process(output: Iterable[Any]) -> tuple[ModelResponse, list[BaseEvent]]: + client = OpenAIResponsesClient(api_key="test") + response = Response.model_construct( + id="r1", + object="response", + model="gpt-5", + output=list(output), + usage=None, + ) + stream = MemoryStream() + context = ConversationContext(stream=stream) + result = await client._process_response(response, context) + return result, list(await stream.history.get_events()) + + +@pytest.mark.asyncio +async def test_process_response_routes_all_item_types() -> None: + web = ResponseFunctionWebSearch( + id="ws_1", + action=ActionSearch(type="search", query="bitcoin"), + status="completed", + type="web_search_call", + ) + code = ResponseCodeInterpreterToolCall( + id="ci_1", + code="print(1)", + status="completed", + type="code_interpreter_call", + outputs=None, + container_id="c_1", + ) + image = ImageGenerationCall( + id="ig_1", + status="completed", + type="image_generation_call", + result="YWJj", # base64 "abc" + revised_prompt=None, + output_format="png", + ) + msg = ResponseOutputMessage( + id="msg_1", + type="message", + role="assistant", + status="completed", + content=[ResponseOutputText(type="output_text", text="Done.", annotations=[])], + ) + user_tool = ResponseFunctionToolCall( + id="id_1", + call_id="call_1", + name="multiply", + arguments='{"a": 2, "b": 3}', + type="function_call", + ) + + response, events = await _process([web, code, image, msg, user_tool]) + + assert response.message == ModelMessage("Done.") + assert response.tool_calls == ToolCallsEvent([ + ToolCallEvent(id="call_1", name="multiply", arguments='{"a": 2, "b": 3}'), + ]) + assert [f.data for f in response.files] == [b"abc"] + assert events == [ + OpenAIServerToolCallEvent( + id="ws_1", name=WEB_SEARCH_TOOL_NAME, arguments=web.action.model_dump_json(), item=web + ), + OpenAIServerToolResultEvent(parent_id="ws_1", name=WEB_SEARCH_TOOL_NAME, result=ToolResult()), + OpenAIServerToolCallEvent( + id="ci_1", name=CODE_EXECUTION_TOOL_NAME, arguments=json.dumps({"code": "print(1)"}), item=code + ), + OpenAIServerToolResultEvent(parent_id="ci_1", name=CODE_EXECUTION_TOOL_NAME, result=ToolResult()), + OpenAIServerToolCallEvent(id="ig_1", name=IMAGE_GENERATION_TOOL_NAME, arguments="", item=image), + OpenAIServerToolResultEvent(parent_id="ig_1", name=IMAGE_GENERATION_TOOL_NAME, result=ToolResult()), + ModelMessage("Done."), + ]