Skip to content

Commit e7aab4a

Browse files
aidendle94 and claude committed
feat(gemma4): implement PR#39027 — adjust_request for reasoning parser, multi-turn tool fixes
- Add adjust_request() to ReasoningParser base class and wire it through the full serving pipeline (api_server, responses, render)
- Gemma4ReasoningParser.adjust_request() sets skip_special_tokens=False unconditionally to preserve boundary tokens
- Add is_reasoning_end() with tool-call/turn-boundary detection via reverse scan for <|turn>, <|tool_call>, <|tool_response> tokens
- Fix streaming prefix stripping to return empty reasoning instead of None when thought\n prefix is fully consumed
- Add adjust_request() to abstract_parser delegating to both reasoning and tool parsers
- Rename _parse_gemma4_args streaming→partial; withhold trailing keys without values in partial mode
- Skip empty user messages in Anthropic Messages API translation
- Fix mypy cast in ReasoningParserManager.register_module
- Add Gemma4 tool chat template (331-line jinja)

Based on vllm-project#39027

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 8ec3f42 commit e7aab4a

File tree

9 files changed

+398
-24
lines changed

9 files changed

+398
-24
lines changed
Lines changed: 331 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,331 @@
1+
{%- macro format_parameters(properties, required) -%}
{#- Renders every entry of `properties` (visited in key order via dictsort) as `key:{...}`, comma-separated.
    Keys listed in standard_keys are skipped. Inside the braces it emits, when present: description,
    nullable:true, enum (STRING type), nested properties/required (OBJECT type), items (ARRAY type),
    and finally the upper-cased type followed by the closing brace.
    The `required` argument is not referenced anywhere in this macro body. -#}
2+
{%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
3+
{%- set ns = namespace(found_first=false) -%}
4+
{%- for key, value in properties | dictsort -%}
5+
{%- set add_comma = false -%}
6+
{%- if key not in standard_keys -%}
7+
{%- if ns.found_first %},{% endif -%}
8+
{%- set ns.found_first = true -%}
9+
{{ key }}:{
10+
{%- if value['description'] -%}
11+
description:<|"|>{{ value['description'] }}<|"|>
12+
{%- set add_comma = true -%}
13+
{%- endif -%}
14+
{%- if value['nullable'] %}
15+
{%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
16+
nullable:true
17+
{%- endif -%}
18+
{%- if value['type'] | upper == 'STRING' -%}
19+
{%- if value['enum'] -%}
20+
{%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
21+
enum:{{ format_argument(value['enum']) }}
22+
{%- endif -%}
23+
{%- elif value['type'] | upper == 'OBJECT' -%}
{#- NOTE(review): the leading comma below is emitted without consulting add_comma, so it is written
    even when no description/nullable field preceded it; the ARRAY branch's ',items:{' does the same. -#}
24+
,properties:{
25+
{%- if value['properties'] is defined and value['properties'] is mapping -%}
26+
{{- format_parameters(value['properties'], value['required'] | default([])) -}}
27+
{%- elif value is mapping -%}
28+
{{- format_parameters(value, value['required'] | default([])) -}}
29+
{%- endif -%}
30+
}
31+
{%- if value['required'] -%}
32+
,required:[
33+
{%- for item in value['required'] | default([]) -%}
34+
<|"|>{{- item -}}<|"|>
35+
{%- if not loop.last %},{% endif -%}
36+
{%- endfor -%}
37+
]
38+
{%- endif -%}
39+
{%- elif value['type'] | upper == 'ARRAY' -%}
40+
{%- if value['items'] is mapping and value['items'] -%}
41+
,items:{
42+
{%- set ns_items = namespace(found_first=false) -%}
43+
{%- for item_key, item_value in value['items'] | dictsort -%}
44+
{%- if item_value is not none -%}
45+
{%- if ns_items.found_first %},{% endif -%}
46+
{%- set ns_items.found_first = true -%}
47+
{%- if item_key == 'properties' -%}
48+
properties:{
49+
{%- if item_value is mapping -%}
50+
{{- format_parameters(item_value, value['items']['required'] | default([])) -}}
51+
{%- endif -%}
52+
}
53+
{%- elif item_key == 'required' -%}
54+
required:[
55+
{%- for req_item in item_value -%}
56+
<|"|>{{- req_item -}}<|"|>
57+
{%- if not loop.last %},{% endif -%}
58+
{%- endfor -%}
59+
]
60+
{%- elif item_key == 'type' -%}
61+
{%- if item_value is string -%}
62+
type:{{ format_argument(item_value | upper) }}
63+
{%- else -%}
64+
type:{{ format_argument(item_value | map('upper') | list) }}
65+
{%- endif -%}
66+
{%- else -%}
67+
{{ item_key }}:{{ format_argument(item_value) }}
68+
{%- endif -%}
69+
{%- endif -%}
70+
{%- endfor -%}
71+
}
72+
{%- endif -%}
73+
{%- endif -%}
74+
{%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
75+
type:<|"|>{{ value['type'] | upper }}<|"|>}
76+
{%- endif -%}
77+
{%- endfor -%}
78+
{%- endmacro -%}
79+
{%- macro format_function_declaration(tool_data) -%}
{#- Renders one tool as `declaration:NAME{description:...,parameters:{...},response:{...}}`.
    Reads tool_data['function'] for name, description, parameters and (optionally) response.
    The parameters section emits, in order and when truthy: properties (via format_parameters),
    required, and the upper-cased type. -#}
80+
declaration:{{- tool_data['function']['name'] -}}{description:<|"|>{{- tool_data['function']['description'] -}}<|"|>
81+
{%- set params = tool_data['function']['parameters'] -%}
82+
{%- if params -%}
83+
,parameters:{
84+
{%- if params['properties'] -%}
85+
properties:{ {{- format_parameters(params['properties'], params['required']) -}} },
86+
{%- endif -%}
87+
{%- if params['required'] -%}
88+
required:[
89+
{%- for item in params['required'] -%}
90+
<|"|>{{- item -}}<|"|>
91+
{{- ',' if not loop.last -}}
92+
{%- endfor -%}
93+
],
94+
{%- endif -%}
95+
{%- if params['type'] -%}
{#- NOTE(review): the only '}' that closes 'parameters:{' is on the next line, inside this branch;
    confirm that params always carries a truthy 'type'. -#}
96+
type:<|"|>{{- params['type'] | upper -}}<|"|>}
97+
{%- endif -%}
98+
{%- endif -%}
99+
{%- if 'response' in tool_data['function'] -%}
100+
{%- set response_declaration = tool_data['function']['response'] -%}
101+
,response:{
102+
{%- if response_declaration['description'] -%}
103+
description:<|"|>{{- response_declaration['description'] -}}<|"|>,
104+
{%- endif -%}
105+
{%- if response_declaration['type'] | upper == 'OBJECT' -%}
{#- NOTE(review): likewise, 'response:{' is closed only when the response type is OBJECT. -#}
106+
type:<|"|>{{- response_declaration['type'] | upper -}}<|"|>}
107+
{%- endif -%}
108+
{%- endif -%}
109+
}
110+
{%- endmacro -%}
111+
{%- macro format_argument(argument, escape_keys=True) -%}
{#- Recursively serialises a value:
      string   -> wrapped in <|"|> ... <|"|>
      boolean  -> true / false
      mapping  -> {k:v,...} in key order (dictsort); keys are wrapped in <|"|> only when escape_keys is set
      sequence -> [v,...]
      other    -> emitted as-is
    escape_keys is passed down unchanged to nested values. -#}
112+
{%- if argument is string -%}
113+
{{- '<|"|>' + argument + '<|"|>' -}}
114+
{%- elif argument is boolean -%}
115+
{{- 'true' if argument else 'false' -}}
116+
{%- elif argument is mapping -%}
117+
{{- '{' -}}
118+
{%- set ns = namespace(found_first=false) -%}
119+
{%- for key, value in argument | dictsort -%}
120+
{%- if ns.found_first %},{% endif -%}
121+
{%- set ns.found_first = true -%}
122+
{%- if escape_keys -%}
123+
{{- '<|"|>' + key + '<|"|>' -}}
124+
{%- else -%}
125+
{{- key -}}
126+
{%- endif -%}
127+
:{{- format_argument(value, escape_keys=escape_keys) -}}
128+
{%- endfor -%}
129+
{{- '}' -}}
130+
{%- elif argument is sequence -%}
131+
{{- '[' -}}
132+
{%- for item in argument -%}
133+
{{- format_argument(item, escape_keys=escape_keys) -}}
134+
{%- if not loop.last %},{% endif -%}
135+
{%- endfor -%}
136+
{{- ']' -}}
137+
{%- else -%}
138+
{{- argument -}}
139+
{%- endif -%}
140+
{%- endmacro -%}
141+
{%- macro strip_thinking(text) -%}
{#- Drops thought-channel spans from `text`: the text is split on '<channel|>', and from each piece
    only the portion before the first '<|channel>' (if any) is kept. The kept pieces are
    concatenated and the result is trimmed. -#}
142+
{%- set ns = namespace(result='') -%}
143+
{%- for part in text.split('<channel|>') -%}
144+
{%- if '<|channel>' in part -%}
145+
{%- set ns.result = ns.result + part.split('<|channel>')[0] -%}
146+
{%- else -%}
147+
{%- set ns.result = ns.result + part -%}
148+
{%- endif -%}
149+
{%- endfor -%}
150+
{{- ns.result | trim -}}
151+
{%- endmacro -%}
152+
153+
{%- macro format_tool_response_block(tool_name, response) -%}
{#- Emits '<|tool_response>response:TOOL_NAME{...}<tool_response|>'.
    A mapping response is written as key:value pairs in key order (dictsort); any other response
    is wrapped as {value:...}. Values go through format_argument with escape_keys=False. -#}
154+
{{- '<|tool_response>' -}}
155+
{%- if response is mapping -%}
156+
{{- 'response:' + tool_name + '{' -}}
157+
{%- for key, value in response | dictsort -%}
158+
{{- key -}}:{{- format_argument(value, escape_keys=False) -}}
159+
{%- if not loop.last %},{% endif -%}
160+
{%- endfor -%}
161+
{{- '}' -}}
162+
{%- else -%}
163+
{{- 'response:' + tool_name + '{value:' + format_argument(response, escape_keys=False) + '}' -}}
164+
{%- endif -%}
165+
{{- '<tool_response|>' -}}
166+
{%- endmacro -%}
167+
168+
{%- set ns = namespace(prev_message_type=None) -%}
{#- Template preamble: emits bos_token, then an optional system turn. The system turn is opened when
    thinking is enabled, when tools are supplied, or when the first message has role system/developer.
    Inside it, in order: the '<|think|>' marker (thinking enabled), the trimmed content of that first
    message (which is then removed from loop_messages), and one '<|tool>...<tool|>' block per tool. -#}
169+
{%- set loop_messages = messages -%}
170+
{{ bos_token }}
171+
{%- if (enable_thinking is defined and enable_thinking) or tools or messages[0]['role'] in ['system', 'developer'] -%}
172+
{{- '<|turn>system\n' -}}
173+
174+
{%- if enable_thinking is defined and enable_thinking -%}
175+
{{- '<|think|>' -}}
176+
{%- set ns.prev_message_type = 'think' -%}
177+
{%- endif -%}
178+
179+
{%- if messages[0]['role'] in ['system', 'developer'] -%}
180+
{{- messages[0]['content'] | trim -}}
181+
{%- set loop_messages = messages[1:] -%}
182+
{%- endif -%}
183+
184+
{%- if tools -%}
185+
{%- for tool in tools %}
186+
{{- '<|tool>' -}}
187+
{{- format_function_declaration(tool) | trim -}}
188+
{{- '<tool|>' -}}
189+
{%- endfor %}
190+
{%- set ns.prev_message_type = 'tool' -%}
191+
{%- endif -%}
192+
193+
{{- '<turn|>\n' -}}
194+
{%- endif %}
195+
196+
{%- set ns_turn = namespace(last_user_idx=-1) -%}
{#- Records the index (within loop_messages) of the last message whose role is 'user';
    stays -1 when there is none. Used later to decide whether reasoning is re-emitted. -#}
197+
{%- for i in range(loop_messages | length) -%}
198+
{%- if loop_messages[i]['role'] == 'user' -%}
199+
{%- set ns_turn.last_user_idx = i -%}
200+
{%- endif -%}
201+
{%- endfor -%}
202+
203+
{%- for message in loop_messages -%}
{#- Renders every message whose role is not 'tool'. Messages with role 'tool' produce no output on
    their own iteration; they are picked up by the forward scan that runs from the preceding
    message carrying tool_calls. ns.prev_message_type is reset to None at the start of each
    rendered message and then updated as tool calls, tool responses or media items are emitted. -#}
204+
{%- if message['role'] != 'tool' -%}
205+
{%- set ns.prev_message_type = None -%}
206+
{%- set role = 'model' if message['role'] == 'assistant' else message['role'] -%}
207+
{#- OpenAI may emit multiple assistant messages in one tool loop (user -> asst -> tool -> asst -> tool).
208+
Only the first of those should open <|turn>model; later ones continue the same model turn. -#}
209+
{%- set prev_nt = namespace(role=None, found=false) -%}
210+
{%- if loop.index0 > 0 -%}
211+
{%- for j in range(loop.index0 - 1, -1, -1) -%}
212+
{%- if not prev_nt.found -%}
213+
{%- if loop_messages[j]['role'] != 'tool' -%}
214+
{%- set prev_nt.role = loop_messages[j]['role'] -%}
215+
{%- set prev_nt.found = true -%}
216+
{%- endif -%}
217+
{%- endif -%}
218+
{%- endfor -%}
219+
{%- endif -%}
220+
{%- set continue_same_model_turn = (role == 'model' and prev_nt.role == 'assistant') -%}
221+
{%- if not continue_same_model_turn -%}
222+
{{- '<|turn>' + role + '\n' }}
223+
{%- endif -%}
{#- Reasoning is re-emitted only when the message has a 'reasoning' value, comes after the last
    user message, and also carries tool_calls. -#}
224+
225+
{%- if message.get('reasoning') and loop.index0 > ns_turn.last_user_idx and message.get('tool_calls') -%}
226+
{{- '<|channel>thought\n' + message['reasoning'] + '\n<channel|>'}}
227+
{%- endif -%}
228+
229+
{%- if message['tool_calls'] -%}
230+
{%- for tool_call in message['tool_calls'] -%}
231+
{%- set function = tool_call['function'] -%}
232+
{{- '<|tool_call>call:' + function['name'] + '{' -}}
233+
{%- if function['arguments'] is mapping -%}
234+
{%- set ns_args = namespace(found_first=false) -%}
235+
{%- for key, value in function['arguments'] | dictsort -%}
236+
{%- if ns_args.found_first %},{% endif -%}
237+
{%- set ns_args.found_first = true -%}
238+
{{- key -}}:{{- format_argument(value, escape_keys=False) -}}
239+
{%- endfor -%}
240+
{%- elif function['arguments'] is string -%}
241+
{{- function['arguments'] -}}
242+
{%- endif -%}
243+
{{- '}<tool_call|>' -}}
244+
{%- endfor -%}
245+
{%- set ns.prev_message_type = 'tool_call' -%}
246+
{%- endif -%}
247+
248+
{%- set ns_tr_out = namespace(flag=false) -%}
249+
{%- if message.get('tool_responses') -%}
250+
{#- Legacy: tool_responses embedded on the assistant message -#}
251+
{%- for tool_response in message['tool_responses'] -%}
252+
{{- format_tool_response_block(tool_response['name'] | default('unknown'), tool_response['response']) -}}
253+
{%- set ns_tr_out.flag = true -%}
254+
{%- set ns.prev_message_type = 'tool_response' -%}
255+
{%- endfor -%}
256+
{%- elif message.get('tool_calls') -%}
257+
{#- OpenAI Chat Completions: consecutive following messages with role "tool" (no break/continue; range scan) -#}
258+
{%- set ns_tool_scan = namespace(stopped=false) -%}
259+
{%- for k in range(loop.index0 + 1, loop_messages | length) -%}
260+
{%- if ns_tool_scan.stopped -%}
261+
{%- elif loop_messages[k]['role'] != 'tool' -%}
262+
{%- set ns_tool_scan.stopped = true -%}
263+
{%- else -%}
264+
{%- set follow = loop_messages[k] -%}
{#- The tool name defaults to the tool message's own 'name' (or 'unknown') and is replaced by the
    function name of the tool call whose id equals this message's tool_call_id. -#}
265+
{%- set ns_tname = namespace(name=follow.get('name') | default('unknown')) -%}
266+
{%- for tc in message['tool_calls'] -%}
267+
{%- if tc.get('id') == follow.get('tool_call_id') -%}
268+
{%- set ns_tname.name = tc['function']['name'] -%}
269+
{%- endif -%}
270+
{%- endfor -%}
271+
{%- set tool_body = follow.get('content') -%}
272+
{%- if tool_body is string -%}
273+
{{- format_tool_response_block(ns_tname.name, tool_body) -}}
274+
{%- elif tool_body is sequence and tool_body is not string -%}
275+
{%- set ns_txt = namespace(s='') -%}
276+
{%- for part in tool_body -%}
277+
{%- if part.get('type') == 'text' -%}
278+
{%- set ns_txt.s = ns_txt.s + (part.get('text') | default('')) -%}
279+
{%- endif -%}
280+
{%- endfor -%}
281+
{{- format_tool_response_block(ns_tname.name, ns_txt.s) -}}
282+
{%- else -%}
283+
{{- format_tool_response_block(ns_tname.name, tool_body) -}}
284+
{%- endif -%}
285+
{%- set ns_tr_out.flag = true -%}
286+
{%- set ns.prev_message_type = 'tool_response' -%}
287+
{%- endif -%}
288+
{%- endfor -%}
289+
{%- endif -%}
290+
291+
{%- if message['content'] is string -%}
292+
{%- if role == 'model' -%}
293+
{{- strip_thinking(message['content']) -}}
294+
{%- else -%}
295+
{{- message['content'] | trim -}}
296+
{%- endif -%}
297+
{%- elif message['content'] is sequence -%}
298+
{%- for item in message['content'] -%}
299+
{%- if item['type'] == 'text' -%}
300+
{%- if role == 'model' -%}
301+
{{- strip_thinking(item['text']) -}}
302+
{%- else -%}
303+
{{- item['text'] | trim -}}
304+
{%- endif -%}
305+
{%- elif item['type'] == 'image' -%}
306+
{{- '\n\n<|image|>\n\n' -}}
307+
{%- set ns.prev_message_type = 'image' -%}
308+
{%- elif item['type'] == 'audio' -%}
309+
{{- '<|audio|>' -}}
310+
{%- set ns.prev_message_type = 'audio' -%}
311+
{%- elif item['type'] == 'video' -%}
312+
{{- '\n\n<|video|>\n\n' -}}
313+
{%- set ns.prev_message_type = 'video' -%}
314+
{%- endif -%}
315+
{%- endfor -%}
316+
{%- endif -%}
{#- The turn is left open (no '<turn|>') when tool responses were emitted for this message and the
    message has no content; in every other case the turn is closed here. -#}
317+
318+
{%- if not (ns_tr_out.flag and not message.get('content')) -%}
319+
{{- '<turn|>\n' -}}
320+
{%- endif -%}
321+
{%- endif -%}
322+
{%- endfor -%}
323+
324+
{%- if add_generation_prompt -%}
{#- Generation prompt: opens a new model turn unless the last rendered message ended on a tool
    response (ns.prev_message_type == 'tool_response'). When enable_thinking is unset or false,
    an empty thought channel is appended. -#}
325+
{%- if ns.prev_message_type != 'tool_response' -%}
326+
{{- '<|turn>model\n' -}}
327+
{%- endif -%}
328+
{%- if not enable_thinking | default(false) -%}
329+
{{- '<|channel>thought\n<channel|>' -}}
330+
{%- endif -%}
331+
{%- endif -%}

vllm/entrypoints/anthropic/serving.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,8 @@ def _convert_messages(
170170
else:
171171
cls._convert_message_content(msg, openai_msg, openai_messages)
172172

173-
openai_messages.append(openai_msg)
173+
if not (msg.role == "user" and "content" not in openai_msg):
174+
openai_messages.append(openai_msg)
174175

175176
@classmethod
176177
def _convert_message_content(

vllm/entrypoints/openai/api_server.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,7 @@ async def init_app_state(
379379
enable_auto_tools=args.enable_auto_tool_choice,
380380
exclude_tools_when_tool_choice_none=args.exclude_tools_when_tool_choice_none,
381381
tool_parser=args.tool_call_parser,
382+
reasoning_parser=args.structured_outputs_config.reasoning_parser,
382383
default_chat_template_kwargs=args.default_chat_template_kwargs,
383384
log_error_stack=args.log_error_stack,
384385
)
@@ -481,6 +482,7 @@ async def init_render_app_state(
481482
enable_auto_tools=args.enable_auto_tool_choice,
482483
exclude_tools_when_tool_choice_none=args.exclude_tools_when_tool_choice_none,
483484
tool_parser=args.tool_call_parser,
485+
reasoning_parser=args.structured_outputs_config.reasoning_parser,
484486
default_chat_template_kwargs=args.default_chat_template_kwargs,
485487
log_error_stack=args.log_error_stack,
486488
)

vllm/entrypoints/openai/responses/serving.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -594,6 +594,7 @@ async def _make_request(
594594
default_template_kwargs=None,
595595
tool_dicts=tool_dicts,
596596
tool_parser=self.parser.tool_parser_cls if self.parser else None,
597+
reasoning_parser=self.parser.reasoning_parser_cls if self.parser else None,
597598
)
598599
return messages, engine_inputs
599600

@@ -618,6 +619,7 @@ async def _render_next_turn(
618619
default_template_kwargs=None,
619620
tool_dicts=tool_dicts,
620621
tool_parser=tool_parser,
622+
reasoning_parser=self.parser.reasoning_parser_cls if self.parser else None,
621623
)
622624
return engine_inputs
623625

0 commit comments

Comments
 (0)