Skip to content
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
4b3aa51
Narrow prefix-preserving check to the actual requirement
qgallouedec Apr 5, 2026
0894910
Merge branch 'main' into narrow-prefix-preserving-check
qgallouedec Apr 5, 2026
730070b
Update chat template examples to use multiplication function calls
qgallouedec Apr 5, 2026
4622d77
style
qgallouedec Apr 5, 2026
08d4c51
Move chat templates from inline strings to `.jinja` files
qgallouedec Apr 5, 2026
276559d
tools in dummy
qgallouedec Apr 5, 2026
673c35d
Add chat template files to MANIFEST.in
qgallouedec Apr 5, 2026
604c476
Enhance chat template handling to include tool call formatting in mes…
qgallouedec Apr 5, 2026
83a7ef6
align grpo and async
qgallouedec Apr 5, 2026
0f28384
Merge branch 'main' into chat-templates-files
qgallouedec Apr 6, 2026
e5d7cdf
revert no content
qgallouedec Apr 6, 2026
a618809
docstyle ignore
qgallouedec Apr 6, 2026
a0b81b1
Merge branch 'main' into chat-templates-files
qgallouedec Apr 6, 2026
67ab0af
Merge branch 'main' into chat-templates-files
qgallouedec Apr 6, 2026
63ec7d3
Merge branch 'main' into chat-templates-files
qgallouedec Apr 7, 2026
c838146
Merge branch 'main' into chat-templates-files
qgallouedec Apr 7, 2026
7b7f5d1
revert old modif
qgallouedec Apr 7, 2026
8e31596
Add Qwen3-VL tool calling support
qgallouedec Apr 7, 2026
91e940e
Merge branch 'main' into qwen3vl-tool-calling
qgallouedec Apr 7, 2026
116d5c0
Merge branch 'main' into qwen3vl-tool-calling
qgallouedec Apr 8, 2026
e111044
Merge branch 'main' into qwen3vl-tool-calling
qgallouedec Apr 9, 2026
39f0f32
Merge branch 'main' into qwen3vl-tool-calling
qgallouedec Apr 9, 2026
535544b
Merge branch 'main' into qwen3vl-tool-calling
qgallouedec Apr 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/grpo_trainer.md
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,7 @@ Tested with:

- [**Gemma4**](https://huggingface.co/collections/google/gemma-4) — e.g., `google/gemma-4-E2B-it`
- [**Qwen3**](https://huggingface.co/collections/Qwen/qwen3) — e.g., `Qwen/Qwen3-0.6B`
- [**Qwen3-VL**](https://huggingface.co/collections/Qwen/qwen3-vl) — e.g., `Qwen/Qwen3-VL-2B-Instruct`
- [**Qwen3.5**](https://huggingface.co/collections/Qwen/qwen35) — e.g., `Qwen/Qwen3.5-2B`

> [!TIP]
Expand Down
21 changes: 13 additions & 8 deletions tests/test_chat_template_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,8 @@ def test_clone_with_sequence_classification_model(self):
@pytest.mark.parametrize(
"tokenizer_name",
[
pytest.param("trl-internal-testing/tiny-Qwen3MoeForSequenceClassification", id="qwen3"),
pytest.param("trl-internal-testing/tiny-Qwen3MoeForCausalLM", id="qwen3"),
pytest.param("trl-internal-testing/tiny-Qwen3VLForConditionalGeneration", id="qwen3_vl"),
pytest.param("trl-internal-testing/tiny-Qwen3_5ForConditionalGeneration", id="qwen35"),
],
)
Expand Down Expand Up @@ -213,7 +214,7 @@ def test_deepseek_tool_calling(self, model_id):

class TestIsChatTemplatePrefixPreserving:
def test_prefix_preserving_template(self):
tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen3MoeForSequenceClassification")
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same template, but it's more natural to use the CausalLM instead of the SequenceClassification

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't remember why we used Qwen3MoeForSequenceClassification in the first place

tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen3MoeForCausalLM")
# docstyle-ignore
tokenizer.chat_template = textwrap.dedent(r"""
{%- for message in messages %}
Expand Down Expand Up @@ -243,7 +244,7 @@ def test_prefix_preserving_template(self):
assert is_chat_template_prefix_preserving(tokenizer) is True

def test_non_prefix_preserving_template(self):
tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen3MoeForSequenceClassification")
tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen3MoeForCausalLM")
# The following template is quite typical of models like Qwen3 and GPT-OSS, where the thinking part (even
# empty) is only present for last assistant message, which makes it non-prefix-preserving: appending a tool
# message changes the earlier output.
Expand Down Expand Up @@ -308,7 +309,7 @@ def test_non_prefix_preserving_template(self):
@pytest.mark.parametrize(
"tokenizer_name",
[
pytest.param("trl-internal-testing/tiny-Qwen3MoeForSequenceClassification", id="qwen3"),
pytest.param("trl-internal-testing/tiny-Qwen3MoeForCausalLM", id="qwen3"),
],
)
class TestGetTrainingChatTemplate:
Expand Down Expand Up @@ -464,7 +465,8 @@ def test_behavior_unchanged_generation_prompt_with_enable_thinking_false(self, t
@pytest.mark.parametrize(
"tokenizer_name",
[
pytest.param("trl-internal-testing/tiny-Qwen3MoeForSequenceClassification", id="qwen3"),
pytest.param("trl-internal-testing/tiny-Qwen3MoeForCausalLM", id="qwen3"),
pytest.param("trl-internal-testing/tiny-Qwen3VLForConditionalGeneration", id="qwen3_vl"),
pytest.param("trl-internal-testing/tiny-Qwen3_5ForConditionalGeneration", id="qwen35"),
pytest.param(
"trl-internal-testing/tiny-Gemma4ForConditionalGeneration",
Expand Down Expand Up @@ -498,8 +500,11 @@ def test_parse_response(self, tokenizer_name):
assert parsed == messages[-1]

def test_parse_response_with_reasoning_content(self, tokenizer_name):
if tokenizer_name == "trl-internal-testing/tiny-Gemma4ForConditionalGeneration":
pytest.skip("Gemma4 doesn't support inline reasoning_content.")
if tokenizer_name in [
"trl-internal-testing/tiny-Gemma4ForConditionalGeneration",
"trl-internal-testing/tiny-Qwen3VLForConditionalGeneration",
]:
pytest.skip("This tokenizer doesn't support inline reasoning_content.")
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
if getattr(tokenizer, "response_schema", None) is None:
tokenizer = add_response_schema(tokenizer)
Expand Down Expand Up @@ -585,7 +590,7 @@ def test_parse_response_multiple_tool_calls(self, tokenizer_name):
assert parsed == messages[-1]

def test_parse_response_malformed_tool_call(self, tokenizer_name):
if tokenizer_name != "trl-internal-testing/tiny-Qwen3MoeForSequenceClassification":
if tokenizer_name != "trl-internal-testing/tiny-Qwen3MoeForCausalLM":
pytest.skip("For simplicity, we only test the malformed tool call case on one tokenizer.")
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
if getattr(tokenizer, "response_schema", None) is None:
Expand Down
5 changes: 3 additions & 2 deletions trl/chat_template_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,9 @@ def clone_chat_template(

qwen3_chat_template = (_CHAT_TEMPLATES_DIR / "qwen3.jinja").read_text()

qwen3_5_chat_template_2b_and_below = (_CHAT_TEMPLATES_DIR / "qwen3_5_2b_and_below.jinja").read_text()
qwen3_vl_chat_template = (_CHAT_TEMPLATES_DIR / "qwen3_vl.jinja").read_text()

qwen3_5_chat_template_2b_and_below = (_CHAT_TEMPLATES_DIR / "qwen3_5_2b_and_below.jinja").read_text()

qwen3_5_chat_template_4b_and_above = (_CHAT_TEMPLATES_DIR / "qwen3_5_4b_and_above.jinja").read_text()

Expand Down Expand Up @@ -223,7 +224,7 @@ def add_response_schema(tokenizer: PreTrainedTokenizer) -> PreTrainedTokenizer:
{'role': 'assistant', 'content': '', 'tool_calls': [{'type': 'function', 'function': {'name': 'multiply', 'arguments': {'a': 3, 'b': 4}}}]}
```
"""
if tokenizer.chat_template == qwen3_chat_template:
if tokenizer.chat_template in [qwen3_chat_template, qwen3_vl_chat_template]:
tokenizer.response_schema = qwen3_schema
return tokenizer
if tokenizer.chat_template in [qwen3_5_chat_template_2b_and_below, qwen3_5_chat_template_4b_and_above]:
Expand Down
4 changes: 4 additions & 0 deletions trl/chat_templates/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ Used for identity comparison only.

Original Qwen3 chat template.

### `qwen3_vl.jinja`

Original Qwen3-VL chat template. Unlike text-only Qwen3, this template is already prefix-preserving (no conditional thinking blocks), so no training patch is needed.

### `qwen3_5_2b_and_below.jinja` / `qwen3_5_4b_and_above.jinja`

Original Qwen3.5 chat templates.
Expand Down
120 changes: 120 additions & 0 deletions trl/chat_templates/qwen3_vl.jinja
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
{#- Qwen3-VL chat template: ChatML-style turns (<|im_start|>role ... <|im_end|>)
    with image/video placeholder tokens and Hermes-style JSON tool calls.
    Every statement tag uses whitespace trimming, so these trimmed comment
    lines contribute nothing to the rendered output. -#}
{#- Tools branch: always open a system block, splice in the user-provided
    system prompt (string or list-of-parts) if present, then append the
    <tools> function-signature section and the <tool_call> instructions. -#}
{%- if tools %}
{{- '<|im_start|>system\n' }}
{%- if messages[0].role == 'system' %}
{%- if messages[0].content is string %}
{{- messages[0].content }}
{%- else %}
{%- for content in messages[0].content %}
{%- if 'text' in content %}
{{- content.text }}
{%- endif %}
{%- endfor %}
{%- endif %}
{{- '\n\n' }}
{%- endif %}
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
{%- for tool in tools %}
{{- "\n" }}
{{- tool | tojson }}
{%- endfor %}
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
{#- No tools: emit the system message (if any) as its own turn, untouched. -#}
{%- if messages[0].role == 'system' %}
{{- '<|im_start|>system\n' }}
{%- if messages[0].content is string %}
{{- messages[0].content }}
{%- else %}
{%- for content in messages[0].content %}
{%- if 'text' in content %}
{{- content.text }}
{%- endif %}
{%- endfor %}
{%- endif %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{#- Running counters for the optional add_vision_id labels
    ("Picture N: " / "Video N: "); namespace() makes them mutable
    inside the loops below. -#}
{%- set image_count = namespace(value=0) %}
{%- set video_count = namespace(value=0) %}
{#- Main message loop. Note: the system message (index 0) was consumed above
    but is not skipped here; only user/assistant/tool roles match a branch,
    so a leading system message simply renders nothing in this loop. -#}
{%- for message in messages %}
{%- if message.role == "user" %}
{{- '<|im_start|>' + message.role + '\n' }}
{%- if message.content is string %}
{{- message.content }}
{%- else %}
{%- for content in message.content %}
{%- if content.type == 'image' or 'image' in content or 'image_url' in content %}
{%- set image_count.value = image_count.value + 1 %}
{%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}
<|vision_start|><|image_pad|><|vision_end|>
{%- elif content.type == 'video' or 'video' in content %}
{%- set video_count.value = video_count.value + 1 %}
{%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}
<|vision_start|><|video_pad|><|vision_end|>
{%- elif 'text' in content %}
{{- content.text }}
{%- endif %}
{%- endfor %}
{%- endif %}
{{- '<|im_end|>\n' }}
{#- Assistant turn: text content first, then any tool calls rendered as
    <tool_call>{"name": ..., "arguments": ...}</tool_call> blocks. The
    conditional '\n' separates the first call from preceding content and
    each subsequent call from the previous one. String arguments are
    emitted verbatim (assumed pre-serialized JSON); otherwise tojson. -#}
{%- elif message.role == "assistant" %}
{{- '<|im_start|>' + message.role + '\n' }}
{%- if message.content is string %}
{{- message.content }}
{%- else %}
{%- for content_item in message.content %}
{%- if 'text' in content_item %}
{{- content_item.text }}
{%- endif %}
{%- endfor %}
{%- endif %}
{%- if message.tool_calls %}
{%- for tool_call in message.tool_calls %}
{%- if (loop.first and message.content) or (not loop.first) %}
{{- '\n' }}
{%- endif %}
{%- if tool_call.function %}
{%- set tool_call = tool_call.function %}
{%- endif %}
{{- '<tool_call>\n{"name": "' }}
{{- tool_call.name }}
{{- '", "arguments": ' }}
{%- if tool_call.arguments is string %}
{{- tool_call.arguments }}
{%- else %}
{{- tool_call.arguments | tojson }}
{%- endif %}
{{- '}\n</tool_call>' }}
{%- endfor %}
{%- endif %}
{{- '<|im_end|>\n' }}
{#- Tool results are wrapped in <tool_response> inside a *user* turn;
    consecutive tool messages are batched into a single
    <|im_start|>user ... <|im_end|> block (opened only when the previous
    message is not a tool message, closed only when the next one is not).
    Tool content may itself carry image/video parts. -#}
{%- elif message.role == "tool" %}
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
{{- '<|im_start|>user' }}
{%- endif %}
{{- '\n<tool_response>\n' }}
{%- if message.content is string %}
{{- message.content }}
{%- else %}
{%- for content in message.content %}
{%- if content.type == 'image' or 'image' in content or 'image_url' in content %}
{%- set image_count.value = image_count.value + 1 %}
{%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}
<|vision_start|><|image_pad|><|vision_end|>
{%- elif content.type == 'video' or 'video' in content %}
{%- set video_count.value = video_count.value + 1 %}
{%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}
<|vision_start|><|video_pad|><|vision_end|>
{%- elif 'text' in content %}
{{- content.text }}
{%- endif %}
{%- endfor %}
{%- endif %}
{{- '\n</tool_response>' }}
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{%- endfor %}
{#- Open an assistant turn for the model to complete when requested. -#}
{%- if add_generation_prompt %}
{{- '<|im_start|>assistant\n' }}
{%- endif %}
Loading