Skip to content
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
4b3aa51
Narrow prefix-preserving check to the actual requirement
qgallouedec Apr 5, 2026
0894910
Merge branch 'main' into narrow-prefix-preserving-check
qgallouedec Apr 5, 2026
730070b
Update chat template examples to use multiplication function calls
qgallouedec Apr 5, 2026
4622d77
style
qgallouedec Apr 5, 2026
08d4c51
Move chat templates from inline strings to `.jinja` files
qgallouedec Apr 5, 2026
276559d
tools in dummy
qgallouedec Apr 5, 2026
673c35d
Add chat template files to MANIFEST.in
qgallouedec Apr 5, 2026
604c476
Enhance chat template handling to include tool call formatting in mes…
qgallouedec Apr 5, 2026
83a7ef6
align grpo and async
qgallouedec Apr 5, 2026
0f28384
Merge branch 'main' into chat-templates-files
qgallouedec Apr 6, 2026
e5d7cdf
revert no content
qgallouedec Apr 6, 2026
a618809
docstyle ignore
qgallouedec Apr 6, 2026
a0b81b1
Merge branch 'main' into chat-templates-files
qgallouedec Apr 6, 2026
67ab0af
Merge branch 'main' into chat-templates-files
qgallouedec Apr 6, 2026
b18e39e
Add GPT-OSS tool calling support
qgallouedec Apr 6, 2026
71ce5a0
fix gpt oss
qgallouedec Apr 6, 2026
8f1ad1e
Update tool suffix ID retrieval to use actual tool names for GPT-OSS …
qgallouedec Apr 6, 2026
9b9771d
style
qgallouedec Apr 6, 2026
b3f4481
align async
qgallouedec Apr 6, 2026
76a0f66
Merge branch 'main' into gpt-oss-tool-calling
qgallouedec Apr 7, 2026
0890038
Merge branch 'main' into gpt-oss-tool-calling
qgallouedec Apr 7, 2026
3253602
style
qgallouedec Apr 7, 2026
b95dbec
Merge branch 'main' into gpt-oss-tool-calling
qgallouedec Apr 7, 2026
ec81a1e
Merge branch 'main' into gpt-oss-tool-calling
qgallouedec Apr 8, 2026
450b9ef
Merge branch 'main' into gpt-oss-tool-calling
qgallouedec Apr 9, 2026
392dece
Apply suggestions from code review
qgallouedec Apr 9, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/grpo_trainer.md
Original file line number Diff line number Diff line change
Expand Up @@ -741,6 +741,7 @@ The returned images are automatically injected into the conversation and passed
Tested with:

- [**Gemma4**](https://huggingface.co/collections/google/gemma-4) — e.g., `google/gemma-4-E2B-it`
- [**GPT-OSS**](https://huggingface.co/collections/openai/gpt-oss) — e.g., `openai/gpt-oss-20b`
- [**Qwen3**](https://huggingface.co/collections/Qwen/qwen3) — e.g., `Qwen/Qwen3-0.6B`
- [**Qwen3.5**](https://huggingface.co/collections/Qwen/qwen35) — e.g., `Qwen/Qwen3.5-2B`

Expand Down
13 changes: 10 additions & 3 deletions tests/test_chat_template_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ def test_clone_with_sequence_classification_model(self):
@pytest.mark.parametrize(
"tokenizer_name",
[
pytest.param("trl-internal-testing/tiny-GptOssForCausalLM", id="gptoss"),
pytest.param("trl-internal-testing/tiny-Qwen3MoeForSequenceClassification", id="qwen3"),
pytest.param("trl-internal-testing/tiny-Qwen3_5ForConditionalGeneration", id="qwen35"),
],
Expand Down Expand Up @@ -396,6 +397,7 @@ def test_behavior_unchanged_generation_prompt_with_enable_thinking_false(self, t
@pytest.mark.parametrize(
"tokenizer_name",
[
pytest.param("trl-internal-testing/tiny-GptOssForCausalLM", id="gptoss"),
pytest.param("trl-internal-testing/tiny-Qwen3MoeForSequenceClassification", id="qwen3"),
pytest.param("trl-internal-testing/tiny-Qwen3_5ForConditionalGeneration", id="qwen35"),
pytest.param(
Expand Down Expand Up @@ -430,8 +432,11 @@ def test_parse_response(self, tokenizer_name):
assert parsed == messages[-1]

def test_parse_response_with_reasoning_content(self, tokenizer_name):
if tokenizer_name == "trl-internal-testing/tiny-Gemma4ForConditionalGeneration":
pytest.skip("Gemma4 doesn't support inline reasoning_content.")
if tokenizer_name in (
"trl-internal-testing/tiny-Gemma4ForConditionalGeneration",
"trl-internal-testing/tiny-GptOssForCausalLM",
):
pytest.skip("This model doesn't support inline reasoning_content.")
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
if getattr(tokenizer, "response_schema", None) is None:
tokenizer = add_response_schema(tokenizer)
Expand Down Expand Up @@ -490,7 +495,7 @@ def test_parse_response_tool_call_without_arguments(self, tokenizer_name):
tool_calls = [{"type": "function", "function": {"name": "ping", "arguments": {}}}]
messages = [
{"role": "user", "content": "Ping the service."},
{"role": "assistant", "tool_calls": tool_calls},
{"role": "assistant", "content": "", "tool_calls": tool_calls},
]
prefix = tokenizer.apply_chat_template(messages[:1], add_generation_prompt=True).input_ids
text = tokenizer.apply_chat_template(messages).input_ids
Expand All @@ -499,6 +504,8 @@ def test_parse_response_tool_call_without_arguments(self, tokenizer_name):
assert parsed == {"role": "assistant", "content": "", "tool_calls": tool_calls}

def test_parse_response_multiple_tool_calls(self, tokenizer_name):
if tokenizer_name == "trl-internal-testing/tiny-GptOssForCausalLM":
pytest.skip("GPT-OSS template only renders one tool call per assistant message.")
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
if getattr(tokenizer, "response_schema", None) is None:
tokenizer = add_response_schema(tokenizer)
Expand Down
50 changes: 49 additions & 1 deletion trl/chat_template_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,49 @@ def clone_chat_template(
return model, tokenizer, added_tokens


# Response schema for the GPT-OSS ("harmony") chat format, used by `add_response_schema`
# to parse a raw assistant completion back into a structured chat message.
# The `x-regex*` / `x-parser*` extension keys drive the regex-based response parser;
# the plain JSON-Schema keys (`type`, `properties`, `const`) describe the parsed result.
gptoss_schema = {
    # Normalize final content to analysis format so both map to the same "content" group.
    "x-regex-substitutions": [
        [r"<\|channel\|>final<\|message\|>(.*?)<\|return\|>", r"<|channel|>analysis<|message|>\1<|end|>"],
    ],
    # Top-level match: an optional analysis/content section followed by an optional
    # tool-call section. Named groups map onto the "content" and "tool_calls" properties.
    "x-regex": r"^(?:<\|channel\|>analysis<\|message\|>(?P<content>.*?)<\|end\|>(?:<\|start\|>assistant)?)?\s*(?P<tool_calls>to=functions\.\S+<\|channel\|>commentary json<\|message\|>.*?<\|call\|>)?$",
    "type": "object",
    "properties": {
        "role": {"const": "assistant"},
        "content": {"type": "string"},
        "tool_calls": {
            "type": "array",
            # Split the captured tool-call section into one match per call.
            "x-regex-iterator": r"(to=functions\.\S+<\|channel\|>commentary json<\|message\|>.*?<\|call\|>)",
            "items": {
                # Convert "to=functions.NAME<|channel|>commentary json<|message|>ARGS<|call|>"
                # into '{"name": "NAME", "arguments": ARGS}' so it can be parsed as JSON.
                "x-regex-substitutions": [
                    [
                        r"to=functions\.(\S+)<\|channel\|>commentary json<\|message\|>(.*?)<\|call\|>",
                        r'{"name": "\1", "arguments": \2}',
                    ],
                ],
                "x-parser": "json",
                # JMESPath-style transform: wrap the parsed object so each entry becomes
                # {"type": "function", "function": {...}} as expected by the chat format.
                "x-parser-args": {"transform": "{type: 'function', function: @}"},
                "type": "object",
                "properties": {
                    "type": {"const": "function"},
                    "function": {
                        "type": "object",
                        "properties": {
                            "name": {"type": "string"},
                            # Arguments are an arbitrary JSON object; any value type is allowed.
                            "arguments": {
                                "type": "object",
                                "additionalProperties": {},
                            },
                        },
                    },
                },
            },
        },
    },
}

# Adapted and corrected versions of the schemas from:
# https://github.com/huggingface/transformers/blob/main/tests/utils/test_chat_parsing_utils.py
qwen3_schema = {
Expand Down Expand Up @@ -183,11 +226,13 @@ def clone_chat_template(
},
}


# Reference chat templates, loaded once at import time from the packaged `.jinja`
# files. These are compared by identity (string equality) against a tokenizer's
# `chat_template` to decide which response schema to attach.
gptoss_chat_template = (_CHAT_TEMPLATES_DIR / "gptoss.jinja").read_text()

qwen3_chat_template = (_CHAT_TEMPLATES_DIR / "qwen3.jinja").read_text()

qwen3_5_chat_template_2b_and_below = (_CHAT_TEMPLATES_DIR / "qwen3_5_2b_and_below.jinja").read_text()


qwen3_5_chat_template_4b_and_above = (_CHAT_TEMPLATES_DIR / "qwen3_5_4b_and_above.jinja").read_text()


Expand Down Expand Up @@ -220,6 +265,9 @@ def add_response_schema(tokenizer: PreTrainedTokenizer) -> PreTrainedTokenizer:
{'role': 'assistant', 'content': '', 'tool_calls': [{'type': 'function', 'function': {'name': 'multiply', 'arguments': {'a': 3, 'b': 4}}}]}
```
"""
if tokenizer.chat_template == gptoss_chat_template:
tokenizer.response_schema = gptoss_schema
return tokenizer
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Prefix-preserving check diverges from suffix extraction construction

Low Severity

is_chat_template_prefix_preserving still uses a hardcoded "dummy" tool name, while _get_tool_suffix_ids was changed to use the real tool name via tool_messages[0]["name"]. The comment on line 350 explicitly states "Use the same dummy messages as _get_tool_suffix_ids", but the constructions now differ. For GPT-OSS, the tool name is embedded in the rendered text (e.g. to=functions.NAME), so the validation function is no longer testing the exact property that _get_tool_suffix_ids relies on.

Additional Locations (2)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 450b9ef. Configure here.

if tokenizer.chat_template == qwen3_chat_template:
tokenizer.response_schema = qwen3_schema
return tokenizer
Expand Down
4 changes: 4 additions & 0 deletions trl/chat_templates/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ Jinja2 chat templates stored here serve two purposes:

Used for identity comparison only.

### `gptoss.jinja`

Original GPT-OSS chat template.

### `qwen3.jinja`

Original Qwen3 chat template.
Expand Down
Loading
Loading