Skip to content
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
4b3aa51
Narrow prefix-preserving check to the actual requirement
qgallouedec Apr 5, 2026
0894910
Merge branch 'main' into narrow-prefix-preserving-check
qgallouedec Apr 5, 2026
730070b
Update chat template examples to use multiplication function calls
qgallouedec Apr 5, 2026
4622d77
style
qgallouedec Apr 5, 2026
08d4c51
Move chat templates from inline strings to `.jinja` files
qgallouedec Apr 5, 2026
276559d
tools in dummy
qgallouedec Apr 5, 2026
673c35d
Add chat template files to MANIFEST.in
qgallouedec Apr 5, 2026
604c476
Enhance chat template handling to include tool call formatting in messages
qgallouedec Apr 5, 2026
83a7ef6
align grpo and async
qgallouedec Apr 5, 2026
0f28384
Merge branch 'main' into chat-templates-files
qgallouedec Apr 6, 2026
e5d7cdf
revert no content
qgallouedec Apr 6, 2026
a618809
docstyle ignore
qgallouedec Apr 6, 2026
a0b81b1
Merge branch 'main' into chat-templates-files
qgallouedec Apr 6, 2026
67ab0af
Merge branch 'main' into chat-templates-files
qgallouedec Apr 6, 2026
63ec7d3
Merge branch 'main' into chat-templates-files
qgallouedec Apr 7, 2026
c838146
Merge branch 'main' into chat-templates-files
qgallouedec Apr 7, 2026
7b7f5d1
revert old modif
qgallouedec Apr 7, 2026
8e31596
Add Qwen3-VL tool calling support
qgallouedec Apr 7, 2026
91e940e
Merge branch 'main' into qwen3vl-tool-calling
qgallouedec Apr 7, 2026
116d5c0
Merge branch 'main' into qwen3vl-tool-calling
qgallouedec Apr 8, 2026
e111044
Merge branch 'main' into qwen3vl-tool-calling
qgallouedec Apr 9, 2026
39f0f32
Merge branch 'main' into qwen3vl-tool-calling
qgallouedec Apr 9, 2026
535544b
Merge branch 'main' into qwen3vl-tool-calling
qgallouedec Apr 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ include CONTRIBUTING.md
include README.md
include trl/accelerate_configs/*.yaml
include trl/templates/*.md
include trl/chat_templates/*.jinja
include trl/chat_templates/*.md
include trl/skills/**/*.md
recursive-exclude * __pycache__
prune tests
1 change: 1 addition & 0 deletions docs/source/grpo_trainer.md
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,7 @@ Tested with:

- [**Gemma4**](https://huggingface.co/collections/google/gemma-4) — e.g., `google/gemma-4-E2B-it`
- [**Qwen3**](https://huggingface.co/collections/Qwen/qwen3) — e.g., `Qwen/Qwen3-0.6B`
- [**Qwen3-VL**](https://huggingface.co/collections/Qwen/qwen3-vl) — e.g., `Qwen/Qwen3-VL-2B-Instruct`
- [**Qwen3.5**](https://huggingface.co/collections/Qwen/qwen35) — e.g., `Qwen/Qwen3.5-2B`

> [!TIP]
Expand Down
21 changes: 13 additions & 8 deletions tests/test_chat_template_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,8 @@ def test_clone_with_sequence_classification_model(self):
@pytest.mark.parametrize(
"tokenizer_name",
[
pytest.param("trl-internal-testing/tiny-Qwen3MoeForSequenceClassification", id="qwen3"),
pytest.param("trl-internal-testing/tiny-Qwen3MoeForCausalLM", id="qwen3"),
pytest.param("trl-internal-testing/tiny-Qwen3VLForConditionalGeneration", id="qwen3_vl"),
pytest.param("trl-internal-testing/tiny-Qwen3_5ForConditionalGeneration", id="qwen35"),
],
)
Expand Down Expand Up @@ -145,7 +146,7 @@ def test_add_response_schema(self, tokenizer_name):

class TestIsChatTemplatePrefixPreserving:
def test_prefix_preserving_template(self):
tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen3MoeForSequenceClassification")
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same template, but it's more natural to use the CausalLM instead of the SequenceClassification

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't remember why we used Qwen3MoeForSequenceClassification in the first place

tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen3MoeForCausalLM")
# docstyle-ignore
tokenizer.chat_template = textwrap.dedent(r"""
{%- for message in messages %}
Expand Down Expand Up @@ -175,7 +176,7 @@ def test_prefix_preserving_template(self):
assert is_chat_template_prefix_preserving(tokenizer) is True

def test_non_prefix_preserving_template(self):
tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen3MoeForSequenceClassification")
tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen3MoeForCausalLM")
# The following template is quite typical of models like Qwen3 and GPT-OSS, where the thinking part (even
# empty) is only present for last assistant message, which makes it non-prefix-preserving: appending a tool
# message changes the earlier output.
Expand Down Expand Up @@ -240,7 +241,7 @@ def test_non_prefix_preserving_template(self):
@pytest.mark.parametrize(
"tokenizer_name",
[
pytest.param("trl-internal-testing/tiny-Qwen3MoeForSequenceClassification", id="qwen3"),
pytest.param("trl-internal-testing/tiny-Qwen3MoeForCausalLM", id="qwen3"),
],
)
class TestGetTrainingChatTemplate:
Expand Down Expand Up @@ -396,7 +397,8 @@ def test_behavior_unchanged_generation_prompt_with_enable_thinking_false(self, t
@pytest.mark.parametrize(
"tokenizer_name",
[
pytest.param("trl-internal-testing/tiny-Qwen3MoeForSequenceClassification", id="qwen3"),
pytest.param("trl-internal-testing/tiny-Qwen3MoeForCausalLM", id="qwen3"),
pytest.param("trl-internal-testing/tiny-Qwen3VLForConditionalGeneration", id="qwen3_vl"),
pytest.param("trl-internal-testing/tiny-Qwen3_5ForConditionalGeneration", id="qwen35"),
pytest.param(
"trl-internal-testing/tiny-Gemma4ForConditionalGeneration",
Expand Down Expand Up @@ -430,8 +432,11 @@ def test_parse_response(self, tokenizer_name):
assert parsed == messages[-1]

def test_parse_response_with_reasoning_content(self, tokenizer_name):
if tokenizer_name == "trl-internal-testing/tiny-Gemma4ForConditionalGeneration":
pytest.skip("Gemma4 doesn't support inline reasoning_content.")
if tokenizer_name in [
"trl-internal-testing/tiny-Gemma4ForConditionalGeneration",
"trl-internal-testing/tiny-Qwen3VLForConditionalGeneration",
]:
pytest.skip("This tokenizer doesn't support inline reasoning_content.")
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
if getattr(tokenizer, "response_schema", None) is None:
tokenizer = add_response_schema(tokenizer)
Expand Down Expand Up @@ -517,7 +522,7 @@ def test_parse_response_multiple_tool_calls(self, tokenizer_name):
assert parsed == messages[-1]

def test_parse_response_malformed_tool_call(self, tokenizer_name):
if tokenizer_name != "trl-internal-testing/tiny-Qwen3MoeForSequenceClassification":
if tokenizer_name != "trl-internal-testing/tiny-Qwen3MoeForCausalLM":
pytest.skip("For simplicity, we only test the malformed tool call case on one tokenizer.")
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
if getattr(tokenizer, "response_schema", None) is None:
Expand Down
Loading
Loading