Skip to content
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
4b3aa51
Narrow prefix-preserving check to the actual requirement
qgallouedec Apr 5, 2026
0894910
Merge branch 'main' into narrow-prefix-preserving-check
qgallouedec Apr 5, 2026
730070b
Update chat template examples to use multiplication function calls
qgallouedec Apr 5, 2026
4622d77
style
qgallouedec Apr 5, 2026
08d4c51
Move chat templates from inline strings to `.jinja` files
qgallouedec Apr 5, 2026
276559d
tools in dummy
qgallouedec Apr 5, 2026
673c35d
Add chat template files to MANIFEST.in
qgallouedec Apr 5, 2026
604c476
Enhance chat template handling to include tool call formatting in messages
qgallouedec Apr 5, 2026
83a7ef6
align grpo and async
qgallouedec Apr 5, 2026
0f28384
Merge branch 'main' into chat-templates-files
qgallouedec Apr 6, 2026
e5d7cdf
revert no content
qgallouedec Apr 6, 2026
a618809
docstyle ignore
qgallouedec Apr 6, 2026
a0b81b1
Merge branch 'main' into chat-templates-files
qgallouedec Apr 6, 2026
67ab0af
Merge branch 'main' into chat-templates-files
qgallouedec Apr 6, 2026
63ec7d3
Merge branch 'main' into chat-templates-files
qgallouedec Apr 7, 2026
c838146
Merge branch 'main' into chat-templates-files
qgallouedec Apr 7, 2026
7b7f5d1
revert old modif
qgallouedec Apr 7, 2026
8e31596
Add Qwen3-VL tool calling support
qgallouedec Apr 7, 2026
91e940e
Merge branch 'main' into qwen3vl-tool-calling
qgallouedec Apr 7, 2026
116d5c0
Merge branch 'main' into qwen3vl-tool-calling
qgallouedec Apr 8, 2026
e111044
Merge branch 'main' into qwen3vl-tool-calling
qgallouedec Apr 9, 2026
39f0f32
Merge branch 'main' into qwen3vl-tool-calling
qgallouedec Apr 9, 2026
535544b
Merge branch 'main' into qwen3vl-tool-calling
qgallouedec Apr 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ include CONTRIBUTING.md
include README.md
include trl/accelerate_configs/*.yaml
include trl/templates/*.md
include trl/chat_templates/*.jinja
include trl/chat_templates/*.md
include trl/skills/**/*.md
recursive-exclude * __pycache__
prune tests
1 change: 1 addition & 0 deletions docs/source/grpo_trainer.md
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,7 @@ Tested with:

- [**Gemma4**](https://huggingface.co/collections/google/gemma-4) — e.g., `google/gemma-4-E2B-it`
- [**Qwen3**](https://huggingface.co/collections/Qwen/qwen3) — e.g., `Qwen/Qwen3-0.6B`
- [**Qwen3-VL**](https://huggingface.co/collections/Qwen/qwen3-vl) — e.g., `Qwen/Qwen3-VL-2B-Instruct`
- [**Qwen3.5**](https://huggingface.co/collections/Qwen/qwen35) — e.g., `Qwen/Qwen3.5-2B`

> [!TIP]
Expand Down
21 changes: 13 additions & 8 deletions tests/test_chat_template_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,8 @@ def test_clone_with_sequence_classification_model(self):
@pytest.mark.parametrize(
"tokenizer_name",
[
pytest.param("trl-internal-testing/tiny-Qwen3MoeForSequenceClassification", id="qwen3"),
pytest.param("trl-internal-testing/tiny-Qwen3MoeForCausalLM", id="qwen3"),
pytest.param("trl-internal-testing/tiny-Qwen3VLForConditionalGeneration", id="qwen3_vl"),
pytest.param("trl-internal-testing/tiny-Qwen3_5ForConditionalGeneration", id="qwen35"),
],
)
Expand Down Expand Up @@ -145,7 +146,7 @@ def test_add_response_schema(self, tokenizer_name):

class TestIsChatTemplatePrefixPreserving:
def test_prefix_preserving_template(self):
tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen3MoeForSequenceClassification")
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same template, but it's more natural to use the CausalLM instead of the SequenceClassification

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't remember why we used Qwen3MoeForSequenceClassification in the first place

tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen3MoeForCausalLM")
# docstyle-ignore
tokenizer.chat_template = textwrap.dedent(r"""
{%- for message in messages %}
Expand Down Expand Up @@ -175,7 +176,7 @@ def test_prefix_preserving_template(self):
assert is_chat_template_prefix_preserving(tokenizer) is True

def test_non_prefix_preserving_template(self):
tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen3MoeForSequenceClassification")
tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen3MoeForCausalLM")
# The following template is quite typical of models like Qwen3 and GPT-OSS, where the thinking part (even
# empty) is only present for last assistant message, which makes it non-prefix-preserving: appending a tool
# message changes the earlier output.
Expand Down Expand Up @@ -240,7 +241,7 @@ def test_non_prefix_preserving_template(self):
@pytest.mark.parametrize(
"tokenizer_name",
[
pytest.param("trl-internal-testing/tiny-Qwen3MoeForSequenceClassification", id="qwen3"),
pytest.param("trl-internal-testing/tiny-Qwen3MoeForCausalLM", id="qwen3"),
],
)
class TestGetTrainingChatTemplate:
Expand Down Expand Up @@ -396,7 +397,8 @@ def test_behavior_unchanged_generation_prompt_with_enable_thinking_false(self, t
@pytest.mark.parametrize(
"tokenizer_name",
[
pytest.param("trl-internal-testing/tiny-Qwen3MoeForSequenceClassification", id="qwen3"),
pytest.param("trl-internal-testing/tiny-Qwen3MoeForCausalLM", id="qwen3"),
pytest.param("trl-internal-testing/tiny-Qwen3VLForConditionalGeneration", id="qwen3_vl"),
pytest.param("trl-internal-testing/tiny-Qwen3_5ForConditionalGeneration", id="qwen35"),
pytest.param(
"trl-internal-testing/tiny-Gemma4ForConditionalGeneration",
Expand Down Expand Up @@ -430,8 +432,11 @@ def test_parse_response(self, tokenizer_name):
assert parsed == messages[-1]

def test_parse_response_with_reasoning_content(self, tokenizer_name):
if tokenizer_name == "trl-internal-testing/tiny-Gemma4ForConditionalGeneration":
pytest.skip("Gemma4 doesn't support inline reasoning_content.")
if tokenizer_name in [
"trl-internal-testing/tiny-Gemma4ForConditionalGeneration",
"trl-internal-testing/tiny-Qwen3VLForConditionalGeneration",
]:
pytest.skip("This tokenizer doesn't support inline reasoning_content.")
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
if getattr(tokenizer, "response_schema", None) is None:
tokenizer = add_response_schema(tokenizer)
Expand Down Expand Up @@ -517,7 +522,7 @@ def test_parse_response_multiple_tool_calls(self, tokenizer_name):
assert parsed == messages[-1]

def test_parse_response_malformed_tool_call(self, tokenizer_name):
if tokenizer_name != "trl-internal-testing/tiny-Qwen3MoeForSequenceClassification":
if tokenizer_name != "trl-internal-testing/tiny-Qwen3MoeForCausalLM":
pytest.skip("For simplicity, we only test the malformed tool call case on one tokenizer.")
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
if getattr(tokenizer, "response_schema", None) is None:
Expand Down
Loading
Loading