Skip to content
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
4b3aa51
Narrow prefix-preserving check to the actual requirement
qgallouedec Apr 5, 2026
0894910
Merge branch 'main' into narrow-prefix-preserving-check
qgallouedec Apr 5, 2026
730070b
Update chat template examples to use multiplication function calls
qgallouedec Apr 5, 2026
4622d77
style
qgallouedec Apr 5, 2026
08d4c51
Move chat templates from inline strings to `.jinja` files
qgallouedec Apr 5, 2026
276559d
tools in dummy
qgallouedec Apr 5, 2026
673c35d
Add chat template files to MANIFEST.in
qgallouedec Apr 5, 2026
604c476
Enhance chat template handling to include tool call formatting in mes…
qgallouedec Apr 5, 2026
83a7ef6
align grpo and async
qgallouedec Apr 5, 2026
0f28384
Merge branch 'main' into chat-templates-files
qgallouedec Apr 6, 2026
e5d7cdf
revert no content
qgallouedec Apr 6, 2026
a618809
docstyle ignore
qgallouedec Apr 6, 2026
a0b81b1
Merge branch 'main' into chat-templates-files
qgallouedec Apr 6, 2026
67ab0af
Merge branch 'main' into chat-templates-files
qgallouedec Apr 6, 2026
b18e39e
Add GPT-OSS tool calling support
qgallouedec Apr 6, 2026
71ce5a0
fix gpt oss
qgallouedec Apr 6, 2026
8f1ad1e
Update tool suffix ID retrieval to use actual tool names for GPT-OSS …
qgallouedec Apr 6, 2026
9b9771d
style
qgallouedec Apr 6, 2026
b3f4481
align async
qgallouedec Apr 6, 2026
76a0f66
Merge branch 'main' into gpt-oss-tool-calling
qgallouedec Apr 7, 2026
0890038
Merge branch 'main' into gpt-oss-tool-calling
qgallouedec Apr 7, 2026
3253602
style
qgallouedec Apr 7, 2026
b95dbec
Merge branch 'main' into gpt-oss-tool-calling
qgallouedec Apr 7, 2026
ec81a1e
Merge branch 'main' into gpt-oss-tool-calling
qgallouedec Apr 8, 2026
450b9ef
Merge branch 'main' into gpt-oss-tool-calling
qgallouedec Apr 9, 2026
392dece
Apply suggestions from code review
qgallouedec Apr 9, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/grpo_trainer.md
Original file line number Diff line number Diff line change
Expand Up @@ -741,6 +741,7 @@ The returned images are automatically injected into the conversation and passed
Tested with:

- [**Gemma4**](https://huggingface.co/collections/google/gemma-4) — e.g., `google/gemma-4-E2B-it`
- [**GPT-OSS**](https://huggingface.co/collections/openai/gpt-oss) — e.g., `openai/gpt-oss-20b`
- [**Qwen3**](https://huggingface.co/collections/Qwen/qwen3) — e.g., `Qwen/Qwen3-0.6B`
- [**Qwen3.5**](https://huggingface.co/collections/Qwen/qwen35) — e.g., `Qwen/Qwen3.5-2B`

Expand Down
13 changes: 10 additions & 3 deletions tests/test_chat_template_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ def test_clone_with_sequence_classification_model(self):
@pytest.mark.parametrize(
"tokenizer_name",
[
pytest.param("trl-internal-testing/tiny-GptOssForCausalLM", id="gptoss"),
pytest.param("trl-internal-testing/tiny-Qwen3MoeForSequenceClassification", id="qwen3"),
pytest.param("trl-internal-testing/tiny-Qwen3_5ForConditionalGeneration", id="qwen35"),
],
Expand Down Expand Up @@ -396,6 +397,7 @@ def test_behavior_unchanged_generation_prompt_with_enable_thinking_false(self, t
@pytest.mark.parametrize(
"tokenizer_name",
[
pytest.param("trl-internal-testing/tiny-GptOssForCausalLM", id="gptoss"),
pytest.param("trl-internal-testing/tiny-Qwen3MoeForSequenceClassification", id="qwen3"),
pytest.param("trl-internal-testing/tiny-Qwen3_5ForConditionalGeneration", id="qwen35"),
pytest.param(
Expand Down Expand Up @@ -430,8 +432,11 @@ def test_parse_response(self, tokenizer_name):
assert parsed == messages[-1]

def test_parse_response_with_reasoning_content(self, tokenizer_name):
if tokenizer_name == "trl-internal-testing/tiny-Gemma4ForConditionalGeneration":
pytest.skip("Gemma4 doesn't support inline reasoning_content.")
if tokenizer_name in (
"trl-internal-testing/tiny-Gemma4ForConditionalGeneration",
"trl-internal-testing/tiny-GptOssForCausalLM",
):
pytest.skip("This model doesn't support inline reasoning_content.")
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
if getattr(tokenizer, "response_schema", None) is None:
tokenizer = add_response_schema(tokenizer)
Expand Down Expand Up @@ -490,7 +495,7 @@ def test_parse_response_tool_call_without_arguments(self, tokenizer_name):
tool_calls = [{"type": "function", "function": {"name": "ping", "arguments": {}}}]
messages = [
{"role": "user", "content": "Ping the service."},
{"role": "assistant", "tool_calls": tool_calls},
{"role": "assistant", "content": "", "tool_calls": tool_calls},
]
prefix = tokenizer.apply_chat_template(messages[:1], add_generation_prompt=True).input_ids
text = tokenizer.apply_chat_template(messages).input_ids
Expand All @@ -499,6 +504,8 @@ def test_parse_response_tool_call_without_arguments(self, tokenizer_name):
assert parsed == {"role": "assistant", "content": "", "tool_calls": tool_calls}

def test_parse_response_multiple_tool_calls(self, tokenizer_name):
if tokenizer_name == "trl-internal-testing/tiny-GptOssForCausalLM":
pytest.skip("GPT-OSS template only renders one tool call per assistant message.")
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
if getattr(tokenizer, "response_schema", None) is None:
tokenizer = add_response_schema(tokenizer)
Expand Down
50 changes: 49 additions & 1 deletion trl/chat_template_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,49 @@ def clone_chat_template(
return model, tokenizer, added_tokens


# Response schema for the GPT-OSS ("harmony") chat format, used by `add_response_schema`
# to parse a raw assistant completion back into a structured chat message.
# The `x-regex*` / `x-parser*` extension keys drive the regex-based response parser;
# the plain JSON-Schema keys (`type`, `properties`, `const`) describe the parsed result.
gptoss_schema = {
    # Normalize final content to analysis format so both map to the same "content" group.
    "x-regex-substitutions": [
        [r"<\|channel\|>final<\|message\|>(.*?)<\|return\|>", r"<|channel|>analysis<|message|>\1<|end|>"],
    ],
    # Top-level match: an optional analysis/content section followed by an optional
    # tool-call section. Named groups map onto the "content" and "tool_calls" properties.
    "x-regex": r"^(?:<\|channel\|>analysis<\|message\|>(?P<content>.*?)<\|end\|>(?:<\|start\|>assistant)?)?\s*(?P<tool_calls>to=functions\.\S+<\|channel\|>commentary json<\|message\|>.*?<\|call\|>)?$",
    "type": "object",
    "properties": {
        "role": {"const": "assistant"},
        "content": {"type": "string"},
        "tool_calls": {
            "type": "array",
            # Split the captured tool-call section into one match per call.
            "x-regex-iterator": r"(to=functions\.\S+<\|channel\|>commentary json<\|message\|>.*?<\|call\|>)",
            "items": {
                # Convert "to=functions.NAME<|channel|>commentary json<|message|>ARGS<|call|>"
                # into '{"name": "NAME", "arguments": ARGS}' so it can be parsed as JSON.
                "x-regex-substitutions": [
                    [
                        r"to=functions\.(\S+)<\|channel\|>commentary json<\|message\|>(.*?)<\|call\|>",
                        r'{"name": "\1", "arguments": \2}',
                    ],
                ],
                "x-parser": "json",
                # JMESPath-style transform: wrap the parsed object so each entry becomes
                # {"type": "function", "function": {...}} as expected by the chat format.
                "x-parser-args": {"transform": "{type: 'function', function: @}"},
                "type": "object",
                "properties": {
                    "type": {"const": "function"},
                    "function": {
                        "type": "object",
                        "properties": {
                            "name": {"type": "string"},
                            # Arguments are an arbitrary JSON object; any value type is allowed.
                            "arguments": {
                                "type": "object",
                                "additionalProperties": {},
                            },
                        },
                    },
                },
            },
        },
    },
}

# Adapted and corrected versions of the schemas from:
# https://github.com/huggingface/transformers/blob/main/tests/utils/test_chat_parsing_utils.py
qwen3_schema = {
Expand Down Expand Up @@ -183,11 +226,13 @@ def clone_chat_template(
},
}


# Reference chat templates, loaded once at import time from the packaged `.jinja`
# files. These are compared by identity (string equality) against a tokenizer's
# `chat_template` to decide which response schema to attach.
gptoss_chat_template = (_CHAT_TEMPLATES_DIR / "gptoss.jinja").read_text()

qwen3_chat_template = (_CHAT_TEMPLATES_DIR / "qwen3.jinja").read_text()

qwen3_5_chat_template_2b_and_below = (_CHAT_TEMPLATES_DIR / "qwen3_5_2b_and_below.jinja").read_text()


qwen3_5_chat_template_4b_and_above = (_CHAT_TEMPLATES_DIR / "qwen3_5_4b_and_above.jinja").read_text()


Expand Down Expand Up @@ -220,6 +265,9 @@ def add_response_schema(tokenizer: PreTrainedTokenizer) -> PreTrainedTokenizer:
{'role': 'assistant', 'content': '', 'tool_calls': [{'type': 'function', 'function': {'name': 'multiply', 'arguments': {'a': 3, 'b': 4}}}]}
```
"""
if tokenizer.chat_template == gptoss_chat_template:
tokenizer.response_schema = gptoss_schema
return tokenizer
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Prefix-preserving check diverges from suffix extraction construction

Low Severity

is_chat_template_prefix_preserving still uses a hardcoded "dummy" tool name, while _get_tool_suffix_ids was changed to use the real tool name via tool_messages[0]["name"]. The comment on line 350 explicitly states "Use the same dummy messages as _get_tool_suffix_ids", but the constructions now differ. For GPT-OSS, the tool name is embedded in the rendered text (e.g. to=functions.NAME), so the validation function is no longer testing the exact property that _get_tool_suffix_ids relies on.

Additional Locations (2)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 450b9ef. Configure here.

if tokenizer.chat_template == qwen3_chat_template:
tokenizer.response_schema = qwen3_schema
return tokenizer
Expand Down
4 changes: 4 additions & 0 deletions trl/chat_templates/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ Jinja2 chat templates stored here serve two purposes:

Used for identity comparison only.

### `gptoss.jinja`

Original GPT-OSS chat template.

### `qwen3.jinja`

Original Qwen3 chat template.
Expand Down
Loading
Loading