Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
4b3aa51
Narrow prefix-preserving check to the actual requirement
qgallouedec Apr 5, 2026
0894910
Merge branch 'main' into narrow-prefix-preserving-check
qgallouedec Apr 5, 2026
730070b
Update chat template examples to use multiplication function calls
qgallouedec Apr 5, 2026
4622d77
style
qgallouedec Apr 5, 2026
08d4c51
Move chat templates from inline strings to `.jinja` files
qgallouedec Apr 5, 2026
276559d
tools in dummy
qgallouedec Apr 5, 2026
673c35d
Add chat template files to MANIFEST.in
qgallouedec Apr 5, 2026
604c476
Enhance chat template handling to include tool call formatting in mes…
qgallouedec Apr 5, 2026
83a7ef6
align grpo and async
qgallouedec Apr 5, 2026
0f28384
Merge branch 'main' into chat-templates-files
qgallouedec Apr 6, 2026
e5d7cdf
revert no content
qgallouedec Apr 6, 2026
a618809
docstyle ignore
qgallouedec Apr 6, 2026
a0b81b1
Merge branch 'main' into chat-templates-files
qgallouedec Apr 6, 2026
2384da5
Add GLM-4-MoE tool calling support
qgallouedec Apr 6, 2026
14ac6a7
Merge branch 'main' into glm4moe-tool-calling
qgallouedec Apr 6, 2026
0b85443
Apply suggestions from code review
qgallouedec Apr 6, 2026
c9dfa1f
doc
qgallouedec Apr 6, 2026
e06d88c
Merge branch 'main' into glm4moe-tool-calling
qgallouedec Apr 7, 2026
303eac5
Merge branch 'main' into glm4moe-tool-calling
qgallouedec Apr 7, 2026
87e23ed
Merge branch 'main' into glm4moe-tool-calling
qgallouedec Apr 7, 2026
d8985b2
Merge branch 'main' into glm4moe-tool-calling
qgallouedec Apr 8, 2026
dff9615
Apply suggestion from @qgallouedec
qgallouedec Apr 8, 2026
c31e258
Merge branch 'main' into glm4moe-tool-calling
qgallouedec Apr 9, 2026
3fc2ca8
Merge branch 'main' into glm4moe-tool-calling
qgallouedec Apr 9, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/grpo_trainer.md
Original file line number Diff line number Diff line change
Expand Up @@ -741,6 +741,7 @@ The returned images are automatically injected into the conversation and passed
Tested with:

- [**Gemma4**](https://huggingface.co/collections/google/gemma-4) — e.g., `google/gemma-4-E2B-it`
- **GLM-4-MoE** ([4.5](https://huggingface.co/collections/zai-org/glm-45), [4.6](https://huggingface.co/collections/zai-org/glm-46) or [4.7](https://huggingface.co/collections/zai-org/glm-47)) — e.g., `zai-org/GLM-4.7`
- [**GPT-OSS**](https://huggingface.co/collections/openai/gpt-oss) — e.g., `openai/gpt-oss-20b`
- [**Qwen3**](https://huggingface.co/collections/Qwen/qwen3) — e.g., `Qwen/Qwen3-0.6B`
- [**Qwen3.5**](https://huggingface.co/collections/Qwen/qwen35) — e.g., `Qwen/Qwen3.5-2B`
Expand Down
2 changes: 2 additions & 0 deletions tests/test_chat_template_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ def test_clone_with_sequence_classification_model(self):
@pytest.mark.parametrize(
"tokenizer_name",
[
pytest.param("trl-internal-testing/tiny-Glm4MoeForCausalLM", id="glm4moe"),
pytest.param("trl-internal-testing/tiny-GptOssForCausalLM", id="gptoss"),
pytest.param("trl-internal-testing/tiny-Qwen3MoeForSequenceClassification", id="qwen3"),
pytest.param("trl-internal-testing/tiny-Qwen3_5ForConditionalGeneration", id="qwen35"),
Expand Down Expand Up @@ -465,6 +466,7 @@ def test_behavior_unchanged_generation_prompt_with_enable_thinking_false(self, t
@pytest.mark.parametrize(
"tokenizer_name",
[
pytest.param("trl-internal-testing/tiny-Glm4MoeForCausalLM", id="glm4moe"),
pytest.param("trl-internal-testing/tiny-GptOssForCausalLM", id="gptoss"),
pytest.param("trl-internal-testing/tiny-Qwen3MoeForSequenceClassification", id="qwen3"),
pytest.param("trl-internal-testing/tiny-Qwen3_5ForConditionalGeneration", id="qwen35"),
Expand Down
39 changes: 39 additions & 0 deletions trl/chat_template_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,41 @@ def clone_chat_template(
return model, tokenizer, added_tokens


# Response schema for parsing GLM-4-MoE assistant completions back into a
# structured message: optional <think>…</think> reasoning, visible content,
# and a trailing run of XML-style <tool_call>…</tool_call> blocks.
# Built bottom-up from named sub-schemas; the resulting dict is equal to the
# original inline literal and is consumed as plain data by the parser.

# `arguments`: <arg_key>/<arg_value> pairs inside one <tool_call>. Each value
# is handed to the JSON parser, falling back to the raw string when it is not
# valid JSON (`allow_non_json`).
_glm4moe_arguments_schema = {
    "type": "object",
    "x-regex-key-value": r"<arg_key>(?P<key>[^<]+)</arg_key>\s*\n<arg_value>(?P<value>.*?)</arg_value>",
    "default": {},
    "additionalProperties": {
        "x-parser": "json",
        "x-parser-args": {"allow_non_json": True},
    },
}

# One parsed tool call: the function name is the first non-space token right
# after <tool_call>, followed by the key/value argument pairs.
_glm4moe_tool_call_schema = {
    "type": "object",
    "properties": {
        "type": {"const": "function"},
        "function": {
            "type": "object",
            "properties": {
                "name": {"type": "string", "x-regex": r"^(\S+)"},
                "arguments": _glm4moe_arguments_schema,
            },
        },
    },
}

glm4moe_schema = {
    # Top-level split of a completion into reasoning_content / content /
    # tool_calls capture groups.
    "x-regex": r"^(?:\n?<think>\n?(?:(?P<reasoning_content>.*?\S.*?)\n?|[\s]*)</think>\s*)?(?P<content>.*?)(?:\n(?=<tool_call>))?(?=(?:<tool_call>|$))(?P<tool_calls>(?:<tool_call>.+?</tool_call>\s*)+)?$",
    "type": "object",
    "properties": {
        "role": {"const": "assistant"},
        "content": {"type": "string"},
        "reasoning_content": {"type": "string"},
        "tool_calls": {
            "type": "array",
            # Each match of this pattern yields one tool-call payload string.
            "x-regex-iterator": r"<tool_call>\s*(.+?)\s*</tool_call>",
            "items": _glm4moe_tool_call_schema,
        },
    },
}

gptoss_schema = {
# Normalize final content to analysis format so both map to the same "content" group.
"x-regex-substitutions": [
Expand Down Expand Up @@ -230,6 +265,8 @@ def clone_chat_template(
}


# Reference chat templates, loaded verbatim from `trl/chat_templates/*.jinja`.
# NOTE(review): these strings are compared for exact equality against
# `tokenizer.chat_template` (see `add_response_schema`), so the `.jinja` files
# must stay byte-identical to the upstream model templates.
# Read explicitly as UTF-8: `read_text()` otherwise uses the locale-dependent
# default encoding, which can fail on non-UTF-8 locales (e.g. Windows cp1252)
# if a template contains non-ASCII characters.
glm4moe_chat_template = (_CHAT_TEMPLATES_DIR / "glm4moe.jinja").read_text(encoding="utf-8")

gptoss_chat_template = (_CHAT_TEMPLATES_DIR / "gptoss.jinja").read_text(encoding="utf-8")

qwen3_chat_template = (_CHAT_TEMPLATES_DIR / "qwen3.jinja").read_text(encoding="utf-8")
Expand Down Expand Up @@ -268,6 +305,8 @@ def add_response_schema(tokenizer: PreTrainedTokenizer) -> PreTrainedTokenizer:
{'role': 'assistant', 'content': '', 'tool_calls': [{'type': 'function', 'function': {'name': 'multiply', 'arguments': {'a': 3, 'b': 4}}}]}
```
"""
if tokenizer.chat_template == glm4moe_chat_template:
tokenizer.response_schema = glm4moe_schema
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing return statement in GLM-4-MoE branch

High Severity

The glm4moe branch in add_response_schema sets the response_schema but is missing a return tokenizer statement. Execution falls through to the final ValueError, causing add_response_schema to always fail for GLM-4-MoE tokenizers. This breaks tool-calling support, for example, during GRPOTrainer initialization.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 3fc2ca8. Configure here.

if tokenizer.chat_template == gptoss_chat_template:
tokenizer.response_schema = gptoss_schema
return tokenizer
Expand Down
4 changes: 4 additions & 0 deletions trl/chat_templates/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ Jinja2 chat templates stored here serve two purposes:

Used for identity comparison only.

### `glm4moe.jinja`

Original GLM-4-MoE chat template.

### `gptoss.jinja`

Original GPT-OSS chat template.
Expand Down
103 changes: 103 additions & 0 deletions trl/chat_templates/glm4moe.jinja
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
[gMASK]<sop>
{%- if tools -%}
<|system|>
# Tools

You may call one or more functions to assist with the user query.

You are provided with function signatures within <tools></tools> XML tags:
<tools>
{% for tool in tools %}
{{ tool | tojson(ensure_ascii=False) }}
{% endfor %}
</tools>

For each function call, output the function name and arguments within the following XML format:
<tool_call>{function-name}
<arg_key>{arg-key-1}</arg_key>
<arg_value>{arg-value-1}</arg_value>
<arg_key>{arg-key-2}</arg_key>
<arg_value>{arg-value-2}</arg_value>
...
</tool_call>{%- endif -%}
{%- macro visible_text(content) -%}
{%- if content is string -%}
{{- content }}
{%- elif content is iterable and content is not mapping -%}
{%- for item in content -%}
{%- if item is mapping and item.type == 'text' -%}
{{- item.text }}
{%- elif item is string -%}
{{- item }}
{%- endif -%}
{%- endfor -%}
{%- else -%}
{{- content }}
{%- endif -%}
{%- endmacro -%}
{%- set ns = namespace(last_user_index=-1) %}
{%- for m in messages %}
{%- if m.role == 'user' %}
{% set ns.last_user_index = loop.index0 -%}
{%- endif %}
{%- endfor %}
{% for m in messages %}
{%- if m.role == 'user' -%}<|user|>
{{ visible_text(m.content) }}
{{- '/nothink' if (enable_thinking is defined and not enable_thinking and not visible_text(m.content).endswith("/nothink")) else '' -}}
{%- elif m.role == 'assistant' -%}
<|assistant|>
{%- set reasoning_content = '' %}
{%- set content = visible_text(m.content) %}
{%- if m.reasoning_content is string %}
{%- set reasoning_content = m.reasoning_content %}
{%- else %}
{%- if '</think>' in content %}
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
{%- endif %}
{%- endif %}
{%- if loop.index0 > ns.last_user_index and reasoning_content -%}
{{ '\n<think>' + reasoning_content.strip() + '</think>'}}
{%- else -%}
{{ '\n<think></think>' }}
{%- endif -%}
{%- if content.strip() -%}
{{ '\n' + content.strip() }}
{%- endif -%}
{% if m.tool_calls %}
{% for tc in m.tool_calls %}
{%- if tc.function %}
{%- set tc = tc.function %}
{%- endif %}
{{ '\n<tool_call>' + tc.name }}
{% set _args = tc.arguments %}
{% for k, v in _args.items() %}
<arg_key>{{ k }}</arg_key>
<arg_value>{{ v | tojson(ensure_ascii=False) if v is not string else v }}</arg_value>
{% endfor %}
</tool_call>{% endfor %}
{% endif %}
{%- elif m.role == 'tool' -%}
{%- if m.content is string -%}
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
{{- '<|observation|>' }}
{%- endif %}
{{- '\n<tool_response>\n' }}
{{- m.content }}
{{- '\n</tool_response>' }}
{%- else -%}
<|observation|>{% for tr in m.content %}

<tool_response>
{{ tr.output if tr.output is defined else tr }}
</tool_response>{% endfor -%}
{% endif -%}
{%- elif m.role == 'system' -%}
<|system|>
{{ visible_text(m.content) }}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
<|assistant|>{{- '\n<think></think>' if (enable_thinking is defined and not enable_thinking) else '' -}}
{%- endif -%}
Loading