-
Notifications
You must be signed in to change notification settings - Fork 2.6k
Narrow prefix-preserving check to the actual requirement #5458
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
+90
−74
Merged
Changes from 5 commits
Commits
Show all changes
14 commits
Select commit
Hold shift + click to select a range
4b3aa51
Narrow prefix-preserving check to the actual requirement
qgallouedec 0894910
Merge branch 'main' into narrow-prefix-preserving-check
qgallouedec 730070b
Update chat template examples to use multiplication function calls
qgallouedec 4622d77
style
qgallouedec 8a00354
tools in dummy
qgallouedec cd8cbfc
Enhance chat template handling to include tool call formatting in mes…
qgallouedec 8b35321
align grpo and async
qgallouedec 160d6a0
Merge branch 'main' into narrow-prefix-preserving-check
qgallouedec 8dd341b
revert no content
qgallouedec 7e3ddd7
docstyle ignore
qgallouedec 71bf73f
Merge branch 'main' into narrow-prefix-preserving-check
qgallouedec 103d3c9
Merge branch 'main' into narrow-prefix-preserving-check
qgallouedec fffcb67
Merge branch 'main' into narrow-prefix-preserving-check
qgallouedec 87131e5
handle content=""
qgallouedec File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Some comments aren't visible on the classic Files Changed page.
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -630,6 +630,10 @@ def is_chat_template_prefix_preserving(tokenizer: PreTrainedTokenizer) -> bool: | |
| """ | ||
| Check whether the chat template preserves prefixes when applied. | ||
|
|
||
| A prefix-preserving chat template renders earlier messages identically regardless of what messages follow. This | ||
| property is required by `_get_tool_suffix_ids`, which extracts tool response formatting tokens by comparing | ||
| tokenizations with and without tool messages appended. | ||
|
|
||
| Args: | ||
| tokenizer (`PreTrainedTokenizer`): | ||
| Tokenizer instance to check. | ||
|
|
@@ -638,24 +642,22 @@ def is_chat_template_prefix_preserving(tokenizer: PreTrainedTokenizer) -> bool: | |
| `bool`: | ||
| `True` if the chat template preserves prefixes, `False` otherwise. | ||
| """ | ||
| # Use the same dummy messages as _get_tool_suffix_ids to test the exact property it relies on. | ||
| dummy_tool_calls = [{"type": "function", "function": {"name": "dummy", "arguments": {}}}] | ||
| messages1 = [ | ||
| {"role": "user", "content": "What color is the sky?"}, | ||
| {"role": "user", "content": "dummy"}, | ||
| {"role": "assistant", "tool_calls": dummy_tool_calls}, | ||
| ] | ||
| messages2 = [ | ||
| {"role": "user", "content": "What color is the sky?"}, | ||
| {"role": "assistant", "content": "It is blue."}, | ||
| ] | ||
| messages3 = [ | ||
| {"role": "user", "content": "What color is the sky?"}, | ||
| {"role": "assistant", "content": "It is blue."}, | ||
| {"role": "user", "content": "And at night?"}, | ||
| {"role": "user", "content": "dummy"}, | ||
| {"role": "assistant", "tool_calls": dummy_tool_calls}, | ||
| {"role": "tool", "name": "dummy", "content": "dummy"}, | ||
| ] | ||
|
|
||
| text1 = tokenizer.apply_chat_template(messages1, tokenize=False, add_generation_prompt=True) | ||
| text2 = tokenizer.apply_chat_template(messages2, tokenize=False) | ||
| text3 = tokenizer.apply_chat_template(messages3, tokenize=False) | ||
| text1 = tokenizer.apply_chat_template(messages1, tokenize=False) | ||
| text2 = tokenizer.apply_chat_template(messages2, tokenize=False, add_generation_prompt=True) | ||
|
|
||
| return text2.startswith(text1) and text3.startswith(text2) | ||
| return text2.startswith(text1) | ||
|
|
||
|
|
||
| # Modifications: | ||
|
|
@@ -749,33 +751,12 @@ def is_chat_template_prefix_preserving(tokenizer: PreTrainedTokenizer) -> bool: | |
| {%- endif %}""" | ||
|
|
||
|
|
||
| # Modifications: | ||
| # - {%- if '</think>' in content %} | ||
| # + {%- if '<think>' in content and '</think>' in content %} | ||
| # Always check for both tags to avoid edge cases where the model generates only one tag, which would otherwise be parsed incorrectly | ||
| # - {{- '<|im_start|>' + message.role + '\n' + content }} | ||
| # + {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n\n' + content }} | ||
| # Always include thinking block during training. It's important to have a prefix-preserving template. | ||
| def _patch_qwen3_5_training_template(template: str) -> str: | ||
| return template.replace( | ||
| "{%- if '</think>' in content %}", | ||
| "{%- if '<think>' in content and '</think>' in content %}", | ||
| ).replace( | ||
| "{{- '<|im_start|>' + message.role + '\\n' + content }}", | ||
| "{{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content + '\\n</think>\\n\\n' + content }}", | ||
| ) | ||
|
|
||
|
|
||
| qwen3_5_training_chat_template_2b_and_below = _patch_qwen3_5_training_template(qwen3_5_chat_template_2b_and_below) | ||
| qwen3_5_training_chat_template_4b_and_above = _patch_qwen3_5_training_template(qwen3_5_chat_template_4b_and_above) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

You mention Qwen3.5 doesn't need patching anymore. Does this depend on the transformers version?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

No, it's really about the chat template:

from transformers import AutoTokenizer, AutoProcessor
dummy_tool_calls = [{"type": "function", "function": {"name": "dummy", "arguments": {}}}]
messages1 = [
{"role": "user", "content": "dummy"},
{"role": "assistant", "content": "", "tool_calls": dummy_tool_calls},
]
messages2 = messages1 + [
{"role": "tool", "name": "dummy", "content": "dummy"},
]
model_id = "Qwen/Qwen3-0.6B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
text1 = tokenizer.apply_chat_template(messages1, tokenize=False)
text2 = tokenizer.apply_chat_template(messages2, tokenize=False, add_generation_prompt=True)
print(f"\n{'='*60}")
print(f"{model_id}")
print(f"Prefix-preserving: {text2.startswith(text1)}")
print(repr(text1))
print(repr(text2))
model_id = "Qwen/Qwen3.5-0.8B"
tokenizer = AutoProcessor.from_pretrained(model_id)
text1 = tokenizer.apply_chat_template(messages1, tokenize=False)
text2 = tokenizer.apply_chat_template(messages2, tokenize=False, add_generation_prompt=True)
print(f"\n{'='*60}")
print(f"{model_id}")
print(f"Prefix-preserving: {text2.startswith(text1)}")
print(repr(text1))
print(repr(text2))
||
|
|
||
|
|
||
| def get_training_chat_template(tokenizer: PreTrainedTokenizer) -> str | None: | ||
| r""" | ||
| Get a prefix-preserving chat template for training, if needed. | ||
|
|
||
| If the tokenizer's template isn't prefix-preserving, returns a training-compatible template (currently Qwen3 and | ||
| Qwen3.5 supported). Otherwise, returns `None`. | ||
| If the tokenizer's template isn't prefix-preserving, returns a training-compatible template (currently Qwen3 | ||
| supported). Otherwise, returns `None`. | ||
|
|
||
| Args: | ||
| tokenizer (`PreTrainedTokenizer`): | ||
|
|
@@ -793,27 +774,30 @@ def get_training_chat_template(tokenizer: PreTrainedTokenizer) -> str | None: | |
|
|
||
| >>> tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B") | ||
| >>> messages1 = [ | ||
| ... {"role": "user", "content": "What color is the sky?"}, | ||
| ... {"role": "assistant", "content": "It is blue."}, | ||
| ... {"role": "user", "content": "What is 2 * 3?"}, | ||
| ... { | ||
| ... "role": "assistant", | ||
| ... "tool_calls": [{"type": "function", "function": {"name": "multiply", "arguments": {"a": 2, "b": 3}}}], | ||
| ... }, | ||
| ... ] | ||
| >>> messages2 = [ | ||
| ... {"role": "user", "content": "What color is the sky?"}, | ||
| ... {"role": "assistant", "content": "It is blue."}, | ||
| ... {"role": "user", "content": "And at night?"}, | ||
| >>> messages2 = messages1 + [ | ||
| ... {"role": "tool", "name": "multiply", "content": "6"}, | ||
| ... ] | ||
| >>> tokenizer.apply_chat_template(messages1, tokenize=False) | ||
| '<|im_start|>user\nWhat color is the sky?<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\nIt is blue.<|im_end|>\n' | ||
| '<|im_start|>user\nWhat is 2 * 3?<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n<tool_call>\n{"name": "multiply", "arguments": {"a": 2, "b": 3}}\n</tool_call><|im_end|>\n' | ||
|
|
||
| >>> tokenizer.apply_chat_template(messages2, tokenize=False) | ||
| '<|im_start|>user\nWhat color is the sky?<|im_end|>\n<|im_start|>assistant\nIt is blue.<|im_end|>\n<|im_start|>user\nAnd at night?<|im_end|>\n' | ||
| >>> tokenizer.apply_chat_template(messages2, tokenize=False, add_generation_prompt=True) | ||
| '<|im_start|>user\nWhat is 2 * 3?<|im_end|>\n<|im_start|>assistant\n<tool_call>\n{"name": "multiply", "arguments": {"a": 2, "b": 3}}\n</tool_call><|im_end|>\n<|im_start|>user\n<tool_response>\n6\n</tool_response><|im_end|>\n<|im_start|>assistant\n' | ||
|
|
||
| >>> # ^ think tags missing | ||
| >>> # ^ think tags missing | ||
| >>> chat_template = get_training_chat_template(tokenizer) | ||
| >>> tokenizer.apply_chat_template(messages1, tokenize=False, chat_template=chat_template) | ||
| '<|im_start|>user\nWhat color is the sky?<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\nIt is blue.<|im_end|>\n' | ||
| '<|im_start|>user\nWhat is 2 * 3?<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n<tool_call>\n{"name": "multiply", "arguments": {"a": 2, "b": 3}}\n</tool_call><|im_end|>\n' | ||
|
|
||
| >>> tokenizer.apply_chat_template(messages2, tokenize=False, chat_template=chat_template) | ||
| '<|im_start|>user\nWhat color is the sky?<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\nIt is blue.<|im_end|>\n<|im_start|>user\nAnd at night?<|im_end|>\n' | ||
| >>> tokenizer.apply_chat_template( | ||
| ... messages2, tokenize=False, add_generation_prompt=True, chat_template=chat_template | ||
| ... ) | ||
| '<|im_start|>user\nWhat is 2 * 3?<|im_end|>\n<|im_start|>assistant\n<think>\n\n</think>\n\n<tool_call>\n{"name": "multiply", "arguments": {"a": 2, "b": 3}}\n</tool_call><|im_end|>\n<|im_start|>user\n<tool_response>\n6\n</tool_response><|im_end|>\n<|im_start|>assistant\n' | ||
| ``` | ||
| """ | ||
| # First check if patching is needed | ||
|
|
@@ -822,10 +806,6 @@ def get_training_chat_template(tokenizer: PreTrainedTokenizer) -> str | None: | |
|
|
||
| if tokenizer.chat_template == qwen3_chat_template: | ||
| return qwen3_training_chat_template | ||
| if tokenizer.chat_template == qwen3_5_chat_template_2b_and_below: | ||
| return qwen3_5_training_chat_template_2b_and_below | ||
| if tokenizer.chat_template == qwen3_5_chat_template_4b_and_above: | ||
| return qwen3_5_training_chat_template_4b_and_above | ||
| else: | ||
| raise ValueError( | ||
| "The tokenizer's chat template is not prefix-preserving and patching is not supported for this template. " | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.