-
Notifications
You must be signed in to change notification settings - Fork 2.6k
Add {% generation %} support to training chat templates
#5470
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 22 commits
4b3aa51
0894910
730070b
4622d77
08d4c51
276559d
673c35d
604c476
83a7ef6
0f28384
e5d7cdf
a618809
a0b81b1
67ab0af
63ec7d3
c838146
7b7f5d1
e37fe00
3c2fc8e
d887724
9d4d57f
632e8b1
b33bc66
8820a29
31e640f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -43,7 +43,7 @@ | |
| from transformers.trainer_utils import EvalPrediction | ||
| from transformers.utils import is_peft_available | ||
|
|
||
| from ..chat_template_utils import clone_chat_template | ||
| from ..chat_template_utils import clone_chat_template, get_training_chat_template | ||
| from ..data_utils import ( | ||
| apply_chat_template, | ||
| is_conversational, | ||
|
|
@@ -922,6 +922,13 @@ def __init__( | |
| "supported for conversational datasets." | ||
| ) | ||
|
|
||
| # When assistant_only_loss is enabled, swap in a training chat template with {% generation %} markers | ||
| # if the current template doesn't already have them. | ||
| if args.assistant_only_loss and "{% generation %}" not in processing_class.chat_template: | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Potential crash when chat_template is None — Low Severity. Reviewed by Cursor Bugbot for commit 9d4d57f. Configure here. |
||
| self.chat_template = get_training_chat_template(processing_class) | ||
| else: | ||
| self.chat_template = None | ||
|
|
||
| # Dataset | ||
| if self.padding_free and not args.packing and args.max_length is not None and not self._is_vision_dataset: | ||
| raise ValueError( | ||
|
|
@@ -1038,7 +1045,9 @@ def _tokenize( | |
| if isinstance(input, list): # conversational: list of message dicts | ||
| if self._is_vlm: | ||
| input = prepare_multimodal_messages(input) | ||
| result = processing_class.apply_chat_template(input, tokenize=True, return_dict=True, **kwargs) | ||
| result = processing_class.apply_chat_template( | ||
| input, tokenize=True, return_dict=True, chat_template=self.chat_template, **kwargs | ||
| ) | ||
qgallouedec marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| else: # non-conversational: plain text string | ||
| result = processing_class(text=input) | ||
| # VLMs emit a batch dimension even for single examples; unwrap it | ||
|
|
||


There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For future models, `get_training_chat_template` will be called with any chat template, not only non-prefix-preserving ones.