Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 41 additions & 1 deletion tests/chat_template/test_chat_template.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
from datetime import datetime
import os
import json
import parametrize
from unittest import TestCase
from transformers import AutoTokenizer
import torch
from packaging.version import Version
from transformers import __version__ as transformers_version
import unittest

from xtuner.v1.data_proto.templates import CHAT_TEMPLATE_MAP
from xtuner.v1.data_proto.messages import ChatMessages
from xtuner.v1.data_proto.messages.qwen35_chat import Qwen35ChatMessages, qwen35_tokenize_fn_slowspeed


QWEN3_PATH = os.environ["QWEN3_PATH"]
Expand Down Expand Up @@ -222,6 +228,40 @@ def test_deepseek_v3_template(self, template_type,thinking, tokenizer):
input_ids = _messages.tokenize(tokenizer, chat_template)['input_ids']

self.assertTrue((input_ids == input_ids_ref))


@unittest.skipIf(
Version(transformers_version) < Version("5.2.0"),
f"transformers >= 5.2.0 is required, but got {transformers_version}"
)
def test_qwen35vl_template(self):
QWEN35_VL_PATH = os.environ["QWEN3_5_MOE_PATH"]
chat_template = CHAT_TEMPLATE_MAP["qwen3.5-vl"]
tokenizer = AutoTokenizer.from_pretrained(QWEN35_VL_PATH, trust_remote_code=True)

jsonl_path = 'tests/resource/qwen35_tokenize_data.jsonl'
all_data= []
with open(jsonl_path, 'r') as f:
for line in f:
all_data.append(json.loads(line))

for j, data in enumerate(all_data):
if j in [13,14]: # video 肯定和 hf 对不上
continue
gt_token_ids, gt_labels = qwen35_tokenize_fn_slowspeed(tokenizer, data['messages'], tools=data.get('tools'), add_vision_id=True)
_messages = Qwen35ChatMessages(messages=data["messages"], tools=data.get("tools"))
tokenized = _messages.tokenize(tokenizer, chat_template, add_vision_id=True)
self.assertEqual(tokenized['input_ids'], gt_token_ids)
self.assertEqual(tokenized['labels'], gt_labels)


enable_thinking = any("reasoning_content" in msg for msg in data['messages'])
decode_str = tokenizer.decode(tokenized['input_ids'], skip_special_tokens=False)
hf_text = tokenizer.apply_chat_template(data['messages'],
tools=data.get('tools'),
add_vision_id=True,
tokenize=False,
enable_thinking=enable_thinking,
add_generation_prompt=False)
self.assertEqual(decode_str, hf_text)


338 changes: 338 additions & 0 deletions tests/datasets/test_qwen35_vl_tokenize_fn.py

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions tests/resource/mllm_pretrain_image_example_data_new.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{"id": 1, "messages": [{"role": "pretrain", "content": [{"type": "image", "image": {"url": "resource/mscoco_dog_000000319154.jpg", "image_wh": [375, 500]}},{"type":"text", "text": "图片中的狗是什么颜色?"}, {"type": "image", "image": {"url": "resource/mscoco_twocat_000000039769.jpg", "image_wh": [640, 480]}},{"type": "text", "text": "图中有几只猫?"}]}]}
{"id": 2, "messages": [{"role": "pretrain", "content": [{"type": "image", "image": {"url": "resource/mscoco_dog_000000319154.jpg", "image_wh": [375, 500]}}, {"type": "image", "image": {"url": "resource/mscoco_twocat_000000039769.jpg", "image_wh": [640, 480]}}, {"type": "text", "text": "这两张图片都有包括动物相关的内容,都包括了非常温馨的画面,太棒了!!!"}]}]}
{"id": 3, "messages": [{"role": "pretrain", "content": [{"type": "image", "image": {"url": "resource/mscoco_dog_000000319154.jpg", "image_wh": [375, 500]}}, {"type": "text", "text": "当狗凝视窗外的时候,它看起来是在渴望着什么东西。 这也许暗示了它想要到外面去或者与某人或某物进行互动。"}]}]}
{"id": 4, "messages": [{"role": "pretrain", "content": [{"type": "image", "image": {"url": "resource/mscoco_dog_000000319154.jpg", "image_wh": [375, 500]}}, {"type": "image", "image": {"url": "resource/mscoco_twocat_000000039769.jpg", "image_wh": [640, 480]}}, {"type": "text", "text": "这两张图片都有包括动物相关的内容,都包括了非常温馨的画面,太棒了!!!"}]}]}
{"id": 5, "messages": [{"role": "pretrain", "content": [{"type": "image", "image": {"url": "resource/mscoco_dog_000000319154.jpg", "image_wh": [375, 500]}}, {"type": "image", "image": {"url": "resource/mscoco_twocat_000000039769.jpg", "image_wh": [640, 480]}}, {"type": "text", "text": "这两张图片都有包括动物相关的内容,都包括了非常温馨的画面,太棒了!!!"}]}]}
{"id": 6, "messages": [{"role": "pretrain", "content": [{"type": "image", "image": {"url": "resource/mscoco_dog_000000319154.jpg", "image_wh": [375, 500]}}, {"type": "image", "image": {"url": "resource/mscoco_twocat_000000039769.jpg", "image_wh": [640, 480]}}, {"type": "image", "image": {"url": "resource/mscoco_dog_000000319154.jpg", "image_wh": [375, 500]}}, {"type": "text", "text": "这两张图片都有包括动物相关的内容,都包括了非常温馨的画面,太棒了!!!"}]}]}
{"id": 7, "messages": [{"role": "pretrain", "content": [{"type": "image", "image": {"url": "resource/mscoco_dog_000000319154.jpg", "image_wh": [375, 500]}}, {"type": "text", "text": "What color is the dog in the picture?"}, {"type": "image", "image": {"url": "resource/mscoco_twocat_000000039769.jpg", "image_wh": [640, 480]}}, {"type": "text", "text": "How many cats are in the picture?"}]}]}
{"id": 8, "messages": [{"role": "pretrain", "content": [{"type": "image", "image": {"url": "resource/mscoco_dog_000000319154.jpg", "image_wh": [375, 500]}}, {"type": "image", "image": {"url": "resource/mscoco_twocat_000000039769.jpg", "image_wh": [640, 480]}}, {"type": "text", "text": "Both of these pictures include animal-related content and very warm scenes, awesome!!!"}]}]}
{"id": 9, "messages": [{"role": "pretrain", "content": [{"type": "image", "image": {"url": "resource/mscoco_dog_000000319154.jpg", "image_wh": [375, 500]}}, {"type": "text", "text": "When the dog gazes out the window, it seems to be longing for something. This may imply that it wants to go outside or interact with someone or something."}]}]}
{"id": 10, "messages": [{"role": "pretrain", "content": [{"type": "image", "image": {"url": "resource/mscoco_dog_000000319154.jpg", "image_wh": [375, 500]}}, {"type": "image", "image": {"url": "resource/mscoco_twocat_000000039769.jpg", "image_wh": [640, 480]}}, {"type": "text", "text": "Both of these pictures include animal-related content and very warm scenes, awesome!!!"}]}]}
{"id": 11, "messages": [{"role": "pretrain", "content": [{"type": "image", "image": {"url": "resource/mscoco_dog_000000319154.jpg", "image_wh": [375, 500]}}, {"type": "image", "image": {"url": "resource/mscoco_twocat_000000039769.jpg", "image_wh": [640, 480]}}, {"type": "text", "text": "Both of these pictures include animal-related content and very warm scenes, awesome!!!"}]}]}
{"id": 12, "messages": [{"role": "pretrain", "content": [{"type": "image", "image": {"url": "resource/mscoco_dog_000000319154.jpg", "image_wh": [375, 500]}}, {"type": "image", "image": {"url": "resource/mscoco_twocat_000000039769.jpg", "image_wh": [640, 480]}}, {"type": "image", "image": {"url": "resource/mscoco_dog_000000319154.jpg", "image_wh": [375, 500]}}, {"type": "text", "text": "Both of these pictures include animal-related content and very warm scenes, awesome!!!"}]}]}
5 changes: 5 additions & 0 deletions tests/resource/mllm_pretrain_video_example_data_new.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{"id": 1, "messages": [{"role": "pretrain", "content": [{"type": "video", "video": {"url": "tennis.mp4", "image_wh": [1280, 720], "origin_video_length": 182,"origin_fps": 30.0}}, {"type": "text", "text": "请描述下视频内容?一男一女在打网球。请简要解释下网球,网球是一项运动,运动员使用球拍将球击打过网进入对方场地。目标是通过让球落入对方场地且对方无法回击来得分。网球可以单人对战(单打)或双人组队对战(双打)。"}]}]}
{"id": 2, "messages": [{"role": "pretrain", "content": [{"type": "video", "video": {"url": "tennis.mp4", "image_wh": [1280, 720], "origin_video_length": 182,"origin_fps": 30.0}}, {"type": "text", "text": "视频中在做什么?打网球"}]}]}
{"id": 3, "messages": [{"role": "pretrain", "content": [{"type": "video", "video": {"url": "tennis.mp4", "image_wh": [1280, 720], "origin_video_length": 182,"origin_fps": 30.0}}, {"type": "text", "text": "Please describe the video content? A man and a woman are playing tennis. Please briefly explain tennis. Tennis is a sport where players use rackets to hit the ball over the net into the opponent's court. The goal is to score points by making the ball land in the opponent's court and the opponent fails to return it. Tennis can be played as singles or doubles."}]}]}
{"id": 4, "messages": [{"role": "pretrain", "content": [{"type": "video", "video": {"url": "tennis.mp4", "image_wh": [1280, 720], "origin_video_length": 182,"origin_fps": 30.0}}, {"type": "text", "text": "What is happening in the video? Playing tennis"}]}]}
{"id": 5, "messages": [{"role": "pretrain", "content": [{"type": "video", "video": {"url": "tennis.mp4", "image_wh": [1280, 720], "origin_video_length": 182,"origin_fps": 30.0}}, {"type": "video", "video": {"url": "tennis.mp4", "image_wh": [1280, 720], "origin_video_length": 182,"origin_fps": 30.0}},{"type": "text", "text": "What is happening in the video? Playing tennis"}]}]}
11 changes: 11 additions & 0 deletions tests/resource/mllm_sft_multi_image_example_data_new.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{"id": 1, "messages": [{"role": "user", "content": [{"type": "image", "image": {"url": "resource/mscoco_dog_000000319154.jpg", "image_wh": [375, 500]}}, {"type": "image", "image": {"url": "resource/mscoco_twocat_000000039769.jpg", "image_wh": [640, 480]}}, {"type": "text", "text": "两张图片有啥相同之处?"}]}, {"role": "assistant", "content": "两幅图片中都存在动物。"}, {"role": "user", "content": "都有些什么动物?"}, {"role": "assistant", "content": "第一幅图片中有一只狗,第二副图片中有两只猫。"}]}
{"id": 2, "messages": [{"role": "user", "content": [{"type": "image", "image": {"url": "resource/mscoco_twocat_000000039769.jpg", "image_wh": [640, 480]}}, {"type": "image", "image": {"url": "resource/mscoco_dog_000000319154.jpg", "image_wh": [375, 500]}}, {"type": "text", "text": "两张图片有啥相同之处?"}]}, {"role": "assistant", "content": "两幅图片中都存在动物。"}, {"role": "user", "content": "都有些什么动物?"}, {"role": "assistant", "content": "第一幅图片中有两只猫,第二副图片中有一只狗。"}]}
{"id": 3, "messages": [{"role": "user", "content": [{"type": "image", "image": {"url": "resource/mscoco_twocat_000000039769.jpg", "image_wh": [640, 480]}}, {"type": "image", "image": {"url": "resource/mscoco_dog_000000319154.jpg", "image_wh": [375, 500]}}, {"type": "text", "text": "请描述下第二幅图片中的狗是什么颜色?"}]}, {"role": "assistant", "content": "图片中的狗是棕色的。"}]}
{"id": 4, "messages": [{"role": "user", "content": [{"type": "image", "image": {"url": "resource/mscoco_twocat_000000039769.jpg", "image_wh": [640, 480]}}, {"type": "image", "image": {"url": "resource/mscoco_dog_000000319154.jpg", "image_wh": [375, 500]}}, {"type": "text", "text": "请描述下第一幅图片中有几只猫?"}]}, {"role": "assistant", "content": "图片中有2只猫。"}, {"role": "user", "content": "第一幅图中还有啥东西?"}, {"role": "assistant", "content": "第一幅图片中还有2个电视遥控器"}, {"role": "user", "content": "两只猫在做什么?"}, {"role": "assistant", "content": "它们悠闲的躺在沙发上"}, {"role": "user", "content": "请描述下第一幅图片?"}, {"role": "assistant", "content": "图片中有两只猫,悠闲的躺在沙发上,旁边还有2个电视遥控器。"}]}
{"id": 5, "messages": [{"role": "user", "content": [{"type": "image", "image": {"url": "resource/mscoco_twocat_000000039769.jpg", "image_wh": [640, 480]}}, {"type": "image", "image": {"url": "resource/mscoco_dog_000000319154.jpg", "image_wh": [375, 500]}}, {"type": "text", "text": "请描述下第二幅图片中这只狗有什么类型的项圈?"}]}, {"role": "assistant", "content": "这只狗有一条红色的项圈。"}]}
{"id": 6, "messages": [{"role": "user", "content": [{"type": "image", "image": {"url": "resource/mscoco_twocat_000000039769.jpg", "image_wh": [375, 500]}}, {"type": "image", "image": {"url": "resource/mscoco_twocat_000000039769.jpg", "image_wh": [640, 480]}}, {"type": "text", "text": "What are the similarities between the two images?"}]}, {"role": "assistant", "content": "Both images contain animals."}, {"role": "user", "content": "What animals are there?"}, {"role": "assistant", "content": "The first image contains a dog, and the second image contains two cats."}]}
{"id": 7, "messages": [{"role": "user", "content": [{"type": "image", "image": {"url": "resource/mscoco_twocat_000000039769.jpg", "image_wh": [640, 480]}}, {"type": "image", "image": {"url": "resource/mscoco_dog_000000319154.jpg", "image_wh": [375, 500]}}, {"type": "text", "text": "What are the similarities between the two images?"}]}, {"role": "assistant", "content": "Both images contain animals."}, {"role": "user", "content": "What animals are there?"}, {"role": "assistant", "content": "The first image contains two cats, and the second image contains a dog."}]}
{"id": 8, "messages": [{"role": "user", "content": [{"type": "image", "image": {"url": "resource/mscoco_twocat_000000039769.jpg", "image_wh": [640, 480]}}, {"type": "image", "image": {"url": "resource/mscoco_dog_000000319154.jpg", "image_wh": [375, 500]}}, {"type": "text", "text": "Can you describe the color of the dog in the second image?"}]}, {"role": "assistant", "content": "The dog in the image is brown."}]}
{"id": 9, "messages": [{"role": "user", "content": [{"type": "image", "image": {"url": "resource/mscoco_twocat_000000039769.jpg", "image_wh": [640, 480]}}, {"type": "image", "image": {"url": "resource/mscoco_dog_000000319154.jpg", "image_wh": [375, 500]}}, {"type": "text", "text": "How many cats are in the first image?"}]}, {"role": "assistant", "content": "There are 2 cats in the image."}, {"role": "user", "content": "What else is in the first image?"}, {"role": "assistant", "content": "There are also 2 TV remotes in the first image."}, {"role": "user", "content": "What are the two cats doing?"}, {"role": "assistant", "content": "They are leisurely lying on the sofa."}, {"role": "user", "content": "Can you describe the first image?"}, {"role": "assistant", "content": "The image shows two cats leisurely lying on the sofa, with 2 TV remotes next to them."}]}
{"id": 10, "messages": [{"role": "user", "content": [{"type": "image", "image": {"url": "resource/mscoco_twocat_000000039769.jpg", "image_wh": [640, 480]}}, {"type": "image", "image": {"url": "resource/mscoco_dog_000000319154.jpg", "image_wh": [375, 500]}}, {"type": "text", "text": "Can you describe the type of collar the dog in the second image has?"}]}, {"role": "assistant", "content": "The dog has a red collar."}]}
{"id": 11, "messages": [{"role": "user", "content": [{"type": "image", "image": {"url": "resource/mscoco_twocat_000000039769.jpg", "image_wh": [640, 480]}}, {"type": "text", "text": "How many cats are in the first image?"}]}, {"role": "assistant", "content": "There are 2 cats in the image."}, {"role": "user", "content": "What else is in the first image?"}, {"role": "assistant", "content": "There are also 2 TV remotes in the first image."}, {"role": "user", "content": [{"type": "image", "image": {"url": "resource/mscoco_dog_000000319154.jpg", "image_wh": [375, 500]}}, {"type": "text", "text": "What are the two cats doing?"}]}, {"role": "assistant", "content": "They are leisurely lying on the sofa."}, {"role": "user", "content": "Can you describe the first image?"}, {"role": "assistant", "content": "The image shows two cats leisurely lying on the sofa, with 2 TV remotes next to them."}]}
Loading
Loading