lmdeploy/archs.py (6 changes: 3 additions & 3 deletions)
@@ -110,9 +110,9 @@ def check_vl_llm(backend: str, config: dict) -> bool:
         'InternVLChatModel', 'MiniCPMV', 'LlavaForConditionalGeneration', 'LlavaNextForConditionalGeneration',
         'Phi3VForCausalLM', 'Qwen2VLForConditionalGeneration', 'Qwen2_5_VLForConditionalGeneration',
         'Qwen3VLForConditionalGeneration', 'Qwen3VLMoeForConditionalGeneration', 'Qwen3_5ForConditionalGeneration',
-        'Qwen3_5MoeForConditionalGeneration', 'MllamaForConditionalGeneration', 'MolmoForCausalLM',
-        'Gemma3ForConditionalGeneration', 'Llama4ForConditionalGeneration', 'InternVLForConditionalGeneration',
-        'InternS1ForConditionalGeneration', 'InternS1ProForConditionalGeneration',
+        'Qwen3_5MoeForConditionalGeneration', 'Qwen3OmniMoeForConditionalGeneration', 'MllamaForConditionalGeneration',
+        'MolmoForCausalLM', 'Gemma3ForConditionalGeneration', 'Llama4ForConditionalGeneration',
+        'InternVLForConditionalGeneration', 'InternS1ForConditionalGeneration', 'InternS1ProForConditionalGeneration',
         'InternS1_1_ForConditionalGeneration', 'Glm4vForConditionalGeneration'
     ])
     if arch == 'QWenLMHeadModel' and 'visual' in config:
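This hunk only registers Qwen3OmniMoeForConditionalGeneration in the set of architectures treated as vision-language models. A minimal sketch of the effect; the shape of the config dict and the call below are assumed from the hunk's context lines, not verified against the full function:

    from lmdeploy.archs import check_vl_llm

    # Hypothetical HF-style config dict; the function reads the architecture name from it.
    config = {'architectures': ['Qwen3OmniMoeForConditionalGeneration']}
    assert check_vl_llm('pytorch', config)  # now recognized as a VL model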
lmdeploy/model.py (13 changes: 12 additions & 1 deletion)
@@ -688,8 +688,19 @@ class HFChatTemplate(BaseChatTemplate):
     def __init__(self, model_path: str = '', **kwargs):
         self.model_path = model_path
         try:
-            from transformers import AutoTokenizer
+            from transformers import AutoProcessor, AutoTokenizer
             self.tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+
+            # Some tokenizers do not have a chat_template; in that case, try to get the chat
+            # template from the processor. If that also fails, fall back to BaseChatTemplate.
+            if getattr(self.tokenizer, 'chat_template', None) is None:
+                try:
+                    processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
+                    self.tokenizer.chat_template = getattr(processor, 'chat_template', None)
+                except Exception as e:
+                    logger.warning(f'Failed to load processor from {model_path} for chat template. '
+                                   f'Falling back to the tokenizer only. Error: {e}')
+
             # Verify if the model can perform apply_chat_template with different roles.
             self.user_start, self.user_end, _, _ = self._user_instruction()
             self.assistant_start, self.assistant_end, _, _ = self._assistant_instruction()
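This fallback matters for multimodal checkpoints, where the chat template often ships with the processor rather than the tokenizer. A minimal usage sketch of the resulting behavior; the model id is illustrative and untested:

    from lmdeploy.model import HFChatTemplate

    # Hypothetical checkpoint whose tokenizer lacks a chat_template but whose
    # AutoProcessor carries one; HFChatTemplate now copies it onto the tokenizer.
    tmpl = HFChatTemplate(model_path='Qwen/Qwen3-Omni-30B-A3B-Instruct')
    messages = [{'role': 'user', 'content': 'Hello'}]
    prompt = tmpl.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    print(prompt)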
lmdeploy/pytorch/configurations/qwen3_omni.py (19 changes: 19 additions & 0 deletions, new file)
@@ -0,0 +1,19 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .builder import AutoModelConfigBuilder
+from .default import DefaultModelConfigBuilder
+
+
+class Qwen3OmniModelConfigBuilder(AutoModelConfigBuilder):
+
+    @classmethod
+    def condition(cls, hf_config):
+        """Check whether the config belongs to Qwen3-Omni MoE."""
+        return hf_config.model_type == 'qwen3_omni_moe'
+
+    @classmethod
+    def build(cls, hf_config, model_path: str = None, **kwargs):
+        """Build the model config from the thinker's text config."""
+        cfg = DefaultModelConfigBuilder.build(hf_config.thinker_config.text_config, model_path, **kwargs)
+        cfg.hf_config = hf_config
+        cfg.use_mrope = True
+        return cfg
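The builder follows the existing pattern in lmdeploy/pytorch/configurations: condition() keys off model_type, and build() delegates to the default builder with the thinker's text config, since only the thinker is served. A rough sketch of what that selection amounts to; the model id is illustrative and untested:

    from transformers import AutoConfig

    # Any qwen3_omni_moe checkpoint would do here.
    hf_config = AutoConfig.from_pretrained('Qwen/Qwen3-Omni-30B-A3B-Instruct', trust_remote_code=True)
    assert hf_config.model_type == 'qwen3_omni_moe'      # what condition() checks
    text_config = hf_config.thinker_config.text_config   # what build() hands to the default builder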
lmdeploy/pytorch/models/module_map.py (7 changes: 7 additions & 0 deletions)
@@ -190,6 +190,13 @@
     'Qwen3_5MTPModel': f'{LMDEPLOY_PYTORCH_MODEL_PATH}.qwen3_5_mtp.Qwen3_5MTPModel',
 })
 
+# qwen3 omni moe thinker
+# Only the thinker module is supported, so the arch is mapped to Qwen3OmniMoeThinkerForConditionalGeneration.
+MODULE_MAP.update({
+    'Qwen3OmniMoeForConditionalGeneration':
+    f'{LMDEPLOY_PYTORCH_MODEL_PATH}.qwen3_omni_moe_thinker.Qwen3OmniMoeThinkerForConditionalGeneration',
+})
+
 # starcoder2
 MODULE_MAP.update({
     'Starcoder2ForCausalLM': f'{LMDEPLOY_PYTORCH_MODEL_PATH}.starcoder2.Starcoder2ForCausalLM',
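MODULE_MAP keys are HF architecture names and values are dotted import paths to lmdeploy's PyTorch implementations, so the engine resolves the thinker class through this table. A quick sketch of the lookup, assuming LMDEPLOY_PYTORCH_MODEL_PATH resolves to 'lmdeploy.pytorch.models':

    from lmdeploy.pytorch.models.module_map import MODULE_MAP

    path = MODULE_MAP['Qwen3OmniMoeForConditionalGeneration']
    # Expected: 'lmdeploy.pytorch.models.qwen3_omni_moe_thinker.Qwen3OmniMoeThinkerForConditionalGeneration'
    print(path)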