Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion autotest/config_ascend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ pytorch_quantization:
- Qwen/Qwen3-VL-8B-Instruct
- Qwen/Qwen3-VL-32B-Instruct

longtext_model:
longtext_benchmark_model:
- Qwen/Qwen3-30B-A3B

benchmark_model:
Expand Down
270 changes: 178 additions & 92 deletions autotest/config_h.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,33 +12,34 @@ device: cuda

config:
tp:
Qwen/Qwen3-235B-A22B-FP8: 4
internlm/Intern-S1: 4
Qwen/Qwen3-235B-A22B-Thinking-2507-FP8: 4
Qwen/Qwen3-30B-A3B: 2
Qwen/Qwen3-32B: 2
openai/gpt-oss-120b: 2
openai/gpt-oss-120b-BF16: 4
openai/gpt-oss-20b-BF16: 2
deepseek/DeepSeek-V3.1: 8
openai/gpt-oss-20b: 2
unsloth/gpt-oss-20b-BF16: 2
deepseek-ai/DeepSeek-V3.1: 8
Qwen/Qwen3-30B-A3B-Base: 2
JetLM/SDAR-30B-A3B-Sci: 2
moonshotai/Kimi-K2-Instruct-0905: 16
Qwen/Qwen3-235B-A22B-Thinking-2507: 8
OpenGVLab/InternVL3_5-38B: 2
Qwen/Qwen3-VL-30B-A3B-Instruct: 2
internlm/Intern-S1-Pro-FP8: 16
zai-org/GLM-5: 16
Qwen/Qwen3.5-27B: 2
Qwen/Qwen3.5-35B-A3B: 2
Qwen/Qwen3.5-122B-A10B: 4
meta-llama/Llama-4-Scout-17B-16E-Instruct: 4
meta-llama/Meta-Llama-3.1-70B-Instruct: 4
OpenGVLab/InternVL3-38B: 2
Qwen/Qwen2.5-VL-32B-Instruct: 2
deepseek-ai/DeepSeek-V2-Lite-Chat: 2
mistralai/Mixtral-8x7B-Instruct-v0.1: 2
OpenGVLab/InternVL3_5-30B-A3B: 2
zai-org/GLM-4.7-Flash: 2
google/gemma-3-27b-it: 2

dp_ep:
moonshotai/Kimi-K2-Instruct-0905:
dp: 16
ep: 16
Qwen/Qwen3-235B-A22B-Thinking-2507:
dp: 8
ep: 8
internlm/Intern-S1-Pro-FP8:
dp: 16
ep: 16

cp_tp:
Qwen/Qwen3-235B-A22B-Thinking-2507:
Expand All @@ -48,64 +49,109 @@ config:

turbomind_chat_model:
tp:
- Qwen/Qwen3-0.6B-FP8
- Qwen/Qwen3-1.7B-FP8
- Qwen/Qwen3-4B-FP8
- Qwen/Qwen3-8B-FP8
- Qwen/Qwen3-14B-FP8
- Qwen/Qwen3-235B-A22B-Thinking-2507-FP8
- Qwen/Qwen3-235B-A22B-Thinking-2507
- meta-llama/Llama-3.2-1B-Instruct
- meta-llama/Llama-3.2-3B-Instruct
- meta-llama/Meta-Llama-3.1-8B-Instruct
- meta-llama/Meta-Llama-3.1-70B-Instruct
- meta-llama/Meta-Llama-3-8B-Instruct
- internlm/internlm3-8b-instruct
- internlm/internlm3-8b-instruct-awq
- OpenGVLab/InternVL3-8B
- OpenGVLab/InternVL3-38B
- OpenGVLab/InternVL3_5-30B-A3B
- Qwen/Qwen3-0.6B
- Qwen/Qwen3-4B
- Qwen/Qwen3-8B
- Qwen/Qwen3-32B-FP8
- Qwen/Qwen3-30B-A3B
- Qwen/Qwen3-30B-A3B-FP8
- Qwen/Qwen3-32B
- Qwen/Qwen3-32B-FP8
- Qwen/Qwen3-235B-A22B-Thinking-2507
- Qwen/Qwen3-235B-A22B-Thinking-2507-FP8
- Qwen/Qwen2.5-VL-7B-Instruct
- Qwen/Qwen2.5-VL-32B-Instruct
- Qwen/Qwen1.5-MoE-A2.7B-Chat
- mistralai/Mixtral-8x7B-Instruct-v0.1
- OpenGVLab/InternVL3_5-38B
- deepseek-ai/DeepSeek-V2-Lite-Chat
- THUDM/glm-4-9b-chat
- openai/gpt-oss-120b
- openai/gpt-oss-20b

cp_tp:
- Qwen/Qwen3-235B-A22B-Thinking-2507


pytorch_chat_model:
tp:
- Qwen/Qwen3-0.6B-FP8
- Qwen/Qwen3-1.7B-FP8
- Qwen/Qwen3-4B-FP8
- Qwen/Qwen3-8B-FP8
- Qwen/Qwen3-14B-FP8
- Qwen/Qwen3-235B-A22B-Thinking-2507
- Qwen/Qwen3-235B-A22B-Thinking-2507-FP8
- meta-llama/Llama-4-Scout-17B-16E-Instruct
- meta-llama/Llama-3.2-1B-Instruct
- meta-llama/Llama-3.2-3B-Instruct
- meta-llama/Meta-Llama-3.1-8B-Instruct
- meta-llama/Meta-Llama-3.1-70B-Instruct
- meta-llama/Meta-Llama-3-8B-Instruct
- internlm/internlm3-8b-instruct
- OpenGVLab/InternVL3-8B
- OpenGVLab/InternVL3-38B
- OpenGVLab/InternVL3_5-30B-A3B
- Qwen/Qwen3-0.6B
- Qwen/Qwen3-4B
- Qwen/Qwen3-8B
- Qwen/Qwen3-32B-FP8
- Qwen/Qwen3-30B-A3B
- Qwen/Qwen3-30B-A3B-FP8
- Qwen/Qwen3-32B
- Qwen/Qwen3-32B-FP8
- Qwen/Qwen3-235B-A22B-Thinking-2507
- Qwen/Qwen3-235B-A22B-Thinking-2507-FP8
- Qwen/Qwen3-VL-8B-Instruct
- Qwen/Qwen3-VL-30B-A3B-Instruct
- OpenGVLab/InternVL3_5-38B
- unsloth/gpt-oss-120b-BF16
- THUDM/cogvlm-chat-hf
- THUDM/cogvlm2-llama3-chinese-chat-19B
- THUDM/glm-4v-9b
- THUDM/glm-4-9b-chat
- zai-org/GLM-4.7-Flash
- microsoft/Phi-3.5-vision-instruct
- microsoft/Phi-3-vision-128k-instruct
- zai-org/GLM-5
- Qwen/Qwen3.5-27B
- Qwen/Qwen3.5-35B-A3B
- Qwen/Qwen3.5-122B-A10B
- deepseek-ai/DeepSeek-V3.1
- unsloth/gpt-oss-20b-BF16
- deepseek/DeepSeek-V3.1
- moonshotai/Kimi-K2-Instruct-0905
- internlm/Intern-S1-Pro-FP8
- JetLM/SDAR-30B-A3B-Sci
- google/gemma-3-27b-it
- OpenGVLab/InternVL3_5-38B

dp_ep:
- moonshotai/Kimi-K2-Instruct-0905
- Qwen/Qwen3-235B-A22B-Thinking-2507
- internlm/Intern-S1-Pro-FP8

turbomind_vl_model:
tp:
- OpenGVLab/InternVL3-8B
- OpenGVLab/InternVL3-38B
- OpenGVLab/InternVL3_5-30B-A3B
- Qwen/Qwen2.5-VL-7B-Instruct
- Qwen/Qwen2.5-VL-32B-Instruct
- OpenGVLab/InternVL3_5-38B


pytorch_vl_model:
tp:
- OpenGVLab/InternVL3_5-38B
- OpenGVLab/InternVL3-8B
- OpenGVLab/InternVL3_5-30B-A3B
- Qwen/Qwen3-VL-8B-Instruct
- Qwen/Qwen3-VL-30B-A3B-Instruct
- THUDM/cogvlm-chat-hf
- THUDM/cogvlm2-llama3-chinese-chat-19B
- THUDM/glm-4v-9b
- microsoft/Phi-3-vision-128k-instruct
- microsoft/Phi-3.5-vision-instruct
- Qwen/Qwen3.5-27B
- Qwen/Qwen3.5-35B-A3B
- Qwen/Qwen3.5-122B-A10B
- OpenGVLab/InternVL3_5-38B


turbomind_base_model:
tp:
- Qwen/Qwen3-4B-FP8
- openai/gpt-oss-20b
- Qwen/Qwen3-8B-Base
- Qwen/Qwen3-30B-A3B-Base

pytorch_base_model:
tp:
Expand All @@ -114,94 +160,134 @@ pytorch_base_model:

turbomind_quantization:
no_awq:
- Qwen/Qwen3-0.6B-FP8
- Qwen/Qwen3-1.7B-FP8
- Qwen/Qwen3-4B-FP8
- Qwen/Qwen3-8B-FP8
- Qwen/Qwen3-14B-FP8
- Qwen/Qwen3-235B-A22B-Thinking-2507-FP8
- meta-llama/Meta-Llama-3.1-70B-Instruct
- internlm/internlm3-8b-instruct
- Qwen/Qwen3-30B-A3B
- Qwen/Qwen3-30B-A3B-FP8
- Qwen/Qwen3-32B
- Qwen/Qwen3-235B-A22B-Thinking-2507
- Qwen/Qwen3-235B-A22B-Thinking-2507-FP8
- Qwen/Qwen3-32B-FP8
- Qwen/Qwen3-30B-A3B-FP8
- Qwen/Qwen3-30B-A3B-Base
- Qwen/Qwen1.5-MoE-A2.7B-Chat
- Qwen/Qwen2.5-VL-7B-Instruct
- Qwen/Qwen2.5-VL-32B-Instruct
- OpenGVLab/InternVL3_5-30B-A3B
- deepseek-ai/DeepSeek-V2-Lite-Chat
- openai/gpt-oss-120b
- openai/gpt-oss-20b
- Qwen/Qwen3-235B-A22B-Thinking-2507
- microsoft/Phi-3-mini-4k-instruct
- THUDM/glm-4v-9b
- THUDM/glm-4-9b-chat

gptq:
- empty
no_kvint4:
- Qwen/Qwen3-0.6B-FP8
- Qwen/Qwen3-1.7B-FP8
- Qwen/Qwen3-4B-FP8
- Qwen/Qwen3-8B-FP8
- Qwen/Qwen3-14B-FP8
- Qwen/Qwen3-235B-A22B-Thinking-2507-FP8
- meta-llama/Llama-3.2-1B-Instruct
- OpenGVLab/InternVL3-2B
- OpenGVLab/InternVL3-8B
- Qwen/Qwen3-0.6B
- Qwen/Qwen3-4B
- Qwen/Qwen3-8B
- Qwen/Qwen3-32B-FP8
- Qwen/Qwen3-30B-A3B
- Qwen/Qwen3-30B-A3B-FP8
- Qwen/Qwen3-32B
- Qwen/Qwen3-32B-FP8
- Qwen/Qwen3-235B-A22B-Thinking-2507
- Qwen/Qwen3-235B-A22B-Thinking-2507-FP8
- Qwen/Qwen2.5-VL-7B-Instruct
- Qwen/Qwen2.5-VL-32B-Instruct
- Qwen/Qwen1.5-MoE-A2.7B-Chat
- Qwen/Qwen3-8B-Base
- Qwen/Qwen3-30B-A3B-Base
- deepseek-ai/DeepSeek-V2-Lite-Chat
- openai/gpt-oss-120b
- openai/gpt-oss-20b
- Qwen/Qwen3-235B-A22B-Thinking-2507
no_kvint8:
- Qwen/Qwen3-235B-A22B-Thinking-2507
- deepseek-ai/DeepSeek-V2-Chat
- Qwen/Qwen3-235B-A22B-Thinking-2507-FP8

pytorch_quantization:
awq:
- empty
- meta-llama/Llama-3.2-3B-Instruct
- meta-llama/Meta-Llama-3-8B-Instruct
- meta-llama/Meta-Llama-3.1-8B-Instruct
- internlm/internlm3-8b-instruct
- Qwen/Qwen3-0.6B
- Qwen/Qwen3-4B
- Qwen/Qwen3-8B
w8a8:
- empty
- meta-llama/Llama-3.2-1B-Instruct
- meta-llama/Meta-Llama-3-8B-Instruct
- meta-llama/Meta-Llama-3.1-8B-Instruct
- internlm/internlm3-8b-instruct
- microsoft/Phi-3-mini-4k-instruct
no_kvint4:
- meta-llama/Llama-3.2-1B-Instruct
- OpenGVLab/InternVL3-2B
- OpenGVLab/InternVL3-8B
- Qwen/Qwen3-8B-Base
- Qwen/Qwen3-0.6B-FP8
- Qwen/Qwen3-1.7B-FP8
- Qwen/Qwen3-4B-FP8
- Qwen/Qwen3-8B-FP8
- Qwen/Qwen3-14B-FP8
- Qwen/Qwen3-235B-A22B-Thinking-2507-FP8
- Qwen/Qwen3-30B-A3B-Base
- Qwen/Qwen3-0.6B
- Qwen/Qwen3-4B
- Qwen/Qwen3-8B
- Qwen/Qwen3-32B-FP8
- Qwen/Qwen3-30B-A3B
- Qwen/Qwen3-30B-A3B-FP8
- Qwen/Qwen3-32B
- Qwen/Qwen3-32B-FP8
- moonshotai/Kimi-K2-Instruct-0905
- Qwen/Qwen3-235B-A22B-Thinking-2507
- internlm/Intern-S1-Pro-FP8
- JetLM/SDAR-30B-A3B-Sci
- deepseek/DeepSeek-V3.1
- Qwen/Qwen3-235B-A22B-Thinking-2507-FP8
- Qwen/Qwen3-VL-8B-Instruct
- Qwen/Qwen3-VL-30B-A3B-Instruct
- microsoft/Phi-3-vision-128k-instruct
- microsoft/Phi-3.5-vision-instruct
- zai-org/GLM-4.7-Flash
- zai-org/GLM-5
- Qwen/Qwen3.5-27B
- Qwen/Qwen3.5-35B-A3B
- Qwen/Qwen3.5-122B-A10B
- deepseek-ai/DeepSeek-V3.1
no_kvint8:
- Qwen/Qwen3-235B-A22B-Thinking-2507
- internlm/Intern-S1-Pro-FP8
- deepseek/DeepSeek-V3.1
- zai-org/GLM-4.7-Flash
- zai-org/GLM-5
- deepseek-ai/DeepSeek-V3.1
- Qwen/Qwen3.5-27B
- Qwen/Qwen3.5-35B-A3B
- Qwen/Qwen3.5-122B-A10B
- Qwen/Qwen3-235B-A22B-Thinking-2507-FP8

longtext_model:
longtext_benchmark_model:
- Qwen/Qwen3-30B-A3B
- Qwen/Qwen3-235B-A22B-Thinking-2507
- zai-org/GLM-5
- Qwen/Qwen3.5-27B
- Qwen/Qwen3.5-35B-A3B
- Qwen/Qwen3.5-122B-A10B

benchmark_model:
- meta-llama/Meta-Llama-3-1-8B-Instruct
- meta-llama/Meta-Llama-3-1-70B-Instruct
- Qwen/Qwen3-32B
- meta-llama/Meta-Llama-3.1-8B-Instruct
- meta-llama/Meta-Llama-3.1-70B-Instruct
- Qwen/Qwen3-30B-A3B
- Qwen/Qwen3-235B-A22B-Thinking-2507
- Qwen/Qwen2.5-72B-Instruct
- openai/gpt-oss-120b
- openai/gpt-oss-20b
- unsloth/gpt-oss-20b-BF16
- unsloth/gpt-oss-120b-BF16
- zai-org/GLM-5
- Qwen/Qwen3.5-27B
- Qwen/Qwen3.5-35B-A3B
- Qwen/Qwen3.5-122B-A10B
- google/gemma-3-27b-it

evaluate_model:
- Qwen/Qwen3-32B
- Qwen/Qwen3-32B-FP8
- Qwen/Qwen3-30B-A3B
- Qwen/Qwen3-30B-A3B-FP8
- Qwen/Qwen3-235B-A22B-Thinking-2507
- Qwen/Qwen3-235B-A22B-Thinking-2507-FP8
- openai/gpt-oss-120b
- unsloth/gpt-oss-120b-BF16
- deepseek/DeepSeek-V3.1
- moonshotai/Kimi-K2-Instruct-0905
- internlm/Intern-S1-Pro-FP8
- JetLM/SDAR-30B-A3B-Sci
- deepseek-ai/DeepSeek-V3.1
- zai-org/GLM-5
- Qwen/Qwen3.5-27B
- Qwen/Qwen3.5-35B-A3B
- Qwen/Qwen3.5-122B-A10B

mllm_evaluate_model:
- OpenGVLab/InternVL3_5-38B
Expand Down
Loading
Loading