From b6dc152403e60a6046876e249398270c35954292 Mon Sep 17 00:00:00 2001
From: "Wang, Yi"
Date: Fri, 3 Apr 2026 21:52:26 +0800
Subject: [PATCH] reduce memory for gemma4 moe model in tp

Signed-off-by: Wang, Yi
---
 src/transformers/models/gemma4/configuration_gemma4.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/transformers/models/gemma4/configuration_gemma4.py b/src/transformers/models/gemma4/configuration_gemma4.py
index a605d9a862ed..55c6d97d9ffc 100644
--- a/src/transformers/models/gemma4/configuration_gemma4.py
+++ b/src/transformers/models/gemma4/configuration_gemma4.py
@@ -132,6 +132,9 @@ class Gemma4TextConfig(PreTrainedConfig):
         "layers.*.mlp.gate_proj": "colwise",
         "layers.*.mlp.up_proj": "colwise",
         "layers.*.mlp.down_proj": "rowwise",
+        "layers.*.experts.gate_up_proj": "packed_colwise",
+        "layers.*.experts.down_proj": "rowwise",
+        "layers.*.experts": "moe_tp_experts",
     }
     base_model_pp_plan = {
         "embed_tokens": (["input_ids"], ["inputs_embeds"]),
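
Editor's note: below is a minimal sketch of the sharding semantics these plan
entries request, for readers unfamiliar with tensor-parallel plan keys. It is
not the transformers implementation; the helper names, tensor shapes, and
expert count are illustrative assumptions. "packed_colwise" is shown splitting
each packed half (gate, up) of the fused gate_up_proj weight independently, so
every rank keeps a matching gate/up slice, while "rowwise" splits the input
dimension of down_proj and the partial outputs would be summed across ranks.

import torch  # illustrative sketch only, not the transformers TP code

world_size = 4
num_experts, hidden, intermediate = 8, 512, 2048  # assumed toy sizes

# gate_up_proj packs gate and up along the output dim: [E, H, 2*I].
gate_up_proj = torch.randn(num_experts, hidden, 2 * intermediate)
down_proj = torch.randn(num_experts, intermediate, hidden)

def packed_colwise_shard(w, rank, world_size):
    # Split each packed half separately, then re-pack, so a naive
    # column split cannot hand one rank only gate (or only up) columns.
    gate, up = w.chunk(2, dim=-1)
    return torch.cat(
        [gate.chunk(world_size, dim=-1)[rank],
         up.chunk(world_size, dim=-1)[rank]],
        dim=-1,
    )

def rowwise_shard(w, rank, world_size):
    # Row-wise split over the input dim; each rank computes a partial
    # product that an all-reduce would combine in a real TP run.
    return w.chunk(world_size, dim=-2)[rank]

rank = 0
gu_local = packed_colwise_shard(gate_up_proj, rank, world_size)
dp_local = rowwise_shard(down_proj, rank, world_size)
print(gu_local.shape)  # torch.Size([8, 512, 1024]): 1/4 of the packed columns
print(dp_local.shape)  # torch.Size([8, 512, 512]):  1/4 of the rows

Each rank then stores 1/world_size of every expert's weights instead of a full
replica, which is where the memory saving in the subject line comes from. The
"layers.*.experts": "moe_tp_experts" entry presumably marks the expert
container itself for MoE-aware TP handling rather than per-linear sharding.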