diff --git a/src/transformers/models/gemma4/configuration_gemma4.py b/src/transformers/models/gemma4/configuration_gemma4.py index a605d9a862ed..55c6d97d9ffc 100644 --- a/src/transformers/models/gemma4/configuration_gemma4.py +++ b/src/transformers/models/gemma4/configuration_gemma4.py @@ -132,6 +132,9 @@ class Gemma4TextConfig(PreTrainedConfig): "layers.*.mlp.gate_proj": "colwise", "layers.*.mlp.up_proj": "colwise", "layers.*.mlp.down_proj": "rowwise", + "layers.*.experts.gate_up_proj": "packed_colwise", + "layers.*.experts.down_proj": "rowwise", + "layers.*.experts": "moe_tp_experts", } base_model_pp_plan = { "embed_tokens": (["input_ids"], ["inputs_embeds"]),