From ac7e7f30c5003b070cfc0aeb5832232db8664ca4 Mon Sep 17 00:00:00 2001 From: Gagan Dhakrey Date: Sun, 5 Apr 2026 09:09:19 +0530 Subject: [PATCH 1/2] Fix UnboundLocalError in invert_attention_mask by adding proper shape validation --- src/transformers/modeling_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index f72b230d9a20..4b83fe0846e8 100644 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -867,8 +867,12 @@ def invert_attention_mask(self, encoder_attention_mask: Tensor) -> Tensor: """ if encoder_attention_mask.dim() == 3: encoder_extended_attention_mask = encoder_attention_mask[:, None, :, :] - if encoder_attention_mask.dim() == 2: + elif encoder_attention_mask.dim() == 2: encoder_extended_attention_mask = encoder_attention_mask[:, None, None, :] + else: + raise ValueError( + f"Wrong shape for encoder_attention_mask (shape {encoder_attention_mask.shape})" + ) # T5 has a mask that can compare sequence ids, we can simulate this here with this transposition # encoder_extended_attention_mask = (encoder_extended_attention_mask == # encoder_extended_attention_mask.transpose(-1, -2)) From f7d9c23dff7718cd5df4ecf19c584d414aa97323 Mon Sep 17 00:00:00 2001 From: Gagan Dhakrey Date: Sun, 5 Apr 2026 09:21:12 +0530 Subject: [PATCH 2/2] code formatting and linting --- src/transformers/modeling_utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index 4b83fe0846e8..784b0c625bd0 100644 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -870,9 +870,7 @@ def invert_attention_mask(self, encoder_attention_mask: Tensor) -> Tensor: elif encoder_attention_mask.dim() == 2: encoder_extended_attention_mask = encoder_attention_mask[:, None, None, :] else: - raise ValueError( - f"Wrong shape for encoder_attention_mask (shape {encoder_attention_mask.shape})" - ) + raise ValueError(f"Wrong shape for encoder_attention_mask (shape {encoder_attention_mask.shape})") # T5 has a mask that can compare sequence ids, we can simulate this here with this transposition # encoder_extended_attention_mask = (encoder_extended_attention_mask == # encoder_extended_attention_mask.transpose(-1, -2))