diff --git a/docs/source/dpo_trainer.md b/docs/source/dpo_trainer.md index 863cefc6eb..d41fb13cc6 100644 --- a/docs/source/dpo_trainer.md +++ b/docs/source/dpo_trainer.md @@ -111,6 +111,7 @@ Several formulations of the objective have been proposed in the literature. Init | `"sigmoid"` (default) | Given the preference data, we can fit a binary classifier according to the Bradley-Terry model and in fact the [DPO](https://huggingface.co/papers/2305.18290) authors propose the sigmoid loss on the normalized likelihood via the `logsigmoid` to fit a logistic regression. | | `"hinge"` | The [RSO](https://huggingface.co/papers/2309.06657) authors propose to use a hinge loss on the normalized likelihood from the [SLiC](https://huggingface.co/papers/2305.10425) paper. In this case, the `beta` is the reciprocal of the margin. | | `"ipo"` | The [IPO](https://huggingface.co/papers/2310.12036) authors argue the logit transform can overfit and propose the identity transform to optimize preferences directly; TRL exposes this as `loss_type="ipo"`. | +| `"sigmoid_norm"` | The [SimPO](https://huggingface.co/papers/2405.14734) authors address the length bias in the original sigmoid loss by normalizing by the number of non-mask tokens; TRL exposes this as `loss_type="sigmoid_norm"`. | | `"exo_pair"` | The [EXO](https://huggingface.co/papers/2402.00856) authors propose reverse-KL preference optimization. `label_smoothing` must be strictly greater than `0.0`; a recommended value is `1e-3` (see Eq. 16 for the simplified pairwise variant). The full method uses `K>2` SFT completions and approaches PPO as `K` grows. | | `"nca_pair"` | The [NCA](https://huggingface.co/papers/2402.05369) authors shows that NCA optimizes the absolute likelihood for each response rather than the relative likelihood. | | `"robust"` | The [Robust DPO](https://huggingface.co/papers/2403.00409) authors propose an unbiased DPO loss under noisy preferences. 
Use `label_smoothing` in [`DPOConfig`] to model label-flip probability; valid values are in the range `[0.0, 0.5)`. | diff --git a/docs/source/paper_index.md b/docs/source/paper_index.md index 6f8ba7daf3..8f956692a1 100644 --- a/docs/source/paper_index.md +++ b/docs/source/paper_index.md @@ -1121,6 +1121,25 @@ training_args = DPOConfig( ) ``` +### Length-Normalized DPO (Sigmoid Norm) + +**📜 Paper**: https://huggingface.co/papers/2405.14734 + +The length-normalized sigmoid loss addresses length bias in DPO by dividing chosen and rejected log-ratio scores by their respective completion lengths before computing the Bradley-Terry loss. This per-token normalization was introduced in [SimPO](https://huggingface.co/papers/2405.14734) as an average log-probability reward for a reference-free setting, and was later adopted for standard reference-model-based DPO in post-training recipes such as [Tulu 3](https://huggingface.co/papers/2411.15124) (Section 4.3). The loss is: + +$$ +\mathcal{L}_{\text{sigmoid\_norm}} = -\log\sigma\!\left(\beta \left({\color{red}\frac{1}{|y_w|}}\log\frac{\pi_\theta(y_w|x)}{\pi_{\text{ref}}(y_w|x)} - {\color{red}\frac{1}{|y_l|}}\log\frac{\pi_\theta(y_l|x)}{\pi_{\text{ref}}(y_l|x)}\right)\right), +$$ +which can be set with: + +```python +from trl import DPOConfig + +training_args = DPOConfig( + loss_type=["sigmoid_norm"], +) +``` + ### Enhancing the Reasoning Ability of Multimodal Large Language Models via Mixed Preference Optimization **📜 Paper**: https://huggingface.co/papers/2411.10442 diff --git a/tests/test_dpo_trainer.py b/tests/test_dpo_trainer.py index 288dbb2ccb..7456005f51 100644 --- a/tests/test_dpo_trainer.py +++ b/tests/test_dpo_trainer.py @@ -266,6 +266,7 @@ def test_train_model(self): "sigmoid", "hinge", "ipo", + "sigmoid_norm", "exo_pair", "nca_pair", "robust", diff --git a/trl/trainer/dpo_config.py b/trl/trainer/dpo_config.py index 979747dd07..fcbeaec672 100644 --- a/trl/trainer/dpo_config.py +++ b/trl/trainer/dpo_config.py @@ 
-70,7 +70,7 @@ class DPOConfig(_BaseConfig): > Parameters that control the training loss_type (`list[str]`, *optional*, defaults to `["sigmoid"]`): - Type of loss to use. Possible values are: `'sigmoid'`, `'hinge'`, `'ipo'`, `'exo_pair'`, `'nca_pair'`, + Type of loss to use. Possible values are: `'sigmoid'`, `'hinge'`, `'ipo'`, `'sigmoid_norm'`, `'exo_pair'`, `'nca_pair'`, `'robust'`, `'bco_pair'`, `'sppo_hard'`, `'aot'`, `'aot_unpaired'`, `'apo_zero'`, `'apo_down'`, `'discopop'`, `'sft'`. If multiple loss types are provided, they will be combined using the weights specified in `loss_weights`. @@ -211,7 +211,7 @@ class DPOConfig(_BaseConfig): loss_type: list[str] = field( default_factory=lambda: ["sigmoid"], metadata={ - "help": "Type of loss to use. Possible values are: `'sigmoid'`, `'hinge'`, `'ipo'`, `'exo_pair'`, " + "help": "Type of loss to use. Possible values are: `'sigmoid'`, `'hinge'`, `'ipo'`, `'sigmoid_norm'`, `'exo_pair'`, " "`'nca_pair'`, `'robust'`, `'bco_pair'`, `'sppo_hard'`, `'aot'`, `'aot_unpaired'`, `'apo_zero'`, " "`'apo_down'`, `'discopop'`, `'sft'`. If multiple loss types are provided, they will be combined using " "the weights specified in `loss_weights`.", diff --git a/trl/trainer/dpo_trainer.py b/trl/trainer/dpo_trainer.py index 0ae3e90668..0003da66ea 100644 --- a/trl/trainer/dpo_trainer.py +++ b/trl/trainer/dpo_trainer.py @@ -1248,6 +1248,13 @@ def _compute_loss(self, model, inputs, return_outputs): # (Eq. 17) of the paper where beta is the regularization parameter for the IPO loss, denoted by τ. 
per_sequence_loss = (ipo_delta - 1 / (2 * self.beta)) ** 2 + elif loss_type == "sigmoid_norm": + chosen_mask, rejected_mask = completion_mask.chunk(2, dim=0) + chosen_avg_score = chosen_scores / chosen_mask.sum(dim=1).clamp(min=1.0) + rejected_avg_score = rejected_scores / rejected_mask.sum(dim=1).clamp(min=1.0) + delta = chosen_avg_score - rejected_avg_score + per_sequence_loss = -F.logsigmoid(self.beta * delta) + elif loss_type == "exo_pair": # Implements EXO-pref from the paper https://huggingface.co/papers/2402.00856, (Eq. 16) # Minimize KL(p_fθ || p_rh) for K=2; p_fθ = softmax(βπ * (log πθ − log π_ref)) over {chosen, rejected} @@ -1348,7 +1355,7 @@ def _compute_loss(self, model, inputs, return_outputs): else: raise ValueError( - f"Unknown loss type: {loss_type}. Should be one of ['sigmoid', 'hinge', 'ipo', 'exo_pair', " + f"Unknown loss type: {loss_type}. Should be one of ['sigmoid', 'hinge', 'ipo', 'sigmoid_norm', 'exo_pair', " "'nca_pair', 'robust', 'bco_pair', 'sppo_hard', 'aot', 'aot_unpaired', 'apo_zero', 'apo_down', " "'discopop', 'sft']" )