From c081128cd7b78fe38e529421b03e18b700bf06e7 Mon Sep 17 00:00:00 2001
From: Adam Pardyl <adam@pardyl.com>
Date: Mon, 30 Mar 2026 15:04:05 +0200
Subject: [PATCH 1/3] Add `log_multimodal` option to GRPOConfig to switch image
 logging

---
 trl/trainer/grpo_config.py  | 10 ++++++++++
 trl/trainer/grpo_trainer.py |  3 ++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/trl/trainer/grpo_config.py b/trl/trainer/grpo_config.py
index 57c529b2993..a2de53914d2 100644
--- a/trl/trainer/grpo_config.py
+++ b/trl/trainer/grpo_config.py
@@ -319,6 +319,9 @@ class GRPOConfig(_BaseConfig):
             Whether to log a sample of (prompt, completion) pairs every `logging_steps` steps. If `rich` is installed,
             it prints the sample. If `wandb` and/or `trackio` logging is enabled, it logs it to `wandb` and/or
             `trackio`.
+        log_multimodal ('bool', *optional*, defaults to `True`):
+            Wether to log multimodal content (images, videos, etc.) together with completions. Disable this to reduce
+            log size when using high-resolution multimodal data.
         num_completions_to_print (`int`, *optional*):
             Number of completions to print with `rich`. If `None`, all completions are logged.
         log_unique_prompts (`bool`, *optional*, defaults to `False`):
@@ -844,6 +847,13 @@ class GRPOConfig(_BaseConfig):
             "installed, it prints the sample. If `wandb` logging is enabled, it logs it to `wandb`."
         },
     )
+    log_multimodal: bool = field(
+        default=True,
+        metadata={
+            "help": "Whether to log multimodal content (images, videos, etc.) together with completions. Disable this "
+            "to reduce log size when using high-resolution multimodal data."
+        },
+    )
     num_completions_to_print: int | None = field(
         default=None,
         metadata={"help": "Number of completions to print with `rich`. If `None`, all completions are logged."},
diff --git a/trl/trainer/grpo_trainer.py b/trl/trainer/grpo_trainer.py
index c5eed094192..b7272e3592d 100644
--- a/trl/trainer/grpo_trainer.py
+++ b/trl/trainer/grpo_trainer.py
@@ -699,6 +699,7 @@ def cast_outputs_to_original_dtype(module, args, output):
         self._total_train_tokens = 0
         self._current_train_step_time = 0.0
         self.log_completions = args.log_completions
+        self.log_multimodal = args.log_multimodal
         self.log_unique_prompts = args.log_unique_prompts
         self.num_completions_to_print = args.num_completions_to_print
         # Keep logs sized to the generation batch to record only outputs from the latest model update.
@@ -2049,7 +2050,7 @@ def _generate_and_score_completions(
             self._metrics[mode][name].append(global_mean)
         self._pending_metrics.clear()
 
-        if images is not None:
+        if images is not None and self.log_multimodal:
             self._logs["images"].extend(gather_object(images))
 
         if self.use_vllm and self.vllm_importance_sampling_correction:

From 30290a80ef77ba0b76d0c044e4029f0be90d9570 Mon Sep 17 00:00:00 2001
From: Adam Pardyl <adam@pardyl.com>
Date: Mon, 30 Mar 2026 15:34:56 +0200
Subject: [PATCH 2/3] Add log_multimodal param to RLOOConfig to control image
 logging

---
 trl/trainer/rloo_config.py  | 10 ++++++++++
 trl/trainer/rloo_trainer.py |  3 ++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/trl/trainer/rloo_config.py b/trl/trainer/rloo_config.py
index 8cdb0335a53..b2780444485 100644
--- a/trl/trainer/rloo_config.py
+++ b/trl/trainer/rloo_config.py
@@ -200,6 +200,9 @@ class RLOOConfig(_BaseConfig):
             Whether to log a sample of (prompt, completion) pairs every `logging_steps` steps. If `rich` is installed,
             it prints the sample. If `wandb` and/or `trackio` logging is enabled, it logs it to `wandb` and/or
             `trackio`.
+        log_multimodal ('bool', *optional*, defaults to `True`):
+            Wether to log multimodal content (images, videos, etc.) together with completions. Disable this to reduce
+            log size when using high-resolution multimodal data.
         num_completions_to_print (`int`, *optional*):
             Number of completions to print with `rich`. If `None`, all completions are logged.
         log_unique_prompts (`bool`, *optional*, defaults to `False`):
@@ -532,6 +535,13 @@ class RLOOConfig(_BaseConfig):
             "installed, it prints the sample. If `wandb` logging is enabled, it logs it to `wandb`."
         },
     )
+    log_multimodal: bool = field(
+        default=True,
+        metadata={
+            "help": "Whether to log multimodal content (images, videos, etc.) together with completions. Disable this "
+            "to reduce log size when using high-resolution multimodal data."
+        },
+    )
     num_completions_to_print: int | None = field(
         default=None,
         metadata={"help": "Number of completions to print with `rich`. If `None`, all completions are logged."},
diff --git a/trl/trainer/rloo_trainer.py b/trl/trainer/rloo_trainer.py
index 4e62310e7c7..25d4d835359 100644
--- a/trl/trainer/rloo_trainer.py
+++ b/trl/trainer/rloo_trainer.py
@@ -474,6 +474,7 @@ def __init__(
         self._total_train_tokens = 0
         self._current_train_step_time = 0.0
         self.log_completions = args.log_completions
+        self.log_multimodal = args.log_multimodal
         self.log_unique_prompts = args.log_unique_prompts
         self.num_completions_to_print = args.num_completions_to_print
         # Keep logs sized to the generation batch to record only outputs from the latest model update.
@@ -1338,7 +1339,7 @@ def _generate_and_score_completions(
             self._metrics[mode][name].append(global_mean)
         self._pending_metrics.clear()
 
-        if images is not None:
+        if images is not None and self.log_multimodal:
             self._logs["images"].extend(gather_object(images))
 
         output = {

From 9dbbd3cb675f668d7088774b2d8627f915c16471 Mon Sep 17 00:00:00 2001
From: Adam Pardyl <adam@pardyl.com>
Date: Mon, 30 Mar 2026 15:39:46 +0200
Subject: [PATCH 3/3] Fix formatting of `log_multimodal` parameter
 documentation in GRPOConfig and RLOOConfig

---
 trl/trainer/grpo_config.py | 4 ++--
 trl/trainer/rloo_config.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/trl/trainer/grpo_config.py b/trl/trainer/grpo_config.py
index a2de53914d2..7a50deeda65 100644
--- a/trl/trainer/grpo_config.py
+++ b/trl/trainer/grpo_config.py
@@ -319,7 +319,7 @@ class GRPOConfig(_BaseConfig):
             Whether to log a sample of (prompt, completion) pairs every `logging_steps` steps. If `rich` is installed,
             it prints the sample. If `wandb` and/or `trackio` logging is enabled, it logs it to `wandb` and/or
             `trackio`.
-        log_multimodal ('bool', *optional*, defaults to `True`):
-            Wether to log multimodal content (images, videos, etc.) together with completions. Disable this to reduce
+        log_multimodal (`bool`, *optional*, defaults to `True`):
+            Whether to log multimodal content (images, videos, etc.) together with completions. Disable this to reduce
             log size when using high-resolution multimodal data.
         num_completions_to_print (`int`, *optional*):
diff --git a/trl/trainer/rloo_config.py b/trl/trainer/rloo_config.py
index b2780444485..271dc4df155 100644
--- a/trl/trainer/rloo_config.py
+++ b/trl/trainer/rloo_config.py
@@ -200,7 +200,7 @@ class RLOOConfig(_BaseConfig):
             Whether to log a sample of (prompt, completion) pairs every `logging_steps` steps. If `rich` is installed,
             it prints the sample. If `wandb` and/or `trackio` logging is enabled, it logs it to `wandb` and/or
             `trackio`.
-        log_multimodal ('bool', *optional*, defaults to `True`):
-            Wether to log multimodal content (images, videos, etc.) together with completions. Disable this to reduce
+        log_multimodal (`bool`, *optional*, defaults to `True`):
+            Whether to log multimodal content (images, videos, etc.) together with completions. Disable this to reduce
             log size when using high-resolution multimodal data.
         num_completions_to_print (`int`, *optional*):