From c081128cd7b78fe38e529421b03e18b700bf06e7 Mon Sep 17 00:00:00 2001 From: Adam Pardyl Date: Mon, 30 Mar 2026 15:04:05 +0200 Subject: [PATCH 1/3] Add `log_multimodal` option to GRPOConfig to switch image logging --- trl/trainer/grpo_config.py | 10 ++++++++++ trl/trainer/grpo_trainer.py | 3 ++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/trl/trainer/grpo_config.py b/trl/trainer/grpo_config.py index 57c529b2993..a2de53914d2 100644 --- a/trl/trainer/grpo_config.py +++ b/trl/trainer/grpo_config.py @@ -319,6 +319,9 @@ class GRPOConfig(_BaseConfig): Whether to log a sample of (prompt, completion) pairs every `logging_steps` steps. If `rich` is installed, it prints the sample. If `wandb` and/or `trackio` logging is enabled, it logs it to `wandb` and/or `trackio`. + log_multimodal ('bool', *optional*, defaults to `True`): + Whether to log multimodal content (images, videos, etc.) together with completions. Disable this to reduce + log size when using high-resolution multimodal data. num_completions_to_print (`int`, *optional*): Number of completions to print with `rich`. If `None`, all completions are logged. log_unique_prompts (`bool`, *optional*, defaults to `False`): @@ -844,6 +847,13 @@ class GRPOConfig(_BaseConfig): "installed, it prints the sample. If `wandb` logging is enabled, it logs it to `wandb`." }, ) + log_multimodal: bool = field( + default=True, + metadata={ + "help": "Whether to log multimodal content (images, videos, etc.) together with completions. Disable this " + "to reduce log size when using high-resolution multimodal data." + }, + ) num_completions_to_print: int | None = field( default=None, metadata={"help": "Number of completions to print with `rich`. 
If `None`, all completions are logged."}, diff --git a/trl/trainer/grpo_trainer.py b/trl/trainer/grpo_trainer.py index c5eed094192..b7272e3592d 100644 --- a/trl/trainer/grpo_trainer.py +++ b/trl/trainer/grpo_trainer.py @@ -699,6 +699,7 @@ def cast_outputs_to_original_dtype(module, args, output): self._total_train_tokens = 0 self._current_train_step_time = 0.0 self.log_completions = args.log_completions + self.log_multimodal = args.log_multimodal self.log_unique_prompts = args.log_unique_prompts self.num_completions_to_print = args.num_completions_to_print # Keep logs sized to the generation batch to record only outputs from the latest model update. @@ -2049,7 +2050,7 @@ def _generate_and_score_completions( self._metrics[mode][name].append(global_mean) self._pending_metrics.clear() - if images is not None: + if images is not None and self.log_multimodal: self._logs["images"].extend(gather_object(images)) if self.use_vllm and self.vllm_importance_sampling_correction: From 30290a80ef77ba0b76d0c044e4029f0be90d9570 Mon Sep 17 00:00:00 2001 From: Adam Pardyl Date: Mon, 30 Mar 2026 15:34:56 +0200 Subject: [PATCH 2/3] Add log_multimodal param to RLOOConfig to control image logging --- trl/trainer/rloo_config.py | 10 ++++++++++ trl/trainer/rloo_trainer.py | 3 ++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/trl/trainer/rloo_config.py b/trl/trainer/rloo_config.py index 8cdb0335a53..b2780444485 100644 --- a/trl/trainer/rloo_config.py +++ b/trl/trainer/rloo_config.py @@ -200,6 +200,9 @@ class RLOOConfig(_BaseConfig): Whether to log a sample of (prompt, completion) pairs every `logging_steps` steps. If `rich` is installed, it prints the sample. If `wandb` and/or `trackio` logging is enabled, it logs it to `wandb` and/or `trackio`. + log_multimodal ('bool', *optional*, defaults to `True`): + Whether to log multimodal content (images, videos, etc.) together with completions. Disable this to reduce + log size when using high-resolution multimodal data. 
num_completions_to_print (`int`, *optional*): Number of completions to print with `rich`. If `None`, all completions are logged. log_unique_prompts (`bool`, *optional*, defaults to `False`): @@ -532,6 +535,13 @@ class RLOOConfig(_BaseConfig): "installed, it prints the sample. If `wandb` logging is enabled, it logs it to `wandb`." }, ) + log_multimodal: bool = field( + default=True, + metadata={ + "help": "Whether to log multimodal content (images, videos, etc.) together with completions. Disable this " + "to reduce log size when using high-resolution multimodal data." + }, + ) num_completions_to_print: int | None = field( default=None, metadata={"help": "Number of completions to print with `rich`. If `None`, all completions are logged."}, diff --git a/trl/trainer/rloo_trainer.py b/trl/trainer/rloo_trainer.py index 4e62310e7c7..25d4d835359 100644 --- a/trl/trainer/rloo_trainer.py +++ b/trl/trainer/rloo_trainer.py @@ -474,6 +474,7 @@ def __init__( self._total_train_tokens = 0 self._current_train_step_time = 0.0 self.log_completions = args.log_completions + self.log_multimodal = args.log_multimodal self.log_unique_prompts = args.log_unique_prompts self.num_completions_to_print = args.num_completions_to_print # Keep logs sized to the generation batch to record only outputs from the latest model update. 
@@ -1338,7 +1339,7 @@ def _generate_and_score_completions( self._metrics[mode][name].append(global_mean) self._pending_metrics.clear() - if images is not None: + if images is not None and self.log_multimodal: self._logs["images"].extend(gather_object(images)) output = { From 9dbbd3cb675f668d7088774b2d8627f915c16471 Mon Sep 17 00:00:00 2001 From: Adam Pardyl Date: Mon, 30 Mar 2026 15:39:46 +0200 Subject: [PATCH 3/3] Fix formatting of `log_multimodal` parameter documentation in GRPOConfig and RLOOConfig --- trl/trainer/grpo_config.py | 2 +- trl/trainer/rloo_config.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/trl/trainer/grpo_config.py b/trl/trainer/grpo_config.py index a2de53914d2..7a50deeda65 100644 --- a/trl/trainer/grpo_config.py +++ b/trl/trainer/grpo_config.py @@ -319,7 +319,7 @@ class GRPOConfig(_BaseConfig): Whether to log a sample of (prompt, completion) pairs every `logging_steps` steps. If `rich` is installed, it prints the sample. If `wandb` and/or `trackio` logging is enabled, it logs it to `wandb` and/or `trackio`. - log_multimodal ('bool', *optional*, defaults to `True`): + log_multimodal (`bool`, *optional*, defaults to `True`): Whether to log multimodal content (images, videos, etc.) together with completions. Disable this to reduce log size when using high-resolution multimodal data. num_completions_to_print (`int`, *optional*): diff --git a/trl/trainer/rloo_config.py b/trl/trainer/rloo_config.py index b2780444485..271dc4df155 100644 --- a/trl/trainer/rloo_config.py +++ b/trl/trainer/rloo_config.py @@ -200,7 +200,7 @@ class RLOOConfig(_BaseConfig): Whether to log a sample of (prompt, completion) pairs every `logging_steps` steps. If `rich` is installed, it prints the sample. If `wandb` and/or `trackio` logging is enabled, it logs it to `wandb` and/or `trackio`. 
- log_multimodal ('bool', *optional*, defaults to `True`): + log_multimodal (`bool`, *optional*, defaults to `True`): Whether to log multimodal content (images, videos, etc.) together with completions. Disable this to reduce log size when using high-resolution multimodal data. num_completions_to_print (`int`, *optional*):