# Source: huggingface/trl, docs/source/_toctree.yml
# (at commit a0200e03093c1bb1e2ac969e6d6a0aef3598b297)
# "Getting started" group: the TRL index page, installation, and quickstart.
# Each entry pairs a `local` page key with a human-readable `title`.
- sections:
  - local: index
    title: TRL
  - local: installation
    title: Installation
  - local: quickstart
    title: Quickstart
  title: Getting started
# "Conceptual Guides" group: dataset formats and the paper index.
- sections:
  - local: dataset_formats
    title: Dataset Formats
  - local: paper_index
    title: Paper Index
  title: Conceptual Guides
# "Trainers" group: one page per trainer (DPO, GRPO, Reward, RLOO, SFT).
# Keep entries in alphabetical order when adding a new one.
- sections: # Sorted alphabetically
  - local: dpo_trainer
    title: DPO
  - local: grpo_trainer
    title: GRPO
  - local: reward_trainer
    title: Reward
  - local: rloo_trainer
    title: RLOO
  - local: sft_trainer
    title: SFT
  title: Trainers
# "How-to guides" group: task-oriented pages (CLI, Jobs, customization,
# memory and speed tuning, distributed training, using trained models,
# per-sample tool filtering). No alphabetical ordering is declared here;
# entries appear in the order listed.
- sections:
  - local: clis
    title: Command Line Interface (CLI)
  - local: jobs_training
    title: Training using Jobs
  - local: customization
    title: Customizing the Training
  - local: reducing_memory_usage
    title: Reducing Memory Usage
  - local: speeding_up_training
    title: Speeding Up Training
  - local: distributing_training
    title: Distributing Training
  - local: use_model
    title: Using Trained Models
  - local: per_sample_tools
    title: Per-Sample Tool Filtering
  title: How-to guides
# "Integrations" group: one page per integration. Entries are currently in
# alphabetical order by `local` key, although the file does not declare
# that as a requirement for this group.
- sections:
  - local: deepspeed_integration
    title: DeepSpeed
  - local: kernels_hub
    title: Kernels Hub
  - local: liger_kernel_integration
    title: Liger Kernel
  - local: openenv
    title: OpenEnv
  - local: peft_integration
    title: PEFT
  - local: ptt_integration
    title: Post Training Toolkit
  - local: rapidfire_integration
    title: RapidFire AI
  - local: trackio_integration
    title: Trackio
  - local: unsloth_integration
    title: Unsloth
  - local: vllm_integration
    title: vLLM
  title: Integrations
# "Examples" group: the example overview, community tutorials, and the
# "LoRA Without Regret" page.
- sections:
  - local: example_overview
    title: Example Overview
  - local: community_tutorials
    title: Community Tutorials
  - local: lora_without_regret
    title: LoRA Without Regret
  title: Examples
# "API" group: reference pages. The first child is itself a nested group
# ("Utilities") with its own `sections` list; the remaining children
# (callbacks, rewards) are plain page entries.
- sections:
  # Nested "Utilities" subgroup.
  - sections:
    - local: chat_template_utils
      title: Chat Template Utilities
    - local: data_utils
      title: Data Utilities
    - local: script_utils
      title: Script Utilities
    title: Utilities
  - local: callbacks
    title: Callbacks
  - local: rewards
    title: Reward Functions
  title: API
# "Experimental" group: the overview page comes first, followed by the
# remaining experimental pages.
- sections:
  - local: experimental_overview
    title: Experimental Overview
  # Entries below are sorted alphabetically; keep new entries in order.
  - local: async_grpo_trainer
    title: Asynchronous GRPO
  - local: bco_trainer
    title: BCO
  - local: bema_for_reference_model
    title: BEMA for Reference Model
  - local: cpo_trainer
    title: CPO
  - local: gfpo
    title: GFPO
  - local: gkd_trainer
    title: GKD
  - local: gold_trainer
    title: GOLD
  - local: grpo_with_replay_buffer
    title: GRPO With Replay Buffer
  - local: gspo_token
    title: GSPO-token
  - local: judges
    title: Judges
  - local: kto_trainer
    title: KTO
  - local: merge_model_callback
    title: MergeModelCallback
  - local: minillm_trainer
    title: MiniLLM
  - local: nash_md_trainer
    title: Nash-MD
  - local: nemo_gym
    title: NeMo Gym
  - local: online_dpo_trainer
    title: Online DPO
  - local: orpo_trainer
    title: ORPO
  - local: papo_trainer
    title: PAPO
  - local: ppo_trainer
    title: PPO
  - local: prm_trainer
    title: PRM
  - local: sdft_trainer
    title: SDFT
  - local: sdpo_trainer
    title: SDPO
  - local: winrate_callback
    title: WinRateCallback
  - local: xpo_trainer
    title: XPO
  title: Experimental