commaai · haraschax · May 15, 2026 · May 20, 2026 · May 20, 2026
diff --git a/scripts/reporter.py b/scripts/reporter.py
@@ -33,7 +33,16 @@ def get_checkpoint(f):
   print("|-| -----  | --------- |")
 
   for f in glob.glob(BASEDIR + MODEL_PATH + "/*.onnx"):
+    # TODO: add checkpoint to the dmonitoring (DM) models; they are skipped from this report until then
+    if "dmonitoring" in f:
+      continue
+
     fn = os.path.basename(f)
-    master = get_checkpoint(MASTER_PATH + MODEL_PATH + fn)
+    master_path = MASTER_PATH + MODEL_PATH + fn
+    if os.path.exists(master_path):
+      master = get_checkpoint(master_path)
+      master_col = f"[{master}](https://reporter.comma.life/experiment/{master})"
+    else:
+      master_col = "N/A (new model)"
     pr = get_checkpoint(BASEDIR + MODEL_PATH + fn)
-    print("|", fn, "|", f"[{master}](https://reporterv2.comma.life/{master})", "|", f"[{pr}](https://reporterv2.comma.life/{pr})", "|")
+    print("|", fn, "|", master_col, "|", f"[{pr}](https://reporter.comma.life/experiment/{pr})", "|")
diff --git a/selfdrive/modeld/SConscript b/selfdrive/modeld/SConscript
@@ -68,7 +68,7 @@ mac_brew_string = f'HOME={os.path.expanduser("~")}' if arch == 'Darwin' else ''
 
 modeld_dir = Dir("#selfdrive/modeld").abspath
 compile_modeld_script = [File(f"{modeld_dir}/compile_modeld.py")]
-driving_onnx_deps = [File(f"models/{m}.onnx").abspath for m in ['driving_vision', 'driving_policy']]
+driving_onnx_deps = [File(f"models/{m}.onnx").abspath for m in ['driving_vision', 'driving_off_policy', 'driving_on_policy']]
 
 model_w, model_h = MEDMODEL_INPUT_SIZE
 frame_skip = ModelConstants.MODEL_RUN_FREQ // ModelConstants.MODEL_CONTEXT_FREQ
@@ -78,7 +78,8 @@ cmd = (f'{tg_flags} {mac_brew_string} python3 {modeld_dir}/compile_modeld.py '
        f'--model-size {model_w}x{model_h} '
        f'--camera-resolutions {camera_res_args} '
        f'--vision-onnx {File("models/driving_vision.onnx").abspath} '
-       f'--policy-onnx {File("models/driving_policy.onnx").abspath} '
+       f'--off-policy-onnx {File("models/driving_off_policy.onnx").abspath} '
+       f'--on-policy-onnx {File("models/driving_on_policy.onnx").abspath} '
        f'--output {pkl_path} --frame-skip {frame_skip}')
 node = lenv.Command(pkl_path, tinygrad_files + compile_modeld_script + driving_onnx_deps + [Value(camera_res_args), chunker_file, tg_devices_node], cmd)
 onnx_sizes_sum = sum(os.path.getsize(f) for f in driving_onnx_deps)
@@ -102,7 +103,6 @@ for cam_w, cam_h in CAMERA_CONFIGS:
          f'--output {dm_pkl_path}')
   lenv.Command(dm_pkl_path, tinygrad_files + compile_dm_warp_script + compile_modeld_script + [tg_devices_node], cmd)
 
-driving_metadata_deps = [File(f"models/{m}_metadata.pkl").abspath for m in ['driving_vision', 'driving_policy']]
 def tg_compile(flags, model_name):
   pythonpath_string = 'PYTHONPATH="${PYTHONPATH}:' + env.Dir("#tinygrad_repo").abspath + '"'
   fn = File(f"models/{model_name}").abspath

diff --git a/selfdrive/modeld/compile_modeld.py b/selfdrive/modeld/compile_modeld.py
@@ -102,6 +102,8 @@ def make_input_queues(vision_input_shapes, policy_input_shapes, frame_skip, devi
     'tfm': np.zeros((3, 3), dtype=np.float32),
     'big_tfm': np.zeros((3, 3), dtype=np.float32),
   }
+  if 'action_t' in policy_input_shapes:
+    npy['action_t'] = np.zeros(policy_input_shapes['action_t'], dtype=np.float32)
   input_queues = {
     'img_q': Tensor.zeros(img_buf_shape, dtype='uint8', device=device).contiguous().realize(),
     'big_img_q': Tensor.zeros(img_buf_shape, dtype='uint8', device=device).contiguous().realize(),
@@ -125,18 +127,19 @@ def sample_desire(buf, frame_skip):
   return buf.reshape(-1, frame_skip, *buf.shape[1:]).max(1).flatten(0, 1).unsqueeze(0)
 
 
-def make_run_policy(vision_runner, policy_runner, nv12: NV12Frame, model_w, model_h,
+def make_run_policy(vision_runner, off_policy_runner, on_policy_runner, nv12: NV12Frame, model_w, model_h,
                     vision_features_slice, frame_skip, prepare_only=False):
   frame_prepare = make_frame_prepare(nv12, model_w, model_h)
   sample_skip_fn = partial(sample_skip, frame_skip=frame_skip)
   sample_desire_fn = partial(sample_desire, frame_skip=frame_skip)
 
-  def run_policy(img_q, big_img_q, feat_q, desire_q, desire, traffic_convention, tfm, big_tfm, frame, big_frame):
+  def run_policy(img_q, big_img_q, feat_q, desire_q, desire, traffic_convention, action_t, tfm, big_tfm, frame, big_frame):
     tfm = tfm.to(Device.DEFAULT)
     big_tfm = big_tfm.to(Device.DEFAULT)
     desire = desire.to(Device.DEFAULT)
     traffic_convention = traffic_convention.to(Device.DEFAULT)
-    Tensor.realize(tfm, big_tfm, desire, traffic_convention)
+    action_t = action_t.to(Device.DEFAULT)
+    Tensor.realize(tfm, big_tfm, desire, traffic_convention, action_t)
 
     img = shift_and_sample(img_q, frame_prepare(frame, tfm).unsqueeze(0), sample_skip_fn)
     big_img = shift_and_sample(big_img_q, frame_prepare(big_frame, big_tfm).unsqueeze(0), sample_skip_fn)
@@ -150,22 +153,30 @@ def run_policy(img_q, big_img_q, feat_q, desire_q, desire, traffic_convention, t
     feat_buf = shift_and_sample(feat_q, new_feat, sample_skip_fn)
     desire_buf = shift_and_sample(desire_q, desire.reshape(1, 1, -1), sample_desire_fn)
 
-    inputs = {'features_buffer': feat_buf, 'desire_pulse': desire_buf, 'traffic_convention': traffic_convention}
-    policy_out = next(iter(policy_runner(inputs).values())).cast('float32')
+    inputs = {
+      'features_buffer': feat_buf,
+      'desire_pulse': desire_buf,
+      'traffic_convention': traffic_convention,
+      'action_t': action_t,
+    }
+    on_policy_out = next(iter(on_policy_runner(inputs).values())).cast('float32')
+    off_policy_out = next(iter(off_policy_runner(inputs).values())).cast('float32')
 
-    return vision_out, policy_out
+    return vision_out, on_policy_out, off_policy_out
   return run_policy
 
 
 def compile_modeld(nv12: NV12Frame, model_w, model_h, prepare_only, frame_skip,
-                   vision_runner, policy_runner, vision_metadata, policy_metadata):
+                   vision_runner, off_policy_runner, on_policy_runner,
+                   vision_metadata, off_policy_metadata, on_policy_metadata):
   print(f"Compiling combined policy JIT for {nv12.width}x{nv12.height} (prepare_only={prepare_only})...")
 
   vision_features_slice = vision_metadata['output_slices']['hidden_state']
   vision_input_shapes = vision_metadata['input_shapes']
-  policy_input_shapes = policy_metadata['input_shapes']
+  policy_input_shapes = on_policy_metadata['input_shapes']
+  assert policy_input_shapes == off_policy_metadata['input_shapes']
 
-  _run = make_run_policy(vision_runner, policy_runner, nv12, model_w, model_h,
+  _run = make_run_policy(vision_runner, off_policy_runner, on_policy_runner, nv12, model_w, model_h,
                          vision_features_slice, frame_skip, prepare_only)
   run_policy_jit = TinyJit(_run, prune=True)
 
@@ -227,7 +238,8 @@ def _parse_size(s):
   p.add_argument('--camera-resolutions', type=_parse_size, nargs='+', required=True,
                  help='camera resolutions WxH (one or more)')
   p.add_argument('--vision-onnx', required=True)
-  p.add_argument('--policy-onnx', required=True)
+  p.add_argument('--off-policy-onnx', required=True)
+  p.add_argument('--on-policy-onnx', required=True)
   p.add_argument('--output', required=True)
   p.add_argument('--frame-skip', type=int, required=True)
   args = p.parse_args()
@@ -236,16 +248,19 @@ def _parse_size(s):
   # init runners once so weights are shared
   from get_model_metadata import make_metadata_dict
   vision_runner = OnnxRunner(args.vision_onnx)
-  policy_runner = OnnxRunner(args.policy_onnx)
+  off_policy_runner = OnnxRunner(args.off_policy_onnx)
+  on_policy_runner = OnnxRunner(args.on_policy_onnx)
   out['metadata']['vision'] = make_metadata_dict(args.vision_onnx)
-  out['metadata']['policy'] = make_metadata_dict(args.policy_onnx)
+  out['metadata']['off_policy'] = make_metadata_dict(args.off_policy_onnx)
+  out['metadata']['on_policy'] = make_metadata_dict(args.on_policy_onnx)
 
   for cam_w, cam_h in args.camera_resolutions:
     nv12 = NV12Frame(cam_w, cam_h, *get_nv12_info(cam_w, cam_h))
     model_w, model_h = args.model_size
     out[(cam_w,cam_h)] = {
       name: compile_modeld(nv12, model_w, model_h, prepare_only, args.frame_skip,
-                           vision_runner, policy_runner, out['metadata']['vision'], out['metadata']['policy'])
+                           vision_runner, off_policy_runner, on_policy_runner,
+                           out['metadata']['vision'], out['metadata']['off_policy'], out['metadata']['on_policy'])
       for name, prepare_only in [('warp_enqueue', True), ('run_policy', False)]
     }
 

diff --git a/selfdrive/modeld/constants.py b/selfdrive/modeld/constants.py
@@ -38,6 +38,7 @@ class ModelConstants:
   LANE_LINES_WIDTH = 2
   ROAD_EDGES_WIDTH = 2
   PLAN_WIDTH = 15
+  ACTION_WIDTH = 2
   DESIRE_PRED_WIDTH = 8
   LAT_PLANNER_SOLUTION_WIDTH = 4
   DESIRED_CURV_WIDTH = 1

diff --git a/selfdrive/modeld/fill_model_msg.py b/selfdrive/modeld/fill_model_msg.py
@@ -125,7 +125,10 @@ def fill_model_msg(base_msg: capnp._DynamicStructBuilder, extended_msg: capnp._D
 
   # meta
   meta = modelV2.meta
-  meta.desireState = net_output_data['desire_state'][0].reshape(-1).tolist()
+  if 'desire_state' in net_output_data:
+    meta.desireState = net_output_data['desire_state'][0].reshape(-1).tolist()
+  else:
+    meta.desireState = [0.0] * ModelConstants.DESIRE_PRED_WIDTH
   meta.desirePrediction = net_output_data['desire_pred'][0].reshape(-1).tolist()
   meta.engagedProb = net_output_data['meta'][0,Meta.ENGAGED].item()
   meta.init('disengagePredictions')

diff --git a/selfdrive/modeld/modeld.py b/selfdrive/modeld/modeld.py
@@ -18,12 +18,12 @@
 from openpilot.system.camerad.cameras.nv12_info import get_nv12_info
 from openpilot.common.transformations.model import get_warp_matrix
 from openpilot.selfdrive.controls.lib.desire_helper import DesireHelper
-from openpilot.selfdrive.controls.lib.drive_helpers import get_accel_from_plan, smooth_value, get_curvature_from_plan
+from openpilot.selfdrive.controls.lib.drive_helpers import smooth_value
 from openpilot.selfdrive.modeld.parse_model_outputs import Parser
 from openpilot.selfdrive.modeld.compile_modeld import make_input_queues
 from openpilot.selfdrive.modeld.fill_model_msg import fill_model_msg, fill_pose_msg, PublishState
 from openpilot.common.file_chunker import read_file_chunked
-from openpilot.selfdrive.modeld.constants import ModelConstants, Plan
+from openpilot.selfdrive.modeld.constants import ModelConstants
 
 
 PROCESS_NAME = "selfdrive.modeld.modeld"
@@ -35,20 +35,12 @@
 
 
 
-def get_action_from_model(model_output: dict[str, np.ndarray], prev_action: log.ModelDataV2.Action,
-                          lat_action_t: float, long_action_t: float, v_ego: float) -> log.ModelDataV2.Action:
-    plan = model_output['plan'][0]
-    desired_accel, should_stop = get_accel_from_plan(plan[:,Plan.VELOCITY][:,0],
-                                                     plan[:,Plan.ACCELERATION][:,0],
-                                                     ModelConstants.T_IDXS,
-                                                     action_t=long_action_t)
-    desired_accel = smooth_value(desired_accel, prev_action.desiredAcceleration, LONG_SMOOTH_SECONDS)
+def get_action_from_model(model_output: dict[str, np.ndarray], prev_action: log.ModelDataV2.Action, v_ego: float) -> log.ModelDataV2.Action:
+    desired_accel = model_output['action'][0,1]
+    desired_curvature = model_output['action'][0,0] / (max(1.0, v_ego))**2
+    should_stop = (v_ego < 0.3 and desired_accel < 0.1)
 
-    desired_curvature = get_curvature_from_plan(plan[:,Plan.T_FROM_CURRENT_EULER][:,2],
-                                                plan[:,Plan.ORIENTATION_RATE][:,2],
-                                                ModelConstants.T_IDXS,
-                                                v_ego,
-                                                lat_action_t)
+    desired_accel = smooth_value(desired_accel, prev_action.desiredAcceleration, LONG_SMOOTH_SECONDS)
     if v_ego > MIN_LAT_CONTROL_SPEED:
       desired_curvature = smooth_value(desired_curvature, prev_action.desiredCurvature, LAT_SMOOTH_SECONDS)
     else:
@@ -79,7 +71,10 @@ def __init__(self, cam_w: int, cam_h: int):
     self.vision_input_names = list(self.vision_input_shapes.keys())
     self.vision_output_slices = vision_metadata['output_slices']
 
-    policy_metadata = jits['metadata']['policy']
+    off_policy_metadata = jits['metadata']['off_policy']
+    self.off_policy_output_slices = off_policy_metadata['output_slices']
+
+    policy_metadata = jits['metadata']['on_policy']
     self.policy_input_shapes =  policy_metadata['input_shapes']
     self.policy_output_slices = policy_metadata['output_slices']
 
@@ -118,25 +113,29 @@ def run(self, bufs: dict[str, VisionBuf], transforms: dict[str, np.ndarray],
     self.npy['desire'][:] = np.where(inputs['desire_pulse'] - self.prev_desire > .99, inputs['desire_pulse'], 0)
     self.prev_desire[:] = inputs['desire_pulse']
     self.npy['traffic_convention'][:] = inputs['traffic_convention']
+    if 'action_t' in self.npy:
+      self.npy['action_t'][:] = inputs['action_t']
     self.npy['tfm'][:,:] = transforms['img'][:,:]
     self.npy['big_tfm'][:,:] = transforms['big_img'][:,:]
 
     if prepare_only:
       self.warp_enqueue(**self.input_queues, frame=self.full_frames['img'], big_frame=self.full_frames['big_img'])
       return None
 
-    vision_output, policy_output = self.run_policy(
+    vision_output, policy_output, off_policy_output = self.run_policy(
       **self.input_queues, frame=self.full_frames['img'], big_frame=self.full_frames['big_img']
     )
 
     vision_output = vision_output.numpy().flatten()
+    off_policy_output = off_policy_output.numpy().flatten()
     policy_output = policy_output.numpy().flatten()
     vision_outputs_dict = self.parser.parse_vision_outputs(self.slice_outputs(vision_output, self.vision_output_slices))
+    off_policy_outputs_dict = self.parser.parse_off_policy_outputs(self.slice_outputs(off_policy_output, self.off_policy_output_slices))
     policy_outputs_dict = self.parser.parse_policy_outputs(self.slice_outputs(policy_output, self.policy_output_slices))
-    combined_outputs_dict = {**vision_outputs_dict, **policy_outputs_dict}
+    combined_outputs_dict = {**vision_outputs_dict, **off_policy_outputs_dict, **policy_outputs_dict}
 
     if SEND_RAW_PRED:
-      combined_outputs_dict['raw_pred'] = np.concatenate([vision_output.copy(), policy_output.copy()])
+      combined_outputs_dict['raw_pred'] = np.concatenate([vision_output.copy(), policy_output.copy(), off_policy_output.copy()])
     return combined_outputs_dict
 
 
@@ -279,9 +278,14 @@ def main(demo=False):
 
     bufs = {name: buf_extra if 'big' in name else buf_main for name in model.vision_input_names}
     transforms = {name: model_transform_extra if 'big' in name else model_transform_main for name in model.vision_input_names}
+    frame_delay = DT_MDL # compensate for time passed since the frame was captured: current_time - timestamp_eof is 50ms on average
+    action_delay = DT_MDL / 2 # middle of the interval between model output (current state) and next frame (expected state)
+    lat_action_t = lat_delay + frame_delay + action_delay
+    long_action_t = long_delay + frame_delay + action_delay
     inputs:dict[str, np.ndarray] = {
       'desire_pulse': vec_desire,
       'traffic_convention': traffic_convention,
+      'action_t': np.array([lat_action_t, long_action_t], dtype=np.float32),
     }
 
     mt1 = time.perf_counter()
@@ -294,9 +298,7 @@ def main(demo=False):
       drivingdata_send = messaging.new_message('drivingModelData')
       posenet_send = messaging.new_message('cameraOdometry')
 
-      frame_delay = DT_MDL # compensate for time passed since the frame was captured: current_time - timestamp_eof is 50ms on average
-      action_delay = DT_MDL / 2 # middle of the interval between model output (current state) and next frame (expected state)
-      action = get_action_from_model(model_output, prev_action, lat_delay + frame_delay + action_delay, long_delay + frame_delay + action_delay, v_ego)
+      action = get_action_from_model(model_output, prev_action, v_ego)
       prev_action = action
       fill_model_msg(drivingdata_send, modelv2_send, model_output, action,
                      publish_state, meta_main.frame_id, meta_extra.frame_id, frame_id,

diff --git a/selfdrive/modeld/models/big_driving_policy.onnx b/selfdrive/modeld/models/big_driving_policy.onnx
diff --git a/selfdrive/modeld/models/big_driving_vision.onnx b/selfdrive/modeld/models/big_driving_vision.onnx
diff --git a/selfdrive/modeld/models/driving_off_policy.onnx b/selfdrive/modeld/models/driving_off_policy.onnx
diff --git a/selfdrive/modeld/models/driving_on_policy.onnx b/selfdrive/modeld/models/driving_on_policy.onnx
diff --git a/selfdrive/modeld/models/driving_policy.onnx b/selfdrive/modeld/models/driving_policy.onnx
diff --git a/selfdrive/modeld/models/driving_vision.onnx b/selfdrive/modeld/models/driving_vision.onnx
diff --git a/selfdrive/modeld/parse_model_outputs.py b/selfdrive/modeld/parse_model_outputs.py
@@ -96,12 +96,19 @@ def parse_vision_outputs(self, outs: dict[str, np.ndarray]) -> dict[str, np.ndar
     self.parse_mdn('pose', outs, in_N=0, out_N=0, out_shape=(ModelConstants.POSE_WIDTH,))
     self.parse_mdn('wide_from_device_euler', outs, in_N=0, out_N=0, out_shape=(ModelConstants.WIDE_FROM_DEVICE_WIDTH,))
     self.parse_mdn('road_transform', outs, in_N=0, out_N=0, out_shape=(ModelConstants.POSE_WIDTH,))
+    self.parse_categorical_crossentropy('desire_pred', outs, out_shape=(ModelConstants.DESIRE_PRED_LEN,ModelConstants.DESIRE_PRED_WIDTH))
+    self.parse_binary_crossentropy('meta', outs)
+    return outs
+
+  def parse_off_policy_outputs(self, outs: dict[str, np.ndarray]) -> dict[str, np.ndarray]:
+    plan_mhp = self.is_mhp(outs, 'plan',  ModelConstants.IDX_N * ModelConstants.PLAN_WIDTH)
+    plan_in_N, plan_out_N = (ModelConstants.PLAN_MHP_N, ModelConstants.PLAN_MHP_SELECTION) if plan_mhp else (0, 0)
+    self.parse_mdn('plan', outs, in_N=plan_in_N, out_N=plan_out_N, out_shape=(ModelConstants.IDX_N, ModelConstants.PLAN_WIDTH))
     self.parse_mdn('lane_lines', outs, in_N=0, out_N=0, out_shape=(ModelConstants.NUM_LANE_LINES,ModelConstants.IDX_N,ModelConstants.LANE_LINES_WIDTH))
     self.parse_mdn('road_edges', outs, in_N=0, out_N=0, out_shape=(ModelConstants.NUM_ROAD_EDGES,ModelConstants.IDX_N,ModelConstants.LANE_LINES_WIDTH))
     self.parse_binary_crossentropy('lane_lines_prob', outs)
-    self.parse_categorical_crossentropy('desire_pred', outs, out_shape=(ModelConstants.DESIRE_PRED_LEN,ModelConstants.DESIRE_PRED_WIDTH))
-    self.parse_binary_crossentropy('meta', outs)
     self.parse_binary_crossentropy('lead_prob', outs)
+    self.parse_categorical_crossentropy('desire_state', outs, out_shape=(ModelConstants.DESIRE_PRED_WIDTH,))
     lead_mhp = self.is_mhp(outs, 'lead', ModelConstants.LEAD_MHP_SELECTION * ModelConstants.LEAD_TRAJ_LEN * ModelConstants.LEAD_WIDTH)
     lead_in_N, lead_out_N = (ModelConstants.LEAD_MHP_N, ModelConstants.LEAD_MHP_SELECTION) if lead_mhp else (0, 0)
     lead_out_shape = (ModelConstants.LEAD_TRAJ_LEN, ModelConstants.LEAD_WIDTH) if lead_mhp else \
@@ -110,15 +117,11 @@ def parse_vision_outputs(self, outs: dict[str, np.ndarray]) -> dict[str, np.ndar
     return outs
 
   def parse_policy_outputs(self, outs: dict[str, np.ndarray]) -> dict[str, np.ndarray]:
-    plan_mhp = self.is_mhp(outs, 'plan', ModelConstants.IDX_N * ModelConstants.PLAN_WIDTH)
-    plan_in_N, plan_out_N = (ModelConstants.PLAN_MHP_N, ModelConstants.PLAN_MHP_SELECTION) if plan_mhp else (0, 0)
-    self.parse_mdn('plan', outs, in_N=plan_in_N, out_N=plan_out_N, out_shape=(ModelConstants.IDX_N, ModelConstants.PLAN_WIDTH))
-    if 'planplus' in outs:
-      self.parse_mdn('planplus', outs, in_N=0, out_N=0, out_shape=(ModelConstants.IDX_N, ModelConstants.PLAN_WIDTH))
-    self.parse_categorical_crossentropy('desire_state', outs, out_shape=(ModelConstants.DESIRE_PRED_WIDTH,))
+    self.parse_mdn('action', outs, in_N=0, out_N=0, out_shape=(ModelConstants.ACTION_WIDTH,))
     return outs
 
   def parse_outputs(self, outs: dict[str, np.ndarray]) -> dict[str, np.ndarray]:
     outs = self.parse_vision_outputs(outs)
+    outs = self.parse_off_policy_outputs(outs)
     outs = self.parse_policy_outputs(outs)
     return outs