From 2e6856403473c7fb3a54c1dc94419f0877d352dd Mon Sep 17 00:00:00 2001 From: Bruce Wayne Date: Fri, 15 May 2026 11:23:39 -0700 Subject: [PATCH 1/3] modeld: RL driving model with 3-file split Split the driving model into vision + off_policy + on_policy ONNX files and wire up the RL policy: - 3-file model split (vision / off_policy / on_policy), replacing the combined big_driving_policy/vision models - compiler updates for the split models - actually consume the policy action in modeld - add desire state to the driving model - model iterations (smoothness, off/on-policy weight updates) --- scripts/reporter.py | 13 ++++- selfdrive/modeld/SConscript | 6 +-- selfdrive/modeld/compile_modeld.py | 53 ++++++++++++++----- selfdrive/modeld/constants.py | 1 + selfdrive/modeld/fill_model_msg.py | 5 +- selfdrive/modeld/modeld.py | 49 +++++++++-------- .../modeld/models/big_driving_policy.onnx | 1 - .../modeld/models/big_driving_vision.onnx | 1 - .../modeld/models/driving_off_policy.onnx | 3 ++ .../modeld/models/driving_on_policy.onnx | 3 ++ selfdrive/modeld/models/driving_policy.onnx | 3 -- selfdrive/modeld/models/driving_vision.onnx | 4 +- selfdrive/modeld/parse_model_outputs.py | 19 ++++--- 13 files changed, 105 insertions(+), 56 deletions(-) delete mode 120000 selfdrive/modeld/models/big_driving_policy.onnx delete mode 120000 selfdrive/modeld/models/big_driving_vision.onnx create mode 100644 selfdrive/modeld/models/driving_off_policy.onnx create mode 100644 selfdrive/modeld/models/driving_on_policy.onnx delete mode 100644 selfdrive/modeld/models/driving_policy.onnx diff --git a/scripts/reporter.py b/scripts/reporter.py index 93b71761a9e421..64f6cb99b8370f 100755 --- a/scripts/reporter.py +++ b/scripts/reporter.py @@ -33,7 +33,16 @@ def get_checkpoint(f): print("|-| ----- | --------- |") for f in glob.glob(BASEDIR + MODEL_PATH + "/*.onnx"): + # TODO: add checkpoint to DM + if "dmonitoring" in f: + continue + fn = os.path.basename(f) - master = get_checkpoint(MASTER_PATH + MODEL_PATH + fn) + master_path = MASTER_PATH + MODEL_PATH + fn + if os.path.exists(master_path): + master = get_checkpoint(master_path) + master_col = f"[{master}](https://reporter.comma.life/experiment/{master})" + else: + master_col = "N/A (new model)" pr = get_checkpoint(BASEDIR + MODEL_PATH + fn) - print("|", fn, "|", f"[{master}](https://reporterv2.comma.life/{master})", "|", f"[{pr}](https://reporterv2.comma.life/{pr})", "|") + print("|", fn, "|", master_col, "|", f"[{pr}](https://reporter.comma.life/experiment/{pr})", "|") diff --git a/selfdrive/modeld/SConscript b/selfdrive/modeld/SConscript index 18e806a6ab9e9a..25788e3f537e40 100644 --- a/selfdrive/modeld/SConscript +++ b/selfdrive/modeld/SConscript @@ -68,7 +68,7 @@ mac_brew_string = f'HOME={os.path.expanduser("~")}' if arch == 'Darwin' else '' modeld_dir = Dir("#selfdrive/modeld").abspath compile_modeld_script = [File(f"{modeld_dir}/compile_modeld.py")] -driving_onnx_deps = [File(f"models/{m}.onnx").abspath for m in ['driving_vision', 'driving_policy']] +driving_onnx_deps = [File(f"models/{m}.onnx").abspath for m in ['driving_vision', 'driving_off_policy', 'driving_on_policy']] model_w, model_h = MEDMODEL_INPUT_SIZE frame_skip = ModelConstants.MODEL_RUN_FREQ // ModelConstants.MODEL_CONTEXT_FREQ @@ -78,7 +78,8 @@ cmd = (f'{tg_flags} {mac_brew_string} python3 {modeld_dir}/compile_modeld.py ' f'--model-size {model_w}x{model_h} ' f'--camera-resolutions {camera_res_args} ' f'--vision-onnx {File("models/driving_vision.onnx").abspath} ' - f'--policy-onnx {File("models/driving_policy.onnx").abspath} ' + f'--off-policy-onnx {File("models/driving_off_policy.onnx").abspath} ' + f'--on-policy-onnx {File("models/driving_on_policy.onnx").abspath} ' f'--output {pkl_path} --frame-skip {frame_skip}') node = lenv.Command(pkl_path, tinygrad_files + compile_modeld_script + driving_onnx_deps + [Value(camera_res_args), chunker_file, tg_devices_node], cmd) onnx_sizes_sum = sum(os.path.getsize(f) for f in driving_onnx_deps) @@ -102,7 +103,6 @@ for cam_w, cam_h in CAMERA_CONFIGS: f'--output {dm_pkl_path}') lenv.Command(dm_pkl_path, tinygrad_files + compile_dm_warp_script + compile_modeld_script + [tg_devices_node], cmd) -driving_metadata_deps = [File(f"models/{m}_metadata.pkl").abspath for m in ['driving_vision', 'driving_policy']] def tg_compile(flags, model_name): pythonpath_string = 'PYTHONPATH="${PYTHONPATH}:' + env.Dir("#tinygrad_repo").abspath + '"' fn = File(f"models/{model_name}").abspath diff --git a/selfdrive/modeld/compile_modeld.py b/selfdrive/modeld/compile_modeld.py index 031f13be3a072e..f06b36b2a7e373 100755 --- a/selfdrive/modeld/compile_modeld.py +++ b/selfdrive/modeld/compile_modeld.py @@ -102,6 +102,10 @@ def make_input_queues(vision_input_shapes, policy_input_shapes, frame_skip, devi 'tfm': np.zeros((3, 3), dtype=np.float32), 'big_tfm': np.zeros((3, 3), dtype=np.float32), } + if 'action_t' in policy_input_shapes: + npy['action_t'] = np.zeros(policy_input_shapes['action_t'], dtype=np.float32) + if 'prev_action' in policy_input_shapes: + npy['prev_action'] = np.zeros(policy_input_shapes['prev_action'][2], dtype=np.float32) input_queues = { 'img_q': Tensor.zeros(img_buf_shape, dtype='uint8', device=device).contiguous().realize(), 'big_img_q': Tensor.zeros(img_buf_shape, dtype='uint8', device=device).contiguous().realize(), @@ -109,6 +113,9 @@ def make_input_queues(vision_input_shapes, policy_input_shapes, frame_skip, devi 'desire_q': Tensor.zeros(frame_skip * dp[1], dp[0], dp[2], device=device).contiguous().realize(), **{k: Tensor(v, device='NPY').realize() for k, v in npy.items()}, } + if 'prev_action' in policy_input_shapes: + pa = policy_input_shapes['prev_action'] # (1, 25, 2) + input_queues['prev_action_q'] = Tensor.zeros(frame_skip * (pa[1] - 1) + 1, pa[0], pa[2], device=device).contiguous().realize() return input_queues, npy @@ -125,18 +132,24 @@ def sample_desire(buf, frame_skip): return buf.reshape(-1, frame_skip, *buf.shape[1:]).max(1).flatten(0, 1).unsqueeze(0) -def make_run_policy(vision_runner, policy_runner, nv12: NV12Frame, model_w, model_h, +def make_run_policy(vision_runner, off_policy_runner, on_policy_runner, nv12: NV12Frame, model_w, model_h, vision_features_slice, frame_skip, prepare_only=False): frame_prepare = make_frame_prepare(nv12, model_w, model_h) sample_skip_fn = partial(sample_skip, frame_skip=frame_skip) sample_desire_fn = partial(sample_desire, frame_skip=frame_skip) - def run_policy(img_q, big_img_q, feat_q, desire_q, desire, traffic_convention, tfm, big_tfm, frame, big_frame): + def run_policy(img_q, big_img_q, feat_q, desire_q, desire, traffic_convention, action_t, tfm, big_tfm, frame, big_frame, + prev_action_q=None, prev_action=None): tfm = tfm.to(Device.DEFAULT) big_tfm = big_tfm.to(Device.DEFAULT) desire = desire.to(Device.DEFAULT) traffic_convention = traffic_convention.to(Device.DEFAULT) - Tensor.realize(tfm, big_tfm, desire, traffic_convention) + action_t = action_t.to(Device.DEFAULT) + to_realize = [tfm, big_tfm, desire, traffic_convention, action_t] + if prev_action is not None: + prev_action = prev_action.to(Device.DEFAULT) + to_realize.append(prev_action) + Tensor.realize(*to_realize) img = shift_and_sample(img_q, frame_prepare(frame, tfm).unsqueeze(0), sample_skip_fn) big_img = shift_and_sample(big_img_q, frame_prepare(big_frame, big_tfm).unsqueeze(0), sample_skip_fn) @@ -150,22 +163,32 @@ def run_policy(img_q, big_img_q, feat_q, desire_q, desire, traffic_convention, t feat_buf = shift_and_sample(feat_q, new_feat, sample_skip_fn) desire_buf = shift_and_sample(desire_q, desire.reshape(1, 1, -1), sample_desire_fn) - inputs = {'features_buffer': feat_buf, 'desire_pulse': desire_buf, 'traffic_convention': traffic_convention} - policy_out = next(iter(policy_runner(inputs).values())).cast('float32') + inputs = { + 'features_buffer': feat_buf, + 'desire_pulse': desire_buf, + 'traffic_convention': traffic_convention, + 'action_t': action_t, + } + if prev_action_q is not None and prev_action is not None: + inputs['prev_action'] = shift_and_sample(prev_action_q, prev_action.reshape(1, 1, -1), sample_skip_fn) + on_policy_out = next(iter(on_policy_runner(inputs).values())).cast('float32') + off_policy_out = next(iter(off_policy_runner(inputs).values())).cast('float32') - return vision_out, policy_out + return vision_out, on_policy_out, off_policy_out return run_policy def compile_modeld(nv12: NV12Frame, model_w, model_h, prepare_only, frame_skip, - vision_runner, policy_runner, vision_metadata, policy_metadata): + vision_runner, off_policy_runner, on_policy_runner, + vision_metadata, off_policy_metadata, on_policy_metadata): print(f"Compiling combined policy JIT for {nv12.width}x{nv12.height} (prepare_only={prepare_only})...") vision_features_slice = vision_metadata['output_slices']['hidden_state'] vision_input_shapes = vision_metadata['input_shapes'] - policy_input_shapes = policy_metadata['input_shapes'] + policy_input_shapes = on_policy_metadata['input_shapes'] + assert policy_input_shapes == off_policy_metadata['input_shapes'] - _run = make_run_policy(vision_runner, policy_runner, nv12, model_w, model_h, + _run = make_run_policy(vision_runner, off_policy_runner, on_policy_runner, nv12, model_w, model_h, vision_features_slice, frame_skip, prepare_only) run_policy_jit = TinyJit(_run, prune=True) @@ -227,7 +250,8 @@ def _parse_size(s): p.add_argument('--camera-resolutions', type=_parse_size, nargs='+', required=True, help='camera resolutions WxH (one or more)') p.add_argument('--vision-onnx', required=True) - p.add_argument('--policy-onnx', required=True) + p.add_argument('--off-policy-onnx', required=True) + p.add_argument('--on-policy-onnx', required=True) p.add_argument('--output', required=True) p.add_argument('--frame-skip', type=int, required=True) args = p.parse_args() @@ -236,16 +260,19 @@ def _parse_size(s): # init runners once so weights are shared from get_model_metadata import make_metadata_dict vision_runner = OnnxRunner(args.vision_onnx) - policy_runner = OnnxRunner(args.policy_onnx) + off_policy_runner = OnnxRunner(args.off_policy_onnx) + on_policy_runner = OnnxRunner(args.on_policy_onnx) out['metadata']['vision'] = make_metadata_dict(args.vision_onnx) - out['metadata']['policy'] = make_metadata_dict(args.policy_onnx) + out['metadata']['off_policy'] = make_metadata_dict(args.off_policy_onnx) + out['metadata']['on_policy'] = make_metadata_dict(args.on_policy_onnx) for cam_w, cam_h in args.camera_resolutions: nv12 = NV12Frame(cam_w, cam_h, *get_nv12_info(cam_w, cam_h)) model_w, model_h = args.model_size out[(cam_w,cam_h)] = { name: compile_modeld(nv12, model_w, model_h, prepare_only, args.frame_skip, - vision_runner, policy_runner, out['metadata']['vision'], out['metadata']['policy']) + vision_runner, off_policy_runner, on_policy_runner, + out['metadata']['vision'], out['metadata']['off_policy'], out['metadata']['on_policy']) for name, prepare_only in [('warp_enqueue', True), ('run_policy', False)] } diff --git a/selfdrive/modeld/constants.py b/selfdrive/modeld/constants.py index ff7e1d86006e83..0fb09262d0192e 100644 --- a/selfdrive/modeld/constants.py +++ b/selfdrive/modeld/constants.py @@ -38,6 +38,7 @@ class ModelConstants: LANE_LINES_WIDTH = 2 ROAD_EDGES_WIDTH = 2 PLAN_WIDTH = 15 + ACTION_WIDTH = 2 DESIRE_PRED_WIDTH = 8 LAT_PLANNER_SOLUTION_WIDTH = 4 DESIRED_CURV_WIDTH = 1 diff --git a/selfdrive/modeld/fill_model_msg.py b/selfdrive/modeld/fill_model_msg.py index 82c4c92b1d53c7..92a2dfa58d7f3a 100644 --- a/selfdrive/modeld/fill_model_msg.py +++ b/selfdrive/modeld/fill_model_msg.py @@ -125,7 +125,10 @@ def fill_model_msg(base_msg: capnp._DynamicStructBuilder, extended_msg: capnp._D # meta meta = modelV2.meta - meta.desireState = net_output_data['desire_state'][0].reshape(-1).tolist() + if 'desire_state' in net_output_data: + meta.desireState = net_output_data['desire_state'][0].reshape(-1).tolist() + else: + meta.desireState = [0.0] * ModelConstants.DESIRE_PRED_WIDTH meta.desirePrediction = net_output_data['desire_pred'][0].reshape(-1).tolist() meta.engagedProb = net_output_data['meta'][0,Meta.ENGAGED].item() meta.init('disengagePredictions') diff --git a/selfdrive/modeld/modeld.py b/selfdrive/modeld/modeld.py index 6aa94be4955bdf..e0d264d4bd9a9d 100755 --- a/selfdrive/modeld/modeld.py +++ b/selfdrive/modeld/modeld.py @@ -18,12 +18,12 @@ from openpilot.system.camerad.cameras.nv12_info import get_nv12_info from openpilot.common.transformations.model import get_warp_matrix from openpilot.selfdrive.controls.lib.desire_helper import DesireHelper -from openpilot.selfdrive.controls.lib.drive_helpers import get_accel_from_plan, smooth_value, get_curvature_from_plan +from openpilot.selfdrive.controls.lib.drive_helpers import smooth_value from openpilot.selfdrive.modeld.parse_model_outputs import Parser from openpilot.selfdrive.modeld.compile_modeld import make_input_queues from openpilot.selfdrive.modeld.fill_model_msg import fill_model_msg, fill_pose_msg, PublishState from openpilot.common.file_chunker import read_file_chunked -from openpilot.selfdrive.modeld.constants import ModelConstants, Plan +from openpilot.selfdrive.modeld.constants import ModelConstants PROCESS_NAME = "selfdrive.modeld.modeld" @@ -35,20 +35,12 @@ -def get_action_from_model(model_output: dict[str, np.ndarray], prev_action: log.ModelDataV2.Action, - lat_action_t: float, long_action_t: float, v_ego: float) -> log.ModelDataV2.Action: - plan = model_output['plan'][0] - desired_accel, should_stop = get_accel_from_plan(plan[:,Plan.VELOCITY][:,0], - plan[:,Plan.ACCELERATION][:,0], - ModelConstants.T_IDXS, - action_t=long_action_t) - desired_accel = smooth_value(desired_accel, prev_action.desiredAcceleration, LONG_SMOOTH_SECONDS) +def get_action_from_model(model_output: dict[str, np.ndarray], prev_action: log.ModelDataV2.Action, v_ego: float) -> log.ModelDataV2.Action: + desired_accel = model_output['action'][0,1] + desired_curvature = model_output['action'][0,0] / (max(1.0, v_ego))**2 + should_stop = (v_ego < 0.3 and desired_accel < 0.1) - desired_curvature = get_curvature_from_plan(plan[:,Plan.T_FROM_CURRENT_EULER][:,2], - plan[:,Plan.ORIENTATION_RATE][:,2], - ModelConstants.T_IDXS, - v_ego, - lat_action_t) + desired_accel = smooth_value(desired_accel, prev_action.desiredAcceleration, LONG_SMOOTH_SECONDS) if v_ego > MIN_LAT_CONTROL_SPEED: desired_curvature = smooth_value(desired_curvature, prev_action.desiredCurvature, LAT_SMOOTH_SECONDS) else: @@ -79,7 +71,10 @@ def __init__(self, cam_w: int, cam_h: int): self.vision_input_names = list(self.vision_input_shapes.keys()) self.vision_output_slices = vision_metadata['output_slices'] - policy_metadata = jits['metadata']['policy'] + off_policy_metadata = jits['metadata']['off_policy'] + self.off_policy_output_slices = off_policy_metadata['output_slices'] + + policy_metadata = jits['metadata']['on_policy'] self.policy_input_shapes = policy_metadata['input_shapes'] self.policy_output_slices = policy_metadata['output_slices'] @@ -118,6 +113,10 @@ def run(self, bufs: dict[str, VisionBuf], transforms: dict[str, np.ndarray], self.npy['desire'][:] = np.where(inputs['desire_pulse'] - self.prev_desire > .99, inputs['desire_pulse'], 0) self.prev_desire[:] = inputs['desire_pulse'] self.npy['traffic_convention'][:] = inputs['traffic_convention'] + if 'action_t' in self.npy: + self.npy['action_t'][:] = inputs['action_t'] + if 'prev_action' in self.npy: + self.npy['prev_action'][:] = inputs['prev_action'] self.npy['tfm'][:,:] = transforms['img'][:,:] self.npy['big_tfm'][:,:] = transforms['big_img'][:,:] @@ -125,18 +124,20 @@ def run(self, bufs: dict[str, VisionBuf], transforms: dict[str, np.ndarray], self.warp_enqueue(**self.input_queues, frame=self.full_frames['img'], big_frame=self.full_frames['big_img']) return None - vision_output, policy_output = self.run_policy( + vision_output, policy_output, off_policy_output = self.run_policy( **self.input_queues, frame=self.full_frames['img'], big_frame=self.full_frames['big_img'] ) vision_output = vision_output.numpy().flatten() + off_policy_output = off_policy_output.numpy().flatten() policy_output = policy_output.numpy().flatten() vision_outputs_dict = self.parser.parse_vision_outputs(self.slice_outputs(vision_output, self.vision_output_slices)) + off_policy_outputs_dict = self.parser.parse_off_policy_outputs(self.slice_outputs(off_policy_output, self.off_policy_output_slices)) policy_outputs_dict = self.parser.parse_policy_outputs(self.slice_outputs(policy_output, self.policy_output_slices)) - combined_outputs_dict = {**vision_outputs_dict, **policy_outputs_dict} + combined_outputs_dict = {**vision_outputs_dict, **off_policy_outputs_dict, **policy_outputs_dict} if SEND_RAW_PRED: - combined_outputs_dict['raw_pred'] = np.concatenate([vision_output.copy(), policy_output.copy()]) + combined_outputs_dict['raw_pred'] = np.concatenate([vision_output.copy(), policy_output.copy(), off_policy_output.copy()]) return combined_outputs_dict @@ -279,9 +280,15 @@ def main(demo=False): bufs = {name: buf_extra if 'big' in name else buf_main for name in model.vision_input_names} transforms = {name: model_transform_extra if 'big' in name else model_transform_main for name in model.vision_input_names} + frame_delay = DT_MDL # compensate for time passed since the frame was captured: current_time - timestamp_eof is 50ms on average + action_delay = DT_MDL / 2 # middle of the interval between model output (current state) and next frame (expected state) + lat_action_t = lat_delay + frame_delay + action_delay + long_action_t = long_delay + frame_delay + action_delay inputs:dict[str, np.ndarray] = { 'desire_pulse': vec_desire, 'traffic_convention': traffic_convention, + 'action_t': np.array([lat_action_t, long_action_t], dtype=np.float32), + 'prev_action': np.array([prev_action.desiredCurvature * max(1.0, v_ego)**2, prev_action.desiredAcceleration], dtype=np.float32), } mt1 = time.perf_counter() @@ -294,9 +301,7 @@ def main(demo=False): drivingdata_send = messaging.new_message('drivingModelData') posenet_send = messaging.new_message('cameraOdometry') - frame_delay = DT_MDL # compensate for time passed since the frame was captured: current_time - timestamp_eof is 50ms on average - action_delay = DT_MDL / 2 # middle of the interval between model output (current state) and next frame (expected state) - action = get_action_from_model(model_output, prev_action, lat_delay + frame_delay + action_delay, long_delay + frame_delay + action_delay, v_ego) + action = get_action_from_model(model_output, prev_action, v_ego) prev_action = action fill_model_msg(drivingdata_send, modelv2_send, model_output, action, publish_state, meta_main.frame_id, meta_extra.frame_id, frame_id, diff --git a/selfdrive/modeld/models/big_driving_policy.onnx b/selfdrive/modeld/models/big_driving_policy.onnx deleted file mode 120000 index e1b653a14a03d6..00000000000000 --- a/selfdrive/modeld/models/big_driving_policy.onnx +++ /dev/null @@ -1 +0,0 @@ -driving_policy.onnx \ No newline at end of file diff --git a/selfdrive/modeld/models/big_driving_vision.onnx b/selfdrive/modeld/models/big_driving_vision.onnx deleted file mode 120000 index 28ee71dd746e63..00000000000000 --- a/selfdrive/modeld/models/big_driving_vision.onnx +++ /dev/null @@ -1 +0,0 @@ -driving_vision.onnx \ No newline at end of file diff --git a/selfdrive/modeld/models/driving_off_policy.onnx b/selfdrive/modeld/models/driving_off_policy.onnx new file mode 100644 index 00000000000000..c37ba2a2156478 --- /dev/null +++ b/selfdrive/modeld/models/driving_off_policy.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:978742c0da21bb97d74ec8fd50d20a871f03c6f74de834ff861c925588dbadd6 +size 18178536 diff --git a/selfdrive/modeld/models/driving_on_policy.onnx b/selfdrive/modeld/models/driving_on_policy.onnx new file mode 100644 index 00000000000000..5b8646c363b2ab --- /dev/null +++ b/selfdrive/modeld/models/driving_on_policy.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5ee074d71004a6aa846387e62dff2494977eb9d7e40e810f6d385fa6f4292a7 +size 16745042 diff --git a/selfdrive/modeld/models/driving_policy.onnx b/selfdrive/modeld/models/driving_policy.onnx deleted file mode 100644 index 611ae9fe85f837..00000000000000 --- a/selfdrive/modeld/models/driving_policy.onnx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:78477124cbf3ffe30fa951ebada8410b43c4242c6054584d656f1d329b067e15 -size 14060847 diff --git a/selfdrive/modeld/models/driving_vision.onnx b/selfdrive/modeld/models/driving_vision.onnx index 6c9fc4c84d3632..f508eeb24dc637 100644 --- a/selfdrive/modeld/models/driving_vision.onnx +++ b/selfdrive/modeld/models/driving_vision.onnx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ee29ee5bce84d1ce23e9ff381280de9b4e4d96d2934cd751740354884e112c66 -size 46877473 +oid sha256:f53b7fc541cfc62f3ecadab87792cb56b219379556a5c13d2b84726564a83a08 +size 23210690 diff --git a/selfdrive/modeld/parse_model_outputs.py b/selfdrive/modeld/parse_model_outputs.py index a0b45d2a981685..1511c7e6f50b08 100644 --- a/selfdrive/modeld/parse_model_outputs.py +++ b/selfdrive/modeld/parse_model_outputs.py @@ -96,12 +96,19 @@ def parse_vision_outputs(self, outs: dict[str, np.ndarray]) -> dict[str, np.ndar self.parse_mdn('pose', outs, in_N=0, out_N=0, out_shape=(ModelConstants.POSE_WIDTH,)) self.parse_mdn('wide_from_device_euler', outs, in_N=0, out_N=0, out_shape=(ModelConstants.WIDE_FROM_DEVICE_WIDTH,)) self.parse_mdn('road_transform', outs, in_N=0, out_N=0, out_shape=(ModelConstants.POSE_WIDTH,)) + self.parse_categorical_crossentropy('desire_pred', outs, out_shape=(ModelConstants.DESIRE_PRED_LEN,ModelConstants.DESIRE_PRED_WIDTH)) + self.parse_binary_crossentropy('meta', outs) + return outs + + def parse_off_policy_outputs(self, outs: dict[str, np.ndarray]) -> dict[str, np.ndarray]: + plan_mhp = self.is_mhp(outs, 'plan', ModelConstants.IDX_N * ModelConstants.PLAN_WIDTH) + plan_in_N, plan_out_N = (ModelConstants.PLAN_MHP_N, ModelConstants.PLAN_MHP_SELECTION) if plan_mhp else (0, 0) + self.parse_mdn('plan', outs, in_N=plan_in_N, out_N=plan_out_N, out_shape=(ModelConstants.IDX_N, ModelConstants.PLAN_WIDTH)) self.parse_mdn('lane_lines', outs, in_N=0, out_N=0, out_shape=(ModelConstants.NUM_LANE_LINES,ModelConstants.IDX_N,ModelConstants.LANE_LINES_WIDTH)) self.parse_mdn('road_edges', outs, in_N=0, out_N=0, out_shape=(ModelConstants.NUM_ROAD_EDGES,ModelConstants.IDX_N,ModelConstants.LANE_LINES_WIDTH)) self.parse_binary_crossentropy('lane_lines_prob', outs) - self.parse_categorical_crossentropy('desire_pred', outs, out_shape=(ModelConstants.DESIRE_PRED_LEN,ModelConstants.DESIRE_PRED_WIDTH)) - self.parse_binary_crossentropy('meta', outs) self.parse_binary_crossentropy('lead_prob', outs) + self.parse_categorical_crossentropy('desire_state', outs, out_shape=(ModelConstants.DESIRE_PRED_WIDTH,)) lead_mhp = self.is_mhp(outs, 'lead', ModelConstants.LEAD_MHP_SELECTION * ModelConstants.LEAD_TRAJ_LEN * ModelConstants.LEAD_WIDTH) lead_in_N, lead_out_N = (ModelConstants.LEAD_MHP_N, ModelConstants.LEAD_MHP_SELECTION) if lead_mhp else (0, 0) lead_out_shape = (ModelConstants.LEAD_TRAJ_LEN, ModelConstants.LEAD_WIDTH) if lead_mhp else \ @@ -110,15 +117,11 @@ def parse_vision_outputs(self, outs: dict[str, np.ndarray]) -> dict[str, np.ndar return outs def parse_policy_outputs(self, outs: dict[str, np.ndarray]) -> dict[str, np.ndarray]: - plan_mhp = self.is_mhp(outs, 'plan', ModelConstants.IDX_N * ModelConstants.PLAN_WIDTH) - plan_in_N, plan_out_N = (ModelConstants.PLAN_MHP_N, ModelConstants.PLAN_MHP_SELECTION) if plan_mhp else (0, 0) - self.parse_mdn('plan', outs, in_N=plan_in_N, out_N=plan_out_N, out_shape=(ModelConstants.IDX_N, ModelConstants.PLAN_WIDTH)) - if 'planplus' in outs: - self.parse_mdn('planplus', outs, in_N=0, out_N=0, out_shape=(ModelConstants.IDX_N, ModelConstants.PLAN_WIDTH)) - self.parse_categorical_crossentropy('desire_state', outs, out_shape=(ModelConstants.DESIRE_PRED_WIDTH,)) + self.parse_mdn('action', outs, in_N=0, out_N=0, out_shape=(ModelConstants.ACTION_WIDTH,)) return outs def parse_outputs(self, outs: dict[str, np.ndarray]) -> dict[str, np.ndarray]: outs = self.parse_vision_outputs(outs) + outs = self.parse_off_policy_outputs(outs) outs = self.parse_policy_outputs(outs) return outs From 72101c6e50ea594f24e75d3b605abf8b5cab1d6b Mon Sep 17 00:00:00 2001 From: Bruce Wayne Date: Tue, 19 May 2026 18:27:58 -0700 Subject: [PATCH 2/3] modeld: update driving model --- selfdrive/modeld/models/driving_off_policy.onnx | 4 ++-- selfdrive/modeld/models/driving_on_policy.onnx | 4 ++-- selfdrive/modeld/models/driving_vision.onnx | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/selfdrive/modeld/models/driving_off_policy.onnx b/selfdrive/modeld/models/driving_off_policy.onnx index c37ba2a2156478..9ec6718cf4b80b 100644 --- a/selfdrive/modeld/models/driving_off_policy.onnx +++ b/selfdrive/modeld/models/driving_off_policy.onnx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:978742c0da21bb97d74ec8fd50d20a871f03c6f74de834ff861c925588dbadd6 -size 18178536 +oid sha256:a8e3adb95ae42131cec00ee063f9e1015279e7730fd53bf75de099de3aa2a49e +size 18178309 diff --git a/selfdrive/modeld/models/driving_on_policy.onnx b/selfdrive/modeld/models/driving_on_policy.onnx index 5b8646c363b2ab..815704b1281321 100644 --- a/selfdrive/modeld/models/driving_on_policy.onnx +++ b/selfdrive/modeld/models/driving_on_policy.onnx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f5ee074d71004a6aa846387e62dff2494977eb9d7e40e810f6d385fa6f4292a7 -size 16745042 +oid sha256:0fd3fb0ff34b74940cbca875a6e5f14e894fdac979bc8cf04c260653f5435312 +size 16744879 diff --git a/selfdrive/modeld/models/driving_vision.onnx b/selfdrive/modeld/models/driving_vision.onnx index f508eeb24dc637..13591382305c0e 100644 --- a/selfdrive/modeld/models/driving_vision.onnx +++ b/selfdrive/modeld/models/driving_vision.onnx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f53b7fc541cfc62f3ecadab87792cb56b219379556a5c13d2b84726564a83a08 -size 23210690 +oid sha256:3261a5e5f1f2d43b8c8e98e31ebaf2ebc56a2b204172f712f00f4fa67b10f16f +size 23209954 From 316c8ccf70dc4fa0167fa25dea6f2a16caa92684 Mon Sep 17 00:00:00 2001 From: Bruce Wayne Date: Tue, 19 May 2026 18:38:52 -0700 Subject: [PATCH 3/3] modeld: remove prev_action input --- selfdrive/modeld/compile_modeld.py | 16 ++-------------- selfdrive/modeld/modeld.py | 3 --- 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/selfdrive/modeld/compile_modeld.py b/selfdrive/modeld/compile_modeld.py index f06b36b2a7e373..7f80190eefec63 100755 --- a/selfdrive/modeld/compile_modeld.py +++ b/selfdrive/modeld/compile_modeld.py @@ -104,8 +104,6 @@ def make_input_queues(vision_input_shapes, policy_input_shapes, frame_skip, devi } if 'action_t' in policy_input_shapes: npy['action_t'] = np.zeros(policy_input_shapes['action_t'], dtype=np.float32) - if 'prev_action' in policy_input_shapes: - npy['prev_action'] = np.zeros(policy_input_shapes['prev_action'][2], dtype=np.float32) input_queues = { 'img_q': Tensor.zeros(img_buf_shape, dtype='uint8', device=device).contiguous().realize(), 'big_img_q': Tensor.zeros(img_buf_shape, dtype='uint8', device=device).contiguous().realize(), @@ -113,9 +111,6 @@ def make_input_queues(vision_input_shapes, policy_input_shapes, frame_skip, devi 'desire_q': Tensor.zeros(frame_skip * dp[1], dp[0], dp[2], device=device).contiguous().realize(), **{k: Tensor(v, device='NPY').realize() for k, v in npy.items()}, } - if 'prev_action' in policy_input_shapes: - pa = policy_input_shapes['prev_action'] # (1, 25, 2) - input_queues['prev_action_q'] = Tensor.zeros(frame_skip * (pa[1] - 1) + 1, pa[0], pa[2], device=device).contiguous().realize() return input_queues, npy @@ -138,18 +133,13 @@ def make_run_policy(vision_runner, off_policy_runner, on_policy_runner, nv12: NV sample_skip_fn = partial(sample_skip, frame_skip=frame_skip) sample_desire_fn = partial(sample_desire, frame_skip=frame_skip) - def run_policy(img_q, big_img_q, feat_q, desire_q, desire, traffic_convention, action_t, tfm, big_tfm, frame, big_frame, - prev_action_q=None, prev_action=None): + def run_policy(img_q, big_img_q, feat_q, desire_q, desire, traffic_convention, action_t, tfm, big_tfm, frame, big_frame): tfm = tfm.to(Device.DEFAULT) big_tfm = big_tfm.to(Device.DEFAULT) desire = desire.to(Device.DEFAULT) traffic_convention = traffic_convention.to(Device.DEFAULT) action_t = action_t.to(Device.DEFAULT) - to_realize = [tfm, big_tfm, desire, traffic_convention, action_t] - if prev_action is not None: - prev_action = prev_action.to(Device.DEFAULT) - to_realize.append(prev_action) - Tensor.realize(*to_realize) + Tensor.realize(tfm, big_tfm, desire, traffic_convention, action_t) img = shift_and_sample(img_q, frame_prepare(frame, tfm).unsqueeze(0), sample_skip_fn) big_img = shift_and_sample(big_img_q, frame_prepare(big_frame, big_tfm).unsqueeze(0), sample_skip_fn) @@ -169,8 +159,6 @@ def run_policy(img_q, big_img_q, feat_q, desire_q, desire, traffic_convention, a 'traffic_convention': traffic_convention, 'action_t': action_t, } - if prev_action_q is not None and prev_action is not None: - inputs['prev_action'] = shift_and_sample(prev_action_q, prev_action.reshape(1, 1, -1), sample_skip_fn) on_policy_out = next(iter(on_policy_runner(inputs).values())).cast('float32') off_policy_out = next(iter(off_policy_runner(inputs).values())).cast('float32') diff --git a/selfdrive/modeld/modeld.py b/selfdrive/modeld/modeld.py index e0d264d4bd9a9d..19eb2446db5eed 100755 --- a/selfdrive/modeld/modeld.py +++ b/selfdrive/modeld/modeld.py @@ -115,8 +115,6 @@ def run(self, bufs: dict[str, VisionBuf], transforms: dict[str, np.ndarray], self.npy['traffic_convention'][:] = inputs['traffic_convention'] if 'action_t' in self.npy: self.npy['action_t'][:] = inputs['action_t'] - if 'prev_action' in self.npy: - self.npy['prev_action'][:] = inputs['prev_action'] self.npy['tfm'][:,:] = transforms['img'][:,:] self.npy['big_tfm'][:,:] = transforms['big_img'][:,:] @@ -288,7 +286,6 @@ def main(demo=False): 'desire_pulse': vec_desire, 'traffic_convention': traffic_convention, 'action_t': np.array([lat_action_t, long_action_t], dtype=np.float32), - 'prev_action': np.array([prev_action.desiredCurvature * max(1.0, v_ego)**2, prev_action.desiredAcceleration], dtype=np.float32), } mt1 = time.perf_counter()