NVIDIA · rostan-t · Mar 2, 2026 · Mar 2, 2026 · Mar 2, 2026 · Mar 25, 2026
diff --git a/dali/operators/decoder/image_decoder.cc b/dali/operators/decoder/image_decoder.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -191,7 +191,10 @@ Please note that GPU acceleration for JPEG 2000 decoding is only available for C
   .NumInput(1)
   .NumOutput(1)
   .AddParent("ImageDecoderAttr")
-  .AddParent("CachedDecoderAttr");
+  .AddParent("CachedDecoderAttr")
+  .OutputDType(0, DALI_UINT8)
+  .OutputNDim(0, 3)
+  .OutputLayout(0, "HWC");
 
 // Fused
 
@@ -309,7 +312,10 @@ of the slice (s0, s1, s2, …).
 
 Integer coordinates are interpreted as absolute coordinates, while float coordinates can be
 interpreted as absolute or relative coordinates, depending on the value of
-`normalized_shape`.)code");
+`normalized_shape`.)code")
+  .OutputDType(0, DALI_UINT8)
+  .OutputNDim(0, 3)
+  .OutputLayout(0, "HWC");
 
 
 // Deprecated aliases

diff --git a/dali/operators/generic/constant_value.cc b/dali/operators/generic/constant_value.cc
@@ -152,6 +152,13 @@ void ConstantValue<CPUBackend>::RunImpl(Workspace &ws) {
   }
 }
 
+// Output ndim for Full/Zeros/Ones: the length of `shape` when it can be read from the spec.
+inline std::optional<int> ConstantValueNDim(const OpSpec &spec) {
+  std::vector<int> dims;
+  const bool known = spec.TryGetRepeatedArgument(dims, "shape");
+  return known ? std::optional<int>(dims.size()) : std::nullopt;
+}
+
 DALI_SCHEMA(Full)
     .DocStr(R"code(Returns new data of given shape and type, filled with a fill value.
 
@@ -166,7 +173,8 @@ In case of different dimensionality, the input shape is padded with 1s for the m
                                       true)
     .AddOptionalArg<TensorLayout>("layout", R"code(Output layout.
 
-If set and not empty, the layout must match the dimensionality of the output.)code", nullptr);
+If set and not empty, the layout must match the dimensionality of the output.)code", nullptr)
+    .OutputNDim(0, ConstantValueNDim);
 
 DALI_REGISTER_OPERATOR(Full, Full<CPUBackend>, CPU);
 
@@ -192,7 +200,8 @@ DALI_SCHEMA(Zeros)
     .AddOptionalArg<TensorLayout>("layout", R"code(Output layout.
 
 If set and not empty, the layout must match the dimensionality of the output.)code", nullptr)
-    .AddOptionalTypeArg("dtype", R"code(Output data type.)code", DALI_INT32);
+    .AddOptionalTypeArg("dtype", R"code(Output data type.)code", DALI_INT32)
+    .OutputNDim(0, ConstantValueNDim);
 DALI_REGISTER_OPERATOR(Zeros, Zeros<CPUBackend>, CPU);
 
 DALI_SCHEMA(ZerosLike)
@@ -213,7 +222,8 @@ DALI_SCHEMA(Ones)
     .AddOptionalArg<TensorLayout>("layout", R"code(Output layout.
 
 If set and not empty, the layout must match the dimensionality of the output.)code", nullptr)
-    .AddOptionalTypeArg("dtype", R"code(Output data type.)code", DALI_INT32);
+    .AddOptionalTypeArg("dtype", R"code(Output data type.)code", DALI_INT32)
+    .OutputNDim(0, ConstantValueNDim);
 DALI_REGISTER_OPERATOR(Ones, Ones<CPUBackend>, CPU);
 
 DALI_SCHEMA(OnesLike)

diff --git a/dali/operators/generic/expand_dims.cc b/dali/operators/generic/expand_dims.cc
@@ -43,7 +43,47 @@ layout will be empty.")code")
   .AddOptionalArg("new_axis_names", R"code(Names of the new dimensions in the data layout.
 
 The length of `new_axis_names` must match the length of `axes`.
-If argument isn't be provided, the layout will be cleared.)code", TensorLayout(""));
+If argument isn't be provided, the layout will be cleared.)code", TensorLayout(""))
+  .OutputNDim(0, [](const OpSpec &spec)->std::optional<int> {  // input ndim + number of `axes`
+    const auto &ndim_in = spec.InputDesc(0).ndim;
+    if (ndim_in.has_value())
+      return *ndim_in + spec.GetRepeatedArgument<int>("axes").size();
+    return std::nullopt;  // input dimensionality is not known
+  })
+  // Layout inference: `new_axis_names` inserted into the input layout at positions `axes`.
+  .OutputLayout(0, [](const OpSpec &spec)->std::optional<TensorLayout> {
+    auto &desc = spec.InputDesc(0);
+    if (!desc.layout)
+      return std::nullopt;  // input layout not known - nothing can be inferred
+    auto axes = spec.GetRepeatedArgument<int>("axes");
+    if (axes.empty())
+      return desc.layout;  // no axes are added - the input layout is returned as is
+    auto names = spec.GetArgument<TensorLayout>("new_axis_names");
+    int num_new_axes = ssize(axes);
+    if (num_new_axes != names.ndim())
+      return "";  // names don't match the axes one-to-one - report an empty layout
+
+    // Pair each axis index with its name and order the pairs by index, so that the merge
+    // below also works when `axes` is not given in ascending order.
+    SmallVector<std::pair<int, char>, 6> new_axes;
+    for (int i = 0; i < num_new_axes; i++)
+      new_axes.push_back({ axes[i], names[i] });
+    std::sort(new_axes.begin(), new_axes.end());
+
+    TensorLayout out_layout = "";
+    int in_ndim = desc.layout->ndim();
+    int src_axis = 0, new_axis = 0;
+    for (int j = 0; j < in_ndim + num_new_axes; j++) {
+      if (new_axis < num_new_axes && new_axes[new_axis].first == j) {  // inserting new axis
+        out_layout += new_axes[new_axis++].second;
+      } else if (src_axis < in_ndim) {
+        out_layout += (*desc.layout)[src_axis++];
+      } else {
+        return std::nullopt;  // `axes` is inconsistent with the layout length - can't infer
+      }
+    }
+    return out_layout;
+  });
 
 template <typename Backend>
 ExpandDims<Backend>::ExpandDims(const OpSpec &spec)

diff --git a/dali/operators/generic/join.cc b/dali/operators/generic/join.cc
@@ -58,7 +58,32 @@ constructed by inserting that character into the input layout at the position in
 For example, specifying ``axis = 0`` and ``axis_name = "C"`` with input layout "HW" will yield
 the output layout "CHW")", nullptr, false)
   .NumInput(1, 999)
-  .NumOutput(1);
+  .NumOutput(1)
+  // Output ndim inference: the dimensionality of the inputs plus one.
+  .OutputNDim(0, [](const OpSpec &spec)->std::optional<int> {
+    // The inputs must have the same ndim, so the first input with a known ndim decides.
+    for (int idx = 0; idx < spec.NumInput(); idx++) {
+      const auto &in_ndim = spec.InputDesc(idx).ndim;
+      if (!in_ndim.has_value())
+        continue;
+      return *in_ndim + 1;
+    }
+    // None of the inputs has a known ndim.
+    return std::nullopt;
+  })
+  // Output layout inference: `axis_name` inserted at position `axis` of a known input layout.
+  .OutputLayout(0, [](const OpSpec &spec)->std::optional<TensorLayout> {
+    std::string axis_label;
+    if (!spec.TryGetArgument(axis_label, "axis_name") || axis_label.length() != 1)
+      return std::nullopt;  // without a single-character name no layout is inferred
+    const int pos = spec.GetArgument<int>("axis");
+    for (int idx = 0; idx < spec.NumInput(); idx++) {
+      const auto &in_layout = spec.InputDesc(idx).layout;
+      if (in_layout && !in_layout->empty())  // the first known, non-empty layout is used
+        return in_layout->sub(0, pos) + axis_label + in_layout->sub(pos);
+    }
+    return std::nullopt;
+  });
 
 #define TENSOR_JOIN_TYPES (bool, uint8_t, int8_t, uint16_t, int16_t, uint32_t, int32_t, \
                           uint64_t, int64_t, float16, float, double)

diff --git a/dali/operators/generic/reshape.cc b/dali/operators/generic/reshape.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -25,6 +25,16 @@
 
 namespace dali {
 
+// Output ndim for the Reshape schema: the length of `shape` or, failing that, of `rel_shape`.
+inline std::optional<int> ReshapeNDimFunc(const OpSpec &spec) {
+  std::vector<int> abs_extents;
+  std::vector<float> rel_extents;
+  if (spec.TryGetRepeatedArgument(abs_extents, "shape"))
+    return std::optional<int>(abs_extents.size());
+  const bool has_rel = spec.TryGetRepeatedArgument(rel_extents, "rel_shape");
+  return has_rel ? std::optional<int>(rel_extents.size()) : std::nullopt;
+}
+
 DALI_SCHEMA(Reshape)
   .DocStr(R"code(Treats content of the input as if it had a different shape and/or layout.
 
@@ -94,7 +104,8 @@ extents in `rel_shape` describe to the target dimensions. In the example above,
 ``rel_shape = [-1, 0.5, 2]`` would result in the output shape ``[1, 100, 600]``.
 
 All indices must be in the range of valid dimensions of the input, or -1.)code",
-                  nullptr, true);
+                  nullptr, true)
+  .OutputNDim(0, ReshapeNDimFunc);
 
 DALI_SCHEMA(Reinterpret)
   .DocStr(R"(Treats content of the input as if it had a different type, shape, and/or layout.

diff --git a/dali/operators/generic/shapes.cc b/dali/operators/generic/shapes.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -26,7 +26,9 @@ DALI_SCHEMA(Shapes)
     .AddOptionalTypeArg("dtype", "Data type to which the sizes are converted.", DALI_INT64)
     .DeprecateArgInFavorOf("type", "dtype", "0.27")
     .MakeDocHidden()
-    .Deprecate("1.44", "", "Use :meth:`nvidia.dali.pipeline.DataNode.shape` instead.");
+    .Deprecate("1.44", "", "Use :meth:`nvidia.dali.pipeline.DataNode.shape` instead.")
+    .OutputNDim(0, 1)
+    .OutputLayout(0, std::nullopt);
 
 DALI_SCHEMA(_Shape)
     .DocStr(R"(Returns the shapes of tensors in the input batch.
@@ -39,7 +41,9 @@ INTERNAL ONLY; used by DataNode.shape()
     .AllowSequences()
     .SupportVolumetric()
     .MakeDocHidden()
-    .AddOptionalTypeArg("dtype", "Data type to which the sizes are converted.", DALI_INT64);
+    .AddOptionalTypeArg("dtype", "Data type to which the sizes are converted.", DALI_INT64)
+    .OutputNDim(0, 1)
+    .OutputLayout(0, std::nullopt);
 
 DALI_REGISTER_OPERATOR(Shapes, Shapes<CPUBackend>, CPU);
 DALI_REGISTER_OPERATOR(Shapes, Shapes<GPUBackend>, GPU);

diff --git a/dali/operators/generic/slice/subscript.cc b/dali/operators/generic/slice/subscript.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -64,7 +64,32 @@ DALI_SCHEMA(_TensorSubscript)
     .INDEX_ARGS(28)
     .INDEX_ARGS(29)
     .INDEX_ARGS(30)
-    .INDEX_ARGS(31);
+    .INDEX_ARGS(31)
+    // Output ndim inference: each defined `at_<i>` argument removes one input dimension.
+    .OutputNDim(0, [](const OpSpec &spec)->std::optional<int> {
+      const auto &in_ndim = spec.InputDesc(0).ndim;
+      if (!in_ndim)
+        return std::nullopt;
+      int num_indexed = 0;
+      for (int d = 0; d < kMaxSubscripts; d++)
+        num_indexed += spec.ArgumentDefined(make_string("at_", d)) ? 1 : 0;
+      const int remaining = *in_ndim - num_indexed;
+      if (remaining >= 0)
+        return remaining;
+      return std::nullopt;  // more `at_<i>` arguments than input dimensions
+    })
+    .OutputLayout(0, [](const OpSpec &spec)->std::optional<TensorLayout> {
+      const auto &in_layout = spec.InputDesc(0).layout;
+      if (!in_layout.has_value())
+        return std::nullopt;  // unknown input layout
+      if (in_layout->empty())
+        return "";
+      TensorLayout kept;  // input axis names for which no `at_<i>` argument is defined
+      for (int d = 0; d < in_layout->ndim(); d++)
+        if (!spec.ArgumentDefined(make_string("at_", d)))
+          kept += (*in_layout)[d];
+      return kept;
+    });
 
 template <>
 template <int ndim, int element_size>

diff --git a/dali/operators/image/crop/bbox_crop.cc b/dali/operators/image/crop/bbox_crop.cc
@@ -364,7 +364,23 @@ if the fraction of their area within the ROI is greater than or equal to the thr
 For example, when `bbox_prune_threshold=0.2` bboxes that have at least 20% of their original area within
 the ROI are kept, bboxes less than or equal to are pruned. If `bbox_prune_threshold=0.0`, all boxes that
 have some presence in the ROI are kept.)code",
-        nullptr);
+        nullptr)
+    .OutputNDim(0, 1)
+    .OutputNDim(1, 1)
+    .OutputNDim(2, 2)
+    .OutputNDim(3, 1)
+    .OutputNDim(4, 1)
+    .OutputDType(0, DALI_FLOAT)
+    .OutputDType(1, DALI_FLOAT)
+    .OutputDType(2, DALI_FLOAT)
+    .OutputDType(3, DALI_INT32)
+    .OutputDType(4, DALI_INT32)
+    .OutputLayout(0, "")
+    .OutputLayout(1, "")
+    .OutputLayout(2, "")
+    .OutputLayout(3, "")
+    .OutputLayout(4, "");
+
 
 template <int ndim>
 class RandomBBoxCropImpl : public OpImplBase<CPUBackend> {

diff --git a/dali/operators/image/crop/crop_mirror_normalize.cc b/dali/operators/image/crop/crop_mirror_normalize.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2019-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -71,7 +71,14 @@ This argument is useful when using integer outputs to improve dynamic range util
 This argument is useful when using unsigned integer outputs to improve dynamic range utilization.)",
     0.0f)
   .AddParent("CropAttr")
-  .AddParent("OutOfBoundsAttr");
+  .AddParent("OutOfBoundsAttr")
+  // Output layout: a non-empty `output_layout` argument, otherwise the layout of input 0.
+  .OutputLayout(0, [](const OpSpec &spec)->std::optional<TensorLayout> {
+    const auto requested = spec.GetArgument<TensorLayout>("output_layout");
+    if (!(requested == ""))
+      return requested;
+    return spec.InputDesc(0).layout;  // may itself be unknown
+  });
 
 DALI_REGISTER_OPERATOR(CropMirrorNormalize, CropMirrorNormalize<CPUBackend>, CPU);
 

diff --git a/dali/operators/image/remap/remap.cu b/dali/operators/image/remap/remap.cu
@@ -1,4 +1,4 @@
-// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -44,6 +44,7 @@ class RemapGpu : public Remap<GPUBackend> {
     const auto &mapx = ws.template Input<B>(1);
     const auto &mapy = ws.template Input<B>(2);
     auto &output = ws.template Output<B>(0);
+    output.SetLayout(input.GetLayout());
     km_.Resize<Kernel>(1, spec_.template GetArgument<int>("device_id"));
     kernels::KernelContext ctx;
     ctx.gpu.stream = ws.stream();

diff --git a/dali/operators/image/resize/resize.cc b/dali/operators/image/resize/resize.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2017-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2017-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -34,7 +34,10 @@ DALI_SCHEMA(Resize)
   .SupportVolumetric()
   .AllowSequences()
   .AddParent("ResizeAttr")
-  .AddParent("ResamplingFilterAttr");
+  .AddParent("ResamplingFilterAttr")
+  .OutputDType(1, DALI_INT32)
+  .OutputNDim(1, 1)
+  .OutputLayout(1, "");
 
 template<typename Backend>
 Resize<Backend>::Resize(const OpSpec &spec)

diff --git a/dali/operators/imgcodec/decoder_schema.cc b/dali/operators/imgcodec/decoder_schema.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -161,7 +161,8 @@ The implementation uses NVIDIA nvImageCodec to decode images.
   .NumInput(1)
   .NumOutput(1)
   .AddParent("ImgcodecDecoderAttr")
-  .AddParent("CachedDecoderAttr");
+  .AddParent("CachedDecoderAttr")
+  .OutputLayout(0, "HWC");
 
 DALI_SCHEMA(experimental__decoders__ImageCrop)
   .DocStr(R"code(Decodes images and extracts regions-of-interest (ROI) that are specified
@@ -189,7 +190,8 @@ When possible, the operator uses the ROI decoding, reducing the decoding time an
   .NumInput(1)
   .NumOutput(1)
   .AddParent("ImgcodecDecoderAttr")
-  .AddParent("CropAttr");
+  .AddParent("CropAttr")
+  .OutputLayout(0, "HWC");
 
 
 DALI_SCHEMA(experimental__decoders__ImageSlice)
@@ -259,7 +261,8 @@ of the slice (s0, s1, s2, …).
 
 Integer coordinates are interpreted as absolute coordinates, while float coordinates can be
 interpreted as absolute or relative coordinates, depending on the value of
-`normalized_shape`.)code");
+`normalized_shape`.)code")
+  .OutputLayout(0, "HWC");
 
 
 DALI_SCHEMA(experimental__decoders__ImageRandomCrop)
@@ -289,7 +292,8 @@ When possible, the operator uses the ROI decoding, reducing the decoding time an
   .NumInput(1)
   .NumOutput(1)
   .AddParent("ImgcodecDecoderAttr")
-  .AddParent("RandomCropAttr");
+  .AddParent("RandomCropAttr")
+  .OutputLayout(0, "HWC");
 
 }  // namespace imgcodec
 }  // namespace dali