Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions dali/operators/decoder/image_decoder.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2019-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -191,7 +191,10 @@ Please note that GPU acceleration for JPEG 2000 decoding is only available for C
.NumInput(1)
.NumOutput(1)
.AddParent("ImageDecoderAttr")
.AddParent("CachedDecoderAttr");
.AddParent("CachedDecoderAttr")
.OutputDType(0, DALI_UINT8)
.OutputNDim(0, 3)
.OutputLayout(0, "HWC");

// Fused

Expand Down Expand Up @@ -309,7 +312,10 @@ of the slice (s0, s1, s2, …).

Integer coordinates are interpreted as absolute coordinates, while float coordinates can be
interpreted as absolute or relative coordinates, depending on the value of
`normalized_shape`.)code");
`normalized_shape`.)code")
.OutputDType(0, DALI_UINT8)
.OutputNDim(0, 3)
.OutputLayout(0, "HWC");


// Deprecated aliases
Expand Down
16 changes: 13 additions & 3 deletions dali/operators/generic/constant_value.cc
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,13 @@ void ConstantValue<CPUBackend>::RunImpl(Workspace &ws) {
}
}

// Infers the number of output dimensions from the `shape` argument of `spec`.
//
// Returns the number of entries in `shape` when TryGetRepeatedArgument succeeds
// in reading it, and std::nullopt (dimensionality unknown) otherwise.
inline std::optional<int> ConstantValueNDim(const OpSpec &spec) {
  std::vector<int> dims;
  const bool has_shape = spec.TryGetRepeatedArgument(dims, "shape");
  if (!has_shape)
    return std::nullopt;
  return static_cast<int>(dims.size());
}

DALI_SCHEMA(Full)
.DocStr(R"code(Returns new data of given shape and type, filled with a fill value.

Expand All @@ -166,7 +173,8 @@ In case of different dimensionality, the input shape is padded with 1s for the m
true)
.AddOptionalArg<TensorLayout>("layout", R"code(Output layout.

If set and not empty, the layout must match the dimensionality of the output.)code", nullptr);
If set and not empty, the layout must match the dimensionality of the output.)code", nullptr)
.OutputNDim(0, ConstantValueNDim);

DALI_REGISTER_OPERATOR(Full, Full<CPUBackend>, CPU);

Expand All @@ -192,7 +200,8 @@ DALI_SCHEMA(Zeros)
.AddOptionalArg<TensorLayout>("layout", R"code(Output layout.

If set and not empty, the layout must match the dimensionality of the output.)code", nullptr)
.AddOptionalTypeArg("dtype", R"code(Output data type.)code", DALI_INT32);
.AddOptionalTypeArg("dtype", R"code(Output data type.)code", DALI_INT32)
.OutputNDim(0, ConstantValueNDim);
DALI_REGISTER_OPERATOR(Zeros, Zeros<CPUBackend>, CPU);

DALI_SCHEMA(ZerosLike)
Expand All @@ -213,7 +222,8 @@ DALI_SCHEMA(Ones)
.AddOptionalArg<TensorLayout>("layout", R"code(Output layout.

If set and not empty, the layout must match the dimensionality of the output.)code", nullptr)
.AddOptionalTypeArg("dtype", R"code(Output data type.)code", DALI_INT32);
.AddOptionalTypeArg("dtype", R"code(Output data type.)code", DALI_INT32)
.OutputNDim(0, ConstantValueNDim);
DALI_REGISTER_OPERATOR(Ones, Ones<CPUBackend>, CPU);

DALI_SCHEMA(OnesLike)
Expand Down
42 changes: 41 additions & 1 deletion dali/operators/generic/expand_dims.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,47 @@ layout will be empty.")code")
.AddOptionalArg("new_axis_names", R"code(Names of the new dimensions in the data layout.

The length of `new_axis_names` must match the length of `axes`.
If argument isn't be provided, the layout will be cleared.)code", TensorLayout(""));
If argument isn't be provided, the layout will be cleared.)code", TensorLayout(""))
.OutputNDim(0, [](const OpSpec &spec)->std::optional<int> {
  // Output rank is the input rank plus one per entry in `axes`.
  // If the input rank is not known, the output rank is not known either.
  const auto &in_ndim = spec.InputDesc(0).ndim;
  if (in_ndim) {
    const auto new_axes = spec.GetRepeatedArgument<int>("axes");
    return *in_ndim + static_cast<int>(new_axes.size());
  }
  return std::nullopt;
})
.OutputLayout(0, [](const OpSpec &spec)->std::optional<TensorLayout> {
  // Infers the output layout by merging the input layout with `new_axis_names`
  // at the positions given by `axes`.
  //
  // Returns:
  //  - std::nullopt when the input layout is unknown or `axes` cannot be
  //    reconciled with the input layout,
  //  - the input layout unchanged when `axes` is empty,
  //  - an empty layout when the number of names differs from the number of axes
  //    (this includes the case of the default, empty `new_axis_names`),
  //  - the merged layout otherwise.
  auto &desc = spec.InputDesc(0);
  if (!desc.layout)
    return std::nullopt;

  auto axes = spec.GetRepeatedArgument<int>("axes");
  if (axes.empty())
    return desc.layout;

  auto names = spec.GetArgument<TensorLayout>("new_axis_names");
  int num_new_axes = ssize(axes);
  if (num_new_axes != names.ndim())
    return "";

  // Pair every new axis index with its name and order the pairs by axis index,
  // so that the merge below works even if `axes` was not given in ascending order.
  SmallVector<std::pair<int, char>, 6> ind_with_layout;
  for (int i = 0; i < num_new_axes; i++) {
    ind_with_layout.push_back({ axes[i], names[i] });
  }
  std::sort(ind_with_layout.begin(), ind_with_layout.end());

  // NOTE(review): an empty (but known) input layout is merged as if the input
  // were 0-dimensional - confirm this matches the operator's run-time behavior.
  TensorLayout out_layout = "";
  int in_ndim = desc.layout->ndim();
  int out_ndim = in_ndim + num_new_axes;
  int src_axis = 0;
  int new_axis = 0;
  for (int j = 0; j < out_ndim; j++) {
    if (new_axis < num_new_axes && ind_with_layout[new_axis].first == j) {
      // inserting new axis
      out_layout += ind_with_layout[new_axis++].second;
    } else if (src_axis < in_ndim) {
      out_layout += (*desc.layout)[src_axis++];
    } else {
      // Duplicate or out-of-range axes: the layout cannot be inferred here.
      // Report "unknown" instead of reading past the end of the input layout.
      return std::nullopt;
    }
  }
  return out_layout;
});

template <typename Backend>
ExpandDims<Backend>::ExpandDims(const OpSpec &spec)
Expand Down
27 changes: 26 additions & 1 deletion dali/operators/generic/join.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,32 @@ constructed by inserting that character into the input layout at the position in
For example, specifying ``axis = 0`` and ``axis_name = "C"`` with input layout "HW" will yield
the output layout "CHW")", nullptr, false)
.NumInput(1, 999)
.NumOutput(1);
.NumOutput(1)
.OutputNDim(0, [](const OpSpec &spec)->std::optional<int> {
  // The output has one more dimension than the inputs. All inputs are expected
  // to share the same rank, so the first input with a known rank decides.
  const int num_inputs = spec.NumInput();
  for (int idx = 0; idx < num_inputs; idx++) {
    const auto &in_ndim = spec.InputDesc(idx).ndim;
    if (in_ndim)
      return *in_ndim + 1;
  }
  // No input has a known rank.
  return std::nullopt;
})
.OutputLayout(0, [](const OpSpec &spec)->std::optional<TensorLayout> {
  // The output layout is the first known, non-empty input layout with the
  // single-character `axis_name` inserted at position `axis`.
  // Unknown (std::nullopt) when `axis_name` is missing or is not exactly one
  // character, or when no input has a known, non-empty layout.
  std::string axis_name;
  const bool has_axis_name = spec.TryGetArgument(axis_name, "axis_name");
  if (!has_axis_name || axis_name.length() != 1)
    return std::nullopt;

  int axis = spec.GetArgument<int>("axis");
  const int num_inputs = spec.NumInput();
  for (int idx = 0; idx < num_inputs; idx++) {
    auto &in_desc = spec.InputDesc(idx);
    if (in_desc.layout && !in_desc.layout->empty())
      return in_desc.layout->sub(0, axis) + axis_name + in_desc.layout->sub(axis);
  }
  return std::nullopt;
});

#define TENSOR_JOIN_TYPES (bool, uint8_t, int8_t, uint16_t, int16_t, uint32_t, int32_t, \
uint64_t, int64_t, float16, float, double)
Expand Down
15 changes: 13 additions & 2 deletions dali/operators/generic/reshape.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2019-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand All @@ -25,6 +25,16 @@

namespace dali {

// Infers the number of output dimensions from the arguments of `spec`.
//
// `shape` is tried first and `rel_shape` second; the length of the first one
// that TryGetRepeatedArgument manages to read is returned. When neither can be
// read, the result is std::nullopt (dimensionality unknown).
inline std::optional<int> ReshapeNDimFunc(const OpSpec &spec) {
  std::vector<int> abs_shape;
  const bool has_abs_shape = spec.TryGetRepeatedArgument(abs_shape, "shape");
  if (has_abs_shape)
    return static_cast<int>(abs_shape.size());

  std::vector<float> relative_shape;
  const bool has_rel_shape = spec.TryGetRepeatedArgument(relative_shape, "rel_shape");
  if (has_rel_shape)
    return static_cast<int>(relative_shape.size());

  return std::nullopt;
}

DALI_SCHEMA(Reshape)
.DocStr(R"code(Treats content of the input as if it had a different shape and/or layout.

Expand Down Expand Up @@ -94,7 +104,8 @@ extents in `rel_shape` describe to the target dimensions. In the example above,
``rel_shape = [-1, 0.5, 2]`` would result in the output shape ``[1, 100, 600]``.

All indices must be in the range of valid dimensions of the input, or -1.)code",
nullptr, true);
nullptr, true)
.OutputNDim(0, ReshapeNDimFunc);

DALI_SCHEMA(Reinterpret)
.DocStr(R"(Treats content of the input as if it had a different type, shape, and/or layout.
Expand Down
10 changes: 7 additions & 3 deletions dali/operators/generic/shapes.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2019-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -26,7 +26,9 @@ DALI_SCHEMA(Shapes)
.AddOptionalTypeArg("dtype", "Data type to which the sizes are converted.", DALI_INT64)
.DeprecateArgInFavorOf("type", "dtype", "0.27")
.MakeDocHidden()
.Deprecate("1.44", "", "Use :meth:`nvidia.dali.pipeline.DataNode.shape` instead.");
.Deprecate("1.44", "", "Use :meth:`nvidia.dali.pipeline.DataNode.shape` instead.")
.OutputNDim(0, 1)
.OutputLayout(0, std::nullopt);

DALI_SCHEMA(_Shape)
.DocStr(R"(Returns the shapes of tensors in the input batch.
Expand All @@ -39,7 +41,9 @@ INTERNAL ONLY; used by DataNode.shape()
.AllowSequences()
.SupportVolumetric()
.MakeDocHidden()
.AddOptionalTypeArg("dtype", "Data type to which the sizes are converted.", DALI_INT64);
.AddOptionalTypeArg("dtype", "Data type to which the sizes are converted.", DALI_INT64)
.OutputNDim(0, 1)
.OutputLayout(0, std::nullopt);

DALI_REGISTER_OPERATOR(Shapes, Shapes<CPUBackend>, CPU);
DALI_REGISTER_OPERATOR(Shapes, Shapes<GPUBackend>, GPU);
Expand Down
29 changes: 27 additions & 2 deletions dali/operators/generic/slice/subscript.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -64,7 +64,32 @@ DALI_SCHEMA(_TensorSubscript)
.INDEX_ARGS(28)
.INDEX_ARGS(29)
.INDEX_ARGS(30)
.INDEX_ARGS(31);
.INDEX_ARGS(31)
.OutputNDim(0, [](const OpSpec &spec)->std::optional<int> {
  // Every defined `at_<i>` argument removes one dimension from the input.
  // Unknown when the input rank is unknown or when more `at_<i>` arguments
  // are defined than the input has dimensions.
  const auto &in_ndim = spec.InputDesc(0).ndim;
  if (!in_ndim.has_value())
    return std::nullopt;

  int out_ndim = *in_ndim;
  for (int d = 0; d < kMaxSubscripts; d++) {
    const bool is_indexed = spec.ArgumentDefined(make_string("at_", d));
    if (is_indexed)
      --out_ndim;
  }

  if (out_ndim >= 0)
    return out_ndim;
  return std::nullopt;
})
.OutputLayout(0, [](const OpSpec &spec)->std::optional<TensorLayout> {
  // The output layout keeps the names of the input dimensions for which no
  // `at_<i>` argument is defined. An unknown input layout stays unknown and
  // an empty input layout stays empty.
  auto &in_desc = spec.InputDesc(0);
  if (!in_desc.layout)
    return std::nullopt;
  if (in_desc.layout->empty())
    return "";

  TensorLayout kept_dims;
  const int in_ndim = in_desc.layout->ndim();
  for (int d = 0; d < in_ndim; d++) {
    const bool is_indexed = spec.ArgumentDefined(make_string("at_", d));
    if (is_indexed)
      continue;
    kept_dims += (*in_desc.layout)[d];
  }
  return kept_dims;
});

template <>
template <int ndim, int element_size>
Expand Down
18 changes: 17 additions & 1 deletion dali/operators/image/crop/bbox_crop.cc
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,23 @@ if the fraction of their area within the ROI is greater than or equal to the thr
For example, when `bbox_prune_threshold=0.2` bboxes that have at least 20% of their original area within
the ROI are kept, bboxes less than or equal to are pruned. If `bbox_prune_threshold=0.0`, all boxes that
have some presence in the ROI are kept.)code",
nullptr);
nullptr)
.OutputNDim(0, 1)
.OutputNDim(1, 1)
.OutputNDim(2, 2)
.OutputNDim(3, 1)
.OutputNDim(4, 1)
.OutputDType(0, DALI_FLOAT)
.OutputDType(1, DALI_FLOAT)
.OutputDType(2, DALI_FLOAT)
.OutputDType(3, DALI_INT32)
.OutputDType(4, DALI_INT32)
.OutputLayout(0, "")
.OutputLayout(1, "")
.OutputLayout(2, "")
.OutputLayout(3, "")
.OutputLayout(4, "");


template <int ndim>
class RandomBBoxCropImpl : public OpImplBase<CPUBackend> {
Expand Down
11 changes: 9 additions & 2 deletions dali/operators/image/crop/crop_mirror_normalize.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2019-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -71,7 +71,14 @@ This argument is useful when using integer outputs to improve dynamic range util
This argument is useful when using unsigned integer outputs to improve dynamic range utilization.)",
0.0f)
.AddParent("CropAttr")
.AddParent("OutOfBoundsAttr");
.AddParent("OutOfBoundsAttr")
.OutputLayout(0, [](const OpSpec &spec)->std::optional<TensorLayout> {
  // An explicitly requested `output_layout` wins; an empty value means the
  // (possibly unknown) layout of the first input is passed through.
  auto requested = spec.GetArgument<TensorLayout>("output_layout");
  if (requested == "")
    return spec.InputDesc(0).layout;
  return requested;
});

DALI_REGISTER_OPERATOR(CropMirrorNormalize, CropMirrorNormalize<CPUBackend>, CPU);

Expand Down
3 changes: 2 additions & 1 deletion dali/operators/image/remap/remap.cu
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -44,6 +44,7 @@ class RemapGpu : public Remap<GPUBackend> {
const auto &mapx = ws.template Input<B>(1);
const auto &mapy = ws.template Input<B>(2);
auto &output = ws.template Output<B>(0);
output.SetLayout(input.GetLayout());
km_.Resize<Kernel>(1, spec_.template GetArgument<int>("device_id"));
kernels::KernelContext ctx;
ctx.gpu.stream = ws.stream();
Expand Down
7 changes: 5 additions & 2 deletions dali/operators/image/resize/resize.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2017-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2017-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -34,7 +34,10 @@ DALI_SCHEMA(Resize)
.SupportVolumetric()
.AllowSequences()
.AddParent("ResizeAttr")
.AddParent("ResamplingFilterAttr");
.AddParent("ResamplingFilterAttr")
.OutputDType(1, DALI_INT32)
.OutputNDim(1, 1)
.OutputLayout(1, "");

template<typename Backend>
Resize<Backend>::Resize(const OpSpec &spec)
Expand Down
14 changes: 9 additions & 5 deletions dali/operators/imgcodec/decoder_schema.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -161,7 +161,8 @@ The implementation uses NVIDIA nvImageCodec to decode images.
.NumInput(1)
.NumOutput(1)
.AddParent("ImgcodecDecoderAttr")
.AddParent("CachedDecoderAttr");
.AddParent("CachedDecoderAttr")
.OutputLayout(0, "HWC");

DALI_SCHEMA(experimental__decoders__ImageCrop)
.DocStr(R"code(Decodes images and extracts regions-of-interest (ROI) that are specified
Expand Down Expand Up @@ -189,7 +190,8 @@ When possible, the operator uses the ROI decoding, reducing the decoding time an
.NumInput(1)
.NumOutput(1)
.AddParent("ImgcodecDecoderAttr")
.AddParent("CropAttr");
.AddParent("CropAttr")
.OutputLayout(0, "HWC");


DALI_SCHEMA(experimental__decoders__ImageSlice)
Expand Down Expand Up @@ -259,7 +261,8 @@ of the slice (s0, s1, s2, …).

Integer coordinates are interpreted as absolute coordinates, while float coordinates can be
interpreted as absolute or relative coordinates, depending on the value of
`normalized_shape`.)code");
`normalized_shape`.)code")
.OutputLayout(0, "HWC");


DALI_SCHEMA(experimental__decoders__ImageRandomCrop)
Expand Down Expand Up @@ -289,7 +292,8 @@ When possible, the operator uses the ROI decoding, reducing the decoding time an
.NumInput(1)
.NumOutput(1)
.AddParent("ImgcodecDecoderAttr")
.AddParent("RandomCropAttr");
.AddParent("RandomCropAttr")
.OutputLayout(0, "HWC");

} // namespace imgcodec
} // namespace dali
Loading