Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ By @beholdnec in [#8505](https://github.com/gfx-rs/wgpu/pull/8505).
};
```
By @AdrianEddy in [#9496](https://github.com/gfx-rs/wgpu/pull/9496).
- Extend `copy_texture_to_texture` to allow copying a single plane of a multi-planar source (NV12, P010) into a single-plane destination of the matching format (e.g. NV12 `Plane0` → `R8Unorm`, NV12 `Plane1` → `Rg8Unorm`). `copy_size` is interpreted in plane texels, not luma texels. By @AdrianEddy in [#9551](https://github.com/gfx-rs/wgpu/pull/9551).

#### Metal

Expand All @@ -148,6 +149,7 @@ By @beholdnec in [#8505](https://github.com/gfx-rs/wgpu/pull/8505).

- Added support for mesh shaders in naga's HLSL writer, completing DX12 support for mesh shaders. By @inner-daemons in [#8752](https://github.com/gfx-rs/wgpu/pull/8752).
- Added `dx12::Queue::add_wait_fence` / `add_signal_fence` (and matching `remove_*` companions). They stage `ID3D12CommandQueue::Wait` / `Signal` calls on the next `Queue::submit`. The wait calls are issued before the submit's `ExecuteCommandLists`, the signal calls after wgpu's own `Signal(signal_fence, signal_value)`. Cross-API interop crates use this to GPU-side gate / publish wgpu submits against foreign-API fences. By @AdrianEddy in [#9463](https://github.com/gfx-rs/wgpu/pull/9463).
- Added `dx12::Texture::with_plane_slice` so cross-API importers can wrap one plane of a multi-plane DXGI resource (e.g. `DXGI_FORMAT_NV12`) as a single-plane wgpu texture. Construct two wgpu textures from the same `ID3D12Resource`, one per plane, with the matching single-plane `TextureFormat` (`R8Unorm` / `Rg8Unorm`, `R16Unorm` / `Rg16Unorm`). Views and copies derived from the wrapper target the correct `PlaneSlice` instead of defaulting to plane 0. Also fixes `map_texture_format_for_copy` panicking on `(planar_format, single_plane_aspect)` during buffer<->texture transfers, and `TextureView::subresource_index` previously being hard-coded to plane 0. By @AdrianEddy in [#9551](https://github.com/gfx-rs/wgpu/pull/9551).
Comment thread
AdrianEddy marked this conversation as resolved.
Outdated

#### Vulkan

Expand Down
195 changes: 195 additions & 0 deletions tests/tests/wgpu-gpu/planar_texture/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ pub fn all_tests(tests: &mut Vec<GpuTestInitializer>) {
NV12_TEXTURE_RENDERING,
NV12_TEXTURE_COPYING,
P010_TEXTURE_COPYING,
NV12_PLANE_TO_SINGLE_PLANE_COPY,
]);
}

Expand Down Expand Up @@ -387,6 +388,200 @@ static NV12_TEXTURE_COPYING: GpuTestConfiguration = GpuTestConfiguration::new()
ctx.queue.submit([command_encoder.finish()]);
});

/// Ensures that copying a single plane of an NV12 source into a matching
/// single-plane destination (Plane0 → R8Unorm, Plane1 → Rg8Unorm) round-trips
/// byte-for-byte. Exercises the planar→single-plane copy-compatibility
/// extension in `copy_texture_to_texture`.
#[gpu_test]
static NV12_PLANE_TO_SINGLE_PLANE_COPY: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(TestParameters::default().features(wgpu::Features::TEXTURE_FORMAT_NV12))
.run_async(|ctx| async move {
// Width chosen so that bytes-per-row is 256-aligned for both planes:
// luma R8Unorm: 256 px * 1 byte/px = 256
// chroma Rg8Unorm: 128 px * 2 byte/px = 256
const WIDTH: u32 = 256;
const HEIGHT: u32 = 256;
let luma_size = wgpu::Extent3d {
width: WIDTH,
height: HEIGHT,
depth_or_array_layers: 1,
};
let chroma_size = wgpu::Extent3d {
width: WIDTH / 2,
height: HEIGHT / 2,
depth_or_array_layers: 1,
};

let nv12 = ctx.device.create_texture(&wgpu::TextureDescriptor {
label: Some("nv12 src"),
dimension: wgpu::TextureDimension::D2,
size: luma_size,
format: wgpu::TextureFormat::NV12,
usage: wgpu::TextureUsages::COPY_SRC | wgpu::TextureUsages::COPY_DST,
mip_level_count: 1,
sample_count: 1,
view_formats: &[],
});

// Distinct patterns per plane so a swap or plane-0-fallback would fail
// the assertion at the end.
let luma_bytes: Vec<u8> = (0..(WIDTH * HEIGHT) as usize).map(|i| i as u8).collect();
let chroma_bytes: Vec<u8> = (0..(WIDTH / 2 * HEIGHT / 2 * 2) as usize)
.map(|i| (i ^ 0xA5) as u8)
.collect();

ctx.queue.write_texture(
wgpu::TexelCopyTextureInfo {
texture: &nv12,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::Plane0,
},
&luma_bytes,
wgpu::TexelCopyBufferLayout {
offset: 0,
bytes_per_row: Some(WIDTH),
rows_per_image: Some(HEIGHT),
},
luma_size,
);
ctx.queue.write_texture(
wgpu::TexelCopyTextureInfo {
texture: &nv12,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::Plane1,
},
&chroma_bytes,
wgpu::TexelCopyBufferLayout {
offset: 0,
bytes_per_row: Some(WIDTH / 2 * 2),
rows_per_image: Some(HEIGHT / 2),
},
chroma_size,
);

let r8 = ctx.device.create_texture(&wgpu::TextureDescriptor {
label: Some("r8 dst"),
dimension: wgpu::TextureDimension::D2,
size: luma_size,
format: wgpu::TextureFormat::R8Unorm,
usage: wgpu::TextureUsages::COPY_DST | wgpu::TextureUsages::COPY_SRC,
mip_level_count: 1,
sample_count: 1,
view_formats: &[],
});
let rg8 = ctx.device.create_texture(&wgpu::TextureDescriptor {
label: Some("rg8 dst"),
dimension: wgpu::TextureDimension::D2,
size: chroma_size,
format: wgpu::TextureFormat::Rg8Unorm,
usage: wgpu::TextureUsages::COPY_DST | wgpu::TextureUsages::COPY_SRC,
mip_level_count: 1,
sample_count: 1,
view_formats: &[],
});

let r8_readback = ctx.device.create_buffer(&wgpu::BufferDescriptor {
label: None,
size: luma_bytes.len() as u64,
usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
mapped_at_creation: false,
});
let rg8_readback = ctx.device.create_buffer(&wgpu::BufferDescriptor {
label: None,
size: chroma_bytes.len() as u64,
usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
mapped_at_creation: false,
});

let mut encoder = ctx
.device
.create_command_encoder(&wgpu::CommandEncoderDescriptor::default());

// The path under test.
encoder.copy_texture_to_texture(
wgpu::TexelCopyTextureInfo {
texture: &nv12,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::Plane0,
},
wgpu::TexelCopyTextureInfo {
texture: &r8,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::All,
},
luma_size,
);
encoder.copy_texture_to_texture(
wgpu::TexelCopyTextureInfo {
texture: &nv12,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::Plane1,
},
wgpu::TexelCopyTextureInfo {
texture: &rg8,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::All,
},
chroma_size,
);

encoder.copy_texture_to_buffer(
wgpu::TexelCopyTextureInfo {
texture: &r8,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::All,
},
wgpu::TexelCopyBufferInfo {
buffer: &r8_readback,
layout: wgpu::TexelCopyBufferLayout {
offset: 0,
bytes_per_row: Some(WIDTH),
rows_per_image: Some(HEIGHT),
},
},
luma_size,
);
encoder.copy_texture_to_buffer(
wgpu::TexelCopyTextureInfo {
texture: &rg8,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::All,
},
wgpu::TexelCopyBufferInfo {
buffer: &rg8_readback,
layout: wgpu::TexelCopyBufferLayout {
offset: 0,
bytes_per_row: Some(WIDTH / 2 * 2),
rows_per_image: Some(HEIGHT / 2),
},
},
chroma_size,
);

ctx.queue.submit([encoder.finish()]);

let r8_slice = r8_readback.slice(..);
r8_slice.map_async(wgpu::MapMode::Read, |_| ());
let rg8_slice = rg8_readback.slice(..);
rg8_slice.map_async(wgpu::MapMode::Read, |_| ());
ctx.async_poll(wgpu::PollType::wait_indefinitely())
.await
.unwrap();

let r8_data: Vec<u8> = r8_slice.get_mapped_range().unwrap().to_vec();
let rg8_data: Vec<u8> = rg8_slice.get_mapped_range().unwrap().to_vec();
assert_eq!(r8_data, luma_bytes, "luma plane mismatch");
assert_eq!(rg8_data, chroma_bytes, "chroma plane mismatch");
});

/// Ensures that copying P010 texture to P010 texture works as expected
#[gpu_test]
static P010_TEXTURE_COPYING: GpuTestConfiguration = GpuTestConfiguration::new()
Expand Down
62 changes: 58 additions & 4 deletions wgpu-core/src/command/transfer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1373,9 +1373,23 @@ pub(super) fn copy_texture_to_texture(
dst_texture.same_device(state.device)?;

// src and dst texture format must be copy-compatible
// https://gpuweb.github.io/gpuweb/#copy-compatible
if src_texture.desc.format.remove_srgb_suffix() != dst_texture.desc.format.remove_srgb_suffix()
{
// (https://gpuweb.github.io/gpuweb/#copy-compatible), with an
// extension allowing one plane of a planar source to be copied
// into a single-plane destination of the matching format
// (e.g. NV12 Plane0 -> R8Unorm, NV12 Plane1 -> Rg8Unorm).
//
// When taking this path, `copy_size` and `source.origin` are
// interpreted in *plane* texels, not luma texels: copying NV12
// Plane1 into an Rg8Unorm of size (W/2, H/2) requires
// `copy_size = (W/2, H/2)`. The plane-extent check further down
// enforces this against the subsampled plane extent, so a caller
// passing luma-sized values gets a source-side error pointing at
// the actual mistake rather than an opaque destination overrun.
let src_fmt_no_srgb = src_texture.desc.format.remove_srgb_suffix();
let dst_fmt_no_srgb = dst_texture.desc.format.remove_srgb_suffix();
let planar_split_ok = src_fmt_no_srgb.is_multi_planar_format()
&& src_fmt_no_srgb.aspect_specific_format(source.aspect) == Some(dst_fmt_no_srgb);
if src_fmt_no_srgb != dst_fmt_no_srgb && !planar_split_ok {
return Err(TransferError::TextureFormatsNotCopyCompatible {
src_format: src_texture.desc.format,
dst_format: dst_texture.desc.format,
Expand All @@ -1392,6 +1406,45 @@ pub(super) fn copy_texture_to_texture(
copy_size,
)?;

// For planar -> single-plane copies, re-check the source extent
// in plane coordinates. `validate_texture_copy_range` above used
// the full luma extent of the planar source, so it does not
// catch a caller treating `copy_size` / `origin` as luma-sized
// when targeting a subsampled plane (NV12/P010 plane 1).
if planar_split_ok {
// `planar_split_ok` implies `aspect_specific_format(source.aspect)`
// returned `Some`, which is only true for `Plane{0,1,2}`.
let plane = source.aspect.to_plane().expect("planar_split_ok aspect");
let plane_extent = src_texture
.desc
.compute_render_extent(source.mip_level, Some(plane));
let check = |dimension, start: u32, size: u32, plane_size: u32| {
if start > plane_size || plane_size - start < size {
Err(TransferError::TextureOverrun {
start_offset: start,
end_offset: start.wrapping_add(size),
texture_size: plane_size,
dimension,
side: CopySide::Source,
})
} else {
Ok(())
}
};
check(
TextureErrorDimension::X,
source.origin.x,
copy_size.width,
plane_extent.width,
)?;
check(
TextureErrorDimension::Y,
source.origin.y,
copy_size.height,
plane_extent.height,
)?;
}

if Arc::as_ptr(src_texture) == Arc::as_ptr(dst_texture) {
validate_copy_within_same_texture(
source,
Expand All @@ -1405,7 +1458,8 @@ pub(super) fn copy_texture_to_texture(
let (dst_range, dst_tex_base) = extract_texture_selector(destination, copy_size, dst_texture)?;
let src_texture_aspects = hal::FormatAspects::from(src_texture.desc.format);
let dst_texture_aspects = hal::FormatAspects::from(dst_texture.desc.format);
if src_tex_base.aspect != src_texture_aspects {
// `planar_split_ok` already constrains `source.aspect` to a single plane.
if src_tex_base.aspect != src_texture_aspects && !planar_split_ok {
return Err(TransferError::CopySrcMissingAspects.into());
}
if dst_tex_base.aspect != dst_texture_aspects {
Expand Down
6 changes: 6 additions & 0 deletions wgpu-hal/src/auxil/dxgi/conv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,12 @@ pub fn map_texture_format_for_copy(
crate::FormatAspects::STENCIL,
) => Dxgi::Common::DXGI_FORMAT_R8_UINT,

// `CopyTextureRegion` on a plane subresource wants the
// single-plane DXGI format, not the planar one.
(format, aspects) if format.is_multi_planar_format() && aspects.is_one() => {
map_texture_format(format.aspect_specific_format(aspects.map())?)
}

(format, crate::FormatAspects::COLOR) => map_texture_format(format),

_ => return None,
Expand Down
4 changes: 3 additions & 1 deletion wgpu-hal/src/dx12/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,7 @@ impl super::Device {
suballocation::AllocationType::Texture,
format.theoretical_memory_footprint(size),
),
plane_slice_override: None,
}
}

Expand Down Expand Up @@ -598,6 +599,7 @@ impl crate::Device for super::Device {
mip_level_count: desc.mip_level_count,
sample_count: desc.sample_count,
allocation,
plane_slice_override: None,
})
}

Expand Down Expand Up @@ -629,7 +631,7 @@ impl crate::Device for super::Device {
subresource_index: texture.calc_subresource(
desc.range.base_mip_level,
desc.range.base_array_layer,
0,
view_desc.plane_slice(),
),
mip_slice: desc.range.base_mip_level,
handle_srv: if desc.usage.intersects(wgt::TextureUses::RESOURCE) {
Expand Down
Loading
Loading