Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ By @beholdnec in [#8505](https://github.com/gfx-rs/wgpu/pull/8505).
};
```
By @AdrianEddy in [#9496](https://github.com/gfx-rs/wgpu/pull/9496).
- Extend `copy_texture_to_texture` to allow copying a single plane of a multi-planar source (NV12, P010) into a single-plane destination of the matching format (e.g. NV12 `Plane0` → `R8Unorm`, NV12 `Plane1` → `Rg8Unorm`). `copy_size` is interpreted in plane texels, not luma texels. By @AdrianEddy in [#9551](https://github.com/gfx-rs/wgpu/pull/9551).

#### Metal

Expand All @@ -149,6 +150,7 @@ By @beholdnec in [#8505](https://github.com/gfx-rs/wgpu/pull/8505).

- Added support for mesh shaders in naga's HLSL writer, completing DX12 support for mesh shaders. By @inner-daemons in [#8752](https://github.com/gfx-rs/wgpu/pull/8752).
- Added `dx12::Queue::add_wait_fence` / `add_signal_fence` (and matching `remove_*` companions). They stage `ID3D12CommandQueue::Wait` / `Signal` calls on the next `Queue::submit`. The wait calls are issued before the submit's `ExecuteCommandLists`, the signal calls after wgpu's own `Signal(signal_fence, signal_value)`. Cross-API interop crates use this to GPU-side gate / publish wgpu submits against foreign-API fences. By @AdrianEddy in [#9463](https://github.com/gfx-rs/wgpu/pull/9463).
- Added `dx12::Texture::with_plane_slice` so cross-API importers can wrap one plane of a multi-plane DXGI resource (e.g. `DXGI_FORMAT_NV12`) as a single-plane wgpu texture. By @AdrianEddy in [#9551](https://github.com/gfx-rs/wgpu/pull/9551).

#### Vulkan

Expand Down Expand Up @@ -228,6 +230,7 @@ By @beholdnec in [#8505](https://github.com/gfx-rs/wgpu/pull/8505).
- Fixed a `debug_assert` during stride validation for indirect multi draw. By @kristoff3r in [#9332](https://github.com/gfx-rs/wgpu/pull/9332).
- Fixed stencil values read with `textureLoad` appearing in G instead of R. By @andyleiserson in [#9520](https://github.com/gfx-rs/wgpu/pull/9520).
- Fixed some cases where the `textureNum{Layers,Levels,Samples}` functions returned incorrect results. By @andyleiserson in [#9542](https://github.com/gfx-rs/wgpu/pull/9542).
- Fixed `map_texture_format_for_copy` panicking on `(planar_format, single_plane_aspect)` during buffer<->texture transfers, and fixed `TextureView::subresource_index` being hard-coded to plane 0. By @AdrianEddy in [#9551](https://github.com/gfx-rs/wgpu/pull/9551).

#### Metal

Expand Down
195 changes: 195 additions & 0 deletions tests/tests/wgpu-gpu/planar_texture/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ pub fn all_tests(tests: &mut Vec<GpuTestInitializer>) {
NV12_TEXTURE_RENDERING,
NV12_TEXTURE_COPYING,
P010_TEXTURE_COPYING,
NV12_PLANE_TO_SINGLE_PLANE_COPY,
]);
}

Expand Down Expand Up @@ -387,6 +388,200 @@ static NV12_TEXTURE_COPYING: GpuTestConfiguration = GpuTestConfiguration::new()
ctx.queue.submit([command_encoder.finish()]);
});

/// Ensures that copying a single plane of an NV12 source into a matching
/// single-plane destination (Plane0 → R8Unorm, Plane1 → Rg8Unorm) round-trips
/// byte-for-byte. Exercises the planar→single-plane copy-compatibility
/// extension in `copy_texture_to_texture`.
#[gpu_test]
static NV12_PLANE_TO_SINGLE_PLANE_COPY: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(TestParameters::default().features(wgpu::Features::TEXTURE_FORMAT_NV12))
.run_async(|ctx| async move {
// Width chosen so that bytes-per-row is 256-aligned for both planes:
// luma R8Unorm: 256 px * 1 byte/px = 256
// chroma Rg8Unorm: 128 px * 2 byte/px = 256
const WIDTH: u32 = 256;
const HEIGHT: u32 = 256;
let luma_size = wgpu::Extent3d {
width: WIDTH,
height: HEIGHT,
depth_or_array_layers: 1,
};
let chroma_size = wgpu::Extent3d {
width: WIDTH / 2,
height: HEIGHT / 2,
depth_or_array_layers: 1,
};

let nv12 = ctx.device.create_texture(&wgpu::TextureDescriptor {
label: Some("nv12 src"),
dimension: wgpu::TextureDimension::D2,
size: luma_size,
format: wgpu::TextureFormat::NV12,
usage: wgpu::TextureUsages::COPY_SRC | wgpu::TextureUsages::COPY_DST,
mip_level_count: 1,
sample_count: 1,
view_formats: &[],
});

// Distinct patterns per plane so a swap or plane-0-fallback would fail
// the assertion at the end.
let luma_bytes: Vec<u8> = (0..(WIDTH * HEIGHT) as usize).map(|i| i as u8).collect();
let chroma_bytes: Vec<u8> = (0..(WIDTH / 2 * HEIGHT / 2 * 2) as usize)
.map(|i| (i ^ 0xA5) as u8)
.collect();

ctx.queue.write_texture(
wgpu::TexelCopyTextureInfo {
texture: &nv12,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::Plane0,
},
&luma_bytes,
wgpu::TexelCopyBufferLayout {
offset: 0,
bytes_per_row: Some(WIDTH),
rows_per_image: Some(HEIGHT),
},
luma_size,
);
ctx.queue.write_texture(
wgpu::TexelCopyTextureInfo {
texture: &nv12,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::Plane1,
},
&chroma_bytes,
wgpu::TexelCopyBufferLayout {
offset: 0,
bytes_per_row: Some(WIDTH / 2 * 2),
rows_per_image: Some(HEIGHT / 2),
},
chroma_size,
);

let r8 = ctx.device.create_texture(&wgpu::TextureDescriptor {
label: Some("r8 dst"),
dimension: wgpu::TextureDimension::D2,
size: luma_size,
format: wgpu::TextureFormat::R8Unorm,
usage: wgpu::TextureUsages::COPY_DST | wgpu::TextureUsages::COPY_SRC,
mip_level_count: 1,
sample_count: 1,
view_formats: &[],
});
let rg8 = ctx.device.create_texture(&wgpu::TextureDescriptor {
label: Some("rg8 dst"),
dimension: wgpu::TextureDimension::D2,
size: chroma_size,
format: wgpu::TextureFormat::Rg8Unorm,
usage: wgpu::TextureUsages::COPY_DST | wgpu::TextureUsages::COPY_SRC,
mip_level_count: 1,
sample_count: 1,
view_formats: &[],
});

let r8_readback = ctx.device.create_buffer(&wgpu::BufferDescriptor {
label: None,
size: luma_bytes.len() as u64,
usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
mapped_at_creation: false,
});
let rg8_readback = ctx.device.create_buffer(&wgpu::BufferDescriptor {
label: None,
size: chroma_bytes.len() as u64,
usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
mapped_at_creation: false,
});

let mut encoder = ctx
.device
.create_command_encoder(&wgpu::CommandEncoderDescriptor::default());

// The path under test.
encoder.copy_texture_to_texture(
wgpu::TexelCopyTextureInfo {
texture: &nv12,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::Plane0,
},
wgpu::TexelCopyTextureInfo {
texture: &r8,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::All,
},
luma_size,
);
encoder.copy_texture_to_texture(
wgpu::TexelCopyTextureInfo {
texture: &nv12,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::Plane1,
},
wgpu::TexelCopyTextureInfo {
texture: &rg8,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::All,
},
chroma_size,
);

encoder.copy_texture_to_buffer(
wgpu::TexelCopyTextureInfo {
texture: &r8,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::All,
},
wgpu::TexelCopyBufferInfo {
buffer: &r8_readback,
layout: wgpu::TexelCopyBufferLayout {
offset: 0,
bytes_per_row: Some(WIDTH),
rows_per_image: Some(HEIGHT),
},
},
luma_size,
);
encoder.copy_texture_to_buffer(
wgpu::TexelCopyTextureInfo {
texture: &rg8,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::All,
},
wgpu::TexelCopyBufferInfo {
buffer: &rg8_readback,
layout: wgpu::TexelCopyBufferLayout {
offset: 0,
bytes_per_row: Some(WIDTH / 2 * 2),
rows_per_image: Some(HEIGHT / 2),
},
},
chroma_size,
);

ctx.queue.submit([encoder.finish()]);

let r8_slice = r8_readback.slice(..);
r8_slice.map_async(wgpu::MapMode::Read, |_| ());
let rg8_slice = rg8_readback.slice(..);
rg8_slice.map_async(wgpu::MapMode::Read, |_| ());
ctx.async_poll(wgpu::PollType::wait_indefinitely())
.await
.unwrap();

let r8_data: Vec<u8> = r8_slice.get_mapped_range().unwrap().to_vec();
let rg8_data: Vec<u8> = rg8_slice.get_mapped_range().unwrap().to_vec();
assert_eq!(r8_data, luma_bytes, "luma plane mismatch");
assert_eq!(rg8_data, chroma_bytes, "chroma plane mismatch");
});

/// Ensures that copying P010 texture to P010 texture works as expected
#[gpu_test]
static P010_TEXTURE_COPYING: GpuTestConfiguration = GpuTestConfiguration::new()
Expand Down
62 changes: 58 additions & 4 deletions wgpu-core/src/command/transfer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1373,9 +1373,23 @@ pub(super) fn copy_texture_to_texture(
dst_texture.same_device(state.device)?;

// src and dst texture format must be copy-compatible
// https://gpuweb.github.io/gpuweb/#copy-compatible
if src_texture.desc.format.remove_srgb_suffix() != dst_texture.desc.format.remove_srgb_suffix()
{
// (https://gpuweb.github.io/gpuweb/#copy-compatible), with an
// extension allowing one plane of a planar source to be copied
// into a single-plane destination of the matching format
// (e.g. NV12 Plane0 -> R8Unorm, NV12 Plane1 -> Rg8Unorm).
//
// When taking this path, `copy_size` and `source.origin` are
// interpreted in *plane* texels, not luma texels: copying NV12
// Plane1 into an Rg8Unorm of size (W/2, H/2) requires
// `copy_size = (W/2, H/2)`. The plane-extent check further down
// enforces this against the subsampled plane extent, so a caller
// passing luma-sized values gets a source-side error pointing at
// the actual mistake rather than an opaque destination overrun.
let src_fmt_no_srgb = src_texture.desc.format.remove_srgb_suffix();
let dst_fmt_no_srgb = dst_texture.desc.format.remove_srgb_suffix();
let planar_split_ok = src_fmt_no_srgb.is_multi_planar_format()
&& src_fmt_no_srgb.aspect_specific_format(source.aspect) == Some(dst_fmt_no_srgb);
if src_fmt_no_srgb != dst_fmt_no_srgb && !planar_split_ok {
return Err(TransferError::TextureFormatsNotCopyCompatible {
src_format: src_texture.desc.format,
dst_format: dst_texture.desc.format,
Expand All @@ -1392,6 +1406,45 @@ pub(super) fn copy_texture_to_texture(
copy_size,
)?;

// For planar -> single-plane copies, re-check the source extent
// in plane coordinates. `validate_texture_copy_range` above used
// the full luma extent of the planar source, so it does not
// catch a caller treating `copy_size` / `origin` as luma-sized
// when targeting a subsampled plane (NV12/P010 plane 1).
if planar_split_ok {
// `planar_split_ok` implies `aspect_specific_format(source.aspect)`
// returned `Some`, which is only true for `Plane{0,1,2}`.
let plane = source.aspect.to_plane().expect("planar_split_ok aspect");
let plane_extent = src_texture
.desc
.compute_render_extent(source.mip_level, Some(plane));
let check = |dimension, start: u32, size: u32, plane_size: u32| {
if start > plane_size || plane_size - start < size {
Err(TransferError::TextureOverrun {
start_offset: start,
end_offset: start.wrapping_add(size),
texture_size: plane_size,
dimension,
side: CopySide::Source,
})
} else {
Ok(())
}
};
check(
TextureErrorDimension::X,
source.origin.x,
copy_size.width,
plane_extent.width,
)?;
check(
TextureErrorDimension::Y,
source.origin.y,
copy_size.height,
plane_extent.height,
)?;
}

if Arc::as_ptr(src_texture) == Arc::as_ptr(dst_texture) {
validate_copy_within_same_texture(
source,
Expand All @@ -1405,7 +1458,8 @@ pub(super) fn copy_texture_to_texture(
let (dst_range, dst_tex_base) = extract_texture_selector(destination, copy_size, dst_texture)?;
let src_texture_aspects = hal::FormatAspects::from(src_texture.desc.format);
let dst_texture_aspects = hal::FormatAspects::from(dst_texture.desc.format);
if src_tex_base.aspect != src_texture_aspects {
// `planar_split_ok` already constrains `source.aspect` to a single plane.
if src_tex_base.aspect != src_texture_aspects && !planar_split_ok {
return Err(TransferError::CopySrcMissingAspects.into());
}
if dst_tex_base.aspect != dst_texture_aspects {
Expand Down
6 changes: 6 additions & 0 deletions wgpu-hal/src/auxil/dxgi/conv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,12 @@ pub fn map_texture_format_for_copy(
crate::FormatAspects::STENCIL,
) => Dxgi::Common::DXGI_FORMAT_R8_UINT,

// `CopyTextureRegion` on a plane subresource wants the
// single-plane DXGI format, not the planar one.
(format, aspects) if format.is_multi_planar_format() && aspects.is_one() => {
map_texture_format(format.aspect_specific_format(aspects.map())?)
}

(format, crate::FormatAspects::COLOR) => map_texture_format(format),

_ => return None,
Expand Down
4 changes: 3 additions & 1 deletion wgpu-hal/src/dx12/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,7 @@ impl super::Device {
suballocation::AllocationType::Texture,
format.theoretical_memory_footprint(size),
),
plane_slice_override: None,
}
}

Expand Down Expand Up @@ -598,6 +599,7 @@ impl crate::Device for super::Device {
mip_level_count: desc.mip_level_count,
sample_count: desc.sample_count,
allocation,
plane_slice_override: None,
})
}

Expand Down Expand Up @@ -629,7 +631,7 @@ impl crate::Device for super::Device {
subresource_index: texture.calc_subresource(
desc.range.base_mip_level,
desc.range.base_array_layer,
0,
view_desc.plane_slice(),
),
mip_slice: desc.range.base_mip_level,
handle_srv: if desc.usage.intersects(wgt::TextureUses::RESOURCE) {
Expand Down
Loading
Loading