diff --git a/CHANGELOG.md b/CHANGELOG.md index 24f9c31a726..a97ac368831 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -132,6 +132,7 @@ By @beholdnec in [#8505](https://github.com/gfx-rs/wgpu/pull/8505). }; ``` By @AdrianEddy in [#9496](https://github.com/gfx-rs/wgpu/pull/9496). +- Extend `copy_texture_to_texture` to allow copying a single plane of a multi-planar source (NV12, P010) into a single-plane destination of the matching format (e.g. NV12 `Plane0` → `R8Unorm`, NV12 `Plane1` → `Rg8Unorm`). `copy_size` is interpreted in plane texels, not luma texels. By @AdrianEddy in [#9551](https://github.com/gfx-rs/wgpu/pull/9551). #### Metal @@ -149,6 +150,7 @@ By @beholdnec in [#8505](https://github.com/gfx-rs/wgpu/pull/8505). - Added support for mesh shaders in naga's HLSL writer, completing DX12 support for mesh shaders. By @inner-daemons in [#8752](https://github.com/gfx-rs/wgpu/pull/8752). - Added `dx12::Queue::add_wait_fence` / `add_signal_fence` (and matching `remove_*` companions). They stage `ID3D12CommandQueue::Wait` / `Signal` calls on the next `Queue::submit`. The wait calls are issued before the submit's `ExecuteCommandLists`, the signal calls after wgpu's own `Signal(signal_fence, signal_value)`. Cross-API interop crates use this to GPU-side gate / publish wgpu submits against foreign-API fences. By @AdrianEddy in [#9463](https://github.com/gfx-rs/wgpu/pull/9463). +- Added `dx12::Texture::with_plane_slice` so cross-API importers can wrap one plane of a multi-plane DXGI resource (e.g. `DXGI_FORMAT_NV12`) as a single-plane wgpu texture. By @AdrianEddy in [#9551](https://github.com/gfx-rs/wgpu/pull/9551). #### Vulkan @@ -228,6 +230,7 @@ By @beholdnec in [#8505](https://github.com/gfx-rs/wgpu/pull/8505). - Fixed a `debug_assert` during stride validation for indirect multi draw. By @kristoff3r in [#9332](https://github.com/gfx-rs/wgpu/pull/9332) - Fixed stencil values read with `textureLoad` appearing in G instead of R. By @andyleiserson in [#9520](https://github.com/gfx-rs/wgpu/pull/9520). - Fixed some cases where the `textureNum{Layers,Levels,Samples}` functions returned incorrect results. By @andyleiserson in [#9542](https://github.com/gfx-rs/wgpu/pull/9542). +- Fixed `map_texture_format_for_copy` panicking on `(planar_format, single_plane_aspect)` during buffer<->texture transfers, and `TextureView::subresource_index` previously being hard-coded to plane 0. By @AdrianEddy in [#9551](https://github.com/gfx-rs/wgpu/pull/9551). #### Metal diff --git a/tests/tests/wgpu-gpu/planar_texture/mod.rs b/tests/tests/wgpu-gpu/planar_texture/mod.rs index 2fd965b3679..d1674311a1b 100644 --- a/tests/tests/wgpu-gpu/planar_texture/mod.rs +++ b/tests/tests/wgpu-gpu/planar_texture/mod.rs @@ -11,6 +11,7 @@ pub fn all_tests(tests: &mut Vec) { NV12_TEXTURE_RENDERING, NV12_TEXTURE_COPYING, P010_TEXTURE_COPYING, + NV12_PLANE_TO_SINGLE_PLANE_COPY, ]); } @@ -387,6 +388,200 @@ static NV12_TEXTURE_COPYING: GpuTestConfiguration = GpuTestConfiguration::new() ctx.queue.submit([command_encoder.finish()]); }); +/// Ensures that copying a single plane of an NV12 source into a matching +/// single-plane destination (Plane0 → R8Unorm, Plane1 → Rg8Unorm) round-trips +/// byte-for-byte. Exercises the planar→single-plane copy-compatibility +/// extension in `copy_texture_to_texture`. +#[gpu_test] +static NV12_PLANE_TO_SINGLE_PLANE_COPY: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters(TestParameters::default().features(wgpu::Features::TEXTURE_FORMAT_NV12)) + .run_async(|ctx| async move { + // Width chosen so that bytes-per-row is 256-aligned for both planes: + // luma R8Unorm: 256 px * 1 byte/px = 256 + // chroma Rg8Unorm: 128 px * 2 byte/px = 256 + const WIDTH: u32 = 256; + const HEIGHT: u32 = 256; + let luma_size = wgpu::Extent3d { + width: WIDTH, + height: HEIGHT, + depth_or_array_layers: 1, + }; + let chroma_size = wgpu::Extent3d { + width: WIDTH / 2, + height: HEIGHT / 2, + depth_or_array_layers: 1, + }; + + let nv12 = ctx.device.create_texture(&wgpu::TextureDescriptor { + label: Some("nv12 src"), + dimension: wgpu::TextureDimension::D2, + size: luma_size, + format: wgpu::TextureFormat::NV12, + usage: wgpu::TextureUsages::COPY_SRC | wgpu::TextureUsages::COPY_DST, + mip_level_count: 1, + sample_count: 1, + view_formats: &[], + }); + + // Distinct patterns per plane so a swap or plane-0-fallback would fail + // the assertion at the end. + let luma_bytes: Vec = (0..(WIDTH * HEIGHT) as usize).map(|i| i as u8).collect(); + let chroma_bytes: Vec = (0..(WIDTH / 2 * HEIGHT / 2 * 2) as usize) + .map(|i| (i ^ 0xA5) as u8) + .collect(); + + ctx.queue.write_texture( + wgpu::TexelCopyTextureInfo { + texture: &nv12, + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + aspect: wgpu::TextureAspect::Plane0, + }, + &luma_bytes, + wgpu::TexelCopyBufferLayout { + offset: 0, + bytes_per_row: Some(WIDTH), + rows_per_image: Some(HEIGHT), + }, + luma_size, + ); + ctx.queue.write_texture( + wgpu::TexelCopyTextureInfo { + texture: &nv12, + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + aspect: wgpu::TextureAspect::Plane1, + }, + &chroma_bytes, + wgpu::TexelCopyBufferLayout { + offset: 0, + bytes_per_row: Some(WIDTH / 2 * 2), + rows_per_image: Some(HEIGHT / 2), + }, + chroma_size, + ); + + let r8 = ctx.device.create_texture(&wgpu::TextureDescriptor { + label: Some("r8 dst"), + dimension: wgpu::TextureDimension::D2, + size: luma_size, + format: wgpu::TextureFormat::R8Unorm, + usage: wgpu::TextureUsages::COPY_DST | wgpu::TextureUsages::COPY_SRC, + mip_level_count: 1, + sample_count: 1, + view_formats: &[], + }); + let rg8 = ctx.device.create_texture(&wgpu::TextureDescriptor { + label: Some("rg8 dst"), + dimension: wgpu::TextureDimension::D2, + size: chroma_size, + format: wgpu::TextureFormat::Rg8Unorm, + usage: wgpu::TextureUsages::COPY_DST | wgpu::TextureUsages::COPY_SRC, + mip_level_count: 1, + sample_count: 1, + view_formats: &[], + }); + + let r8_readback = ctx.device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: luma_bytes.len() as u64, + usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + let rg8_readback = ctx.device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: chroma_bytes.len() as u64, + usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }); + + let mut encoder = ctx + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); + + // The path under test. + encoder.copy_texture_to_texture( + wgpu::TexelCopyTextureInfo { + texture: &nv12, + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + aspect: wgpu::TextureAspect::Plane0, + }, + wgpu::TexelCopyTextureInfo { + texture: &r8, + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + aspect: wgpu::TextureAspect::All, + }, + luma_size, + ); + encoder.copy_texture_to_texture( + wgpu::TexelCopyTextureInfo { + texture: &nv12, + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + aspect: wgpu::TextureAspect::Plane1, + }, + wgpu::TexelCopyTextureInfo { + texture: &rg8, + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + aspect: wgpu::TextureAspect::All, + }, + chroma_size, + ); + + encoder.copy_texture_to_buffer( + wgpu::TexelCopyTextureInfo { + texture: &r8, + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + aspect: wgpu::TextureAspect::All, + }, + wgpu::TexelCopyBufferInfo { + buffer: &r8_readback, + layout: wgpu::TexelCopyBufferLayout { + offset: 0, + bytes_per_row: Some(WIDTH), + rows_per_image: Some(HEIGHT), + }, + }, + luma_size, + ); + encoder.copy_texture_to_buffer( + wgpu::TexelCopyTextureInfo { + texture: &rg8, + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + aspect: wgpu::TextureAspect::All, + }, + wgpu::TexelCopyBufferInfo { + buffer: &rg8_readback, + layout: wgpu::TexelCopyBufferLayout { + offset: 0, + bytes_per_row: Some(WIDTH / 2 * 2), + rows_per_image: Some(HEIGHT / 2), + }, + }, + chroma_size, + ); + + ctx.queue.submit([encoder.finish()]); + + let r8_slice = r8_readback.slice(..); + r8_slice.map_async(wgpu::MapMode::Read, |_| ()); + let rg8_slice = rg8_readback.slice(..); + rg8_slice.map_async(wgpu::MapMode::Read, |_| ()); + ctx.async_poll(wgpu::PollType::wait_indefinitely()) + .await + .unwrap(); + + let r8_data: Vec = r8_slice.get_mapped_range().unwrap().to_vec(); + let rg8_data: Vec = rg8_slice.get_mapped_range().unwrap().to_vec(); + assert_eq!(r8_data, luma_bytes, "luma plane mismatch"); + assert_eq!(rg8_data, chroma_bytes, "chroma plane mismatch"); + }); + /// Ensures that copying P010 texture to P010 texture works as expected #[gpu_test] static P010_TEXTURE_COPYING: GpuTestConfiguration = GpuTestConfiguration::new() diff --git a/wgpu-core/src/command/transfer.rs b/wgpu-core/src/command/transfer.rs index a9208a76b9f..e344b32451d 100644 --- a/wgpu-core/src/command/transfer.rs +++ b/wgpu-core/src/command/transfer.rs @@ -1373,9 +1373,23 @@ pub(super) fn copy_texture_to_texture( dst_texture.same_device(state.device)?; // src and dst texture format must be copy-compatible - // https://gpuweb.github.io/gpuweb/#copy-compatible - if src_texture.desc.format.remove_srgb_suffix() != dst_texture.desc.format.remove_srgb_suffix() - { + // (https://gpuweb.github.io/gpuweb/#copy-compatible), with an + // extension allowing one plane of a planar source to be copied + // into a single-plane destination of the matching format + // (e.g. NV12 Plane0 -> R8Unorm, NV12 Plane1 -> Rg8Unorm). + // + // When taking this path, `copy_size` and `source.origin` are + // interpreted in *plane* texels, not luma texels: copying NV12 + // Plane1 into an Rg8Unorm of size (W/2, H/2) requires + // `copy_size = (W/2, H/2)`. The plane-extent check further down + // enforces this against the subsampled plane extent, so a caller + // passing luma-sized values gets a source-side error pointing at + // the actual mistake rather than an opaque destination overrun. + let src_fmt_no_srgb = src_texture.desc.format.remove_srgb_suffix(); + let dst_fmt_no_srgb = dst_texture.desc.format.remove_srgb_suffix(); + let planar_split_ok = src_fmt_no_srgb.is_multi_planar_format() + && src_fmt_no_srgb.aspect_specific_format(source.aspect) == Some(dst_fmt_no_srgb); + if src_fmt_no_srgb != dst_fmt_no_srgb && !planar_split_ok { return Err(TransferError::TextureFormatsNotCopyCompatible { src_format: src_texture.desc.format, dst_format: dst_texture.desc.format, @@ -1392,6 +1406,45 @@ pub(super) fn copy_texture_to_texture( copy_size, )?; + // For planar -> single-plane copies, re-check the source extent + // in plane coordinates. `validate_texture_copy_range` above used + // the full luma extent of the planar source, so it does not + // catch a caller treating `copy_size` / `origin` as luma-sized + // when targeting a subsampled plane (NV12/P010 plane 1). + if planar_split_ok { + // `planar_split_ok` implies `aspect_specific_format(source.aspect)` + // returned `Some`, which is only true for `Plane{0,1,2}`. + let plane = source.aspect.to_plane().expect("planar_split_ok aspect"); + let plane_extent = src_texture + .desc + .compute_render_extent(source.mip_level, Some(plane)); + let check = |dimension, start: u32, size: u32, plane_size: u32| { + if start > plane_size || plane_size - start < size { + Err(TransferError::TextureOverrun { + start_offset: start, + end_offset: start.wrapping_add(size), + texture_size: plane_size, + dimension, + side: CopySide::Source, + }) + } else { + Ok(()) + } + }; + check( + TextureErrorDimension::X, + source.origin.x, + copy_size.width, + plane_extent.width, + )?; + check( + TextureErrorDimension::Y, + source.origin.y, + copy_size.height, + plane_extent.height, + )?; + } + if Arc::as_ptr(src_texture) == Arc::as_ptr(dst_texture) { validate_copy_within_same_texture( source, @@ -1405,7 +1458,8 @@ pub(super) fn copy_texture_to_texture( let (dst_range, dst_tex_base) = extract_texture_selector(destination, copy_size, dst_texture)?; let src_texture_aspects = hal::FormatAspects::from(src_texture.desc.format); let dst_texture_aspects = hal::FormatAspects::from(dst_texture.desc.format); - if src_tex_base.aspect != src_texture_aspects { + // `planar_split_ok` already constrains `source.aspect` to a single plane. + if src_tex_base.aspect != src_texture_aspects && !planar_split_ok { return Err(TransferError::CopySrcMissingAspects.into()); } if dst_tex_base.aspect != dst_texture_aspects { diff --git a/wgpu-hal/src/auxil/dxgi/conv.rs b/wgpu-hal/src/auxil/dxgi/conv.rs index 6e27081aac4..c9e4dbc2c77 100644 --- a/wgpu-hal/src/auxil/dxgi/conv.rs +++ b/wgpu-hal/src/auxil/dxgi/conv.rs @@ -175,6 +175,12 @@ pub fn map_texture_format_for_copy( crate::FormatAspects::STENCIL, ) => Dxgi::Common::DXGI_FORMAT_R8_UINT, + // `CopyTextureRegion` on a plane subresource wants the + // single-plane DXGI format, not the planar one. + (format, aspects) if format.is_multi_planar_format() && aspects.is_one() => { + map_texture_format(format.aspect_specific_format(aspects.map())?) + } + (format, crate::FormatAspects::COLOR) => map_texture_format(format), _ => return None, diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 359a89fc054..a816f2f45d1 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -477,6 +477,7 @@ impl super::Device { suballocation::AllocationType::Texture, format.theoretical_memory_footprint(size), ), + plane_slice_override: None, } } @@ -598,6 +599,7 @@ impl crate::Device for super::Device { mip_level_count: desc.mip_level_count, sample_count: desc.sample_count, allocation, + plane_slice_override: None, }) } @@ -629,7 +631,7 @@ impl crate::Device for super::Device { subresource_index: texture.calc_subresource( desc.range.base_mip_level, desc.range.base_array_layer, - 0, + view_desc.plane_slice(), ), mip_slice: desc.range.base_mip_level, handle_srv: if desc.usage.intersects(wgt::TextureUses::RESOURCE) { diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 34f4f3a9749..c3a068a46d2 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -1054,12 +1054,57 @@ pub struct Texture { mip_level_count: u32, sample_count: u32, allocation: suballocation::Allocation, + /// Pins `PlaneSlice` for every view and copy derived from this texture, + /// overriding the default aspect-based derivation. Used by cross-API + /// importers wrapping one plane of a multi-plane DXGI resource as a + /// single-plane wgpu texture. + plane_slice_override: Option, } impl Texture { pub unsafe fn raw_resource(&self) -> &Direct3D12::ID3D12Resource { &self.resource } + + /// Pin the D3D12 plane slice for every view and copy derived from + /// this texture. Pass `0` for the luma plane, `1` for chroma. The + /// declared `TextureFormat` must be single-plane (e.g. `R8Unorm`, + /// `Rg8Unorm`) and match the plane's texel layout. + /// + /// # Safety / aliasing + /// + /// The intended use is to wrap one plane of a multi-plane DXGI + /// resource (e.g. NV12) as a single-plane wgpu texture, typically + /// by calling [`Device::texture_from_raw`](Device::texture_from_raw) + /// twice with two clones of the same `ID3D12Resource` and a + /// different `plane_slice` on each. wgpu's state tracker treats + /// these wrappers as independent textures: per-subresource state, + /// hazard tracking, and resource-state barriers all assume + /// non-aliased ownership and have no way to know the two wrappers + /// alias the same underlying resource. + /// + /// The caller is responsible for ensuring that the underlying + /// resource is not concurrently used by another wgpu texture + /// wrapping a different plane in a way that would race or + /// invalidate state tracking — in particular, do not submit work + /// touching both wrappers in the same submission unless every + /// access goes through `COPY_SRC` / `COPY_DST` on the wrapper that + /// owns that plane's subresource, and do not destroy the wrappers + /// concurrently with in-flight work that uses either of them. + pub fn with_plane_slice(mut self, plane_slice: u32) -> Self { + debug_assert!( + !self.format.is_multi_planar_format(), + "`with_plane_slice` expects a single-plane format wrapping a \ + multi-plane DXGI resource; got planar format `{:?}`", + self.format, + ); + self.plane_slice_override = Some(plane_slice); + self + } + + pub(super) fn plane_slice_override(&self) -> Option { + self.plane_slice_override + } } impl crate::DynTexture for Texture {} @@ -1088,14 +1133,18 @@ impl Texture { } fn calc_subresource_for_copy(&self, base: &crate::TextureCopyBase) -> u32 { - let plane = match base.aspect { - crate::FormatAspects::COLOR - | crate::FormatAspects::DEPTH - | crate::FormatAspects::PLANE_0 => 0, - crate::FormatAspects::STENCIL | crate::FormatAspects::PLANE_1 => 1, - crate::FormatAspects::PLANE_2 => 2, - _ => unreachable!(), - }; + // Single-plane wrappers around a multi-plane resource report + // `COLOR` aspect, which would otherwise resolve to plane 0. + let plane = self + .plane_slice_override + .unwrap_or_else(|| match base.aspect { + crate::FormatAspects::COLOR + | crate::FormatAspects::DEPTH + | crate::FormatAspects::PLANE_0 => 0, + crate::FormatAspects::STENCIL | crate::FormatAspects::PLANE_1 => 1, + crate::FormatAspects::PLANE_2 => 2, + _ => unreachable!(), + }); self.calc_subresource(base.mip_level, base.array_layer, plane) } } @@ -1650,6 +1699,7 @@ impl crate::Surface for Surface { suballocation::AllocationType::Texture, sc.format.theoretical_memory_footprint(sc.size), ), + plane_slice_override: None, }; Ok(crate::AcquiredSurfaceTexture { texture, diff --git a/wgpu-hal/src/dx12/view.rs b/wgpu-hal/src/dx12/view.rs index a7181511bb4..ba715833f2f 100644 --- a/wgpu-hal/src/dx12/view.rs +++ b/wgpu-hal/src/dx12/view.rs @@ -12,6 +12,7 @@ pub(super) struct ViewDescriptor { array_layer_count: u32, mip_level_base: u32, mip_level_count: u32, + plane_slice_override: Option, } impl crate::TextureViewDescriptor<'_> { @@ -28,6 +29,7 @@ impl crate::TextureViewDescriptor<'_> { mip_level_count: self.range.mip_level_count.unwrap_or(!0), array_layer_base: self.range.base_array_layer, array_layer_count: self.range.array_layer_count.unwrap_or(!0), + plane_slice_override: texture.plane_slice_override(), } } } @@ -41,6 +43,13 @@ fn aspects_to_plane(aspects: crate::FormatAspects) -> u32 { } } +impl ViewDescriptor { + pub(super) fn plane_slice(&self) -> u32 { + self.plane_slice_override + .unwrap_or_else(|| aspects_to_plane(self.aspects)) + } +} + /// Shader component mapping for stencil views /// /// Stencil views use `DXGI_FORMAT_X24_TYPELESS_G8_UINT` or @@ -103,7 +112,7 @@ impl ViewDescriptor { desc.Anonymous.Texture2D = Direct3D12::D3D12_TEX2D_SRV { MostDetailedMip: self.mip_level_base, MipLevels: self.mip_level_count, - PlaneSlice: aspects_to_plane(self.aspects), + PlaneSlice: self.plane_slice(), ResourceMinLODClamp: 0.0, } } @@ -123,7 +132,7 @@ impl ViewDescriptor { MipLevels: self.mip_level_count, FirstArraySlice: self.array_layer_base, ArraySize: self.array_layer_count, - PlaneSlice: aspects_to_plane(self.aspects), + PlaneSlice: self.plane_slice(), ResourceMinLODClamp: 0.0, } } @@ -189,7 +198,7 @@ impl ViewDescriptor { desc.ViewDimension = Direct3D12::D3D12_UAV_DIMENSION_TEXTURE2D; desc.Anonymous.Texture2D = Direct3D12::D3D12_TEX2D_UAV { MipSlice: self.mip_level_base, - PlaneSlice: aspects_to_plane(self.aspects), + PlaneSlice: self.plane_slice(), } } wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array => { @@ -198,7 +207,7 @@ impl ViewDescriptor { MipSlice: self.mip_level_base, FirstArraySlice: self.array_layer_base, ArraySize: self.array_layer_count, - PlaneSlice: aspects_to_plane(self.aspects), + PlaneSlice: self.plane_slice(), } } wgt::TextureViewDimension::D3 => { @@ -250,7 +259,7 @@ impl ViewDescriptor { desc.ViewDimension = Direct3D12::D3D12_RTV_DIMENSION_TEXTURE2D; desc.Anonymous.Texture2D = Direct3D12::D3D12_TEX2D_RTV { MipSlice: self.mip_level_base, - PlaneSlice: aspects_to_plane(self.aspects), + PlaneSlice: self.plane_slice(), } } wgt::TextureViewDimension::D2 | wgt::TextureViewDimension::D2Array @@ -268,7 +277,7 @@ impl ViewDescriptor { MipSlice: self.mip_level_base, FirstArraySlice: self.array_layer_base, ArraySize: self.array_layer_count, - PlaneSlice: aspects_to_plane(self.aspects), + PlaneSlice: self.plane_slice(), } } wgt::TextureViewDimension::D3