From ad1acdcc44abb7d5a7e98e5f2e7619d30323ee3b Mon Sep 17 00:00:00 2001 From: Bjorn Schobben Date: Tue, 5 May 2026 23:43:53 +0200 Subject: [PATCH 01/20] Add video encode/decode round-trip sample Adds the VideoEncodeDecode sample with an animated compute-generated NV12 source, GPU encode/decode round-trip display, and CLI codec selection for H.264, H.265, and AV1. Uses the backend-neutral NRIVideo flow across Vulkan and D3D12: video sessions and pictures, encode feedback readback, decode picture state queries, Annex-B header generation, AV1 metadata preparation, and aligned NV12 upload/readback layouts. Integrates the sample into CMake, shader configuration, README, and NRI submodule expectations, gating the target on the required NRIVideo version so older NRI trees skip cleanly. --- .gitignore | 2 + CMakeLists.txt | 19 + External/NRIFramework | 2 +- README.md | 3 +- Shaders/Shaders.cfg | 1 + Shaders/VideoEncodePattern.cs.hlsl | 163 +++ Source/VideoEncodeDecode.cpp | 1928 ++++++++++++++++++++++++++++ 7 files changed, 2116 insertions(+), 2 deletions(-) create mode 100644 Shaders/VideoEncodePattern.cs.hlsl create mode 100644 Source/VideoEncodeDecode.cpp diff --git a/.gitignore b/.gitignore index 3e8052d..36930b2 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,5 @@ _Data/ # can be a symbolic link _Data +build +build-video-sample diff --git a/CMakeLists.txt b/CMakeLists.txt index a794417..49dc266 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -187,6 +187,25 @@ add_sample(Resources c) add_sample(SceneViewer cpp) add_sample(Triangle cpp) +set(NRI_VIDEO_HEADER "${NRI_SOURCE_DIR}/Include/Extensions/NRIVideo.h") +set(NRI_VIDEO_VERSION 0) +if(EXISTS "${NRI_VIDEO_HEADER}") + file(STRINGS "${NRI_VIDEO_HEADER}" NRI_VIDEO_VERSION_LINE REGEX "^#define NRI_VIDEO_VERSION ") + if(NRI_VIDEO_VERSION_LINE MATCHES "^#define NRI_VIDEO_VERSION +([0-9]+)") + set(NRI_VIDEO_VERSION "${CMAKE_MATCH_1}") + endif() +endif() + +if(NRI_VIDEO_VERSION GREATER_EQUAL 1) + add_sample(VideoEncodeDecode cpp) + + if(WIN32) + target_link_libraries(VideoEncodeDecode PRIVATE d3d12) + endif() +else() + message(STATUS "Skipping VideoEncodeDecode: selected NRI source does not provide NRIVideo version 1") +endif() + # Wrapper depends on Vulkan SDK availability if(DEFINED ENV{VULKAN_SDK}) add_sample(Wrapper cpp) diff --git a/External/NRIFramework b/External/NRIFramework index 60466c4..67a1c91 160000 --- a/External/NRIFramework +++ b/External/NRIFramework @@ -1 +1 @@ -Subproject commit 60466c47cb8b331ae432459c53cce6656239c0af +Subproject commit 67a1c91f085569a9e5a9d6978e58703cdd707419 diff --git a/README.md b/README.md index 75f2bfb..070b8e8 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ Or by running scripts only: ### CMake options - `DISABLE_SHADER_COMPILATION` - disable compilation of shaders (shaders can be built on other platform) +- `NRI_SOURCE_OVERRIDE_DIR` - use an alternate NRI source tree, for example an in-review NRI branch - `NRIF_USE_WAYLAND` - use Wayland instead of X11 on Linux ## How to run @@ -57,5 +58,5 @@ The executables from `_Bin` directory load resources from `_Data`, therefore the - Resources - various resources allocation related stuff - SceneViewer - loading & rendering of meshes with materials (also tests programmable sample locations, shading rate and pipeline statistics) - Triangle - simple textured triangle rendering (also multiview demonstration in _FLEXIBLE_ mode) +- VideoEncodeDecode - H.264/H.265/AV1 NV12 video encode/decode round trip; built only when the selected NRI source provides the NRIVideo extension - Wrapper - shows how to wrap native D3D11/D3D12/VK objects into *NRI* entities - diff --git a/Shaders/Shaders.cfg b/Shaders/Shaders.cfg index 54c53b1..3e7e9a7 100644 --- a/Shaders/Shaders.cfg +++ b/Shaders/Shaders.cfg @@ -8,6 +8,7 @@ Box5.fs.hlsl -T ps Box6.fs.hlsl -T ps Box7.fs.hlsl -T ps Compute.cs.hlsl -T cs +VideoEncodePattern.cs.hlsl -T cs DescriptorHeapIndexing.cs.hlsl -T cs -m 6_6 GenerateSceneDrawCalls.cs.hlsl -T cs Forward.fs.hlsl -T ps diff --git a/Shaders/VideoEncodePattern.cs.hlsl b/Shaders/VideoEncodePattern.cs.hlsl new file mode 100644 index 0000000..bf34cb1 --- /dev/null +++ b/Shaders/VideoEncodePattern.cs.hlsl @@ -0,0 +1,163 @@ +// © 2021 NVIDIA Corporation + +#include "NRI.hlsl" + +NRI_RESOURCE(RWBuffer, g_Nv12Buffer, u, 0, 0); +NRI_FORMAT("rgba8") NRI_RESOURCE(RWTexture2D, g_SourcePreview, u, 1, 0); +NRI_FORMAT("rgba8") NRI_RESOURCE(RWTexture2D, g_DecodePreview, u, 2, 0); + +static const uint OP_GENERATE_PATTERN = 0u; +static const uint OP_NV12_TO_PREVIEW = 1u; + +struct PatternRootConstants { + uint width; + uint height; + uint yOffsetBytes; + uint yRowPitchBytes; + uint uvRowPitchBytes; + uint uvOffsetBytes; + uint operation; + float time; + uint padding; + uint padding1; +}; + +NRI_ROOT_CONSTANTS(PatternRootConstants, g_Pattern, 0, 0); + +float3 MakePatternColor(float2 normalizedPixelPos, float time) { + const float fx = normalizedPixelPos.x; + const float fy = normalizedPixelPos.y; + const float cx = fx - 0.5f; + const float cy = fy - 0.5f; + const float radius = sqrt(cx * cx + cy * cy); + const float angle = atan2(cy, cx); + + const float sweep = sin(angle * 3.0f + time * 1.7f) * 0.5f + 0.5f; + const float rings = sin((radius * 16.0f - time * 1.25f) * 6.283185307179586f) * 0.5f + 0.5f; + const float diagonal = sin((fx * 5.0f + fy * 3.0f + time * 0.45f) * 6.283185307179586f) * 0.5f + 0.5f; + const float grid = (((uint)(fx * 16.0f) ^ (uint)(fy * 10.0f)) & 1) ? 0.08f : 0.0f; + + const float r = min(1.0f, 0.10f + 0.78f * sweep + 0.18f * diagonal + grid); + const float g = min(1.0f, 0.14f + 0.72f * rings + 0.20f * fy + grid); + const float b = min(1.0f, 0.18f + 0.52f * diagonal + 0.34f * (1.0f - radius) + grid); + + return float3(r, g, b); +} + +uint ClampToByte(float v) { + return (v <= 0.0f) ? 0u : (v >= 255.0f ? 255u : uint(v)); +} + +uint RGBToY(uint3 rgb) { + return ClampToByte(16.0f + 0.257f * float(rgb.x) + 0.504f * float(rgb.y) + 0.098f * float(rgb.z)); +} + +uint RGBToU(uint3 rgb) { + return ClampToByte(128.0f - 0.148f * float(rgb.x) - 0.291f * float(rgb.y) + 0.439f * float(rgb.z)); +} + +uint RGBToV(uint3 rgb) { + return ClampToByte(128.0f + 0.439f * float(rgb.x) - 0.368f * float(rgb.y) - 0.071f * float(rgb.z)); +} + +float3 YuvToRgb(uint y, uint u, uint v) { + const float yy = float(y); + const float uu = float(u) - 128.0f; + const float vv = float(v) - 128.0f; + + const float r = clamp((298.082f * (yy - 16.0f) + 408.583f * vv + 128.0f) / 256.0f, 0.0f, 255.0f); + const float g = clamp((298.082f * (yy - 16.0f) - 100.291f * uu - 208.120f * vv + 128.0f) / 256.0f, 0.0f, 255.0f); + const float b = clamp((298.082f * (yy - 16.0f) + 516.412f * uu + 128.0f) / 256.0f, 0.0f, 255.0f); + + return float3(r / 255.0f, g / 255.0f, b / 255.0f); +} + +float4 LoadPatternColor(uint px, uint py) { + return float4(MakePatternColor(float2(px, py) / float2(g_Pattern.width - 1u, g_Pattern.height - 1u), g_Pattern.time), 1.0f); +} + +void StorePreview(uint2 pixel, float4 color) { + if (g_Pattern.operation == OP_NV12_TO_PREVIEW) + g_DecodePreview[pixel] = color; + else + g_SourcePreview[pixel] = color; +} + +[numthreads(1, 1, 1)] +void main(uint3 dispatchThreadID : SV_DispatchThreadID) +{ + const uint blockX = dispatchThreadID.x * 4u; + const uint y = dispatchThreadID.y; + + if (blockX + 3u >= g_Pattern.width || y >= g_Pattern.height) + return; + + const float4 c0 = LoadPatternColor(blockX + 0u, y); + const float4 c1 = LoadPatternColor(blockX + 1u, y); + const float4 c2 = LoadPatternColor(blockX + 2u, y); + const float4 c3 = LoadPatternColor(blockX + 3u, y); + + if (g_Pattern.operation == OP_GENERATE_PATTERN) { + StorePreview(uint2(blockX + 0u, y), c0); + StorePreview(uint2(blockX + 1u, y), c1); + StorePreview(uint2(blockX + 2u, y), c2); + StorePreview(uint2(blockX + 3u, y), c3); + + const uint3 rgb0 = uint3(c0.rgb * 255.0f); + const uint3 rgb1 = uint3(c1.rgb * 255.0f); + const uint3 rgb2 = uint3(c2.rgb * 255.0f); + const uint3 rgb3 = uint3(c3.rgb * 255.0f); + + const uint y0 = RGBToY(rgb0); + const uint y1 = RGBToY(rgb1); + const uint y2 = RGBToY(rgb2); + const uint y3 = RGBToY(rgb3); + + const uint packedY = y0 | (y1 << 8u) | (y2 << 16u) | (y3 << 24u); + const uint yWordIndex = (g_Pattern.yOffsetBytes + y * g_Pattern.yRowPitchBytes + blockX) / 4u; + g_Nv12Buffer[yWordIndex] = packedY; + + if ((y & 1u) != 0u) + return; + if (y + 1u >= g_Pattern.height) + return; + + const float4 c4 = LoadPatternColor(blockX + 0u, y + 1u); + const float4 c5 = LoadPatternColor(blockX + 1u, y + 1u); + const float4 c6 = LoadPatternColor(blockX + 2u, y + 1u); + const float4 c7 = LoadPatternColor(blockX + 3u, y + 1u); + + const uint3 rgb4 = uint3(c4.rgb * 255.0f); + const uint3 rgb5 = uint3(c5.rgb * 255.0f); + const uint3 rgb6 = uint3(c6.rgb * 255.0f); + const uint3 rgb7 = uint3(c7.rgb * 255.0f); + + const uint u0 = (RGBToU(rgb0) + RGBToU(rgb1) + RGBToU(rgb4) + RGBToU(rgb5) + 2u) >> 2u; + const uint v0 = (RGBToV(rgb0) + RGBToV(rgb1) + RGBToV(rgb4) + RGBToV(rgb5) + 2u) >> 2u; + const uint u1 = (RGBToU(rgb2) + RGBToU(rgb3) + RGBToU(rgb6) + RGBToU(rgb7) + 2u) >> 2u; + const uint v1 = (RGBToV(rgb2) + RGBToV(rgb3) + RGBToV(rgb6) + RGBToV(rgb7) + 2u) >> 2u; + + const uint packedUV = u0 | (v0 << 8u) | (u1 << 16u) | (v1 << 24u); + const uint uvWordIndex = (g_Pattern.uvOffsetBytes + ((y >> 1u) * g_Pattern.uvRowPitchBytes + blockX)) / 4u; + g_Nv12Buffer[uvWordIndex] = packedUV; + return; + } + + if (g_Pattern.operation == OP_NV12_TO_PREVIEW) { + const uint uvBase = (g_Pattern.uvOffsetBytes + (y >> 1u) * g_Pattern.uvRowPitchBytes + blockX) + ((y & 1u) * 0u); + for (uint i = 0; i < 4; i++) { + const uint px = blockX + i; + const uint yIndex = y * g_Pattern.yRowPitchBytes + px + g_Pattern.yOffsetBytes; + const uint yWord = g_Nv12Buffer[yIndex / 4u]; + const uint yValue = (yWord >> ((yIndex & 3u) * 8u)) & 255u; + + const uint uvOffset = uvBase + (i & 2u); + const uint uvWord = g_Nv12Buffer[(uvOffset) / 4u]; + const uint uvShift = (uvOffset & 3u) * 8u; + const uint uValue = (uvWord >> uvShift) & 255u; + const uint vValue = (uvWord >> (uvShift + 8u)) & 255u; + + StorePreview(uint2(px, y), float4(YuvToRgb(yValue, uValue, vValue), 1.0f)); + } + } +} diff --git a/Source/VideoEncodeDecode.cpp b/Source/VideoEncodeDecode.cpp new file mode 100644 index 0000000..a2bc461 --- /dev/null +++ b/Source/VideoEncodeDecode.cpp @@ -0,0 +1,1928 @@ +// © 2021 NVIDIA Corporation + +#if defined(_WIN32) +# include +#endif + +#include "NRIFramework.h" + +#include "Extensions/NRIVideo.h" + +#include +#include +#include +#include +#include +#include + +namespace { + +constexpr uint32_t VIDEO_WIDTH = 1920; +constexpr uint32_t VIDEO_HEIGHT = 1088; +constexpr double ROUND_TRIP_INTERVAL_SEC = 1.0 / 60.0; +constexpr uint64_t BITSTREAM_SIZE = 2 * 1024 * 1024; +constexpr uint64_t ENCODED_SLICE_OFFSET = 4096; +constexpr uint64_t AV1_HEADER_READBACK_SIZE = 4096; +constexpr uint64_t METADATA_SIZE = 4 * 1024 * 1024; +constexpr uint64_t RESOLVED_METADATA_SIZE = 4096; + +static_assert(VIDEO_WIDTH % 4 == 0, "Compute-backed NV12 writer expects width divisible by 4"); +static_assert(VIDEO_WIDTH % 16 == 0, "H.264 macroblock width should stay aligned"); +static_assert(VIDEO_HEIGHT % 16 == 0, "H.264 macroblock height should stay aligned"); + +enum PatternOperation : uint32_t { + OP_GENERATE_PATTERN = 0, + OP_NV12_TO_PREVIEW = 1, +}; + +enum class SampleCodec : uint8_t { + H264, + H265, + AV1, +}; + +static const char* GetCodecName(SampleCodec codec) { + switch (codec) { + case SampleCodec::H265: + return "H.265"; + case SampleCodec::AV1: + return "AV1"; + case SampleCodec::H264: + default: + return "H.264"; + } +} + +static nri::VideoCodec GetNriCodec(SampleCodec codec) { + switch (codec) { + case SampleCodec::H265: + return nri::VideoCodec::H265; + case SampleCodec::AV1: + return nri::VideoCodec::AV1; + case SampleCodec::H264: + default: + return nri::VideoCodec::H264; + } +} + +static uint64_t GetEncodedPayloadHeaderSkip(SampleCodec codec, uint64_t encodedBitstreamBytes) { + const uint64_t headerSize = codec == SampleCodec::H264 ? 1 : 0; + return std::min(headerSize, encodedBitstreamBytes); +} + +static nri::VideoAV1SequenceDesc MakeAV1SequenceDesc() { + nri::VideoAV1SequenceDesc desc = {}; + desc.flags = nri::VideoAV1SequenceBits::ENABLE_ORDER_HINT | + nri::VideoAV1SequenceBits::ENABLE_CDEF | + nri::VideoAV1SequenceBits::ENABLE_RESTORATION | + nri::VideoAV1SequenceBits::COLOR_DESCRIPTION_PRESENT; + desc.bitDepth = 8; + desc.subsamplingX = 1; + desc.subsamplingY = 1; + desc.maxFrameWidthMinus1 = VIDEO_WIDTH - 1; + desc.maxFrameHeightMinus1 = VIDEO_HEIGHT - 1; + desc.frameWidthBitsMinus1 = 15; + desc.frameHeightBitsMinus1 = 15; + desc.orderHintBitsMinus1 = 7; + desc.seqForceIntegerMv = 2; + desc.seqForceScreenContentTools = 2; + desc.colorPrimaries = 1; + desc.transferCharacteristics = 1; + desc.matrixCoefficients = 1; + desc.chromaSamplePosition = 1; + return desc; +} + +struct QueuedFrame { + nri::CommandAllocator* commandAllocator = nullptr; + nri::CommandBuffer* commandBuffer = nullptr; +}; + +struct PatternConstants { + uint32_t width = VIDEO_WIDTH; + uint32_t height = VIDEO_HEIGHT; + uint32_t yOffsetBytes = 0; + uint32_t yRowPitchBytes = 0; + uint32_t uvRowPitchBytes = 0; + uint32_t uvOffsetBytes = 0; + uint32_t operation = OP_GENERATE_PATTERN; + float time = 0.0f; + uint32_t _padding = 0; + uint32_t _padding1 = 0; +}; + +struct Nv12BufferLayout { + uint32_t yRowPitchBytes = VIDEO_WIDTH; + uint32_t ySlicePitchBytes = VIDEO_WIDTH * VIDEO_HEIGHT; + uint64_t uvOffsetBytes = uint64_t(VIDEO_WIDTH) * VIDEO_HEIGHT; + uint32_t uvRowPitchBytes = VIDEO_WIDTH; + uint32_t uvSlicePitchBytes = VIDEO_WIDTH * VIDEO_HEIGHT / 2; + uint64_t totalSizeBytes = uint64_t(VIDEO_WIDTH) * VIDEO_HEIGHT * 3 / 2; +}; + +static uint64_t AlignUp(uint64_t value, uint64_t alignment) { + return alignment == 0 ? value : ((value + alignment - 1) / alignment) * alignment; +} + +static Nv12BufferLayout MakeNv12BufferLayout(const nri::DeviceDesc& deviceDesc) { + const uint32_t rowAlignment = std::max(deviceDesc.memoryAlignment.uploadBufferTextureRow, 1u); + const uint32_t sliceAlignment = std::max(deviceDesc.memoryAlignment.uploadBufferTextureSlice, 1u); + + Nv12BufferLayout layout = {}; + layout.yRowPitchBytes = (uint32_t)AlignUp(VIDEO_WIDTH, rowAlignment); + layout.ySlicePitchBytes = (uint32_t)AlignUp(uint64_t(layout.yRowPitchBytes) * VIDEO_HEIGHT, sliceAlignment); + layout.uvOffsetBytes = layout.ySlicePitchBytes; + layout.uvRowPitchBytes = (uint32_t)AlignUp(VIDEO_WIDTH, rowAlignment); + layout.uvSlicePitchBytes = (uint32_t)AlignUp(uint64_t(layout.uvRowPitchBytes) * (VIDEO_HEIGHT / 2), sliceAlignment); + layout.totalSizeBytes = layout.uvOffsetBytes + layout.uvSlicePitchBytes; + return layout; +} + +template +static bool SubmitOneTime(nri::CoreInterface& core, nri::Queue& queue, Record&& record) { + nri::CommandAllocator* allocator = nullptr; + nri::CommandBuffer* commandBuffer = nullptr; + bool ok = core.CreateCommandAllocator(queue, allocator) == nri::Result::SUCCESS && allocator && core.CreateCommandBuffer(*allocator, commandBuffer) == nri::Result::SUCCESS && commandBuffer && core.BeginCommandBuffer(*commandBuffer, nullptr) == nri::Result::SUCCESS; + if (ok) { + std::forward(record)(*commandBuffer); + ok = core.EndCommandBuffer(*commandBuffer) == nri::Result::SUCCESS; + } + if (ok) { + const nri::CommandBuffer* commandBuffers[] = {commandBuffer}; + nri::QueueSubmitDesc submit = {}; + submit.commandBuffers = commandBuffers; + submit.commandBufferNum = 1; + ok = core.QueueSubmit(queue, submit) == nri::Result::SUCCESS && core.QueueWaitIdle(&queue) == nri::Result::SUCCESS; + } + if (commandBuffer) + core.DestroyCommandBuffer(commandBuffer); + if (allocator) + core.DestroyCommandAllocator(allocator); + return ok; +} + +template +static bool SubmitOneTime( + nri::CoreInterface& core, nri::Queue& queue, nri::DescriptorPool* descriptorPool, Record&& record) { + nri::CommandAllocator* allocator = nullptr; + nri::CommandBuffer* commandBuffer = nullptr; + bool ok = core.CreateCommandAllocator(queue, allocator) == nri::Result::SUCCESS && allocator && core.CreateCommandBuffer(*allocator, commandBuffer) == nri::Result::SUCCESS && commandBuffer && + core.BeginCommandBuffer(*commandBuffer, descriptorPool) == nri::Result::SUCCESS; + if (ok) { + std::forward(record)(*commandBuffer); + ok = core.EndCommandBuffer(*commandBuffer) == nri::Result::SUCCESS; + } + if (ok) { + const nri::CommandBuffer* commandBuffers[] = {commandBuffer}; + nri::QueueSubmitDesc submit = {}; + submit.commandBuffers = commandBuffers; + submit.commandBufferNum = 1; + ok = core.QueueSubmit(queue, submit) == nri::Result::SUCCESS && core.QueueWaitIdle(&queue) == nri::Result::SUCCESS; + } + if (commandBuffer) + core.DestroyCommandBuffer(commandBuffer); + if (allocator) + core.DestroyCommandAllocator(allocator); + return ok; +} + +static bool CopyNv12BufferToTexture(nri::CoreInterface& core, nri::Queue& queue, const Nv12BufferLayout& layout, nri::Buffer& src, nri::Texture& dst) { + return SubmitOneTime(core, queue, [&](nri::CommandBuffer& commandBuffer) { + nri::BufferBarrierDesc bufferBarrier = {}; + bufferBarrier.buffer = &src; + bufferBarrier.before = {nri::AccessBits::SHADER_RESOURCE_STORAGE, nri::StageBits::COMPUTE_SHADER}; + bufferBarrier.after = {nri::AccessBits::COPY_SOURCE, nri::StageBits::COPY}; + + nri::TextureBarrierDesc textureBarrier = {}; + textureBarrier.texture = &dst; + textureBarrier.before = {nri::AccessBits::NONE, nri::Layout::GENERAL, nri::StageBits::NONE}; + textureBarrier.after = {nri::AccessBits::COPY_DESTINATION, nri::Layout::COPY_DESTINATION, nri::StageBits::COPY}; + textureBarrier.mipNum = nri::REMAINING; + textureBarrier.layerNum = nri::REMAINING; + textureBarrier.planes = nri::PlaneBits::ALL; + + nri::BarrierDesc barrierDesc = {}; + barrierDesc.buffers = &bufferBarrier; + barrierDesc.bufferNum = 1; + barrierDesc.textures = &textureBarrier; + barrierDesc.textureNum = 1; + core.CmdBarrier(commandBuffer, barrierDesc); + + nri::TextureRegionDesc lumaRegion = {}; + lumaRegion.width = VIDEO_WIDTH; + lumaRegion.height = VIDEO_HEIGHT; + lumaRegion.depth = 1; + lumaRegion.planes = nri::PlaneBits::PLANE_0; + + nri::TextureDataLayoutDesc lumaLayout = {}; + lumaLayout.rowPitch = layout.yRowPitchBytes; + lumaLayout.slicePitch = layout.ySlicePitchBytes; + core.CmdUploadBufferToTexture(commandBuffer, dst, lumaRegion, src, lumaLayout); + + nri::TextureRegionDesc chromaRegion = {}; + chromaRegion.width = VIDEO_WIDTH; + chromaRegion.height = VIDEO_HEIGHT; + chromaRegion.depth = 1; + chromaRegion.planes = nri::PlaneBits::PLANE_1; + + nri::TextureDataLayoutDesc chromaLayout = {}; + chromaLayout.offset = layout.uvOffsetBytes; + chromaLayout.rowPitch = layout.uvRowPitchBytes; + chromaLayout.slicePitch = layout.uvSlicePitchBytes; + core.CmdUploadBufferToTexture(commandBuffer, dst, chromaRegion, src, chromaLayout); + + textureBarrier.before = textureBarrier.after; + textureBarrier.after = {nri::AccessBits::NONE, nri::Layout::GENERAL, nri::StageBits::NONE}; + bufferBarrier.before = bufferBarrier.after; + bufferBarrier.after = {nri::AccessBits::SHADER_RESOURCE_STORAGE, nri::StageBits::COMPUTE_SHADER}; + core.CmdBarrier(commandBuffer, barrierDesc); + }); +} + +} // namespace + +class Sample : public SampleBase { +public: + Sample() = default; + ~Sample(); + + bool Initialize(nri::GraphicsAPI graphicsAPI, bool) override; + void InitCmdLine(cmdline::parser& cmdLine) override; + void ReadCmdLine(cmdline::parser& cmdLine) override; + void LatencySleep(uint32_t frameIndex) override; + void PrepareFrame(uint32_t frameIndex) override; + void RenderFrame(uint32_t frameIndex) override; + +private: + bool InitializeGraphics(nri::GraphicsAPI graphicsAPI); + bool TryInitializePreviewTextures(nri::GraphicsAPI graphicsAPI); + void InitializeGeneratedFrames(float timeSec); + bool CanRunRoundTrip() const; + void TryInitializeVideo(nri::GraphicsAPI graphicsAPI); + PatternConstants MakePatternConstants(PatternOperation operation, float timeSec) const; + bool GeneratePatternWithCompute(const PatternConstants& constants, nri::Descriptor* previewTexture, bool returnSourceBufferToShaderStorage = false); + bool WriteAnnexBHeadersToUploadBuffer(std::vector& annexBHeaders); + bool TrySubmitEncodeAndMetadataReadback(float timeSec); + bool TryDecodePendingMetadata(float timeSec); + bool DecodeEncodedBitstream(const nri::VideoEncodeFeedback& feedback, const nri::VideoAV1EncodeDecodeInfo* av1DecodeInfo, float timeSec); + bool TryRunRoundTrip(float timeSec); + void DrawTexturePanel(const char* label, nri::Descriptor* texture, const ImVec2& size); + +private: + NRIInterface NRI = {}; + nri::VideoInterface Video = {}; + nri::GraphicsAPI m_GraphicsAPI = nri::GraphicsAPI::NONE; + + nri::Device* m_Device = nullptr; + nri::Streamer* m_Streamer = nullptr; + nri::SwapChain* m_SwapChain = nullptr; + nri::Queue* m_GraphicsQueue = nullptr; + nri::Queue* m_VideoEncodeQueue = nullptr; + nri::Queue* m_VideoDecodeQueue = nullptr; + nri::Fence* m_FrameFence = nullptr; + + nri::VideoSession* m_EncodeSession = nullptr; + nri::VideoSession* m_DecodeSession = nullptr; + nri::VideoSessionParameters* m_EncodeParameters = nullptr; + nri::VideoSessionParameters* m_DecodeParameters = nullptr; + nri::Texture* m_EncodeTexture = nullptr; + nri::Texture* m_ReconstructedTexture = nullptr; + nri::Texture* m_DecodeTexture = nullptr; + nri::Texture* m_SourcePreviewTexture = nullptr; + nri::Texture* m_DecodePreviewTexture = nullptr; + nri::Buffer* m_UploadBuffer = nullptr; + nri::Descriptor* m_UploadBufferView = nullptr; + nri::Descriptor* m_SourcePreviewStorage = nullptr; + nri::Descriptor* m_DecodePreviewStorage = nullptr; + nri::Descriptor* m_SourcePreviewTextureView = nullptr; + nri::Descriptor* m_DecodePreviewTextureView = nullptr; + nri::PipelineLayout* m_GeneratePipelineLayout = nullptr; + nri::Pipeline* m_GenerateComputePipeline = nullptr; + nri::DescriptorPool* m_GenerateDescriptorPool = nullptr; + nri::DescriptorSet* m_GenerateDescriptorSet = nullptr; + nri::Buffer* m_BitstreamHeaderUploadBuffer = nullptr; + nri::Buffer* m_BitstreamHeaderReadbackBuffer = nullptr; + nri::Buffer* m_BitstreamBuffer = nullptr; + nri::Buffer* m_DecodeBitstreamBuffer = nullptr; + nri::Buffer* m_MetadataBuffer = nullptr; + nri::Buffer* m_ResolvedMetadataBuffer = nullptr; + nri::Buffer* m_ResolvedMetadataReadbackBuffer = nullptr; + nri::VideoPicture* m_EncodePicture = nullptr; + nri::VideoPicture* m_ReconstructedPicture = nullptr; + nri::VideoPicture* m_DecodePicture = nullptr; + nri::CommandAllocator* m_MetadataReadbackCommandAllocator = nullptr; + nri::CommandBuffer* m_MetadataReadbackCommandBuffer = nullptr; + nri::Fence* m_MetadataReadbackFence = nullptr; + + std::vector m_QueuedFrames; + std::vector m_SwapChainTextures; + + Nv12BufferLayout m_Nv12Layout = {}; + nri::Format m_SwapChainFormat = nri::Format::UNKNOWN; + std::string m_VideoStatus = "Initializing video"; + std::string m_PreviewStatus = "Initializing preview"; + std::string m_CodecArg = "H264"; + SampleCodec m_Codec = SampleCodec::H264; + nri::VideoH264SequenceParameterSetDesc m_H264Sps = {}; + nri::VideoH264PictureParameterSetDesc m_H264Pps = {}; + nri::VideoH265VideoParameterSetDesc m_H265Vps = {}; + nri::VideoH265SequenceParameterSetDesc m_H265Sps = {}; + nri::VideoH265PictureParameterSetDesc m_H265Pps = {}; + nri::VideoAV1SequenceDesc m_AV1Sequence = {}; + double m_StartTimeSec = 0.0; + double m_LastRoundTripTimeSec = -1.0; + bool m_VideoReady = false; + bool m_DecodePreviewReady = false; + bool m_PreviewTexturesShaderReadable = false; + bool m_MetadataReadbackPending = false; + uint64_t m_MetadataReadbackFenceValue = 0; +}; + +Sample::~Sample() { + if (NRI.HasCore()) { + NRI.DeviceWaitIdle(m_Device); + + if (Video.DestroyVideoPicture) { + if (m_DecodePicture) + Video.DestroyVideoPicture(*m_DecodePicture); + if (m_ReconstructedPicture) + Video.DestroyVideoPicture(*m_ReconstructedPicture); + if (m_EncodePicture) + Video.DestroyVideoPicture(*m_EncodePicture); + if (m_DecodeParameters) + Video.DestroyVideoSessionParameters(*m_DecodeParameters); + if (m_EncodeParameters) + Video.DestroyVideoSessionParameters(*m_EncodeParameters); + if (m_DecodeSession) + Video.DestroyVideoSession(*m_DecodeSession); + if (m_EncodeSession) + Video.DestroyVideoSession(*m_EncodeSession); + } + + if (m_MetadataReadbackCommandBuffer) + NRI.DestroyCommandBuffer(m_MetadataReadbackCommandBuffer); + if (m_MetadataReadbackCommandAllocator) + NRI.DestroyCommandAllocator(m_MetadataReadbackCommandAllocator); + if (m_MetadataReadbackFence) + NRI.DestroyFence(m_MetadataReadbackFence); + if (m_ResolvedMetadataReadbackBuffer) + NRI.DestroyBuffer(m_ResolvedMetadataReadbackBuffer); + if (m_ResolvedMetadataBuffer) + NRI.DestroyBuffer(m_ResolvedMetadataBuffer); + if (m_GenerateDescriptorPool) + NRI.DestroyDescriptorPool(m_GenerateDescriptorPool); + if (m_GenerateComputePipeline) + NRI.DestroyPipeline(m_GenerateComputePipeline); + if (m_GeneratePipelineLayout) + NRI.DestroyPipelineLayout(m_GeneratePipelineLayout); + if (m_UploadBufferView) + NRI.DestroyDescriptor(m_UploadBufferView); + if (m_MetadataBuffer) + NRI.DestroyBuffer(m_MetadataBuffer); + if (m_DecodeBitstreamBuffer) + NRI.DestroyBuffer(m_DecodeBitstreamBuffer); + if (m_BitstreamBuffer) + NRI.DestroyBuffer(m_BitstreamBuffer); + if (m_BitstreamHeaderReadbackBuffer) + NRI.DestroyBuffer(m_BitstreamHeaderReadbackBuffer); + if (m_BitstreamHeaderUploadBuffer) + NRI.DestroyBuffer(m_BitstreamHeaderUploadBuffer); + if (m_UploadBuffer) + NRI.DestroyBuffer(m_UploadBuffer); + if (m_SourcePreviewStorage) + NRI.DestroyDescriptor(m_SourcePreviewStorage); + if (m_DecodePreviewStorage) + NRI.DestroyDescriptor(m_DecodePreviewStorage); + if (m_SourcePreviewTextureView) + NRI.DestroyDescriptor(m_SourcePreviewTextureView); + if (m_DecodePreviewTextureView) + NRI.DestroyDescriptor(m_DecodePreviewTextureView); + if (m_SourcePreviewTexture) + NRI.DestroyTexture(m_SourcePreviewTexture); + if (m_DecodePreviewTexture) + NRI.DestroyTexture(m_DecodePreviewTexture); + if (m_DecodeTexture) + NRI.DestroyTexture(m_DecodeTexture); + if (m_ReconstructedTexture) + NRI.DestroyTexture(m_ReconstructedTexture); + if (m_EncodeTexture) + NRI.DestroyTexture(m_EncodeTexture); + + for (QueuedFrame& queuedFrame : m_QueuedFrames) { + NRI.DestroyCommandBuffer(queuedFrame.commandBuffer); + NRI.DestroyCommandAllocator(queuedFrame.commandAllocator); + } + + for (SwapChainTexture& swapChainTexture : m_SwapChainTextures) { + NRI.DestroyFence(swapChainTexture.acquireSemaphore); + NRI.DestroyFence(swapChainTexture.releaseSemaphore); + NRI.DestroyDescriptor(swapChainTexture.colorAttachment); + } + + NRI.DestroyFence(m_FrameFence); + } + + if (NRI.HasSwapChain()) + NRI.DestroySwapChain(m_SwapChain); + + if (NRI.HasStreamer()) + NRI.DestroyStreamer(m_Streamer); + + DestroyImgui(); + + nri::nriDestroyDevice(m_Device); +} + +void Sample::InitCmdLine(cmdline::parser& cmdLine) { + cmdLine.add("codec", 0, "video codec: H264, H265, or AV1", false, m_CodecArg, cmdline::oneof("H264", "H265", "AV1")); +} + +void Sample::ReadCmdLine(cmdline::parser& cmdLine) { + m_CodecArg = cmdLine.get("codec"); + m_Codec = m_CodecArg == "H265" ? SampleCodec::H265 : (m_CodecArg == "AV1" ? SampleCodec::AV1 : SampleCodec::H264); +} + +bool Sample::Initialize(nri::GraphicsAPI graphicsAPI, bool) { + m_GraphicsAPI = graphicsAPI; + if (!InitializeGraphics(graphicsAPI)) + return false; + + m_StartTimeSec = m_Timer.GetTimeStamp() * 0.001; + if (!TryInitializePreviewTextures(graphicsAPI)) + return false; + TryInitializeVideo(graphicsAPI); + InitializeGeneratedFrames(0.0f); + + return InitImgui(*m_Device); +} + +bool Sample::InitializeGraphics(nri::GraphicsAPI graphicsAPI) { + nri::AdapterDesc adapterDesc[2] = {}; + uint32_t adapterDescsNum = helper::GetCountOf(adapterDesc); + NRI_ABORT_ON_FAILURE(nri::nriEnumerateAdapters(adapterDesc, adapterDescsNum)); + + nri::DeviceCreationDesc deviceCreationDesc = {}; + nri::QueueFamilyDesc queueFamilies[] = { + {nullptr, 1, nri::QueueType::GRAPHICS}, + {nullptr, 1, nri::QueueType::VIDEO_ENCODE}, + {nullptr, 1, nri::QueueType::VIDEO_DECODE}, + }; + + deviceCreationDesc.graphicsAPI = graphicsAPI; + deviceCreationDesc.enableGraphicsAPIValidation = m_DebugAPI; + deviceCreationDesc.enableNRIValidation = m_DebugNRI; + deviceCreationDesc.enableD3D11CommandBufferEmulation = D3D11_ENABLE_COMMAND_BUFFER_EMULATION; + deviceCreationDesc.disableD3D12EnhancedBarriers = D3D12_DISABLE_ENHANCED_BARRIERS; + deviceCreationDesc.vkBindingOffsets = VK_BINDING_OFFSETS; + deviceCreationDesc.adapterDesc = &adapterDesc[std::min(m_AdapterIndex, adapterDescsNum - 1)]; + deviceCreationDesc.allocationCallbacks = m_AllocationCallbacks; + deviceCreationDesc.queueFamilies = queueFamilies; + deviceCreationDesc.queueFamilyNum = helper::GetCountOf(queueFamilies); + NRI_ABORT_ON_FAILURE(nri::nriCreateDevice(deviceCreationDesc, m_Device)); + + NRI_ABORT_ON_FAILURE(nri::nriGetInterface(*m_Device, NRI_INTERFACE(nri::CoreInterface), (nri::CoreInterface*)&NRI)); + NRI_ABORT_ON_FAILURE(nri::nriGetInterface(*m_Device, NRI_INTERFACE(nri::HelperInterface), (nri::HelperInterface*)&NRI)); + NRI_ABORT_ON_FAILURE(nri::nriGetInterface(*m_Device, NRI_INTERFACE(nri::StreamerInterface), (nri::StreamerInterface*)&NRI)); + NRI_ABORT_ON_FAILURE(nri::nriGetInterface(*m_Device, NRI_INTERFACE(nri::SwapChainInterface), (nri::SwapChainInterface*)&NRI)); + + m_Nv12Layout = MakeNv12BufferLayout(NRI.GetDeviceDesc(*m_Device)); + + nri::StreamerDesc streamerDesc = {}; + streamerDesc.dynamicBufferMemoryLocation = nri::MemoryLocation::HOST_UPLOAD; + streamerDesc.dynamicBufferDesc = {0, 0, nri::BufferUsageBits::VERTEX_BUFFER | nri::BufferUsageBits::INDEX_BUFFER}; + streamerDesc.constantBufferMemoryLocation = nri::MemoryLocation::HOST_UPLOAD; + streamerDesc.queuedFrameNum = GetQueuedFrameNum(); + NRI_ABORT_ON_FAILURE(NRI.CreateStreamer(*m_Device, streamerDesc, m_Streamer)); + + NRI_ABORT_ON_FAILURE(NRI.GetQueue(*m_Device, nri::QueueType::GRAPHICS, 0, m_GraphicsQueue)); + NRI_ABORT_ON_FAILURE(NRI.CreateFence(*m_Device, 0, m_FrameFence)); + + nri::SwapChainDesc swapChainDesc = {}; + swapChainDesc.window = GetWindow(); + swapChainDesc.queue = m_GraphicsQueue; + swapChainDesc.format = nri::SwapChainFormat::BT709_G22_8BIT; + swapChainDesc.flags = (m_Vsync ? nri::SwapChainBits::VSYNC : nri::SwapChainBits::NONE) | nri::SwapChainBits::ALLOW_TEARING; + swapChainDesc.width = (uint16_t)GetOutputResolution().x; + swapChainDesc.height = (uint16_t)GetOutputResolution().y; + swapChainDesc.textureNum = GetOptimalSwapChainTextureNum(); + swapChainDesc.queuedFrameNum = GetQueuedFrameNum(); + NRI_ABORT_ON_FAILURE(NRI.CreateSwapChain(*m_Device, swapChainDesc, m_SwapChain)); + + uint32_t swapChainTextureNum; + nri::Texture* const* swapChainTextures = NRI.GetSwapChainTextures(*m_SwapChain, swapChainTextureNum); + m_SwapChainFormat = NRI.GetTextureDesc(*swapChainTextures[0]).format; + + for (uint32_t i = 0; i < swapChainTextureNum; i++) { + nri::TextureViewDesc textureViewDesc = {swapChainTextures[i], nri::TextureView::COLOR_ATTACHMENT, m_SwapChainFormat}; + + nri::Descriptor* colorAttachment = nullptr; + NRI_ABORT_ON_FAILURE(NRI.CreateTextureView(textureViewDesc, colorAttachment)); + + nri::Fence* acquireSemaphore = nullptr; + NRI_ABORT_ON_FAILURE(NRI.CreateFence(*m_Device, nri::SWAPCHAIN_SEMAPHORE, acquireSemaphore)); + + nri::Fence* releaseSemaphore = nullptr; + NRI_ABORT_ON_FAILURE(NRI.CreateFence(*m_Device, nri::SWAPCHAIN_SEMAPHORE, releaseSemaphore)); + + SwapChainTexture& swapChainTexture = m_SwapChainTextures.emplace_back(); + swapChainTexture = {}; + swapChainTexture.acquireSemaphore = acquireSemaphore; + swapChainTexture.releaseSemaphore = releaseSemaphore; + swapChainTexture.texture = swapChainTextures[i]; + swapChainTexture.colorAttachment = colorAttachment; + swapChainTexture.attachmentFormat = m_SwapChainFormat; + } + + m_QueuedFrames.resize(GetQueuedFrameNum()); + for (QueuedFrame& queuedFrame : m_QueuedFrames) { + NRI_ABORT_ON_FAILURE(NRI.CreateCommandAllocator(*m_GraphicsQueue, queuedFrame.commandAllocator)); + NRI_ABORT_ON_FAILURE(NRI.CreateCommandBuffer(*queuedFrame.commandAllocator, queuedFrame.commandBuffer)); + } + + return true; +} + +PatternConstants Sample::MakePatternConstants(PatternOperation operation, float timeSec) const { + PatternConstants patternConstants = {}; + patternConstants.width = VIDEO_WIDTH; + patternConstants.height = VIDEO_HEIGHT; + patternConstants.yRowPitchBytes = m_Nv12Layout.yRowPitchBytes; + patternConstants.uvRowPitchBytes = m_Nv12Layout.uvRowPitchBytes; + patternConstants.uvOffsetBytes = (uint32_t)m_Nv12Layout.uvOffsetBytes; + patternConstants.operation = operation; + patternConstants.time = timeSec; + return patternConstants; +} + +void Sample::InitializeGeneratedFrames(float timeSec) { + if (!m_SourcePreviewTexture || !m_SourcePreviewStorage || !m_UploadBuffer || !m_UploadBufferView) + return; + + PatternConstants patternConstants = MakePatternConstants(OP_GENERATE_PATTERN, timeSec); + + if (m_EncodeTexture) { + if (!GeneratePatternWithCompute(patternConstants, m_SourcePreviewStorage, true)) { + m_PreviewStatus = "Failed to generate source pattern via compute"; + return; + } + + if (!CopyNv12BufferToTexture(NRI, *m_GraphicsQueue, m_Nv12Layout, *m_UploadBuffer, *m_EncodeTexture)) { + m_PreviewStatus = "Failed to upload NV12 source to video texture"; + return; + } + m_PreviewStatus = "Source preview is generated by compute"; + return; + } + + if (!GeneratePatternWithCompute(patternConstants, m_SourcePreviewStorage, true)) { + m_PreviewStatus = "Failed to generate source pattern via compute"; + return; + } + + m_PreviewStatus = "Source preview is generated by compute"; +} + +bool Sample::GeneratePatternWithCompute(const PatternConstants& constants, nri::Descriptor* previewTexture, bool returnSourceBufferToShaderStorage) { + if (!m_GeneratePipelineLayout || !m_GenerateComputePipeline || !m_GenerateDescriptorSet || !m_UploadBufferView || !previewTexture) + return false; + if ((constants.width % 4) != 0 || (constants.height % 2) != 0) + return false; + + const uint32_t dispatchX = (constants.width + 3) / 4; + const uint32_t dispatchY = constants.height; + + const bool previewTexturesShaderReadable = m_PreviewTexturesShaderReadable; + const bool submitted = SubmitOneTime( + NRI, + *m_GraphicsQueue, + m_GenerateDescriptorPool, + [this, &constants, dispatchX, dispatchY, previewTexturesShaderReadable, returnSourceBufferToShaderStorage](nri::CommandBuffer& commandBuffer) { + nri::SetDescriptorSetDesc descriptorSet = {0, m_GenerateDescriptorSet}; + + nri::BufferBarrierDesc bufferBarrier = {}; + bufferBarrier.buffer = m_UploadBuffer; + bufferBarrier.before = {nri::AccessBits::NONE, nri::StageBits::NONE}; + bufferBarrier.after = {nri::AccessBits::SHADER_RESOURCE_STORAGE, nri::StageBits::COMPUTE_SHADER}; + + nri::TextureBarrierDesc textureBarriers[2] = {}; + textureBarriers[0].texture = m_SourcePreviewTexture; + textureBarriers[1].texture = m_DecodePreviewTexture; + for (nri::TextureBarrierDesc& textureBarrier : textureBarriers) { + if (previewTexturesShaderReadable) + textureBarrier.before = {nri::AccessBits::SHADER_RESOURCE, nri::Layout::SHADER_RESOURCE, nri::StageBits::ALL}; + else + textureBarrier.before = {nri::AccessBits::NONE, nri::Layout::UNDEFINED, nri::StageBits::ALL}; + textureBarrier.after = {nri::AccessBits::SHADER_RESOURCE_STORAGE, nri::Layout::SHADER_RESOURCE_STORAGE, nri::StageBits::COMPUTE_SHADER}; + textureBarrier.mipNum = nri::REMAINING; + textureBarrier.layerNum = nri::REMAINING; + textureBarrier.planes = nri::PlaneBits::ALL; + } + + nri::BarrierDesc barrierDesc = {}; + barrierDesc.buffers = &bufferBarrier; + barrierDesc.bufferNum = 1; + barrierDesc.textures = textureBarriers; + barrierDesc.textureNum = helper::GetCountOf(textureBarriers); + + nri::SetRootConstantsDesc rootConstants = {0, &constants, sizeof(PatternConstants)}; + NRI.CmdBarrier(commandBuffer, barrierDesc); + NRI.CmdSetPipelineLayout(commandBuffer, nri::BindPoint::COMPUTE, *m_GeneratePipelineLayout); + NRI.CmdSetDescriptorSet(commandBuffer, descriptorSet); + NRI.CmdSetPipeline(commandBuffer, *m_GenerateComputePipeline); + NRI.CmdSetRootConstants(commandBuffer, rootConstants); + NRI.CmdDispatch(commandBuffer, {dispatchX, dispatchY, 1}); + + bufferBarrier.before = {nri::AccessBits::SHADER_RESOURCE_STORAGE, nri::StageBits::COMPUTE_SHADER}; + if (returnSourceBufferToShaderStorage) + bufferBarrier.after = {nri::AccessBits::SHADER_RESOURCE_STORAGE, nri::StageBits::COMPUTE_SHADER}; + else + bufferBarrier.after = {nri::AccessBits::COPY_SOURCE, nri::StageBits::COPY}; + for (nri::TextureBarrierDesc& textureBarrier : textureBarriers) { + textureBarrier.before = {nri::AccessBits::SHADER_RESOURCE_STORAGE, nri::Layout::SHADER_RESOURCE_STORAGE, nri::StageBits::COMPUTE_SHADER}; + textureBarrier.after = {nri::AccessBits::SHADER_RESOURCE, nri::Layout::SHADER_RESOURCE, nri::StageBits::ALL}; + } + NRI.CmdBarrier(commandBuffer, barrierDesc); + }); + + if (submitted) + m_PreviewTexturesShaderReadable = true; + + return submitted; +} + +bool Sample::TryInitializePreviewTextures(nri::GraphicsAPI) { + if (m_SourcePreviewTexture && m_DecodePreviewTexture && m_SourcePreviewTextureView && m_DecodePreviewTextureView) + return true; + + nri::TextureDesc previewTextureDesc = {}; + previewTextureDesc.type = nri::TextureType::TEXTURE_2D; + previewTextureDesc.format = nri::Format::RGBA8_UNORM; + previewTextureDesc.width = VIDEO_WIDTH; + previewTextureDesc.height = VIDEO_HEIGHT; + previewTextureDesc.mipNum = 1; + previewTextureDesc.layerNum = 1; + previewTextureDesc.usage = nri::TextureUsageBits::SHADER_RESOURCE | nri::TextureUsageBits::SHADER_RESOURCE_STORAGE; + + if (!m_SourcePreviewTexture) { + if (NRI.CreateCommittedTexture(*m_Device, nri::MemoryLocation::DEVICE, 0.0f, previewTextureDesc, m_SourcePreviewTexture) != nri::Result::SUCCESS) { + m_PreviewStatus = "Failed to create source preview texture"; + return false; + } + NRI.SetDebugName(m_SourcePreviewTexture, "VideoSourcePreviewTexture"); + } + + if (!m_DecodePreviewTexture) { + if (NRI.CreateCommittedTexture(*m_Device, nri::MemoryLocation::DEVICE, 0.0f, previewTextureDesc, m_DecodePreviewTexture) != nri::Result::SUCCESS) { + m_PreviewStatus = "Failed to create decode preview texture"; + return false; + } + NRI.SetDebugName(m_DecodePreviewTexture, "VideoDecodePreviewTexture"); + } + + if (!m_PreviewTexturesShaderReadable) { + const bool initialized = SubmitOneTime(NRI, *m_GraphicsQueue, [&](nri::CommandBuffer& commandBuffer) { + nri::TextureBarrierDesc textureBarriers[2] = {}; + textureBarriers[0].texture = m_SourcePreviewTexture; + textureBarriers[1].texture = m_DecodePreviewTexture; + + for (nri::TextureBarrierDesc& textureBarrier : textureBarriers) { + textureBarrier.before = {nri::AccessBits::NONE, nri::Layout::UNDEFINED, nri::StageBits::ALL}; + textureBarrier.after = {nri::AccessBits::SHADER_RESOURCE, nri::Layout::SHADER_RESOURCE, nri::StageBits::ALL}; + textureBarrier.mipNum = nri::REMAINING; + textureBarrier.layerNum = nri::REMAINING; + textureBarrier.planes = nri::PlaneBits::ALL; + } + + nri::BarrierDesc barrierDesc = {}; + barrierDesc.textures = textureBarriers; + barrierDesc.textureNum = helper::GetCountOf(textureBarriers); + NRI.CmdBarrier(commandBuffer, barrierDesc); + }); + + if (!initialized) { + m_PreviewStatus = "Failed to initialize preview texture layouts"; + return false; + } + + m_PreviewTexturesShaderReadable = true; + } + + nri::TextureViewDesc sourceTextureViewDesc = {m_SourcePreviewTexture, nri::TextureView::TEXTURE, previewTextureDesc.format}; + nri::TextureViewDesc decodeTextureViewDesc = {m_DecodePreviewTexture, nri::TextureView::TEXTURE, previewTextureDesc.format}; + + if (!m_SourcePreviewTextureView) { + if (NRI.CreateTextureView(sourceTextureViewDesc, m_SourcePreviewTextureView) != nri::Result::SUCCESS) { + m_PreviewStatus = "Failed to create source preview ImGui texture view"; + return false; + } + } + + if (!m_DecodePreviewTextureView) { + if (NRI.CreateTextureView(decodeTextureViewDesc, m_DecodePreviewTextureView) != nri::Result::SUCCESS) { + m_PreviewStatus = "Failed to create decode preview ImGui texture view"; + return false; + } + } + + nri::TextureViewDesc sourceStorageTextureViewDesc = {m_SourcePreviewTexture, nri::TextureView::STORAGE_TEXTURE, nri::Format::RGBA8_UNORM}; + nri::TextureViewDesc decodeStorageTextureViewDesc = {m_DecodePreviewTexture, nri::TextureView::STORAGE_TEXTURE, nri::Format::RGBA8_UNORM}; + + if (!m_SourcePreviewStorage) { + if (NRI.CreateTextureView(sourceStorageTextureViewDesc, m_SourcePreviewStorage) != nri::Result::SUCCESS) { + m_PreviewStatus = "Failed to create source preview storage texture view"; + return false; + } + } + + if (!m_DecodePreviewStorage) { + if (NRI.CreateTextureView(decodeStorageTextureViewDesc, m_DecodePreviewStorage) != nri::Result::SUCCESS) { + m_PreviewStatus = "Failed to create decode preview storage texture view"; + return false; + } + } + + if (!m_UploadBuffer) { + nri::BufferDesc uploadBufferDesc = {}; + uploadBufferDesc.size = m_Nv12Layout.totalSizeBytes; + uploadBufferDesc.usage = nri::BufferUsageBits::SHADER_RESOURCE_STORAGE; + + if (NRI.CreateCommittedBuffer(*m_Device, nri::MemoryLocation::DEVICE, 0.0f, uploadBufferDesc, m_UploadBuffer) != nri::Result::SUCCESS) { + m_PreviewStatus = "Failed to create compute NV12 buffer"; + return false; + } + } + + if (!m_UploadBufferView) { + nri::BufferViewDesc uploadBufferViewDesc = {}; + uploadBufferViewDesc.buffer = m_UploadBuffer; + uploadBufferViewDesc.format = nri::Format::R32_UINT; + uploadBufferViewDesc.type = nri::BufferView::STORAGE_BUFFER; + uploadBufferViewDesc.size = NRI.GetBufferDesc(*m_UploadBuffer).size; + + if (NRI.CreateBufferView(uploadBufferViewDesc, m_UploadBufferView) != nri::Result::SUCCESS) { + m_PreviewStatus = "Failed to create compute NV12 buffer view"; + return false; + } + } + + if (!m_GeneratePipelineLayout) { + nri::DescriptorRangeDesc descriptorRanges[] = { + {0, 1, nri::DescriptorType::STORAGE_BUFFER, nri::StageBits::COMPUTE_SHADER}, + {1, 1, nri::DescriptorType::STORAGE_TEXTURE, nri::StageBits::COMPUTE_SHADER}, + {2, 1, nri::DescriptorType::STORAGE_TEXTURE, nri::StageBits::COMPUTE_SHADER}, + }; + nri::DescriptorSetDesc descriptorSetDescs[] = {{0, descriptorRanges, helper::GetCountOf(descriptorRanges)}}; + + nri::RootConstantDesc rootConstantDesc = {}; + rootConstantDesc.registerIndex = 0; + rootConstantDesc.size = sizeof(PatternConstants); + rootConstantDesc.shaderStages = nri::StageBits::COMPUTE_SHADER; + + nri::PipelineLayoutDesc pipelineLayoutDesc = {}; + pipelineLayoutDesc.rootConstantNum = 1; + pipelineLayoutDesc.rootConstants = &rootConstantDesc; + pipelineLayoutDesc.descriptorSetNum = helper::GetCountOf(descriptorSetDescs); + pipelineLayoutDesc.descriptorSets = descriptorSetDescs; + pipelineLayoutDesc.shaderStages = nri::StageBits::COMPUTE_SHADER; + if (NRI.CreatePipelineLayout(*m_Device, pipelineLayoutDesc, m_GeneratePipelineLayout) != nri::Result::SUCCESS) { + m_PreviewStatus = "Failed to create compute pipeline layout for pattern generation"; + return false; + } + } + + if (!m_GenerateComputePipeline) { + const nri::DeviceDesc& deviceDesc = NRI.GetDeviceDesc(*m_Device); + utils::ShaderCodeStorage shaderCodeStorage; + nri::ComputePipelineDesc computePipelineDesc = {}; + computePipelineDesc.pipelineLayout = m_GeneratePipelineLayout; + computePipelineDesc.shader = utils::LoadShader(deviceDesc.graphicsAPI, "VideoEncodePattern.cs", shaderCodeStorage); + if (NRI.CreateComputePipeline(*m_Device, computePipelineDesc, m_GenerateComputePipeline) != nri::Result::SUCCESS) { + m_PreviewStatus = "Failed to create pattern generation compute pipeline"; + return false; + } + } + + if (!m_GenerateDescriptorPool) { + nri::DescriptorPoolDesc descriptorPoolDesc = {}; + descriptorPoolDesc.descriptorSetMaxNum = 1; + descriptorPoolDesc.storageBufferMaxNum = 1; + descriptorPoolDesc.storageTextureMaxNum = 2; + if (NRI.CreateDescriptorPool(*m_Device, descriptorPoolDesc, m_GenerateDescriptorPool) != nri::Result::SUCCESS) { + m_PreviewStatus = "Failed to create compute descriptor pool for pattern generation"; + return false; + } + } + + if (!m_GenerateDescriptorSet) { + if (NRI.AllocateDescriptorSets(*m_GenerateDescriptorPool, *m_GeneratePipelineLayout, 0, &m_GenerateDescriptorSet, 1, 0) != nri::Result::SUCCESS) { + m_PreviewStatus = "Failed to allocate compute descriptor set for pattern generation"; + return false; + } + + nri::UpdateDescriptorRangeDesc updateDescriptorRangeDescs[] = { + {m_GenerateDescriptorSet, 0, 0, &m_UploadBufferView, 1}, + {m_GenerateDescriptorSet, 1, 0, &m_SourcePreviewStorage, 1}, + {m_GenerateDescriptorSet, 2, 0, &m_DecodePreviewStorage, 1}, + }; + NRI.UpdateDescriptorRanges(updateDescriptorRangeDescs, helper::GetCountOf(updateDescriptorRangeDescs)); + } + + return true; +} + +void Sample::TryInitializeVideo(nri::GraphicsAPI graphicsAPI) { + const nri::DeviceDesc& deviceDesc = NRI.GetDeviceDesc(*m_Device); + + if (graphicsAPI == nri::GraphicsAPI::D3D11) { + m_VideoStatus = "D3D11 does not expose NRI video queues"; + return; + } + + if (!deviceDesc.adapterDesc.queueNum[(uint32_t)nri::QueueType::VIDEO_ENCODE] || !deviceDesc.adapterDesc.queueNum[(uint32_t)nri::QueueType::VIDEO_DECODE]) { + m_VideoStatus = "Adapter has no NRI video encode/decode queues"; + return; + } + + if (nri::nriGetInterface(*m_Device, NRI_INTERFACE(nri::VideoInterface), &Video) != nri::Result::SUCCESS || !Video.CreateVideoSession) { + m_VideoStatus = "NRI video interface is unavailable"; + return; + } + + nri::VideoSessionDesc encodeSessionDesc = {}; + encodeSessionDesc.usage = nri::VideoUsage::ENCODE; + encodeSessionDesc.codec = GetNriCodec(m_Codec); + encodeSessionDesc.format = nri::Format::NV12_UNORM; + encodeSessionDesc.width = VIDEO_WIDTH; + encodeSessionDesc.height = VIDEO_HEIGHT; + encodeSessionDesc.maxReferenceNum = 1; + + nri::VideoSessionDesc decodeSessionDesc = encodeSessionDesc; + decodeSessionDesc.usage = nri::VideoUsage::DECODE; + decodeSessionDesc.maxReferenceNum = 16; + + if (Video.GetVideoQueue(*m_Device, encodeSessionDesc, m_VideoEncodeQueue) != nri::Result::SUCCESS || Video.GetVideoQueue(*m_Device, decodeSessionDesc, m_VideoDecodeQueue) != nri::Result::SUCCESS) { + m_VideoStatus = std::string("Failed to get ") + GetCodecName(m_Codec) + "-capable video queues"; + return; + } + + if (Video.CreateVideoSession(*m_Device, encodeSessionDesc, m_EncodeSession) != nri::Result::SUCCESS) { + m_VideoStatus = std::string("Failed to create ") + GetCodecName(m_Codec) + " encode session"; + return; + } + + if (Video.CreateVideoSession(*m_Device, decodeSessionDesc, m_DecodeSession) != nri::Result::SUCCESS) { + m_VideoStatus = std::string("Failed to create ") + GetCodecName(m_Codec) + " decode session"; + return; + } + + nri::VideoH264SequenceParameterSetDesc sps = {}; + sps.flags = nri::VideoH264SequenceParameterSetBits::DIRECT_8X8_INFERENCE | nri::VideoH264SequenceParameterSetBits::FRAME_MBS_ONLY; + sps.profileIdc = 100; + sps.levelIdc = 42; + sps.chromaFormatIdc = 1; + sps.sequenceParameterSetId = 0; + sps.log2MaxFrameNumMinus4 = 0; + sps.pictureOrderCountType = 0; + sps.log2MaxPictureOrderCountLsbMinus4 = 0; + sps.referenceFrameNum = 1; + sps.pictureWidthInMbsMinus1 = VIDEO_WIDTH / 16 - 1; + sps.pictureHeightInMapUnitsMinus1 = VIDEO_HEIGHT / 16 - 1; + + nri::VideoH264PictureParameterSetDesc pps = {}; + pps.flags = nri::VideoH264PictureParameterSetBits::DEBLOCKING_FILTER_CONTROL_PRESENT; + pps.sequenceParameterSetId = 0; + pps.pictureParameterSetId = 0; + pps.refIndexL0DefaultActiveMinus1 = 0; + pps.refIndexL1DefaultActiveMinus1 = 0; + m_H264Sps = sps; + m_H264Pps = pps; + + nri::VideoH264SessionParametersDesc h264Parameters = {}; + h264Parameters.sequenceParameterSets = &sps; + h264Parameters.sequenceParameterSetNum = 1; + h264Parameters.pictureParameterSets = &pps; + h264Parameters.pictureParameterSetNum = 1; + h264Parameters.maxSequenceParameterSetNum = 1; + h264Parameters.maxPictureParameterSetNum = 1; + + nri::VideoH265VideoParameterSetDesc vps = {}; + vps.flags = nri::VideoH265VideoParameterSetBits::TEMPORAL_ID_NESTING; + vps.videoParameterSetId = 0; + vps.maxSubLayersMinus1 = 0; + vps.profileTierLevel.flags = nri::VideoH265ProfileTierLevelBits::FRAME_ONLY_CONSTRAINT; + vps.profileTierLevel.generalProfileIdc = 1; + vps.profileTierLevel.generalLevelIdc = 90; + vps.decPicBufMgr.maxDecPicBufferingMinus1[0] = 2; + vps.decPicBufMgr.maxNumReorderPics[0] = 1; + + nri::VideoH265SequenceParameterSetDesc h265Sps = {}; + h265Sps.flags = nri::VideoH265SequenceParameterSetBits::TEMPORAL_ID_NESTING | nri::VideoH265SequenceParameterSetBits::AMP_ENABLED | + nri::VideoH265SequenceParameterSetBits::SAMPLE_ADAPTIVE_OFFSET_ENABLED; + h265Sps.videoParameterSetId = vps.videoParameterSetId; + h265Sps.maxSubLayersMinus1 = vps.maxSubLayersMinus1; + h265Sps.sequenceParameterSetId = 0; + h265Sps.chromaFormatIdc = 1; + h265Sps.pictureWidthInLumaSamples = VIDEO_WIDTH; + h265Sps.pictureHeightInLumaSamples = VIDEO_HEIGHT; + h265Sps.log2MaxPictureOrderCountLsbMinus4 = 3; + h265Sps.log2MinLumaCodingBlockSizeMinus3 = 0; + h265Sps.log2DiffMaxMinLumaCodingBlockSize = 2; + h265Sps.log2MinLumaTransformBlockSizeMinus2 = 0; + h265Sps.log2DiffMaxMinLumaTransformBlockSize = 3; + h265Sps.maxTransformHierarchyDepthInter = 3; + h265Sps.maxTransformHierarchyDepthIntra = 3; + h265Sps.profileTierLevel = vps.profileTierLevel; + h265Sps.decPicBufMgr = vps.decPicBufMgr; + + nri::VideoH265PictureParameterSetDesc h265Pps = {}; + h265Pps.flags = nri::VideoH265PictureParameterSetBits::CABAC_INIT_PRESENT | nri::VideoH265PictureParameterSetBits::TRANSFORM_SKIP_ENABLED | + nri::VideoH265PictureParameterSetBits::CU_QP_DELTA_ENABLED | nri::VideoH265PictureParameterSetBits::SLICE_CHROMA_QP_OFFSETS_PRESENT | + nri::VideoH265PictureParameterSetBits::DEBLOCKING_FILTER_CONTROL_PRESENT; + h265Pps.pictureParameterSetId = 0; + h265Pps.sequenceParameterSetId = h265Sps.sequenceParameterSetId; + h265Pps.videoParameterSetId = vps.videoParameterSetId; + m_H265Vps = vps; + m_H265Sps = h265Sps; + m_H265Pps = h265Pps; + + nri::VideoH265SessionParametersDesc h265Parameters = {}; + h265Parameters.videoParameterSets = &vps; + h265Parameters.videoParameterSetNum = 1; + h265Parameters.sequenceParameterSets = &h265Sps; + h265Parameters.sequenceParameterSetNum = 1; + h265Parameters.pictureParameterSets = &h265Pps; + h265Parameters.pictureParameterSetNum = 1; + h265Parameters.maxVideoParameterSetNum = 1; + h265Parameters.maxSequenceParameterSetNum = 1; + h265Parameters.maxPictureParameterSetNum = 1; + + m_AV1Sequence = MakeAV1SequenceDesc(); + nri::VideoAV1SessionParametersDesc av1Parameters = {}; + av1Parameters.sequence = m_AV1Sequence; + + nri::VideoSessionParametersDesc encodeParametersDesc = {}; + encodeParametersDesc.session = m_EncodeSession; + encodeParametersDesc.h264Parameters = m_Codec == SampleCodec::H264 ? &h264Parameters : nullptr; + encodeParametersDesc.h265Parameters = m_Codec == SampleCodec::H265 ? &h265Parameters : nullptr; + encodeParametersDesc.av1Parameters = m_Codec == SampleCodec::AV1 ? &av1Parameters : nullptr; + + nri::VideoSessionParametersDesc decodeParametersDesc = {}; + decodeParametersDesc.session = m_DecodeSession; + decodeParametersDesc.h264Parameters = m_Codec == SampleCodec::H264 ? &h264Parameters : nullptr; + decodeParametersDesc.h265Parameters = m_Codec == SampleCodec::H265 ? &h265Parameters : nullptr; + decodeParametersDesc.av1Parameters = m_Codec == SampleCodec::AV1 ? &av1Parameters : nullptr; + + if (Video.CreateVideoSessionParameters(*m_Device, encodeParametersDesc, m_EncodeParameters) != nri::Result::SUCCESS) { + m_VideoStatus = std::string("Failed to create ") + GetCodecName(m_Codec) + " encode parameters"; + return; + } + + if (Video.CreateVideoSessionParameters(*m_Device, decodeParametersDesc, m_DecodeParameters) != nri::Result::SUCCESS) { + m_VideoStatus = std::string("Failed to create ") + GetCodecName(m_Codec) + " decode parameters"; + return; + } + + nri::TextureDesc encodeTextureDesc = {}; + encodeTextureDesc.type = nri::TextureType::TEXTURE_2D; + encodeTextureDesc.usage = nri::TextureUsageBits::VIDEO_ENCODE; + encodeTextureDesc.format = nri::Format::NV12_UNORM; + encodeTextureDesc.width = VIDEO_WIDTH; + encodeTextureDesc.height = VIDEO_HEIGHT; + encodeTextureDesc.mipNum = 1; + encodeTextureDesc.layerNum = 1; + + nri::TextureDesc decodeTextureDesc = encodeTextureDesc; + decodeTextureDesc.usage = nri::TextureUsageBits::VIDEO_DECODE; + + nri::VideoTextureDesc encodeVideoTextureDesc = {}; + encodeVideoTextureDesc.textureDesc = encodeTextureDesc; + encodeVideoTextureDesc.codec = GetNriCodec(m_Codec); + + nri::VideoTextureDesc decodeVideoTextureDesc = {}; + decodeVideoTextureDesc.textureDesc = decodeTextureDesc; + decodeVideoTextureDesc.codec = GetNriCodec(m_Codec); + + if (Video.CreateCommittedVideoTexture(*m_Device, nri::MemoryLocation::DEVICE, 0.0f, encodeVideoTextureDesc, m_EncodeTexture) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to create NV12 encode texture"; + return; + } + NRI.SetDebugName(m_EncodeTexture, "VideoEncodeTexture"); + + if (Video.CreateCommittedVideoTexture(*m_Device, nri::MemoryLocation::DEVICE, 0.0f, encodeVideoTextureDesc, m_ReconstructedTexture) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to create NV12 reconstructed texture"; + return; + } + NRI.SetDebugName(m_ReconstructedTexture, "VideoReconstructedTexture"); + + if (Video.CreateCommittedVideoTexture(*m_Device, nri::MemoryLocation::DEVICE, 0.0f, decodeVideoTextureDesc, m_DecodeTexture) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to create NV12 decode texture"; + return; + } + NRI.SetDebugName(m_DecodeTexture, "VideoDecodeTexture"); + + if (!SubmitOneTime(NRI, *m_GraphicsQueue, [&](nri::CommandBuffer& commandBuffer) { + nri::TextureBarrierDesc textureBarriers[3] = {}; + textureBarriers[0].texture = m_EncodeTexture; + textureBarriers[1].texture = m_ReconstructedTexture; + textureBarriers[2].texture = m_DecodeTexture; + + for (nri::TextureBarrierDesc& textureBarrier : textureBarriers) { + textureBarrier.before = {nri::AccessBits::NONE, nri::Layout::UNDEFINED, nri::StageBits::ALL}; + textureBarrier.after = {nri::AccessBits::NONE, nri::Layout::GENERAL, nri::StageBits::NONE}; + textureBarrier.mipNum = nri::REMAINING; + textureBarrier.layerNum = nri::REMAINING; + textureBarrier.planes = nri::PlaneBits::ALL; + } + + nri::BarrierDesc barrierDesc = {}; + barrierDesc.textures = textureBarriers; + barrierDesc.textureNum = helper::GetCountOf(textureBarriers); + NRI.CmdBarrier(commandBuffer, barrierDesc); + })) { + m_VideoStatus = "Failed to initialize video texture layouts"; + return; + } + + if (!m_GenerateComputePipeline) { + nri::TextureViewDesc sourceStorageTextureViewDesc = {m_SourcePreviewTexture, nri::TextureView::STORAGE_TEXTURE, nri::Format::RGBA8_UNORM}; + nri::TextureViewDesc decodeStorageTextureViewDesc = {m_DecodePreviewTexture, nri::TextureView::STORAGE_TEXTURE, nri::Format::RGBA8_UNORM}; + + if (!m_SourcePreviewStorage) { + if (NRI.CreateTextureView(sourceStorageTextureViewDesc, m_SourcePreviewStorage) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to create source preview storage texture view"; + return; + } + } + + if (!m_DecodePreviewStorage) { + if (NRI.CreateTextureView(decodeStorageTextureViewDesc, m_DecodePreviewStorage) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to create decode preview storage texture view"; + return; + } + } + + nri::BufferDesc uploadBufferDesc = {}; + uploadBufferDesc.size = m_Nv12Layout.totalSizeBytes; + uploadBufferDesc.usage = nri::BufferUsageBits::SHADER_RESOURCE_STORAGE; + + if (NRI.CreateCommittedBuffer(*m_Device, nri::MemoryLocation::DEVICE, 0.0f, uploadBufferDesc, m_UploadBuffer) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to create NV12 upload buffer"; + return; + } + + nri::BufferViewDesc uploadBufferViewDesc = {}; + uploadBufferViewDesc.buffer = m_UploadBuffer; + uploadBufferViewDesc.format = nri::Format::R32_UINT; + uploadBufferViewDesc.type = nri::BufferView::STORAGE_BUFFER; + uploadBufferViewDesc.size = uploadBufferDesc.size; + + if (NRI.CreateBufferView(uploadBufferViewDesc, m_UploadBufferView) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to create NV12 compute output buffer view"; + return; + } + + utils::ShaderCodeStorage shaderCodeStorage; + { + nri::DescriptorRangeDesc descriptorRanges[] = { + {0, 1, nri::DescriptorType::STORAGE_BUFFER, nri::StageBits::COMPUTE_SHADER}, + {1, 1, nri::DescriptorType::STORAGE_TEXTURE, nri::StageBits::COMPUTE_SHADER}, + {2, 1, nri::DescriptorType::STORAGE_TEXTURE, nri::StageBits::COMPUTE_SHADER}, + }; + nri::DescriptorSetDesc descriptorSetDescs[] = {{0, descriptorRanges, helper::GetCountOf(descriptorRanges)}}; + + nri::RootConstantDesc rootConstantDesc = {}; + rootConstantDesc.registerIndex = 0; + rootConstantDesc.size = sizeof(PatternConstants); + rootConstantDesc.shaderStages = nri::StageBits::COMPUTE_SHADER; + + nri::PipelineLayoutDesc pipelineLayoutDesc = {}; + pipelineLayoutDesc.rootConstantNum = 1; + pipelineLayoutDesc.rootConstants = &rootConstantDesc; + pipelineLayoutDesc.descriptorSetNum = helper::GetCountOf(descriptorSetDescs); + pipelineLayoutDesc.descriptorSets = descriptorSetDescs; + pipelineLayoutDesc.shaderStages = nri::StageBits::COMPUTE_SHADER; + if (NRI.CreatePipelineLayout(*m_Device, pipelineLayoutDesc, m_GeneratePipelineLayout) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to create compute pipeline layout for pattern generation"; + return; + } + + nri::ComputePipelineDesc computePipelineDesc = {}; + computePipelineDesc.pipelineLayout = m_GeneratePipelineLayout; + computePipelineDesc.shader = utils::LoadShader(deviceDesc.graphicsAPI, "VideoEncodePattern.cs", shaderCodeStorage); + if (NRI.CreateComputePipeline(*m_Device, computePipelineDesc, m_GenerateComputePipeline) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to create pattern generation compute pipeline"; + return; + } + + nri::DescriptorPoolDesc descriptorPoolDesc = {}; + descriptorPoolDesc.descriptorSetMaxNum = 1; + descriptorPoolDesc.storageBufferMaxNum = 1; + descriptorPoolDesc.storageTextureMaxNum = 2; + descriptorPoolDesc.textureMaxNum = 2; + if (NRI.CreateDescriptorPool(*m_Device, descriptorPoolDesc, m_GenerateDescriptorPool) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to create compute descriptor pool for pattern generation"; + return; + } + + if (NRI.AllocateDescriptorSets(*m_GenerateDescriptorPool, *m_GeneratePipelineLayout, 0, &m_GenerateDescriptorSet, 1, 0) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to allocate compute descriptor set for pattern generation"; + return; + } + + nri::UpdateDescriptorRangeDesc updateDescriptorRangeDescs[] = { + {m_GenerateDescriptorSet, 0, 0, &m_UploadBufferView, 1}, + {m_GenerateDescriptorSet, 1, 0, &m_SourcePreviewStorage, 1}, + {m_GenerateDescriptorSet, 2, 0, &m_DecodePreviewStorage, 1}, + }; + NRI.UpdateDescriptorRanges(updateDescriptorRangeDescs, helper::GetCountOf(updateDescriptorRangeDescs)); + } + } + + nri::BufferDesc bitstreamHeaderUploadBufferDesc = {}; + bitstreamHeaderUploadBufferDesc.size = ENCODED_SLICE_OFFSET; + + nri::BufferDesc bitstreamBufferDesc = {}; + bitstreamBufferDesc.size = BITSTREAM_SIZE; + bitstreamBufferDesc.usage = nri::BufferUsageBits::VIDEO_ENCODE | nri::BufferUsageBits::VIDEO_DECODE; + + nri::BufferDesc decodeBitstreamBufferDesc = {}; + decodeBitstreamBufferDesc.size = BITSTREAM_SIZE; + decodeBitstreamBufferDesc.usage = nri::BufferUsageBits::VIDEO_DECODE; + + nri::BufferDesc metadataBufferDesc = {}; + metadataBufferDesc.size = METADATA_SIZE; + metadataBufferDesc.usage = nri::BufferUsageBits::VIDEO_ENCODE; + + nri::BufferDesc resolvedMetadataBufferDesc = {}; + resolvedMetadataBufferDesc.size = RESOLVED_METADATA_SIZE; + resolvedMetadataBufferDesc.usage = nri::BufferUsageBits::VIDEO_ENCODE; + + nri::BufferDesc resolvedMetadataReadbackBufferDesc = {}; + resolvedMetadataReadbackBufferDesc.size = RESOLVED_METADATA_SIZE; + resolvedMetadataReadbackBufferDesc.usage = nri::BufferUsageBits::NONE; + + nri::BufferDesc bitstreamHeaderReadbackBufferDesc = {}; + bitstreamHeaderReadbackBufferDesc.size = AV1_HEADER_READBACK_SIZE; + bitstreamHeaderReadbackBufferDesc.usage = nri::BufferUsageBits::NONE; + + if (NRI.CreateCommittedBuffer(*m_Device, nri::MemoryLocation::HOST_UPLOAD, 0.0f, bitstreamHeaderUploadBufferDesc, m_BitstreamHeaderUploadBuffer) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to create bitstream header upload buffer"; + return; + } + + if (Video.CreateCommittedVideoBitstreamBuffer(*m_Device, 0.0f, bitstreamBufferDesc, m_BitstreamBuffer) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to create encode bitstream buffer"; + return; + } + + if (NRI.CreateCommittedBuffer(*m_Device, nri::MemoryLocation::HOST_READBACK, 0.0f, bitstreamHeaderReadbackBufferDesc, m_BitstreamHeaderReadbackBuffer) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to create bitstream header readback buffer"; + return; + } + + if (Video.CreateCommittedVideoBitstreamBuffer(*m_Device, 0.0f, decodeBitstreamBufferDesc, m_DecodeBitstreamBuffer) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to create decode bitstream buffer"; + return; + } + + if (NRI.CreateCommittedBuffer(*m_Device, nri::MemoryLocation::DEVICE, 0.0f, metadataBufferDesc, m_MetadataBuffer) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to create encode metadata buffer"; + return; + } + + if (NRI.CreateCommittedBuffer(*m_Device, nri::MemoryLocation::DEVICE, 0.0f, resolvedMetadataBufferDesc, m_ResolvedMetadataBuffer) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to create resolved encode metadata buffer"; + return; + } + + if (NRI.CreateCommittedBuffer(*m_Device, nri::MemoryLocation::HOST_READBACK, 0.0f, resolvedMetadataReadbackBufferDesc, m_ResolvedMetadataReadbackBuffer) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to create resolved encode metadata readback buffer"; + return; + } + + if (NRI.CreateFence(*m_Device, 0, m_MetadataReadbackFence) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to create metadata readback fence"; + return; + } + + if (NRI.CreateCommandAllocator(*m_GraphicsQueue, m_MetadataReadbackCommandAllocator) != nri::Result::SUCCESS || + NRI.CreateCommandBuffer(*m_MetadataReadbackCommandAllocator, m_MetadataReadbackCommandBuffer) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to create metadata readback command buffer"; + return; + } + + nri::VideoPictureDesc encodePictureDesc = {}; + encodePictureDesc.texture = m_EncodeTexture; + encodePictureDesc.usage = nri::VideoPictureUsage::ENCODE_INPUT; + encodePictureDesc.format = nri::Format::NV12_UNORM; + encodePictureDesc.width = VIDEO_WIDTH; + encodePictureDesc.height = VIDEO_HEIGHT; + + nri::VideoPictureDesc decodePictureDesc = {}; + decodePictureDesc.texture = m_DecodeTexture; + decodePictureDesc.usage = nri::VideoPictureUsage::DECODE_OUTPUT; + decodePictureDesc.format = nri::Format::NV12_UNORM; + decodePictureDesc.width = VIDEO_WIDTH; + decodePictureDesc.height = VIDEO_HEIGHT; + + nri::VideoPictureDesc reconstructedPictureDesc = encodePictureDesc; + reconstructedPictureDesc.texture = m_ReconstructedTexture; + reconstructedPictureDesc.usage = nri::VideoPictureUsage::ENCODE_REFERENCE; + + if (Video.CreateVideoPicture(*m_Device, encodePictureDesc, m_EncodePicture) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to create encode picture"; + return; + } + + if (Video.CreateVideoPicture(*m_Device, reconstructedPictureDesc, m_ReconstructedPicture) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to create reconstructed picture"; + return; + } + + if (Video.CreateVideoPicture(*m_Device, decodePictureDesc, m_DecodePicture) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to create decode picture"; + return; + } + + m_VideoReady = true; + m_VideoStatus = std::string("NRI video queues and ") + GetCodecName(m_Codec) + " encode/decode objects initialized"; +} + +bool Sample::WriteAnnexBHeadersToUploadBuffer(std::vector& annexBHeaders) { + if (m_Codec == SampleCodec::AV1) { + void* headerPtr = NRI.MapBuffer(*m_BitstreamHeaderUploadBuffer, 0, ENCODED_SLICE_OFFSET); + if (!headerPtr) { + m_VideoStatus = "Failed to map bitstream header upload buffer"; + return false; + } + std::memset(headerPtr, 0, (size_t)ENCODED_SLICE_OFFSET); + NRI.UnmapBuffer(*m_BitstreamHeaderUploadBuffer); + annexBHeaders.clear(); + return true; + } + + nri::VideoAnnexBParameterSetsDesc annexBDesc = {}; + annexBDesc.codec = GetNriCodec(m_Codec); + annexBDesc.h264Sps = &m_H264Sps; + annexBDesc.h264Pps = &m_H264Pps; + annexBDesc.h265Vps = &m_H265Vps; + annexBDesc.h265Sps = &m_H265Sps; + annexBDesc.h265Pps = &m_H265Pps; + + if (Video.WriteVideoAnnexBParameterSets(annexBDesc) != nri::Result::SUCCESS || annexBDesc.writtenSize == 0 || annexBDesc.writtenSize >= ENCODED_SLICE_OFFSET) { + m_VideoStatus = std::string("Failed to query ") + GetCodecName(m_Codec) + " Annex-B parameter-set size"; + return false; + } + + annexBHeaders.resize((size_t)annexBDesc.writtenSize); + annexBDesc.dst = annexBHeaders.data(); + annexBDesc.dstSize = annexBHeaders.size(); + if (Video.WriteVideoAnnexBParameterSets(annexBDesc) != nri::Result::SUCCESS) { + m_VideoStatus = std::string("Failed to build ") + GetCodecName(m_Codec) + " Annex-B parameter sets"; + return false; + } + + void* headerPtr = NRI.MapBuffer(*m_BitstreamHeaderUploadBuffer, 0, ENCODED_SLICE_OFFSET); + if (!headerPtr) { + m_VideoStatus = "Failed to map bitstream header upload buffer"; + return false; + } + std::memset(headerPtr, 0, (size_t)ENCODED_SLICE_OFFSET); + std::memcpy(headerPtr, annexBHeaders.data(), annexBHeaders.size()); + NRI.UnmapBuffer(*m_BitstreamHeaderUploadBuffer); + return true; +} + +bool Sample::TrySubmitEncodeAndMetadataReadback(float timeSec) { + if (!CanRunRoundTrip()) { + m_VideoStatus = std::string(GetCodecName(m_Codec)) + " round trip is not currently supported in this configuration"; + return false; + } + + PatternConstants patternConstants = MakePatternConstants(OP_GENERATE_PATTERN, timeSec); + if (!GeneratePatternWithCompute(patternConstants, m_SourcePreviewStorage, true)) { + m_VideoStatus = "Failed to generate NV12 source pattern via compute"; + return false; + } + + if (!CopyNv12BufferToTexture(NRI, *m_GraphicsQueue, m_Nv12Layout, *m_UploadBuffer, *m_EncodeTexture)) { + m_VideoStatus = "Failed to upload NV12 source to video texture"; + return false; + } + + std::vector annexBHeaders; + if (!WriteAnnexBHeadersToUploadBuffer(annexBHeaders)) + return false; + + if (!SubmitOneTime(NRI, *m_GraphicsQueue, [&](nri::CommandBuffer& commandBuffer) { + NRI.CmdZeroBuffer(commandBuffer, *m_BitstreamBuffer, 0, BITSTREAM_SIZE); + NRI.CmdCopyBuffer(commandBuffer, *m_BitstreamBuffer, 0, *m_BitstreamHeaderUploadBuffer, 0, ENCODED_SLICE_OFFSET); + })) { + m_VideoStatus = std::string("Failed to upload ") + GetCodecName(m_Codec) + " Annex-B parameter sets"; + return false; + } + + nri::VideoEncodePictureDesc pictureDesc = {}; + pictureDesc.frameType = nri::VideoEncodeFrameType::IDR; + pictureDesc.idrPictureId = 1; + + uint16_t av1MiColumnStarts[] = {0, (uint16_t)(2 * ((VIDEO_WIDTH + 7) >> 3))}; + uint16_t av1MiRowStarts[] = {0, (uint16_t)(2 * ((VIDEO_HEIGHT + 7) >> 3))}; + uint16_t av1WidthInSuperblocksMinus1[] = {(uint16_t)(((VIDEO_WIDTH + 63) / 64) - 1)}; + uint16_t av1HeightInSuperblocksMinus1[] = {(uint16_t)(((VIDEO_HEIGHT + 63) / 64) - 1)}; + nri::VideoAV1TileLayoutDesc av1TileLayout = {}; + av1TileLayout.columnNum = 1; + av1TileLayout.rowNum = 1; + av1TileLayout.tileSizeBytesMinus1 = 3; + av1TileLayout.uniformSpacing = 1; + av1TileLayout.miColumnStarts = av1MiColumnStarts; + av1TileLayout.miRowStarts = av1MiRowStarts; + av1TileLayout.widthInSuperblocksMinus1 = av1WidthInSuperblocksMinus1; + av1TileLayout.heightInSuperblocksMinus1 = av1HeightInSuperblocksMinus1; + nri::VideoAV1LoopFilterDesc av1LoopFilter = {}; + av1LoopFilter.refDeltas[0] = 1; + av1LoopFilter.refDeltas[4] = -1; + av1LoopFilter.refDeltas[6] = -1; + av1LoopFilter.refDeltas[7] = -1; + nri::VideoAV1CdefDesc av1Cdef = {}; + nri::VideoAV1LoopRestorationDesc av1LoopRestoration = {}; + nri::VideoAV1GlobalMotionDesc av1GlobalMotion = {}; + for (auto& params : av1GlobalMotion.params) { + params[2] = 1 << 16; + params[5] = 1 << 16; + } + nri::VideoAV1PictureDesc av1PictureDesc = {}; + av1PictureDesc.currentFrameId = 0; + av1PictureDesc.refreshFrameFlags = 0xFF; + av1PictureDesc.primaryReferenceName = nri::VideoAV1ReferenceName::NONE; + av1PictureDesc.flags = nri::VideoAV1PictureBits::ERROR_RESILIENT_MODE | + nri::VideoAV1PictureBits::DISABLE_CDF_UPDATE | + nri::VideoAV1PictureBits::ALLOW_SCREEN_CONTENT_TOOLS | + nri::VideoAV1PictureBits::FORCE_INTEGER_MV | + nri::VideoAV1PictureBits::SHOW_FRAME | + nri::VideoAV1PictureBits::SHOWABLE_FRAME; + av1PictureDesc.renderWidthMinus1 = VIDEO_WIDTH - 1; + av1PictureDesc.renderHeightMinus1 = VIDEO_HEIGHT - 1; + av1PictureDesc.baseQIndex = 20; + av1PictureDesc.interpolationFilter = 0; + av1PictureDesc.txMode = 2; + av1PictureDesc.cdefDampingMinus3 = 3; + av1PictureDesc.tileLayout = &av1TileLayout; + av1PictureDesc.loopFilter = &av1LoopFilter; + av1PictureDesc.cdef = &av1Cdef; + av1PictureDesc.loopRestoration = &av1LoopRestoration; + av1PictureDesc.globalMotion = &av1GlobalMotion; + + nri::VideoEncodeRateControlDesc rateControlDesc = {}; + rateControlDesc.mode = nri::VideoEncodeRateControlMode::CQP; + rateControlDesc.qpI = 20; + rateControlDesc.qpP = 22; + rateControlDesc.qpB = 24; + rateControlDesc.frameRateNumerator = 30; + rateControlDesc.frameRateDenominator = 1; + + nri::VideoEncodeDesc encodeDesc = {}; + encodeDesc.session = m_EncodeSession; + encodeDesc.parameters = m_EncodeParameters; + encodeDesc.srcPicture = m_EncodePicture; + encodeDesc.dstBitstream.buffer = m_BitstreamBuffer; + encodeDesc.dstBitstream.offset = ENCODED_SLICE_OFFSET; + encodeDesc.dstBitstream.size = BITSTREAM_SIZE - ENCODED_SLICE_OFFSET; + encodeDesc.bitstreamMetadataSize = ENCODED_SLICE_OFFSET; + encodeDesc.pictureDesc = &pictureDesc; + encodeDesc.rateControlDesc = &rateControlDesc; + encodeDesc.reconstructedPicture = m_ReconstructedPicture; + encodeDesc.metadata = m_MetadataBuffer; + encodeDesc.resolvedMetadata = m_ResolvedMetadataBuffer; + encodeDesc.av1PictureDesc = m_Codec == SampleCodec::AV1 ? &av1PictureDesc : nullptr; + + if (!SubmitOneTime(NRI, *m_VideoEncodeQueue, [&](nri::CommandBuffer& commandBuffer) { + nri::BufferBarrierDesc bufferBarriers[3] = {}; + bufferBarriers[0].buffer = m_BitstreamBuffer; + bufferBarriers[0].before = {nri::AccessBits::COPY_DESTINATION, nri::StageBits::COPY}; + bufferBarriers[0].after = {nri::AccessBits::VIDEO_ENCODE_WRITE, nri::StageBits::VIDEO_ENCODE}; + bufferBarriers[1].buffer = m_MetadataBuffer; + bufferBarriers[1].after = {nri::AccessBits::VIDEO_ENCODE_WRITE, nri::StageBits::VIDEO_ENCODE}; + bufferBarriers[2].buffer = m_ResolvedMetadataBuffer; + bufferBarriers[2].after = {nri::AccessBits::VIDEO_ENCODE_WRITE, nri::StageBits::VIDEO_ENCODE}; + + nri::TextureBarrierDesc textureBarriers[2] = {}; + textureBarriers[0].texture = m_EncodeTexture; + textureBarriers[0].before = {nri::AccessBits::NONE, nri::Layout::GENERAL, nri::StageBits::NONE}; + textureBarriers[0].after = {nri::AccessBits::VIDEO_ENCODE_READ, nri::Layout::VIDEO_ENCODE_SRC, nri::StageBits::VIDEO_ENCODE}; + textureBarriers[0].mipNum = nri::REMAINING; + textureBarriers[0].layerNum = nri::REMAINING; + textureBarriers[0].planes = nri::PlaneBits::ALL; + textureBarriers[1].texture = m_ReconstructedTexture; + textureBarriers[1].before = {nri::AccessBits::NONE, nri::Layout::GENERAL, nri::StageBits::NONE}; + textureBarriers[1].after = {nri::AccessBits::VIDEO_ENCODE_WRITE, nri::Layout::VIDEO_ENCODE_DPB, nri::StageBits::VIDEO_ENCODE}; + textureBarriers[1].mipNum = nri::REMAINING; + textureBarriers[1].layerNum = nri::REMAINING; + textureBarriers[1].planes = nri::PlaneBits::ALL; + + nri::BarrierDesc barrierDesc = {}; + barrierDesc.buffers = bufferBarriers; + barrierDesc.bufferNum = helper::GetCountOf(bufferBarriers); + barrierDesc.textures = textureBarriers; + barrierDesc.textureNum = helper::GetCountOf(textureBarriers); + NRI.CmdBarrier(commandBuffer, barrierDesc); + Video.CmdEncodeVideo(commandBuffer, encodeDesc); + bufferBarriers[0].before = {nri::AccessBits::VIDEO_ENCODE_WRITE, nri::StageBits::VIDEO_ENCODE}; + bufferBarriers[0].after = {}; + // D3D12 resolves encode metadata inside CmdEncodeVideo and transitions the raw metadata buffer to encode-read before returning. + bufferBarriers[1].before = {nri::AccessBits::VIDEO_ENCODE_READ, nri::StageBits::VIDEO_ENCODE}; + bufferBarriers[1].after = {}; + bufferBarriers[2].before = {nri::AccessBits::VIDEO_ENCODE_WRITE, nri::StageBits::VIDEO_ENCODE}; + bufferBarriers[2].after = {}; + textureBarriers[0].before = {nri::AccessBits::VIDEO_ENCODE_READ, nri::Layout::VIDEO_ENCODE_SRC, nri::StageBits::VIDEO_ENCODE}; + textureBarriers[0].after = {nri::AccessBits::NONE, nri::Layout::GENERAL, nri::StageBits::NONE}; + textureBarriers[1].before = {nri::AccessBits::VIDEO_ENCODE_WRITE, nri::Layout::VIDEO_ENCODE_DPB, nri::StageBits::VIDEO_ENCODE}; + textureBarriers[1].after = {nri::AccessBits::NONE, nri::Layout::GENERAL, nri::StageBits::NONE}; + barrierDesc.textures = textureBarriers; + barrierDesc.textureNum = helper::GetCountOf(textureBarriers); + NRI.CmdBarrier(commandBuffer, barrierDesc); + + })) { + m_VideoStatus = std::string(GetCodecName(m_Codec)) + " encode submission failed"; + return false; + } + + if (m_MetadataReadbackPending) + return true; + + NRI.ResetCommandAllocator(*m_MetadataReadbackCommandAllocator); + if (NRI.BeginCommandBuffer(*m_MetadataReadbackCommandBuffer, nullptr) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to begin metadata readback command buffer"; + return false; + } + + nri::BufferBarrierDesc metadataBarriers[4] = {}; + metadataBarriers[0].buffer = m_ResolvedMetadataBuffer; + metadataBarriers[0].before = {nri::AccessBits::NONE, nri::StageBits::NONE}; + metadataBarriers[0].after = {nri::AccessBits::COPY_DESTINATION, nri::StageBits::COPY}; + metadataBarriers[1].buffer = m_ResolvedMetadataReadbackBuffer; + metadataBarriers[1].after = {nri::AccessBits::COPY_DESTINATION, nri::StageBits::COPY}; + metadataBarriers[2].buffer = m_BitstreamBuffer; + metadataBarriers[2].before = {nri::AccessBits::NONE, nri::StageBits::NONE}; + metadataBarriers[2].after = {nri::AccessBits::COPY_SOURCE, nri::StageBits::COPY}; + metadataBarriers[3].buffer = m_BitstreamHeaderReadbackBuffer; + metadataBarriers[3].after = {nri::AccessBits::COPY_DESTINATION, nri::StageBits::COPY}; + + nri::BarrierDesc metadataBarrierDesc = {}; + metadataBarrierDesc.buffers = metadataBarriers; + metadataBarrierDesc.bufferNum = helper::GetCountOf(metadataBarriers); + NRI.CmdBarrier(*m_MetadataReadbackCommandBuffer, metadataBarrierDesc); + Video.CmdResolveVideoEncodeFeedback(*m_MetadataReadbackCommandBuffer, *m_EncodeSession, *m_ResolvedMetadataBuffer, 0); + metadataBarriers[0].before = {nri::AccessBits::COPY_DESTINATION, nri::StageBits::COPY}; + metadataBarriers[0].after = {nri::AccessBits::COPY_SOURCE, nri::StageBits::COPY}; + metadataBarrierDesc.bufferNum = 1; + NRI.CmdBarrier(*m_MetadataReadbackCommandBuffer, metadataBarrierDesc); + metadataBarrierDesc.bufferNum = helper::GetCountOf(metadataBarriers); + NRI.CmdCopyBuffer(*m_MetadataReadbackCommandBuffer, *m_ResolvedMetadataReadbackBuffer, 0, *m_ResolvedMetadataBuffer, 0, RESOLVED_METADATA_SIZE); + NRI.CmdCopyBuffer(*m_MetadataReadbackCommandBuffer, *m_BitstreamHeaderReadbackBuffer, 0, *m_BitstreamBuffer, ENCODED_SLICE_OFFSET, AV1_HEADER_READBACK_SIZE); + metadataBarriers[0].before = {nri::AccessBits::COPY_SOURCE, nri::StageBits::COPY}; + metadataBarriers[0].after = {nri::AccessBits::NONE, nri::StageBits::NONE}; + metadataBarriers[1].before = {nri::AccessBits::COPY_DESTINATION, nri::StageBits::COPY}; + metadataBarriers[1].after = {nri::AccessBits::NONE, nri::StageBits::NONE}; + metadataBarriers[2].before = {nri::AccessBits::COPY_SOURCE, nri::StageBits::COPY}; + metadataBarriers[2].after = {nri::AccessBits::NONE, nri::StageBits::NONE}; + metadataBarriers[3].before = {nri::AccessBits::COPY_DESTINATION, nri::StageBits::COPY}; + metadataBarriers[3].after = {nri::AccessBits::NONE, nri::StageBits::NONE}; + NRI.CmdBarrier(*m_MetadataReadbackCommandBuffer, metadataBarrierDesc); + + if (NRI.EndCommandBuffer(*m_MetadataReadbackCommandBuffer) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to end metadata readback command buffer"; + return false; + } + + m_MetadataReadbackFenceValue++; + nri::FenceSubmitDesc signalFence = {}; + signalFence.fence = m_MetadataReadbackFence; + signalFence.value = m_MetadataReadbackFenceValue; + + const nri::CommandBuffer* commandBuffers[] = {m_MetadataReadbackCommandBuffer}; + nri::QueueSubmitDesc submit = {}; + submit.commandBuffers = commandBuffers; + submit.commandBufferNum = helper::GetCountOf(commandBuffers); + submit.signalFences = &signalFence; + submit.signalFenceNum = 1; + if (NRI.QueueSubmit(*m_GraphicsQueue, submit) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to submit async metadata readback"; + return false; + } + + m_MetadataReadbackPending = true; + m_VideoStatus = std::string(GetCodecName(m_Codec)) + " encode submitted; waiting for async metadata readback"; + return true; +} + +bool Sample::TryDecodePendingMetadata(float timeSec) { + if (!m_MetadataReadbackPending) + return false; + + if (NRI.GetFenceValue(*m_MetadataReadbackFence) < m_MetadataReadbackFenceValue) + return false; + + m_MetadataReadbackPending = false; + + nri::VideoEncodeFeedback feedback = {}; + const nri::Result feedbackResult = Video.GetVideoEncodeFeedback(*m_EncodeSession, *m_ResolvedMetadataReadbackBuffer, 0, feedback); + if (feedbackResult != nri::Result::SUCCESS) { + if (feedbackResult == nri::Result::UNSUPPORTED && m_Codec == SampleCodec::AV1) { + feedback.encodedBitstreamOffset = 0; + feedback.encodedBitstreamWrittenBytes = AV1_HEADER_READBACK_SIZE; + feedback.writtenSubregionNum = 1; + } else { + if (feedbackResult == nri::Result::UNSUPPORTED) + m_VideoStatus = std::string(GetCodecName(m_Codec)) + " encode metadata feedback is unsupported"; + else + m_VideoStatus = "Failed to read resolved encode metadata"; + return false; + } + } + + if (feedback.errorFlags || !feedback.encodedBitstreamWrittenBytes) { + char message[160] = {}; + std::snprintf(message, sizeof(message), "Encoder returned errorFlags=0x%llX bytes=%llu", + (unsigned long long)feedback.errorFlags, (unsigned long long)feedback.encodedBitstreamWrittenBytes); + m_VideoStatus = message; + return false; + } + + nri::VideoAV1EncodeDecodeInfo av1DecodeInfo = {}; + if (m_Codec == SampleCodec::AV1) { + const uint8_t* encodedHeader = (const uint8_t*)NRI.MapBuffer(*m_BitstreamHeaderReadbackBuffer, 0, AV1_HEADER_READBACK_SIZE); + if (!encodedHeader && feedbackResult == nri::Result::UNSUPPORTED) { + m_VideoStatus = "Failed to map AV1 encoded header readback"; + return false; + } + + nri::VideoAV1EncodeDecodeInfoDesc av1InfoDesc = {}; + av1InfoDesc.feedback = &feedback; + av1InfoDesc.sequence = &m_AV1Sequence; + av1InfoDesc.encodedPayloadHeader = encodedHeader; + av1InfoDesc.encodedPayloadHeaderSize = encodedHeader ? std::min(AV1_HEADER_READBACK_SIZE, feedback.encodedBitstreamWrittenBytes) : 0; + const nri::Result av1InfoResult = Video.GetVideoEncodeAV1DecodeInfo(*m_EncodeSession, *m_ResolvedMetadataReadbackBuffer, 0, av1InfoDesc, av1DecodeInfo); + if (encodedHeader) + NRI.UnmapBuffer(*m_BitstreamHeaderReadbackBuffer); + if (av1InfoResult != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to prepare AV1 decode metadata"; + return false; + } + feedback.encodedBitstreamWrittenBytes = av1DecodeInfo.bitstreamOffset + av1DecodeInfo.bitstreamSize; + } + + return DecodeEncodedBitstream(feedback, m_Codec == SampleCodec::AV1 ? &av1DecodeInfo : nullptr, timeSec); +} + +bool Sample::DecodeEncodedBitstream(const nri::VideoEncodeFeedback& feedback, const nri::VideoAV1EncodeDecodeInfo* av1DecodeInfo, float timeSec) { + std::vector annexBHeaders; + if (!WriteAnnexBHeadersToUploadBuffer(annexBHeaders)) + return false; + + const uint64_t encodedPayloadSkip = av1DecodeInfo ? av1DecodeInfo->bitstreamOffset : GetEncodedPayloadHeaderSkip(m_Codec, feedback.encodedBitstreamWrittenBytes); + const uint64_t encodedPayloadBytes = av1DecodeInfo ? av1DecodeInfo->bitstreamSize : feedback.encodedBitstreamWrittenBytes - encodedPayloadSkip; + const uint64_t decodeSliceOffset = annexBHeaders.size(); + const uint64_t decodeBitstreamSize = AlignUp(decodeSliceOffset + encodedPayloadBytes, 256); + const uint64_t encodedSourceOffset = ENCODED_SLICE_OFFSET + feedback.encodedBitstreamOffset + encodedPayloadSkip; + if (feedback.encodedBitstreamOffset > BITSTREAM_SIZE - ENCODED_SLICE_OFFSET || + encodedSourceOffset > BITSTREAM_SIZE || + encodedPayloadBytes > BITSTREAM_SIZE - encodedSourceOffset || + decodeBitstreamSize > BITSTREAM_SIZE) { + m_VideoStatus = std::string("Encoded ") + GetCodecName(m_Codec) + " bitstream exceeded decode buffer size"; + return false; + } + + if (!SubmitOneTime(NRI, *m_GraphicsQueue, [&](nri::CommandBuffer& commandBuffer) { + NRI.CmdZeroBuffer(commandBuffer, *m_DecodeBitstreamBuffer, 0, BITSTREAM_SIZE); + if (!annexBHeaders.empty()) + NRI.CmdCopyBuffer(commandBuffer, *m_DecodeBitstreamBuffer, 0, *m_BitstreamHeaderUploadBuffer, 0, annexBHeaders.size()); + + nri::BufferBarrierDesc bufferBarriers[2] = {}; + bufferBarriers[0].buffer = m_BitstreamBuffer; + bufferBarriers[0].before = {nri::AccessBits::NONE, nri::StageBits::NONE}; + bufferBarriers[0].after = {nri::AccessBits::COPY_SOURCE, nri::StageBits::COPY}; + bufferBarriers[1].buffer = m_DecodeBitstreamBuffer; + bufferBarriers[1].before = {nri::AccessBits::COPY_DESTINATION, nri::StageBits::COPY}; + bufferBarriers[1].after = {nri::AccessBits::COPY_DESTINATION, nri::StageBits::COPY}; + + nri::BarrierDesc barrierDesc = {}; + barrierDesc.buffers = bufferBarriers; + barrierDesc.bufferNum = helper::GetCountOf(bufferBarriers); + NRI.CmdBarrier(commandBuffer, barrierDesc); + + NRI.CmdCopyBuffer(commandBuffer, *m_DecodeBitstreamBuffer, decodeSliceOffset, *m_BitstreamBuffer, encodedSourceOffset, encodedPayloadBytes); + + bufferBarriers[0].before = {nri::AccessBits::COPY_SOURCE, nri::StageBits::COPY}; + bufferBarriers[0].after = {nri::AccessBits::NONE, nri::StageBits::NONE}; + bufferBarriers[1].before = {nri::AccessBits::COPY_DESTINATION, nri::StageBits::COPY}; + bufferBarriers[1].after = {nri::AccessBits::NONE, nri::StageBits::NONE}; + NRI.CmdBarrier(commandBuffer, barrierDesc); + })) { + m_VideoStatus = std::string("Failed to build exact ") + GetCodecName(m_Codec) + " decode bitstream"; + return false; + } + + const uint32_t pictureOffsets[] = {(uint32_t)decodeSliceOffset}; + + nri::VideoH264DecodePictureDesc h264DecodePicture = {}; + h264DecodePicture.flags = nri::VideoH264DecodePictureBits::IDR | nri::VideoH264DecodePictureBits::INTRA | nri::VideoH264DecodePictureBits::REFERENCE; + h264DecodePicture.pictureParameterSetId = m_H264Pps.pictureParameterSetId; + h264DecodePicture.frameNum = 0; + h264DecodePicture.idrPictureId = 1; + h264DecodePicture.topFieldOrderCount = 0; + h264DecodePicture.bottomFieldOrderCount = 0; + h264DecodePicture.sliceOffsets = pictureOffsets; + h264DecodePicture.sliceOffsetNum = helper::GetCountOf(pictureOffsets); + + nri::VideoH265DecodePictureDesc h265DecodePicture = {}; + h265DecodePicture.flags = nri::VideoH265DecodePictureBits::IRAP | nri::VideoH265DecodePictureBits::IDR | nri::VideoH265DecodePictureBits::REFERENCE; + h265DecodePicture.videoParameterSetId = m_H265Vps.videoParameterSetId; + h265DecodePicture.sequenceParameterSetId = m_H265Sps.sequenceParameterSetId; + h265DecodePicture.pictureParameterSetId = m_H265Pps.pictureParameterSetId; + h265DecodePicture.pictureOrderCount = 0; + h265DecodePicture.sliceSegmentOffsets = pictureOffsets; + h265DecodePicture.sliceSegmentOffsetNum = helper::GetCountOf(pictureOffsets); + + nri::VideoAV1EncodeDecodeInfo av1Info = {}; + if (av1DecodeInfo) { + av1Info = *av1DecodeInfo; + av1Info.picture.tiles = av1Info.tiles; + av1Info.picture.tileLayout = &av1Info.tileLayout; + av1Info.picture.quantization = &av1Info.quantization; + av1Info.picture.loopFilter = &av1Info.loopFilter; + av1Info.picture.cdef = &av1Info.cdef; + av1Info.picture.segmentation = av1DecodeInfo->picture.segmentation ? &av1Info.segmentation : nullptr; + av1Info.picture.loopRestoration = &av1Info.loopRestoration; + av1Info.picture.globalMotion = &av1Info.globalMotion; + av1Info.tileLayout.miColumnStarts = av1Info.miColumnStarts; + av1Info.tileLayout.miRowStarts = av1Info.miRowStarts; + av1Info.tileLayout.widthInSuperblocksMinus1 = av1Info.widthInSuperblocksMinus1; + av1Info.tileLayout.heightInSuperblocksMinus1 = av1Info.heightInSuperblocksMinus1; + } + + nri::VideoDecodeDesc decodeDesc = {}; + decodeDesc.session = m_DecodeSession; + decodeDesc.parameters = m_DecodeParameters; + decodeDesc.bitstream.buffer = m_DecodeBitstreamBuffer; + decodeDesc.bitstream.size = decodeBitstreamSize; + decodeDesc.dstPicture = m_DecodePicture; + decodeDesc.dstSlot = 0; + decodeDesc.h264PictureDesc = m_Codec == SampleCodec::H264 ? &h264DecodePicture : nullptr; + decodeDesc.h265PictureDesc = m_Codec == SampleCodec::H265 ? &h265DecodePicture : nullptr; + decodeDesc.av1PictureDesc = av1DecodeInfo ? &av1Info.picture : nullptr; + + nri::VideoDecodePictureStates decodePictureStates = {}; + if (Video.GetVideoDecodePictureStates(*m_DecodePicture, decodePictureStates) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to query video decode picture states"; + return false; + } + + if (!SubmitOneTime(NRI, *m_VideoDecodeQueue, [&](nri::CommandBuffer& commandBuffer) { + nri::TextureBarrierDesc textureBarrier = {}; + textureBarrier.texture = m_DecodeTexture; + textureBarrier.before = {nri::AccessBits::NONE, nri::Layout::GENERAL, nri::StageBits::NONE}; + textureBarrier.after = decodePictureStates.decodeWrite; + textureBarrier.mipNum = nri::REMAINING; + textureBarrier.layerNum = nri::REMAINING; + textureBarrier.planes = nri::PlaneBits::ALL; + + nri::BarrierDesc barrierDesc = {}; + barrierDesc.textures = &textureBarrier; + barrierDesc.textureNum = 1; + NRI.CmdBarrier(commandBuffer, barrierDesc); + Video.CmdDecodeVideo(commandBuffer, decodeDesc); + + if (decodePictureStates.releaseAfterDecode) { + textureBarrier.before = decodePictureStates.decodeWrite; + textureBarrier.after = decodePictureStates.afterDecode; + NRI.CmdBarrier(commandBuffer, barrierDesc); + } + })) { + m_VideoStatus = std::string(GetCodecName(m_Codec)) + " decode submission failed"; + return false; + } + + if (!SubmitOneTime(NRI, *m_GraphicsQueue, [&](nri::CommandBuffer& commandBuffer) { + nri::TextureBarrierDesc textureBarrier = {}; + textureBarrier.texture = m_DecodeTexture; + textureBarrier.before = decodePictureStates.graphicsBefore; + textureBarrier.after = {nri::AccessBits::COPY_SOURCE, nri::Layout::COPY_SOURCE, nri::StageBits::COPY}; + textureBarrier.mipNum = nri::REMAINING; + textureBarrier.layerNum = nri::REMAINING; + textureBarrier.planes = nri::PlaneBits::ALL; + + nri::BarrierDesc barrierDesc = {}; + barrierDesc.textures = &textureBarrier; + barrierDesc.textureNum = 1; + NRI.CmdBarrier(commandBuffer, barrierDesc); + + nri::BufferBarrierDesc nv12BufferBarrier = {}; + nv12BufferBarrier.buffer = m_UploadBuffer; + nv12BufferBarrier.before = {nri::AccessBits::NONE, nri::StageBits::NONE}; + nv12BufferBarrier.after = {nri::AccessBits::COPY_DESTINATION, nri::StageBits::COPY}; + + nri::BarrierDesc copyBarrierDesc = {}; + copyBarrierDesc.buffers = &nv12BufferBarrier; + copyBarrierDesc.bufferNum = 1; + NRI.CmdBarrier(commandBuffer, copyBarrierDesc); + + nri::TextureRegionDesc lumaRegion = {}; + lumaRegion.width = VIDEO_WIDTH; + lumaRegion.height = VIDEO_HEIGHT; + lumaRegion.depth = 1; + lumaRegion.planes = nri::PlaneBits::PLANE_0; + + nri::TextureDataLayoutDesc lumaLayout = {}; + lumaLayout.rowPitch = m_Nv12Layout.yRowPitchBytes; + lumaLayout.slicePitch = m_Nv12Layout.ySlicePitchBytes; + NRI.CmdReadbackTextureToBuffer(commandBuffer, *m_UploadBuffer, lumaLayout, *m_DecodeTexture, lumaRegion); + + nri::TextureRegionDesc chromaRegion = {}; + chromaRegion.width = VIDEO_WIDTH; + chromaRegion.height = VIDEO_HEIGHT; + chromaRegion.depth = 1; + chromaRegion.planes = nri::PlaneBits::PLANE_1; + + nri::TextureDataLayoutDesc chromaLayout = {}; + chromaLayout.offset = m_Nv12Layout.uvOffsetBytes; + chromaLayout.rowPitch = m_Nv12Layout.uvRowPitchBytes; + chromaLayout.slicePitch = m_Nv12Layout.uvSlicePitchBytes; + NRI.CmdReadbackTextureToBuffer(commandBuffer, *m_UploadBuffer, chromaLayout, *m_DecodeTexture, chromaRegion); + + nv12BufferBarrier.before = {nri::AccessBits::COPY_DESTINATION, nri::StageBits::COPY}; + nv12BufferBarrier.after = {nri::AccessBits::NONE, nri::StageBits::NONE}; + NRI.CmdBarrier(commandBuffer, copyBarrierDesc); + + textureBarrier.before = {nri::AccessBits::COPY_SOURCE, nri::Layout::COPY_SOURCE, nri::StageBits::COPY}; + textureBarrier.after = {nri::AccessBits::NONE, nri::Layout::GENERAL, nri::StageBits::NONE}; + NRI.CmdBarrier(commandBuffer, barrierDesc); + })) { + m_VideoStatus = "Failed to copy decoded NV12 for preview"; + return false; + } + + PatternConstants patternConstants = MakePatternConstants(OP_NV12_TO_PREVIEW, timeSec); + if (!GeneratePatternWithCompute(patternConstants, m_DecodePreviewStorage, true)) { + m_VideoStatus = "Failed to convert decoded NV12 to preview texture"; + return false; + } + + m_DecodePreviewReady = true; + char message[128] = {}; + std::snprintf(message, sizeof(message), "%s encode/decode round trip complete, encoded %llu bytes", GetCodecName(m_Codec), (unsigned long long)feedback.encodedBitstreamWrittenBytes); + m_VideoStatus = message; + return true; +} + +bool Sample::TryRunRoundTrip(float timeSec) { + if (m_MetadataReadbackPending) + return TryDecodePendingMetadata(timeSec); + + return TrySubmitEncodeAndMetadataReadback(timeSec); +} + +bool Sample::CanRunRoundTrip() const { + return m_VideoReady && m_GraphicsQueue && m_VideoEncodeQueue && m_VideoDecodeQueue && m_UploadBuffer && m_UploadBufferView && m_SourcePreviewStorage && + m_DecodePreviewStorage && m_GeneratePipelineLayout && m_GenerateComputePipeline && m_GenerateDescriptorSet; +} + +void Sample::LatencySleep(uint32_t frameIndex) { + uint32_t queuedFrameIndex = frameIndex % GetQueuedFrameNum(); + const QueuedFrame& queuedFrame = m_QueuedFrames[queuedFrameIndex]; + + NRI.Wait(*m_FrameFence, frameIndex >= GetQueuedFrameNum() ? 1 + frameIndex - GetQueuedFrameNum() : 0); + NRI.ResetCommandAllocator(*queuedFrame.commandAllocator); +} + +void Sample::DrawTexturePanel(const char* label, nri::Descriptor* texture, const ImVec2& size) { + ImGui::Text("%s", label); + if (!texture) { + ImGui::Text("Not ready"); + return; + } + ImGui::Image((ImTextureID)texture, size); +} + +void Sample::PrepareFrame(uint32_t frameIndex) { + (void)frameIndex; + + const double timeSec = m_Timer.GetTimeStamp() * 0.001 - m_StartTimeSec; + const bool canRunRoundTrip = CanRunRoundTrip(); + + InitializeGeneratedFrames((float)timeSec); + + if (canRunRoundTrip && timeSec - m_LastRoundTripTimeSec >= ROUND_TRIP_INTERVAL_SEC) { + if (TryRunRoundTrip((float)timeSec)) + m_LastRoundTripTimeSec = timeSec; + } + + ImGui::NewFrame(); + { + ImGui::SetNextWindowPos({20.0f, 20.0f}, ImGuiCond_Once); + ImGui::SetNextWindowSize({900.0f, 520.0f}, ImGuiCond_Once); + ImGui::Begin("NRI Video Encode / Decode"); + { + ImGui::Text("Codec: %s, format: NV12, size: %ux%u", GetCodecName(m_Codec), VIDEO_WIDTH, VIDEO_HEIGHT); + ImGui::TextWrapped("Video: %s", m_VideoStatus.c_str()); + ImGui::TextWrapped("Preview: %s", m_PreviewStatus.c_str()); + ImGui::Text("Encode queue: %s, decode queue: %s", m_VideoEncodeQueue ? "yes" : "no", m_VideoDecodeQueue ? "yes" : "no"); + const bool roundTripSupported = CanRunRoundTrip(); + ImGui::Text("Round trip: %s", roundTripSupported ? "running" : "backend must support NRI video encode/decode"); + if (m_VideoReady && !m_DecodePreviewReady) + ImGui::Text("Decode preview: waiting for first decoded frame"); + + ImGui::Separator(); + if (ImGui::BeginTable("PreviewPanels", 2, ImGuiTableFlags_SizingStretchSame)) { + ImGui::TableNextColumn(); + float width = std::max(200.0f, ImGui::GetContentRegionAvail().x); + DrawTexturePanel("Generated source", m_SourcePreviewTextureView, {width, width * float(VIDEO_HEIGHT) / float(VIDEO_WIDTH)}); + + ImGui::TableNextColumn(); + width = std::max(200.0f, ImGui::GetContentRegionAvail().x); + DrawTexturePanel(m_DecodePreviewReady ? "Decoded preview" : "Decoded preview pending", m_DecodePreviewReady ? m_DecodePreviewTextureView : nullptr, {width, width * float(VIDEO_HEIGHT) / float(VIDEO_WIDTH)}); + ImGui::EndTable(); + } + } + ImGui::End(); + } + ImGui::EndFrame(); + ImGui::Render(); +} + +void Sample::RenderFrame(uint32_t frameIndex) { + uint32_t queuedFrameIndex = frameIndex % GetQueuedFrameNum(); + const QueuedFrame& queuedFrame = m_QueuedFrames[queuedFrameIndex]; + + uint32_t recycledSemaphoreIndex = frameIndex % (uint32_t)m_SwapChainTextures.size(); + nri::Fence* swapChainAcquireSemaphore = m_SwapChainTextures[recycledSemaphoreIndex].acquireSemaphore; + + uint32_t currentSwapChainTextureIndex = 0; + NRI.AcquireNextTexture(*m_SwapChain, *swapChainAcquireSemaphore, currentSwapChainTextureIndex); + + const SwapChainTexture& swapChainTexture = m_SwapChainTextures[currentSwapChainTextureIndex]; + + nri::CommandBuffer& commandBuffer = *queuedFrame.commandBuffer; + NRI.BeginCommandBuffer(commandBuffer, nullptr); + { + nri::TextureBarrierDesc textureBarriers = {}; + textureBarriers.texture = swapChainTexture.texture; + textureBarriers.after = {nri::AccessBits::COLOR_ATTACHMENT, nri::Layout::COLOR_ATTACHMENT}; + textureBarriers.layerNum = 1; + textureBarriers.mipNum = 1; + + nri::BarrierDesc barrierDesc = {}; + barrierDesc.textureNum = 1; + barrierDesc.textures = &textureBarriers; + NRI.CmdBarrier(commandBuffer, barrierDesc); + + nri::AttachmentDesc colorAttachmentDesc = {}; + colorAttachmentDesc.descriptor = swapChainTexture.colorAttachment; + colorAttachmentDesc.clearValue.color.f = {0.03f, 0.03f, 0.03f, 1.0f}; + + nri::RenderingDesc renderingDesc = {}; + renderingDesc.colorNum = 1; + renderingDesc.colors = &colorAttachmentDesc; + + CmdCopyImguiData(commandBuffer, *m_Streamer); + + NRI.CmdBeginRendering(commandBuffer, renderingDesc); + CmdDrawImgui(commandBuffer, swapChainTexture.attachmentFormat, 1.0f, true); + NRI.CmdEndRendering(commandBuffer); + + textureBarriers.before = textureBarriers.after; + textureBarriers.after = {nri::AccessBits::NONE, nri::Layout::PRESENT, nri::StageBits::NONE}; + NRI.CmdBarrier(commandBuffer, barrierDesc); + } + NRI.EndCommandBuffer(commandBuffer); + + nri::FenceSubmitDesc textureAcquiredFence = {}; + textureAcquiredFence.fence = swapChainAcquireSemaphore; + textureAcquiredFence.stages = nri::StageBits::COLOR_ATTACHMENT; + + nri::FenceSubmitDesc renderingFinishedFence = {}; + renderingFinishedFence.fence = swapChainTexture.releaseSemaphore; + + nri::QueueSubmitDesc queueSubmitDesc = {}; + queueSubmitDesc.waitFences = &textureAcquiredFence; + queueSubmitDesc.waitFenceNum = 1; + queueSubmitDesc.commandBuffers = &queuedFrame.commandBuffer; + queueSubmitDesc.commandBufferNum = 1; + queueSubmitDesc.signalFences = &renderingFinishedFence; + queueSubmitDesc.signalFenceNum = 1; + NRI.QueueSubmit(*m_GraphicsQueue, queueSubmitDesc); + + NRI.EndStreamerFrame(*m_Streamer); + NRI.QueuePresent(*m_SwapChain, *swapChainTexture.releaseSemaphore); + + nri::FenceSubmitDesc signalFence = {}; + signalFence.fence = m_FrameFence; + signalFence.value = 1 + frameIndex; + + nri::QueueSubmitDesc signalSubmitDesc = {}; + signalSubmitDesc.signalFences = &signalFence; + signalSubmitDesc.signalFenceNum = 1; + NRI.QueueSubmit(*m_GraphicsQueue, signalSubmitDesc); +} + +SAMPLE_MAIN(Sample, 0); From 6d9afd66d04a58459303c0fe25cbb0a5d39e4a43 Mon Sep 17 00:00:00 2001 From: Bjorn Schobben Date: Wed, 6 May 2026 00:28:26 +0200 Subject: [PATCH 02/20] Update video sample dependencies Point the NRIFramework submodule at the branch that uses NRI video-queues, remove the local NRI override documentation, and apply formatting cleanup to the video sample. --- .gitignore | 2 - .gitmodules | 4 +- External/NRIFramework | 2 +- README.md | 1 - Source/VideoEncodeDecode.cpp | 72 +++++++++++++----------------------- 5 files changed, 29 insertions(+), 52 deletions(-) diff --git a/.gitignore b/.gitignore index 36930b2..3e8052d 100644 --- a/.gitignore +++ b/.gitignore @@ -20,5 +20,3 @@ _Data/ # can be a symbolic link _Data -build -build-video-sample diff --git a/.gitmodules b/.gitmodules index b919ec0..797781a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,5 +1,5 @@ [submodule "External/NRIFramework"] path = External/NRIFramework - url = https://github.com/NVIDIA-RTX/NRIFramework.git - branch = main + url = https://github.com/Daedie-git/NRIFramework.git + branch = codex/video-encode-decode-sample update = merge diff --git a/External/NRIFramework b/External/NRIFramework index 67a1c91..c2ace9b 160000 --- a/External/NRIFramework +++ b/External/NRIFramework @@ -1 +1 @@ -Subproject commit 67a1c91f085569a9e5a9d6978e58703cdd707419 +Subproject commit c2ace9b2b9d9879c22ccfd48a958c24f0a11949b diff --git a/README.md b/README.md index 070b8e8..01fab10 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,6 @@ Or by running scripts only: ### CMake options - `DISABLE_SHADER_COMPILATION` - disable compilation of shaders (shaders can be built on other platform) -- `NRI_SOURCE_OVERRIDE_DIR` - use an alternate NRI source tree, for example an in-review NRI branch - `NRIF_USE_WAYLAND` - use Wayland instead of X11 on Linux ## How to run diff --git a/Source/VideoEncodeDecode.cpp b/Source/VideoEncodeDecode.cpp index a2bc461..ed65962 100644 --- a/Source/VideoEncodeDecode.cpp +++ b/Source/VideoEncodeDecode.cpp @@ -43,25 +43,25 @@ enum class SampleCodec : uint8_t { static const char* GetCodecName(SampleCodec codec) { switch (codec) { - case SampleCodec::H265: - return "H.265"; - case SampleCodec::AV1: - return "AV1"; - case SampleCodec::H264: - default: - return "H.264"; + case SampleCodec::H265: + return "H.265"; + case SampleCodec::AV1: + return "AV1"; + case SampleCodec::H264: + default: + return "H.264"; } } static nri::VideoCodec GetNriCodec(SampleCodec codec) { switch (codec) { - case SampleCodec::H265: - return nri::VideoCodec::H265; - case SampleCodec::AV1: - return nri::VideoCodec::AV1; - case SampleCodec::H264: - default: - return nri::VideoCodec::H264; + case SampleCodec::H265: + return nri::VideoCodec::H265; + case SampleCodec::AV1: + return nri::VideoCodec::AV1; + case SampleCodec::H264: + default: + return nri::VideoCodec::H264; } } @@ -72,10 +72,7 @@ static uint64_t GetEncodedPayloadHeaderSkip(SampleCodec codec, uint64_t encodedB static nri::VideoAV1SequenceDesc MakeAV1SequenceDesc() { nri::VideoAV1SequenceDesc desc = {}; - desc.flags = nri::VideoAV1SequenceBits::ENABLE_ORDER_HINT | - nri::VideoAV1SequenceBits::ENABLE_CDEF | - nri::VideoAV1SequenceBits::ENABLE_RESTORATION | - nri::VideoAV1SequenceBits::COLOR_DESCRIPTION_PRESENT; + desc.flags = nri::VideoAV1SequenceBits::ENABLE_ORDER_HINT | nri::VideoAV1SequenceBits::ENABLE_CDEF | nri::VideoAV1SequenceBits::ENABLE_RESTORATION | nri::VideoAV1SequenceBits::COLOR_DESCRIPTION_PRESENT; desc.bitDepth = 8; desc.subsamplingX = 1; desc.subsamplingY = 1; @@ -166,8 +163,7 @@ static bool SubmitOneTime( nri::CoreInterface& core, nri::Queue& queue, nri::DescriptorPool* descriptorPool, Record&& record) { nri::CommandAllocator* allocator = nullptr; nri::CommandBuffer* commandBuffer = nullptr; - bool ok = core.CreateCommandAllocator(queue, allocator) == nri::Result::SUCCESS && allocator && core.CreateCommandBuffer(*allocator, commandBuffer) == nri::Result::SUCCESS && commandBuffer && - core.BeginCommandBuffer(*commandBuffer, descriptorPool) == nri::Result::SUCCESS; + bool ok = core.CreateCommandAllocator(queue, allocator) == nri::Result::SUCCESS && allocator && core.CreateCommandBuffer(*allocator, commandBuffer) == nri::Result::SUCCESS && commandBuffer && core.BeginCommandBuffer(*commandBuffer, descriptorPool) == nri::Result::SUCCESS; if (ok) { std::forward(record)(*commandBuffer); ok = core.EndCommandBuffer(*commandBuffer) == nri::Result::SUCCESS; @@ -254,11 +250,11 @@ class Sample : public SampleBase { void RenderFrame(uint32_t frameIndex) override; private: - bool InitializeGraphics(nri::GraphicsAPI graphicsAPI); + bool InitializeGraphics(nri::GraphicsAPI graphicsAPI); bool TryInitializePreviewTextures(nri::GraphicsAPI graphicsAPI); - void InitializeGeneratedFrames(float timeSec); + void InitializeGeneratedFrames(float timeSec); bool CanRunRoundTrip() const; - void TryInitializeVideo(nri::GraphicsAPI graphicsAPI); + void TryInitializeVideo(nri::GraphicsAPI graphicsAPI); PatternConstants MakePatternConstants(PatternOperation operation, float timeSec) const; bool GeneratePatternWithCompute(const PatternConstants& constants, nri::Descriptor* previewTexture, bool returnSourceBufferToShaderStorage = false); bool WriteAnnexBHeadersToUploadBuffer(std::vector& annexBHeaders); @@ -916,8 +912,7 @@ void Sample::TryInitializeVideo(nri::GraphicsAPI graphicsAPI) { vps.decPicBufMgr.maxNumReorderPics[0] = 1; nri::VideoH265SequenceParameterSetDesc h265Sps = {}; - h265Sps.flags = nri::VideoH265SequenceParameterSetBits::TEMPORAL_ID_NESTING | nri::VideoH265SequenceParameterSetBits::AMP_ENABLED | - nri::VideoH265SequenceParameterSetBits::SAMPLE_ADAPTIVE_OFFSET_ENABLED; + h265Sps.flags = nri::VideoH265SequenceParameterSetBits::TEMPORAL_ID_NESTING | nri::VideoH265SequenceParameterSetBits::AMP_ENABLED | nri::VideoH265SequenceParameterSetBits::SAMPLE_ADAPTIVE_OFFSET_ENABLED; h265Sps.videoParameterSetId = vps.videoParameterSetId; h265Sps.maxSubLayersMinus1 = vps.maxSubLayersMinus1; h265Sps.sequenceParameterSetId = 0; @@ -935,9 +930,7 @@ void Sample::TryInitializeVideo(nri::GraphicsAPI graphicsAPI) { h265Sps.decPicBufMgr = vps.decPicBufMgr; nri::VideoH265PictureParameterSetDesc h265Pps = {}; - h265Pps.flags = nri::VideoH265PictureParameterSetBits::CABAC_INIT_PRESENT | nri::VideoH265PictureParameterSetBits::TRANSFORM_SKIP_ENABLED | - nri::VideoH265PictureParameterSetBits::CU_QP_DELTA_ENABLED | nri::VideoH265PictureParameterSetBits::SLICE_CHROMA_QP_OFFSETS_PRESENT | - nri::VideoH265PictureParameterSetBits::DEBLOCKING_FILTER_CONTROL_PRESENT; + h265Pps.flags = nri::VideoH265PictureParameterSetBits::CABAC_INIT_PRESENT | nri::VideoH265PictureParameterSetBits::TRANSFORM_SKIP_ENABLED | nri::VideoH265PictureParameterSetBits::CU_QP_DELTA_ENABLED | nri::VideoH265PictureParameterSetBits::SLICE_CHROMA_QP_OFFSETS_PRESENT | nri::VideoH265PictureParameterSetBits::DEBLOCKING_FILTER_CONTROL_PRESENT; h265Pps.pictureParameterSetId = 0; h265Pps.sequenceParameterSetId = h265Sps.sequenceParameterSetId; h265Pps.videoParameterSetId = vps.videoParameterSetId; @@ -1205,8 +1198,7 @@ void Sample::TryInitializeVideo(nri::GraphicsAPI graphicsAPI) { return; } - if (NRI.CreateCommandAllocator(*m_GraphicsQueue, m_MetadataReadbackCommandAllocator) != nri::Result::SUCCESS || - NRI.CreateCommandBuffer(*m_MetadataReadbackCommandAllocator, m_MetadataReadbackCommandBuffer) != nri::Result::SUCCESS) { + if (NRI.CreateCommandAllocator(*m_GraphicsQueue, m_MetadataReadbackCommandAllocator) != nri::Result::SUCCESS || NRI.CreateCommandBuffer(*m_MetadataReadbackCommandAllocator, m_MetadataReadbackCommandBuffer) != nri::Result::SUCCESS) { m_VideoStatus = "Failed to create metadata readback command buffer"; return; } @@ -1355,12 +1347,7 @@ bool Sample::TrySubmitEncodeAndMetadataReadback(float timeSec) { av1PictureDesc.currentFrameId = 0; av1PictureDesc.refreshFrameFlags = 0xFF; av1PictureDesc.primaryReferenceName = nri::VideoAV1ReferenceName::NONE; - av1PictureDesc.flags = nri::VideoAV1PictureBits::ERROR_RESILIENT_MODE | - nri::VideoAV1PictureBits::DISABLE_CDF_UPDATE | - nri::VideoAV1PictureBits::ALLOW_SCREEN_CONTENT_TOOLS | - nri::VideoAV1PictureBits::FORCE_INTEGER_MV | - nri::VideoAV1PictureBits::SHOW_FRAME | - nri::VideoAV1PictureBits::SHOWABLE_FRAME; + av1PictureDesc.flags = nri::VideoAV1PictureBits::ERROR_RESILIENT_MODE | nri::VideoAV1PictureBits::DISABLE_CDF_UPDATE | nri::VideoAV1PictureBits::ALLOW_SCREEN_CONTENT_TOOLS | nri::VideoAV1PictureBits::FORCE_INTEGER_MV | nri::VideoAV1PictureBits::SHOW_FRAME | nri::VideoAV1PictureBits::SHOWABLE_FRAME; av1PictureDesc.renderWidthMinus1 = VIDEO_WIDTH - 1; av1PictureDesc.renderHeightMinus1 = VIDEO_HEIGHT - 1; av1PictureDesc.baseQIndex = 20; @@ -1441,7 +1428,6 @@ bool Sample::TrySubmitEncodeAndMetadataReadback(float timeSec) { barrierDesc.textures = textureBarriers; barrierDesc.textureNum = helper::GetCountOf(textureBarriers); NRI.CmdBarrier(commandBuffer, barrierDesc); - })) { m_VideoStatus = std::string(GetCodecName(m_Codec)) + " encode submission failed"; return false; @@ -1585,10 +1571,7 @@ bool Sample::DecodeEncodedBitstream(const nri::VideoEncodeFeedback& feedback, co const uint64_t decodeSliceOffset = annexBHeaders.size(); const uint64_t decodeBitstreamSize = AlignUp(decodeSliceOffset + encodedPayloadBytes, 256); const uint64_t encodedSourceOffset = ENCODED_SLICE_OFFSET + feedback.encodedBitstreamOffset + encodedPayloadSkip; - if (feedback.encodedBitstreamOffset > BITSTREAM_SIZE - ENCODED_SLICE_OFFSET || - encodedSourceOffset > BITSTREAM_SIZE || - encodedPayloadBytes > BITSTREAM_SIZE - encodedSourceOffset || - decodeBitstreamSize > BITSTREAM_SIZE) { + if (feedback.encodedBitstreamOffset > BITSTREAM_SIZE - ENCODED_SLICE_OFFSET || encodedSourceOffset > BITSTREAM_SIZE || encodedPayloadBytes > BITSTREAM_SIZE - encodedSourceOffset || decodeBitstreamSize > BITSTREAM_SIZE) { m_VideoStatus = std::string("Encoded ") + GetCodecName(m_Codec) + " bitstream exceeded decode buffer size"; return false; } @@ -1783,8 +1766,7 @@ bool Sample::TryRunRoundTrip(float timeSec) { } bool Sample::CanRunRoundTrip() const { - return m_VideoReady && m_GraphicsQueue && m_VideoEncodeQueue && m_VideoDecodeQueue && m_UploadBuffer && m_UploadBufferView && m_SourcePreviewStorage && - m_DecodePreviewStorage && m_GeneratePipelineLayout && m_GenerateComputePipeline && m_GenerateDescriptorSet; + return m_VideoReady && m_GraphicsQueue && m_VideoEncodeQueue && m_VideoDecodeQueue && m_UploadBuffer && m_UploadBufferView && m_SourcePreviewStorage && m_DecodePreviewStorage && m_GeneratePipelineLayout && m_GenerateComputePipeline && m_GenerateDescriptorSet; } void Sample::LatencySleep(uint32_t frameIndex) { @@ -1804,9 +1786,7 @@ void Sample::DrawTexturePanel(const char* label, nri::Descriptor* texture, const ImGui::Image((ImTextureID)texture, size); } -void Sample::PrepareFrame(uint32_t frameIndex) { - (void)frameIndex; - +void Sample::PrepareFrame(uint32_t) { const double timeSec = m_Timer.GetTimeStamp() * 0.001 - m_StartTimeSec; const bool canRunRoundTrip = CanRunRoundTrip(); From d844cc432eeb4e68520e86ec9c9d78d30c08a7c8 Mon Sep 17 00:00:00 2001 From: Bjorn Schobben Date: Wed, 6 May 2026 00:42:31 +0200 Subject: [PATCH 03/20] Update video NRIFramework dependency --- External/NRIFramework | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/External/NRIFramework b/External/NRIFramework index 980f9f6..5fd2314 160000 --- a/External/NRIFramework +++ b/External/NRIFramework @@ -1 +1 @@ -Subproject commit 980f9f6f2d395802aabfe2ef852d206820278005 +Subproject commit 5fd2314015dfcc27d30758fc6ea7d87ac4cb0300 From f61ebe30c43ecf2cf0e0b8c666b24f635ad711d9 Mon Sep 17 00:00:00 2001 From: Bjorn Schobben Date: Wed, 6 May 2026 00:44:35 +0200 Subject: [PATCH 04/20] Update video NRIFramework wrapper formatting --- External/NRIFramework | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/External/NRIFramework b/External/NRIFramework index 5fd2314..ca40770 160000 --- a/External/NRIFramework +++ b/External/NRIFramework @@ -1 +1 @@ -Subproject commit 5fd2314015dfcc27d30758fc6ea7d87ac4cb0300 +Subproject commit ca4077014a0d66e26d0be1e5715bb018c3dd7dee From ee6ec2cc15f28e476d141c07e255651c204e48f1 Mon Sep 17 00:00:00 2001 From: Bjorn Schobben Date: Wed, 6 May 2026 00:47:24 +0200 Subject: [PATCH 05/20] Update video NRIFramework Vulkan formatting --- External/NRIFramework | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/External/NRIFramework b/External/NRIFramework index ca40770..3782faf 160000 --- a/External/NRIFramework +++ b/External/NRIFramework @@ -1 +1 @@ -Subproject commit ca4077014a0d66e26d0be1e5715bb018c3dd7dee +Subproject commit 3782faf3983365eea085da917e5f44b3fa3d04ff From faf668150e013bb3ebcb73267feb46b2820db3af Mon Sep 17 00:00:00 2001 From: Bjorn Schobben Date: Wed, 6 May 2026 00:51:02 +0200 Subject: [PATCH 06/20] Use upstream NRIFramework submodule URL --- .gitmodules | 4 ++-- External/NRIFramework | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitmodules b/.gitmodules index 797781a..b919ec0 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,5 +1,5 @@ [submodule "External/NRIFramework"] path = External/NRIFramework - url = https://github.com/Daedie-git/NRIFramework.git - branch = codex/video-encode-decode-sample + url = https://github.com/NVIDIA-RTX/NRIFramework.git + branch = main update = merge diff --git a/External/NRIFramework b/External/NRIFramework index 3782faf..3356636 160000 --- a/External/NRIFramework +++ b/External/NRIFramework @@ -1 +1 @@ -Subproject commit 3782faf3983365eea085da917e5f44b3fa3d04ff +Subproject commit 3356636e9f4fb8b5bcf4f9cf58da201e7d79a342 From 2605b6e71320f7eef16da1bbdb879658a8b912a3 Mon Sep 17 00:00:00 2001 From: Bjorn Schobben Date: Wed, 6 May 2026 08:54:23 +0200 Subject: [PATCH 07/20] Use simplified NRI video API --- CMakeLists.txt | 19 +++---------------- External/NRIFramework | 2 +- Source/VideoEncodeDecode.cpp | 23 ++++++++--------------- 3 files changed, 12 insertions(+), 32 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 49dc266..7395143 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -187,23 +187,10 @@ add_sample(Resources c) add_sample(SceneViewer cpp) add_sample(Triangle cpp) -set(NRI_VIDEO_HEADER "${NRI_SOURCE_DIR}/Include/Extensions/NRIVideo.h") -set(NRI_VIDEO_VERSION 0) -if(EXISTS "${NRI_VIDEO_HEADER}") - file(STRINGS "${NRI_VIDEO_HEADER}" NRI_VIDEO_VERSION_LINE REGEX "^#define NRI_VIDEO_VERSION ") - if(NRI_VIDEO_VERSION_LINE MATCHES "^#define NRI_VIDEO_VERSION +([0-9]+)") - set(NRI_VIDEO_VERSION "${CMAKE_MATCH_1}") - endif() -endif() +add_sample(VideoEncodeDecode cpp) -if(NRI_VIDEO_VERSION GREATER_EQUAL 1) - add_sample(VideoEncodeDecode cpp) - - if(WIN32) - target_link_libraries(VideoEncodeDecode PRIVATE d3d12) - endif() -else() - message(STATUS "Skipping VideoEncodeDecode: selected NRI source does not provide NRIVideo version 1") +if(WIN32) + target_link_libraries(VideoEncodeDecode PRIVATE d3d12) endif() # Wrapper depends on Vulkan SDK availability diff --git a/External/NRIFramework b/External/NRIFramework index 3356636..2640bd0 160000 --- a/External/NRIFramework +++ b/External/NRIFramework @@ -1 +1 @@ -Subproject commit 3356636e9f4fb8b5bcf4f9cf58da201e7d79a342 +Subproject commit 2640bd0dd33e97cb663a4e7ea968125c8b5cc97f diff --git a/Source/VideoEncodeDecode.cpp b/Source/VideoEncodeDecode.cpp index ed65962..6a9908b 100644 --- a/Source/VideoEncodeDecode.cpp +++ b/Source/VideoEncodeDecode.cpp @@ -856,8 +856,8 @@ void Sample::TryInitializeVideo(nri::GraphicsAPI graphicsAPI) { decodeSessionDesc.usage = nri::VideoUsage::DECODE; decodeSessionDesc.maxReferenceNum = 16; - if (Video.GetVideoQueue(*m_Device, encodeSessionDesc, m_VideoEncodeQueue) != nri::Result::SUCCESS || Video.GetVideoQueue(*m_Device, decodeSessionDesc, m_VideoDecodeQueue) != nri::Result::SUCCESS) { - m_VideoStatus = std::string("Failed to get ") + GetCodecName(m_Codec) + "-capable video queues"; + if (NRI.GetQueue(*m_Device, nri::QueueType::VIDEO_ENCODE, 0, m_VideoEncodeQueue) != nri::Result::SUCCESS || NRI.GetQueue(*m_Device, nri::QueueType::VIDEO_DECODE, 0, m_VideoDecodeQueue) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to get video queues"; return; } @@ -983,31 +983,24 @@ void Sample::TryInitializeVideo(nri::GraphicsAPI graphicsAPI) { encodeTextureDesc.height = VIDEO_HEIGHT; encodeTextureDesc.mipNum = 1; encodeTextureDesc.layerNum = 1; + encodeTextureDesc.videoCodec = GetNriCodec(m_Codec); nri::TextureDesc decodeTextureDesc = encodeTextureDesc; decodeTextureDesc.usage = nri::TextureUsageBits::VIDEO_DECODE; - nri::VideoTextureDesc encodeVideoTextureDesc = {}; - encodeVideoTextureDesc.textureDesc = encodeTextureDesc; - encodeVideoTextureDesc.codec = GetNriCodec(m_Codec); - - nri::VideoTextureDesc decodeVideoTextureDesc = {}; - decodeVideoTextureDesc.textureDesc = decodeTextureDesc; - decodeVideoTextureDesc.codec = GetNriCodec(m_Codec); - - if (Video.CreateCommittedVideoTexture(*m_Device, nri::MemoryLocation::DEVICE, 0.0f, encodeVideoTextureDesc, m_EncodeTexture) != nri::Result::SUCCESS) { + if (NRI.CreateCommittedTexture(*m_Device, nri::MemoryLocation::DEVICE, 0.0f, encodeTextureDesc, m_EncodeTexture) != nri::Result::SUCCESS) { m_VideoStatus = "Failed to create NV12 encode texture"; return; } NRI.SetDebugName(m_EncodeTexture, "VideoEncodeTexture"); - if (Video.CreateCommittedVideoTexture(*m_Device, nri::MemoryLocation::DEVICE, 0.0f, encodeVideoTextureDesc, m_ReconstructedTexture) != nri::Result::SUCCESS) { + if (NRI.CreateCommittedTexture(*m_Device, nri::MemoryLocation::DEVICE, 0.0f, encodeTextureDesc, m_ReconstructedTexture) != nri::Result::SUCCESS) { m_VideoStatus = "Failed to create NV12 reconstructed texture"; return; } NRI.SetDebugName(m_ReconstructedTexture, "VideoReconstructedTexture"); - if (Video.CreateCommittedVideoTexture(*m_Device, nri::MemoryLocation::DEVICE, 0.0f, decodeVideoTextureDesc, m_DecodeTexture) != nri::Result::SUCCESS) { + if (NRI.CreateCommittedTexture(*m_Device, nri::MemoryLocation::DEVICE, 0.0f, decodeTextureDesc, m_DecodeTexture) != nri::Result::SUCCESS) { m_VideoStatus = "Failed to create NV12 decode texture"; return; } @@ -1163,7 +1156,7 @@ void Sample::TryInitializeVideo(nri::GraphicsAPI graphicsAPI) { return; } - if (Video.CreateCommittedVideoBitstreamBuffer(*m_Device, 0.0f, bitstreamBufferDesc, m_BitstreamBuffer) != nri::Result::SUCCESS) { + if (NRI.CreateCommittedBuffer(*m_Device, nri::MemoryLocation::DEVICE, 0.0f, bitstreamBufferDesc, m_BitstreamBuffer) != nri::Result::SUCCESS) { m_VideoStatus = "Failed to create encode bitstream buffer"; return; } @@ -1173,7 +1166,7 @@ void Sample::TryInitializeVideo(nri::GraphicsAPI graphicsAPI) { return; } - if (Video.CreateCommittedVideoBitstreamBuffer(*m_Device, 0.0f, decodeBitstreamBufferDesc, m_DecodeBitstreamBuffer) != nri::Result::SUCCESS) { + if (NRI.CreateCommittedBuffer(*m_Device, nri::MemoryLocation::HOST_UPLOAD, 0.0f, decodeBitstreamBufferDesc, m_DecodeBitstreamBuffer) != nri::Result::SUCCESS) { m_VideoStatus = "Failed to create decode bitstream buffer"; return; } From 8bf5b7fb27b29b22f13975caeb209d4bf0ea9f12 Mon Sep 17 00:00:00 2001 From: Bjorn Schobben Date: Wed, 6 May 2026 09:47:05 +0200 Subject: [PATCH 08/20] Choose video bitstream memory per backend --- Source/VideoEncodeDecode.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/Source/VideoEncodeDecode.cpp b/Source/VideoEncodeDecode.cpp index 6a9908b..5956cdf 100644 --- a/Source/VideoEncodeDecode.cpp +++ b/Source/VideoEncodeDecode.cpp @@ -235,6 +235,16 @@ static bool CopyNv12BufferToTexture(nri::CoreInterface& core, nri::Queue& queue, }); } +static nri::Result CreateVideoBitstreamBuffer(nri::CoreInterface& core, nri::Device& device, nri::GraphicsAPI graphicsAPI, float priority, const nri::BufferDesc& bufferDesc, nri::Buffer*& buffer) { + nri::MemoryLocation memoryLocation = nri::MemoryLocation::DEVICE; + if (bufferDesc.usage & nri::BufferUsageBits::VIDEO_ENCODE) + memoryLocation = graphicsAPI == nri::GraphicsAPI::VK ? nri::MemoryLocation::HOST_READBACK : nri::MemoryLocation::DEVICE; + else if (bufferDesc.usage & nri::BufferUsageBits::VIDEO_DECODE) + memoryLocation = nri::MemoryLocation::HOST_UPLOAD; + + return core.CreateCommittedBuffer(device, memoryLocation, priority, bufferDesc, buffer); +} + } // namespace class Sample : public SampleBase { @@ -1156,7 +1166,7 @@ void Sample::TryInitializeVideo(nri::GraphicsAPI graphicsAPI) { return; } - if (NRI.CreateCommittedBuffer(*m_Device, nri::MemoryLocation::DEVICE, 0.0f, bitstreamBufferDesc, m_BitstreamBuffer) != nri::Result::SUCCESS) { + if (CreateVideoBitstreamBuffer(NRI, *m_Device, graphicsAPI, 0.0f, bitstreamBufferDesc, m_BitstreamBuffer) != nri::Result::SUCCESS) { m_VideoStatus = "Failed to create encode bitstream buffer"; return; } @@ -1166,7 +1176,7 @@ void Sample::TryInitializeVideo(nri::GraphicsAPI graphicsAPI) { return; } - if (NRI.CreateCommittedBuffer(*m_Device, nri::MemoryLocation::HOST_UPLOAD, 0.0f, decodeBitstreamBufferDesc, m_DecodeBitstreamBuffer) != nri::Result::SUCCESS) { + if (CreateVideoBitstreamBuffer(NRI, *m_Device, graphicsAPI, 0.0f, decodeBitstreamBufferDesc, m_DecodeBitstreamBuffer) != nri::Result::SUCCESS) { m_VideoStatus = "Failed to create decode bitstream buffer"; return; } From 2b4fd0429ba2a2b4a1a34665bb7d5bb2b3c1523b Mon Sep 17 00:00:00 2001 From: Bjorn Schobben Date: Wed, 6 May 2026 10:27:56 +0200 Subject: [PATCH 09/20] Update NRIFramework video dependency --- External/NRIFramework | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/External/NRIFramework b/External/NRIFramework index 2640bd0..49f52f6 160000 --- a/External/NRIFramework +++ b/External/NRIFramework @@ -1 +1 @@ -Subproject commit 2640bd0dd33e97cb663a4e7ea968125c8b5cc97f +Subproject commit 49f52f671eb8d2ca0728de0c9adfdc5909bdae2a From b05639436791d2880e566868d261f38cc5a766a7 Mon Sep 17 00:00:00 2001 From: Bjorn Schobben Date: Wed, 6 May 2026 12:15:37 +0200 Subject: [PATCH 10/20] Use readback encode bitstream buffers --- Source/VideoEncodeDecode.cpp | 37 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/Source/VideoEncodeDecode.cpp b/Source/VideoEncodeDecode.cpp index 5956cdf..7a4bed7 100644 --- a/Source/VideoEncodeDecode.cpp +++ b/Source/VideoEncodeDecode.cpp @@ -235,14 +235,12 @@ static bool CopyNv12BufferToTexture(nri::CoreInterface& core, nri::Queue& queue, }); } -static nri::Result CreateVideoBitstreamBuffer(nri::CoreInterface& core, nri::Device& device, nri::GraphicsAPI graphicsAPI, float priority, const nri::BufferDesc& bufferDesc, nri::Buffer*& buffer) { - nri::MemoryLocation memoryLocation = nri::MemoryLocation::DEVICE; - if (bufferDesc.usage & nri::BufferUsageBits::VIDEO_ENCODE) - memoryLocation = graphicsAPI == nri::GraphicsAPI::VK ? nri::MemoryLocation::HOST_READBACK : nri::MemoryLocation::DEVICE; - else if (bufferDesc.usage & nri::BufferUsageBits::VIDEO_DECODE) - memoryLocation = nri::MemoryLocation::HOST_UPLOAD; - - return core.CreateCommittedBuffer(device, memoryLocation, priority, bufferDesc, buffer); +static nri::Result CreateEncodeBitstreamBuffer(nri::CoreInterface& core, nri::Device& device, float priority, const nri::BufferDesc& bufferDesc, nri::Buffer*& buffer) { + return core.CreateCommittedBuffer(device, nri::MemoryLocation::HOST_READBACK, priority, bufferDesc, buffer); +} + +static nri::Result CreateDecodeBitstreamBuffer(nri::CoreInterface& core, nri::Device& device, float priority, const nri::BufferDesc& bufferDesc, nri::Buffer*& buffer) { + return core.CreateCommittedBuffer(device, nri::MemoryLocation::HOST_UPLOAD, priority, bufferDesc, buffer); } } // namespace @@ -1139,7 +1137,7 @@ void Sample::TryInitializeVideo(nri::GraphicsAPI graphicsAPI) { nri::BufferDesc bitstreamBufferDesc = {}; bitstreamBufferDesc.size = BITSTREAM_SIZE; - bitstreamBufferDesc.usage = nri::BufferUsageBits::VIDEO_ENCODE | nri::BufferUsageBits::VIDEO_DECODE; + bitstreamBufferDesc.usage = nri::BufferUsageBits::VIDEO_ENCODE; nri::BufferDesc decodeBitstreamBufferDesc = {}; decodeBitstreamBufferDesc.size = BITSTREAM_SIZE; @@ -1166,7 +1164,7 @@ void Sample::TryInitializeVideo(nri::GraphicsAPI graphicsAPI) { return; } - if (CreateVideoBitstreamBuffer(NRI, *m_Device, graphicsAPI, 0.0f, bitstreamBufferDesc, m_BitstreamBuffer) != nri::Result::SUCCESS) { + if (CreateEncodeBitstreamBuffer(NRI, *m_Device, 0.0f, bitstreamBufferDesc, m_BitstreamBuffer) != nri::Result::SUCCESS) { m_VideoStatus = "Failed to create encode bitstream buffer"; return; } @@ -1176,7 +1174,7 @@ void Sample::TryInitializeVideo(nri::GraphicsAPI graphicsAPI) { return; } - if (CreateVideoBitstreamBuffer(NRI, *m_Device, graphicsAPI, 0.0f, decodeBitstreamBufferDesc, m_DecodeBitstreamBuffer) != nri::Result::SUCCESS) { + if (CreateDecodeBitstreamBuffer(NRI, *m_Device, 0.0f, decodeBitstreamBufferDesc, m_DecodeBitstreamBuffer) != nri::Result::SUCCESS) { m_VideoStatus = "Failed to create decode bitstream buffer"; return; } @@ -1387,14 +1385,11 @@ bool Sample::TrySubmitEncodeAndMetadataReadback(float timeSec) { encodeDesc.av1PictureDesc = m_Codec == SampleCodec::AV1 ? &av1PictureDesc : nullptr; if (!SubmitOneTime(NRI, *m_VideoEncodeQueue, [&](nri::CommandBuffer& commandBuffer) { - nri::BufferBarrierDesc bufferBarriers[3] = {}; - bufferBarriers[0].buffer = m_BitstreamBuffer; - bufferBarriers[0].before = {nri::AccessBits::COPY_DESTINATION, nri::StageBits::COPY}; + nri::BufferBarrierDesc bufferBarriers[2] = {}; + bufferBarriers[0].buffer = m_MetadataBuffer; bufferBarriers[0].after = {nri::AccessBits::VIDEO_ENCODE_WRITE, nri::StageBits::VIDEO_ENCODE}; - bufferBarriers[1].buffer = m_MetadataBuffer; + bufferBarriers[1].buffer = m_ResolvedMetadataBuffer; bufferBarriers[1].after = {nri::AccessBits::VIDEO_ENCODE_WRITE, nri::StageBits::VIDEO_ENCODE}; - bufferBarriers[2].buffer = m_ResolvedMetadataBuffer; - bufferBarriers[2].after = {nri::AccessBits::VIDEO_ENCODE_WRITE, nri::StageBits::VIDEO_ENCODE}; nri::TextureBarrierDesc textureBarriers[2] = {}; textureBarriers[0].texture = m_EncodeTexture; @@ -1417,13 +1412,11 @@ bool Sample::TrySubmitEncodeAndMetadataReadback(float timeSec) { barrierDesc.textureNum = helper::GetCountOf(textureBarriers); NRI.CmdBarrier(commandBuffer, barrierDesc); Video.CmdEncodeVideo(commandBuffer, encodeDesc); - bufferBarriers[0].before = {nri::AccessBits::VIDEO_ENCODE_WRITE, nri::StageBits::VIDEO_ENCODE}; - bufferBarriers[0].after = {}; // D3D12 resolves encode metadata inside CmdEncodeVideo and transitions the raw metadata buffer to encode-read before returning. - bufferBarriers[1].before = {nri::AccessBits::VIDEO_ENCODE_READ, nri::StageBits::VIDEO_ENCODE}; + bufferBarriers[0].before = {nri::AccessBits::VIDEO_ENCODE_READ, nri::StageBits::VIDEO_ENCODE}; + bufferBarriers[0].after = {}; + bufferBarriers[1].before = {nri::AccessBits::VIDEO_ENCODE_WRITE, nri::StageBits::VIDEO_ENCODE}; bufferBarriers[1].after = {}; - bufferBarriers[2].before = {nri::AccessBits::VIDEO_ENCODE_WRITE, nri::StageBits::VIDEO_ENCODE}; - bufferBarriers[2].after = {}; textureBarriers[0].before = {nri::AccessBits::VIDEO_ENCODE_READ, nri::Layout::VIDEO_ENCODE_SRC, nri::StageBits::VIDEO_ENCODE}; textureBarriers[0].after = {nri::AccessBits::NONE, nri::Layout::GENERAL, nri::StageBits::NONE}; textureBarriers[1].before = {nri::AccessBits::VIDEO_ENCODE_WRITE, nri::Layout::VIDEO_ENCODE_DPB, nri::StageBits::VIDEO_ENCODE}; From 5bcc91e2369c143f25cd31bc4b3e4f48447dbb63 Mon Sep 17 00:00:00 2001 From: Bjorn Schobben Date: Wed, 6 May 2026 15:48:18 +0200 Subject: [PATCH 11/20] Update video sample for queue selection cleanup --- External/NRIFramework | 2 +- Source/VideoEncodeDecode.cpp | 45 +++++++++++++++++++++++++----------- 2 files changed, 32 insertions(+), 15 deletions(-) diff --git a/External/NRIFramework b/External/NRIFramework index 49f52f6..516173f 160000 --- a/External/NRIFramework +++ b/External/NRIFramework @@ -1 +1 @@ -Subproject commit 49f52f671eb8d2ca0728de0c9adfdc5909bdae2a +Subproject commit 516173fec91229a6e878837000c0187fd8075620 diff --git a/Source/VideoEncodeDecode.cpp b/Source/VideoEncodeDecode.cpp index 7a4bed7..70fb9d7 100644 --- a/Source/VideoEncodeDecode.cpp +++ b/Source/VideoEncodeDecode.cpp @@ -336,6 +336,7 @@ class Sample : public SampleBase { double m_StartTimeSec = 0.0; double m_LastRoundTripTimeSec = -1.0; bool m_VideoReady = false; + bool m_VideoQueuesRequested = false; bool m_DecodePreviewReady = false; bool m_PreviewTexturesShaderReadable = false; bool m_MetadataReadbackPending = false; @@ -348,19 +349,19 @@ Sample::~Sample() { if (Video.DestroyVideoPicture) { if (m_DecodePicture) - Video.DestroyVideoPicture(*m_DecodePicture); + Video.DestroyVideoPicture(m_DecodePicture); if (m_ReconstructedPicture) - Video.DestroyVideoPicture(*m_ReconstructedPicture); + Video.DestroyVideoPicture(m_ReconstructedPicture); if (m_EncodePicture) - Video.DestroyVideoPicture(*m_EncodePicture); + Video.DestroyVideoPicture(m_EncodePicture); if (m_DecodeParameters) - Video.DestroyVideoSessionParameters(*m_DecodeParameters); + Video.DestroyVideoSessionParameters(m_DecodeParameters); if (m_EncodeParameters) - Video.DestroyVideoSessionParameters(*m_EncodeParameters); + Video.DestroyVideoSessionParameters(m_EncodeParameters); if (m_DecodeSession) - Video.DestroyVideoSession(*m_DecodeSession); + Video.DestroyVideoSession(m_DecodeSession); if (m_EncodeSession) - Video.DestroyVideoSession(*m_EncodeSession); + Video.DestroyVideoSession(m_EncodeSession); } if (m_MetadataReadbackCommandBuffer) @@ -465,12 +466,23 @@ bool Sample::InitializeGraphics(nri::GraphicsAPI graphicsAPI) { uint32_t adapterDescsNum = helper::GetCountOf(adapterDesc); NRI_ABORT_ON_FAILURE(nri::nriEnumerateAdapters(adapterDesc, adapterDescsNum)); + const nri::AdapterDesc& selectedAdapter = adapterDesc[std::min(m_AdapterIndex, adapterDescsNum - 1)]; + m_VideoQueuesRequested = graphicsAPI != nri::GraphicsAPI::D3D11 && + selectedAdapter.queueNum[(uint32_t)nri::QueueType::VIDEO_ENCODE] && + selectedAdapter.queueNum[(uint32_t)nri::QueueType::VIDEO_DECODE]; + + nri::QueueFamilyDesc queueFamilies[3] = {}; + uint32_t queueFamilyNum = 0; + queueFamilies[queueFamilyNum].queueNum = 1; + queueFamilies[queueFamilyNum++].queueType = nri::QueueType::GRAPHICS; + if (m_VideoQueuesRequested) { + queueFamilies[queueFamilyNum].queueNum = 1; + queueFamilies[queueFamilyNum++].queueType = nri::QueueType::VIDEO_ENCODE; + queueFamilies[queueFamilyNum].queueNum = 1; + queueFamilies[queueFamilyNum++].queueType = nri::QueueType::VIDEO_DECODE; + } + nri::DeviceCreationDesc deviceCreationDesc = {}; - nri::QueueFamilyDesc queueFamilies[] = { - {nullptr, 1, nri::QueueType::GRAPHICS}, - {nullptr, 1, nri::QueueType::VIDEO_ENCODE}, - {nullptr, 1, nri::QueueType::VIDEO_DECODE}, - }; deviceCreationDesc.graphicsAPI = graphicsAPI; deviceCreationDesc.enableGraphicsAPIValidation = m_DebugAPI; @@ -478,10 +490,10 @@ bool Sample::InitializeGraphics(nri::GraphicsAPI graphicsAPI) { deviceCreationDesc.enableD3D11CommandBufferEmulation = D3D11_ENABLE_COMMAND_BUFFER_EMULATION; deviceCreationDesc.disableD3D12EnhancedBarriers = D3D12_DISABLE_ENHANCED_BARRIERS; deviceCreationDesc.vkBindingOffsets = VK_BINDING_OFFSETS; - deviceCreationDesc.adapterDesc = &adapterDesc[std::min(m_AdapterIndex, adapterDescsNum - 1)]; + deviceCreationDesc.adapterDesc = &selectedAdapter; deviceCreationDesc.allocationCallbacks = m_AllocationCallbacks; deviceCreationDesc.queueFamilies = queueFamilies; - deviceCreationDesc.queueFamilyNum = helper::GetCountOf(queueFamilies); + deviceCreationDesc.queueFamilyNum = queueFamilyNum; NRI_ABORT_ON_FAILURE(nri::nriCreateDevice(deviceCreationDesc, m_Device)); NRI_ABORT_ON_FAILURE(nri::nriGetInterface(*m_Device, NRI_INTERFACE(nri::CoreInterface), (nri::CoreInterface*)&NRI)); @@ -842,6 +854,11 @@ void Sample::TryInitializeVideo(nri::GraphicsAPI graphicsAPI) { return; } + if (!m_VideoQueuesRequested) { + m_VideoStatus = "Adapter has no NRI video encode/decode queues"; + return; + } + if (!deviceDesc.adapterDesc.queueNum[(uint32_t)nri::QueueType::VIDEO_ENCODE] || !deviceDesc.adapterDesc.queueNum[(uint32_t)nri::QueueType::VIDEO_DECODE]) { m_VideoStatus = "Adapter has no NRI video encode/decode queues"; return; From e399e769e917d2fa4c93a40951dbc9f7bbcc9272 Mon Sep 17 00:00:00 2001 From: Bjorn Schobben Date: Wed, 6 May 2026 16:01:14 +0200 Subject: [PATCH 12/20] Update NRIFramework video header cleanup --- External/NRIFramework | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/External/NRIFramework b/External/NRIFramework index 516173f..f6baf97 160000 --- a/External/NRIFramework +++ b/External/NRIFramework @@ -1 +1 @@ -Subproject commit 516173fec91229a6e878837000c0187fd8075620 +Subproject commit f6baf974ed90f38dc405214c1fa70a4f91e2f681 From 833e14338da35d0d1a8506f495d3fd5bfbff0655 Mon Sep 17 00:00:00 2001 From: Bjorn Schobben Date: Thu, 7 May 2026 14:30:23 +0200 Subject: [PATCH 13/20] Update video encode decode sample --- External/NRIFramework | 2 +- README.md | 8 + Source/VideoEncodeDecode.cpp | 378 ++++++++++++++++++++++++++--------- 3 files changed, 287 insertions(+), 101 deletions(-) diff --git a/External/NRIFramework b/External/NRIFramework index f6baf97..6aa54f6 160000 --- a/External/NRIFramework +++ b/External/NRIFramework @@ -1 +1 @@ -Subproject commit f6baf974ed90f38dc405214c1fa70a4f91e2f681 +Subproject commit 6aa54f6320077b8c8d58215df1713f6ed78707a2 diff --git a/README.md b/README.md index 01fab10..296bf78 100644 --- a/README.md +++ b/README.md @@ -59,3 +59,11 @@ The executables from `_Bin` directory load resources from `_Data`, therefore the - Triangle - simple textured triangle rendering (also multiview demonstration in _FLEXIBLE_ mode) - VideoEncodeDecode - H.264/H.265/AV1 NV12 video encode/decode round trip; built only when the selected NRI source provides the NRIVideo extension - Wrapper - shows how to wrap native D3D11/D3D12/VK objects into *NRI* entities + +### VideoEncodeDecode AV1 notes + +`VideoEncodeDecode --codec=AV1 --av1Frame=P` encodes a static generated source image as an IDR reference followed by a P frame. The source is intentionally frozen for this permutation so the left generated image and right decoded image can be compared directly while validating AV1 reference/DPB handling. It is not intended to demonstrate animated AV1 P-frame motion. + +The framework `--width` and `--height` arguments control the sample output window size only. Use `--videoWidth` and `--videoHeight` to select the NV12 encode/decode surface size. The default video size is 1920x1080. + +The sample uses CQP rate control by default. Use `--qpI`, `--qpP`, and `--qpB` to change the per-frame-type quantizers, and `--av1BaseQIndex` to change AV1's frame base quantizer. H.264/H.265 QP values must be in the 0..51 range; AV1 quantizer values use the 0..255 range. Lower values generally produce higher quality and larger bitstreams. diff --git a/Source/VideoEncodeDecode.cpp b/Source/VideoEncodeDecode.cpp index 70fb9d7..956af94 100644 --- a/Source/VideoEncodeDecode.cpp +++ b/Source/VideoEncodeDecode.cpp @@ -17,8 +17,8 @@ namespace { -constexpr uint32_t VIDEO_WIDTH = 1920; -constexpr uint32_t VIDEO_HEIGHT = 1088; +constexpr uint32_t DEFAULT_VIDEO_WIDTH = 1920; +constexpr uint32_t DEFAULT_VIDEO_HEIGHT = 1080; constexpr double ROUND_TRIP_INTERVAL_SEC = 1.0 / 60.0; constexpr uint64_t BITSTREAM_SIZE = 2 * 1024 * 1024; constexpr uint64_t ENCODED_SLICE_OFFSET = 4096; @@ -26,10 +26,6 @@ constexpr uint64_t AV1_HEADER_READBACK_SIZE = 4096; constexpr uint64_t METADATA_SIZE = 4 * 1024 * 1024; constexpr uint64_t RESOLVED_METADATA_SIZE = 4096; -static_assert(VIDEO_WIDTH % 4 == 0, "Compute-backed NV12 writer expects width divisible by 4"); -static_assert(VIDEO_WIDTH % 16 == 0, "H.264 macroblock width should stay aligned"); -static_assert(VIDEO_HEIGHT % 16 == 0, "H.264 macroblock height should stay aligned"); - enum PatternOperation : uint32_t { OP_GENERATE_PATTERN = 0, OP_NV12_TO_PREVIEW = 1, @@ -70,14 +66,14 @@ static uint64_t GetEncodedPayloadHeaderSkip(SampleCodec codec, uint64_t encodedB return std::min(headerSize, encodedBitstreamBytes); } -static nri::VideoAV1SequenceDesc MakeAV1SequenceDesc() { +static nri::VideoAV1SequenceDesc MakeAV1SequenceDesc(uint32_t width, uint32_t height) { nri::VideoAV1SequenceDesc desc = {}; desc.flags = nri::VideoAV1SequenceBits::ENABLE_ORDER_HINT | nri::VideoAV1SequenceBits::ENABLE_CDEF | nri::VideoAV1SequenceBits::ENABLE_RESTORATION | nri::VideoAV1SequenceBits::COLOR_DESCRIPTION_PRESENT; desc.bitDepth = 8; desc.subsamplingX = 1; desc.subsamplingY = 1; - desc.maxFrameWidthMinus1 = VIDEO_WIDTH - 1; - desc.maxFrameHeightMinus1 = VIDEO_HEIGHT - 1; + desc.maxFrameWidthMinus1 = (uint16_t)(width - 1); + desc.maxFrameHeightMinus1 = (uint16_t)(height - 1); desc.frameWidthBitsMinus1 = 15; desc.frameHeightBitsMinus1 = 15; desc.orderHintBitsMinus1 = 7; @@ -96,8 +92,8 @@ struct QueuedFrame { }; struct PatternConstants { - uint32_t width = VIDEO_WIDTH; - uint32_t height = VIDEO_HEIGHT; + uint32_t width = DEFAULT_VIDEO_WIDTH; + uint32_t height = DEFAULT_VIDEO_HEIGHT; uint32_t yOffsetBytes = 0; uint32_t yRowPitchBytes = 0; uint32_t uvRowPitchBytes = 0; @@ -109,28 +105,28 @@ struct PatternConstants { }; struct Nv12BufferLayout { - uint32_t yRowPitchBytes = VIDEO_WIDTH; - uint32_t ySlicePitchBytes = VIDEO_WIDTH * VIDEO_HEIGHT; - uint64_t uvOffsetBytes = uint64_t(VIDEO_WIDTH) * VIDEO_HEIGHT; - uint32_t uvRowPitchBytes = VIDEO_WIDTH; - uint32_t uvSlicePitchBytes = VIDEO_WIDTH * VIDEO_HEIGHT / 2; - uint64_t totalSizeBytes = uint64_t(VIDEO_WIDTH) * VIDEO_HEIGHT * 3 / 2; + uint32_t yRowPitchBytes = DEFAULT_VIDEO_WIDTH; + uint32_t ySlicePitchBytes = DEFAULT_VIDEO_WIDTH * DEFAULT_VIDEO_HEIGHT; + uint64_t uvOffsetBytes = uint64_t(DEFAULT_VIDEO_WIDTH) * DEFAULT_VIDEO_HEIGHT; + uint32_t uvRowPitchBytes = DEFAULT_VIDEO_WIDTH; + uint32_t uvSlicePitchBytes = DEFAULT_VIDEO_WIDTH * DEFAULT_VIDEO_HEIGHT / 2; + uint64_t totalSizeBytes = uint64_t(DEFAULT_VIDEO_WIDTH) * DEFAULT_VIDEO_HEIGHT * 3 / 2; }; static uint64_t AlignUp(uint64_t value, uint64_t alignment) { return alignment == 0 ? value : ((value + alignment - 1) / alignment) * alignment; } -static Nv12BufferLayout MakeNv12BufferLayout(const nri::DeviceDesc& deviceDesc) { +static Nv12BufferLayout MakeNv12BufferLayout(const nri::DeviceDesc& deviceDesc, uint32_t width, uint32_t height) { const uint32_t rowAlignment = std::max(deviceDesc.memoryAlignment.uploadBufferTextureRow, 1u); const uint32_t sliceAlignment = std::max(deviceDesc.memoryAlignment.uploadBufferTextureSlice, 1u); Nv12BufferLayout layout = {}; - layout.yRowPitchBytes = (uint32_t)AlignUp(VIDEO_WIDTH, rowAlignment); - layout.ySlicePitchBytes = (uint32_t)AlignUp(uint64_t(layout.yRowPitchBytes) * VIDEO_HEIGHT, sliceAlignment); + layout.yRowPitchBytes = (uint32_t)AlignUp(width, rowAlignment); + layout.ySlicePitchBytes = (uint32_t)AlignUp(uint64_t(layout.yRowPitchBytes) * height, sliceAlignment); layout.uvOffsetBytes = layout.ySlicePitchBytes; - layout.uvRowPitchBytes = (uint32_t)AlignUp(VIDEO_WIDTH, rowAlignment); - layout.uvSlicePitchBytes = (uint32_t)AlignUp(uint64_t(layout.uvRowPitchBytes) * (VIDEO_HEIGHT / 2), sliceAlignment); + layout.uvRowPitchBytes = (uint32_t)AlignUp(width, rowAlignment); + layout.uvSlicePitchBytes = (uint32_t)AlignUp(uint64_t(layout.uvRowPitchBytes) * (height / 2), sliceAlignment); layout.totalSizeBytes = layout.uvOffsetBytes + layout.uvSlicePitchBytes; return layout; } @@ -182,7 +178,7 @@ static bool SubmitOneTime( return ok; } -static bool CopyNv12BufferToTexture(nri::CoreInterface& core, nri::Queue& queue, const Nv12BufferLayout& layout, nri::Buffer& src, nri::Texture& dst) { +static bool CopyNv12BufferToTexture(nri::CoreInterface& core, nri::Queue& queue, const Nv12BufferLayout& layout, nri::Buffer& src, nri::Texture& dst, uint32_t width, uint32_t height) { return SubmitOneTime(core, queue, [&](nri::CommandBuffer& commandBuffer) { nri::BufferBarrierDesc bufferBarrier = {}; bufferBarrier.buffer = &src; @@ -205,8 +201,8 @@ static bool CopyNv12BufferToTexture(nri::CoreInterface& core, nri::Queue& queue, core.CmdBarrier(commandBuffer, barrierDesc); nri::TextureRegionDesc lumaRegion = {}; - lumaRegion.width = VIDEO_WIDTH; - lumaRegion.height = VIDEO_HEIGHT; + lumaRegion.width = (nri::Dim_t)width; + lumaRegion.height = (nri::Dim_t)height; lumaRegion.depth = 1; lumaRegion.planes = nri::PlaneBits::PLANE_0; @@ -216,8 +212,8 @@ static bool CopyNv12BufferToTexture(nri::CoreInterface& core, nri::Queue& queue, core.CmdUploadBufferToTexture(commandBuffer, dst, lumaRegion, src, lumaLayout); nri::TextureRegionDesc chromaRegion = {}; - chromaRegion.width = VIDEO_WIDTH; - chromaRegion.height = VIDEO_HEIGHT; + chromaRegion.width = (nri::Dim_t)width; + chromaRegion.height = (nri::Dim_t)height; chromaRegion.depth = 1; chromaRegion.planes = nri::PlaneBits::PLANE_1; @@ -291,7 +287,9 @@ class Sample : public SampleBase { nri::VideoSessionParameters* m_DecodeParameters = nullptr; nri::Texture* m_EncodeTexture = nullptr; nri::Texture* m_ReconstructedTexture = nullptr; + nri::Texture* m_AV1PReconstructedTexture = nullptr; nri::Texture* m_DecodeTexture = nullptr; + nri::Texture* m_AV1PDecodeTexture = nullptr; nri::Texture* m_SourcePreviewTexture = nullptr; nri::Texture* m_DecodePreviewTexture = nullptr; nri::Buffer* m_UploadBuffer = nullptr; @@ -313,7 +311,9 @@ class Sample : public SampleBase { nri::Buffer* m_ResolvedMetadataReadbackBuffer = nullptr; nri::VideoPicture* m_EncodePicture = nullptr; nri::VideoPicture* m_ReconstructedPicture = nullptr; + nri::VideoPicture* m_AV1PReconstructedPicture = nullptr; nri::VideoPicture* m_DecodePicture = nullptr; + nri::VideoPicture* m_AV1PDecodePicture = nullptr; nri::CommandAllocator* m_MetadataReadbackCommandAllocator = nullptr; nri::CommandBuffer* m_MetadataReadbackCommandBuffer = nullptr; nri::Fence* m_MetadataReadbackFence = nullptr; @@ -326,6 +326,13 @@ class Sample : public SampleBase { std::string m_VideoStatus = "Initializing video"; std::string m_PreviewStatus = "Initializing preview"; std::string m_CodecArg = "H264"; + std::string m_AV1FrameArg = "IDR"; + uint32_t m_VideoWidth = DEFAULT_VIDEO_WIDTH; + uint32_t m_VideoHeight = DEFAULT_VIDEO_HEIGHT; + uint32_t m_QpI = 20; + uint32_t m_QpP = 22; + uint32_t m_QpB = 24; + uint32_t m_AV1BaseQIndex = 20; SampleCodec m_Codec = SampleCodec::H264; nri::VideoH264SequenceParameterSetDesc m_H264Sps = {}; nri::VideoH264PictureParameterSetDesc m_H264Pps = {}; @@ -340,6 +347,8 @@ class Sample : public SampleBase { bool m_DecodePreviewReady = false; bool m_PreviewTexturesShaderReadable = false; bool m_MetadataReadbackPending = false; + bool m_AV1PFrameVisual = false; + uint32_t m_AV1PFrameStage = 0; uint64_t m_MetadataReadbackFenceValue = 0; }; @@ -348,8 +357,12 @@ Sample::~Sample() { NRI.DeviceWaitIdle(m_Device); if (Video.DestroyVideoPicture) { + if (m_AV1PDecodePicture) + Video.DestroyVideoPicture(m_AV1PDecodePicture); if (m_DecodePicture) Video.DestroyVideoPicture(m_DecodePicture); + if (m_AV1PReconstructedPicture) + Video.DestroyVideoPicture(m_AV1PReconstructedPicture); if (m_ReconstructedPicture) Video.DestroyVideoPicture(m_ReconstructedPicture); if (m_EncodePicture) @@ -408,8 +421,12 @@ Sample::~Sample() { NRI.DestroyTexture(m_DecodePreviewTexture); if (m_DecodeTexture) NRI.DestroyTexture(m_DecodeTexture); + if (m_AV1PDecodeTexture) + NRI.DestroyTexture(m_AV1PDecodeTexture); if (m_ReconstructedTexture) NRI.DestroyTexture(m_ReconstructedTexture); + if (m_AV1PReconstructedTexture) + NRI.DestroyTexture(m_AV1PReconstructedTexture); if (m_EncodeTexture) NRI.DestroyTexture(m_EncodeTexture); @@ -440,14 +457,41 @@ Sample::~Sample() { void Sample::InitCmdLine(cmdline::parser& cmdLine) { cmdLine.add("codec", 0, "video codec: H264, H265, or AV1", false, m_CodecArg, cmdline::oneof("H264", "H265", "AV1")); + cmdLine.add("av1Frame", 0, "AV1 visual frame permutation: IDR or P", false, m_AV1FrameArg, cmdline::oneof("IDR", "P")); + cmdLine.add("videoWidth", 0, "NV12 video encode/decode width", false, m_VideoWidth); + cmdLine.add("videoHeight", 0, "NV12 video encode/decode height", false, m_VideoHeight); + cmdLine.add("qpI", 0, "CQP quantizer for I/IDR frames", false, m_QpI); + cmdLine.add("qpP", 0, "CQP quantizer for P frames", false, m_QpP); + cmdLine.add("qpB", 0, "CQP quantizer for B frames", false, m_QpB); + cmdLine.add("av1BaseQIndex", 0, "AV1 base quantizer index", false, m_AV1BaseQIndex); } void Sample::ReadCmdLine(cmdline::parser& cmdLine) { m_CodecArg = cmdLine.get("codec"); + m_AV1FrameArg = cmdLine.get("av1Frame"); + m_VideoWidth = cmdLine.get("videoWidth"); + m_VideoHeight = cmdLine.get("videoHeight"); + m_QpI = cmdLine.get("qpI"); + m_QpP = cmdLine.get("qpP"); + m_QpB = cmdLine.get("qpB"); + m_AV1BaseQIndex = cmdLine.get("av1BaseQIndex"); m_Codec = m_CodecArg == "H265" ? SampleCodec::H265 : (m_CodecArg == "AV1" ? SampleCodec::AV1 : SampleCodec::H264); + m_AV1PFrameVisual = m_Codec == SampleCodec::AV1 && m_AV1FrameArg == "P"; } bool Sample::Initialize(nri::GraphicsAPI graphicsAPI, bool) { + if (!m_VideoWidth || !m_VideoHeight || (m_VideoWidth % 4) != 0 || (m_VideoHeight % 2) != 0 || m_VideoWidth > 65535 || m_VideoHeight > 65535) { + m_VideoStatus = "Video size must be non-zero, width must be divisible by 4, height must be even, and both dimensions must fit 16-bit video descriptors"; + std::fprintf(stderr, "%s\n", m_VideoStatus.c_str()); + return false; + } + const uint32_t maxCodecQp = m_Codec == SampleCodec::AV1 ? 255 : 51; + if (m_QpI > maxCodecQp || m_QpP > maxCodecQp || m_QpB > maxCodecQp || m_AV1BaseQIndex > 255) { + m_VideoStatus = m_Codec == SampleCodec::AV1 ? "AV1 quantizers must be in the 0..255 range" : "H.264/H.265 QP values must be in the 0..51 range"; + std::fprintf(stderr, "%s\n", m_VideoStatus.c_str()); + return false; + } + m_GraphicsAPI = graphicsAPI; if (!InitializeGraphics(graphicsAPI)) return false; @@ -501,7 +545,7 @@ bool Sample::InitializeGraphics(nri::GraphicsAPI graphicsAPI) { NRI_ABORT_ON_FAILURE(nri::nriGetInterface(*m_Device, NRI_INTERFACE(nri::StreamerInterface), (nri::StreamerInterface*)&NRI)); NRI_ABORT_ON_FAILURE(nri::nriGetInterface(*m_Device, NRI_INTERFACE(nri::SwapChainInterface), (nri::SwapChainInterface*)&NRI)); - m_Nv12Layout = MakeNv12BufferLayout(NRI.GetDeviceDesc(*m_Device)); + m_Nv12Layout = MakeNv12BufferLayout(NRI.GetDeviceDesc(*m_Device), m_VideoWidth, m_VideoHeight); nri::StreamerDesc streamerDesc = {}; streamerDesc.dynamicBufferMemoryLocation = nri::MemoryLocation::HOST_UPLOAD; @@ -560,8 +604,8 @@ bool Sample::InitializeGraphics(nri::GraphicsAPI graphicsAPI) { PatternConstants Sample::MakePatternConstants(PatternOperation operation, float timeSec) const { PatternConstants patternConstants = {}; - patternConstants.width = VIDEO_WIDTH; - patternConstants.height = VIDEO_HEIGHT; + patternConstants.width = m_VideoWidth; + patternConstants.height = m_VideoHeight; patternConstants.yRowPitchBytes = m_Nv12Layout.yRowPitchBytes; patternConstants.uvRowPitchBytes = m_Nv12Layout.uvRowPitchBytes; patternConstants.uvOffsetBytes = (uint32_t)m_Nv12Layout.uvOffsetBytes; @@ -582,7 +626,7 @@ void Sample::InitializeGeneratedFrames(float timeSec) { return; } - if (!CopyNv12BufferToTexture(NRI, *m_GraphicsQueue, m_Nv12Layout, *m_UploadBuffer, *m_EncodeTexture)) { + if (!CopyNv12BufferToTexture(NRI, *m_GraphicsQueue, m_Nv12Layout, *m_UploadBuffer, *m_EncodeTexture, m_VideoWidth, m_VideoHeight)) { m_PreviewStatus = "Failed to upload NV12 source to video texture"; return; } @@ -673,8 +717,8 @@ bool Sample::TryInitializePreviewTextures(nri::GraphicsAPI) { nri::TextureDesc previewTextureDesc = {}; previewTextureDesc.type = nri::TextureType::TEXTURE_2D; previewTextureDesc.format = nri::Format::RGBA8_UNORM; - previewTextureDesc.width = VIDEO_WIDTH; - previewTextureDesc.height = VIDEO_HEIGHT; + previewTextureDesc.width = (nri::Dim_t)m_VideoWidth; + previewTextureDesc.height = (nri::Dim_t)m_VideoHeight; previewTextureDesc.mipNum = 1; previewTextureDesc.layerNum = 1; previewTextureDesc.usage = nri::TextureUsageBits::SHADER_RESOURCE | nri::TextureUsageBits::SHADER_RESOURCE_STORAGE; @@ -873,8 +917,8 @@ void Sample::TryInitializeVideo(nri::GraphicsAPI graphicsAPI) { encodeSessionDesc.usage = nri::VideoUsage::ENCODE; encodeSessionDesc.codec = GetNriCodec(m_Codec); encodeSessionDesc.format = nri::Format::NV12_UNORM; - encodeSessionDesc.width = VIDEO_WIDTH; - encodeSessionDesc.height = VIDEO_HEIGHT; + encodeSessionDesc.width = m_VideoWidth; + encodeSessionDesc.height = m_VideoHeight; encodeSessionDesc.maxReferenceNum = 1; nri::VideoSessionDesc decodeSessionDesc = encodeSessionDesc; @@ -906,8 +950,8 @@ void Sample::TryInitializeVideo(nri::GraphicsAPI graphicsAPI) { sps.pictureOrderCountType = 0; sps.log2MaxPictureOrderCountLsbMinus4 = 0; sps.referenceFrameNum = 1; - sps.pictureWidthInMbsMinus1 = VIDEO_WIDTH / 16 - 1; - sps.pictureHeightInMapUnitsMinus1 = VIDEO_HEIGHT / 16 - 1; + sps.pictureWidthInMbsMinus1 = (uint16_t)((m_VideoWidth + 15) / 16 - 1); + sps.pictureHeightInMapUnitsMinus1 = (uint16_t)((m_VideoHeight + 15) / 16 - 1); nri::VideoH264PictureParameterSetDesc pps = {}; pps.flags = nri::VideoH264PictureParameterSetBits::DEBLOCKING_FILTER_CONTROL_PRESENT; @@ -942,8 +986,8 @@ void Sample::TryInitializeVideo(nri::GraphicsAPI graphicsAPI) { h265Sps.maxSubLayersMinus1 = vps.maxSubLayersMinus1; h265Sps.sequenceParameterSetId = 0; h265Sps.chromaFormatIdc = 1; - h265Sps.pictureWidthInLumaSamples = VIDEO_WIDTH; - h265Sps.pictureHeightInLumaSamples = VIDEO_HEIGHT; + h265Sps.pictureWidthInLumaSamples = m_VideoWidth; + h265Sps.pictureHeightInLumaSamples = m_VideoHeight; h265Sps.log2MaxPictureOrderCountLsbMinus4 = 3; h265Sps.log2MinLumaCodingBlockSizeMinus3 = 0; h265Sps.log2DiffMaxMinLumaCodingBlockSize = 2; @@ -974,7 +1018,7 @@ void Sample::TryInitializeVideo(nri::GraphicsAPI graphicsAPI) { h265Parameters.maxSequenceParameterSetNum = 1; h265Parameters.maxPictureParameterSetNum = 1; - m_AV1Sequence = MakeAV1SequenceDesc(); + m_AV1Sequence = MakeAV1SequenceDesc(m_VideoWidth, m_VideoHeight); nri::VideoAV1SessionParametersDesc av1Parameters = {}; av1Parameters.sequence = m_AV1Sequence; @@ -1004,8 +1048,8 @@ void Sample::TryInitializeVideo(nri::GraphicsAPI graphicsAPI) { encodeTextureDesc.type = nri::TextureType::TEXTURE_2D; encodeTextureDesc.usage = nri::TextureUsageBits::VIDEO_ENCODE; encodeTextureDesc.format = nri::Format::NV12_UNORM; - encodeTextureDesc.width = VIDEO_WIDTH; - encodeTextureDesc.height = VIDEO_HEIGHT; + encodeTextureDesc.width = (nri::Dim_t)m_VideoWidth; + encodeTextureDesc.height = (nri::Dim_t)m_VideoHeight; encodeTextureDesc.mipNum = 1; encodeTextureDesc.layerNum = 1; encodeTextureDesc.videoCodec = GetNriCodec(m_Codec); @@ -1025,19 +1069,39 @@ void Sample::TryInitializeVideo(nri::GraphicsAPI graphicsAPI) { } NRI.SetDebugName(m_ReconstructedTexture, "VideoReconstructedTexture"); + if (m_AV1PFrameVisual) { + if (NRI.CreateCommittedTexture(*m_Device, nri::MemoryLocation::DEVICE, 0.0f, encodeTextureDesc, m_AV1PReconstructedTexture) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to create second NV12 reconstructed texture"; + return; + } + NRI.SetDebugName(m_AV1PReconstructedTexture, "VideoAV1PReconstructedTexture"); + } + if (NRI.CreateCommittedTexture(*m_Device, nri::MemoryLocation::DEVICE, 0.0f, decodeTextureDesc, m_DecodeTexture) != nri::Result::SUCCESS) { m_VideoStatus = "Failed to create NV12 decode texture"; return; } NRI.SetDebugName(m_DecodeTexture, "VideoDecodeTexture"); + if (m_AV1PFrameVisual) { + if (NRI.CreateCommittedTexture(*m_Device, nri::MemoryLocation::DEVICE, 0.0f, decodeTextureDesc, m_AV1PDecodeTexture) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to create second NV12 decode texture"; + return; + } + NRI.SetDebugName(m_AV1PDecodeTexture, "VideoAV1PDecodeTexture"); + } + if (!SubmitOneTime(NRI, *m_GraphicsQueue, [&](nri::CommandBuffer& commandBuffer) { - nri::TextureBarrierDesc textureBarriers[3] = {}; + nri::TextureBarrierDesc textureBarriers[5] = {}; textureBarriers[0].texture = m_EncodeTexture; textureBarriers[1].texture = m_ReconstructedTexture; textureBarriers[2].texture = m_DecodeTexture; + textureBarriers[3].texture = m_AV1PReconstructedTexture; + textureBarriers[4].texture = m_AV1PDecodeTexture; for (nri::TextureBarrierDesc& textureBarrier : textureBarriers) { + if (!textureBarrier.texture) + continue; textureBarrier.before = {nri::AccessBits::NONE, nri::Layout::UNDEFINED, nri::StageBits::ALL}; textureBarrier.after = {nri::AccessBits::NONE, nri::Layout::GENERAL, nri::StageBits::NONE}; textureBarrier.mipNum = nri::REMAINING; @@ -1047,7 +1111,7 @@ void Sample::TryInitializeVideo(nri::GraphicsAPI graphicsAPI) { nri::BarrierDesc barrierDesc = {}; barrierDesc.textures = textureBarriers; - barrierDesc.textureNum = helper::GetCountOf(textureBarriers); + barrierDesc.textureNum = m_AV1PFrameVisual ? helper::GetCountOf(textureBarriers) : 3; NRI.CmdBarrier(commandBuffer, barrierDesc); })) { m_VideoStatus = "Failed to initialize video texture layouts"; @@ -1225,15 +1289,15 @@ void Sample::TryInitializeVideo(nri::GraphicsAPI graphicsAPI) { encodePictureDesc.texture = m_EncodeTexture; encodePictureDesc.usage = nri::VideoPictureUsage::ENCODE_INPUT; encodePictureDesc.format = nri::Format::NV12_UNORM; - encodePictureDesc.width = VIDEO_WIDTH; - encodePictureDesc.height = VIDEO_HEIGHT; + encodePictureDesc.width = m_VideoWidth; + encodePictureDesc.height = m_VideoHeight; nri::VideoPictureDesc decodePictureDesc = {}; decodePictureDesc.texture = m_DecodeTexture; decodePictureDesc.usage = nri::VideoPictureUsage::DECODE_OUTPUT; decodePictureDesc.format = nri::Format::NV12_UNORM; - decodePictureDesc.width = VIDEO_WIDTH; - decodePictureDesc.height = VIDEO_HEIGHT; + decodePictureDesc.width = m_VideoWidth; + decodePictureDesc.height = m_VideoHeight; nri::VideoPictureDesc reconstructedPictureDesc = encodePictureDesc; reconstructedPictureDesc.texture = m_ReconstructedTexture; @@ -1249,11 +1313,27 @@ void Sample::TryInitializeVideo(nri::GraphicsAPI graphicsAPI) { return; } + if (m_AV1PFrameVisual) { + reconstructedPictureDesc.texture = m_AV1PReconstructedTexture; + if (Video.CreateVideoPicture(*m_Device, reconstructedPictureDesc, m_AV1PReconstructedPicture) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to create second reconstructed picture"; + return; + } + } + if (Video.CreateVideoPicture(*m_Device, decodePictureDesc, m_DecodePicture) != nri::Result::SUCCESS) { m_VideoStatus = "Failed to create decode picture"; return; } + if (m_AV1PFrameVisual) { + decodePictureDesc.texture = m_AV1PDecodeTexture; + if (Video.CreateVideoPicture(*m_Device, decodePictureDesc, m_AV1PDecodePicture) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to create second decode picture"; + return; + } + } + m_VideoReady = true; m_VideoStatus = std::string("NRI video queues and ") + GetCodecName(m_Codec) + " encode/decode objects initialized"; } @@ -1309,13 +1389,14 @@ bool Sample::TrySubmitEncodeAndMetadataReadback(float timeSec) { return false; } - PatternConstants patternConstants = MakePatternConstants(OP_GENERATE_PATTERN, timeSec); + const bool av1PFrame = m_AV1PFrameVisual && m_AV1PFrameStage == 1; + PatternConstants patternConstants = MakePatternConstants(OP_GENERATE_PATTERN, m_AV1PFrameVisual ? 0.0f : timeSec); if (!GeneratePatternWithCompute(patternConstants, m_SourcePreviewStorage, true)) { m_VideoStatus = "Failed to generate NV12 source pattern via compute"; return false; } - if (!CopyNv12BufferToTexture(NRI, *m_GraphicsQueue, m_Nv12Layout, *m_UploadBuffer, *m_EncodeTexture)) { + if (!CopyNv12BufferToTexture(NRI, *m_GraphicsQueue, m_Nv12Layout, *m_UploadBuffer, *m_EncodeTexture, m_VideoWidth, m_VideoHeight)) { m_VideoStatus = "Failed to upload NV12 source to video texture"; return false; } @@ -1333,13 +1414,15 @@ bool Sample::TrySubmitEncodeAndMetadataReadback(float timeSec) { } nri::VideoEncodePictureDesc pictureDesc = {}; - pictureDesc.frameType = nri::VideoEncodeFrameType::IDR; - pictureDesc.idrPictureId = 1; - - uint16_t av1MiColumnStarts[] = {0, (uint16_t)(2 * ((VIDEO_WIDTH + 7) >> 3))}; - uint16_t av1MiRowStarts[] = {0, (uint16_t)(2 * ((VIDEO_HEIGHT + 7) >> 3))}; - uint16_t av1WidthInSuperblocksMinus1[] = {(uint16_t)(((VIDEO_WIDTH + 63) / 64) - 1)}; - uint16_t av1HeightInSuperblocksMinus1[] = {(uint16_t)(((VIDEO_HEIGHT + 63) / 64) - 1)}; + pictureDesc.frameType = av1PFrame ? nri::VideoEncodeFrameType::P : nri::VideoEncodeFrameType::IDR; + pictureDesc.frameIndex = av1PFrame ? 1 : 0; + pictureDesc.pictureOrderCount = av1PFrame ? 1 : 0; + pictureDesc.idrPictureId = av1PFrame ? 0 : 1; + + uint16_t av1MiColumnStarts[] = {0, (uint16_t)(2 * ((m_VideoWidth + 7) >> 3))}; + uint16_t av1MiRowStarts[] = {0, (uint16_t)(2 * ((m_VideoHeight + 7) >> 3))}; + uint16_t av1WidthInSuperblocksMinus1[] = {(uint16_t)(((m_VideoWidth + 63) / 64) - 1)}; + uint16_t av1HeightInSuperblocksMinus1[] = {(uint16_t)(((m_VideoHeight + 63) / 64) - 1)}; nri::VideoAV1TileLayoutDesc av1TileLayout = {}; av1TileLayout.columnNum = 1; av1TileLayout.rowNum = 1; @@ -1362,13 +1445,15 @@ bool Sample::TrySubmitEncodeAndMetadataReadback(float timeSec) { params[5] = 1 << 16; } nri::VideoAV1PictureDesc av1PictureDesc = {}; - av1PictureDesc.currentFrameId = 0; - av1PictureDesc.refreshFrameFlags = 0xFF; - av1PictureDesc.primaryReferenceName = nri::VideoAV1ReferenceName::NONE; - av1PictureDesc.flags = nri::VideoAV1PictureBits::ERROR_RESILIENT_MODE | nri::VideoAV1PictureBits::DISABLE_CDF_UPDATE | nri::VideoAV1PictureBits::ALLOW_SCREEN_CONTENT_TOOLS | nri::VideoAV1PictureBits::FORCE_INTEGER_MV | nri::VideoAV1PictureBits::SHOW_FRAME | nri::VideoAV1PictureBits::SHOWABLE_FRAME; - av1PictureDesc.renderWidthMinus1 = VIDEO_WIDTH - 1; - av1PictureDesc.renderHeightMinus1 = VIDEO_HEIGHT - 1; - av1PictureDesc.baseQIndex = 20; + av1PictureDesc.currentFrameId = av1PFrame ? 1 : 0; + av1PictureDesc.orderHint = av1PFrame ? 1 : 0; + av1PictureDesc.refreshFrameFlags = av1PFrame ? 0x1 : 0xFF; + av1PictureDesc.primaryReferenceName = av1PFrame ? nri::VideoAV1ReferenceName::LAST : nri::VideoAV1ReferenceName::NONE; + av1PictureDesc.flags = av1PFrame ? nri::VideoAV1PictureBits::SHOW_FRAME | nri::VideoAV1PictureBits::SHOWABLE_FRAME + : nri::VideoAV1PictureBits::ERROR_RESILIENT_MODE | nri::VideoAV1PictureBits::DISABLE_CDF_UPDATE | nri::VideoAV1PictureBits::ALLOW_SCREEN_CONTENT_TOOLS | nri::VideoAV1PictureBits::FORCE_INTEGER_MV | nri::VideoAV1PictureBits::SHOW_FRAME | nri::VideoAV1PictureBits::SHOWABLE_FRAME; + av1PictureDesc.renderWidthMinus1 = (uint16_t)(m_VideoWidth - 1); + av1PictureDesc.renderHeightMinus1 = (uint16_t)(m_VideoHeight - 1); + av1PictureDesc.baseQIndex = (uint8_t)m_AV1BaseQIndex; av1PictureDesc.interpolationFilter = 0; av1PictureDesc.txMode = 2; av1PictureDesc.cdefDampingMinus3 = 3; @@ -1377,12 +1462,35 @@ bool Sample::TrySubmitEncodeAndMetadataReadback(float timeSec) { av1PictureDesc.cdef = &av1Cdef; av1PictureDesc.loopRestoration = &av1LoopRestoration; av1PictureDesc.globalMotion = &av1GlobalMotion; + nri::VideoReference av1Reference = {m_ReconstructedPicture, 0}; + nri::VideoAV1ReferenceDesc av1References[8] = {}; + if (av1PFrame) { + const nri::VideoAV1ReferenceName av1ReferenceNames[] = { + nri::VideoAV1ReferenceName::LAST, + nri::VideoAV1ReferenceName::LAST2, + nri::VideoAV1ReferenceName::LAST3, + nri::VideoAV1ReferenceName::GOLDEN, + nri::VideoAV1ReferenceName::BWDREF, + nri::VideoAV1ReferenceName::ALTREF2, + nri::VideoAV1ReferenceName::ALTREF, + }; + for (uint32_t i = 0; i < helper::GetCountOf(av1ReferenceNames); i++) { + av1References[i].name = av1ReferenceNames[i]; + av1References[i].refFrameIndex = 0; + av1References[i].frameType = nri::VideoEncodeFrameType::IDR; + av1References[i].orderHint = 0; + av1References[i].frameId = 0; + av1References[i].slot = 0; + } + av1PictureDesc.references = av1References; + av1PictureDesc.referenceNum = helper::GetCountOf(av1ReferenceNames); + } nri::VideoEncodeRateControlDesc rateControlDesc = {}; rateControlDesc.mode = nri::VideoEncodeRateControlMode::CQP; - rateControlDesc.qpI = 20; - rateControlDesc.qpP = 22; - rateControlDesc.qpB = 24; + rateControlDesc.qpI = (uint8_t)m_QpI; + rateControlDesc.qpP = (uint8_t)m_QpP; + rateControlDesc.qpB = (uint8_t)m_QpB; rateControlDesc.frameRateNumerator = 30; rateControlDesc.frameRateDenominator = 1; @@ -1397,6 +1505,12 @@ bool Sample::TrySubmitEncodeAndMetadataReadback(float timeSec) { encodeDesc.pictureDesc = &pictureDesc; encodeDesc.rateControlDesc = &rateControlDesc; encodeDesc.reconstructedPicture = m_ReconstructedPicture; + if (av1PFrame) { + encodeDesc.reconstructedPicture = m_AV1PReconstructedPicture; + encodeDesc.references = &av1Reference; + encodeDesc.referenceNum = 1; + encodeDesc.reconstructedSlot = 1; + } encodeDesc.metadata = m_MetadataBuffer; encodeDesc.resolvedMetadata = m_ResolvedMetadataBuffer; encodeDesc.av1PictureDesc = m_Codec == SampleCodec::AV1 ? &av1PictureDesc : nullptr; @@ -1408,25 +1522,31 @@ bool Sample::TrySubmitEncodeAndMetadataReadback(float timeSec) { bufferBarriers[1].buffer = m_ResolvedMetadataBuffer; bufferBarriers[1].after = {nri::AccessBits::VIDEO_ENCODE_WRITE, nri::StageBits::VIDEO_ENCODE}; - nri::TextureBarrierDesc textureBarriers[2] = {}; + nri::TextureBarrierDesc textureBarriers[3] = {}; textureBarriers[0].texture = m_EncodeTexture; textureBarriers[0].before = {nri::AccessBits::NONE, nri::Layout::GENERAL, nri::StageBits::NONE}; textureBarriers[0].after = {nri::AccessBits::VIDEO_ENCODE_READ, nri::Layout::VIDEO_ENCODE_SRC, nri::StageBits::VIDEO_ENCODE}; textureBarriers[0].mipNum = nri::REMAINING; textureBarriers[0].layerNum = nri::REMAINING; textureBarriers[0].planes = nri::PlaneBits::ALL; - textureBarriers[1].texture = m_ReconstructedTexture; + textureBarriers[1].texture = av1PFrame ? m_AV1PReconstructedTexture : m_ReconstructedTexture; textureBarriers[1].before = {nri::AccessBits::NONE, nri::Layout::GENERAL, nri::StageBits::NONE}; textureBarriers[1].after = {nri::AccessBits::VIDEO_ENCODE_WRITE, nri::Layout::VIDEO_ENCODE_DPB, nri::StageBits::VIDEO_ENCODE}; textureBarriers[1].mipNum = nri::REMAINING; textureBarriers[1].layerNum = nri::REMAINING; textureBarriers[1].planes = nri::PlaneBits::ALL; + textureBarriers[2].texture = m_ReconstructedTexture; + textureBarriers[2].before = {nri::AccessBits::VIDEO_ENCODE_WRITE, nri::Layout::VIDEO_ENCODE_DPB, nri::StageBits::VIDEO_ENCODE}; + textureBarriers[2].after = {nri::AccessBits::VIDEO_ENCODE_READ, nri::Layout::VIDEO_ENCODE_DPB, nri::StageBits::VIDEO_ENCODE}; + textureBarriers[2].mipNum = nri::REMAINING; + textureBarriers[2].layerNum = nri::REMAINING; + textureBarriers[2].planes = nri::PlaneBits::ALL; nri::BarrierDesc barrierDesc = {}; barrierDesc.buffers = bufferBarriers; barrierDesc.bufferNum = helper::GetCountOf(bufferBarriers); barrierDesc.textures = textureBarriers; - barrierDesc.textureNum = helper::GetCountOf(textureBarriers); + barrierDesc.textureNum = av1PFrame ? helper::GetCountOf(textureBarriers) : 2; NRI.CmdBarrier(commandBuffer, barrierDesc); Video.CmdEncodeVideo(commandBuffer, encodeDesc); // D3D12 resolves encode metadata inside CmdEncodeVideo and transitions the raw metadata buffer to encode-read before returning. @@ -1438,8 +1558,10 @@ bool Sample::TrySubmitEncodeAndMetadataReadback(float timeSec) { textureBarriers[0].after = {nri::AccessBits::NONE, nri::Layout::GENERAL, nri::StageBits::NONE}; textureBarriers[1].before = {nri::AccessBits::VIDEO_ENCODE_WRITE, nri::Layout::VIDEO_ENCODE_DPB, nri::StageBits::VIDEO_ENCODE}; textureBarriers[1].after = {nri::AccessBits::NONE, nri::Layout::GENERAL, nri::StageBits::NONE}; + textureBarriers[2].before = {nri::AccessBits::VIDEO_ENCODE_READ, nri::Layout::VIDEO_ENCODE_DPB, nri::StageBits::VIDEO_ENCODE}; + textureBarriers[2].after = {nri::AccessBits::NONE, nri::Layout::GENERAL, nri::StageBits::NONE}; barrierDesc.textures = textureBarriers; - barrierDesc.textureNum = helper::GetCountOf(textureBarriers); + barrierDesc.textureNum = av1PFrame ? helper::GetCountOf(textureBarriers) : 2; NRI.CmdBarrier(commandBuffer, barrierDesc); })) { m_VideoStatus = std::string(GetCodecName(m_Codec)) + " encode submission failed"; @@ -1511,8 +1633,7 @@ bool Sample::TrySubmitEncodeAndMetadataReadback(float timeSec) { } m_MetadataReadbackPending = true; - m_VideoStatus = std::string(GetCodecName(m_Codec)) + " encode submitted; waiting for async metadata readback"; - return true; + return false; } bool Sample::TryDecodePendingMetadata(float timeSec) { @@ -1562,16 +1683,25 @@ bool Sample::TryDecodePendingMetadata(float timeSec) { av1InfoDesc.encodedPayloadHeader = encodedHeader; av1InfoDesc.encodedPayloadHeaderSize = encodedHeader ? std::min(AV1_HEADER_READBACK_SIZE, feedback.encodedBitstreamWrittenBytes) : 0; const nri::Result av1InfoResult = Video.GetVideoEncodeAV1DecodeInfo(*m_EncodeSession, *m_ResolvedMetadataReadbackBuffer, 0, av1InfoDesc, av1DecodeInfo); - if (encodedHeader) - NRI.UnmapBuffer(*m_BitstreamHeaderReadbackBuffer); if (av1InfoResult != nri::Result::SUCCESS) { + if (encodedHeader) + NRI.UnmapBuffer(*m_BitstreamHeaderReadbackBuffer); m_VideoStatus = "Failed to prepare AV1 decode metadata"; return false; } + if (encodedHeader) + NRI.UnmapBuffer(*m_BitstreamHeaderReadbackBuffer); feedback.encodedBitstreamWrittenBytes = av1DecodeInfo.bitstreamOffset + av1DecodeInfo.bitstreamSize; } - return DecodeEncodedBitstream(feedback, m_Codec == SampleCodec::AV1 ? &av1DecodeInfo : nullptr, timeSec); + const bool decoded = DecodeEncodedBitstream(feedback, m_Codec == SampleCodec::AV1 ? &av1DecodeInfo : nullptr, timeSec); + if (decoded && m_AV1PFrameVisual && m_AV1PFrameStage == 0) { + m_AV1PFrameStage = 1; + m_DecodePreviewReady = false; + return TrySubmitEncodeAndMetadataReadback(timeSec); + } + + return decoded; } bool Sample::DecodeEncodedBitstream(const nri::VideoEncodeFeedback& feedback, const nri::VideoAV1EncodeDecodeInfo* av1DecodeInfo, float timeSec) { @@ -1656,42 +1786,87 @@ bool Sample::DecodeEncodedBitstream(const nri::VideoEncodeFeedback& feedback, co av1Info.tileLayout.widthInSuperblocksMinus1 = av1Info.widthInSuperblocksMinus1; av1Info.tileLayout.heightInSuperblocksMinus1 = av1Info.heightInSuperblocksMinus1; } + nri::VideoReference av1DecodeReference = {m_DecodePicture, 0}; + nri::VideoAV1ReferenceDesc av1DecodeReferences[8] = {}; + uint8_t av1DecodeOrderHints[8] = {}; + const bool av1PFrame = m_AV1PFrameVisual && m_AV1PFrameStage == 1; + if (av1PFrame) { + const nri::VideoAV1ReferenceName av1ReferenceNames[] = { + nri::VideoAV1ReferenceName::LAST, + nri::VideoAV1ReferenceName::LAST2, + nri::VideoAV1ReferenceName::LAST3, + nri::VideoAV1ReferenceName::GOLDEN, + nri::VideoAV1ReferenceName::BWDREF, + nri::VideoAV1ReferenceName::ALTREF2, + nri::VideoAV1ReferenceName::ALTREF, + }; + for (uint32_t i = 0; i < helper::GetCountOf(av1ReferenceNames); i++) { + av1DecodeReferences[i].name = av1ReferenceNames[i]; + av1DecodeReferences[i].refFrameIndex = 0; + av1DecodeReferences[i].frameType = nri::VideoEncodeFrameType::IDR; + av1DecodeReferences[i].orderHint = 0; + av1DecodeReferences[i].frameId = 0; + av1DecodeReferences[i].slot = 0; + av1DecodeReferences[i].savedOrderHints = av1DecodeOrderHints; + } + av1Info.picture.frameType = nri::VideoEncodeFrameType::P; + av1Info.picture.orderHint = 1; + av1Info.picture.refreshFrameFlags = 0x1; + av1Info.picture.primaryReferenceName = nri::VideoAV1ReferenceName::LAST; + av1Info.picture.currentFrameId = 1; + av1Info.picture.flags = nri::VideoAV1PictureBits::SHOW_FRAME | nri::VideoAV1PictureBits::SHOWABLE_FRAME; + av1Info.picture.orderHints = av1DecodeOrderHints; + av1Info.picture.references = av1DecodeReferences; + av1Info.picture.referenceNum = helper::GetCountOf(av1ReferenceNames); + } nri::VideoDecodeDesc decodeDesc = {}; decodeDesc.session = m_DecodeSession; decodeDesc.parameters = m_DecodeParameters; decodeDesc.bitstream.buffer = m_DecodeBitstreamBuffer; decodeDesc.bitstream.size = decodeBitstreamSize; - decodeDesc.dstPicture = m_DecodePicture; - decodeDesc.dstSlot = 0; + decodeDesc.dstPicture = av1PFrame ? m_AV1PDecodePicture : m_DecodePicture; + decodeDesc.references = av1PFrame ? &av1DecodeReference : nullptr; + decodeDesc.referenceNum = av1PFrame ? 1u : 0u; + decodeDesc.dstSlot = av1PFrame ? 1u : 0u; decodeDesc.h264PictureDesc = m_Codec == SampleCodec::H264 ? &h264DecodePicture : nullptr; decodeDesc.h265PictureDesc = m_Codec == SampleCodec::H265 ? &h265DecodePicture : nullptr; decodeDesc.av1PictureDesc = av1DecodeInfo ? &av1Info.picture : nullptr; nri::VideoDecodePictureStates decodePictureStates = {}; - if (Video.GetVideoDecodePictureStates(*m_DecodePicture, decodePictureStates) != nri::Result::SUCCESS) { + if (Video.GetVideoDecodePictureStates(*(av1PFrame ? m_AV1PDecodePicture : m_DecodePicture), decodePictureStates) != nri::Result::SUCCESS) { m_VideoStatus = "Failed to query video decode picture states"; return false; } if (!SubmitOneTime(NRI, *m_VideoDecodeQueue, [&](nri::CommandBuffer& commandBuffer) { - nri::TextureBarrierDesc textureBarrier = {}; - textureBarrier.texture = m_DecodeTexture; - textureBarrier.before = {nri::AccessBits::NONE, nri::Layout::GENERAL, nri::StageBits::NONE}; - textureBarrier.after = decodePictureStates.decodeWrite; - textureBarrier.mipNum = nri::REMAINING; - textureBarrier.layerNum = nri::REMAINING; - textureBarrier.planes = nri::PlaneBits::ALL; + nri::TextureBarrierDesc textureBarriers[2] = {}; + textureBarriers[0].texture = av1PFrame ? m_AV1PDecodeTexture : m_DecodeTexture; + textureBarriers[0].before = {nri::AccessBits::NONE, nri::Layout::GENERAL, nri::StageBits::NONE}; + textureBarriers[0].after = decodePictureStates.decodeWrite; + textureBarriers[0].mipNum = nri::REMAINING; + textureBarriers[0].layerNum = nri::REMAINING; + textureBarriers[0].planes = nri::PlaneBits::ALL; + textureBarriers[1].texture = m_DecodeTexture; + textureBarriers[1].before = {nri::AccessBits::NONE, nri::Layout::GENERAL, nri::StageBits::NONE}; + textureBarriers[1].after = {nri::AccessBits::VIDEO_DECODE_READ, nri::Layout::VIDEO_DECODE_DPB, nri::StageBits::VIDEO_DECODE}; + textureBarriers[1].mipNum = nri::REMAINING; + textureBarriers[1].layerNum = nri::REMAINING; + textureBarriers[1].planes = nri::PlaneBits::ALL; nri::BarrierDesc barrierDesc = {}; - barrierDesc.textures = &textureBarrier; - barrierDesc.textureNum = 1; + barrierDesc.textures = textureBarriers; + barrierDesc.textureNum = av1PFrame ? 2 : 1; NRI.CmdBarrier(commandBuffer, barrierDesc); Video.CmdDecodeVideo(commandBuffer, decodeDesc); if (decodePictureStates.releaseAfterDecode) { - textureBarrier.before = decodePictureStates.decodeWrite; - textureBarrier.after = decodePictureStates.afterDecode; + textureBarriers[0].before = decodePictureStates.decodeWrite; + textureBarriers[0].after = decodePictureStates.afterDecode; + if (av1PFrame) { + textureBarriers[1].before = {nri::AccessBits::VIDEO_DECODE_READ, nri::Layout::VIDEO_DECODE_DPB, nri::StageBits::VIDEO_DECODE}; + textureBarriers[1].after = {nri::AccessBits::NONE, nri::Layout::GENERAL, nri::StageBits::NONE}; + } NRI.CmdBarrier(commandBuffer, barrierDesc); } })) { @@ -1701,7 +1876,7 @@ bool Sample::DecodeEncodedBitstream(const nri::VideoEncodeFeedback& feedback, co if (!SubmitOneTime(NRI, *m_GraphicsQueue, [&](nri::CommandBuffer& commandBuffer) { nri::TextureBarrierDesc textureBarrier = {}; - textureBarrier.texture = m_DecodeTexture; + textureBarrier.texture = av1PFrame ? m_AV1PDecodeTexture : m_DecodeTexture; textureBarrier.before = decodePictureStates.graphicsBefore; textureBarrier.after = {nri::AccessBits::COPY_SOURCE, nri::Layout::COPY_SOURCE, nri::StageBits::COPY}; textureBarrier.mipNum = nri::REMAINING; @@ -1724,19 +1899,19 @@ bool Sample::DecodeEncodedBitstream(const nri::VideoEncodeFeedback& feedback, co NRI.CmdBarrier(commandBuffer, copyBarrierDesc); nri::TextureRegionDesc lumaRegion = {}; - lumaRegion.width = VIDEO_WIDTH; - lumaRegion.height = VIDEO_HEIGHT; + lumaRegion.width = (nri::Dim_t)m_VideoWidth; + lumaRegion.height = (nri::Dim_t)m_VideoHeight; lumaRegion.depth = 1; lumaRegion.planes = nri::PlaneBits::PLANE_0; nri::TextureDataLayoutDesc lumaLayout = {}; lumaLayout.rowPitch = m_Nv12Layout.yRowPitchBytes; lumaLayout.slicePitch = m_Nv12Layout.ySlicePitchBytes; - NRI.CmdReadbackTextureToBuffer(commandBuffer, *m_UploadBuffer, lumaLayout, *m_DecodeTexture, lumaRegion); + NRI.CmdReadbackTextureToBuffer(commandBuffer, *m_UploadBuffer, lumaLayout, *(av1PFrame ? m_AV1PDecodeTexture : m_DecodeTexture), lumaRegion); nri::TextureRegionDesc chromaRegion = {}; - chromaRegion.width = VIDEO_WIDTH; - chromaRegion.height = VIDEO_HEIGHT; + chromaRegion.width = (nri::Dim_t)m_VideoWidth; + chromaRegion.height = (nri::Dim_t)m_VideoHeight; chromaRegion.depth = 1; chromaRegion.planes = nri::PlaneBits::PLANE_1; @@ -1744,7 +1919,7 @@ bool Sample::DecodeEncodedBitstream(const nri::VideoEncodeFeedback& feedback, co chromaLayout.offset = m_Nv12Layout.uvOffsetBytes; chromaLayout.rowPitch = m_Nv12Layout.uvRowPitchBytes; chromaLayout.slicePitch = m_Nv12Layout.uvSlicePitchBytes; - NRI.CmdReadbackTextureToBuffer(commandBuffer, *m_UploadBuffer, chromaLayout, *m_DecodeTexture, chromaRegion); + NRI.CmdReadbackTextureToBuffer(commandBuffer, *m_UploadBuffer, chromaLayout, *(av1PFrame ? m_AV1PDecodeTexture : m_DecodeTexture), chromaRegion); nv12BufferBarrier.before = {nri::AccessBits::COPY_DESTINATION, nri::StageBits::COPY}; nv12BufferBarrier.after = {nri::AccessBits::NONE, nri::StageBits::NONE}; @@ -1816,7 +1991,10 @@ void Sample::PrepareFrame(uint32_t) { ImGui::SetNextWindowSize({900.0f, 520.0f}, ImGuiCond_Once); ImGui::Begin("NRI Video Encode / Decode"); { - ImGui::Text("Codec: %s, format: NV12, size: %ux%u", GetCodecName(m_Codec), VIDEO_WIDTH, VIDEO_HEIGHT); + ImGui::Text("Codec: %s, format: NV12, size: %ux%u", GetCodecName(m_Codec), m_VideoWidth, m_VideoHeight); + ImGui::Text("CQP: I=%u, P=%u, B=%u%s", m_QpI, m_QpP, m_QpB, m_Codec == SampleCodec::AV1 ? ", AV1 baseQIndex follows below" : ""); + if (m_Codec == SampleCodec::AV1) + ImGui::Text("AV1: frame=%s, baseQIndex=%u", m_AV1FrameArg.c_str(), m_AV1BaseQIndex); ImGui::TextWrapped("Video: %s", m_VideoStatus.c_str()); ImGui::TextWrapped("Preview: %s", m_PreviewStatus.c_str()); ImGui::Text("Encode queue: %s, decode queue: %s", m_VideoEncodeQueue ? "yes" : "no", m_VideoDecodeQueue ? "yes" : "no"); @@ -1829,11 +2007,11 @@ void Sample::PrepareFrame(uint32_t) { if (ImGui::BeginTable("PreviewPanels", 2, ImGuiTableFlags_SizingStretchSame)) { ImGui::TableNextColumn(); float width = std::max(200.0f, ImGui::GetContentRegionAvail().x); - DrawTexturePanel("Generated source", m_SourcePreviewTextureView, {width, width * float(VIDEO_HEIGHT) / float(VIDEO_WIDTH)}); + DrawTexturePanel("Generated source", m_SourcePreviewTextureView, {width, width * float(m_VideoHeight) / float(m_VideoWidth)}); ImGui::TableNextColumn(); width = std::max(200.0f, ImGui::GetContentRegionAvail().x); - DrawTexturePanel(m_DecodePreviewReady ? "Decoded preview" : "Decoded preview pending", m_DecodePreviewReady ? m_DecodePreviewTextureView : nullptr, {width, width * float(VIDEO_HEIGHT) / float(VIDEO_WIDTH)}); + DrawTexturePanel(m_DecodePreviewReady ? "Decoded preview" : "Decoded preview pending", m_DecodePreviewReady ? m_DecodePreviewTextureView : nullptr, {width, width * float(m_VideoHeight) / float(m_VideoWidth)}); ImGui::EndTable(); } } From 86baaf8b5e4d140f4505afee1a0a5bf3a970beda Mon Sep 17 00:00:00 2001 From: Bjorn Schobben Date: Thu, 7 May 2026 14:31:54 +0200 Subject: [PATCH 14/20] Update NRIFramework after NRI test cleanup --- External/NRIFramework | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/External/NRIFramework b/External/NRIFramework index 6aa54f6..646c0ba 160000 --- a/External/NRIFramework +++ b/External/NRIFramework @@ -1 +1 @@ -Subproject commit 6aa54f6320077b8c8d58215df1713f6ed78707a2 +Subproject commit 646c0ba0a39b941ac1980b84e81e4ba3419bc856 From 778f9ac56ee4dd4b7399277a8d0a82d2f0f0ad60 Mon Sep 17 00:00:00 2001 From: Bjorn Schobben Date: Thu, 7 May 2026 15:29:18 +0200 Subject: [PATCH 15/20] Fix D3D12 AV1 decode preview --- External/NRIFramework | 2 +- Source/VideoEncodeDecode.cpp | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/External/NRIFramework b/External/NRIFramework index 646c0ba..c88c0d5 160000 --- a/External/NRIFramework +++ b/External/NRIFramework @@ -1 +1 @@ -Subproject commit 646c0ba0a39b941ac1980b84e81e4ba3419bc856 +Subproject commit c88c0d5c0e8cd860b8dc94b6066444e3bf68cd6d diff --git a/Source/VideoEncodeDecode.cpp b/Source/VideoEncodeDecode.cpp index 956af94..d19fd7e 100644 --- a/Source/VideoEncodeDecode.cpp +++ b/Source/VideoEncodeDecode.cpp @@ -1671,7 +1671,8 @@ bool Sample::TryDecodePendingMetadata(float timeSec) { nri::VideoAV1EncodeDecodeInfo av1DecodeInfo = {}; if (m_Codec == SampleCodec::AV1) { - const uint8_t* encodedHeader = (const uint8_t*)NRI.MapBuffer(*m_BitstreamHeaderReadbackBuffer, 0, AV1_HEADER_READBACK_SIZE); + const bool needsEncodedHeaderReadback = feedbackResult == nri::Result::UNSUPPORTED; + const uint8_t* encodedHeader = needsEncodedHeaderReadback ? (const uint8_t*)NRI.MapBuffer(*m_BitstreamHeaderReadbackBuffer, 0, AV1_HEADER_READBACK_SIZE) : nullptr; if (!encodedHeader && feedbackResult == nri::Result::UNSUPPORTED) { m_VideoStatus = "Failed to map AV1 encoded header readback"; return false; From a168a1c47d9a719f8a4c0651024cb424149f16f2 Mon Sep 17 00:00:00 2001 From: Bjorn Schobben Date: Thu, 7 May 2026 15:39:02 +0200 Subject: [PATCH 16/20] Keep AV1 decode info backend neutral --- External/NRIFramework | 2 +- Source/VideoEncodeDecode.cpp | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/External/NRIFramework b/External/NRIFramework index c88c0d5..9477183 160000 --- a/External/NRIFramework +++ b/External/NRIFramework @@ -1 +1 @@ -Subproject commit c88c0d5c0e8cd860b8dc94b6066444e3bf68cd6d +Subproject commit 9477183a12988142e9eef81ec016e89b949eed73 diff --git a/Source/VideoEncodeDecode.cpp b/Source/VideoEncodeDecode.cpp index d19fd7e..e6f85b7 100644 --- a/Source/VideoEncodeDecode.cpp +++ b/Source/VideoEncodeDecode.cpp @@ -1671,9 +1671,8 @@ bool Sample::TryDecodePendingMetadata(float timeSec) { nri::VideoAV1EncodeDecodeInfo av1DecodeInfo = {}; if (m_Codec == SampleCodec::AV1) { - const bool needsEncodedHeaderReadback = feedbackResult == nri::Result::UNSUPPORTED; - const uint8_t* encodedHeader = needsEncodedHeaderReadback ? (const uint8_t*)NRI.MapBuffer(*m_BitstreamHeaderReadbackBuffer, 0, AV1_HEADER_READBACK_SIZE) : nullptr; - if (!encodedHeader && feedbackResult == nri::Result::UNSUPPORTED) { + const uint8_t* encodedHeader = (const uint8_t*)NRI.MapBuffer(*m_BitstreamHeaderReadbackBuffer, 0, AV1_HEADER_READBACK_SIZE); + if (!encodedHeader) { m_VideoStatus = "Failed to map AV1 encoded header readback"; return false; } From 4e6884cfc31b041f21b1cfac4e3e6d416eefdc7b Mon Sep 17 00:00:00 2001 From: Bjorn Schobben Date: Thu, 7 May 2026 15:53:09 +0200 Subject: [PATCH 17/20] Use NRI AV1 metadata without header readback --- External/NRIFramework | 2 +- Source/VideoEncodeDecode.cpp | 53 ++++-------------------------------- 2 files changed, 7 insertions(+), 48 deletions(-) diff --git a/External/NRIFramework b/External/NRIFramework index 9477183..3b3f726 160000 --- a/External/NRIFramework +++ b/External/NRIFramework @@ -1 +1 @@ -Subproject commit 9477183a12988142e9eef81ec016e89b949eed73 +Subproject commit 3b3f72609243d579ae55d0b1aea80226f8b3bd1e diff --git a/Source/VideoEncodeDecode.cpp b/Source/VideoEncodeDecode.cpp index e6f85b7..f6ede04 100644 --- a/Source/VideoEncodeDecode.cpp +++ b/Source/VideoEncodeDecode.cpp @@ -22,7 +22,6 @@ constexpr uint32_t DEFAULT_VIDEO_HEIGHT = 1080; constexpr double ROUND_TRIP_INTERVAL_SEC = 1.0 / 60.0; constexpr uint64_t BITSTREAM_SIZE = 2 * 1024 * 1024; constexpr uint64_t ENCODED_SLICE_OFFSET = 4096; -constexpr uint64_t AV1_HEADER_READBACK_SIZE = 4096; constexpr uint64_t METADATA_SIZE = 4 * 1024 * 1024; constexpr uint64_t RESOLVED_METADATA_SIZE = 4096; @@ -303,7 +302,6 @@ class Sample : public SampleBase { nri::DescriptorPool* m_GenerateDescriptorPool = nullptr; nri::DescriptorSet* m_GenerateDescriptorSet = nullptr; nri::Buffer* m_BitstreamHeaderUploadBuffer = nullptr; - nri::Buffer* m_BitstreamHeaderReadbackBuffer = nullptr; nri::Buffer* m_BitstreamBuffer = nullptr; nri::Buffer* m_DecodeBitstreamBuffer = nullptr; nri::Buffer* m_MetadataBuffer = nullptr; @@ -401,8 +399,6 @@ Sample::~Sample() { NRI.DestroyBuffer(m_DecodeBitstreamBuffer); if (m_BitstreamBuffer) NRI.DestroyBuffer(m_BitstreamBuffer); - if (m_BitstreamHeaderReadbackBuffer) - NRI.DestroyBuffer(m_BitstreamHeaderReadbackBuffer); if (m_BitstreamHeaderUploadBuffer) NRI.DestroyBuffer(m_BitstreamHeaderUploadBuffer); if (m_UploadBuffer) @@ -1236,10 +1232,6 @@ void Sample::TryInitializeVideo(nri::GraphicsAPI graphicsAPI) { resolvedMetadataReadbackBufferDesc.size = RESOLVED_METADATA_SIZE; resolvedMetadataReadbackBufferDesc.usage = nri::BufferUsageBits::NONE; - nri::BufferDesc bitstreamHeaderReadbackBufferDesc = {}; - bitstreamHeaderReadbackBufferDesc.size = AV1_HEADER_READBACK_SIZE; - bitstreamHeaderReadbackBufferDesc.usage = nri::BufferUsageBits::NONE; - if (NRI.CreateCommittedBuffer(*m_Device, nri::MemoryLocation::HOST_UPLOAD, 0.0f, bitstreamHeaderUploadBufferDesc, m_BitstreamHeaderUploadBuffer) != nri::Result::SUCCESS) { m_VideoStatus = "Failed to create bitstream header upload buffer"; return; @@ -1250,11 +1242,6 @@ void Sample::TryInitializeVideo(nri::GraphicsAPI graphicsAPI) { return; } - if (NRI.CreateCommittedBuffer(*m_Device, nri::MemoryLocation::HOST_READBACK, 0.0f, bitstreamHeaderReadbackBufferDesc, m_BitstreamHeaderReadbackBuffer) != nri::Result::SUCCESS) { - m_VideoStatus = "Failed to create bitstream header readback buffer"; - return; - } - if (CreateDecodeBitstreamBuffer(NRI, *m_Device, 0.0f, decodeBitstreamBufferDesc, m_DecodeBitstreamBuffer) != nri::Result::SUCCESS) { m_VideoStatus = "Failed to create decode bitstream buffer"; return; @@ -1577,17 +1564,12 @@ bool Sample::TrySubmitEncodeAndMetadataReadback(float timeSec) { return false; } - nri::BufferBarrierDesc metadataBarriers[4] = {}; + nri::BufferBarrierDesc metadataBarriers[2] = {}; metadataBarriers[0].buffer = m_ResolvedMetadataBuffer; metadataBarriers[0].before = {nri::AccessBits::NONE, nri::StageBits::NONE}; metadataBarriers[0].after = {nri::AccessBits::COPY_DESTINATION, nri::StageBits::COPY}; metadataBarriers[1].buffer = m_ResolvedMetadataReadbackBuffer; metadataBarriers[1].after = {nri::AccessBits::COPY_DESTINATION, nri::StageBits::COPY}; - metadataBarriers[2].buffer = m_BitstreamBuffer; - metadataBarriers[2].before = {nri::AccessBits::NONE, nri::StageBits::NONE}; - metadataBarriers[2].after = {nri::AccessBits::COPY_SOURCE, nri::StageBits::COPY}; - metadataBarriers[3].buffer = m_BitstreamHeaderReadbackBuffer; - metadataBarriers[3].after = {nri::AccessBits::COPY_DESTINATION, nri::StageBits::COPY}; nri::BarrierDesc metadataBarrierDesc = {}; metadataBarrierDesc.buffers = metadataBarriers; @@ -1600,15 +1582,10 @@ bool Sample::TrySubmitEncodeAndMetadataReadback(float timeSec) { NRI.CmdBarrier(*m_MetadataReadbackCommandBuffer, metadataBarrierDesc); metadataBarrierDesc.bufferNum = helper::GetCountOf(metadataBarriers); NRI.CmdCopyBuffer(*m_MetadataReadbackCommandBuffer, *m_ResolvedMetadataReadbackBuffer, 0, *m_ResolvedMetadataBuffer, 0, RESOLVED_METADATA_SIZE); - NRI.CmdCopyBuffer(*m_MetadataReadbackCommandBuffer, *m_BitstreamHeaderReadbackBuffer, 0, *m_BitstreamBuffer, ENCODED_SLICE_OFFSET, AV1_HEADER_READBACK_SIZE); metadataBarriers[0].before = {nri::AccessBits::COPY_SOURCE, nri::StageBits::COPY}; metadataBarriers[0].after = {nri::AccessBits::NONE, nri::StageBits::NONE}; metadataBarriers[1].before = {nri::AccessBits::COPY_DESTINATION, nri::StageBits::COPY}; metadataBarriers[1].after = {nri::AccessBits::NONE, nri::StageBits::NONE}; - metadataBarriers[2].before = {nri::AccessBits::COPY_SOURCE, nri::StageBits::COPY}; - metadataBarriers[2].after = {nri::AccessBits::NONE, nri::StageBits::NONE}; - metadataBarriers[3].before = {nri::AccessBits::COPY_DESTINATION, nri::StageBits::COPY}; - metadataBarriers[3].after = {nri::AccessBits::NONE, nri::StageBits::NONE}; NRI.CmdBarrier(*m_MetadataReadbackCommandBuffer, metadataBarrierDesc); if (NRI.EndCommandBuffer(*m_MetadataReadbackCommandBuffer) != nri::Result::SUCCESS) { @@ -1648,17 +1625,11 @@ bool Sample::TryDecodePendingMetadata(float timeSec) { nri::VideoEncodeFeedback feedback = {}; const nri::Result feedbackResult = Video.GetVideoEncodeFeedback(*m_EncodeSession, *m_ResolvedMetadataReadbackBuffer, 0, feedback); if (feedbackResult != nri::Result::SUCCESS) { - if (feedbackResult == nri::Result::UNSUPPORTED && m_Codec == SampleCodec::AV1) { - feedback.encodedBitstreamOffset = 0; - feedback.encodedBitstreamWrittenBytes = AV1_HEADER_READBACK_SIZE; - feedback.writtenSubregionNum = 1; - } else { - if (feedbackResult == nri::Result::UNSUPPORTED) - m_VideoStatus = std::string(GetCodecName(m_Codec)) + " encode metadata feedback is unsupported"; - else - m_VideoStatus = "Failed to read resolved encode metadata"; - return false; - } + if (feedbackResult == nri::Result::UNSUPPORTED) + m_VideoStatus = std::string(GetCodecName(m_Codec)) + " encode metadata feedback is unsupported"; + else + m_VideoStatus = "Failed to read resolved encode metadata"; + return false; } if (feedback.errorFlags || !feedback.encodedBitstreamWrittenBytes) { @@ -1671,26 +1642,14 @@ bool Sample::TryDecodePendingMetadata(float timeSec) { nri::VideoAV1EncodeDecodeInfo av1DecodeInfo = {}; if (m_Codec == SampleCodec::AV1) { - const uint8_t* encodedHeader = (const uint8_t*)NRI.MapBuffer(*m_BitstreamHeaderReadbackBuffer, 0, AV1_HEADER_READBACK_SIZE); - if (!encodedHeader) { - m_VideoStatus = "Failed to map AV1 encoded header readback"; - return false; - } - nri::VideoAV1EncodeDecodeInfoDesc av1InfoDesc = {}; av1InfoDesc.feedback = &feedback; av1InfoDesc.sequence = &m_AV1Sequence; - av1InfoDesc.encodedPayloadHeader = encodedHeader; - av1InfoDesc.encodedPayloadHeaderSize = encodedHeader ? std::min(AV1_HEADER_READBACK_SIZE, feedback.encodedBitstreamWrittenBytes) : 0; const nri::Result av1InfoResult = Video.GetVideoEncodeAV1DecodeInfo(*m_EncodeSession, *m_ResolvedMetadataReadbackBuffer, 0, av1InfoDesc, av1DecodeInfo); if (av1InfoResult != nri::Result::SUCCESS) { - if (encodedHeader) - NRI.UnmapBuffer(*m_BitstreamHeaderReadbackBuffer); m_VideoStatus = "Failed to prepare AV1 decode metadata"; return false; } - if (encodedHeader) - NRI.UnmapBuffer(*m_BitstreamHeaderReadbackBuffer); feedback.encodedBitstreamWrittenBytes = av1DecodeInfo.bitstreamOffset + av1DecodeInfo.bitstreamSize; } From aa9721dc48118777a71c38f0511293d10e2cf220 Mon Sep 17 00:00:00 2001 From: Bjorn Schobben Date: Thu, 21 May 2026 12:05:51 +0200 Subject: [PATCH 18/20] Exercise video EOS helper in encode sample --- Source/VideoEncodeDecode.cpp | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/Source/VideoEncodeDecode.cpp b/Source/VideoEncodeDecode.cpp index f6ede04..6bea690 100644 --- a/Source/VideoEncodeDecode.cpp +++ b/Source/VideoEncodeDecode.cpp @@ -261,6 +261,7 @@ class Sample : public SampleBase { PatternConstants MakePatternConstants(PatternOperation operation, float timeSec) const; bool GeneratePatternWithCompute(const PatternConstants& constants, nri::Descriptor* previewTexture, bool returnSourceBufferToShaderStorage = false); bool WriteAnnexBHeadersToUploadBuffer(std::vector& annexBHeaders); + bool WriteAnnexBEndOfStream(std::vector& annexBEndOfStream); bool TrySubmitEncodeAndMetadataReadback(float timeSec); bool TryDecodePendingMetadata(float timeSec); bool DecodeEncodedBitstream(const nri::VideoEncodeFeedback& feedback, const nri::VideoAV1EncodeDecodeInfo* av1DecodeInfo, float timeSec); @@ -1370,6 +1371,29 @@ bool Sample::WriteAnnexBHeadersToUploadBuffer(std::vector& annexBHeader return true; } +bool Sample::WriteAnnexBEndOfStream(std::vector& annexBEndOfStream) { + annexBEndOfStream.clear(); + if (m_Codec == SampleCodec::AV1) + return true; + + nri::VideoAnnexBEndOfStreamDesc annexBDesc = {}; + annexBDesc.codec = GetNriCodec(m_Codec); + if (Video.WriteVideoAnnexBEndOfStream(annexBDesc) != nri::Result::SUCCESS || annexBDesc.writtenSize == 0) { + m_VideoStatus = std::string("Failed to query ") + GetCodecName(m_Codec) + " Annex-B end-of-stream size"; + return false; + } + + annexBEndOfStream.resize((size_t)annexBDesc.writtenSize); + annexBDesc.dst = annexBEndOfStream.data(); + annexBDesc.dstSize = annexBEndOfStream.size(); + if (Video.WriteVideoAnnexBEndOfStream(annexBDesc) != nri::Result::SUCCESS) { + m_VideoStatus = std::string("Failed to build ") + GetCodecName(m_Codec) + " Annex-B end-of-stream marker"; + return false; + } + + return true; +} + bool Sample::TrySubmitEncodeAndMetadataReadback(float timeSec) { if (!CanRunRoundTrip()) { m_VideoStatus = std::string(GetCodecName(m_Codec)) + " round trip is not currently supported in this configuration"; @@ -1667,6 +1691,9 @@ bool Sample::DecodeEncodedBitstream(const nri::VideoEncodeFeedback& feedback, co std::vector annexBHeaders; if (!WriteAnnexBHeadersToUploadBuffer(annexBHeaders)) return false; + std::vector annexBEndOfStream; + if (!WriteAnnexBEndOfStream(annexBEndOfStream)) + return false; const uint64_t encodedPayloadSkip = av1DecodeInfo ? av1DecodeInfo->bitstreamOffset : GetEncodedPayloadHeaderSkip(m_Codec, feedback.encodedBitstreamWrittenBytes); const uint64_t encodedPayloadBytes = av1DecodeInfo ? av1DecodeInfo->bitstreamSize : feedback.encodedBitstreamWrittenBytes - encodedPayloadSkip; @@ -1900,7 +1927,7 @@ bool Sample::DecodeEncodedBitstream(const nri::VideoEncodeFeedback& feedback, co m_DecodePreviewReady = true; char message[128] = {}; - std::snprintf(message, sizeof(message), "%s encode/decode round trip complete, encoded %llu bytes", GetCodecName(m_Codec), (unsigned long long)feedback.encodedBitstreamWrittenBytes); + std::snprintf(message, sizeof(message), "%s encode/decode round trip complete, encoded %llu bytes, EOS %llu bytes", GetCodecName(m_Codec), (unsigned long long)feedback.encodedBitstreamWrittenBytes, (unsigned long long)annexBEndOfStream.size()); m_VideoStatus = message; return true; } From e6195c3b08287650ece8baffff40268a8c583fa9 Mon Sep 17 00:00:00 2001 From: Bjorn Schobben Date: Thu, 21 May 2026 12:27:14 +0200 Subject: [PATCH 19/20] Use video capability helpers in sample --- Source/VideoEncodeDecode.cpp | 62 ++++++++++++++++++++++++++++++------ 1 file changed, 52 insertions(+), 10 deletions(-) diff --git a/Source/VideoEncodeDecode.cpp b/Source/VideoEncodeDecode.cpp index 6bea690..043d5fb 100644 --- a/Source/VideoEncodeDecode.cpp +++ b/Source/VideoEncodeDecode.cpp @@ -116,6 +116,10 @@ static uint64_t AlignUp(uint64_t value, uint64_t alignment) { return alignment == 0 ? value : ((value + alignment - 1) / alignment) * alignment; } +static bool IsAligned(uint64_t value, uint64_t alignment) { + return alignment == 0 || value % alignment == 0; +} + static Nv12BufferLayout MakeNv12BufferLayout(const nri::DeviceDesc& deviceDesc, uint32_t width, uint32_t height) { const uint32_t rowAlignment = std::max(deviceDesc.memoryAlignment.uploadBufferTextureRow, 1u); const uint32_t sliceAlignment = std::max(deviceDesc.memoryAlignment.uploadBufferTextureSlice, 1u); @@ -922,6 +926,29 @@ void Sample::TryInitializeVideo(nri::GraphicsAPI graphicsAPI) { decodeSessionDesc.usage = nri::VideoUsage::DECODE; decodeSessionDesc.maxReferenceNum = 16; + nri::VideoCapabilities encodeCapabilities = {}; + if (Video.GetVideoCapabilities(*m_Device, encodeSessionDesc, encodeCapabilities) != nri::Result::SUCCESS) { + m_VideoStatus = std::string(GetCodecName(m_Codec)) + " encode is unsupported for the requested size"; + return; + } + + nri::VideoCapabilities decodeCapabilities = {}; + if (Video.GetVideoCapabilities(*m_Device, decodeSessionDesc, decodeCapabilities) != nri::Result::SUCCESS) { + m_VideoStatus = std::string(GetCodecName(m_Codec)) + " decode is unsupported for the requested size"; + return; + } + + const uint64_t encodePayloadCapacity = BITSTREAM_SIZE - ENCODED_SLICE_OFFSET; + if (!IsAligned(ENCODED_SLICE_OFFSET, encodeCapabilities.bitstreamOffsetAlignment) || !IsAligned(encodePayloadCapacity, encodeCapabilities.bitstreamSizeAlignment) || encodePayloadCapacity > encodeCapabilities.bitstreamSizeMax) { + m_VideoStatus = std::string(GetCodecName(m_Codec)) + " encode bitstream buffer does not satisfy video capabilities"; + return; + } + + if (BITSTREAM_SIZE > decodeCapabilities.bitstreamSizeMax || !IsAligned(BITSTREAM_SIZE, decodeCapabilities.bitstreamSizeAlignment)) { + m_VideoStatus = std::string(GetCodecName(m_Codec)) + " decode bitstream buffer does not satisfy video capabilities"; + return; + } + if (NRI.GetQueue(*m_Device, nri::QueueType::VIDEO_ENCODE, 0, m_VideoEncodeQueue) != nri::Result::SUCCESS || NRI.GetQueue(*m_Device, nri::QueueType::VIDEO_DECODE, 0, m_VideoDecodeQueue) != nri::Result::SUCCESS) { m_VideoStatus = "Failed to get video queues"; return; @@ -1526,7 +1553,15 @@ bool Sample::TrySubmitEncodeAndMetadataReadback(float timeSec) { encodeDesc.resolvedMetadata = m_ResolvedMetadataBuffer; encodeDesc.av1PictureDesc = m_Codec == SampleCodec::AV1 ? &av1PictureDesc : nullptr; + nri::VideoEncodePictureStates encodePictureStates = {}; + if (Video.GetVideoEncodePictureStates(*(av1PFrame ? m_AV1PReconstructedPicture : m_ReconstructedPicture), encodePictureStates) != nri::Result::SUCCESS) { + m_VideoStatus = "Failed to query video encode picture states"; + return false; + } + if (!SubmitOneTime(NRI, *m_VideoEncodeQueue, [&](nri::CommandBuffer& commandBuffer) { + const nri::AccessLayoutStage encodeReferenceRead = {nri::AccessBits::VIDEO_ENCODE_READ, nri::Layout::VIDEO_ENCODE_DPB, nri::StageBits::VIDEO_ENCODE}; + nri::BufferBarrierDesc bufferBarriers[2] = {}; bufferBarriers[0].buffer = m_MetadataBuffer; bufferBarriers[0].after = {nri::AccessBits::VIDEO_ENCODE_WRITE, nri::StageBits::VIDEO_ENCODE}; @@ -1536,19 +1571,19 @@ bool Sample::TrySubmitEncodeAndMetadataReadback(float timeSec) { nri::TextureBarrierDesc textureBarriers[3] = {}; textureBarriers[0].texture = m_EncodeTexture; textureBarriers[0].before = {nri::AccessBits::NONE, nri::Layout::GENERAL, nri::StageBits::NONE}; - textureBarriers[0].after = {nri::AccessBits::VIDEO_ENCODE_READ, nri::Layout::VIDEO_ENCODE_SRC, nri::StageBits::VIDEO_ENCODE}; + textureBarriers[0].after = encodePictureStates.encodeRead; textureBarriers[0].mipNum = nri::REMAINING; textureBarriers[0].layerNum = nri::REMAINING; textureBarriers[0].planes = nri::PlaneBits::ALL; textureBarriers[1].texture = av1PFrame ? m_AV1PReconstructedTexture : m_ReconstructedTexture; textureBarriers[1].before = {nri::AccessBits::NONE, nri::Layout::GENERAL, nri::StageBits::NONE}; - textureBarriers[1].after = {nri::AccessBits::VIDEO_ENCODE_WRITE, nri::Layout::VIDEO_ENCODE_DPB, nri::StageBits::VIDEO_ENCODE}; + textureBarriers[1].after = encodePictureStates.encodeWrite; textureBarriers[1].mipNum = nri::REMAINING; textureBarriers[1].layerNum = nri::REMAINING; textureBarriers[1].planes = nri::PlaneBits::ALL; textureBarriers[2].texture = m_ReconstructedTexture; - textureBarriers[2].before = {nri::AccessBits::VIDEO_ENCODE_WRITE, nri::Layout::VIDEO_ENCODE_DPB, nri::StageBits::VIDEO_ENCODE}; - textureBarriers[2].after = {nri::AccessBits::VIDEO_ENCODE_READ, nri::Layout::VIDEO_ENCODE_DPB, nri::StageBits::VIDEO_ENCODE}; + textureBarriers[2].before = encodePictureStates.releaseAfterEncode ? encodePictureStates.afterEncode : encodePictureStates.encodeWrite; + textureBarriers[2].after = encodeReferenceRead; textureBarriers[2].mipNum = nri::REMAINING; textureBarriers[2].layerNum = nri::REMAINING; textureBarriers[2].planes = nri::PlaneBits::ALL; @@ -1565,14 +1600,21 @@ bool Sample::TrySubmitEncodeAndMetadataReadback(float timeSec) { bufferBarriers[0].after = {}; bufferBarriers[1].before = {nri::AccessBits::VIDEO_ENCODE_WRITE, nri::StageBits::VIDEO_ENCODE}; bufferBarriers[1].after = {}; - textureBarriers[0].before = {nri::AccessBits::VIDEO_ENCODE_READ, nri::Layout::VIDEO_ENCODE_SRC, nri::StageBits::VIDEO_ENCODE}; + textureBarriers[0].before = encodePictureStates.encodeRead; textureBarriers[0].after = {nri::AccessBits::NONE, nri::Layout::GENERAL, nri::StageBits::NONE}; - textureBarriers[1].before = {nri::AccessBits::VIDEO_ENCODE_WRITE, nri::Layout::VIDEO_ENCODE_DPB, nri::StageBits::VIDEO_ENCODE}; - textureBarriers[1].after = {nri::AccessBits::NONE, nri::Layout::GENERAL, nri::StageBits::NONE}; - textureBarriers[2].before = {nri::AccessBits::VIDEO_ENCODE_READ, nri::Layout::VIDEO_ENCODE_DPB, nri::StageBits::VIDEO_ENCODE}; - textureBarriers[2].after = {nri::AccessBits::NONE, nri::Layout::GENERAL, nri::StageBits::NONE}; + uint32_t textureBarrierNum = 1; + if (encodePictureStates.releaseAfterEncode) { + textureBarriers[1].before = encodePictureStates.encodeWrite; + textureBarriers[1].after = encodePictureStates.afterEncode; + textureBarrierNum = 2; + if (av1PFrame) { + textureBarriers[2].before = encodeReferenceRead; + textureBarriers[2].after = encodePictureStates.afterEncode; + textureBarrierNum = 3; + } + } barrierDesc.textures = textureBarriers; - barrierDesc.textureNum = av1PFrame ? helper::GetCountOf(textureBarriers) : 2; + barrierDesc.textureNum = textureBarrierNum; NRI.CmdBarrier(commandBuffer, barrierDesc); })) { m_VideoStatus = std::string(GetCodecName(m_Codec)) + " encode submission failed"; From 61a8450cefc28cb958e2ecfb38dbce31989ddb6f Mon Sep 17 00:00:00 2001 From: Bjorn Schobben Date: Thu, 21 May 2026 13:37:53 +0200 Subject: [PATCH 20/20] Update NRIFramework video fixes --- External/NRIFramework | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/External/NRIFramework b/External/NRIFramework index 3b3f726..78bf694 160000 --- a/External/NRIFramework +++ b/External/NRIFramework @@ -1 +1 @@ -Subproject commit 3b3f72609243d579ae55d0b1aea80226f8b3bd1e +Subproject commit 78bf6947b3ebe70c69cc12c6850bfd4fc8fa3c15