diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 03993f25fa..4e0425292d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -24,6 +24,8 @@ jobs: - uses: actions/checkout@v4 with: submodules: "recursive" + - name: Update submodules + run: git submodule update --init --recursive - if: ${{ matrix.platform == 'ubuntu' }} name: Install RandR headers run: | @@ -68,6 +70,8 @@ jobs: - uses: actions/checkout@v4 with: submodules: "recursive" + - name: Update submodules + run: git submodule update --init --recursive - if: ${{ matrix.platform == 'ubuntu' }} name: Install RandR headers @@ -97,6 +101,8 @@ jobs: - uses: actions/checkout@v4 with: submodules: "recursive" + - name: Update submodules + run: git submodule update --init --recursive - name: ccache uses: hendrikmuhs/ccache-action@v1.2.9 with: @@ -115,6 +121,8 @@ jobs: - uses: actions/checkout@v4 with: submodules: "recursive" + - name: Update submodules + run: git submodule update --init --recursive - name: Install RandR headers run: | sudo apt-get update @@ -139,6 +147,8 @@ jobs: - uses: actions/checkout@v4 with: submodules: "recursive" + - name: Update submodules + run: git submodule update --init --recursive - name: set up JDK uses: actions/setup-java@v4 @@ -171,6 +181,8 @@ jobs: - uses: actions/checkout@v4 with: submodules: "recursive" + - name: Update submodules + run: git submodule update --init --recursive - name: ccache uses: hendrikmuhs/ccache-action@v1.2.9 diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 02fae70b50..34063b7598 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -29,7 +29,7 @@ jobs: run: | git config --global --add safe.directory /__w/Vulkan-Samples/Vulkan-Samples git fetch origin $TARGET_BRANCH:$TARGET_BRANCH - echo all="$(git diff --name-only --diff-filter=ACMRT $TARGET_BRANCH | grep -v -e .github -e third_party -e .in$ | xargs)" >> $GITHUB_OUTPUT + echo all="$(git diff --name-only --diff-filter=ACMRT $TARGET_BRANCH | grep -v -e .github -e third_party -e 'samples/complex/render_octomap/octomap' -e .in$ | xargs)" >> $GITHUB_OUTPUT doxygen: name: Doxygen Syntax Check @@ -101,7 +101,7 @@ jobs: - run: git config --global --add safe.directory /__w/Vulkan-Samples/Vulkan-Samples - run: cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DGLFW_BUILD_WAYLAND=OFF -Bbuild/clang - run: | - /usr/bin/run-clang-tidy -j $(($(nproc)/2+1)) -p build/clang -header-filter=framework,samples,app -checks=-*,google-*,-google-runtime-references -quiet ${{ needs.changed-files.outputs.all }} + /usr/bin/run-clang-tidy -j $(($(nproc)/2+1)) -p build/clang -header-filter=framework,samples,app -checks=-*,google-*,-google-runtime-references -quiet ${{ needs.changed-files.outputs.all }} 2>&1 | grep -v "third_party/glfw" pre-commit: name: Pre-Commit Checks diff --git a/.gitmodules b/.gitmodules index d3bfb3ed65..da99229779 100644 --- a/.gitmodules +++ b/.gitmodules @@ -50,3 +50,6 @@ [submodule "third_party/tracy"] path = third_party/tracy url = https://github.com/wolfpld/tracy.git +[submodule "samples/complex/render_octomap/octomap"] + path = samples/complex/render_octomap/octomap + url = https://github.com/OctoMap/octomap.git diff --git a/assets b/assets index 8db8ce9c52..3606a63ccb 160000 --- a/assets +++ b/assets @@ -1 +1 @@ -Subproject commit 8db8ce9c528330f0b1261b07531b009732b08731 +Subproject commit 3606a63ccb56f6f48f6ce42849fe1de6007480b6 diff --git a/framework/CMakeLists.txt b/framework/CMakeLists.txt index 8d2386257a..1d4fe31e01 100644 --- 
a/framework/CMakeLists.txt +++ b/framework/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025, Arm Limited and Contributors +# Copyright (c) 2019-2026, Arm Limited and Contributors # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. # # SPDX-License-Identifier: Apache-2.0 @@ -158,6 +158,7 @@ set(SCENE_GRAPH_COMPONENT_FILES scene_graph/components/pbr_material.h scene_graph/components/sampler.h scene_graph/components/sub_mesh.h + scene_graph/components/gaussian_splat.h scene_graph/components/texture.h scene_graph/components/transform.h scene_graph/components/image/astc.h @@ -179,6 +180,7 @@ set(SCENE_GRAPH_COMPONENT_FILES scene_graph/components/mesh.cpp scene_graph/components/pbr_material.cpp scene_graph/components/sub_mesh.cpp + scene_graph/components/gaussian_splat.cpp scene_graph/components/texture.cpp scene_graph/components/transform.cpp scene_graph/components/image/astc.cpp diff --git a/framework/gltf_loader.cpp b/framework/gltf_loader.cpp index b7fc3859ab..fa95570e08 100644 --- a/framework/gltf_loader.cpp +++ b/framework/gltf_loader.cpp @@ -1,5 +1,5 @@ -/* Copyright (c) 2018-2025, Arm Limited and Contributors - * Copyright (c) 2019-2025, Sascha Willems +/* Copyright (c) 2018-2026, Arm Limited and Contributors + * Copyright (c) 2019-2026, Sascha Willems * * SPDX-License-Identifier: Apache-2.0 * @@ -400,7 +400,8 @@ static inline bool texture_needs_srgb_colorspace(const std::string &name) } // namespace std::unordered_map<std::string, bool> GLTFLoader::supported_extensions = { - {KHR_LIGHTS_PUNCTUAL_EXTENSION, false}}; + {KHR_LIGHTS_PUNCTUAL_EXTENSION, false}, + {KHR_GAUSSIAN_SPLATTING_EXTENSION, false}}; GLTFLoader::GLTFLoader(vkb::core::DeviceC &device) : device{device} diff --git a/framework/gltf_loader.h b/framework/gltf_loader.h index 4176e96f5d..b84fc5e99f 100644 --- a/framework/gltf_loader.h +++ b/framework/gltf_loader.h @@ -1,5 +1,5 @@ -/* Copyright (c) 2018-2025, Arm Limited and Contributors - * Copyright (c) 2019-2025, Sascha Willems +/* Copyright (c) 2018-2026, Arm Limited and Contributors + * Copyright (c) 2019-2026, Sascha Willems * * SPDX-License-Identifier: Apache-2.0 * @@ -34,6 +34,7 @@ #include "vulkan/vulkan.h" #define KHR_LIGHTS_PUNCTUAL_EXTENSION "KHR_lights_punctual" +#define KHR_GAUSSIAN_SPLATTING_EXTENSION "KHR_gaussian_splatting" namespace vkb { diff --git a/framework/scene_graph/components/gaussian_splat.cpp b/framework/scene_graph/components/gaussian_splat.cpp new file mode 100644 index 0000000000..66c30817d0 --- /dev/null +++ b/framework/scene_graph/components/gaussian_splat.cpp @@ -0,0 +1,68 @@ +/* Copyright (c) 2024-2026, Holochip Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "gaussian_splat.h" + +namespace vkb +{ +namespace sg +{ + +GaussianSplat::GaussianSplat(const std::string &name) : + Component{name} +{ +} + +std::type_index GaussianSplat::get_type() +{ + return typeid(GaussianSplat); +} + +size_t GaussianSplat::get_gpu_memory_size() const +{ + size_t total = 0; + + if (position_buffer) + { + total += position_buffer->get_size(); + } + if (rotation_buffer) + { + total += rotation_buffer->get_size(); + } + if (scale_buffer) + { + total += scale_buffer->get_size(); + } + if (opacity_buffer) + { + total += opacity_buffer->get_size(); + } + if (color_buffer) + { + total += color_buffer->get_size(); + } + if (sh_buffer) + { + total += sh_buffer->get_size(); + } + + return total; +} + +} // namespace sg +} // namespace vkb
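For context on the `sh_degree` field declared in the header below: degree `d` spherical harmonics use `(d+1)^2` basis functions per color channel, so the optional SH buffer grows quickly with degree. A minimal sizing sketch (the helper name is illustrative, not part of the framework):

[source,cpp]
----
#include <cstddef>
#include <cstdint>

// Degree 0 is plain RGB (the DC term lives in the color buffer);
// degrees 1-3 add (d+1)^2 - 1 extra RGB coefficients per splat.
inline size_t sh_rest_floats_per_splat(uint32_t sh_degree)
{
    const uint32_t basis_count = (sh_degree + 1) * (sh_degree + 1);        // 1, 4, 9, 16
    return static_cast<size_t>(basis_count - 1) * 3;                       // 0, 9, 24, 45 floats
}
----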
diff --git a/framework/scene_graph/components/gaussian_splat.h b/framework/scene_graph/components/gaussian_splat.h new file mode 100644 index 0000000000..c644343afc --- /dev/null +++ b/framework/scene_graph/components/gaussian_splat.h @@ -0,0 +1,113 @@ +/* Copyright (c) 2024-2026, Holochip Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include <cstdint> +#include <memory> +#include <string> +#include <typeindex> + +#include "common/glm_common.h" +#include "common/vk_common.h" +#include "core/buffer.h" +#include "scene_graph/component.h" + +namespace vkb +{ +namespace sg +{ + +/** + * @brief Gaussian Splat rendering primitive data + * + * Stores data for rendering 3D Gaussian Splats as defined by the + * KHR_gaussian_splatting GLTF extension. + * + * Each splat is an oriented 3D Gaussian defined by: + * - Position (center point) + * - Rotation (quaternion orientation) + * - Scale (3D scale factors) + * - Opacity (alpha value) + * - Color (RGB or spherical harmonics coefficients) + */ +class GaussianSplat : public Component +{ + public: + /** + * @brief Kernel type for splat rendering + */ + enum class KernelType + { + Ellipse, // Default elliptical kernel + Sphere // Spherical kernel (isotropic) + }; + + /** + * @brief Color space for splat colors + */ + enum class ColorSpace + { + SRGB, // BT.709-sRGB + Linear // Linear RGB + }; + + GaussianSplat(const std::string &name = {}); + + virtual ~GaussianSplat() = default; + + virtual std::type_index get_type() override; + + // Number of splats + uint32_t splat_count = 0; + + // Spherical harmonics degree (0-3) + uint32_t sh_degree = 0; + + // Whether antialiasing is enabled + bool antialiased = false; + + // Kernel type for rendering + KernelType kernel = KernelType::Ellipse; + + // Color space + ColorSpace color_space = ColorSpace::SRGB; + + // GPU buffers for splat data + std::unique_ptr<vkb::core::BufferC> position_buffer; // VEC3 positions + std::unique_ptr<vkb::core::BufferC> rotation_buffer; // VEC4 quaternions + std::unique_ptr<vkb::core::BufferC> scale_buffer; // VEC3 scales + std::unique_ptr<vkb::core::BufferC> opacity_buffer; // SCALAR opacities + std::unique_ptr<vkb::core::BufferC> color_buffer; // VEC3 colors (or SH coefficients) + std::unique_ptr<vkb::core::BufferC> sh_buffer; // MAT3 spherical harmonics (if sh_degree > 0) + + /** + * @brief Get the total GPU memory used by this splat primitive + */ + size_t get_gpu_memory_size() const; + + /** + * @brief Check if spherical harmonics data is available + */ + bool has_spherical_harmonics() const + { + return sh_degree > 0 && sh_buffer != nullptr; + } +}; + +} // namespace sg +} // namespace vkb diff --git a/samples/README.adoc b/samples/README.adoc index 315c312622..c590f49442 100644 --- a/samples/README.adoc +++ b/samples/README.adoc @@ -1,5 +1,5 @@ //// -- Copyright (c) 2020-2024, Arm Limited and Contributors +- Copyright (c) 2020-2026, Arm Limited and Contributors - - SPDX-License-Identifier: Apache-2.0 - @@ -44,3 +44,6 @@ include::./extensions/README.adoc[] [[tooling-samples]] include::./tooling/README.adoc[] + +[[complex-samples]] +include::./complex/README.adoc[] \ No newline at end of file diff --git a/samples/complex/README.adoc b/samples/complex/README.adoc new file mode 100644 index 0000000000..55414b9174 --- /dev/null +++ b/samples/complex/README.adoc @@ -0,0 +1,35 @@ +//// +- Copyright (c) 2021-2026, The Khronos Group +- +- SPDX-License-Identifier: Apache-2.0 +- +- Licensed under the Apache License, Version 2.0 the "License"; +- you may not use this file except in compliance with the License. +- You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +- +//// +ifndef::complex_samplespath[:complex_samplespath:] + +== Complex samples + +The goal of these samples is to demonstrate how to use Vulkan in a real-world scenario. They are meant to show how to use Vulkan as a whole, rather than a single feature in isolation. + +These samples also demonstrate how to work with other libraries and how to set up larger projects.
Individual samples may or may not use the Framework, depending on what each one needs to demonstrate. + +While the individual Vulkan concepts found here aren't complex, their interaction with real-world use cases is. Examples of what might eventually be found here include AI, OpenXR, GLTF, and similar topics that don't belong in the other sample categories on their own, yet are useful in many areas. + +=== xref:./{complex_samplespath}render_octomap/README.adoc[Render an OctoMap] + +*Instancing*: https://docs.vulkan.org/spec/latest/chapters/drawing.html#vkCmdDrawIndexed[`vkCmdDrawIndexed`] + +Uses instancing to render OctoMaps that were generated in real time by ARCore and ARKit, saved, and then joined into one shared map using point cloud registration. + +Such maps are commonly used by the Robot Operating System (ROS), and this sample shows how to render the resulting map. This is a topic of interest for SLAM, and for AI with OpenXR, where maps must be understood and joined. diff --git a/samples/complex/render_octomap/CMakeLists.txt b/samples/complex/render_octomap/CMakeLists.txt new file mode 100644 index 0000000000..558d091d38 --- /dev/null +++ b/samples/complex/render_octomap/CMakeLists.txt @@ -0,0 +1,75 @@ +# Copyright (c) 2025-2026, Holochip Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 the "License"; +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# + +get_filename_component(FOLDER_NAME ${CMAKE_CURRENT_LIST_DIR} NAME) +get_filename_component(PARENT_DIR ${CMAKE_CURRENT_LIST_DIR} PATH) +get_filename_component(CATEGORY_NAME ${PARENT_DIR} NAME) + +# On Windows, build as STATIC to avoid DLL export issues +# On other platforms, build as SHARED for better memory efficiency +if(WIN32) + set(OCTOMAP_LIBRARY_TYPE STATIC) +else() + set(OCTOMAP_LIBRARY_TYPE SHARED) +endif() + +add_library(octomath STATIC + octomap/octomap/src/math/Vector3.cpp + octomap/octomap/src/math/Quaternion.cpp + octomap/octomap/src/math/Pose6D.cpp +) +set_target_properties(octomath PROPERTIES POSITION_INDEPENDENT_CODE ON) +target_include_directories(octomath PUBLIC ${CMAKE_CURRENT_LIST_DIR}/octomap/octomap/include) + +add_library(octomap ${OCTOMAP_LIBRARY_TYPE} + octomap/octomap/src/AbstractOcTree.cpp + octomap/octomap/src/AbstractOccupancyOcTree.cpp + octomap/octomap/src/Pointcloud.cpp + octomap/octomap/src/ScanGraph.cpp + octomap/octomap/src/CountingOcTree.cpp + octomap/octomap/src/OcTree.cpp + octomap/octomap/src/OcTreeNode.cpp + octomap/octomap/src/OcTreeStamped.cpp + octomap/octomap/src/ColorOcTree.cpp +) +target_link_libraries(octomap PUBLIC octomath) + +add_library(dynamicEDT3d ${OCTOMAP_LIBRARY_TYPE} octomap/dynamicEDT3D/src/dynamicEDT3D.cpp) +target_include_directories(dynamicEDT3d PUBLIC octomap/dynamicEDT3D/include) +target_link_libraries(dynamicEDT3d PUBLIC octomap) + +add_sample_with_tags( + ID ${FOLDER_NAME} + CATEGORY ${CATEGORY_NAME} + AUTHOR "Holochip" + NAME "OctoMap rendering" + DESCRIPTION "Demonstration of how to render an OctoMap generated by combining ARCore and ARKit SLAM maps with ICP" + FILES + ImGUIUtil.cpp + Screens/MapView.cpp + SHADER_FILES_GLSL + "render_octomap/glsl/render.vert" + "render_octomap/glsl/render.frag" + "render_octomap/glsl/imgui.vert" + "render_octomap/glsl/imgui.frag" + "render_octomap/glsl/gltf.vert" + "render_octomap/glsl/gltf.frag" + "render_octomap/glsl/splat.vert" + "render_octomap/glsl/splat.frag" + LIBS + dynamicEDT3d +) diff --git a/samples/complex/render_octomap/ImGUIUtil.cpp b/samples/complex/render_octomap/ImGUIUtil.cpp new file mode 100644 index 0000000000..4141b41f1b --- /dev/null +++ b/samples/complex/render_octomap/ImGUIUtil.cpp @@ -0,0 +1,819 @@ +/* Copyright (c) 2024-2026, Holochip Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "ImGUIUtil.h" +#include "api_vulkan_sample.h" +#include "platform/input_events.h" +#include + +// Map framework keycodes to ImGuiKey (platform-agnostic) +static ImGuiKey KeyCodeToImGuiKey(vkb::KeyCode code) +{ + using vkb::KeyCode; + switch (code) + { + case KeyCode::Tab: + return ImGuiKey_Tab; + case KeyCode::Left: + return ImGuiKey_LeftArrow; + case KeyCode::Right: + return ImGuiKey_RightArrow; + case KeyCode::Up: + return ImGuiKey_UpArrow; + case KeyCode::Down: + return ImGuiKey_DownArrow; + case KeyCode::PageUp: + return ImGuiKey_PageUp; + case KeyCode::PageDown: + return ImGuiKey_PageDown; + case KeyCode::Home: + return ImGuiKey_Home; + case KeyCode::End: + return ImGuiKey_End; + case KeyCode::Insert: + return ImGuiKey_Insert; + case KeyCode::DelKey: + return ImGuiKey_Delete; + case KeyCode::Backspace: + return ImGuiKey_Backspace; + case KeyCode::Space: + return ImGuiKey_Space; + case KeyCode::Enter: + return ImGuiKey_Enter; + case KeyCode::Escape: + return ImGuiKey_Escape; + case KeyCode::Apostrophe: + return ImGuiKey_Apostrophe; + case KeyCode::Comma: + return ImGuiKey_Comma; + case KeyCode::Minus: + return ImGuiKey_Minus; + case KeyCode::Period: + return ImGuiKey_Period; + case KeyCode::Slash: + return ImGuiKey_Slash; + case KeyCode::Semicolon: + return ImGuiKey_Semicolon; + case KeyCode::Equal: + return ImGuiKey_Equal; + case KeyCode::LeftBracket: + return ImGuiKey_LeftBracket; + case KeyCode::Backslash: + return ImGuiKey_Backslash; + case KeyCode::RightBracket: + return ImGuiKey_RightBracket; + case KeyCode::GraveAccent: + return ImGuiKey_GraveAccent; + case KeyCode::CapsLock: + return ImGuiKey_CapsLock; + case KeyCode::ScrollLock: + return ImGuiKey_ScrollLock; + case KeyCode::NumLock: + return ImGuiKey_NumLock; + case KeyCode::PrintScreen: + return ImGuiKey_PrintScreen; + case KeyCode::Pause: + return ImGuiKey_Pause; + case KeyCode::KP_0: + return ImGuiKey_Keypad0; + case KeyCode::KP_1: + return ImGuiKey_Keypad1; + case KeyCode::KP_2: + return ImGuiKey_Keypad2; + case KeyCode::KP_3: + return ImGuiKey_Keypad3; + case KeyCode::KP_4: + return ImGuiKey_Keypad4; + case KeyCode::KP_5: + return ImGuiKey_Keypad5; + case KeyCode::KP_6: + return ImGuiKey_Keypad6; + case KeyCode::KP_7: + return ImGuiKey_Keypad7; + case KeyCode::KP_8: + return ImGuiKey_Keypad8; + case KeyCode::KP_9: + return ImGuiKey_Keypad9; + case KeyCode::KP_Decimal: + return ImGuiKey_KeypadDecimal; + case KeyCode::KP_Divide: + return ImGuiKey_KeypadDivide; + case KeyCode::KP_Multiply: + return ImGuiKey_KeypadMultiply; + case KeyCode::KP_Subtract: + return ImGuiKey_KeypadSubtract; + case KeyCode::KP_Add: + return ImGuiKey_KeypadAdd; + case KeyCode::KP_Enter: + return ImGuiKey_KeypadEnter; + case KeyCode::KP_Equal: + return ImGuiKey_KeypadEqual; + case KeyCode::LeftShift: + return ImGuiKey_LeftShift; + case KeyCode::LeftControl: + return ImGuiKey_LeftCtrl; + case KeyCode::LeftAlt: + return ImGuiKey_LeftAlt; + case KeyCode::RightShift: + return ImGuiKey_RightShift; + case KeyCode::RightControl: + return ImGuiKey_RightCtrl; + case KeyCode::RightAlt: + return ImGuiKey_RightAlt; + case KeyCode::F1: + return ImGuiKey_F1; + case KeyCode::F2: + return ImGuiKey_F2; + case KeyCode::F3: + return ImGuiKey_F3; + case KeyCode::F4: + return ImGuiKey_F4; + case KeyCode::F5: + return ImGuiKey_F5; + case KeyCode::F6: + return ImGuiKey_F6; + case KeyCode::F7: + return ImGuiKey_F7; + case KeyCode::F8: + return ImGuiKey_F8; + case KeyCode::F9: + return ImGuiKey_F9; + case KeyCode::F10: + return ImGuiKey_F10; + case 
KeyCode::F11: + return ImGuiKey_F11; + case KeyCode::F12: + return ImGuiKey_F12; + case KeyCode::_0: + return ImGuiKey_0; + case KeyCode::_1: + return ImGuiKey_1; + case KeyCode::_2: + return ImGuiKey_2; + case KeyCode::_3: + return ImGuiKey_3; + case KeyCode::_4: + return ImGuiKey_4; + case KeyCode::_5: + return ImGuiKey_5; + case KeyCode::_6: + return ImGuiKey_6; + case KeyCode::_7: + return ImGuiKey_7; + case KeyCode::_8: + return ImGuiKey_8; + case KeyCode::_9: + return ImGuiKey_9; + case KeyCode::A: + return ImGuiKey_A; + case KeyCode::B: + return ImGuiKey_B; + case KeyCode::C: + return ImGuiKey_C; + case KeyCode::D: + return ImGuiKey_D; + case KeyCode::E: + return ImGuiKey_E; + case KeyCode::F: + return ImGuiKey_F; + case KeyCode::G: + return ImGuiKey_G; + case KeyCode::H: + return ImGuiKey_H; + case KeyCode::I: + return ImGuiKey_I; + case KeyCode::J: + return ImGuiKey_J; + case KeyCode::K: + return ImGuiKey_K; + case KeyCode::L: + return ImGuiKey_L; + case KeyCode::M: + return ImGuiKey_M; + case KeyCode::N: + return ImGuiKey_N; + case KeyCode::O: + return ImGuiKey_O; + case KeyCode::P: + return ImGuiKey_P; + case KeyCode::Q: + return ImGuiKey_Q; + case KeyCode::R: + return ImGuiKey_R; + case KeyCode::S: + return ImGuiKey_S; + case KeyCode::T: + return ImGuiKey_T; + case KeyCode::U: + return ImGuiKey_U; + case KeyCode::V: + return ImGuiKey_V; + case KeyCode::W: + return ImGuiKey_W; + case KeyCode::X: + return ImGuiKey_X; + case KeyCode::Y: + return ImGuiKey_Y; + case KeyCode::Z: + return ImGuiKey_Z; + default: + break; + } + return ImGuiKey_None; +} + +ImGUIUtil::ImGUIUtil(ApiVulkanSample *_base) : + base(_base) +{ + // This conflicts with the hpp_gui context. Disable for now. + // ImGuiContext* context = ImGui::CreateContext(); + // ImGui::SetCurrentContext(context); + auto &device = base->get_render_context().get_device(); + vertexBuffer = + vkb::core::BufferBuilderC(1) + .with_usage(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT) + .with_vma_usage(VMA_MEMORY_USAGE_GPU_TO_CPU) + .with_debug_name("GUI vertex buffer") + .build_unique(device); + + indexBuffer = + vkb::core::BufferBuilderC(1) + .with_usage(VK_BUFFER_USAGE_INDEX_BUFFER_BIT) + .with_vma_usage(VMA_MEMORY_USAGE_GPU_TO_CPU) + .with_debug_name("GUI index buffer") + .build_unique(device); +} + +ImGUIUtil::~ImGUIUtil() +{ + // ImGui::DestroyContext(); // this would double free due to the default one in hpp_gui... + // Release all Vulkan resources required for rendering imGui + vkFreeMemory(base->get_render_context().get_device().get_handle(), fontMemory, nullptr); + vkDestroySampler(base->get_render_context().get_device().get_handle(), sampler, nullptr); + vkDestroyPipelineCache(base->get_render_context().get_device().get_handle(), pipelineCache, nullptr); + vkDestroyPipeline(base->get_render_context().get_device().get_handle(), pipeline, nullptr); + vkDestroyPipelineLayout(base->get_render_context().get_device().get_handle(), pipelineLayout, nullptr); + vkDestroyDescriptorPool(base->get_render_context().get_device().get_handle(), descriptorPool, nullptr); + vkDestroyDescriptorSetLayout(base->get_render_context().get_device().get_handle(), descriptorSetLayout, nullptr); +} + +// Initialize styles, keys, etc. 
+void ImGUIUtil::init(float width, float height) +{ + // Color scheme + vulkanStyle = ImGui::GetStyle(); + vulkanStyle.Colors[ImGuiCol_TitleBg] = ImVec4(1.0f, 0.0f, 0.0f, 0.6f); + vulkanStyle.Colors[ImGuiCol_TitleBgActive] = ImVec4(1.0f, 0.0f, 0.0f, 0.8f); + vulkanStyle.Colors[ImGuiCol_MenuBarBg] = ImVec4(1.0f, 0.0f, 0.0f, 0.4f); + vulkanStyle.Colors[ImGuiCol_Header] = ImVec4(1.0f, 0.0f, 0.0f, 0.4f); + vulkanStyle.Colors[ImGuiCol_CheckMark] = ImVec4(0.0f, 1.0f, 0.0f, 1.0f); + + setStyle(0); + // Dimensions + ImGuiIO &io = ImGui::GetIO(); + io.DisplaySize = ImVec2(width, height); + io.DisplayFramebufferScale = ImVec2(1.0f, 1.0f); +} + +void ImGUIUtil::setStyle(uint32_t index) +{ + switch (index) + { + case 0: + { + ImGuiStyle &style = ImGui::GetStyle(); + style = vulkanStyle; + break; + } + case 1: + ImGui::StyleColorsClassic(); + break; + case 2: + ImGui::StyleColorsDark(); + break; + case 3: + ImGui::StyleColorsLight(); + break; + } +} + +// Initialize all Vulkan resources used by the ui +void ImGUIUtil::initResources(VkRenderPass renderPass, VkQueue copyQueue) +{ + ImGuiIO &io = ImGui::GetIO(); + + // Create font texture + unsigned char *fontData; + int texWidth, texHeight; + + io.Fonts->GetTexDataAsRGBA32(&fontData, &texWidth, &texHeight); + VkDeviceSize uploadSize = texWidth * texHeight * 4 * sizeof(char); + + // SRS - Get Vulkan device driver information if available, use later for display + if (base->get_render_context().get_device().get_gpu().is_extension_supported(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME)) + { + VkPhysicalDeviceProperties2 deviceProperties2 = {}; + deviceProperties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + deviceProperties2.pNext = &driverProperties; + driverProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES; + vkGetPhysicalDeviceProperties2(base->get_render_context().get_device().get_gpu().get_handle(), &deviceProperties2); + } + + // Create target image for copy + VkExtent3D font_extent{vkb::to_u32(texWidth), vkb::to_u32(texHeight), 1u}; + + font_image = std::make_unique<vkb::core::Image>(base->get_render_context().get_device(), font_extent, VK_FORMAT_R8G8B8A8_UNORM, + VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT, + VMA_MEMORY_USAGE_GPU_ONLY); + font_image->set_debug_name("GUI font image"); + + font_image_view = std::make_unique<vkb::core::ImageView>(*font_image, VK_IMAGE_VIEW_TYPE_2D); + font_image_view->set_debug_name("View on GUI font image"); + + // Upload font data into the vulkan image memory + { + vkb::core::BufferC stage_buffer = vkb::core::BufferC::create_staging_buffer(base->get_render_context().get_device(), uploadSize, fontData); + + auto command_buffer = base->get_render_context().get_device().get_command_pool().request_command_buffer(); + + vkb::FencePool fence_pool{base->get_render_context().get_device()}; + + // Begin recording + command_buffer->begin(VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, 0); + + { + // Prepare for transfer + vkb::ImageMemoryBarrier memory_barrier{}; + memory_barrier.old_layout = VK_IMAGE_LAYOUT_UNDEFINED; + memory_barrier.new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + memory_barrier.src_access_mask = 0; + memory_barrier.dst_access_mask = VK_ACCESS_TRANSFER_WRITE_BIT; + memory_barrier.src_stage_mask = VK_PIPELINE_STAGE_HOST_BIT; + memory_barrier.dst_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT; + + command_buffer->image_memory_barrier(*font_image_view, memory_barrier); + } + + // Copy + VkBufferImageCopy buffer_copy_region{}; + buffer_copy_region.imageSubresource.layerCount =
font_image_view->get_subresource_range().layerCount; + buffer_copy_region.imageSubresource.aspectMask = font_image_view->get_subresource_range().aspectMask; + buffer_copy_region.imageExtent = font_image->get_extent(); + + command_buffer->copy_buffer_to_image(stage_buffer, *font_image, {buffer_copy_region}); + + { + // Prepare for fragment shader + vkb::ImageMemoryBarrier memory_barrier{}; + memory_barrier.old_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + memory_barrier.new_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + memory_barrier.src_access_mask = VK_ACCESS_TRANSFER_WRITE_BIT; + memory_barrier.dst_access_mask = VK_ACCESS_SHADER_READ_BIT; + memory_barrier.src_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT; + memory_barrier.dst_stage_mask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + + command_buffer->image_memory_barrier(*font_image_view, memory_barrier); + } + // End recording + command_buffer->end(); + auto &queue = base->get_render_context().get_device().get_queue_by_flags(VK_QUEUE_GRAPHICS_BIT, 0); + + queue.submit(*command_buffer, base->get_render_context().get_device().get_fence_pool().request_fence()); + + // Wait for the command buffer to finish its work before destroying the staging buffer + VK_CHECK(base->get_render_context().get_device().get_fence_pool().wait()); + base->get_render_context().get_device().get_fence_pool().reset(); + base->get_render_context().get_device().get_command_pool().reset_pool(); + } + // Font texture Sampler + VkSamplerCreateInfo samplerInfo = vkb::initializers::sampler_create_info(); + samplerInfo.magFilter = VK_FILTER_LINEAR; + samplerInfo.minFilter = VK_FILTER_LINEAR; + samplerInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + samplerInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerInfo.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; + VK_CHECK(vkCreateSampler(base->get_render_context().get_device().get_handle(), &samplerInfo, nullptr, &sampler)); + + // Descriptor pool + std::vector<VkDescriptorPoolSize> poolSizes = { + vkb::initializers::descriptor_pool_size(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 6)}; + VkDescriptorPoolCreateInfo descriptorPoolInfo = vkb::initializers::descriptor_pool_create_info(poolSizes, 7); + VK_CHECK(vkCreateDescriptorPool(base->get_render_context().get_device().get_handle(), &descriptorPoolInfo, nullptr, &descriptorPool)); + + // Descriptor set layout + std::vector<VkDescriptorSetLayoutBinding> setLayoutBindings = { + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + VK_SHADER_STAGE_FRAGMENT_BIT, 0), + }; + VkDescriptorSetLayoutCreateInfo descriptorLayout = vkb::initializers::descriptor_set_layout_create_info(setLayoutBindings); + VK_CHECK(vkCreateDescriptorSetLayout(base->get_render_context().get_device().get_handle(), &descriptorLayout, nullptr, &descriptorSetLayout)); + + // Descriptor set + VkDescriptorSetAllocateInfo allocInfo = vkb::initializers::descriptor_set_allocate_info(descriptorPool, + &descriptorSetLayout, 1); + VK_CHECK(vkAllocateDescriptorSets(base->get_render_context().get_device().get_handle(), &allocInfo, &descriptorSet)); + VkDescriptorImageInfo fontDescriptor = vkb::initializers::descriptor_image_info( + sampler, + font_image_view->get_handle(), + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + std::vector<VkWriteDescriptorSet> writeDescriptorSets = { + vkb::initializers::write_descriptor_set(descriptorSet, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 0, + &fontDescriptor)}; +
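+ // MapView::LoadAssets may return additional write descriptors (for images shown in the GUI); + // they are appended here so everything lands in the single batched vkUpdateDescriptorSets call below.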
+ std::vector<VkWriteDescriptorSet> liveMapDescriptorSets = MapsView.LoadAssets(base, allocInfo, copyQueue); + if (!liveMapDescriptorSets.empty()) + { + writeDescriptorSets.insert(writeDescriptorSets.end(), liveMapDescriptorSets.begin(), liveMapDescriptorSets.end()); + } + + vkUpdateDescriptorSets(base->get_render_context().get_device().get_handle(), static_cast<uint32_t>(writeDescriptorSets.size()), + writeDescriptorSets.data(), + 0, nullptr); + + // Pipeline cache + VkPipelineCacheCreateInfo pipelineCacheCreateInfo = {}; + pipelineCacheCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO; + VK_CHECK(vkCreatePipelineCache(base->get_render_context().get_device().get_handle(), &pipelineCacheCreateInfo, nullptr, &pipelineCache)); + + // Pipeline layout + // Push constants for UI rendering parameters + VkPushConstantRange pushConstantRange = vkb::initializers::push_constant_range(VK_SHADER_STAGE_VERTEX_BIT, sizeof(PushConstBlock), 0); + VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = vkb::initializers::pipeline_layout_create_info(&descriptorSetLayout, 1); + pipelineLayoutCreateInfo.pushConstantRangeCount = 1; + pipelineLayoutCreateInfo.pPushConstantRanges = &pushConstantRange; + VK_CHECK(vkCreatePipelineLayout(base->get_render_context().get_device().get_handle(), &pipelineLayoutCreateInfo, nullptr, &pipelineLayout)); + + // Setup graphics pipeline for UI rendering + VkPipelineInputAssemblyStateCreateInfo inputAssemblyState = + vkb::initializers::pipeline_input_assembly_state_create_info(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, 0, VK_FALSE); + + VkPipelineRasterizationStateCreateInfo rasterizationState = + vkb::initializers::pipeline_rasterization_state_create_info(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE, + VK_FRONT_FACE_COUNTER_CLOCKWISE); + + // Enable blending + VkPipelineColorBlendAttachmentState blendAttachmentState{}; + blendAttachmentState.blendEnable = VK_TRUE; + blendAttachmentState.srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; + blendAttachmentState.dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + blendAttachmentState.colorBlendOp = VK_BLEND_OP_ADD; + blendAttachmentState.srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE; + blendAttachmentState.dstAlphaBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + blendAttachmentState.alphaBlendOp = VK_BLEND_OP_ADD; + blendAttachmentState.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + + VkPipelineColorBlendStateCreateInfo colorBlendState = + vkb::initializers::pipeline_color_blend_state_create_info(1, &blendAttachmentState); + + VkPipelineDepthStencilStateCreateInfo depthStencilState = + vkb::initializers::pipeline_depth_stencil_state_create_info(VK_FALSE, VK_FALSE, VK_COMPARE_OP_LESS_OR_EQUAL); + + VkPipelineViewportStateCreateInfo viewportState = + vkb::initializers::pipeline_viewport_state_create_info(1, 1, 0); + + VkPipelineMultisampleStateCreateInfo multisampleState = + vkb::initializers::pipeline_multisample_state_create_info(VK_SAMPLE_COUNT_1_BIT); + + std::vector<VkDynamicState> dynamicStateEnables = { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR}; + VkPipelineDynamicStateCreateInfo dynamicState = + vkb::initializers::pipeline_dynamic_state_create_info(dynamicStateEnables); + + std::vector<VkPipelineShaderStageCreateInfo> shaderStages{ + base->load_shader("render_octomap", "imgui.vert.spv", VK_SHADER_STAGE_VERTEX_BIT), + base->load_shader("render_octomap", "imgui.frag.spv", VK_SHADER_STAGE_FRAGMENT_BIT)}; + + VkGraphicsPipelineCreateInfo pipelineCreateInfo =
vkb::initializers::pipeline_create_info(pipelineLayout, renderPass); + + pipelineCreateInfo.pInputAssemblyState = &inputAssemblyState; + pipelineCreateInfo.pRasterizationState = &rasterizationState; + pipelineCreateInfo.pColorBlendState = &colorBlendState; + pipelineCreateInfo.pMultisampleState = &multisampleState; + pipelineCreateInfo.pViewportState = &viewportState; + pipelineCreateInfo.pDepthStencilState = &depthStencilState; + pipelineCreateInfo.pDynamicState = &dynamicState; + pipelineCreateInfo.stageCount = static_cast<uint32_t>(shaderStages.size()); + pipelineCreateInfo.pStages = shaderStages.data(); + + // Vertex bindings and attributes based on ImGui vertex definition + std::vector<VkVertexInputBindingDescription> vertexInputBindings = { + vkb::initializers::vertex_input_binding_description(0, sizeof(ImDrawVert), VK_VERTEX_INPUT_RATE_VERTEX), + }; + std::vector<VkVertexInputAttributeDescription> vertexInputAttributes = { + vkb::initializers::vertex_input_attribute_description(0, 0, VK_FORMAT_R32G32_SFLOAT, offsetof(ImDrawVert, pos)), + // Location 0: Position + vkb::initializers::vertex_input_attribute_description(0, 1, VK_FORMAT_R32G32_SFLOAT, offsetof(ImDrawVert, uv)), + // Location 1: UV + vkb::initializers::vertex_input_attribute_description(0, 2, VK_FORMAT_R8G8B8A8_UNORM, offsetof(ImDrawVert, col)), + // Location 2: Color + }; + VkPipelineVertexInputStateCreateInfo vertexInputState = vkb::initializers::pipeline_vertex_input_state_create_info(); + vertexInputState.vertexBindingDescriptionCount = static_cast<uint32_t>(vertexInputBindings.size()); + vertexInputState.pVertexBindingDescriptions = vertexInputBindings.data(); + vertexInputState.vertexAttributeDescriptionCount = static_cast<uint32_t>(vertexInputAttributes.size()); + vertexInputState.pVertexAttributeDescriptions = vertexInputAttributes.data(); + + pipelineCreateInfo.pVertexInputState = &vertexInputState; + + VK_CHECK(vkCreateGraphicsPipelines(base->get_render_context().get_device().get_handle(), pipelineCache, 1, &pipelineCreateInfo, nullptr, &pipeline)); +} + +// Starts a new imGui frame and sets up windows and ui elements +bool ImGUIUtil::newFrame(bool updateFrameGraph) +{ + ImGui::NewFrame(); + + // Draw only the left sidebar as an interactive window. + // The map viewport area remains free for the camera to receive mouse input.
+ { + ImGuiStyle &style = ImGui::GetStyle(); + style.ChildRounding = 0.0f; + style.WindowRounding = 12.0f; + style.FrameRounding = 12.0f; + + ImGuiIO &io = ImGui::GetIO(); + + const float padding = 20.0f; + const float sidebar_inner_width = 240.0f; + const float sidebar_width = sidebar_inner_width + padding * 2.0f; + const float btn_w = sidebar_inner_width; + const float btn_h = 52.0f; + const float gap = 10.0f; + + // Colors (same palette as MapView) + const ImVec4 sidebarColor = ImVec4(0x41 / 255.0f, 0x40 / 255.0f, 0x42 / 255.0f, 1.0f); + const ImVec4 buttonColor = ImVec4(0x00 / 255.0f, 0xF1 / 255.0f, 0xC6 / 255.0f, 1.0f); + const ImVec4 buttonActiveColor = ImVec4(0x00 / 255.0f, 0x94 / 255.0f, 0x81 / 255.0f, 1.0f); + const ImVec4 blackColor = ImVec4(0.0f, 0.0f, 0.0f, 1.0f); + + ImGui::SetNextWindowPos(ImVec2(0.0f, 0.0f), ImGuiCond_Always); + ImGui::SetNextWindowSize(ImVec2(sidebar_width, io.DisplaySize.y), ImGuiCond_Always); + ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(padding, padding)); + ImGui::PushStyleColor(ImGuiCol_WindowBg, sidebarColor); + ImGui::PushStyleColor(ImGuiCol_Text, blackColor); + ImGui::Begin("Sidebar##render_octomap", nullptr, + ImGuiWindowFlags_NoTitleBar | + ImGuiWindowFlags_NoResize | + ImGuiWindowFlags_NoMove | + ImGuiWindowFlags_NoScrollbar | + ImGuiWindowFlags_NoScrollWithMouse | + ImGuiWindowFlags_NoSavedSettings); + + auto sidebar_button = [&](const char *label, MapView::ViewState state, const char *id) { + ImVec4 c = (MapsView.currentState == state) ? buttonActiveColor : buttonColor; + ImGui::PushStyleColor(ImGuiCol_Button, c); + ImGui::PushStyleColor(ImGuiCol_ButtonHovered, c); + ImGui::PushStyleColor(ImGuiCol_ButtonActive, c); + ImGui::PushID(id); + bool pressed = ImGui::Button(label, ImVec2(btn_w, btn_h)); + ImGui::PopID(); + ImGui::PopStyleColor(3); + if (pressed) + { + MapsView.currentState = state; + MapsView.stateChanged = true; + } + ImGui::Dummy(ImVec2(0.0f, gap)); + }; + + sidebar_button("OCTOMAP", MapView::ViewState::Octomap, "##btn_octomap"); + sidebar_button("GLTF MAP", MapView::ViewState::GLTFRegular, "##btn_gltf"); + sidebar_button("SPLATS", MapView::ViewState::GLTFSplats, "##btn_splats"); + + ImGui::End(); + ImGui::PopStyleColor(2); + ImGui::PopStyleVar(); + + // Compute 3D viewport rectangle (right side) + MapsView.mapPos = {sidebar_width, padding}; + MapsView.mapSize = {io.DisplaySize.x - sidebar_width - padding, io.DisplaySize.y - padding * 2.0f}; + + // Draw a non-interactive semi-transparent map panel background. 
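+ // The window is created with ImGuiWindowFlags_NoInputs, so it never swallows mouse events meant for the 3D camera.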
+ ImGui::SetNextWindowPos(ImVec2(MapsView.mapPos.x, MapsView.mapPos.y), ImGuiCond_Always); + ImGui::SetNextWindowSize(ImVec2(MapsView.mapSize.x, MapsView.mapSize.y), ImGuiCond_Always); + ImGui::SetNextWindowBgAlpha(0.35f); + ImGui::PushStyleColor(ImGuiCol_WindowBg, sidebarColor); + ImGui::Begin("MapPanel##render_octomap", nullptr, + ImGuiWindowFlags_NoTitleBar | + ImGuiWindowFlags_NoResize | + ImGuiWindowFlags_NoMove | + ImGuiWindowFlags_NoScrollbar | + ImGuiWindowFlags_NoScrollWithMouse | + ImGuiWindowFlags_NoSavedSettings | + ImGuiWindowFlags_NoInputs); + ImGui::End(); + ImGui::PopStyleColor(); + } + + ImGui::EndFrame(); + + // Render to generate draw buffers + ImGui::Render(); + if (needsUpdateBuffers) + { + needsUpdateBuffers = false; + return true; + } + return false; +} + +// Update vertex and index buffer containing the imGui elements when required +// Returns true if buffers were recreated (requiring command buffer rebuild) +bool ImGUIUtil::updateBuffers() +{ + ImDrawData *imDrawData = ImGui::GetDrawData(); + + if (!imDrawData) + { + return false; + } + + // Note: Alignment is done inside buffer creation + VkDeviceSize vertexBufferSize = imDrawData->TotalVtxCount * sizeof(ImDrawVert); + VkDeviceSize indexBufferSize = imDrawData->TotalIdxCount * sizeof(ImDrawIdx); + + if ((vertexBufferSize == 0) || (indexBufferSize == 0)) + { + return false; + } + + bool buffersRecreated = false; + + // Update buffers only if vertex or index count has been changed compared to current buffer size + if ((vertexBuffer->get_handle() == VK_NULL_HANDLE) || (vertexCount != imDrawData->TotalVtxCount)) + { + // Wait for GPU to finish using the old buffer before destroying it + vkDeviceWaitIdle(base->get_render_context().get_device().get_handle()); + vertexBuffer.reset(); + vertexBuffer = std::make_unique<vkb::core::BufferC>(base->get_render_context().get_device(), vertexBufferSize, + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + VMA_MEMORY_USAGE_GPU_TO_CPU); + vertexCount = imDrawData->TotalVtxCount; + vertexBuffer->set_debug_name("GUI Util vertex buffer"); + buffersRecreated = true; + } + + if ((indexBuffer->get_handle() == VK_NULL_HANDLE) || (indexCount != imDrawData->TotalIdxCount)) + { + // Wait for GPU to finish using the old buffer before destroying it + if (!buffersRecreated) + { + vkDeviceWaitIdle(base->get_render_context().get_device().get_handle()); + } + indexCount = imDrawData->TotalIdxCount; + + indexBuffer.reset(); + indexBuffer = std::make_unique<vkb::core::BufferC>(base->get_render_context().get_device(), indexBufferSize, + VK_BUFFER_USAGE_INDEX_BUFFER_BIT, + VMA_MEMORY_USAGE_GPU_TO_CPU); + indexBuffer->set_debug_name("GUI index buffer"); + buffersRecreated = true; + } + + // Upload data + ImDrawVert *vtxDst = reinterpret_cast<ImDrawVert *>(vertexBuffer->map()); + ImDrawIdx *idxDst = reinterpret_cast<ImDrawIdx *>(indexBuffer->map()); + + for (int n = 0; n < imDrawData->CmdListsCount; n++) + { + const ImDrawList *cmd_list = imDrawData->CmdLists[n]; + memcpy(vtxDst, cmd_list->VtxBuffer.Data, cmd_list->VtxBuffer.Size * sizeof(ImDrawVert)); + memcpy(idxDst, cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size * sizeof(ImDrawIdx)); + vtxDst += cmd_list->VtxBuffer.Size; + idxDst += cmd_list->IdxBuffer.Size; + } + + // Flush to make writes visible to GPU + vertexBuffer->flush(); + indexBuffer->flush(); + + return buffersRecreated; +} + +void ImGUIUtil::drawFrame(VkCommandBuffer commandBuffer) +{ + ImGuiIO &io = ImGui::GetIO(); + + vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + + VkViewport viewport =
vkb::initializers::viewport(ImGui::GetIO().DisplaySize.x, ImGui::GetIO().DisplaySize.y, 0.0f, 1.0f); + vkCmdSetViewport(commandBuffer, 0, 1, &viewport); + + // UI scale and translate via push constants + pushConstBlock.scale = glm::vec2(2.0f / io.DisplaySize.x, 2.0f / io.DisplaySize.y); + pushConstBlock.translate = glm::vec2(-1.0f); + vkCmdPushConstants(commandBuffer, pipelineLayout, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(PushConstBlock), &pushConstBlock); + + // Render commands + ImDrawData *imDrawData = ImGui::GetDrawData(); + uint32_t vertexOffset = 0; + uint32_t indexOffset = 0; + + if (imDrawData->CmdListsCount > 0) + { + VkDeviceSize offsets[1] = {0}; + vkCmdBindVertexBuffers(commandBuffer, 0, 1, &vertexBuffer->get_handle(), offsets); + vkCmdBindIndexBuffer(commandBuffer, indexBuffer->get_handle(), 0, VK_INDEX_TYPE_UINT16); + + for (auto i = 0; i < imDrawData->CmdListsCount; i++) + { + const ImDrawList *cmd_list = imDrawData->CmdLists[i]; + for (auto j = 0; j < cmd_list->CmdBuffer.Size; j++) + { + const ImDrawCmd *pcmd = &cmd_list->CmdBuffer[j]; + VkRect2D scissorRect; + scissorRect.offset.x = glm::max(pcmd->ClipRect.x, 0.0f); + scissorRect.offset.y = glm::max(pcmd->ClipRect.y, 0.0f); + scissorRect.extent.width = static_cast<uint32_t>(pcmd->ClipRect.z - pcmd->ClipRect.x); + scissorRect.extent.height = static_cast<uint32_t>(pcmd->ClipRect.w - pcmd->ClipRect.y); + vkCmdSetScissor(commandBuffer, 0, 1, &scissorRect); + + if (static_cast<VkDescriptorSet>(pcmd->TextureId) != nullptr) + { + VkDescriptorSet desc_set[1] = {static_cast<VkDescriptorSet>(pcmd->TextureId)}; + vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, desc_set, 0, nullptr); + } + else + { + // bind the font + vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, &descriptorSet, 0, nullptr); + } + vkCmdDrawIndexed(commandBuffer, pcmd->ElemCount, 1, indexOffset, vertexOffset, 0); + indexOffset += pcmd->ElemCount; + } + vertexOffset += cmd_list->VtxBuffer.Size; + } + } +} + +void ImGUIUtil::TextColorAlign(int align, const ImVec4 &col, const char *text, ...)
+{ + va_list vaList; + va_start(vaList, text); + + float font_width = ImGui::CalcTextSize(text).x; + + switch (align) + { + case 1: + ImGui::SameLine( + ImGui::GetContentRegionAvail().x * 0.5f - font_width * 0.5f); + break; + case 2: + ImGui::SameLine( + ImGui::GetContentRegionAvail().x - font_width); + break; + case 0: + default: + break; + } + + ImGui::TextColoredV(col, text, vaList); + + va_end(vaList); +} + +void ImGUIUtil::handle_key_event(vkb::KeyCode code, vkb::KeyAction action) +{ + ImGuiIO &io = ImGui::GetIO(); + const ImGuiKey imgui_key = KeyCodeToImGuiKey(code); + if (imgui_key != ImGuiKey_None) + { + io.AddKeyEvent(imgui_key, action == vkb::KeyAction::Down || action == vkb::KeyAction::Repeat); + } + + // Update modifier states using ImGui keys + io.KeyCtrl = ImGui::IsKeyDown(ImGuiKey_LeftCtrl) || ImGui::IsKeyDown(ImGuiKey_RightCtrl); + io.KeyShift = ImGui::IsKeyDown(ImGuiKey_LeftShift) || ImGui::IsKeyDown(ImGuiKey_RightShift); + io.KeyAlt = ImGui::IsKeyDown(ImGuiKey_LeftAlt) || ImGui::IsKeyDown(ImGuiKey_RightAlt); + io.KeySuper = ImGui::IsKeyDown(ImGuiKey_LeftSuper) || ImGui::IsKeyDown(ImGuiKey_RightSuper); +} + +bool ImGUIUtil::GetWantKeyCapture() +{ + ImGuiIO &io = ImGui::GetIO(); + return io.WantCaptureKeyboard; +} + +void ImGUIUtil::charPressed(uint32_t key) +{ + ImGuiIO &io = ImGui::GetIO(); + io.AddInputCharacter(key); +} diff --git a/samples/complex/render_octomap/ImGUIUtil.h b/samples/complex/render_octomap/ImGUIUtil.h new file mode 100644 index 0000000000..46c0503507 --- /dev/null +++ b/samples/complex/render_octomap/ImGUIUtil.h @@ -0,0 +1,105 @@ +/* Copyright (c) 2024-2026, Holochip Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef IMGUI_UTIL_H +#define IMGUI_UTIL_H + +#include "api_vulkan_sample.h" +#include "core/buffer.h" +#include "platform/input_events.h" + +#include "Screens/MapView.h" +#include <imgui.h> + +// ---------------------------------------------------------------------------- +// ImGUI class +// ---------------------------------------------------------------------------- + +class ImGUIUtil +{ + private: + // Vulkan resources for rendering the UI + VkSampler sampler; + std::unique_ptr<vkb::core::BufferC> vertexBuffer; + std::unique_ptr<vkb::core::BufferC> indexBuffer; + uint32_t vertexCount = 0; + uint32_t indexCount = 0; + VkDeviceMemory fontMemory = VK_NULL_HANDLE; + std::unique_ptr<vkb::core::Image> font_image; + std::unique_ptr<vkb::core::ImageView> font_image_view; + VkPipelineCache pipelineCache; + VkPipelineLayout pipelineLayout; + VkPipeline pipeline; + VkDescriptorPool descriptorPool; + VkDescriptorSetLayout descriptorSetLayout; + VkDescriptorSet descriptorSet; + VkPhysicalDeviceDriverProperties driverProperties = {}; + ApiVulkanSample *base; + ImGuiStyle vulkanStyle; + ImFont *montserratExtraBoldNormal; + ImFont *montserratExtraBoldSmall; + ImFont *montserratBoldNormal; + ImFont *montserratRegularNormal; + int selectedStyle = 0; + float windowWidth, windowHeight; + bool needsUpdateBuffers = false; + + public: + enum ViewState + { + LIVEMAPS_ACTIVE, + } state; + + MapView MapsView; + + // UI params are set via push constants + struct PushConstBlock + { + glm::vec2 scale; + glm::vec2 translate; + } pushConstBlock; + + explicit ImGUIUtil(ApiVulkanSample *_base); + ~ImGUIUtil(); + + // Initialize styles, keys, etc. + void init(float width, float height); + void setStyle(uint32_t index); + static void TextColorAlign(int align, const ImVec4 &col, const char *text, ...); + + // Initialize all Vulkan resources used by the ui + void initResources(VkRenderPass renderPass, VkQueue copyQueue); + + // Starts a new imGui frame and sets up windows and ui elements + bool newFrame(bool updateFrameGraph); + + // Update vertex and index buffer containing the imGui elements when required + // Returns true if buffers were recreated (requiring command buffer rebuild) + bool updateBuffers(); + + // Draw current imGui frame into a command buffer + void drawFrame(VkCommandBuffer commandBuffer); + + // Framework input path (platform-agnostic, incl. Direct-to-Display) + static void handle_key_event(vkb::KeyCode code, vkb::KeyAction action); + + static bool GetWantKeyCapture(); + + static void charPressed(uint32_t key); +}; + +#endif // IMGUI_UTIL_H diff --git a/samples/complex/render_octomap/README.adoc b/samples/complex/render_octomap/README.adoc new file mode 100644 index 0000000000..16374c7b23 --- /dev/null +++ b/samples/complex/render_octomap/README.adoc @@ -0,0 +1,46 @@ +//// +- Copyright (c) 2025-2026, Holochip Inc. +- +- SPDX-License-Identifier: Apache-2.0 +- +- Licensed under the Apache License, Version 2.0 the "License"; +- you may not use this file except in compliance with the License. +- You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +- +//// +== Rendering an OctoMap +SLAM, or Simultaneous Localization and Mapping, is the main driving force behind modern robotics and AR.
It is what allows a device to understand and interact with the physical world. The main step in localization is determining where the camera is right now in relation to where it has been in the past. The key idea: if you know where you were (a fix), and you know your momentum, direction, and the elapsed time, you can predict where you will be in the future. This process is called dead reckoning. + +Doing dead reckoning with a camera and an IMU moves us into https://virtualrealitypop.com/a-tectonic-shift-in-augmented-reality-c095d0c69df[Visual Inertial Odometry] (VIO). However, VIO only tells us where we are in relation to where we were; that is localization alone, and no map is generated or saved in the process. + +So at one frame we have a point cloud from our VIO efforts, and at the next frame we have another. If we can register one point cloud with the other, we can join them into a single point cloud. This kind of point cloud registration is commonly done with algorithms like https://en.wikipedia.org/wiki/Iterative_closest_point[Iterative Closest Point] (ICP). + +With our joined point cloud, we also need to recognize when we have been somewhere before; otherwise we would end up with disjoint maps. That is where loop closure comes in. + +This high-level background explains why there is so much demand for working with and rendering dynamically generated point clouds. ARCore and ARKit can both create point cloud maps, and everything from drones to robots uses the same basic system to register and deal with the world around them. To navigate a room, a drone or robot may need to determine whether a voxel is occupied, using an occupancy grid. This gives rise to solutions optimized for storing such maps so that they can be updated dynamically in real time. + +The https://octomap.github.io/[OctoMap] library provides exactly such a data structure. + +In this sample, we demonstrate how instancing in Vulkan lets us dynamically display and update an OctoMap that was generated by ARKit. Using MoltenVK, we can display the map in real time and communicate it rapidly over a network for desktops to render and combine, also in real time. Here we simply show how the render process works; the sketch below outlines the core idea. Additionally, we provide a Vulkan ImGui wrapper and demonstrate how it is used. It is nearly identical to the one found in the Framework, but it can also render images in the GUI, and we use it to demonstrate how to place and size a viewport render context from the GUI to the rendering backend. This sample joins several techniques in a way that mimics how a real-world engine might work.
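At a high level, drawing the map boils down to walking the tree's occupied leaves and emitting one instance per voxel. The following sketch shows that step using the OctoMap API; the `VoxelInstance` layout is a hypothetical stand-in for the sample's actual per-instance vertex format, not the sample's own code:

[source,cpp]
----
#include <octomap/octomap.h>

#include <vector>

// Hypothetical per-instance record; the real sample defines its own
// layout to match render.vert.
struct VoxelInstance
{
    float x, y, z;        // voxel center
    float size;           // voxel edge length (depends on the leaf's depth)
};

std::vector<VoxelInstance> collect_occupied_voxels(const octomap::OcTree &tree)
{
    std::vector<VoxelInstance> instances;
    // Leaf iteration visits every stored voxel exactly once, at its native depth.
    for (auto it = tree.begin_leafs(), end = tree.end_leafs(); it != end; ++it)
    {
        if (tree.isNodeOccupied(*it))
        {
            instances.push_back({static_cast<float>(it.getX()),
                                 static_cast<float>(it.getY()),
                                 static_cast<float>(it.getZ()),
                                 static_cast<float>(it.getSize())});
        }
    }
    // The records are then uploaded to a vertex buffer bound with
    // VK_VERTEX_INPUT_RATE_INSTANCE, and a unit cube is drawn once with
    // vkCmdDrawIndexed(cmd, cube_index_count, instances.size(), 0, 0, 0).
    return instances;
}
----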
+ +== Real-world Project background +https://www.holochip.com[Holochip] is an awardee on the U.S. Environmental Protection Agency (US EPA) Phase II SBIR program Localization and Mapping AI Application (LAMA). LAMA is an iOS- and Android-compatible SLAM solution for disaster response teams. It allows responders to map, localize, place markers, notes, and voice recordings, use AI object detection in large spaces, and communicate that information with on-site coordinators in GPS- and network-denied environments. + +[cols="a,a", frame=none, grid=none] +|=== +| image::./images/mapping.png[mapping] +| image::./images/markers.png[markers] +|=== + +OctoMap rendering represents the real-world map in an easy-to-navigate manner, at a size that makes it practical for embedded devices to hold very large maps. Humans, however, benefit from a more textured scene, which is where captured Gaussian splats come in. This sample demonstrates the same scene rendered as a plain GLTF and as a GLTF with embedded Gaussian splats. Note how much smaller the OctoMap is to store and use, and how much easier it is for a human to look at the Gaussian splat scene and visually understand what the objects are. + +For further details on this project contact mailto:info@holochip.com[] diff --git a/samples/complex/render_octomap/Screens/MapView.cpp b/samples/complex/render_octomap/Screens/MapView.cpp new file mode 100644 index 0000000000..a54ffef39f --- /dev/null +++ b/samples/complex/render_octomap/Screens/MapView.cpp @@ -0,0 +1,189 @@ +/* Copyright (c) 2024-2026, Holochip Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "MapView.h" + +MapView::MapView() : + mapSize({153.0f, 221.0f}) +{ +} + +MapView::~MapView() = default; + +std::vector<VkWriteDescriptorSet> MapView::LoadAssets(ApiVulkanSample *, const VkDescriptorSetAllocateInfo &, VkQueue) +{ + return {}; +} + +void MapView::DrawSidebar() +{ + const ImVec4 sidebarColor = ImVec4( + 0x41 / 255.0f, + 0x40 / 255.0f, + 0x42 / 255.0f, + 1.0f); + + const ImVec4 buttonColor = ImVec4( + 0x00 / 255.0f, + 0xF1 / 255.0f, + 0xC6 / 255.0f, + 1.0f); + + const ImVec4 buttonActiveColor = ImVec4( + 0x00 / 255.0f, + 0x94 / 255.0f, + 0x81 / 255.0f, + 1.0f); + + const ImVec4 blackColor = ImVec4(0.0f, 0.0f, 0.0f, 1.0f); + + float oscWindowMainPadding = 20.0f; + float sidebarExpandedWidth = 240.0f; + float sidebarButtonWidth = sidebarExpandedWidth - (oscWindowMainPadding * 2); + float sidebarButtonHeight = 52.0f; + float buttonSpacing = 10.0f; + + // Get available height from the IO display size directly + ImGuiIO &io = ImGui::GetIO(); + float sidebarHeight = io.DisplaySize.y - (oscWindowMainPadding * 2); + + ImGui::SetCursorPosY(oscWindowMainPadding); + ImGui::SetCursorPosX(oscWindowMainPadding); + + ImGui::PushStyleColor(ImGuiCol_ChildBg, sidebarColor); + + // Create the sidebar child window (borderless) + ImGui::BeginChild("sidebar", ImVec2(sidebarExpandedWidth, sidebarHeight), false); + + ImGui::PushStyleVar(ImGuiStyleVar_FrameRounding, 12.0f); + ImGui::PushStyleColor(ImGuiCol_Text, blackColor); + + // Add initial padding using Dummy + ImGui::Dummy(ImVec2(0.0f, oscWindowMainPadding - ImGui::GetStyle().ItemSpacing.y)); + + // OCTOMAP BUTTON + ImGui::SetCursorPosX(oscWindowMainPadding); + + ImVec4 btnColor1 = (currentState == ViewState::Octomap) ?
buttonActiveColor : buttonColor; + ImGui::PushStyleColor(ImGuiCol_Button, btnColor1); + ImGui::PushStyleColor(ImGuiCol_ButtonHovered, btnColor1); + ImGui::PushStyleColor(ImGuiCol_ButtonActive, btnColor1); + + if (ImGui::Button("OCTOMAP##btn_octomap", ImVec2(sidebarButtonWidth, sidebarButtonHeight))) + { + if (currentState != ViewState::Octomap) + { + currentState = ViewState::Octomap; + stateChanged = true; + } + } + ImGui::PopStyleColor(3); + + // Add spacing between buttons + ImGui::Dummy(ImVec2(0.0f, buttonSpacing - ImGui::GetStyle().ItemSpacing.y)); + + // GLTF REGULAR BUTTON + ImGui::SetCursorPosX(oscWindowMainPadding); + + ImVec4 btnColor2 = (currentState == ViewState::GLTFRegular) ? buttonActiveColor : buttonColor; + ImGui::PushStyleColor(ImGuiCol_Button, btnColor2); + ImGui::PushStyleColor(ImGuiCol_ButtonHovered, btnColor2); + ImGui::PushStyleColor(ImGuiCol_ButtonActive, btnColor2); + + if (ImGui::Button("GLTF MAP##btn_gltf", ImVec2(sidebarButtonWidth, sidebarButtonHeight))) + { + if (currentState != ViewState::GLTFRegular) + { + currentState = ViewState::GLTFRegular; + stateChanged = true; + } + } + ImGui::PopStyleColor(3); + + // Add spacing between buttons + ImGui::Dummy(ImVec2(0.0f, buttonSpacing - ImGui::GetStyle().ItemSpacing.y)); + + // GAUSSIAN SPLATS BUTTON + ImGui::SetCursorPosX(oscWindowMainPadding); + + ImVec4 btnColor3 = (currentState == ViewState::GLTFSplats) ? buttonActiveColor : buttonColor; + ImGui::PushStyleColor(ImGuiCol_Button, btnColor3); + ImGui::PushStyleColor(ImGuiCol_ButtonHovered, btnColor3); + ImGui::PushStyleColor(ImGuiCol_ButtonActive, btnColor3); + + if (ImGui::Button("SPLATS##btn_splats", ImVec2(sidebarButtonWidth, sidebarButtonHeight))) + { + if (currentState != ViewState::GLTFSplats) + { + currentState = ViewState::GLTFSplats; + stateChanged = true; + } + } + ImGui::PopStyleColor(3); + + ImGui::PopStyleColor(); // Text color + ImGui::PopStyleVar(); // FrameRounding + + ImGui::EndChild(); + ImGui::PopStyleColor(); // ChildBg +} + +bool MapView::DrawUI() +{ + ImGuiStyle &style = ImGui::GetStyle(); + style.ChildRounding = 0.0f; + + const ImVec4 oscSidebarColor = ImVec4( + 0x41 / 255.0f, + 0x40 / 255.0f, + 0x42 / 255.0f, + 1.0f); + + float oscWindowMainPadding = 20.0f; + + style.WindowRounding = 12.0f; + style.ChildRounding = 12.0f; + style.FrameRounding = 12.0f; + ImGui::PushStyleVar(ImGuiStyleVar_FrameRounding, 12.0f); + ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(0, 0)); + + float sidebarExpandedWidth = 240.0f; + float displaySpaceY = oscWindowMainPadding; + float displaySpaceX = (oscWindowMainPadding * 2) + sidebarExpandedWidth; + float displaySpaceHeight = ImGui::GetContentRegionAvail().y - (oscWindowMainPadding * 2); + float displaySpaceWidth = ImGui::GetContentRegionAvail().x - (oscWindowMainPadding * 3) - sidebarExpandedWidth; + + // Draw the sidebar with buttons + DrawSidebar(); + + // Draw the main display area + ImGui::SetCursorPosY(displaySpaceY); + ImGui::SetCursorPosX(displaySpaceX); + // Semi-transparent panel background so the UI is visible without fully hiding the 3D map. 
+ ImGui::PushStyleColor(ImGuiCol_ChildBg, ImVec4(0x41 / 255.0f, 0x40 / 255.0f, 0x42 / 255.0f, 0.35f)); + ImGui::BeginChild("mapDisplay", ImVec2(displaySpaceWidth, displaySpaceHeight), false); + ImGui::PopStyleColor(); + ImGui::EndChild(); + + mapSize = {displaySpaceWidth, displaySpaceHeight}; + mapPos = {displaySpaceX, displaySpaceY}; + + ImGui::PopStyleVar(); + ImGui::PopStyleVar(); + + return stateChanged; +} diff --git a/samples/complex/render_octomap/Screens/MapView.h b/samples/complex/render_octomap/Screens/MapView.h new file mode 100644 index 0000000000..4c9fcc8dc1 --- /dev/null +++ b/samples/complex/render_octomap/Screens/MapView.h @@ -0,0 +1,54 @@ +/* Copyright (c) 2024-2026, Holochip Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MAPVIEW_H +#define MAPVIEW_H + +#include "api_vulkan_sample.h" + +class MapView +{ + public: + // View states for different rendering modes + enum class ViewState + { + Octomap, // Default octomap rendering + GLTFRegular, // Regular GLTF map + GLTFSplats // Gaussian splats GLTF + }; + + MapView(); + ~MapView(); + + glm::vec2 mapPos; + glm::vec2 mapSize; + + // Current view state + ViewState currentState = ViewState::Octomap; + + // Flag to indicate view state changed + bool stateChanged = false; + + std::vector LoadAssets(ApiVulkanSample *base, const VkDescriptorSetAllocateInfo &allocInfo, VkQueue copyQueue); + + bool DrawUI(); + + private: + void DrawSidebar(); +}; + +#endif // MAPVIEW_H diff --git a/samples/complex/render_octomap/Tutorials/capturing-gaussian-splats.adoc b/samples/complex/render_octomap/Tutorials/capturing-gaussian-splats.adoc new file mode 100644 index 0000000000..7d62d9a0b5 --- /dev/null +++ b/samples/complex/render_octomap/Tutorials/capturing-gaussian-splats.adoc @@ -0,0 +1,717 @@ +//// +- Copyright (c) 2025-2026, Holochip Inc. +- +- SPDX-License-Identifier: Apache-2.0 +- +- Licensed under the Apache License, Version 2.0 the "License"; +- you may not use this file except in compliance with the License. +- You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +- +//// + += Capturing Gaussian Splats: A Practical Guide +:toc: +:toclevels: 3 +:sectnums: + +== Introduction + +So you want to capture your own Gaussian splat scenes? Great! Whether you're scanning a room, capturing outdoor environments, or digitizing objects, this tutorial will walk you through the process. We'll cover the different capture methods, what hardware and software you need, and how to turn your raw data into beautiful Gaussian splats. + +The good news: you probably already have the most important tool - a decent camera or smartphone. 
The less good news: the processing pipeline has some complexity, and getting high-quality results takes practice. But don't worry, we'll walk through everything step by step. + +== What You're Actually Capturing + +Before we dive into methods and tools, let's clarify what we need to capture. Gaussian splat training requires understanding the 3D structure of your scene *and* its appearance from multiple viewpoints. Fundamentally, you need: + +**Multiple images** of the scene from different angles. The more viewpoints, the better. Think dozens to hundreds of photos, not just 5-10. + +**Camera poses** for each image - where the camera was positioned and oriented when that photo was taken. This is the 3D structure part. + +**Camera parameters** - focal length, lens distortion, image resolution. Modern tools can estimate these automatically, but knowing them helps. + +Optional but helpful: + +**Depth information** - if you have a depth camera, this accelerates training and improves quality. + +**Mask information** - defining what parts of the image contain your subject versus background. + +The capture process gets you the images and camera poses. The training process turns this into Gaussian splats. Let's look at the different ways to do the capture part. + +== Capture Method 1: Photogrammetry (Video or Photo Sequence) + +This is the most accessible method because it works with any camera - phone, DSLR, drone, whatever. You walk around your subject taking photos (or record a video and extract frames), and software figures out where the camera was for each shot. + +=== How It Works + +Photogrammetry software analyzes your images, finds common features (corners, edges, textures), and uses them to reconstruct both the camera positions and a 3D point cloud of the scene. This process is called Structure from Motion (SfM). Once you have camera poses, you can train Gaussian splats. + +The software is solving a chicken-and-egg problem: to know the 3D structure, it needs camera positions. To know camera positions, it needs the 3D structure. It solves this iteratively, starting with estimates and refining them. + +=== What You Need + +**Hardware:** + +- Any camera that can take reasonably sharp photos +- Smartphones work great - modern phone cameras are surprisingly good +- DSLRs/mirrorless give you more control and better quality +- 360° cameras work too, though processing is different + +**Software:** + +- *COLMAP* (free, open-source, industry standard for SfM) +- *Reality Capture* (commercial, fast, excellent quality) +- *Metashape* (formerly PhotoScan, commercial, photogrammetry focused) +- *Meshroom* (free, open-source, user-friendly interface) + +=== The Capture Process + +Let me walk you through capturing a room: + +**Step 1 - Plan your capture.** Walk around the space first. Identify problematic areas: shiny surfaces (mirrors, glass), plain textures (white walls), moving objects (people, pets, curtains). These confuse photogrammetry. Cover or remove them if possible. + +**Step 2 - Set up lighting.** Consistent lighting is crucial. Close curtains to avoid changing sunlight. Turn on all lights. Avoid harsh shadows. If you're outdoors, overcast days are actually better than bright sun - softer, more even lighting. + +**Step 3 - Camera settings.** If using a phone, lock exposure and focus. On a DSLR, shoot in manual mode: ISO 400-800, aperture f/5.6-f/8 (for good depth of field), shutter fast enough to avoid blur (1/60s minimum handheld, faster is better). Shoot in RAW if possible. 
+ +**Step 4 - Capture images.** Here's where technique matters: + +Walk around your subject in overlapping circles. Imagine you're orbiting at different heights. Take a photo every few steps, keeping 70-80% overlap between consecutive images. If you took 100 photos and every point in your scene appears in at least 10 of them, you're doing great. + +Keep the subject centered in frame. Avoid extreme angles initially (you can get creative once you have good coverage). Move smoothly - no sudden position jumps between shots. + +If shooting video instead, walk *slowly*. Like, uncomfortably slowly. 1 step per second kind of slow. Extract frames every 5-10 frames later (not every single frame - too much overlap and motion blur). + +For objects, place them on a turntable and rotate them while keeping the camera steady. Or keep the object still and move the camera around it. Either works. + +**Step 5 - Capture enough data.** For a small object: 50-100 photos. For a room: 200-400 photos. For a large outdoor space: 500-1000+ photos. More is better, but beyond a point you're just adding processing time. + +=== Processing with COLMAP + +COLMAP is the standard tool for photogrammetry. Here's the workflow: + +[source,bash] +---- +# 1. Feature extraction - finds interesting points in each image +colmap feature_extractor \ + --database_path database.db \ + --image_path images/ \ + --ImageReader.single_camera 1 \ + --ImageReader.camera_model SIMPLE_RADIAL + +# 2. Feature matching - finds the same points across different images +colmap exhaustive_matcher \ + --database_path database.db + +# 3. Structure from Motion - estimates camera poses and 3D structure +colmap mapper \ + --database_path database.db \ + --image_path images/ \ + --output_path sparse/ + +# 4. (Optional) Dense reconstruction - creates a dense point cloud +colmap image_undistorter \ + --image_path images/ \ + --input_path sparse/0 \ + --output_path dense/ + +colmap patch_match_stereo \ + --workspace_path dense/ + +colmap stereo_fusion \ + --workspace_path dense/ \ + --output_path dense/fused.ply +---- + +This gives you camera poses and a point cloud. Now you're ready to train Gaussian splats. + +=== Tips for Better Results + +**Texture is your friend.** Photogrammetry needs visual features to track. Plain white walls? Terrible. Textured wallpaper? Great. If capturing an object with little texture, temporarily add some - tape newspaper to it, place it on a textured surface, whatever. + +**Avoid motion blur.** Blurry images confuse the feature matching. Use a fast shutter speed or good lighting to keep everything sharp. + +**Watch out for reflections.** Shiny surfaces show different things from different angles, which breaks the assumptions of photogrammetry. Cover mirrors, shoot glass at angles to minimize reflections, or mask them out in post. + +**Mind the scale.** Photogrammetry reconstructs relative scale, not absolute. A dollhouse and a cathedral look the same to the algorithm. Include a known-size reference object if absolute measurements matter. + +**Check your coverage.** After capturing, review your photos. Did you miss any angles? Any parts of the scene that appear in fewer than 5 photos? Go back and fill those gaps. + +== Capture Method 2: SLAM (Simultaneous Localization and Mapping) + +SLAM is what robots and AR devices use to understand their environment in real-time. Unlike photogrammetry (which processes everything after capture), SLAM tracks the camera pose *while* you're capturing. 
This is what the `render_octomap` sample renders: a map generated by Holochip's real-time SLAM. Note that Holochip's solution generates the GLTF with compressed, embedded Gaussian splats in real time, with no extra tools required.
+
+=== How It Works
+
+SLAM tracks visual features frame-to-frame, building a map of the environment while simultaneously tracking the camera's position in that map. Modern SLAM systems can run in real time on a laptop, giving you immediate feedback about your capture quality.
+
+The advantage over photogrammetry? You see the reconstruction as you capture, so you know immediately if you've missed areas or if the tracking failed. The disadvantage? It's more sensitive to fast motion and requires more setup.
+
+=== What You Need
+
+**Hardware:**
+
+- A camera with a good frame rate (30+ FPS)
+- Ideally, an RGB-D camera (color + depth) such as the Intel RealSense D435
+- Or just a regular webcam/phone camera (visual SLAM only)
+
+**Software:**
+
+- *ORB-SLAM3* (open-source, state-of-the-art visual SLAM)
+- *RTAB-Map* (open-source, designed for RGB-D cameras, user-friendly)
+- *Open3D* (library with SLAM capabilities)
+- *ARKit* (iOS, for iPhone/iPad with LiDAR)
+- *ARCore* (Android, though less accurate for reconstruction)
+
+=== The Capture Process with RTAB-Map
+
+Let's walk through capturing a scene with RTAB-Map and an RGB-D camera:
+
+**Step 1 - Set up your camera.** Connect your RGB-D camera (e.g., RealSense D435). Launch RTAB-Map and configure it to use your camera.
+
+**Step 2 - Start mapping.** Click "Start" and you'll see a live view with tracked features. As you move the camera, you'll see the 3D map building in real time.
+
+**Step 3 - Capture technique.** This is different from photogrammetry:
+
+Move *smoothly* and *slowly*. SLAM tracks features frame-to-frame, so sudden movements break tracking. Think "sliding" not "stepping."
+
+Keep the camera roughly level. Extreme movement is a weak point in many SLAM implementations.
+
+Look at areas with good texture first. SLAM needs to initialize by tracking features. Starting in front of a white wall? Bad. Starting looking at a bookshelf? Good. The easier you are on your SLAM implementation, the better it can perform. Several professional-grade solutions compensate for the bad capture practices encountered in the real world.
+
+Revisit areas - this helps SLAM "close loops" and reduce drift. If you capture a room, end where you started. SLAM will recognize it's back at the beginning and correct any accumulated error.
+
+Watch the tracking status in the UI. If it says "lost tracking," stop, back up slightly, and let it reacquire. Don't continue capturing while tracking is lost.
+
+**Step 4 - Save the map.** When done, RTAB-Map saves:
+
+- Camera trajectory (poses for each frame)
+- Point cloud reconstruction
+- RGB-D image data
+
+Export the camera poses and images in a format Gaussian splat trainers understand (more on this later).
+
+== Capture Method 3: Structured Light and Laser Scanning
+
+For high-precision captures of smaller objects, structured light scanners and laser scanners give you submillimeter accuracy. These are specialized devices, but worth mentioning.
+
+=== How It Works
+
+Structured light scanners project patterns (lines, grids, dots) onto your subject and capture how they deform. This directly gives you depth. Rotate the object (or scanner), capture from multiple angles, align the scans, and you have a complete 3D model.
+
+Laser scanners use a laser to measure distances, rotating to scan the entire field of view.
They're more common in architecture and industrial settings. + +=== What You Need + +**Hardware:** + +- Desktop scanners: Revopoint POP 3, Creality CR-Scan, Shining 3D (consumer, $300-$1000) +- Professional scanners: Artec Eva, Faro Focus (professional, $10,000+) +- DIY: You can build structured light scanners with projectors and cameras + +**Software:** + +- Usually included with the scanner +- *CloudCompare* for point cloud processing +- *MeshLab* for mesh processing +- Export point clouds and camera poses for Gaussian splat training + +=== When to Use This Method + +Object scanning where precision matters - jewelry, mechanical parts, small sculptures. You get extremely accurate geometry, which helps Gaussian splat training converge faster. + +Not practical for room-scale or outdoor captures. These scanners work at close range. + +== Capture Method 4: Multi-View Stereo Rigs + +For professional productions, multi-camera rigs capture dozens or hundreds of synchronized views simultaneously. Think "bullet time" from The Matrix, but for 3D reconstruction. + +=== How It Works + +Mount 50-200+ cameras in a geodesic dome or along rails. Synchronize their shutters. Capture all views at once. Process with photogrammetry, but since you have so many simultaneous views, reconstruction is faster and more accurate. + +=== What You Need + +This is specialized equipment: + +- Synchronized camera arrays (GoPros, machine vision cameras) +- Hardware trigger systems +- Serious computing power for processing +- Custom calibration workflows + +Companies like Microsoft (Volumetric Video), Intel (TrueView), and various studios use these setups. + +=== When to Use This Method + +Performance capture - actors, athletes, dynamic scenes. Anywhere you need to capture a moment frozen in time from all angles. + +Not for most users due to cost and complexity, but it's how professional volumetric video is captured. + +== From Capture to Gaussian Splats: The Training Pipeline + +Okay, you've captured your scene using one of the methods above. Now you have: + +- A collection of images +- Camera poses for each image +- (Optionally) a point cloud + +How do you turn this into Gaussian splats? + +=== The Training Process + +Training Gaussian splats is an optimization process. It starts with a point cloud (either from your capture or random initialization), treats each point as a Gaussian splat, and iteratively adjusts the positions, rotations, scales, opacities, and colors to match your captured images. + +The algorithm renders the current splats from each camera pose, compares the result to your actual captured image, and adjusts the splats to minimize the difference. Repeat for thousands of iterations, and you get photorealistic Gaussian splats. + +=== Available Training Tools + +**1. Original 3D Gaussian Splatting Implementation** + +The reference implementation from INRIA: + +[source,bash] +---- +# Clone the repo +git clone https://github.com/graphdeco-inria/gaussian-splatting --recursive + +# Install dependencies +cd gaussian-splatting +conda create -n gaussian_splatting python=3.8 +conda activate gaussian_splatting +pip install -r requirements.txt + +# Train on your data (must be in COLMAP format) +python train.py -s /path/to/your/scene -m output/model + +# Render views +python render.py -m output/model + +# Export to GLTF (using community tools) +python convert_to_gltf.py -m output/model -o output/scene.gltf +---- + +**2. 
Nerfstudio** + +Nerfstudio is a framework for training various neural rendering methods, including Gaussian splats: + +[source,bash] +---- +# Install nerfstudio +pip install nerfstudio + +# Process your images with COLMAP +ns-process-data images --data /path/to/images --output-dir /path/to/processed + +# Train Gaussian splats +ns-train splatfacto --data /path/to/processed + +# View in browser +ns-viewer --load-config outputs/.../config.yml +---- + +Nerfstudio is more user-friendly with great visualization tools and a web viewer. + +**3. Gaussian Splatting Studio** + +A GUI application that wraps the training process: + +- Point-and-click interface +- Built-in COLMAP integration +- Real-time training preview +- Export options for various formats + +Perfect if you don't want to touch the command line. + +**4. Luma AI** (Cloud-based) + +Upload your images to Luma's cloud service. They handle processing and training, then you download the result. Easiest option but requires internet and their service. + +=== Data Format: Getting Your Capture into the Right Format + +Training tools expect data in a specific format. The most common is COLMAP format: + +[source] +---- +scene/ + images/ + IMG_0001.jpg + IMG_0002.jpg + ... + sparse/ + 0/ + cameras.bin + images.bin + points3D.bin +---- + +If you captured with COLMAP, you already have this. + +If you captured with RTAB-Map or other SLAM: + +- Export images to `images/` +- Export camera poses in COLMAP format (or TUM format and convert) +- Some tools provide conversion scripts + +If you have video: + +- Extract frames: `ffmpeg -i video.mp4 -qscale:v 2 images/img_%04d.jpg` +- Run COLMAP on the frames to get camera poses + +=== Training Tips + +**Start with lower resolution.** Training at 1920x1080 is slow. Downsample to 960x540 for initial tests, verify quality, then do a final training run at full resolution. + +**Watch the metrics.** Training tools report PSNR (Peak Signal-to-Noise Ratio) and SSIM (Structural Similarity). Higher is better. If they plateau early, something's wrong with your capture. + +**Point cloud initialization matters.** If you have a good point cloud from COLMAP or SLAM, use it. Random initialization works but takes longer. + +**Iterations and learning rate.** Default settings are usually good (30,000 iterations), but you can adjust. More complex scenes might benefit from longer training. + +**Densification.** During training, the algorithm adds new splats where needed and removes useless ones. This "densification" happens automatically but can be tuned. + +== Practical Capture Scenarios + +Let's walk through some common scenarios with specific advice. + +=== Scenario 1: Indoor Room Scan + +**Best method:** Photogrammetry or RGB-D SLAM + +**Hardware:** Smartphone or RGB-D camera (RealSense, iPhone with LiDAR, Android (some models)) + +**Process:** + +1. Close curtains for consistent lighting +2. Remove or mask moving objects (people, pets, fans) +3. If using photogrammetry: Walk around taking 300-400 photos with 70% overlap +4. If using SLAM: Walk around slowly with RGB-D camera, ending where you started +5. Process with COLMAP or RTAB-Map +6. 
Train with nerfstudio or the Gaussian splatting reference implementation
+
+**Common issues:**
+
+- White walls lack texture → Temporarily hang posters or place textured objects
+- Mirrors and glass → Cover them or mask in post
+- Windows showing outside → Mask them out or close curtains
+- Large plain surfaces (floors) → Walk at different heights to get multiple angles
+
+=== Scenario 2: Object Scanning
+
+**Best method:** Photogrammetry with turntable, or structured light scanner
+
+**Hardware:** Camera + turntable, or handheld RGB-D, or structured light scanner
+
+**Process:**
+
+1. Place object on turntable with textured (non-reflective) background
+2. Set up even, diffuse lighting from multiple angles
+3. If using turntable: Rotate 10° at a time, capture 36 photos for full 360°
+4. Repeat at different heights (horizontal ring, 30° above, 30° below)
+5. If object has top details, capture top-down views
+6. Process with COLMAP or scanner software
+7. Train Gaussian splats
+
+**Common issues:**
+
+- Shiny objects → Use polarized light or powder spray (removable)
+- Transparent objects → Place on contrasting background, use back-lighting
+- Thin structures (wires, hair) → Multiple passes from different angles
+- Symmetrical objects → Add temporary asymmetric markers for tracking
+
+=== Scenario 3: Outdoor Environment
+
+**Best method:** Photogrammetry with drone or ground-level camera
+
+**Hardware:** Drone with camera, or a handheld camera at ground level
+
+**Process:**
+
+1. Choose an overcast day for even lighting (or a consistent time of day if capturing in sun)
+2. If using drone: Fly in automated grid pattern at multiple altitudes
+3. If ground-level: Walk around perimeter, capturing overlapping images
+4. Capture 500-1000+ photos for large areas
+5. Process with COLMAP or Reality Capture (better for large scenes)
+6. Train Gaussian splats
+
+**Common issues:**
+
+- Moving vegetation (trees, grass) → Capture on calm day, or accept some blur
+- Changing lighting (sun, clouds) → Capture quickly or on overcast day
+- Sky/uniform areas → COLMAP struggles; mask sky or focus on ground features
+- Scale and drift → Include ground control points with known positions
+
+=== Scenario 4: Performance/Face Capture
+
+**Best method:** Multi-view stereo rig, or single-camera video with careful technique
+
+**Hardware:** Multiple synchronized cameras, or high-frame-rate single camera
+
+**Process (single camera):**
+
+1. Have subject stay still
+2. Move camera around them in smooth, slow orbit
+3. Capture 200-300 frames (video at 30-60fps for 5-10 seconds)
+4. Extract frames, process with COLMAP
+5. Train Gaussian splats
+
+**Process (multi-camera):**
+
+1. Calibrate all cameras
+2. Synchronize shutters
+3. Capture single moment from all angles
+4. Process with photogrammetry
+5.
Train Gaussian splats
+
+**Common issues:**
+
+- Subject movement → Very hard to handle; need multi-camera setup or very patient subject
+- Hair and fine details → Need many views and high resolution
+- Expression capture → Requires video or very fast multi-camera capture
+
+== Quality Checklist
+
+Before training, verify you have:
+
+- **Sufficient overlap** - Every point visible in 10+ images
+- **Sharp images** - No motion blur
+- **Consistent lighting** - No dramatic changes between shots
+- **Good texture** - Photogrammetry needs features to track
+- **Complete coverage** - No missing angles (especially backs of objects)
+- **Proper exposure** - No blown-out highlights or crushed blacks
+- **Stable capture** - Smooth motion for SLAM, no sudden camera jumps
+
+After COLMAP/SLAM processing, check:
+
+- **Camera poses look correct** - Visualize in COLMAP GUI or CloudCompare
+- **Point cloud looks recognizable** - Should roughly resemble your scene
+- **No obvious registration errors** - Point cloud shouldn't be split or misaligned
+- **Reasonable number of reconstructed cameras** - If you took 300 photos but only 50 registered, something's wrong
+
+== Common Problems and Solutions
+
+**Problem:** COLMAP reconstructs very few cameras (only 20 out of 200 images)
+
+**Solution:**
+
+- Images might be too blurry → Recapture with faster shutter
+- Insufficient overlap → Images are too different; capture with more overlap
+- Scene too uniform → Add temporary texture or capture a different subject
+- Wrong camera model → Try different models (SIMPLE_RADIAL, PINHOLE, RADIAL)
+
+**Problem:** Point cloud looks disconnected or has multiple copies of the same thing
+
+**Solution:**
+
+- Insufficient overlap between different parts of your capture path
+- Include "connecting" shots that bridge the gaps
+- Might need to capture more images showing the relationship between parts
+
+**Problem:** SLAM lost tracking during capture
+
+**Solution:**
+
+- Movement was too fast → Recapture with slower, smoother motion
+- Area was too featureless → Start in a textured area, move to plain areas gradually
+- Lighting was too dark or had extreme contrast → Improve lighting
+
+**Problem:** Gaussian splat training produces blurry results
+
+**Solution:**
+
+- Camera poses might be wrong → Check COLMAP/SLAM output quality
+- Not enough training iterations → Train longer
+- Images are actually blurry → Recapture with better camera settings
+- Insufficient views → Capture more images from more angles
+
+**Problem:** Gaussian splats look good from training views but bad from novel views
+
+**Solution:**
+
+- Overfitting → Not enough training views; capture more diverse angles
+- Poor coverage → Missing key viewpoints; recapture with better coverage
+- Wrong camera parameters → COLMAP estimated wrong intrinsics
+
+== Recommended Workflows for Beginners
+
+**Easiest and Real-Time (Average Quality): Holochip LAMA**
+
+1. Use Holochip's LAMA app
+2. Connect to the Holochip LAMA server
+3. Watch the map populate in near real time with Gaussian splats
+4. Retrieve the GLTF with embedded Gaussian splats and the GeoJSON output from the saved maps folder
+
+**Easiest: Smartphone + Cloud Processing**
+
+1. Use a phone with the Luma AI app
+2. Record video walking slowly around subject
+3. Upload to Luma AI for processing
+4. Download trained Gaussian splats
+5. Convert to GLTF if needed
+
+**Best Quality: DSLR + COLMAP + Local Training**
+
+1. Use DSLR/mirrorless camera with manual settings
+2. Capture 200-400 photos with 70-80% overlap
+3.
Process with COLMAP for camera poses +4. Train with nerfstudio locally +5. Export to GLTF + +**Fastest: RGB-D Camera + RTAB-Map** + +1. Buy Intel RealSense D435i (~$300) +2. Capture with RTAB-Map in real-time +3. Export camera poses and images +4. Train with Gaussian splatting implementation +5. Export to GLTF + +== Advanced Topics + +=== Camera Calibration + +If COLMAP struggles or you want more control, pre-calibrate your camera: + +[source,bash] +---- +# Print checkerboard pattern +# Capture 20+ images of checkerboard from different angles +# Run OpenCV calibration: +python calibrate_camera.py --images calibration_images/ --output camera_params.txt + +# Use these parameters in COLMAP: +colmap feature_extractor \ + --database_path database.db \ + --image_path images/ \ + --ImageReader.camera_params "f,cx,cy,k1" +---- + +Calibration is especially important for wide-angle lenses (lots of distortion) or specialized cameras. + +=== Mask-Based Capture + +If you want to capture an object without the background, use masks: + +1. Capture normally +2. Use Photoshop, GIMP, or Segment Anything Model (SAM) to create masks for each image +3. Place masks in `masks/` folder (same filename as images, but .png) +4. Train with masks to ignore background +5. Result: Clean object without background clutter + +=== Multi-Stage Captures + +For very large or complex scenes: + +1. Capture in sections +2. Process each section independently with COLMAP +3. Merge point clouds in CloudCompare +4. Align camera poses +5. Train Gaussian splats on merged data + +This keeps processing manageable for huge scenes. + +=== Dynamic Scenes + +Capturing moving subjects or from a moving platform (ship at sea) is hard but possible: + +**Time-slice approach:** Multi-camera rig captures one moment from all angles. Process as static scene. + +**Per-frame approach:** Capture video + SLAM. Train separate Gaussian splats for each frame, then morph between them. Very expensive, but produces 4D (3D + time) results. + +**Sparse dynamic capture:** Capture mostly-static scene with dynamic elements. Train static Gaussian splats for background, separate dynamic models for moving parts. + +This is an active research area with new techniques emerging regularly. + +== Exporting for Use in This Sample + +After training, you need to export your Gaussian splats to GLTF format to use them in the `render_octomap` sample or other Vulkan applications. + +=== Using Community Tools + +[source,bash] +---- +# Clone the gltf export tools +git clone https://github.com/antimatter15/splat +cd splat + +# Convert from .ply (Gaussian splatting output) to GLTF +python convert.py \ + --input /path/to/point_cloud.ply \ + --output scene.gltf \ + --format gltf + +# Or use the COLMAP-to-GLTF converter +python colmap_to_gltf.py \ + --colmap_dir /path/to/colmap/sparse/0 \ + --output scene.gltf +---- + +=== Verifying the Export + +Load your GLTF in a viewer to verify: + +- glTF Viewer (web-based) +- Babylon.js Sandbox (web-based) +- This Vulkan sample! + +Check that: + +- Splats appear in correct positions +- Colors look right +- No missing regions +- File size is reasonable (compression working) + +== Summary: From Capture to Rendering + +Let's trace the complete pipeline one more time: + +1. **Plan your capture** - Choose method based on subject, hardware, and quality needs +2. **Capture data** - Photos, video, or RGB-D with camera poses +3. **Process captures** - COLMAP, RTAB-Map, or other tool to get camera poses and point cloud +4. 
**Verify quality** - Check reconstructed cameras and point cloud look correct +5. **Train Gaussian splats** - Run training for 20-30k iterations +6. **Export to GLTF** - Use conversion tools to create standardized file +7. **Load and render** - Use this sample or other viewer to visualize + +The whole process from capture to rendering can take as little as an hour for simple objects (with cloud processing) or several days for complex scenes with high-quality local training. + +The most important factors for success: + +- **Good capture technique** - Overlap, smooth motion, even lighting +- **Sufficient coverage** - Every part of your scene from many angles +- **Quality verification** - Check each stage before moving to the next +- **Patience** - Training takes time; don't rush it + +== Further Resources + +**Software:** + +- https://github.com/colmap/colmap[COLMAP] - Structure from Motion +- https://github.com/graphdeco-inria/gaussian-splatting[3D Gaussian Splatting] - Original implementation +- https://github.com/nerfstudio-project/nerfstudio[Nerfstudio] - User-friendly training framework +- https://github.com/introlab/rtabmap[RTAB-Map] - RGB-D SLAM +- https://github.com/raulmur/ORB_SLAM3[ORB-SLAM3] - Visual SLAM + +**Tutorials:** + +- Original 3DGS paper: "3D Gaussian Splatting for Real-Time Radiance Field Rendering" +- COLMAP documentation: Excellent tutorials on photogrammetry +- Nerfstudio documentation: Great getting-started guides + +**Communities:** + +- r/photogrammetry on Reddit +- 3DGS Discord servers +- COLMAP discussions on GitHub + +**Related Tutorials:** + +- link:gaussian-splats-rendering.adoc[Gaussian Splats Rendering Tutorial] - How to render what you've captured +- link:imgui-vulkan-integration.adoc[ImGui Vulkan Integration Tutorial] +- link:../README.adoc[render_octomap Sample README] diff --git a/samples/complex/render_octomap/Tutorials/gaussian-splats-rendering.adoc b/samples/complex/render_octomap/Tutorials/gaussian-splats-rendering.adoc new file mode 100644 index 0000000000..79a72f85af --- /dev/null +++ b/samples/complex/render_octomap/Tutorials/gaussian-splats-rendering.adoc @@ -0,0 +1,1293 @@ +//// +- Copyright (c) 2025-2026, Holochip Inc. +- +- SPDX-License-Identifier: Apache-2.0 +- +- Licensed under the Apache License, Version 2.0 the "License"; +- you may not use this file except in compliance with the License. +- You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +- +//// + += Gaussian Splats Rendering in Vulkan: A Comprehensive Guide +:toc: +:toclevels: 3 +:sectnums: + +== Introduction + +This tutorial demonstrates how to render 3D Gaussian Splats using Vulkan. Gaussian splatting is a novel approach to real-time radiance field rendering that represents scenes as collections of 3D Gaussians, enabling high-quality novel view synthesis at interactive frame rates. + +For background on the render_octomap sample and its SLAM context, see the link:../README.adoc[README]. + +== What are Gaussian Splats? + +Imagine you're trying to capture a real-world scene - maybe a room in your house or an outdoor environment - and you want to render it back in 3D. 
Traditionally, you might create a mesh with triangles and textures. But what if the scene has complex geometry like plants, fur, or intricate details? Meshes become unwieldy, and you need thousands of triangles plus UV mapping. + +Enter Gaussian splats. Instead of triangles, think of the scene as being made up of thousands (or millions) of soft, fuzzy blobs floating in 3D space. Each blob is a 3D Gaussian - mathematically speaking, it's that familiar bell curve from statistics, but extended into three dimensions. These Gaussians can be squashed, stretched, and rotated to match the shape of whatever they're representing. + +=== The Math Behind the Blobs + +Don't worry - the math isn't as scary as it looks. Each Gaussian is essentially defined by "where it is" and "what shape it has." The classic 3D Gaussian formula is: + +[stem] +++++ +G(x) = e^{-\frac{1}{2}(x-\mu)^T \Sigma^{-1} (x-\mu)} +++++ + +This basically says: the further you get from the center (`μ`), the more the value falls off, and the rate of falloff depends on the covariance matrix (`Σ`). The covariance matrix encodes both the size and orientation of the Gaussian. + +Now here's the clever part: instead of storing that covariance matrix directly (which would be 9 numbers), we split it into rotation and scale. Think of it like describing an ellipsoid by saying "take a sphere, stretch it by these amounts along each axis, then rotate it." Mathematically: + +[stem] +++++ +\Sigma = R S S^T R^T +++++ + +The rotation `R` orients the Gaussian in space (stored as a quaternion - 4 numbers), and the scale `S` determines how stretched it is along each axis (3 numbers). This decomposition makes Gaussians easy to manipulate: you can rotate them, scale them, and move them around independently. + +=== What Goes Into Each Splat? + +So what data do we actually need to store for each of these fuzzy blobs? Let's break it down: + +First, obviously, we need to know **where** it is in 3D space - that's the position (x, y, z), which takes 12 bytes if we use 32-bit floats. Then we need to know **how it's oriented** - that's the rotation stored as a quaternion (4 values = 16 bytes). The **size** comes from the scale factors (3 values = 12 bytes). We also need to know how **see-through** it is, which is the opacity (1 value = 4 bytes). + +Finally, there's color. The simplest approach is just RGB (3 values = 12 bytes), but here's where things get interesting: you can instead store spherical harmonics coefficients. These are like a compact way of encoding how the color changes depending on which angle you're viewing from - think of how real objects look different colors from different angles due to lighting. Full degree-3 spherical harmonics takes 48 float values (192 bytes), which is substantial but gives you view-dependent appearance for free. + +Add it all up: a simple RGB splat is 56 bytes, while one with full spherical harmonics is 236 bytes. Now multiply that by a million splats... yeah, we'll need to talk about compression later. + +=== Why Gaussians Work So Well + +You might be wondering, "Why Gaussians specifically? Why not some other shape?" Great question. Gaussians have some special mathematical properties that make them perfect for this: + +They're **smooth and continuous** - no hard edges means everything blends together naturally to create high-quality images. They're **differentiable everywhere**, which means you can use gradient descent to optimize them (critical for training). 
They have a **closed-form projection** from 3D to 2D - we can mathematically project a 3D Gaussian onto the screen as a 2D Gaussian without approximation. They're **fast to evaluate** - just an exponential function. And they naturally support **alpha blending** through the opacity value, so transparent objects just work. + +=== How Do They Compare to Other Techniques? + +If you've worked with 3D rendering, you're probably comparing Gaussian splats to techniques you already know. Let's talk about how they stack up. + +**Versus NeRF (Neural Radiance Fields):** NeRF was groundbreaking - it showed you could represent scenes as neural networks and get photorealistic results. But rendering a NeRF means ray marching through a neural network, which is slow. We're talking seconds per frame. Gaussian splats flip this around: instead of a neural network, you have an explicit collection of Gaussians that you can rasterize directly on the GPU. This gets you from "seconds per frame" to "60+ frames per second." The downside? You're storing more data (millions of splats vs. network weights), but with compression (coming up later), this becomes manageable. Plus, explicit splats are way easier to edit, manipulate, and stream than a neural network. + +**Versus Traditional Meshes:** Meshes are great for engineered objects - cars, buildings, game characters. But try capturing a real-world scene with tons of fine detail like tree leaves, grass, or fuzzy surfaces. You'll need millions of triangles, and you still won't capture the subtle transparency and view-dependent effects naturally. Gaussian splats excel here: each splat can be transparent, and with spherical harmonics, the color automatically changes with viewing angle. No UV mapping, no texture atlases - the appearance is baked into the splats themselves. + +**Versus Point Clouds:** Point clouds are simple - just a bunch of points in space with colors. But they have problems: zoom in and you see gaps between points; zoom out and you get aliasing. Gaussian splats solve both issues. Each "point" is actually a fuzzy blob that blends with its neighbors, so no gaps. The Gaussian falloff provides natural anti-aliasing. And because splats are oriented ellipsoids (not just spheres), they can represent surfaces more efficiently - one elongated splat can cover the area that would need many spherical points. + +== Storing Splats in GLTF Files + +Here's the thing: having a cool rendering technique is great, but if nobody can share their work, it stays isolated. This is where the `KHR_gaussian_splatting` GLTF extension comes in, and it's more important than you might think. + +=== The Practical Impact + +Think about your workflow: you capture a scene, process it, and want to share it. With GLTF: + +You can upload it to a Vulkan application that streams the splats to the GPU progressively - load a low-detail version first, then stream in more detail as needed. You can import it into game engines for XR applications - imagine walking through a captured real-world environment in VR. Museums can archive cultural heritage scans in a format that won't become obsolete. Film productions can use captured environments as background plates or reference geometry. All using standard tools and pipelines. + +And here's a bonus: GLTF already has extensions for compression (`KHR_mesh_quantization`, `KHR_draco_mesh_compression`). These work with Gaussian splats too, which brings us to our next topic. 
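+
+Before looking at how the extension lays this data out, it helps to write the uncompressed per-splat record from "What Goes Into Each Splat?" down as code; a sketch, with illustrative struct names that are not part of any file format:
+
+[source,cpp]
+----
+// 56-byte splat with plain RGB color
+struct SplatRGB
+{
+	float position[3];        // 12 bytes
+	float rotation[4];        // 16 bytes (quaternion)
+	float scale[3];           // 12 bytes
+	float opacity;            //  4 bytes
+	float color[3];           // 12 bytes (RGB)
+};
+static_assert(sizeof(SplatRGB) == 56, "all-float struct packs without padding");
+
+// 236-byte splat with degree-3 spherical harmonics in place of plain RGB
+struct SplatSH3
+{
+	float position[3];        // 12 bytes
+	float rotation[4];        // 16 bytes
+	float scale[3];           // 12 bytes
+	float opacity;            //  4 bytes
+	float sh[48];             // 192 bytes (16 coefficients × 3 color channels)
+};
+static_assert(sizeof(SplatSH3) == 236, "all-float struct packs without padding");
+----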
+ +=== Extension Structure + +[source,json] +---- +{ + "extensionsUsed": ["KHR_gaussian_splatting"], + "meshes": [{ + "name": "GaussianSplats", + "primitives": [{ + "attributes": { + "POSITION": 0, + "COLOR_0": 4 + }, + "extensions": { + "KHR_gaussian_splatting": { + "OPACITY": 3, + "ROTATION": 1, + "SCALE": 2, + "SH_DEGREE_0_COEF_0": 5, + "antialiased": false, + "colorSpace": "BT.709-sRGB", + "kernel": "ellipse", + "shDegree": 3 + } + }, + "mode": 0 + }] + }] +} +---- + +=== Attribute Accessors + +|=== +| Attribute | Type | Description + +| POSITION | VEC3 | Splat center positions +| ROTATION | VEC4 | Quaternion orientations +| SCALE | VEC3 | 3D scale factors +| OPACITY | SCALAR | Alpha values (0-1) +| COLOR_0 | VEC3 | RGB colors +| SH_DEGREE_0_COEF_0 | MAT3 | Spherical harmonics coefficients +|=== + +=== Extension Properties + +* `antialiased`: Enable anti-aliasing filter +* `colorSpace`: Color space ("BT.709-sRGB" or "linear") +* `kernel`: Splat kernel type ("ellipse" or "sphere") +* `shDegree`: Spherical harmonics degree (0-3) + +== The Compression Problem (And Solution) + +Okay, let's talk about the scary limiting factor: file size. Remember when we said a splat with spherical harmonics takes 236 bytes? And a typical scene has a million splats? Let's do the math: + +1 million splats × 236 bytes = 236 megabytes. For a single room. + +5 million splats × 236 bytes = 1.18 gigabytes. For a larger scene. + +Now imagine trying to load that on a mobile device. Or stream it over a network. Or store thousands of these captures. The raw data sizes are simply not practical for real-world use. Compression isn't a nice-to-have feature - it's absolutely essential. + +=== Storage Requirements Without Compression + +[options="header"] +|=== +| Scene Size | Splat Count | Uncompressed Size (RGB) | Uncompressed Size (SH Degree 3) +| Small room | 500,000 | 28 MB | 118 MB +| Large room | 2,000,000 | 112 MB | 472 MB +| Building floor | 5,000,000 | 280 MB | 1.18 GB +| Outdoor scene | 10,000,000 | 560 MB | 2.36 GB +|=== + +=== How Quantization Saves the Day + +The good news? Gaussian splat data compresses really well, and the secret is quantization. Here's the insight: do we *really* need 32-bit float precision for everything? + +Think about it: if a splat's position is off by 0.0001 units, will anyone notice? If a rotation quaternion has a tiny error, will the visual result be different? Spoiler: usually not. Gaussian splat data has some properties that make it perfect for compression: + +**Spatial coherence** - nearby splats tend to look similar. If you have 100 splats representing a wall, they probably have similar colors, scales, and orientations. + +**Perceptual tolerance** - our eyes are forgiving. Small errors in rotation, position, or color get lost in the final blended image. + +**Redundancy** - many splats share similar properties. Lots of splats might have the same opacity, or similar scales. + +So instead of using 32 bits per number, let's see what happens when we use fewer bits... + +==== Compressing Positions + +Let's start with positions. Here's a clever trick: instead of storing absolute world coordinates, we normalize them relative to the scene bounds. Say your scene extends from (0, 0, 0) to (10, 10, 10). 
We can express any position as a fraction from 0 to 1 along each axis: + +[source,cpp] +---- +// Normalize position to [0, 1] range +vec3 normalized = (position - scene_min) / (scene_max - scene_min); + +// Now instead of using floats, use 16-bit integers +uint16_t x = (uint16_t)(normalized.x * 65535.0f); +uint16_t y = (uint16_t)(normalized.y * 65535.0f); +uint16_t z = (uint16_t)(normalized.z * 65535.0f); + +// We just went from 12 bytes to 6 bytes - that's 50% savings! +---- + +How much precision did we lose? A 16-bit integer gives you 65,536 steps. For a 10-meter room, that's 0.15mm precision. Can your eye see a 0.15mm position error after everything is blended? Nope. + +==== Compressing Rotations + +Rotations are trickier. A quaternion has 4 components, but here's a mathematical fact: if you know 3 of them, you can calculate the 4th (since quaternions are normalized: x² + y² + z² + w² = 1). This gives us the "smallest-three" encoding: + +[source,cpp] +---- +// Find the largest component (so we can reconstruct it accurately) +int largest_idx = find_largest_component(quat); + +// Store only the other three +vec3 smallest_three = extract_other_components(quat, largest_idx); + +// Quantize these to 16-bit integers (or 10-bit for more aggressive compression) +int16_t q1 = (int16_t)(smallest_three.x * 32767.0f); +int16_t q2 = (int16_t)(smallest_three.y * 32767.0f); +int16_t q3 = (int16_t)(smallest_three.z * 32767.0f); + +// 16 bytes → 6 bytes (plus 2 bits to store which component we dropped) +---- + +That's a 62.5% reduction! The SPZ format in the official GLTF extension takes this further, using **10-bit signed integers** for each of the three components plus 2 bits for the index, fitting everything into just 32 bits total. This gives excellent precision while cutting storage to just 4 bytes per rotation. + +==== Compressing Scales + +Scale factors tend to cluster around certain ranges - most splats aren't huge or tiny. But here's the thing: scale values can vary by orders of magnitude (a splat might be 0.01 units or 10 units). If we quantize them linearly, we waste precision on large values and don't have enough for small values. + +Solution? Logarithmic encoding. We store log₂(scale) instead of scale directly: + +[source,cpp] +---- +// Take the log first (now our range is more uniform) +vec3 log_scale = log2(scale); + +// Quantize to 16-bit (now we have good precision across the range) +uint16_t sx = quantize_to_range(log_scale.x, -8.0f, 8.0f, 65535); +uint16_t sy = quantize_to_range(log_scale.y, -8.0f, 8.0f, 65535); +uint16_t sz = quantize_to_range(log_scale.z, -8.0f, 8.0f, 65535); + +// 12 bytes → 6 bytes, with better precision distribution +---- + +==== Compressing Opacity and Color + +These are the easiest wins. Opacity is just a number from 0 to 1. Do we need float precision? Nope - 8 bits gives you 256 levels of transparency, which is way more than anyone can distinguish: + +[source,cpp] +---- +uint8_t opacity = (uint8_t)(opacity_float * 255.0f); +// 4 bytes → 1 byte (75% reduction!) +---- + +Colors are similar. RGB with 32-bit floats? Overkill. Standard 8-bit per channel (like every image format uses) works great: + +[source,cpp] +---- +uint8_t r = (uint8_t)(color.r * 255.0f); +uint8_t g = (uint8_t)(color.g * 255.0f); +uint8_t b = (uint8_t)(color.b * 255.0f); +// 12 bytes → 3 bytes (75% reduction) +---- + +Even spherical harmonics coefficients can be quantized down to 8 or 16 bits per value. 
Sure, you lose some of the subtle view-dependent effects, but 192 bytes → 48 bytes (75% savings) is worth the imperceptible quality loss in most cases.
+
+=== Compression Ratios in Practice
+
+[options="header"]
+|===
+| Attribute | Original Size | Quantized Size | Reduction
+| Position | 12 bytes | 6 bytes (uint16×3) | 50%
+| Rotation | 16 bytes | 6 bytes (smallest-3) | 62.5%
+| Scale | 12 bytes | 6 bytes (uint16×3) | 50%
+| Opacity | 4 bytes | 1 byte (uint8) | 75%
+| Color (RGB) | 12 bytes | 3 bytes (uint8×3) | 75%
+| SH Degree 3 | 192 bytes | 48 bytes (uint8×48) | 75%
+| **Total (RGB)** | **56 bytes** | **22 bytes** | **61% reduction**
+| **Total (SH)** | **236 bytes** | **67 bytes** | **72% reduction**
+|===
+
+=== SPZ Compression: The Official Standard
+
+Here's the real story about compression in the GLTF standard: the `KHR_gaussian_splatting` extension works alongside a companion extension called `KHR_gaussian_splatting_compression_spz` that handles compression using the SPZ format.
+
+SPZ (a splat compression format) was specifically designed for Gaussian splats and achieves up to **90% compression compared to uncompressed PLY** while preserving visual fidelity and performance. This isn't a repurposed mesh compression format - it's purpose-built for splat data.
+
+==== How SPZ Works
+
+SPZ is clever about how it stores Gaussian splat attributes. The format can be used in two ways:
+
+**Option 1 - Decompressed to attributes:** The SPZ blob gets decompressed into standard GLTF accessors (position, rotation, scale, etc.) that your renderer reads normally.
+
+**Option 2 - Direct to rendering pipeline:** Advanced implementations can pass the compressed SPZ data directly to the GPU and decompress in shaders, saving memory bandwidth.
+
+The compression itself uses aggressive quantization optimized for splat data:
+
+**Positions** are stored relative to scene bounds and quantized.
+
+**Rotations** use an improved encoding: the smallest three components of the normalized quaternion are stored as **10-bit signed integers** (not 16-bit like earlier approaches), and the largest component is derived. The index of which component was dropped is stored in just 2 bits. This gives excellent precision in only 32 bits total.
+
+**Scales and opacity** are quantized to appropriate bit depths.
+
+**Spherical harmonics** (if present) support flexible encoding from degree 0 (no SH, just diffuse) up to degree 3 (full view-dependent appearance), letting you trade file size for visual quality.
+
+==== The Practical Impact
+
+Let's revisit our 1 million splat scene with the actual SPZ numbers:
+
+* **Uncompressed PLY**: 236 MB (with degree-3 spherical harmonics)
+* **SPZ compressed**: ~24 MB (90% reduction)
+
+That's right - the standard claims 90% compression while maintaining visual fidelity. A 24 MB file downloads in under 3 seconds on most mobile connections. That's the difference between "impractical" and "production-ready."
+
+==== Graceful Degradation
+
+Here's something smart: the `KHR_gaussian_splatting` extension includes graceful fallback to sparse point cloud rendering. If a viewer doesn't support Gaussian splat rendering, it can fall back to rendering the splats as simple points. You won't get the smooth, blended appearance, but you'll see *something* rather than nothing.
+
+==== Base Extension vs Compression Extension
+
+It's important to understand there are two related extensions:
+
+**KHR_gaussian_splatting** - The base extension that defines how Gaussian splats are represented in GLTF.
Splats are treated as point primitives with attributes for position, rotation, scale, transparency, and spherical harmonics.
+
+**KHR_gaussian_splatting_compression_spz** - The companion extension that defines SPZ compression. This is optional but highly recommended for any practical use case.
+
+You can technically use the base extension without compression (storing uncompressed splat data in GLTF buffers); however, this is only useful for small scenes, due to the large file sizes.
+
+=== Additional Compression Techniques
+
+Beyond SPZ compression, you can stack additional optimizations:
+
+==== 1. Mesh Compression Standards
+
+GLTF supports the `KHR_mesh_quantization` extension, which can work alongside Gaussian splats:
+
+* Normalized integers for positions and other attributes
+* Automatic dequantization in vertex shaders
+* Interoperability with existing GLTF tooling
+
+The quantization parameters are stored in the GLTF JSON, working in harmony with SPZ compression.
+
+==== 2. Container-Level Compression
+
+On top of SPZ, you can:
+
+* **GZIP the entire GLTF package**: Compress the .gltf and binary buffers together (modest additional savings)
+* **GLB format**: Use the binary GLTF container so the JSON and buffers ship as a single compact file
+
+=== GLTF Structure with SPZ Compression
+
+When you use the `KHR_gaussian_splatting_compression_spz` extension, the GLTF structure looks like this:
+
+[source,json]
+----
+{
+  "extensionsUsed": [
+    "KHR_gaussian_splatting",
+    "KHR_gaussian_splatting_compression_spz"
+  ],
+  "meshes": [{
+    "name": "GaussianSplats",
+    "primitives": [{
+      "mode": 0, // POINTS
+      "attributes": {
+        "POSITION": 0,
+        "COLOR_0": 1
+      },
+      "extensions": {
+        "KHR_gaussian_splatting": {
+          "ROTATION": 2,
+          "SCALE": 3,
+          "OPACITY": 4,
+          "shDegree": 3
+        },
+        "KHR_gaussian_splatting_compression_spz": {
+          "buffer": 5, // Points to SPZ blob
+          "byteOffset": 0,
+          "byteLength": 25000000
+        }
+      }
+    }]
+  }],
+  "buffers": [{
+    "uri": "splats.spz",
+    "byteLength": 25000000
+  }]
+}
+----
+
+The SPZ blob is stored as a buffer that the loader can either:
+
+1. Decompress into the standard accessors (POSITION, ROTATION, etc.)
+2. Pass directly to GPU shaders for decompression
+
+This flexibility means renderers can choose the approach that works best for their architecture, while the data format remains standard and portable.
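+
+As a concrete illustration of the smallest-three rotation encoding described above, here is a sketch of packing a unit quaternion into 32 bits (three 10-bit signed components plus a 2-bit index). This mirrors the idea rather than reproducing SPZ's exact bit layout:
+
+[source,cpp]
+----
+#include <cmath>
+#include <cstdint>
+
+// Pack: 2-bit index of the largest component + 3 × 10-bit signed values.
+uint32_t pack_quaternion(const float q[4])
+{
+	// Find the largest-magnitude component so we can drop it
+	int largest = 0;
+	for (int i = 1; i < 4; i++)
+		if (std::fabs(q[i]) > std::fabs(q[largest]))
+			largest = i;
+
+	// Flip signs so the dropped component is non-negative; q and -q
+	// represent the same rotation, so this loses nothing
+	float sign = (q[largest] < 0.0f) ? -1.0f : 1.0f;
+
+	uint32_t packed = static_cast<uint32_t>(largest) << 30;
+	int      shift  = 0;
+	for (int i = 0; i < 4; i++)
+	{
+		if (i == largest)
+			continue;
+		// Remaining components lie in [-1/√2, 1/√2]; rescale to [-511, 511]
+		float v = sign * q[i] * 1.41421356f;
+		auto  s = static_cast<int32_t>(std::lround(v * 511.0f));
+		packed |= (static_cast<uint32_t>(s) & 0x3FFu) << shift;
+		shift += 10;
+	}
+	return packed;
+}
+
+// Unpack: restore three components, derive the fourth from unit length.
+void unpack_quaternion(uint32_t packed, float q[4])
+{
+	int   largest = static_cast<int>(packed >> 30);
+	int   shift   = 0;
+	float sum     = 0.0f;
+	for (int i = 0; i < 4; i++)
+	{
+		if (i == largest)
+			continue;
+		auto s = static_cast<int32_t>((packed >> shift) & 0x3FFu);
+		if (s & 0x200)        // sign-extend the 10-bit value
+			s |= ~0x3FF;
+		q[i] = static_cast<float>(s) / 511.0f / 1.41421356f;
+		sum += q[i] * q[i];
+		shift += 10;
+	}
+	q[largest] = std::sqrt(std::fmax(0.0f, 1.0f - sum));
+}
+----
+
+The decoded quaternion may differ from the input by an overall sign, which is fine: `q` and `-q` encode the same rotation.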
+ +== Framework Integration + +The Vulkan-Samples framework has been extended to support gaussian splats: + +=== GaussianSplat Component + +A new scene graph component (`scene_graph/components/gaussian_splat.h`) stores splat data: + +[source,cpp] +---- +class GaussianSplat : public Component +{ +public: + enum class KernelType { Ellipse, Sphere }; + enum class ColorSpace { SRGB, Linear }; + + uint32_t splat_count = 0; + uint32_t sh_degree = 0; + bool antialiased = false; + KernelType kernel = KernelType::Ellipse; + ColorSpace color_space = ColorSpace::SRGB; + + // GPU buffers + std::unique_ptr position_buffer; + std::unique_ptr rotation_buffer; + std::unique_ptr scale_buffer; + std::unique_ptr opacity_buffer; + std::unique_ptr color_buffer; + std::unique_ptr sh_buffer; +}; +---- + +=== GLTF Loader Extension + +The `GLTFLoader` class recognizes the `KHR_gaussian_splatting` extension: + +[source,cpp] +---- +// In gltf_loader.h +#define KHR_GAUSSIAN_SPLATTING_EXTENSION "KHR_gaussian_splatting" + +// In gltf_loader.cpp +std::unordered_map GLTFLoader::supported_extensions = { + {KHR_LIGHTS_PUNCTUAL_EXTENSION, false}, + {KHR_GAUSSIAN_SPLATTING_EXTENSION, false} +}; +---- + +== Reading Gaussian Splats from GLTF + +Loading Gaussian splats from GLTF involves several steps: parsing the JSON structure, reading binary buffers, and creating GPU resources. + +=== Step 1: Detecting Gaussian Splat Data + +First, check if a GLTF file contains Gaussian splats: + +[source,cpp] +---- +bool has_gaussian_splats(const tinygltf::Model& model) +{ + // Check if the extension is used + auto it = std::find(model.extensionsUsed.begin(), + model.extensionsUsed.end(), + "KHR_gaussian_splatting"); + if (it == model.extensionsUsed.end()) + return false; + + // Check if any mesh primitive uses the extension + for (const auto& mesh : model.meshes) + { + for (const auto& primitive : mesh.primitives) + { + if (primitive.extensions.find("KHR_gaussian_splatting") != + primitive.extensions.end()) + { + return true; + } + } + } + return false; +} +---- + +=== Step 2: Extracting Extension Data + +Parse the extension JSON to get accessor indices: + +[source,cpp] +---- +struct GaussianSplatExtension +{ + int position_accessor = -1; + int rotation_accessor = -1; + int scale_accessor = -1; + int opacity_accessor = -1; + int color_accessor = -1; + std::vector sh_accessors; + + int sh_degree = 0; + bool antialiased = false; + std::string color_space = "BT.709-sRGB"; + std::string kernel = "ellipse"; +}; + +GaussianSplatExtension parse_extension(const tinygltf::Value& ext_value) +{ + GaussianSplatExtension ext; + + // Get accessor indices + if (ext_value.Has("ROTATION")) + ext.rotation_accessor = ext_value.Get("ROTATION").Get(); + if (ext_value.Has("SCALE")) + ext.scale_accessor = ext_value.Get("SCALE").Get(); + if (ext_value.Has("OPACITY")) + ext.opacity_accessor = ext_value.Get("OPACITY").Get(); + + // Get SH accessors + if (ext_value.Has("shDegree")) + ext.sh_degree = ext_value.Get("shDegree").Get(); + + for (int degree = 0; degree <= ext.sh_degree; degree++) + { + for (int m = -degree; m <= degree; m++) + { + std::string key = "SH_DEGREE_" + std::to_string(degree) + + "_COEF_" + std::to_string(m); + if (ext_value.Has(key)) + ext.sh_accessors.push_back(ext_value.Get(key).Get()); + } + } + + // Get properties + if (ext_value.Has("antialiased")) + ext.antialiased = ext_value.Get("antialiased").Get(); + if (ext_value.Has("colorSpace")) + ext.color_space = ext_value.Get("colorSpace").Get(); + if (ext_value.Has("kernel")) + ext.kernel = 
=== Step 3: Reading Buffer Data

GLTF stores data in buffers accessed through accessors:

[source,cpp]
----
template <typename T>
std::vector<T> read_accessor_data(const tinygltf::Model& model, int accessor_idx)
{
    if (accessor_idx < 0 || accessor_idx >= static_cast<int>(model.accessors.size()))
        return {};

    const auto& accessor    = model.accessors[accessor_idx];
    const auto& buffer_view = model.bufferViews[accessor.bufferView];
    const auto& buffer      = model.buffers[buffer_view.buffer];

    std::vector<T> data(accessor.count);

    // Calculate byte stride
    size_t byte_stride = buffer_view.byteStride;
    if (byte_stride == 0)
        byte_stride = accessor.ByteStride(buffer_view);

    // Read data
    const unsigned char* src = buffer.data.data() +
                               buffer_view.byteOffset +
                               accessor.byteOffset;

    for (size_t i = 0; i < accessor.count; i++)
    {
        // Handle different component types and normalization
        if (accessor.componentType == TINYGLTF_COMPONENT_TYPE_FLOAT)
        {
            memcpy(&data[i], src + i * byte_stride, sizeof(T));
        }
        else if (accessor.componentType == TINYGLTF_COMPONENT_TYPE_UNSIGNED_SHORT)
        {
            // Dequantize if normalized
            const uint16_t* quantized = (const uint16_t*)(src + i * byte_stride);
            if (accessor.normalized)
            {
                // Convert [0, 65535] to [0.0, 1.0]
                for (size_t j = 0; j < sizeof(T) / sizeof(float); j++)
                    ((float*)&data[i])[j] = quantized[j] / 65535.0f;
            }
        }
        // Handle other component types...
    }

    return data;
}
----

=== Step 4: Creating GPU Buffers

Transfer the loaded data to GPU buffers:

[source,cpp]
----
std::unique_ptr<GaussianSplat> create_gaussian_splat_from_gltf(
    const tinygltf::Model& model,
    const tinygltf::Primitive& primitive,
    vkb::Device& device)
{
    auto splat = std::make_unique<GaussianSplat>();

    // Get extension data
    auto ext_it = primitive.extensions.find("KHR_gaussian_splatting");
    if (ext_it == primitive.extensions.end())
        return nullptr;

    GaussianSplatExtension ext = parse_extension(ext_it->second);

    // Read positions (from standard POSITION attribute)
    auto positions = read_accessor_data<glm::vec3>(
        model, primitive.attributes.at("POSITION"));
    splat->splat_count = static_cast<uint32_t>(positions.size());

    // Read rotations
    auto rotations = read_accessor_data<glm::vec4>(
        model, ext.rotation_accessor);

    // Read scales
    auto scales = read_accessor_data<glm::vec3>(
        model, ext.scale_accessor);

    // Read opacities
    auto opacities = read_accessor_data<float>(
        model, ext.opacity_accessor);

    // Read colors
    auto colors = read_accessor_data<glm::vec3>(
        model, primitive.attributes.at("COLOR_0"));

    // Create GPU buffers
    splat->position_buffer = create_device_buffer(
        device, positions.data(),
        positions.size() * sizeof(glm::vec3),
        VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);

    splat->rotation_buffer = create_device_buffer(
        device, rotations.data(),
        rotations.size() * sizeof(glm::vec4),
        VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);

    splat->scale_buffer = create_device_buffer(
        device, scales.data(),
        scales.size() * sizeof(glm::vec3),
        VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);

    splat->opacity_buffer = create_device_buffer(
        device, opacities.data(),
        opacities.size() * sizeof(float),
        VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);

    splat->color_buffer = create_device_buffer(
        device, colors.data(),
        colors.size() * sizeof(glm::vec3),
        VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);

    // Store extension properties
    splat->sh_degree   = static_cast<uint32_t>(ext.sh_degree);
    splat->antialiased = ext.antialiased;
    splat->kernel      = (ext.kernel == "sphere") ?
                             GaussianSplat::KernelType::Sphere :
                             GaussianSplat::KernelType::Ellipse;
    splat->color_space = (ext.color_space == "linear") ?
                             GaussianSplat::ColorSpace::Linear :
                             GaussianSplat::ColorSpace::SRGB;

    return splat;
}
----
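The `create_device_buffer` helper above is not a framework function. A minimal sketch (assuming the framework's `vkb::core::BufferC` and a host-visible allocation, following the same map/flush/unmap pattern the sample uses elsewhere) could look like this; a production loader would likely stage into device-local memory instead:

[source,cpp]
----
// Minimal sketch of the create_device_buffer helper used above: create a
// host-visible buffer and copy the attribute data into it.
std::unique_ptr<vkb::core::BufferC> create_device_buffer(vkb::Device       &device,
                                                         const void        *data,
                                                         VkDeviceSize       size,
                                                         VkBufferUsageFlags usage)
{
    auto buffer = std::make_unique<vkb::core::BufferC>(
        device, size, usage, VMA_MEMORY_USAGE_CPU_TO_GPU);

    auto *mapped = buffer->map();
    memcpy(mapped, data, static_cast<size_t>(size));
    buffer->flush();
    buffer->unmap();

    return buffer;
}
----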
=== Step 5: Handling Quantized Data

If the GLTF uses `KHR_mesh_quantization`, dequantization must be handled:

[source,glsl]
----
// In vertex shader, dequantize positions
layout(location = 0) in vec3 inPositionQuantized; // uint16 normalized, arrives as [0, 1]

// Vulkan GLSL has no standalone uniforms, so the dequantization bounds
// live in a push constant block (a uniform buffer works just as well)
layout(push_constant) uniform Quantization
{
    vec3 quantization_min;
    vec3 quantization_range;
} quant;

void main()
{
    // Dequantize: [0, 1] → [min, min + range]
    vec3 position = quant.quantization_min + inPositionQuantized * quant.quantization_range;

    // Continue with normal processing...
}
----

Or dequantize on CPU during loading:

[source,cpp]
----
glm::vec3 dequantize_position(uint16_t x, uint16_t y, uint16_t z,
                              const glm::vec3& min, const glm::vec3& range)
{
    return min + glm::vec3(
        (float)x / 65535.0f * range.x,
        (float)y / 65535.0f * range.y,
        (float)z / 65535.0f * range.z
    );
}
----

=== Complete Loading Example

Here's a complete example of loading Gaussian splats from a GLTF file:

[source,cpp]
----
void render_octomap::load_gaussian_splats_gltf(const std::string& filename)
{
    tinygltf::Model    model;
    tinygltf::TinyGLTF loader;
    std::string err, warn;

    // Load GLTF file
    bool ret = loader.LoadASCIIFromFile(&model, &err, &warn, filename);
    if (!ret)
    {
        LOGE("Failed to load GLTF: {}", err);
        return;
    }

    // Check for Gaussian splat extension
    if (!has_gaussian_splats(model))
    {
        LOGE("GLTF file does not contain Gaussian splats");
        return;
    }

    // Iterate through meshes and primitives
    for (const auto& mesh : model.meshes)
    {
        for (const auto& primitive : mesh.primitives)
        {
            // Check if this primitive has Gaussian splats
            auto ext_it = primitive.extensions.find("KHR_gaussian_splatting");
            if (ext_it == primitive.extensions.end())
                continue;

            // Create Gaussian splat component
            auto splat = create_gaussian_splat_from_gltf(
                model, primitive, get_device());

            if (splat)
            {
                LOGI("Loaded {} Gaussian splats", splat->splat_count);

                // Add to scene graph
                auto node = scene->create_child_node();
                node->add_component(std::move(splat));
            }
        }
    }

    // Rebuild command buffers to render new splats
    rebuild_command_buffers();
}
----

=== Error Handling and Validation

Always validate loaded data:

[source,cpp]
----
bool validate_gaussian_splat_data(const GaussianSplat& splat)
{
    if (splat.splat_count == 0)
    {
        LOGE("No splats loaded");
        return false;
    }

    // Check quaternion normalization
    // Rotations should be unit quaternions

    // Check scale values (should be positive)

    // Check opacity range [0, 1]

    // Validate buffer sizes match splat count

    return true;
}
----

== Rendering Gaussian Splats

Now for the fun part: actually getting these splats on screen. The rendering process is surprisingly elegant once you understand what's happening. We're going to take 3D Gaussians floating in space and project them onto the 2D screen, then rasterize them as quads (like billboards), and finally evaluate the Gaussian function per-pixel to get smooth, blended results.

=== The Big Picture

Before we dive into shader code, let's understand the pipeline:

**Step 1 - Vertex Shader:** For each splat, we need to figure out what it looks like on screen. 
This means projecting the 3D Gaussian to a 2D Gaussian on the screen, calculating how big it should appear, and generating a billboard quad to cover it. + +**Step 2 - Rasterization:** The GPU's fixed-function hardware rasterizes our quads into pixels. Nothing special here - same as any other geometry. + +**Step 3 - Fragment Shader:** For each pixel, we evaluate the 2D Gaussian function. This gives us a smooth falloff from the center to the edges. We multiply by the opacity and color, output premultiplied alpha, and we're done. + +**Step 4 - Blending:** The GPU blends all the quads together using alpha blending. Since we're outputting premultiplied alpha, the standard blend equation `src + dst * (1 - src.alpha)` does exactly what we want. + +=== The Projection Math (Don't Worry, I'll Explain) + +Here's the tricky part: we have a 3D Gaussian blob floating in space with its 3D covariance matrix `Σ₃D`. We need to project it onto the 2D screen and figure out what its 2D covariance matrix `Σ₂D` should be. If we get this wrong, the splat will look stretched or squashed on screen. + +The good news? There's a closed-form solution using the Jacobian of the perspective projection. The formula is: + +[stem] +++++ +\Sigma_{2D} = J W \Sigma_{3D} W^T J^T +++++ + +Let's break this down in English: `W` is the rotation part of the view matrix (how the camera is oriented). `J` is the Jacobian, which captures how perspective projection distorts things - objects closer to the camera project larger than distant objects. + +The Jacobian looks like this: + +[stem] +++++ +J = \begin{bmatrix} +\frac{f_x}{z} & 0 & -\frac{f_x x}{z^2} \\ +0 & \frac{f_y}{z} & -\frac{f_y y}{z^2} +\end{bmatrix} +++++ + +Where `(x, y, z)` is where the splat is in view space (camera coordinates), and `(fₓ, fᵧ)` are the focal lengths from your projection matrix. Notice how everything is divided by z? That's the perspective part - further away means smaller projection. + +You don't need to memorize this formula. What matters is: we can compute exactly how a 3D Gaussian projects to 2D, which means our shader can do this in real-time. + +=== The Vertex Shader: Where the Magic Happens + +Alright, let's build the vertex shader that does all this math. This shader runs once per splat vertex (we'll draw 4 vertices per splat to make a quad). The shader's job is to: + +1. Take the 3D splat data (position, rotation, scale) +2. Build the 3D covariance matrix +3. Project it to 2D using that Jacobian formula +4. Figure out how big the splat appears on screen +5. Generate a quad corner position +6. 
Pass along everything the fragment shader needs

Here's the complete implementation with comments explaining each step:

[source,glsl]
----
#version 450

// Per-splat attributes
layout(location = 0) in vec3  inPosition; // Splat center
layout(location = 1) in vec4  inRotation; // Quaternion
layout(location = 2) in vec3  inScale;    // Scale factors
layout(location = 3) in float inOpacity;  // Opacity [0, 1]
layout(location = 4) in vec3  inColor;    // RGB color

// Uniform buffer
layout(binding = 0) uniform UBO {
    mat4 view;
    mat4 proj;
    vec2 viewport; // Width, height
    vec2 focal;    // fx, fy
} ubo;

// Output to fragment shader
layout(location = 0) out vec3  outColor;
layout(location = 1) out float outOpacity;
layout(location = 2) out vec2  outCoord; // Pixel offset from the splat center
layout(location = 3) out vec3  outConic; // Inverse 2D covariance (a, b, c)

// Convert quaternion to rotation matrix.
// GLSL mat3 constructors are column-major, so the arguments below are
// written one *column* at a time.
mat3 quaternionToMatrix(vec4 q)
{
    float x = q.x, y = q.y, z = q.z, w = q.w;

    return mat3(
        1.0 - 2.0*(y*y + z*z), 2.0*(x*y + w*z),       2.0*(x*z - w*y),       // column 0
        2.0*(x*y - w*z),       1.0 - 2.0*(x*x + z*z), 2.0*(y*z + w*x),       // column 1
        2.0*(x*z + w*y),       2.0*(y*z - w*x),       1.0 - 2.0*(x*x + y*y)  // column 2
    );
}

// Compute Jacobian of perspective projection
mat3 computeJacobian(vec3 viewPos, vec2 focal)
{
    float x  = viewPos.x;
    float y  = viewPos.y;
    float z  = viewPos.z;
    float z2 = z * z;

    // The Jacobian maps 3D displacements to 2D screen displacements.
    // Build it row by row exactly as in the math section above, then
    // transpose, since mat3 constructors expect columns.
    return transpose(mat3(
        focal.x / z, 0.0,         -focal.x * x / z2,
        0.0,         focal.y / z, -focal.y * y / z2,
        0.0,         0.0,          0.0 // We only need 2x3, pad with zeros
    ));
}

// Compute eigenvalues of 2x2 symmetric matrix (for radius calculation)
vec2 eigenvalues2x2(mat2 m)
{
    float a = m[0][0];
    float b = m[0][1]; // = m[1][0] since symmetric
    float c = m[1][1];

    float trace        = a + c;
    float det          = a * c - b * b;
    float discriminant = sqrt(max(0.0, trace * trace - 4.0 * det));

    float lambda1 = 0.5 * (trace + discriminant);
    float lambda2 = 0.5 * (trace - discriminant);

    return vec2(lambda1, lambda2);
}

void main()
{
    // 1. Transform splat center to view space
    vec4 viewPos = ubo.view * vec4(inPosition, 1.0);

    // Cull splats behind camera
    if (viewPos.z > 0.0) {
        gl_Position = vec4(0.0, 0.0, -1.0, 1.0);
        return;
    }

    // 2. Build 3D covariance matrix from rotation and scale
    mat3 R = quaternionToMatrix(inRotation);
    mat3 S = mat3(
        inScale.x, 0.0, 0.0,
        0.0, inScale.y, 0.0,
        0.0, 0.0, inScale.z
    );

    // Covariance: Σ = R S S^T R^T
    mat3 M     = R * S;
    mat3 cov3D = M * transpose(M);

    // 3. Project 3D covariance to 2D
    mat3 J = computeJacobian(viewPos.xyz, ubo.focal);
    mat3 W = mat3(ubo.view); // Extract rotation part only
    mat3 T = J * W;

    // Σ₂D = T Σ₃D T^T (only need upper 2x2 block)
    mat3 cov2Dfull = T * cov3D * transpose(T);
    mat2 cov2D = mat2(
        cov2Dfull[0][0], cov2Dfull[0][1],
        cov2Dfull[1][0], cov2Dfull[1][1]
    );

    // Add small value to diagonal for numerical stability
    cov2D[0][0] += 0.3;
    cov2D[1][1] += 0.3;

    // 4. Compute splat radius from eigenvalues
    vec2 eigenvals    = eigenvalues2x2(cov2D);
    float maxEigenval = max(eigenvals.x, eigenvals.y);
    float radius      = ceil(3.0 * sqrt(maxEigenval)); // 3 sigma = 99.7% coverage
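    // The eigenvalues of the 2x2 covariance are the squared semi-axis lengths
    // of the projected ellipse (in pixels²), so 3·sqrt(λmax) is a conservative
    // circular bound around the whole splat.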
    // 5. Generate billboard quad
    // gl_VertexIndex tells us which corner of the quad we're generating
    // Use instanced rendering with 4 vertices per splat
    vec2 quadOffsets[4] = vec2[](
        vec2(-1.0, -1.0),
        vec2( 1.0, -1.0),
        vec2(-1.0,  1.0),
        vec2( 1.0,  1.0)
    );

    int  cornerIndex = gl_VertexIndex % 4;
    vec2 quadOffset  = quadOffsets[cornerIndex];

    // Compute inverse of 2D covariance for Gaussian evaluation
    float det = cov2D[0][0] * cov2D[1][1] - cov2D[0][1] * cov2D[1][0];
    mat2 cov2DInv = mat2(
         cov2D[1][1] / det, -cov2D[0][1] / det,
        -cov2D[1][0] / det,  cov2D[0][0] / det
    );

    // Scale the quad corner by the conservative radius. The quad is an
    // axis-aligned bounding square around the ellipse; the fragment shader
    // discards the pixels that fall outside the actual Gaussian.
    vec2 offsetPixels = radius * quadOffset;

    // Convert to NDC offset
    vec2 offsetNDC = offsetPixels / (0.5 * ubo.viewport);

    // Project center to clip space
    vec4 clipPos = ubo.proj * viewPos;
    clipPos /= clipPos.w;

    // Add offset in NDC space (after the divide, clipPos.w == 1.0)
    clipPos.xy += offsetNDC;

    gl_Position = clipPos;

    // 6. Output to fragment shader
    outColor   = inColor;
    outOpacity = inOpacity;

    // Pass the pixel-space offset from the splat center together with the
    // inverse covariance (its unique elements a, b, c) so the fragment
    // shader can evaluate x^T Σ⁻¹ x
    outCoord = offsetPixels;
    outConic = vec3(cov2DInv[0][0], cov2DInv[0][1], cov2DInv[1][1]);
}
----

Notice the structure: we transform to view space, build the 3D covariance, project to 2D, compute the radius, and generate quad corners. The key insight is that `cov2DInv` (the inverse of the 2D covariance matrix, often called the conic) is exactly what the fragment shader needs to evaluate the Gaussian. By computing it here and passing it along with the pixel-space offset, we've set up everything perfectly for the fragment shader.

One more thing: notice we add 0.3 to the diagonal of `cov2D`? That's for numerical stability - it prevents degenerate cases where the covariance matrix becomes singular (non-invertible). It's a small epsilon that doesn't affect visual quality but prevents crashes.

=== The Fragment Shader: Evaluating the Gaussian

The fragment shader is much simpler. By the time we get here, all the hard work is done. We just need to evaluate the 2D Gaussian function at this pixel's location:

[source,glsl]
----
#version 450

layout(location = 0) in vec3  inColor;
layout(location = 1) in float inOpacity;
layout(location = 2) in vec2  inCoord; // Pixel offset from the splat center
layout(location = 3) in vec3  inConic; // Inverse 2D covariance (a, b, c)

layout(location = 0) out vec4 outColor;

void main()
{
    // Evaluate 2D Gaussian function: exp(-0.5 * x^T Σ⁻¹ x).
    // For the symmetric 2x2 matrix Σ⁻¹ = [a b; b c] this expands to
    // a·x² + 2b·x·y + c·y²
    float power = -0.5 * (inConic.x * inCoord.x * inCoord.x +
                          2.0 * inConic.y * inCoord.x * inCoord.y +
                          inConic.z * inCoord.y * inCoord.y);

    // Clamp for numerical stability
    power = max(power, -100.0);

    // Evaluate Gaussian
    float gaussian = exp(power);

    // Apply opacity
    float alpha = gaussian * inOpacity;

    // Early discard for performance (skip nearly transparent fragments)
    if (alpha < 0.004) // ~1/255
        discard;

    // Output with premultiplied alpha
    // This is important for correct blending
    outColor = vec4(inColor * alpha, alpha);
}
----

Beautiful! That's only a handful of lines of actual code. We measure how far this pixel is from the Gaussian's center in the metric of the inverse covariance (that `x^T Σ⁻¹ x` expression), evaluate the exponential falloff, multiply by opacity, and output. The `discard` for low alpha values is an optimization allowing us to drop pixels that are too transparent to see.

=== Premultiplied Alpha

Notice the output `vec4(inColor * alpha, alpha)` instead of the more common `vec4(inColor, alpha)`. 
This is called premultiplied alpha (or associated alpha), and it's crucial for correct Gaussian splat rendering.

Here's why: when you blend multiple semi-transparent Gaussians on top of each other, you want each one composited over whatever is already behind it. The standard GPU blend equation with premultiplied alpha is:

[source]
----
result = source + destination * (1 - source.alpha)
----

This is perfect for Gaussians. Each splat adds its contribution, and the `(1 - source.alpha)` term lets the background show through in proportion to how transparent the splat is.

If we used straight alpha (non-premultiplied), we'd need a different blend equation, and we'd have problems with color bleeding - the color values from fully transparent areas would affect the result, which is wrong. Premultiplied alpha solves this elegantly: a fully transparent pixel contributes (0, 0, 0, 0), which adds nothing. Perfect.

Plus, there's a performance bonus: the blend equation is simpler without that extra multiply for the source color.

=== Vulkan Pipeline Configuration

Setting up the complete Vulkan pipeline for Gaussian splat rendering:

[source,cpp]
----
void create_splat_pipeline()
{
    // 1. Vertex input state - one binding per attribute buffer
    std::vector<VkVertexInputBindingDescription> bindings = {
        {0, sizeof(glm::vec3), VK_VERTEX_INPUT_RATE_INSTANCE}, // Position
        {1, sizeof(glm::vec4), VK_VERTEX_INPUT_RATE_INSTANCE}, // Rotation
        {2, sizeof(glm::vec3), VK_VERTEX_INPUT_RATE_INSTANCE}, // Scale
        {3, sizeof(float),     VK_VERTEX_INPUT_RATE_INSTANCE}, // Opacity
        {4, sizeof(glm::vec3), VK_VERTEX_INPUT_RATE_INSTANCE}, // Color
    };

    std::vector<VkVertexInputAttributeDescription> attributes = {
        {0, 0, VK_FORMAT_R32G32B32_SFLOAT,    0}, // Position
        {1, 1, VK_FORMAT_R32G32B32A32_SFLOAT, 0}, // Rotation
        {2, 2, VK_FORMAT_R32G32B32_SFLOAT,    0}, // Scale
        {3, 3, VK_FORMAT_R32_SFLOAT,          0}, // Opacity
        {4, 4, VK_FORMAT_R32G32B32_SFLOAT,    0}, // Color
    };

    VkPipelineVertexInputStateCreateInfo vertexInputState = {};
    vertexInputState.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
    vertexInputState.vertexBindingDescriptionCount   = static_cast<uint32_t>(bindings.size());
    vertexInputState.pVertexBindingDescriptions      = bindings.data();
    vertexInputState.vertexAttributeDescriptionCount = static_cast<uint32_t>(attributes.size());
    vertexInputState.pVertexAttributeDescriptions    = attributes.data();

    // 2. Input assembly - triangle strip for quad generation
    VkPipelineInputAssemblyStateCreateInfo inputAssemblyState = {};
    inputAssemblyState.sType    = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
    inputAssemblyState.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
    inputAssemblyState.primitiveRestartEnable = VK_FALSE;

    // 3. Rasterization state
    VkPipelineRasterizationStateCreateInfo rasterizationState = {};
    rasterizationState.sType       = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
    rasterizationState.polygonMode = VK_POLYGON_MODE_FILL;
    rasterizationState.cullMode    = VK_CULL_MODE_NONE; // Don't cull, quads face camera
    rasterizationState.frontFace   = VK_FRONT_FACE_COUNTER_CLOCKWISE;
    rasterizationState.lineWidth   = 1.0f;

    // 4. Depth/stencil state - READ depth but DON'T WRITE (transparency!)
    VkPipelineDepthStencilStateCreateInfo depthStencilState = {};
    depthStencilState.sType            = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
    depthStencilState.depthTestEnable  = VK_TRUE;
    depthStencilState.depthWriteEnable = VK_FALSE; // Critical for transparency
    depthStencilState.depthCompareOp   = VK_COMPARE_OP_LESS;

    // 5. 
Blend state - Premultiplied alpha blending + VkPipelineColorBlendAttachmentState blendAttachment = {}; + blendAttachment.blendEnable = VK_TRUE; + blendAttachment.srcColorBlendFactor = VK_BLEND_FACTOR_ONE; + blendAttachment.dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + blendAttachment.colorBlendOp = VK_BLEND_OP_ADD; + blendAttachment.srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE; + blendAttachment.dstAlphaBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; + blendAttachment.alphaBlendOp = VK_BLEND_OP_ADD; + blendAttachment.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | + VK_COLOR_COMPONENT_A_BIT; + + VkPipelineColorBlendStateCreateInfo colorBlendState = {}; + colorBlendState.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + colorBlendState.attachmentCount = 1; + colorBlendState.pAttachments = &blendAttachment; + + // Create pipeline... +} +---- + +Key configuration points: + +1. **Instanced rendering**: Use `VK_VERTEX_INPUT_RATE_INSTANCE` to draw 4 vertices per splat +2. **No backface culling**: Quads always face camera +3. **Depth test but no write**: Essential for correct transparency ordering +4. **Premultiplied alpha blending**: `ONE` + `ONE_MINUS_SRC_ALPHA` + +=== Drawing Gaussian Splats + +[source,cpp] +---- +void draw_gaussian_splats(VkCommandBuffer cmd, const GaussianSplat& splat) +{ + // Bind pipeline + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, splat_pipeline); + + // Bind uniform buffer (view, proj matrices, etc.) + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_layout, 0, 1, &descriptor_set, 0, nullptr); + + // Bind vertex buffers (one per attribute) + VkBuffer buffers[] = { + splat.position_buffer->get_handle(), + splat.rotation_buffer->get_handle(), + splat.scale_buffer->get_handle(), + splat.opacity_buffer->get_handle(), + splat.color_buffer->get_handle(), + }; + VkDeviceSize offsets[] = {0, 0, 0, 0, 0}; + vkCmdBindVertexBuffers(cmd, 0, 5, buffers, offsets); + + // Draw: 4 vertices per splat, instanced + vkCmdDraw(cmd, 4, splat.splat_count, 0, 0); +} +---- + +== Depth Sorting + +For correct rendering, splats must be sorted by depth (back-to-front). Common approaches: + +=== CPU Sorting + +Simple but can be slow for large splat counts: + +[source,cpp] +---- +std::sort(splats.begin(), splats.end(), [&](const Splat& a, const Splat& b) { + float depthA = glm::dot(viewDir, a.position - cameraPos); + float depthB = glm::dot(viewDir, b.position - cameraPos); + return depthA > depthB; // Back to front +}); +---- + +=== GPU Radix Sort + +More efficient for large datasets using compute shaders. + +=== Tile-Based Sorting + +Divide screen into tiles and sort within each tile for better parallelism. + +== Performance Considerations + +=== Culling + +* **Frustum culling**: Skip splats outside the view frustum +* **Size culling**: Skip splats that project to less than 1 pixel +* **Opacity culling**: Skip splats with very low opacity + +=== Level of Detail + +Reduce splat count for distant regions or lower quality settings. 
=== Memory Layout

Use structure-of-arrays (SoA) for better cache utilization:

[source,cpp]
----
struct SplatBuffers {
    std::vector<glm::vec3> positions;
    std::vector<glm::vec4> rotations;
    std::vector<glm::vec3> scales;
    std::vector<float>     opacities;
    std::vector<glm::vec3> colors;
};
----

== Sample Usage

The render_octomap sample demonstrates switching between different view modes:

[source,cpp]
----
void render_octomap::onViewStateChanged(MapView::ViewState newState)
{
    switch (newState)
    {
        case MapView::ViewState::Octomap:
            // Render octomap cubes
            break;

        case MapView::ViewState::GLTFRegular:
            // Render standard GLTF mesh
            loadGLTFScene("scenes/octmap_and_splats/savedMap_v1.1.0.gltf");
            break;

        case MapView::ViewState::GLTFSplats:
            // Render gaussian splats
            loadGaussianSplatsScene("scenes/octmap_and_splats/savedMap_v1.1.0_splats_c0_-1_-1.gltf");
            break;
    }
    rebuild_command_buffers();
}
----

== Future Enhancements

=== Spherical Harmonics

For view-dependent color effects, implement SH evaluation:

[source,glsl]
----
vec3 evaluateSH(vec3 dir, mat3 sh_coeffs) {
    // Evaluate the spherical harmonics basis functions for `dir`
    // and combine them with the coefficients
    vec3 color = vec3(0.0);
    // ...
    return color;
}
----

=== Anti-Aliasing

Implement the Mip-Splatting technique for anti-aliased rendering.

=== Streaming

For large scenes, implement tile-based streaming of splat data.

== References

* https://repo-sam.inria.fr/fungraph/3d-gaussian-splatting/[3D Gaussian Splatting for Real-Time Radiance Field Rendering]
* https://github.com/KhronosGroup/glTF/pull/2446[KHR_gaussian_splatting GLTF Extension Proposal]
* link:../README.adoc[render_octomap Sample README]
* link:imgui-vulkan-integration.adoc[ImGui Vulkan Integration Tutorial]


=== What This Sample Demonstrates

The `render_octomap` sample shows all of this in action. It loads GLTF files containing both traditional geometry (octomaps) and Gaussian splats, switches between different rendering modes, and demonstrates how splats integrate with existing rendering techniques. It's a real-world SLAM reconstruction workflow where you capture an environment, process it into both mesh and splat representations, and render them side-by-side.

diff --git a/samples/complex/render_octomap/Tutorials/imgui-vulkan-integration.adoc b/samples/complex/render_octomap/Tutorials/imgui-vulkan-integration.adoc
new file mode 100644
index 0000000000..c8a2180d80
--- /dev/null
+++ b/samples/complex/render_octomap/Tutorials/imgui-vulkan-integration.adoc
@@ -0,0 +1,281 @@
////
- Copyright (c) 2025-2026, Holochip Inc.
-
- SPDX-License-Identifier: Apache-2.0
-
- Licensed under the Apache License, Version 2.0 the "License";
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-
////

= ImGui Integration with Vulkan: A Practical Guide
:toc:
:toclevels: 3
:sectnums:

== Introduction

This tutorial demonstrates how to integrate https://github.com/ocornut/imgui[Dear ImGui] with a Vulkan application using the `ImGUIUtil` class provided in the render_octomap sample. 
ImGui is an immediate mode graphical user interface library that is particularly well-suited for debugging tools, in-application configuration, and rapid prototyping. + +For background on the render_octomap sample and its purpose, see the link:../README.adoc[README]. + +== Why ImGui for Vulkan Applications? + +ImGui offers several advantages for Vulkan developers: + +* **Immediate Mode**: No need to manage widget state - just call functions each frame +* **Lightweight**: Minimal overhead and easy to integrate +* **Customizable**: Full control over rendering and styling +* **Debug-Friendly**: Perfect for runtime parameter tweaking and visualization + +== The ImGUIUtil Class + +The `ImGUIUtil` class wraps ImGui functionality and handles all Vulkan-specific setup. It manages: + +* Font texture upload and sampling +* Vertex and index buffer management +* Pipeline creation and configuration +* Descriptor sets for texture binding +* Push constants for viewport transformation + +=== Class Overview + +[source,cpp] +---- +class ImGUIUtil { +public: + // Initialize styles, keys, etc. + void init(float width, float height); + + // Initialize all Vulkan resources used by the UI + void initResources(VkRenderPass renderPass, VkQueue copyQueue); + + // Starts a new ImGui frame and sets up windows and UI elements + bool newFrame(bool updateFrameGraph); + + // Update vertex and index buffer containing the ImGui elements + void updateBuffers(); + + // Draw current ImGui frame into a command buffer + void drawFrame(VkCommandBuffer commandBuffer); + + // Input handling + static void handleKey(int key, int scancode, int action, int mode); + static bool GetWantKeyCapture(); + static void charPressed(uint32_t key); +}; +---- + +== Integration Steps + +=== Step 1: Create the ImGUIUtil Instance + +Create an instance of `ImGUIUtil` by passing your `ApiVulkanSample` base pointer: + +[source,cpp] +---- +ImGUIUtil gui(this); // 'this' is your ApiVulkanSample-derived class +---- + +=== Step 2: Initialize ImGui + +Call `init()` with your window dimensions: + +[source,cpp] +---- +gui.init(width, height); +---- + +This sets up: + +* ImGui context and IO configuration +* Font loading (Montserrat font family in various weights) +* Key mappings for input handling +* Default styling + +=== Step 3: Initialize Vulkan Resources + +After creating your render pass, initialize the Vulkan resources: + +[source,cpp] +---- +gui.initResources(renderPass, graphicsQueue); +---- + +This creates: + +* Font texture and image view +* Sampler for font texture +* Descriptor pool, layout, and sets +* Graphics pipeline with appropriate blend states +* Pipeline layout with push constants + +=== Step 4: Render Loop Integration + +In your render loop, follow this pattern: + +[source,cpp] +---- +// Start new ImGui frame +if (gui.newFrame(updateFrameGraph)) { + // ImGui will capture input - handle accordingly +} + +// Update buffers if UI changed +gui.updateBuffers(); + +// In your command buffer recording: +gui.drawFrame(commandBuffer); +---- + +== Vulkan Resource Management + +=== Font Texture Upload + +The `ImGUIUtil` handles font texture upload using a staging buffer and command buffer submission: + +[source,cpp] +---- +// Create staging buffer with font data +vkb::core::BufferC stage_buffer = vkb::core::BufferC::create_staging_buffer( + device, uploadSize, fontData); + +// Request command buffer from pool +auto command_buffer = device.get_command_pool().request_command_buffer(); + +// Record copy commands with proper image layout transitions 
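// (The barrier calls themselves are elided in this walkthrough; in the
// Vulkan-Samples framework they would typically be issued with something
// like vkb::image_layout_transition(cmd, image, old_layout, new_layout).)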
command_buffer->begin(VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, 0);

// Transition to TRANSFER_DST_OPTIMAL
// Copy buffer to image
// Transition to SHADER_READ_ONLY_OPTIMAL

command_buffer->end();

// Submit and wait
queue.submit(*command_buffer, device.get_fence_pool().request_fence());
device.get_fence_pool().wait();
----

=== Pipeline Configuration

The ImGui pipeline uses:

* **Vertex Input**: Position (vec2), UV (vec2), Color (uint32)
* **Blend State**: Alpha blending enabled for transparency
* **Depth Test**: Disabled (UI renders on top)
* **Cull Mode**: None (UI elements may be flipped)

=== Push Constants

Viewport transformation is handled via push constants:

[source,cpp]
----
struct PushConstBlock {
    glm::vec2 scale;     // 2.0 / width, 2.0 / height
    glm::vec2 translate; // -1.0, -1.0
} pushConstBlock;
----

== Input Handling

=== Keyboard Input

Forward keyboard events to ImGui:

[source,cpp]
----
void onKeyEvent(int key, int scancode, int action, int mods) {
    ImGUIUtil::handleKey(key, scancode, action, mods);

    // Check if ImGui wants keyboard input
    if (ImGUIUtil::GetWantKeyCapture()) {
        // Don't process key for application
        return;
    }

    // Process key for application...
}
----

=== Character Input

For text input:

[source,cpp]
----
void onCharEvent(uint32_t codepoint) {
    ImGUIUtil::charPressed(codepoint);
}
----

== Extending ImGUIUtil

=== Adding Custom Widgets

The `MapView` class demonstrates how to extend the UI with custom views:

[source,cpp]
----
class MapView {
public:
    std::vector<VkDescriptorSet> LoadAssets(
        ApiVulkanSample* base,
        VkDescriptorSetAllocateInfo allocInfo,
        VkQueue copyQueue);

    void Draw(); // Called during newFrame()
};
----

=== Rendering Images in the UI

One key feature of this `ImGUIUtil` implementation is the ability to render images within the ImGui interface. This is useful for:

* Displaying render target previews
* Showing texture thumbnails
* Creating viewport windows within the UI

== Best Practices

1. **Buffer Updates**: Only call `updateBuffers()` when the UI has changed
2. **Font Loading**: Load fonts during initialization, not per-frame
3. **Descriptor Management**: Pre-allocate enough descriptor sets for your images
4. **Command Buffer Reuse**: Use the framework's command pool for efficient buffer management

== Comparison with Framework GUI

The Vulkan-Samples framework includes its own GUI implementation in `framework/gui.h`. The `ImGUIUtil` in this sample differs in that it:

* Supports rendering images within the UI
* Demonstrates viewport placement from GUI to render backend
* Shows a more self-contained implementation pattern

== Further Reading

* https://github.com/ocornut/imgui[Dear ImGui GitHub Repository]
* https://github.com/ocornut/imgui/wiki[ImGui Wiki]
* link:../README.adoc[render_octomap Sample README]
* link:../ImGUIUtil.h[ImGUIUtil Header File]
* link:../ImGUIUtil.cpp[ImGUIUtil Implementation]

== Summary

The `ImGUIUtil` class provides a complete solution for integrating ImGui with Vulkan applications. By handling all the Vulkan-specific details internally, it allows developers to focus on building their UI rather than managing graphics resources. 
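As a compact recap, here is the whole integration in one place (a sketch: `MySample` stands in for your `ApiVulkanSample`-derived class, and error handling is omitted):

[source,cpp]
----
// Sketch only: the setup and per-frame halves of the integration.
void MySample::setup_gui()
{
    gui = new ImGUIUtil(this);                                          // Step 1
    gui->init(static_cast<float>(width), static_cast<float>(height));   // Step 2: styles, fonts, key map
    gui->initResources(render_pass, queue);                             // Step 3: Vulkan resources
}

void MySample::render_gui(VkCommandBuffer cmd)
{
    gui->newFrame(false);    // Step 4: build this frame's UI
    gui->updateBuffers();    // upload vertex/index data if anything changed
    gui->drawFrame(cmd);     // record the UI draw commands
}
----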
+ +Key takeaways: + +* Use `init()` and `initResources()` for setup +* Call `newFrame()`, `updateBuffers()`, and `drawFrame()` each frame +* Forward input events for proper interaction +* Extend with custom views like `MapView` for application-specific UI diff --git a/samples/complex/render_octomap/images/mapping.png b/samples/complex/render_octomap/images/mapping.png new file mode 100644 index 0000000000..e3f518b9d5 Binary files /dev/null and b/samples/complex/render_octomap/images/mapping.png differ diff --git a/samples/complex/render_octomap/images/markers.png b/samples/complex/render_octomap/images/markers.png new file mode 100644 index 0000000000..5b3da32751 Binary files /dev/null and b/samples/complex/render_octomap/images/markers.png differ diff --git a/samples/complex/render_octomap/octomap b/samples/complex/render_octomap/octomap new file mode 160000 index 0000000000..766e96c77e --- /dev/null +++ b/samples/complex/render_octomap/octomap @@ -0,0 +1 @@ +Subproject commit 766e96c77e4fc4e7d66345a27546a1f4f33ddd4b diff --git a/samples/complex/render_octomap/render_octomap.cpp b/samples/complex/render_octomap/render_octomap.cpp new file mode 100644 index 0000000000..70bb28e562 --- /dev/null +++ b/samples/complex/render_octomap/render_octomap.cpp @@ -0,0 +1,1101 @@ +/* Copyright (c) 2024-2026, Holochip Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/

#include "render_octomap.h"
#include "core/util/logging.hpp"
#include "filesystem/legacy.h"
#include "gltf_loader.h"
#include "octomap/octomap.h"
#include "platform/input_events.h"
#include "scene_graph/components/mesh.h"
#include "scene_graph/components/pbr_material.h"
#include "scene_graph/components/sub_mesh.h"
#include "scene_graph/scene.h"

#include <chrono>

// KHR_gaussian_splatting extension name
#define KHR_GAUSSIAN_SPLATTING_EXTENSION "KHR_gaussian_splatting"

render_octomap::render_octomap() :
    vertices(), indexCount(), pipelineCache(VK_NULL_HANDLE), pipelineLayout(VK_NULL_HANDLE), pipeline(VK_NULL_HANDLE), descriptorPool(VK_NULL_HANDLE), descriptorSetLayout(VK_NULL_HANDLE), descriptorSet(VK_NULL_HANDLE), gui(nullptr), mMaxTreeDepth(16), m_zMin(), m_zMax(), lastMapBuildSize()
{
    title = "Octomap Viewer";
    map   = new octomap::OcTree(0.1f);
}
render_octomap::~render_octomap()
{
    if (has_device())
    {
        vkDestroyPipeline(get_device().get_handle(), pipeline, nullptr);
        vkDestroyPipelineLayout(get_device().get_handle(), pipelineLayout, nullptr);
        vkDestroyDescriptorSetLayout(get_device().get_handle(), descriptorSetLayout, nullptr);
        vkDestroyPipeline(get_device().get_handle(), gltf_pipeline, nullptr);
        vkDestroyPipelineLayout(get_device().get_handle(), gltf_pipeline_layout, nullptr);
        vkDestroyPipeline(get_device().get_handle(), splat_pipeline, nullptr);
        vkDestroyPipelineLayout(get_device().get_handle(), splat_pipeline_layout, nullptr);
        vkDestroyDescriptorPool(get_device().get_handle(), splat_descriptor_pool, nullptr);
        vkDestroyDescriptorSetLayout(get_device().get_handle(), splat_descriptor_set_layout, nullptr);
        vkDestroyPipelineCache(get_device().get_handle(), pipelineCache, nullptr);
        vkDestroyDescriptorPool(get_device().get_handle(), descriptorPool, nullptr);
        delete gui;
        gui = nullptr;
    }
    delete map;
    map = nullptr;
}
void render_octomap::BuildCubes()
{
    const octomap::OcTree *tree = map;
    if (tree->size() == 0)
    {
        return;
    }
    float nextBuildSize = static_cast<float>(lastMapBuildSize) + (static_cast<float>(lastMapBuildSize) * 0.05f);
    if (static_cast<float>(tree->size()) < nextBuildSize)
    {
        return;
    }

    double minX, minY, minZ, maxX, maxY, maxZ;
    tree->getMetricMin(minX, minY, minZ);
    tree->getMetricMax(maxX, maxY, maxZ);

    // set min/max Z for color height map
    m_zMin = static_cast<float>(minZ);
    m_zMax = static_cast<float>(maxZ);

    // this is to get just grey; doing full color heightmap for now.
    // h = std::min(std::max((h-m_zMin)/ (m_zMax - m_zMin), 0.0f), 1.0f) * 0.4f + 0.3f; // h \in [0.3, 0.7]
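    // The loop below maps each occupied voxel's height onto a hue and runs a
    // manual HSV->RGB conversion (the switch on `i` further down), producing
    // a rainbow height ramp across the map.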
    instances.clear();
    for (auto it = tree->begin_tree(mMaxTreeDepth), end = tree->end_tree(); it != end; ++it)
    {
        if (it.isLeaf() && tree->isNodeOccupied(*it))
        {
            glm::vec3 coords = {it.getCoordinate().x(), it.getCoordinate().y(), it.getCoordinate().z()};
            coords.y *= -1;
            InstanceData instance;
            instance.pos[0] = coords[0];
            instance.pos[1] = coords[1];
            instance.pos[2] = coords[2];
            float h = coords[2];
            if (m_zMin >= m_zMax)
            {
                h = 0.5f;
            }
            else
            {
                h = (1.0f - std::min(std::max((h - m_zMin) / (m_zMax - m_zMin), 0.0f), 1.0f)) * 0.8f;
            }

            // blend over HSV-values (more colors)
            float r, g, b;
            float s = 1.0f;
            float v = 1.0f;

            h -= floor(h);
            h *= 6;
            int i;
            float m, n, f;

            i = static_cast<int>(floor(h));
            f = h - static_cast<float>(i);
            if (!(i & 1))
            {
                f = 1 - f; // if "i" is even
            }
            m = v * (1 - s);
            n = v * (1 - s * f);

            switch (i)
            {
                case 6:
                case 0:
                    r = v;
                    g = n;
                    b = m;
                    break;
                case 1:
                    r = n;
                    g = v;
                    b = m;
                    break;
                case 2:
                    r = m;
                    g = v;
                    b = n;
                    break;
                case 3:
                    r = m;
                    g = n;
                    b = v;
                    break;
                case 4:
                    r = n;
                    g = m;
                    b = v;
                    break;
                case 5:
                    r = v;
                    g = m;
                    b = n;
                    break;
                default:
                    r = 1;
                    g = 0.5f;
                    b = 0.5f;
                    break;
            }

            instance.col[0] = r;
            instance.col[1] = g;
            instance.col[2] = b;
            instance.col[3] = 1.0f;
            instance.scale  = static_cast<float>(it.getSize());
            instances.push_back(instance);
        }
    } // end for all voxels
    // Create buffers
    if (!instances.empty())
    {
        instanceBuffer = std::make_unique<vkb::core::BufferC>(get_device(),
                                                              instances.size() * sizeof(InstanceData),
                                                              VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
                                                              VMA_MEMORY_USAGE_CPU_TO_GPU);

        auto buf = instanceBuffer->map();
        memcpy(buf, instances.data(), instances.size() * sizeof(InstanceData));
        instanceBuffer->flush();
        instanceBuffer->unmap();
    }
    // instance buffer

    lastBuildTime    = std::chrono::system_clock::now();
    lastMapBuildSize = tree->size();
}

void render_octomap::build_command_buffers()
{
    VkCommandBufferBeginInfo command_buffer_begin_info = vkb::initializers::command_buffer_begin_info();

    VkClearValue clear_values[2];
    clear_values[0].color        = {{0.0f, 0.0f, 0.033f, 0.0f}};
    clear_values[1].depthStencil = {1.0f, 0};

    VkRenderPassBeginInfo render_pass_begin_info    = vkb::initializers::render_pass_begin_info();
    render_pass_begin_info.renderPass               = render_pass;
    render_pass_begin_info.renderArea.extent.width  = width;
    render_pass_begin_info.renderArea.extent.height = height;
    render_pass_begin_info.clearValueCount          = 2;
    render_pass_begin_info.pClearValues             = clear_values;

    for (int32_t i = 0; i < static_cast<int32_t>(draw_cmd_buffers.size()); ++i)
    {
        // Set target frame buffer
        render_pass_begin_info.framebuffer = framebuffers[i];
        VK_CHECK(vkBeginCommandBuffer(draw_cmd_buffers[i], &command_buffer_begin_info));

        vkCmdBeginRenderPass(draw_cmd_buffers[i], &render_pass_begin_info, VK_SUBPASS_CONTENTS_INLINE);
        // Render ImGui first (sidebar + map panel background), then draw the 3D map into the map viewport.
        // This ensures the opaque `mapDisplay` background doesn't overdraw the 3D content.
        gui->drawFrame(draw_cmd_buffers[i]);

        VkViewport viewport = vkb::initializers::viewport(gui->MapsView.mapSize.x, gui->MapsView.mapSize.y, 0.0f, 1.0f);
        viewport.x          = gui->MapsView.mapPos.x;
        viewport.y          = gui->MapsView.mapPos.y;
        vkCmdSetViewport(draw_cmd_buffers[i], 0, 1, &viewport);
        VkRect2D scissorRect;
        scissorRect.offset.x      = static_cast<int32_t>(gui->MapsView.mapPos.x);
        scissorRect.offset.y      = static_cast<int32_t>(gui->MapsView.mapPos.y);
        scissorRect.extent.width  = static_cast<uint32_t>(gui->MapsView.mapSize.x);
        scissorRect.extent.height = static_cast<uint32_t>(gui->MapsView.mapSize.y);
        vkCmdSetScissor(draw_cmd_buffers[i], 0, 1, &scissorRect);

        VkDeviceSize offsets[1] = {0};
        vkCmdBindDescriptorSets(draw_cmd_buffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1,
                                &descriptorSet, 0, nullptr);
        vkCmdBindPipeline(draw_cmd_buffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
        vkCmdBindVertexBuffers(draw_cmd_buffers[i], 0, 1, &vertexBuffer->get_handle(), offsets);
        vkCmdBindVertexBuffers(draw_cmd_buffers[i], 1, 1, &instanceBuffer->get_handle(), offsets);
        vkCmdBindIndexBuffer(draw_cmd_buffers[i], indexBuffer->get_handle(), 0, VK_INDEX_TYPE_UINT32);

        vkCmdDrawIndexed(draw_cmd_buffers[i], indexCount, static_cast<uint32_t>(instances.size()), 0, 0, 0);
        // draw_ui(draw_cmd_buffers[i]);
        vkCmdEndRenderPass(draw_cmd_buffers[i]);

        VK_CHECK(vkEndCommandBuffer(draw_cmd_buffers[i]));
    }
}
bool render_octomap::prepare(const vkb::ApplicationOptions &options)
{
    if (!ApiVulkanSample::prepare(options))
    {
        return false;
    }
    // Enable framework WASD movement (implemented only for `CameraType::FirstPerson`).
    camera.type = vkb::CameraType::FirstPerson;
    camera.set_perspective(60.0f, static_cast<float>(width) / static_cast<float>(height), 0.1f, 256.0f);
    camera.set_rotation({0.0f, 0.0f, 0.0f});
    camera.set_translation({0.0f, 0.0f, -1.0f});
    std::string octomapPath = vkb::fs::path::get(vkb::fs::path::Type::Assets, "scenes/octmap_and_splats/octMap.bin");
    map->readBinary(octomapPath);
    BuildCubes();
    gui = new ImGUIUtil(this);
    gui->init(static_cast<float>(width), static_cast<float>(height));
    gui->initResources(render_pass, queue);
    createPipelines(render_pass);
    // Initialize ImGui frame state before first command buffer build
    gui->newFrame(true);
    gui->updateBuffers();
    build_command_buffers();
    prepared = true;
    return true;
}

void render_octomap::createPipelines(VkRenderPass renderPass)
{
    SetupVertexDescriptions();
    prepareUBO();
    auto inputAssemblyState   = vkb::initializers::pipeline_input_assembly_state_create_info(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, 0, VK_FALSE);
    auto raster_State         = vkb::initializers::pipeline_rasterization_state_create_info(VK_POLYGON_MODE_FILL, VK_CULL_MODE_BACK_BIT, VK_FRONT_FACE_COUNTER_CLOCKWISE, 0);
    auto blendAttachmentState = vkb::initializers::pipeline_color_blend_attachment_state(0xf, VK_FALSE);
    auto colorBlendState      = vkb::initializers::pipeline_color_blend_state_create_info(1, &blendAttachmentState);
    auto depthStencilState    = vkb::initializers::pipeline_depth_stencil_state_create_info(VK_TRUE, VK_TRUE, VK_COMPARE_OP_LESS_OR_EQUAL);
    auto viewportState        = vkb::initializers::pipeline_viewport_state_create_info(1, 1, 0);
    auto multisampleState     = vkb::initializers::pipeline_multisample_state_create_info(VK_SAMPLE_COUNT_1_BIT, 0);

    std::vector<VkDynamicState> dynamicStateEnables = {
        VK_DYNAMIC_STATE_VIEWPORT,
        VK_DYNAMIC_STATE_SCISSOR,
        VK_DYNAMIC_STATE_LINE_WIDTH};

    auto dynamicState = vkb::initializers::pipeline_dynamic_state_create_info(dynamicStateEnables.data(), static_cast<uint32_t>(dynamicStateEnables.size()), 0);
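    // Viewport and scissor must be dynamic: the 3D map is drawn inside an
    // ImGui panel (gui->MapsView) whose position and size can change every frame.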
    VkPipelineCacheCreateInfo pipelineCacheCreateInfo = {};
    pipelineCacheCreateInfo.sType                     = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
    VK_CHECK(vkCreatePipelineCache(get_device().get_handle(), &pipelineCacheCreateInfo, nullptr, &pipelineCache));

    // Rendering pipeline
    std::vector<VkDescriptorPoolSize> poolSizes = {
        // Graphics pipelines uniform buffers
        vkb::initializers::descriptor_pool_size(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2)};
    VkDescriptorPoolCreateInfo descriptorPoolInfo = vkb::initializers::descriptor_pool_create_info(poolSizes, 3);
    VK_CHECK(vkCreateDescriptorPool(get_device().get_handle(), &descriptorPoolInfo, nullptr, &descriptorPool));

    std::vector<VkDescriptorSetLayoutBinding> setLayoutBindings = {
        // Binding 0: Vertex shader uniform buffer
        vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0)};

    auto descriptorLayout = vkb::initializers::descriptor_set_layout_create_info(setLayoutBindings);
    VK_CHECK(vkCreateDescriptorSetLayout(get_device().get_handle(), &descriptorLayout, nullptr, &descriptorSetLayout));

    auto pPipelineLayoutCreateInfo = vkb::initializers::pipeline_layout_create_info(&descriptorSetLayout, 1);
    VK_CHECK(vkCreatePipelineLayout(get_device().get_handle(), &pPipelineLayoutCreateInfo, nullptr, &pipelineLayout));

    // Load shaders
    std::vector<VkPipelineShaderStageCreateInfo> shaderStages = {
        load_shader("render_octomap", "render.vert.spv", VK_SHADER_STAGE_VERTEX_BIT),
        load_shader("render_octomap", "render.frag.spv", VK_SHADER_STAGE_FRAGMENT_BIT)};

    auto pipelineCreateInfo = vkb::initializers::pipeline_create_info(pipelineLayout, renderPass, 0);

    pipelineCreateInfo.pVertexInputState   = &vertices.inputState;
    pipelineCreateInfo.pInputAssemblyState = &inputAssemblyState;
    pipelineCreateInfo.pRasterizationState = &raster_State;
    pipelineCreateInfo.pColorBlendState    = &colorBlendState;
    pipelineCreateInfo.pMultisampleState   = &multisampleState;
    pipelineCreateInfo.pViewportState      = &viewportState;
    pipelineCreateInfo.pDepthStencilState  = &depthStencilState;
    pipelineCreateInfo.pDynamicState       = &dynamicState;
    pipelineCreateInfo.stageCount          = static_cast<uint32_t>(shaderStages.size());
    pipelineCreateInfo.pStages             = shaderStages.data();
    pipelineCreateInfo.renderPass          = renderPass;

    VK_CHECK(
        vkCreateGraphicsPipelines(get_device().get_handle(), pipelineCache, 1, &pipelineCreateInfo, nullptr, &pipeline));

    auto allocInfo = vkb::initializers::descriptor_set_allocate_info(descriptorPool, &descriptorSetLayout, 1);
    VK_CHECK(vkAllocateDescriptorSets(get_device().get_handle(), &allocInfo, &descriptorSet));
    VkDescriptorBufferInfo buffer_descriptor = create_descriptor(*uniformBufferVS);
    std::vector<VkWriteDescriptorSet> baseImageWriteDescriptorSets = {
        vkb::initializers::write_descriptor_set(descriptorSet, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 0,
                                                &buffer_descriptor)};

    vkUpdateDescriptorSets(get_device().get_handle(), static_cast<uint32_t>(baseImageWriteDescriptorSets.size()), baseImageWriteDescriptorSets.data(), 0, nullptr);
}

void render_octomap::create_gltf_pipeline(VkRenderPass renderPass)
{
    if (gltf_pipeline != VK_NULL_HANDLE)
    {
        return;
    }

    // Pipeline layout: reuse existing descriptor set layout (binding 0 UBO) and add push constants.
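    // mat4 + vec4 = 80 bytes of push constants, well within the 128-byte
    // minimum that the Vulkan spec guarantees for maxPushConstantsSize.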
    struct GltfPushConstants
    {
        glm::mat4 model;
        glm::vec4 color;
    };

    VkPushConstantRange push_constant_range{};
    push_constant_range.stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
    push_constant_range.offset     = 0;
    push_constant_range.size       = sizeof(GltfPushConstants);

    VkPipelineLayoutCreateInfo pipeline_layout_create_info = vkb::initializers::pipeline_layout_create_info(&descriptorSetLayout, 1);
    pipeline_layout_create_info.pushConstantRangeCount     = 1;
    pipeline_layout_create_info.pPushConstantRanges        = &push_constant_range;
    VK_CHECK(vkCreatePipelineLayout(get_device().get_handle(), &pipeline_layout_create_info, nullptr, &gltf_pipeline_layout));

    // Shaders
    std::vector<VkPipelineShaderStageCreateInfo> shaderStages = {
        load_shader("render_octomap", "gltf.vert.spv", VK_SHADER_STAGE_VERTEX_BIT),
        load_shader("render_octomap", "gltf.frag.spv", VK_SHADER_STAGE_FRAGMENT_BIT)};

    // Vertex input: single interleaved binding with POSITION (VEC3, 12 bytes) + COLOR_0 (VEC4, 16 bytes) = stride 28
    std::vector<VkVertexInputBindingDescription> bindings = {
        vkb::initializers::vertex_input_binding_description(0, 28, VK_VERTEX_INPUT_RATE_VERTEX)};
    std::vector<VkVertexInputAttributeDescription> attributes = {
        vkb::initializers::vertex_input_attribute_description(0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0),      // POSITION at offset 0
        vkb::initializers::vertex_input_attribute_description(0, 1, VK_FORMAT_R32G32B32A32_SFLOAT, 12)}; // COLOR_0 at offset 12

    VkPipelineVertexInputStateCreateInfo vertexInputState = vkb::initializers::pipeline_vertex_input_state_create_info();
    vertexInputState.vertexBindingDescriptionCount   = static_cast<uint32_t>(bindings.size());
    vertexInputState.pVertexBindingDescriptions      = bindings.data();
    vertexInputState.vertexAttributeDescriptionCount = static_cast<uint32_t>(attributes.size());
    vertexInputState.pVertexAttributeDescriptions    = attributes.data();

    auto inputAssemblyState = vkb::initializers::pipeline_input_assembly_state_create_info(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, 0, VK_FALSE);
    auto raster_state       = vkb::initializers::pipeline_rasterization_state_create_info(VK_POLYGON_MODE_FILL, VK_CULL_MODE_BACK_BIT,
                                                                                          VK_FRONT_FACE_COUNTER_CLOCKWISE, 0);
    auto blendAttachment    = vkb::initializers::pipeline_color_blend_attachment_state(0xf, VK_FALSE);
    auto colorBlendState    = vkb::initializers::pipeline_color_blend_state_create_info(1, &blendAttachment);
    auto depthStencilState  = vkb::initializers::pipeline_depth_stencil_state_create_info(VK_TRUE, VK_TRUE, VK_COMPARE_OP_LESS_OR_EQUAL);
    auto viewportState      = vkb::initializers::pipeline_viewport_state_create_info(1, 1, 0);
    auto multisampleState   = vkb::initializers::pipeline_multisample_state_create_info(VK_SAMPLE_COUNT_1_BIT, 0);

    std::vector<VkDynamicState> dynamicStateEnables = {VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR};
    auto dynamicState = vkb::initializers::pipeline_dynamic_state_create_info(dynamicStateEnables.data(), static_cast<uint32_t>(dynamicStateEnables.size()), 0);

    auto pipelineCreateInfo                = vkb::initializers::pipeline_create_info(gltf_pipeline_layout, renderPass, 0);
    pipelineCreateInfo.pVertexInputState   = &vertexInputState;
    pipelineCreateInfo.pInputAssemblyState = &inputAssemblyState;
    pipelineCreateInfo.pRasterizationState = &raster_state;
    pipelineCreateInfo.pColorBlendState    = &colorBlendState;
    pipelineCreateInfo.pMultisampleState   = &multisampleState;
    pipelineCreateInfo.pViewportState      = &viewportState;
    pipelineCreateInfo.pDepthStencilState  = &depthStencilState;
    pipelineCreateInfo.pDynamicState       = &dynamicState;
    pipelineCreateInfo.stageCount          = static_cast<uint32_t>(shaderStages.size());
    pipelineCreateInfo.pStages             = shaderStages.data();

    VK_CHECK(vkCreateGraphicsPipelines(get_device().get_handle(), pipelineCache, 1, &pipelineCreateInfo, nullptr, &gltf_pipeline));
}
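// The splat pipeline below mirrors the tutorial's recipe: per-instance quad
// attributes, triangle-strip topology, no culling, depth test without depth
// writes, and premultiplied-alpha blending.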
void render_octomap::create_splat_pipeline(VkRenderPass renderPass)
{
    if (splat_pipeline != VK_NULL_HANDLE)
    {
        return;
    }

    // Descriptor set for splat UBO
    {
        std::vector<VkDescriptorPoolSize> poolSizes = {
            vkb::initializers::descriptor_pool_size(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1)};
        VkDescriptorPoolCreateInfo poolInfo = vkb::initializers::descriptor_pool_create_info(poolSizes, 1);
        VK_CHECK(vkCreateDescriptorPool(get_device().get_handle(), &poolInfo, nullptr, &splat_descriptor_pool));

        std::vector<VkDescriptorSetLayoutBinding> bindings = {
            vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
                                                             VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0)};
        VkDescriptorSetLayoutCreateInfo layoutInfo = vkb::initializers::descriptor_set_layout_create_info(bindings);
        VK_CHECK(vkCreateDescriptorSetLayout(get_device().get_handle(), &layoutInfo, nullptr, &splat_descriptor_set_layout));

        VkDescriptorSetAllocateInfo allocInfo = vkb::initializers::descriptor_set_allocate_info(splat_descriptor_pool, &splat_descriptor_set_layout, 1);
        VK_CHECK(vkAllocateDescriptorSets(get_device().get_handle(), &allocInfo, &splat_descriptor_set));

        VkDescriptorBufferInfo buffer_descriptor = create_descriptor(*splat_uniform_buffer);
        VkWriteDescriptorSet   write             = vkb::initializers::write_descriptor_set(splat_descriptor_set, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 0, &buffer_descriptor);
        vkUpdateDescriptorSets(get_device().get_handle(), 1, &write, 0, nullptr);
    }

    VkPipelineLayoutCreateInfo pipeline_layout_create_info = vkb::initializers::pipeline_layout_create_info(&splat_descriptor_set_layout, 1);
    VK_CHECK(vkCreatePipelineLayout(get_device().get_handle(), &pipeline_layout_create_info, nullptr, &splat_pipeline_layout));

    std::vector<VkPipelineShaderStageCreateInfo> shaderStages = {
        load_shader("render_octomap", "splat.vert.spv", VK_SHADER_STAGE_VERTEX_BIT),
        load_shader("render_octomap", "splat.frag.spv", VK_SHADER_STAGE_FRAGMENT_BIT)};

    // Vertex input: per-instance splat attributes
    std::vector<VkVertexInputBindingDescription> bindings = {
        vkb::initializers::vertex_input_binding_description(0, sizeof(SplatInstance), VK_VERTEX_INPUT_RATE_INSTANCE)};
    std::vector<VkVertexInputAttributeDescription> attributes = {
        vkb::initializers::vertex_input_attribute_description(0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0),                    // pos
        vkb::initializers::vertex_input_attribute_description(0, 1, VK_FORMAT_R32G32B32A32_SFLOAT, sizeof(float) * 3), // rot
        vkb::initializers::vertex_input_attribute_description(0, 2, VK_FORMAT_R32G32B32_SFLOAT, sizeof(float) * 7),    // scale
        vkb::initializers::vertex_input_attribute_description(0, 3, VK_FORMAT_R32_SFLOAT, sizeof(float) * 10),         // opacity
        vkb::initializers::vertex_input_attribute_description(0, 4, VK_FORMAT_R32G32B32_SFLOAT, sizeof(float) * 11),   // color
    };

    VkPipelineVertexInputStateCreateInfo vertexInputState = vkb::initializers::pipeline_vertex_input_state_create_info();
    vertexInputState.vertexBindingDescriptionCount   = static_cast<uint32_t>(bindings.size());
    vertexInputState.pVertexBindingDescriptions      = bindings.data();
    vertexInputState.vertexAttributeDescriptionCount = static_cast<uint32_t>(attributes.size());
    vertexInputState.pVertexAttributeDescriptions    = attributes.data();

    auto inputAssemblyState = vkb::initializers::pipeline_input_assembly_state_create_info(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, 0, VK_FALSE);
    auto raster_state       = vkb::initializers::pipeline_rasterization_state_create_info(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE,
                                                                                          VK_FRONT_FACE_COUNTER_CLOCKWISE, 0);
    // Premultiplied alpha blending
    VkPipelineColorBlendAttachmentState blendAttachment{};
    blendAttachment.colorWriteMask      = 0xf;
    blendAttachment.blendEnable         = VK_TRUE;
    blendAttachment.srcColorBlendFactor = VK_BLEND_FACTOR_ONE;
    blendAttachment.dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA;
    blendAttachment.colorBlendOp        = VK_BLEND_OP_ADD;
    blendAttachment.srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE;
    blendAttachment.dstAlphaBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA;
    blendAttachment.alphaBlendOp        = VK_BLEND_OP_ADD;

    auto colorBlendState   = vkb::initializers::pipeline_color_blend_state_create_info(1, &blendAttachment);
    auto depthStencilState = vkb::initializers::pipeline_depth_stencil_state_create_info(VK_TRUE, VK_FALSE, VK_COMPARE_OP_LESS_OR_EQUAL);
    auto viewportState     = vkb::initializers::pipeline_viewport_state_create_info(1, 1, 0);
    auto multisampleState  = vkb::initializers::pipeline_multisample_state_create_info(VK_SAMPLE_COUNT_1_BIT, 0);

    std::vector<VkDynamicState> dynamicStateEnables = {VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR};
    auto dynamicState = vkb::initializers::pipeline_dynamic_state_create_info(dynamicStateEnables.data(), static_cast<uint32_t>(dynamicStateEnables.size()), 0);

    auto pipelineCreateInfo                = vkb::initializers::pipeline_create_info(splat_pipeline_layout, renderPass, 0);
    pipelineCreateInfo.pVertexInputState   = &vertexInputState;
    pipelineCreateInfo.pInputAssemblyState = &inputAssemblyState;
    pipelineCreateInfo.pRasterizationState = &raster_state;
    pipelineCreateInfo.pColorBlendState    = &colorBlendState;
    pipelineCreateInfo.pMultisampleState   = &multisampleState;
    pipelineCreateInfo.pViewportState      = &viewportState;
    pipelineCreateInfo.pDepthStencilState  = &depthStencilState;
    pipelineCreateInfo.pDynamicState       = &dynamicState;
    pipelineCreateInfo.stageCount          = static_cast<uint32_t>(shaderStages.size());
    pipelineCreateInfo.pStages             = shaderStages.data();

    VK_CHECK(vkCreateGraphicsPipelines(get_device().get_handle(), pipelineCache, 1, &pipelineCreateInfo, nullptr, &splat_pipeline));
}

void render_octomap::prepareUBO()
{
    // Vertex shader uniform buffer block
    uniformBufferVS = std::make_unique<vkb::core::BufferC>(get_device(),
                                                           sizeof(uboVS),
                                                           VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
                                                           VMA_MEMORY_USAGE_CPU_TO_GPU);
    updateUBO();
}

void render_octomap::updateUBO()
{
    uboVS.projection = camera.matrices.perspective;
    uboVS.camera     = camera.matrices.view;

    uniformBufferVS->convert_and_update(uboVS);
}

// This just gives the first vertexBuffer
void render_octomap::generateMasterCube()
{
    // Setup vertices for a unit cube (8 corners, shared by all instances)
    std::vector<Vertex> verticesLoc =
        {
            {{0.5f, 0.5f, 0.5f}},
            {{0.5f, 0.5f, -0.5f}},
            {{0.5f, -0.5f, 0.5f}},
            {{0.5f, -0.5f, -0.5f}},
            {{-0.5f, 0.5f, 0.5f}},
            {{-0.5f, 0.5f, -0.5f}},
            {{-0.5f, -0.5f, 0.5f}},
            {{-0.5f, -0.5f, -0.5f}}};

    // Setup indices - counter-clockwise winding for all outward-facing triangles
    // Vertices: 0=(+,+,+), 1=(+,+,-), 2=(+,-,+), 3=(+,-,-), 4=(-,+,+), 5=(-,+,-), 6=(-,-,+), 7=(-,-,-)
    std::vector<uint32_t> indices = {
        // Right face (+X) - looking from +X toward origin
        0, 2, 3, 3, 1, 0,
        // Left face (-X) - looking from -X toward origin
        4, 5, 7, 7, 6, 4,

        // Top face (+Y) - looking from +Y toward origin
        0, 1, 5, 5, 4, 0,
        // Bottom face (-Y) - looking from -Y toward origin
        2, 6, 7, 7, 3, 2,

        // Back face (+Z) - looking from +Z toward origin
        0, 4, 6, 6, 2, 0,
        // Front face (-Z) - looking from -Z toward origin
        1, 3, 7, 7, 5, 1};
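    // Only these 8 vertices and 36 indices ever reach the GPU: every occupied
    // voxel is drawn as an instance of this unit cube, positioned, colored, and
    // scaled by the per-instance data bound at binding 1.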
+ +void render_octomap::SetupVertexDescriptions() +{ + generateMasterCube(); + // Binding descriptions: binding 0 advances per vertex, binding 1 per instance + vertices.bindingDescriptions = { + vkb::initializers::vertex_input_binding_description(0, sizeof(Vertex), VK_VERTEX_INPUT_RATE_VERTEX), + vkb::initializers::vertex_input_binding_description(1, sizeof(InstanceData), VK_VERTEX_INPUT_RATE_INSTANCE)}; + + // Attribute descriptions + // Describes memory layout and shader positions + vertices.attributeDescriptions = { + // Location 0: Vertex position + vkb::initializers::vertex_input_attribute_description(0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0), + + // Per-Instance attributes + vkb::initializers::vertex_input_attribute_description(1, 1, VK_FORMAT_R32G32B32_SFLOAT, 0), // Location 1: Instance position + vkb::initializers::vertex_input_attribute_description(1, 2, VK_FORMAT_R32G32B32A32_SFLOAT, sizeof(float) * 3), // Location 2: Color + vkb::initializers::vertex_input_attribute_description(1, 3, VK_FORMAT_R32_SFLOAT, sizeof(float) * 7), // Location 3: Scale + }; + + // Assign to vertex buffer + vertices.inputState = vkb::initializers::pipeline_vertex_input_state_create_info(); + vertices.inputState.vertexBindingDescriptionCount = static_cast<uint32_t>(vertices.bindingDescriptions.size()); + vertices.inputState.pVertexBindingDescriptions = vertices.bindingDescriptions.data(); + vertices.inputState.vertexAttributeDescriptionCount = static_cast<uint32_t>(vertices.attributeDescriptions.size()); + vertices.inputState.pVertexAttributeDescriptions = vertices.attributeDescriptions.data(); +}
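Note: the instanced path above only works while InstanceData's in-memory layout matches the attribute offsets 0, 12 and 28 hard-coded in SetupVertexDescriptions(). A sketch of compile-time guards (struct copied from render_octomap.h; not in the sample):

#include <cstddef>

struct InstanceData
{
    float pos[3];        // location 1, VK_FORMAT_R32G32B32_SFLOAT
    float col[4];        // location 2, VK_FORMAT_R32G32B32A32_SFLOAT
    float scale{0.0f};   // location 3, VK_FORMAT_R32_SFLOAT
};

static_assert(offsetof(InstanceData, pos) == 0, "instance position offset");
static_assert(offsetof(InstanceData, col) == sizeof(float) * 3, "instance color offset");
static_assert(offsetof(InstanceData, scale) == sizeof(float) * 7, "instance scale offset");
static_assert(sizeof(InstanceData) == sizeof(float) * 8, "binding 1 stride");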
+bool render_octomap::resize(const uint32_t width, const uint32_t height) +{ + ApiVulkanSample::resize(width, height); + rebuild_command_buffers(); + return true; +} + +void render_octomap::update_overlay(float delta_time, const std::function<void()> &additional_ui) +{ + // This sample drives its GUI through a custom ImGui wrapper (ImGUIUtil); the GUI itself is updated in render() + // Call the additional_ui callback if provided + if (additional_ui) + { + additional_ui(); + } +} + +void render_octomap::render(float delta_time) +{ + if (!prepared) + { + return; + } + ApiVulkanSample::prepare_frame(); + + // Update camera movement based on keyboard input (WASD) + camera.update(delta_time); + + // Update ImGui every frame so it can process input + ImGuiIO &io = ImGui::GetIO(); + + io.DisplaySize = ImVec2(static_cast<float>(width), static_cast<float>(height)); + io.DeltaTime = delta_time; + + // Process ImGui frame to handle button clicks and other input + gui->newFrame(frame_count == 0); + bool ui_buffers_recreated = gui->updateBuffers(); + + const bool view_state_changed = gui->MapsView.stateChanged; + if (view_state_changed) + { + gui->MapsView.stateChanged = false; + onViewStateChanged(gui->MapsView.currentState); + } + + // (Re)build 3D instance data and UBOs before recording. + if (!paused || camera.updated) + { + updateUBO(); + } + BuildCubes(); + + // Record only the current command buffer (safe per-frame path). + recreate_current_command_buffer(); + auto cmd = draw_cmd_buffers[current_buffer]; + auto begin_info = vkb::initializers::command_buffer_begin_info(); + begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + VK_CHECK(vkBeginCommandBuffer(cmd, &begin_info)); + + VkClearValue clear_values[2]; + clear_values[0].color = {{0.0f, 0.0f, 0.033f, 0.0f}}; + clear_values[1].depthStencil = {1.0f, 0}; + + VkRenderPassBeginInfo render_pass_begin_info = vkb::initializers::render_pass_begin_info(); + render_pass_begin_info.renderPass = render_pass; + render_pass_begin_info.renderArea.extent.width = width; + render_pass_begin_info.renderArea.extent.height = height; + render_pass_begin_info.clearValueCount = 2; + render_pass_begin_info.pClearValues = clear_values; + render_pass_begin_info.framebuffer = framebuffers[current_buffer]; + + vkCmdBeginRenderPass(cmd, &render_pass_begin_info, VK_SUBPASS_CONTENTS_INLINE); + + // Draw the 3D map into the viewport. + VkViewport viewport = vkb::initializers::viewport(gui->MapsView.mapSize.x, gui->MapsView.mapSize.y, 0.0f, 1.0f); + viewport.x = gui->MapsView.mapPos.x; + viewport.y = gui->MapsView.mapPos.y; + vkCmdSetViewport(cmd, 0, 1, &viewport); + VkRect2D scissorRect; + scissorRect.offset.x = static_cast<int32_t>(gui->MapsView.mapPos.x); + scissorRect.offset.y = static_cast<int32_t>(gui->MapsView.mapPos.y); + scissorRect.extent.width = static_cast<uint32_t>(gui->MapsView.mapSize.x); + scissorRect.extent.height = static_cast<uint32_t>(gui->MapsView.mapSize.y); + vkCmdSetScissor(cmd, 0, 1, &scissorRect); + + VkDeviceSize offsets[2] = {0, 0}; + + switch (currentViewState) + { + case MapView::ViewState::Octomap: + { + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, &descriptorSet, 0, nullptr); + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + vkCmdBindVertexBuffers(cmd, 0, 1, &vertexBuffer->get_handle(), offsets); + if (instanceBuffer) + { + vkCmdBindVertexBuffers(cmd, 1, 1, &instanceBuffer->get_handle(), offsets); + vkCmdBindIndexBuffer(cmd, indexBuffer->get_handle(), 0, VK_INDEX_TYPE_UINT32); + vkCmdDrawIndexed(cmd, indexCount, static_cast<uint32_t>(instances.size()), 0, 0, 0); + } + break; + } + + case MapView::ViewState::GLTFRegular: + { + if (gltf_pipeline == VK_NULL_HANDLE) + { + create_gltf_pipeline(render_pass); + } + if (gltf_pipeline == VK_NULL_HANDLE || gltf_nodes.empty()) + { + break; + } + + struct GltfPushConstants + { + glm::mat4 model; + glm::vec4 color; + } pc; + + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, gltf_pipeline_layout, 0, 1, &descriptorSet, 0, nullptr); + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, gltf_pipeline); + + for (auto &d : gltf_nodes) + { + if (!d.node || !d.sub_mesh) + { + continue; + } + + auto pos_it = d.sub_mesh->vertex_buffers.find("position"); + if (pos_it == d.sub_mesh->vertex_buffers.end()) + { + continue; + } + VkBuffer pos_buf = pos_it->second.get_handle(); + + // Bind single interleaved buffer (contains POSITION + COLOR_0) + vkCmdBindVertexBuffers(cmd, 0, 1, &pos_buf, offsets); + + // Try to get color from material, otherwise use white + const auto *mat = dynamic_cast<const vkb::sg::PBRMaterial *>(d.sub_mesh->get_material()); + glm::vec4 col = mat ? mat->base_color_factor : glm::vec4(1.0f, 1.0f, 1.0f, 1.0f); + + // If material color is default/white and we have vertex colors, the shader will use vertex colors + pc.model = d.node->get_transform().get_world_matrix(); + pc.color = col; + vkCmdPushConstants(cmd, gltf_pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(GltfPushConstants), &pc); + + if (d.sub_mesh->index_buffer) + { + vkCmdBindIndexBuffer(cmd, d.sub_mesh->index_buffer->get_handle(), 0, d.sub_mesh->index_type); + vkCmdDrawIndexed(cmd, d.sub_mesh->vertex_indices, 1, 0, 0, 0); + } + } + break; + } + + case MapView::ViewState::GLTFSplats: + { + if (!splat_instance_buffer || splat_count == 0) + { + break; + } + if (!splat_uniform_buffer) + { + break; + } + if (splat_pipeline == VK_NULL_HANDLE) + { + create_splat_pipeline(render_pass); + } + if (splat_pipeline == VK_NULL_HANDLE) + { + break; + } + + // Update splat UBO + splat_ubo.projection = camera.matrices.perspective; + splat_ubo.view = camera.matrices.view; + splat_ubo.viewport = glm::vec2(gui->MapsView.mapSize.x, gui->MapsView.mapSize.y); + splat_ubo.focalX = camera.matrices.perspective[0][0] * splat_ubo.viewport.x * 0.5f; + splat_ubo.focalY = camera.matrices.perspective[1][1] * splat_ubo.viewport.y * 0.5f; + splat_uniform_buffer->convert_and_update(splat_ubo); + + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, splat_pipeline_layout, 0, 1, &splat_descriptor_set, 0, nullptr); + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, splat_pipeline); + vkCmdBindVertexBuffers(cmd, 0, 1, &splat_instance_buffer->get_handle(), offsets); + vkCmdDraw(cmd, 4, splat_count, 0, 0); + break; + } + } + + // Draw ImGui last so sidebar/buttons are on top. `mapDisplay` is transparent. + gui->drawFrame(cmd); + + vkCmdEndRenderPass(cmd); + VK_CHECK(vkEndCommandBuffer(cmd)); + + // Submit to queue + submit_info.commandBufferCount = 1; + submit_info.pCommandBuffers = &draw_cmd_buffers[current_buffer]; + VK_CHECK(vkQueueSubmit(queue, 1, &submit_info, VK_NULL_HANDLE)); + + ApiVulkanSample::submit_frame(); +}
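Note: the focalX/focalY terms written into splat_ubo above come straight from the projection matrix. glm::perspective stores cot(fovy/2) in P[1][1], and the pinhole focal length in pixels is (h/2)/tan(fovy/2), so f_y = P[1][1] * h / 2 (and f_x = P[0][0] * w / 2 via the aspect ratio). A small sketch with made-up values:

#include <cassert>
#include <cmath>
#include <glm/glm.hpp>
#include <glm/gtc/matrix_transform.hpp>

int main()
{
    const float fovy = glm::radians(60.0f), w = 1280.0f, h = 720.0f;
    glm::mat4 P = glm::perspective(fovy, w / h, 0.1f, 100.0f);

    float fy_direct = (h * 0.5f) / std::tan(fovy * 0.5f);   // pinhole definition
    float fy_from_P = P[1][1] * h * 0.5f;                   // what render() computes
    assert(std::fabs(fy_direct - fy_from_P) < 1e-3f);

    float fx_from_P = P[0][0] * w * 0.5f;
    assert(std::fabs(fx_from_P - fy_direct) < 1e-3f);       // square pixels: fx == fy
    return 0;
}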
+ +void render_octomap::input_event(const vkb::InputEvent &input_event) +{ + ImGuiIO &io = ImGui::GetIO(); + + if (input_event.get_source() == vkb::EventSource::Mouse) + { + const auto &mouse_button = static_cast<const vkb::MouseButtonInputEvent &>(input_event); + const float content_scale = window ? window->get_content_scale_factor() : 1.0f; + const float mouse_x_scaled = mouse_button.get_pos_x() * content_scale; + const float mouse_y_scaled = mouse_button.get_pos_y() * content_scale; + + // Use the classic ImGui IO feeding approach for reliable hover/click detection. + io.MousePos = ImVec2(mouse_x_scaled, mouse_y_scaled); + const int button_id = static_cast<int>(mouse_button.get_button()); + const bool down = (mouse_button.get_action() == vkb::MouseAction::Down); + const bool up = (mouse_button.get_action() == vkb::MouseAction::Up); + if ((down || up) && button_id >= 0 && button_id < 5) + { + io.MouseDown[button_id] = down; + } + + // Sidebar bounds must match `ImGUIUtil::newFrame()`. + const float sidebar_width = 240.0f + 20.0f * 2.0f; + const bool over_sidebar = mouse_x_scaled < sidebar_width; + if (!over_sidebar) + { + ApiVulkanSample::input_event(input_event); + } + return; + } + + // For keyboard and other events, use the framework input pipeline. + ApiVulkanSample::input_event(input_event); +}
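Note: the hard-coded sidebar width in input_event() has to be kept in sync with ImGUIUtil::newFrame() by hand. ImGui's own capture flags are a layout-independent alternative; a sketch of that approach (hypothetical helper, not the sample's code):

#include <imgui.h>

// Feed ImGui first, then ask whether it wants the mouse. WantCaptureMouse is
// maintained by ImGui itself after each NewFrame(), wherever the sidebar sits.
static bool forward_mouse_to_camera(float x, float y, int button_id, bool down, bool up)
{
    ImGuiIO &io  = ImGui::GetIO();
    io.MousePos  = ImVec2(x, y);
    if ((down || up) && button_id >= 0 && button_id < 5)
    {
        io.MouseDown[button_id] = down;
    }
    return !io.WantCaptureMouse;
}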
+ +void render_octomap::onViewStateChanged(MapView::ViewState newState) +{ + if (currentViewState == newState) + { + return; + } + + LOGI("View state changed to: {}", static_cast<int>(newState)); + currentViewState = newState; + + switch (newState) + { + case MapView::ViewState::Octomap: + // Octomap is already loaded, just need to rebuild command buffers + LOGI("Switching to Octomap view"); + break; + + case MapView::ViewState::GLTFRegular: + LOGI("Switching to GLTF Regular view"); + if (!gltfScene) + { + loadGLTFScene("scenes/octmap_and_splats/savedMap_v1.1.0.gltf"); + } + break; + + case MapView::ViewState::GLTFSplats: + LOGI("Switching to Gaussian Splats view"); + if (!splatsScene) + { + loadGaussianSplatsScene("scenes/octmap_and_splats/savedMap_v1.1.0_splats_c0_-1_-1.gltf"); + } + break; + } + + // Rebuild command buffers for the new view + rebuild_command_buffers(); +} + +void render_octomap::loadGLTFScene(const std::string &filename) +{ + LOGI("Loading GLTF scene: {}", filename); + + vkb::GLTFLoader loader(get_device()); + gltfScene = loader.read_scene_from_file(filename); + + if (gltfScene) + { + LOGI("GLTF scene loaded successfully"); + + // Build a flat list of nodes/submeshes for drawing. + gltf_nodes.clear(); + auto meshes = gltfScene->get_components<vkb::sg::Mesh>(); + for (auto *mesh : meshes) + { + for (auto node : mesh->get_nodes()) + { + for (auto *sub_mesh : mesh->get_submeshes()) + { + gltf_nodes.push_back({node, sub_mesh}); + } + } + } + + create_gltf_pipeline(render_pass); + } + else + { + LOGE("Failed to load GLTF scene: {}", filename); + } +} + +void render_octomap::loadGaussianSplatsScene(const std::string &filename) +{ + LOGI("Loading Gaussian Splats scene: {}", filename); + loadGaussianSplatsData(filename); + create_splat_pipeline(render_pass); +}
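Note: loadGaussianSplatsData() below hand-parses the file rather than going through the scene graph, because the splat attributes live in a primitive extension. Reconstructed from the parsing code (accessor indices invented), the primitive it expects looks roughly like this, embedded as a C++ raw string for illustration:

// Illustrative only -- the shape loadGaussianSplatsData() assumes;
// real files will use different accessor indices.
static const char *kExpectedSplatPrimitive = R"json({
  "mode": 0,
  "attributes": { "POSITION": 0, "COLOR_0": 1 },
  "extensions": {
    "KHR_gaussian_splatting": { "ROTATION": 2, "SCALE": 3, "OPACITY": 4 }
  }
})json";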
+ +void render_octomap::loadGaussianSplatsData(const std::string &filename) +{ + // Parse the splats GLTF directly to extract `KHR_gaussian_splatting` attributes. + // The file contains a single POINTS primitive with accessor indices for POSITION/COLOR_0 and + // extension fields ROTATION/SCALE/OPACITY. + tinygltf::TinyGLTF gltf; + tinygltf::Model model; + std::string err; + std::string warn; + + std::string gltf_file = vkb::fs::path::get(vkb::fs::path::Type::Assets) + filename; + bool ok = gltf.LoadASCIIFromFile(&model, &err, &warn, gltf_file.c_str()); + if (!ok || !err.empty()) + { + LOGE("Failed to load splats gltf {}: {}", gltf_file, err); + return; + } + if (!warn.empty()) + { + LOGI("{}", warn); + } + if (model.meshes.empty() || model.meshes[0].primitives.empty()) + { + LOGE("Splats gltf has no meshes/primitives: {}", filename); + return; + } + + const tinygltf::Primitive &prim = model.meshes[0].primitives[0]; + + auto get_accessor_ptr = [&](int accessor_index, size_t &stride_bytes) -> const uint8_t * { + if (accessor_index < 0 || accessor_index >= static_cast<int>(model.accessors.size())) + { + stride_bytes = 0; + return nullptr; + } + const tinygltf::Accessor &acc = model.accessors[accessor_index]; + const tinygltf::BufferView &bv = model.bufferViews[acc.bufferView]; + const tinygltf::Buffer &buf = model.buffers[bv.buffer]; + + if (acc.componentType != TINYGLTF_COMPONENT_TYPE_FLOAT) + { + stride_bytes = 0; + return nullptr; + } + + size_t comps = 1; + switch (acc.type) + { + case TINYGLTF_TYPE_VEC2: + comps = 2; + break; + case TINYGLTF_TYPE_VEC3: + comps = 3; + break; + case TINYGLTF_TYPE_VEC4: + comps = 4; + break; + case TINYGLTF_TYPE_MAT3: + comps = 9; + break; + default: + comps = 1; + break; + } + size_t elem_size = comps * sizeof(float); + stride_bytes = (bv.byteStride > 0) ? static_cast<size_t>(bv.byteStride) : elem_size; + + return buf.data.data() + static_cast<size_t>(bv.byteOffset) + static_cast<size_t>(acc.byteOffset); + }; + + const int pos_accessor = prim.attributes.contains("POSITION") ? prim.attributes.at("POSITION") : -1; + const int col_accessor = prim.attributes.contains("COLOR_0") ? prim.attributes.at("COLOR_0") : -1;
+ if (pos_accessor < 0) + { + LOGE("Splats gltf missing POSITION accessor: {}", filename); + return; + } + + int rot_accessor = -1; + int scale_accessor = -1; + int opacity_accessor = -1; + if (prim.extensions.contains(KHR_GAUSSIAN_SPLATTING_EXTENSION)) + { + const tinygltf::Value &ext = prim.extensions.at(KHR_GAUSSIAN_SPLATTING_EXTENSION); + if (ext.IsObject()) + { + if (ext.Has("ROTATION")) + { + rot_accessor = ext.Get("ROTATION").Get<int>(); + } + if (ext.Has("SCALE")) + { + scale_accessor = ext.Get("SCALE").Get<int>(); + } + if (ext.Has("OPACITY")) + { + opacity_accessor = ext.Get("OPACITY").Get<int>(); + } + } + } + if (rot_accessor < 0 || scale_accessor < 0 || opacity_accessor < 0 || col_accessor < 0) + { + LOGE("Splats gltf missing required KHR_gaussian_splatting accessors (ROTATION/SCALE/OPACITY/COLOR_0): {}", filename); + return; + } + + const size_t count = model.accessors[pos_accessor].count; + if (count == 0) + { + LOGE("Splats gltf has 0 splats: {}", filename); + return; + } + + size_t pos_stride = 0, rot_stride = 0, scale_stride = 0, opa_stride = 0, col_stride = 0; + const uint8_t *pos_ptr = get_accessor_ptr(pos_accessor, pos_stride); + const uint8_t *rot_ptr = get_accessor_ptr(rot_accessor, rot_stride); + const uint8_t *scale_ptr = get_accessor_ptr(scale_accessor, scale_stride); + const uint8_t *opa_ptr = get_accessor_ptr(opacity_accessor, opa_stride); + const uint8_t *col_ptr = get_accessor_ptr(col_accessor, col_stride); + if (!pos_ptr || !rot_ptr || !scale_ptr || !opa_ptr || !col_ptr) + { + LOGE("Splats gltf accessor buffer decode failed: {}", filename); + return; + } + + std::vector<SplatInstance> instances; + instances.resize(count); + for (size_t i = 0; i < count; ++i) + { + const float *p = reinterpret_cast<const float *>(pos_ptr + i * pos_stride); + const float *r = reinterpret_cast<const float *>(rot_ptr + i * rot_stride); + const float *s = reinterpret_cast<const float *>(scale_ptr + i * scale_stride); + const float *o = reinterpret_cast<const float *>(opa_ptr + i * opa_stride); + const float *c = reinterpret_cast<const float *>(col_ptr + i * col_stride); + + instances[i].pos[0] = p[0]; + instances[i].pos[1] = p[1]; + instances[i].pos[2] = p[2]; + instances[i].rot[0] = r[0]; + instances[i].rot[1] = r[1]; + instances[i].rot[2] = r[2]; + instances[i].rot[3] = r[3]; + instances[i].scale[0] = s[0]; + instances[i].scale[1] = s[1]; + instances[i].scale[2] = s[2]; + instances[i].opacity = o[0]; + instances[i].color[0] = c[0]; + instances[i].color[1] = c[1]; + instances[i].color[2] = c[2]; + instances[i]._pad = 0.0f; + } + + splat_count = static_cast<uint32_t>(count); + splat_instance_buffer = std::make_unique<vkb::core::BufferC>(get_device(), + instances.size() * sizeof(SplatInstance), + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + VMA_MEMORY_USAGE_CPU_TO_GPU); + auto buf = splat_instance_buffer->map(); + memcpy(buf, instances.data(), instances.size() * sizeof(SplatInstance)); + splat_instance_buffer->flush(); + splat_instance_buffer->unmap(); + splat_instance_buffer->set_debug_name("render_octomap splat instance buffer"); + + if (!splat_uniform_buffer) + { + splat_uniform_buffer = std::make_unique<vkb::core::BufferC>(get_device(), + sizeof(splat_ubo), + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + VMA_MEMORY_USAGE_CPU_TO_GPU); + splat_uniform_buffer->set_debug_name("render_octomap splat ubo"); + } + + LOGI("Loaded {} gaussian splats", splat_count); +} + +std::unique_ptr<vkb::VulkanSampleC> create_render_octomap() +{ + return std::make_unique<render_octomap>(); +} \ No newline at end of file
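Note: all five accessors above are decoded with the same address formula, element_i = buffer.data + bufferView.byteOffset + accessor.byteOffset + i * stride, where the stride falls back to the tightly packed element size when byteStride is zero. Spelled out with made-up numbers:

#include <cassert>
#include <cstddef>

int main()
{
    const size_t buffer_view_offset = 256;                // bufferView.byteOffset (hypothetical)
    const size_t accessor_offset    = 16;                 // accessor.byteOffset   (hypothetical)
    const size_t byte_stride        = 0;                  // 0 => tightly packed
    const size_t elem_size          = 3 * sizeof(float);  // VEC3 of FLOAT

    const size_t stride = (byte_stride > 0) ? byte_stride : elem_size;
    const size_t i      = 7;
    assert(buffer_view_offset + accessor_offset + i * stride == 356);
    return 0;
}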
diff --git a/samples/complex/render_octomap/render_octomap.h b/samples/complex/render_octomap/render_octomap.h new file mode 100644 index 0000000000..3cd519c07a --- /dev/null +++ b/samples/complex/render_octomap/render_octomap.h @@ -0,0 +1,155 @@ +/* Copyright (c) 2024-2026, Holochip Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef RENDER_OCTOMAP_H +#define RENDER_OCTOMAP_H + +#include "api_vulkan_sample.h" +#include "scene_graph/node.h" +#include +#include +#include +#include +#include +#include + +namespace octomap +{ +class OcTree; +} + +namespace vkb +{ +namespace sg +{ +class Scene; +class SubMesh; +} // namespace sg + +} // namespace vkb + +class render_octomap : public ApiVulkanSample +{ + public: + render_octomap(); + ~render_octomap() override; + void BuildCubes(); + void build_command_buffers() override; + bool prepare(const vkb::ApplicationOptions &options) override; + void update_overlay(float delta_time, const std::function<void()> &additional_ui) override; + void render(float delta_time) override; + void input_event(const vkb::InputEvent &input_event) override; + void createPipelines(VkRenderPass renderPass); + void create_gltf_pipeline(VkRenderPass renderPass); + void create_splat_pipeline(VkRenderPass renderPass); + void prepareUBO(); + void updateUBO(); + void generateMasterCube(); + void SetupVertexDescriptions(); + bool resize(const uint32_t width, const uint32_t height) override; + + // View state handling + void onViewStateChanged(MapView::ViewState newState); + void loadGLTFScene(const std::string &filename); + void loadGaussianSplatsScene(const std::string &filename); + void loadGaussianSplatsData(const std::string &filename); + + private: + struct + { + VkPipelineVertexInputStateCreateInfo inputState; + std::vector<VkVertexInputBindingDescription> bindingDescriptions; + std::vector<VkVertexInputAttributeDescription> attributeDescriptions; + } vertices; + struct InstanceData + { + float pos[3]; + float col[4]; + float scale{0.0f}; + }; + struct + { + glm::mat4 projection; + glm::mat4 camera; + } uboVS; + std::unique_ptr<vkb::core::BufferC> vertexBuffer; + std::unique_ptr<vkb::core::BufferC> indexBuffer; + std::unique_ptr<vkb::core::BufferC> instanceBuffer; + std::unique_ptr<vkb::core::BufferC> uniformBufferVS; + uint32_t indexCount; + VkPipelineCache pipelineCache; + VkPipelineLayout pipelineLayout; + VkPipeline pipeline; + VkDescriptorPool descriptorPool; + VkDescriptorSetLayout descriptorSetLayout; + VkDescriptorSet descriptorSet; + octomap::OcTree *map; + ImGUIUtil *gui; + unsigned int mMaxTreeDepth; + + float m_zMin; + float m_zMax; + uint64_t lastMapBuildSize; + std::chrono::time_point lastBuildTime; + std::vector<InstanceData> instances; + + // View state management + MapView::ViewState currentViewState = MapView::ViewState::Octomap; + std::unique_ptr<vkb::sg::Scene> gltfScene; + std::unique_ptr<vkb::sg::Scene> splatsScene; + + struct GltfNodeDraw + { + vkb::scene_graph::NodeC *node{nullptr}; + vkb::sg::SubMesh *sub_mesh{nullptr}; + }; + std::vector<GltfNodeDraw> gltf_nodes; + + VkPipelineLayout gltf_pipeline_layout{VK_NULL_HANDLE}; + VkPipeline gltf_pipeline{VK_NULL_HANDLE}; + + // Gaussian splats rendering + struct SplatInstance + { + float pos[3]; + float rot[4]; + float scale[3]; + float opacity; + float color[3]; + float _pad; + }; + std::unique_ptr<vkb::core::BufferC> splat_instance_buffer; + uint32_t splat_count{0}; + + struct + { + glm::mat4 projection; + glm::mat4 view; + glm::vec2 viewport; + float focalX; + float focalY; + } splat_ubo; + std::unique_ptr<vkb::core::BufferC> splat_uniform_buffer; + VkDescriptorPool splat_descriptor_pool{VK_NULL_HANDLE}; + VkDescriptorSetLayout splat_descriptor_set_layout{VK_NULL_HANDLE}; + VkDescriptorSet splat_descriptor_set{VK_NULL_HANDLE}; + VkPipelineLayout splat_pipeline_layout{VK_NULL_HANDLE}; + VkPipeline splat_pipeline{VK_NULL_HANDLE}; +}; + +std::unique_ptr<vkb::VulkanSampleC> create_render_octomap(); + +#endif // RENDER_OCTOMAP_H
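Note: two layouts in this header must line up with what the splat pipeline sees: SplatInstance with the per-instance vertex attributes, and the anonymous splat_ubo struct with the std140 block in splat.vert. A hedged sketch of compile-time checks (SplatUbo is a named mirror of the anonymous struct; offsetof on glm types is conditionally supported but works with stock glm settings):

#include <cstddef>
#include <glm/glm.hpp>

struct SplatInstance
{
    float pos[3];        // location 0, offset 0
    float rot[4];        // location 1, offset 12
    float scale[3];      // location 2, offset 28
    float opacity;       // location 3, offset 40
    float color[3];      // location 4, offset 44
    float _pad;
};

struct SplatUbo
{
    glm::mat4 projection;  // std140 offset 0
    glm::mat4 view;        // std140 offset 64
    glm::vec2 viewport;    // std140 offset 128 (vec2 aligns to 8)
    float     focalX;      // std140 offset 136
    float     focalY;      // std140 offset 140
};

static_assert(offsetof(SplatInstance, rot) == 12 && offsetof(SplatInstance, opacity) == 40, "attribute offsets");
static_assert(sizeof(SplatInstance) == 60, "per-instance stride");
// The C++ layout matches std140 here only because the vec2 lands on an
// 8-byte-aligned offset; a vec3 member would need explicit padding.
static_assert(offsetof(SplatUbo, viewport) == 128 && sizeof(SplatUbo) == 144, "UBO matches std140 block");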
diff --git a/shaders/render_octomap/glsl/gltf.frag b/shaders/render_octomap/glsl/gltf.frag new file mode 100644 index 0000000000..be38bad3e1 --- /dev/null +++ b/shaders/render_octomap/glsl/gltf.frag @@ -0,0 +1,29 @@ +/* Copyright (c) 2024-2026, Holochip Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +layout(location = 0) in vec4 inColor; + +layout(location = 0) out vec4 outColor; + +void main() +{ + // Use vertex color directly from GLTF data + outColor = inColor; +} diff --git a/shaders/render_octomap/glsl/gltf.vert b/shaders/render_octomap/glsl/gltf.vert new file mode 100644 index 0000000000..198020e13e --- /dev/null +++ b/shaders/render_octomap/glsl/gltf.vert @@ -0,0 +1,44 @@ +/* Copyright (c) 2024-2026, Holochip Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +layout(location = 0) in vec3 inPos; +layout(location = 1) in vec4 inColor; + +layout(binding = 0) uniform UBO +{ + mat4 projection; + mat4 view; +} ubo; + +layout(push_constant) uniform PushConstants +{ + mat4 model; + vec4 color; +} pc; + +layout(location = 0) out vec4 outColor; + +void main() +{ + vec4 world_pos = pc.model * vec4(inPos, 1.0); + gl_Position = ubo.projection * ubo.view * world_pos; + // Use vertex color from GLTF data; pc.color is pushed by the app but intentionally unused here + outColor = inColor; +} diff --git a/shaders/render_octomap/glsl/imgui.frag b/shaders/render_octomap/glsl/imgui.frag new file mode 100644 index 0000000000..a2d51ffcae --- /dev/null +++ b/shaders/render_octomap/glsl/imgui.frag @@ -0,0 +1,30 @@ +/* Copyright (c) 2024-2026, Holochip Inc.
+ * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#version 450 + +layout (binding = 0) uniform sampler2D fontSampler; + +layout (location = 0) in vec2 inUV; +layout (location = 1) in vec4 inColor; + +layout (location = 0) out vec4 outColor; + +void main() +{ + outColor = inColor * texture(fontSampler, inUV); +} diff --git a/shaders/render_octomap/glsl/imgui.vert b/shaders/render_octomap/glsl/imgui.vert new file mode 100644 index 0000000000..59a59a4398 --- /dev/null +++ b/shaders/render_octomap/glsl/imgui.vert @@ -0,0 +1,42 @@ +/* Copyright (c) 2024-2026, Holochip Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#version 450 + +layout (location = 0) in vec2 inPos; +layout (location = 1) in vec2 inUV; +layout (location = 2) in vec4 inColor; + +layout (push_constant) uniform PushConstants { + vec2 scale; + vec2 translate; +} pushConstants; + +layout (location = 0) out vec2 outUV; +layout (location = 1) out vec4 outColor; + +out gl_PerVertex +{ + vec4 gl_Position; +}; + +void main() +{ + outUV = inUV; + outColor = inColor; + gl_Position = vec4(inPos * pushConstants.scale + pushConstants.translate, 0.0, 1.0); +} diff --git a/shaders/render_octomap/glsl/render.frag b/shaders/render_octomap/glsl/render.frag new file mode 100644 index 0000000000..83bb478613 --- /dev/null +++ b/shaders/render_octomap/glsl/render.frag @@ -0,0 +1,27 @@ +/* Copyright (c) 2024-2026, Holochip Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +layout(location = 0) in vec4 inColor; +layout(location = 0) out vec4 rt0; + +void main() +{ + rt0 = inColor; +} \ No newline at end of file diff --git a/shaders/render_octomap/glsl/render.vert b/shaders/render_octomap/glsl/render.vert new file mode 100644 index 0000000000..997364d096 --- /dev/null +++ b/shaders/render_octomap/glsl/render.vert @@ -0,0 +1,46 @@ +/* Copyright (c) 2024-2026, Holochip Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +layout(location = 0) in vec3 aPos; + +// Instanced attributes +layout (location = 1) in vec3 instancePos; +layout (location = 2) in vec4 inColor; +layout (location = 3) in float instanceScale; + +layout (binding = 0) uniform UBO +{ + mat4 projection; + mat4 camera; +} ubo; + +layout (location = 0) out vec4 outColor; + +void main() +{ + outColor = inColor; + vec4 locPos = vec4(aPos, 1.0); + float eps = 0.00001; + vec4 Pos = vec4((locPos.xyz * instanceScale) + instancePos, 1.0); + Pos.x -= eps; + Pos.y -= eps; + Pos.z -= eps; + gl_Position = ubo.projection * ubo.camera * Pos; +} \ No newline at end of file diff --git a/shaders/render_octomap/glsl/splat.frag b/shaders/render_octomap/glsl/splat.frag new file mode 100644 index 0000000000..be06800d88 --- /dev/null +++ b/shaders/render_octomap/glsl/splat.frag @@ -0,0 +1,60 @@ +/* Copyright (c) 2024-2026, Holochip Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +// Inputs from vertex shader +layout(location = 0) in vec4 inColor; +layout(location = 1) in vec2 inConic; // 2D conic parameters (inverse covariance) +layout(location = 2) in float inOpacity; +layout(location = 3) in vec2 inCoord; // Quad coordinate for Gaussian evaluation + +// Output +layout(location = 0) out vec4 outColor; + +void main() { + // Evaluate 2D Gaussian function + // G(x,y) = exp(-0.5 * (x,y) * Sigma^-1 * (x,y)^T) + // where Sigma^-1 is the inverse covariance matrix + + // For a symmetric 2x2 matrix [[a, b], [b, c]], the quadratic form is: + // a*x^2 + 2*b*x*y + c*y^2 + // The vertex shader passes conic = (c/det, -b/det) in inConic; a full + // implementation would evaluate the quadratic form with it. This shader + // instead uses a cheaper isotropic approximation based only on the + // distance from the splat centre, ignoring anisotropy. + + float x = inCoord.x; + float y = inCoord.y; + + // Isotropic Gaussian falloff (approximation; see comment above) + float power = -0.5 * (x * x + y * y); + float alpha = exp(power); + + // Apply opacity + float finalAlpha = alpha * inOpacity; + + // Discard fragments with very low alpha for performance + if (finalAlpha < 0.004) { + discard; + } + + // Output with premultiplied alpha for proper blending + outColor = vec4(inColor.rgb * finalAlpha, finalAlpha); +}
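Note: for reference, the anisotropic evaluation the comments in splat.frag describe inverts the 2x2 covariance [[a, b], [b, c]] into conic coefficients (A, B, C) and computes G = exp(-0.5 * (A*x*x + 2*B*x*y + C*y*y)); the sample's vec2 conic drops C, so a full version would pass a vec3. A hypothetical C++ helper mirroring that math:

#include <algorithm>
#include <cmath>

float gaussian_weight(float x, float y, float cov_a, float cov_b, float cov_c)
{
    const float det = cov_a * cov_c - cov_b * cov_b;
    if (det <= 0.0f)
    {
        return 0.0f;        // degenerate covariance
    }
    // Inverse of [[a, b], [b, c]] is (1/det) * [[c, -b], [-b, a]].
    const float A = cov_c / det, B = -cov_b / det, C = cov_a / det;
    const float power = -0.5f * (A * x * x + 2.0f * B * x * y + C * y * y);
    return std::exp(std::min(power, 0.0f));   // clamp guards numeric spill
}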
diff --git a/shaders/render_octomap/glsl/splat.vert b/shaders/render_octomap/glsl/splat.vert new file mode 100644 index 0000000000..fd1eba0db3 --- /dev/null +++ b/shaders/render_octomap/glsl/splat.vert @@ -0,0 +1,128 @@ +/* Copyright (c) 2024-2026, Holochip Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +// Per-splat attributes (instanced) +layout(location = 0) in vec3 inPosition; // Splat center position +layout(location = 1) in vec4 inRotation; // Quaternion rotation +layout(location = 2) in vec3 inScale; // 3D scale factors +layout(location = 3) in float inOpacity; // Opacity value +layout(location = 4) in vec3 inColor; // RGB color + +// Uniform buffer +layout(binding = 0) uniform UBO { + mat4 projection; + mat4 view; + vec2 viewport; // Viewport dimensions + float focalX; // Focal length X + float focalY; // Focal length Y +} ubo; + +// Outputs to fragment shader +layout(location = 0) out vec4 outColor; +layout(location = 1) out vec2 outConic; // 2D conic parameters for Gaussian +layout(location = 2) out float outOpacity; +layout(location = 3) out vec2 outCoord; // Quad coordinate for Gaussian evaluation + +// Quad vertices for billboard rendering +const vec2 quadVertices[4] = vec2[4]( + vec2(-1.0, -1.0), + vec2( 1.0, -1.0), + vec2(-1.0, 1.0), + vec2( 1.0, 1.0) +); + +// Convert quaternion to rotation matrix +// (note: GLSL matrix constructors consume arguments in column-major order, +// so the rows written below are stored as columns) +mat3 quaternionToMatrix(vec4 q) { + float x = q.x, y = q.y, z = q.z, w = q.w; + return mat3( + 1.0 - 2.0*(y*y + z*z), 2.0*(x*y - w*z), 2.0*(x*z + w*y), + 2.0*(x*y + w*z), 1.0 - 2.0*(x*x + z*z), 2.0*(y*z - w*x), + 2.0*(x*z - w*y), 2.0*(y*z + w*x), 1.0 - 2.0*(x*x + y*y) + ); +} + +void main() { + // Get quad vertex for this instance + int quadIdx = gl_VertexIndex % 4; + vec2 quadPos = quadVertices[quadIdx]; + + // Transform splat center to view space + vec4 viewPos = ubo.view * vec4(inPosition, 1.0); + + // Build 3D covariance matrix from rotation and scale + mat3 R = quaternionToMatrix(inRotation); + mat3 S = mat3( + inScale.x, 0.0, 0.0, + 0.0, inScale.y, 0.0, + 0.0, 0.0, inScale.z + ); + mat3 M = R * S; + mat3 cov3D = M * transpose(M); + + // Project 3D covariance to 2D screen space + // Jacobian of perspective projection + float z = viewPos.z; + float z2 = z * z; + mat3 J = mat3( + ubo.focalX / z, 0.0, -ubo.focalX * viewPos.x / z2, + 0.0, ubo.focalY / z, -ubo.focalY * viewPos.y / z2, + 0.0, 0.0, 0.0 + ); + + // 2D covariance in screen space + mat3 viewRot = mat3(ubo.view); + mat3 cov2D = J * viewRot * cov3D * transpose(viewRot) * transpose(J); + + // Extract 2D covariance parameters (symmetric matrix) + float a = cov2D[0][0] + 0.3; // Add small value for numerical stability + float b = cov2D[0][1]; + float c = cov2D[1][1] + 0.3; + + // Compute eigenvalues for splat size + float det = a * c - b * b; + float trace = a + c; + float gap = sqrt(max(0.0, trace * trace - 4.0 * det)); + float lambda1 = (trace + gap) * 0.5; + float lambda2 = (trace - gap) * 0.5; + + // Splat radius (3 sigma covers 99.7% of Gaussian) + float radius = 3.0 * sqrt(max(lambda1, lambda2)); + + // Project center to screen + vec4 clipPos = ubo.projection * viewPos; + vec2 screenPos = clipPos.xy / clipPos.w; + + // Offset by quad position scaled by radius + vec2 pixelOffset = quadPos * radius / ubo.viewport; + + // Final position + gl_Position = vec4(screenPos + pixelOffset, clipPos.z / clipPos.w, 1.0); + + // Pass to fragment shader + // Convert sRGB to linear (the GLTF specifies colorSpace: "BT.709-sRGB") + vec3 linearColor = pow(inColor, vec3(2.2)); + outColor = vec4(linearColor, 1.0); + outOpacity = inOpacity; + outCoord = quadPos * radius; + + // Conic parameters for Gaussian evaluation (inverse of 2D covariance) + float invDet = 1.0 / det; + outConic = vec2(c * invDet, -b * invDet); // Simplified for symmetric case +}
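Note: in equation form, splat.vert implements the usual EWA-style projection. With $R$ from the quaternion, $S = \mathrm{diag}(s)$, $W$ the rotational part of the view matrix, and $J$ the perspective Jacobian,

$$\Sigma = R\,S\,S^{\mathsf T}\,R^{\mathsf T}, \qquad J = \begin{pmatrix} f_x/z & 0 & -f_x x/z^2 \\ 0 & f_y/z & -f_y y/z^2 \end{pmatrix}, \qquad \Sigma_{2D} = J\,W\,\Sigma\,W^{\mathsf T}\,J^{\mathsf T},$$

after which the 0.3 dilation on the diagonal keeps $\Sigma_{2D}$ invertible and the $3\sigma$ radius from its larger eigenvalue bounds the billboard quad.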