diff --git a/.gitignore b/.gitignore index 52c3f813..dab4c354 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ input/* !input/sphere1.obj !input/bunnyhead.obj build/ +build_debug/ include/rxmesh/util/git_sha1.cpp .vscode/ -scripts/*.log \ No newline at end of file +scripts/*.log diff --git a/CMakeLists.txt b/CMakeLists.txt index d036c085..4ed2dc31 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -79,6 +79,22 @@ FetchContent_Declare(cereal ) FetchContent_Populate(cereal) +# Package Management +# TODO: Consider using CPM for the various libraries above +include(cmake/CPM.cmake) + +# Add cuCollection with priority queue. This should eventually come from +# NVIDIA. +CPMAddPackage( + NAME cuco + GITHUB_REPOSITORY andrewbriand/cuCollections + GIT_TAG d58dd9fedde721a264c8ae960f7393a3a3b08c58 + OPTIONS + "BUILD_TESTS OFF" + "BUILD_BENCHMARKS OFF" + "BUILD_EXAMPLES OFF" +) + # Auto-detect GPU architecture include("cmake/AutoDetectCudaArch.cmake") @@ -133,7 +149,7 @@ set(cxx_flags set(MSVC_XCOMPILER_FLAGS "/openmp:experimental /MP /std:c++17 /Zi") set(cuda_flags - -Xcompiler=$<$:-Wall -fopenmp -O3 -Wno-unused-function> + -Xcompiler=$<$:-rdynamic -Wall -fopenmp -O3 -Wno-unused-function> -Xcompiler=$<$:${MSVC_XCOMPILER_FLAGS}> #Disables warning #177-D "function XXX was declared but never referenced" diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt index 55d14245..4fc91b42 100644 --- a/apps/CMakeLists.txt +++ b/apps/CMakeLists.txt @@ -4,12 +4,13 @@ add_subdirectory(MCF) add_subdirectory(Geodesic) add_subdirectory(Delaunay) add_subdirectory(GaussianCurvature) -add_subdirectory(XPBD ) +add_subdirectory(XPBD) #add_subdirectory(Simplification) add_subdirectory(ShortestEdgeCollapse) add_subdirectory(Remesh) +add_subdirectory(SECPriority) +add_subdirectory(SurfaceTracking) add_subdirectory(SurfaceTracking) add_subdirectory(SCP) add_subdirectory(ARAP) -add_subdirectory(Heat) - +add_subdirectory(Heat) \ No newline at end of file diff --git a/apps/SECPriority/CMakeLists.txt b/apps/SECPriority/CMakeLists.txt new file mode 100644 index 00000000..4f391430 --- /dev/null +++ b/apps/SECPriority/CMakeLists.txt @@ -0,0 +1,41 @@ +add_executable(SECPriority) + +set(SOURCE_LIST + #main.cu + secp.cu + secp_rxmesh.cuh + secp_kernels.cuh +) + +set(COMMON_LIST + ../common/openmesh_trimesh.h + ../common/openmesh_report.h +) + +target_sources(SECPriority + PRIVATE + ${SOURCE_LIST} ${COMMON_LIST} +) + +if (WIN32) + target_compile_definitions(SECPriority + PRIVATE _USE_MATH_DEFINES + PRIVATE NOMINMAX + PRIVATE _CRT_SECURE_NO_WARNINGS) +endif() + +set_target_properties(SECPriority PROPERTIES FOLDER "apps") + +set_property(TARGET SECPriority PROPERTY CUDA_SEPARABLE_COMPILATION ON) + +source_group(TREE ${CMAKE_CURRENT_LIST_DIR} PREFIX "SECPriority" FILES ${SOURCE_LIST}) + +target_link_libraries(SECPriority + PRIVATE RXMesh + PRIVATE gtest_main + PRIVATE OpenMeshCore + PRIVATE OpenMeshTools + PRIVATE cuco +) + +#gtest_discover_tests( SECPriority ) \ No newline at end of file diff --git a/apps/SECPriority/main.cu b/apps/SECPriority/main.cu new file mode 100644 index 00000000..e6062fd6 --- /dev/null +++ b/apps/SECPriority/main.cu @@ -0,0 +1,96 @@ +#include +#include + +#include +#include + +#include +#include + +#include +#include + +#include +#include + +using namespace cuco; +namespace cg = cooperative_groups; + +// grab some bits from priority queue tests and benchmarks + +// -- simulate reading the mesh, computing edge length +// -- cuco:pair +// +// setup pair_less template +// +// setup device function to pop items from queue +// + +template +struct pair_less +{ + __host__ __device__ bool operator()(const T& a, const T& b) const + { + return a.first < b.first; + } +}; + +template +void generate_kv_pairs_uniform(OutputIt output_begin, OutputIt output_end) +{ + std::random_device rd; + std::mt19937 gen{rd()}; + + const auto num_keys = std::distance(output_begin, output_end); + for(auto i = 0; i < num_keys; i++) + { + output_begin[i] = {static_cast(gen()), + static_cast(i)}; + } +} + +void sp_pair() +{ + // Setup the cuco::priority_queue + const size_t insertion_size = 200; + const size_t deletion_size = 100; + using PairType = cuco::pair; + using Compare = pair_less; + + cuco::priority_queue pq(insertion_size); + + // Generate data for the queue + std::vector h_pairs(insertion_size); + generate_kv_pairs_uniform(h_pairs.begin(), h_pairs.end()); + + for(auto i = 0; i < h_pairs.size(); i++) + { + std::cout << "Priority: " << h_pairs[i].first + << "\tID: " << h_pairs[i].second << "\n"; + } + + // Fill the priority queue + thrust::device_vector d_pairs(h_pairs); + pq.push(d_pairs.begin(), d_pairs.end()); + cudaDeviceSynchronize(); + + // Pop the priority queue + thrust::device_vector d_popped(deletion_size); + pq.pop(d_popped.begin(), d_popped.end()); + cudaDeviceSynchronize(); + + std::cout << "-----After Pop-----\n"; + thrust::host_vector h_popped(d_popped); + for(auto i = 0; i < h_popped.size(); i++) + { + std::cout << "Priority: " << h_popped[i].first + << "\tID: " << h_popped[i].second << "\n"; + } +} + +int main(int argc, char* argv[]) +{ + sp_pair(); + + return 0; +} \ No newline at end of file diff --git a/apps/SECPriority/secp.cu b/apps/SECPriority/secp.cu new file mode 100644 index 00000000..8da2cfe8 --- /dev/null +++ b/apps/SECPriority/secp.cu @@ -0,0 +1,102 @@ +#include "gtest/gtest.h" +#include "rxmesh/util/log.h" +#include "rxmesh/util/macros.h" +#include "rxmesh/util/util.h" + +#include + +struct arg +{ + std::string obj_file_name = STRINGIFY(INPUT_DIR) "dragon.obj"; + std::string output_folder = STRINGIFY(OUTPUT_DIR); + float target = 0.1; + float edgefrac = 0.1; + uint32_t device_id = 0; + char** argv; + int argc; +} Arg; + +#include "secp_rxmesh.cuh" + +TEST(Apps, SECPriority) +{ + using namespace rxmesh; + + // Select device + cuda_query(Arg.device_id); + + // RXMeshDynamic rx(Arg.obj_file_name); + + const std::string p_file = STRINGIFY(OUTPUT_DIR) + + extract_file_name(Arg.obj_file_name) + + "_patches"; + RXMeshDynamic rx(Arg.obj_file_name, p_file); + if (!std::filesystem::exists(p_file)) { + rx.save(p_file); + } + + ASSERT_TRUE(rx.is_edge_manifold()); + + ASSERT_TRUE(rx.is_closed()); + + uint32_t final_num_vertices = Arg.target * rx.get_num_vertices(); + + secp_rxmesh(rx, final_num_vertices, Arg.edgefrac); +} + + +int main(int argc, char** argv) +{ + using namespace rxmesh; + Log::init(); + + ::testing::InitGoogleTest(&argc, argv); + Arg.argv = argv; + Arg.argc = argc; + + + if (argc > 1) { + if (cmd_option_exists(argv, argc + argv, "-h")) { + // clang-format off + RXMESH_INFO("\nUsage: SECPriority.exe < -option X>\n" + " -h: Display this massage and exit\n" + " -input: Input file. Input file should be under the input/ subdirectory\n" + " Default is {} \n" + " Hint: Only accept OBJ files\n" + " -target: The fraction of output #vertices from the input\n" + " -edgefrac: The fraction of edges to collapse in a round\n" + " -o: JSON file output folder. Default is {} \n" + " -device_id: GPU device ID. Default is {}", + Arg.obj_file_name, Arg.output_folder, Arg.device_id); + // clang-format on + exit(EXIT_SUCCESS); + } + + if (cmd_option_exists(argv, argc + argv, "-input")) { + Arg.obj_file_name = + std::string(get_cmd_option(argv, argv + argc, "-input")); + } + if (cmd_option_exists(argv, argc + argv, "-o")) { + Arg.output_folder = + std::string(get_cmd_option(argv, argv + argc, "-o")); + } + if (cmd_option_exists(argv, argc + argv, "-device_id")) { + Arg.device_id = + atoi(get_cmd_option(argv, argv + argc, "-device_id")); + } + if (cmd_option_exists(argv, argc + argv, "-target")) { + Arg.target = atof(get_cmd_option(argv, argv + argc, "-target")); + } + if (cmd_option_exists(argv, argc + argv, "-edgefrac")) { + Arg.edgefrac = atof(get_cmd_option(argv, argv + argc, "-edgefrac")); + } + } + + RXMESH_TRACE("input= {}", Arg.obj_file_name); + RXMESH_TRACE("output_folder= {}", Arg.output_folder); + RXMESH_TRACE("device_id= {}", Arg.device_id); + RXMESH_TRACE("target= {}", Arg.target); + RXMESH_TRACE("edgefrac= {}", Arg.edgefrac); + + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/apps/SECPriority/secp_kernels.cuh b/apps/SECPriority/secp_kernels.cuh new file mode 100644 index 00000000..bd05b62d --- /dev/null +++ b/apps/SECPriority/secp_kernels.cuh @@ -0,0 +1,225 @@ +#pragma once +#include "rxmesh/cavity_manager.cuh" +#include "../ShortestEdgeCollapse/link_condition.cuh" + +#include +#include + +template +__global__ static void secp(rxmesh::Context context, + rxmesh::VertexAttribute coords, + const int reduce_threshold, + rxmesh::EdgeAttribute e_pop_attr) +{ + using namespace rxmesh; + auto block = cooperative_groups::this_thread_block(); + ShmemAllocator shrd_alloc; + CavityManager cavity( + block, context, shrd_alloc, true); + + const uint32_t pid = cavity.patch_id(); + + if (pid == INVALID32) { + return; + } + + // we first use this mask to set the edge we want to collapse (and then + // filter them). Then after cavity.prologue, we reuse this bitmask to mark + // the newly added edges + Bitmask edge_mask(cavity.patch_info().edges_capacity[0], shrd_alloc); + edge_mask.reset(block); + + // we use this bitmask to mark the other end of to-be-collapse edge during + // checking for the link condition + Bitmask v0_mask(cavity.patch_info().num_vertices[0], shrd_alloc); + Bitmask v1_mask(cavity.patch_info().num_vertices[0], shrd_alloc); + + + // Precompute EV + Query ev_query(context, pid); + ev_query.prologue(block, shrd_alloc); + block.sync(); + + // 1a) mark edge we want to collapse given e_pop_attr + for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { + assert(eh.local_id() < cavity.patch_info().num_edges[0]); + + //edge_mask.set(eh.local_id(), e_pop_attr(eh)); + if(true == e_pop_attr(eh)) + { + edge_mask.set(eh.local_id(), true); + } + + }); + block.sync(); + + // 2a) check edge link condition. + link_condition(block, cavity.patch_info(), ev_query, + edge_mask, v0_mask, v1_mask); + block.sync(); + + for_each_edge(cavity.patch_info(), [&](EdgeHandle eh) { + assert(eh.local_id() < cavity.patch_info().num_edges[0]); + if (edge_mask(eh.local_id())) { + cavity.create(eh); + } + }); + block.sync(); + + ev_query.epilogue(block, shrd_alloc); + + // create the cavity + if (cavity.prologue(block, shrd_alloc, coords)) { + edge_mask.reset(block); + block.sync(); + + // fill in the cavities + cavity.for_each_cavity(block, [&](uint16_t c, uint16_t size) { + const EdgeHandle src = cavity.template get_creator(c); + + // TODO handle boundary edges + + VertexHandle v0, v1; + + cavity.get_vertices(src, v0, v1); + + const VertexHandle new_v = cavity.add_vertex(); + + if (new_v.is_valid()) { + + coords(new_v, 0) = (coords(v0, 0) + coords(v1, 0)) * T(0.5); + coords(new_v, 1) = (coords(v0, 1) + coords(v1, 1)) * T(0.5); + coords(new_v, 2) = (coords(v0, 2) + coords(v1, 2)) * T(0.5); + + + DEdgeHandle e0 = + cavity.add_edge(new_v, cavity.get_cavity_vertex(c, 0)); + + if (e0.is_valid()) { + edge_mask.set(e0.local_id(), true); + + const DEdgeHandle e_init = e0; + + for (uint16_t i = 0; i < size; ++i) { + const DEdgeHandle e = cavity.get_cavity_edge(c, i); + + const VertexHandle v_end = + cavity.get_cavity_vertex(c, (i + 1) % size); + + const DEdgeHandle e1 = + (i == size - 1) ? + e_init.get_flip_dedge() : + cavity.add_edge( + cavity.get_cavity_vertex(c, i + 1), new_v); + + if (!e1.is_valid()) { + break; + } + + if (i != size - 1) { + edge_mask.set(e1.local_id(), true); + } + + const FaceHandle new_f = cavity.add_face(e0, e, e1); + + if (!new_f.is_valid()) { + break; + } + e0 = e1.get_flip_dedge(); + } + } + } + }); + } + + cavity.epilogue(block); + block.sync(); +} + +//template +template +__global__ static void compute_edge_priorities( + rxmesh::Context context, + const rxmesh::VertexAttribute coords, + PQView_t pq_view, + size_t pq_num_bytes) +{ + using namespace rxmesh; + namespace cg = cooperative_groups; + cg::thread_block g = cg::this_thread_block(); + ShmemAllocator shrd_alloc; + + Query query(context); + auto intermediatePairs = shrd_alloc.alloc(query.get_patch_info().num_edges[0]); + __shared__ int pair_counter; + pair_counter = 0; + + auto edge_len = [&](const EdgeHandle& eh, const VertexIterator& iter) { + const VertexHandle v0 = iter[0]; + const VertexHandle v1 = iter[1]; + + const Vec3 p0(coords(v0, 0), coords(v0, 1), coords(v0, 2)); + const Vec3 p1(coords(v1, 0), coords(v1, 1), coords(v1, 2)); + + T len2 = glm::distance2(p0, p1); + + auto p_e = rxmesh::detail::unpack(eh.unique_id()); + //printf("p_id:%u\te_id:%hu\n", p_e.first, p_e.second); + //printf("e_id:%llu\t, len:%f\n", eh.unique_id(), len2); + + // repack the EdgeHandle into smaller 32 bits for + // use with priority queue. Need to check elsewhere + // that there are less than 2^16 patches. + auto id32 = unique_id32(p_e.second, (uint16_t)p_e.first); + //auto p_e_32 = unpack32(id32); + //printf("32bit p_id:%hu\te_id:%hu\n", p_e_32.first, p_e_32.second); + + PriorityPair_t p{len2, id32}; + //PriorityPair_t p{len2, eh}; + + auto val_counter = atomicAdd(&pair_counter, 1); + intermediatePairs[val_counter] = p; + }; + + auto block = cooperative_groups::this_thread_block(); + query.dispatch(block, shrd_alloc, edge_len); + block.sync(); + + char * pq_shrd_mem = shrd_alloc.alloc(pq_num_bytes); + pq_view.push(block, intermediatePairs, intermediatePairs + pair_counter, pq_shrd_mem); +} + +template +__global__ static void pop_and_mark_edges_to_collapse( + PQView_t pq_view, + rxmesh::EdgeAttribute marked_edges, + uint32_t pop_num_edges) +{ + // setup shared memory array to store the popped pairs + // + // device api pop pairs + namespace cg = cooperative_groups; + using namespace rxmesh; + ShmemAllocator shrd_alloc; + + auto intermediatePairs = shrd_alloc.alloc(blockThreads); + char * pq_shrd_mem = shrd_alloc.alloc(pq_view.get_shmem_size(blockThreads)); + cg::thread_block g = cg::this_thread_block(); + pq_view.pop(g, intermediatePairs, intermediatePairs + blockThreads, pq_shrd_mem); + + int tid = blockIdx.x * blockDim.x + threadIdx.x; + int local_tid = threadIdx.x; + + // Make sure the index is within bounds + if(tid < pop_num_edges) + { + //printf("tid: %d\n", tid); + //unpack the uid to get the patch and edge ids + auto p_e = unpack32(intermediatePairs[local_tid].second); + //printf("32bit p_id:%hu\te_id:%hu\n", p_e.first, p_e.second); + rxmesh::EdgeHandle eh(p_e.first, rxmesh::LocalEdgeT(p_e.second)); + + //use the eh to index into a passed in edge attribute + marked_edges(eh) = true; + } +} diff --git a/apps/SECPriority/secp_rxmesh.cuh b/apps/SECPriority/secp_rxmesh.cuh new file mode 100644 index 00000000..76270d97 --- /dev/null +++ b/apps/SECPriority/secp_rxmesh.cuh @@ -0,0 +1,358 @@ +#pragma once + +#define GLM_ENABLE_EXPERIMENTAL +#include +#include + + +#include "rxmesh/query.cuh" +#include "rxmesh/rxmesh_dynamic.h" + +// Priority Queue related includes +#include +#include + +#include +#include + +/** + * @brief Return unique index of the local mesh element composed by the + * patch id and the local index + * + * @param local_id the local within-patch mesh element id + * @param patch_id the patch owning the mesh element + * @return + */ +constexpr __device__ __host__ __forceinline__ uint32_t +unique_id32(const uint16_t local_id, const uint16_t patch_id) +{ + uint32_t ret = patch_id; + ret = (ret << 16); + ret |= local_id; + return ret; +} + +/** + * @brief unpack a 32 uint to its high and low 16 bits. + * This is used to convert the unique id to its local id (16 + * low bit) and patch id (high 16 bit) + * @param uid unique id + * @return a std::pair storing the patch id and local id + */ +constexpr __device__ __host__ __forceinline__ std::pair + unpack32(uint32_t uid) +{ + uint16_t local_id = uid & ((1 << 16) - 1); + uint16_t patch_id = uid >> 16; + return std::make_pair(patch_id, local_id); +} + +// Priority queue setup. Use 'pair_less' to prioritize smaller values. +template +struct pair_less +{ + __host__ __device__ bool operator()(const T& a, const T& b) const + { + return a.first < b.first; + } +}; + +using PriorityPair_t = cuco::pair; +using PriorityCompare = pair_less; +using PriorityQueue_t = cuco::priority_queue; +using PQView_t = PriorityQueue_t::device_mutable_view; + + +template +using Vec3 = glm::vec<3, T, glm::defaultp>; + +#include "secp_kernels.cuh" + +#include "rxmesh/util/report.h" + +template +void render_edge_attr(rxmesh::RXMeshDynamic& rx, + const std::shared_ptr>& edge_attr) +{ + using namespace rxmesh; + //make sure the attribute is on the HOST + edge_attr->move(DEVICE, HOST); + + std::vector edgeColors(rx.get_num_edges()); + rx.for_each_edge(HOST, + [&](EdgeHandle eh) { + if(true == (*edge_attr)(eh)) + { + edgeColors[rx.linear_id(eh)] = 200.0f; + } + else + { + edgeColors[rx.linear_id(eh)] = eh.patch_id(); + } + }); + + auto ps_mesh = rx.get_polyscope_mesh(); + auto edge_colors = ps_mesh->addEdgeScalarQuantity("Edges to Collapse", edgeColors); + edge_colors->setEnabled(true); +} + +inline void secp_rxmesh(rxmesh::RXMeshDynamic& rx, + const uint32_t final_num_vertices, + const float edge_reduce_ratio) +{ + EXPECT_TRUE(rx.validate()); + + using namespace rxmesh; + constexpr uint32_t blockThreads = 256; + + rxmesh::Report report("SECP_RXMesh"); + report.command_line(Arg.argc, Arg.argv); + report.device(); + report.system(); + report.model_data(Arg.obj_file_name + "_before", rx, "model_before"); + report.add_member("method", std::string("RXMesh")); + report.add_member("blockThreads", blockThreads); + + auto coords = rx.get_input_vertex_coordinates(); + + LaunchBox launch_box; + + float total_time = 0; + float app_time = 0; + float slice_time = 0; + float cleanup_time = 0; + float pq_time = 0; + float pop_mark_time = 0; + float e_priority_time = 0; + + auto e_pop_attr = rx.add_edge_attribute("ePop", 1); + + RXMESH_INFO("#Vertices {}", rx.get_num_vertices()); + RXMESH_INFO("#Edges {}", rx.get_num_edges()); + RXMESH_INFO("#Faces {}", rx.get_num_faces()); + RXMESH_INFO("#Patches {}", rx.get_num_patches()); + + size_t max_smem_bytes_dyn = 0; + size_t max_smem_bytes_static = 0; + uint32_t max_num_registers_per_thread = 0; + uint32_t max_num_blocks = 0; + +#if USE_POLYSCOPE + rx.render_vertex_patch(); + rx.render_edge_patch(); + rx.render_face_patch(); + // polyscope::show(); +#endif + + bool validate = false; + + int num_passes = 0; + + CUDA_ERROR(cudaProfilerStart()); + GPUTimer timer; + timer.start(); + while(rx.get_num_vertices(true) > final_num_vertices) + { + ++num_passes; + + GPUTimer pq_timer; + pq_timer.start(); + + // rebuild every round? Not necessarily a great way to use a pq. + PriorityQueue_t pq(rx.get_num_edges()); + e_pop_attr->reset(false, DEVICE); + + //rx.prepare_launch_box( + rx.update_launch_box( + {Op::EV}, + launch_box, + (void*)compute_edge_priorities, + false, false, false, false, + [&](uint32_t v, uint32_t e, uint32_t f){ + // Allocate enough additional memory + // for the priority queue and the intermediate + // array of PriorityPair_t. + return pq.get_shmem_size(blockThreads) + (e*sizeof(PriorityPair_t)); + } + ); + + GPUTimer edge_priorities_timer; + edge_priorities_timer.start(); + compute_edge_priorities + <<>>( rx.get_context(), *coords, pq.get_mutable_device_view(), pq.get_shmem_size(blockThreads)); + edge_priorities_timer.stop(); + e_priority_time += edge_priorities_timer.elapsed_millis(); + //cudaDeviceSynchronize(); + //RXMESH_TRACE("launch_box.smem_bytes_dyn = {}", launch_box.smem_bytes_dyn); + //RXMESH_TRACE("pq.get_shmem_size = {}", pq.get_shmem_size(blockThreads)); + + // Next kernel needs to pop some percentage of the top + // elements in the priority queue and store popped elements + // to be used by the next kernel that actually does the collapses + + float reduce_ratio = edge_reduce_ratio; + const int num_edges_before = int(rx.get_num_edges()); + const int reduce_threshold = + std::max(1, int(reduce_ratio * float(num_edges_before))); + // Mark the edge attributes to be collapsed + uint32_t pop_num_edges = reduce_threshold; //reduce_ratio * rx.get_num_edges(); + //RXMESH_TRACE("pop_num_edges: {}", pop_num_edges); + + constexpr uint32_t threads_per_block = 256; + uint32_t number_of_blocks = (pop_num_edges + threads_per_block - 1) / threads_per_block; + int shared_mem_bytes = pq.get_shmem_size(threads_per_block) + + (threads_per_block * sizeof(PriorityPair_t)); + //RXMESH_TRACE("threads_per_block: {}", threads_per_block); + //RXMESH_TRACE("number_of_blocks: {}", number_of_blocks); + //RXMESH_TRACE("shared_mem_bytes: {}", shared_mem_bytes); + + GPUTimer pop_mark_timer; + pop_mark_timer.start(); + pop_and_mark_edges_to_collapse + <<>> + (pq.get_mutable_device_view(), + *e_pop_attr, + pop_num_edges); + + // if(num_passes == 1) + // { + // render_edge_attr(rx, e_pop_attr); + // } + CUDA_ERROR(cudaDeviceSynchronize()); + CUDA_ERROR(cudaGetLastError()); + pop_mark_timer.stop(); + pop_mark_time += pop_mark_timer.elapsed_millis(); + + pq_timer.stop(); + + pq_time += pq_timer.elapsed_millis(); + + // loop over the mesh, and try to collapse + + rx.reset_scheduler(); + while(!rx.is_queue_empty() && + rx.get_num_vertices(true) > final_num_vertices) + { + + //RXMESH_INFO(" Queue size = {}", + // rx.get_context().m_patch_scheduler.size()); + + //rx.prepare_launch_box( + rx.update_launch_box( + {Op::EV}, + launch_box, + (void*)secp, + true, false, false, false, + [&](uint32_t v, uint32_t e, uint32_t f) { + return detail::mask_num_bytes(e) + + 2 * detail::mask_num_bytes(v) + + 3 * ShmemAllocator::default_alignment; + } + ); + + max_smem_bytes_dyn = + std::max(max_smem_bytes_dyn, launch_box.smem_bytes_dyn); + max_smem_bytes_static = + std::max(max_smem_bytes_static, launch_box.smem_bytes_static); + max_num_registers_per_thread = + std::max(max_num_registers_per_thread, + launch_box.num_registers_per_thread); + max_num_blocks = + std::max(max_num_blocks, DIVIDE_UP(launch_box.blocks, 8)); + GPUTimer app_timer; + + app_timer.start(); + secp + <<>>(rx.get_context(), + *coords, + reduce_threshold, + *e_pop_attr); + // should we cudaDeviceSyn here? stopping timers too soon? + //CUDA_ERROR(cudaDeviceSynchronize()); + //CUDA_ERROR(cudaGetLastError()); + + app_timer.stop(); + + GPUTimer cleanup_timer; + cleanup_timer.start(); + rx.cleanup(); + cleanup_timer.stop(); + + GPUTimer slice_timer; + slice_timer.start(); + rx.slice_patches(*coords); + slice_timer.stop(); + + GPUTimer cleanup_timer2; + cleanup_timer2.start(); + rx.cleanup(); + cleanup_timer2.stop(); + + + CUDA_ERROR(cudaDeviceSynchronize()); + CUDA_ERROR(cudaGetLastError()); + + app_time += app_timer.elapsed_millis(); + slice_time += slice_timer.elapsed_millis(); + cleanup_time += cleanup_timer.elapsed_millis(); + cleanup_time += cleanup_timer2.elapsed_millis(); + } + } + timer.stop(); + total_time += timer.elapsed_millis(); + CUDA_ERROR(cudaProfilerStop()); + + RXMESH_INFO("secp_rxmesh() RXMesh SEC took {} (ms), num_passes= {}", + total_time, + num_passes); + RXMESH_INFO("secp_rxmesh() PriorityQ time {} (ms)", pq_time); + RXMESH_INFO("secp_rxmesh() |-Edge priorities time {} (ms)", e_priority_time); + RXMESH_INFO("secp_rxmesh() |-Pop and Mark time {} (ms)", pop_mark_time); + RXMESH_INFO("secp_rxmesh() App time {} (ms)", app_time); + RXMESH_INFO("secp_rxmesh() Slice timer {} (ms)", slice_time); + RXMESH_INFO("secp_rxmesh() Cleanup timer {} (ms)", cleanup_time); + + RXMESH_INFO("#Vertices {}", rx.get_num_vertices(true)); + RXMESH_INFO("#Edges {}", rx.get_num_edges(true)); + RXMESH_INFO("#Faces {}", rx.get_num_faces(true)); + RXMESH_INFO("#Patches {}", rx.get_num_patches(true)); + + + rx.update_host(); + + coords->move(DEVICE, HOST); + + report.add_member("num_passes", num_passes); + report.add_member("max_smem_bytes_dyn", max_smem_bytes_dyn); + report.add_member("max_smem_bytes_static", max_smem_bytes_static); + report.add_member("max_num_registers_per_thread", + max_num_registers_per_thread); + report.add_member("max_num_blocks", max_num_blocks); + report.add_member("secp_remesh_time", total_time); + report.add_member("priority_queue_time", pq_time); + report.add_member("app_time", app_time); + report.add_member("slice_time", slice_time); + report.add_member("cleanup_time", cleanup_time); + report.add_member("attributes_memory_mg", coords->get_memory_mg()); + report.model_data(Arg.obj_file_name + "_after", rx, "model_after"); + +#if USE_POLYSCOPE + rx.update_polyscope(); + + auto ps_mesh = rx.get_polyscope_mesh(); + ps_mesh->updateVertexPositions(*coords); + ps_mesh->setEnabled(false); + + rx.render_vertex_patch(); + rx.render_edge_patch(); + rx.render_face_patch(); + polyscope::show(); +#endif + + report.write(Arg.output_folder + "/rxmesh_secp", + "SECP_RXMesh_" + extract_file_name(Arg.obj_file_name)); +} \ No newline at end of file diff --git a/cmake/CPM.cmake b/cmake/CPM.cmake new file mode 100644 index 00000000..d0fd0e8e --- /dev/null +++ b/cmake/CPM.cmake @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: MIT +# +# SPDX-FileCopyrightText: Copyright (c) 2019-2023 Lars Melchior and contributors + +set(CPM_DOWNLOAD_VERSION 0.39.0) +set(CPM_HASH_SUM "66639bcac9dd2907b2918de466783554c1334446b9874e90d38e3778d404c2ef") + +if(CPM_SOURCE_CACHE) + set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") +elseif(DEFINED ENV{CPM_SOURCE_CACHE}) + set(CPM_DOWNLOAD_LOCATION "$ENV{CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") +else() + set(CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake") +endif() + +# Expand relative path. This is important if the provided path contains a tilde (~) +get_filename_component(CPM_DOWNLOAD_LOCATION ${CPM_DOWNLOAD_LOCATION} ABSOLUTE) + +file(DOWNLOAD + https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake + ${CPM_DOWNLOAD_LOCATION} EXPECTED_HASH SHA256=${CPM_HASH_SUM} +) + +include(${CPM_DOWNLOAD_LOCATION}) diff --git a/sweep_edgefrac_test.sh b/sweep_edgefrac_test.sh new file mode 100755 index 00000000..9ffb5195 --- /dev/null +++ b/sweep_edgefrac_test.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# Start value +start=0.001 +# End value +end=0.9 +# Step value +step=0.031 + +# Command path +command="./build/bin/SECPriority" +# Input file +input_file="./input/rocker-arm.obj" + +# Loop through the range +for target in $(seq $start $step $end) +do + echo "Running with edgefrac = $target" + $command -input $input_file -edgefrac $target +done + diff --git a/sweep_test.sh b/sweep_test.sh new file mode 100755 index 00000000..6d7ee81e --- /dev/null +++ b/sweep_test.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# Start value +start=0.001 +# End value +end=0.9 +# Step value +step=0.031 + +# Command path +command="./build/bin/SECPriority" +# Input file +input_file="./input/rocker-arm.obj" + +# Loop through the range +for target in $(seq $start $step $end) +do + echo "Running with target = $target" + $command -input $input_file -target $target +done +