From 03ab50ccf119f95f94c187bc3b2c20dfabf5f0ae Mon Sep 17 00:00:00 2001 From: Mikhail Karasikov Date: Fri, 16 May 2025 18:36:47 +0200 Subject: [PATCH 01/62] don't show progress bar while extracting contigs from the query graph --- metagraph/src/cli/query.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/metagraph/src/cli/query.cpp b/metagraph/src/cli/query.cpp index cb7d2e35c9..44f136e645 100644 --- a/metagraph/src/cli/query.cpp +++ b/metagraph/src/cli/query.cpp @@ -939,6 +939,9 @@ construct_query_graph(const AnnotatedDBG &anno_graph, // pull contigs from query graph std::vector>> contigs; std::mutex seq_mutex; + bool verbose = common::get_verbose(); + // turn off verbose to hide the contig extraction progress bar + common::set_verbose(false); graph_init->call_sequences([&](const std::string &contig, const auto &) { std::lock_guard lock(seq_mutex); contigs.emplace_back(contig, std::vector{}); @@ -946,6 +949,7 @@ construct_query_graph(const AnnotatedDBG &anno_graph, num_threads, // pull only primary contigs when building canonical query graph full_dbg.get_mode() == DeBruijnGraph::CANONICAL); + common::set_verbose(verbose); logger->trace("[Query graph construction] Contig extraction took {} sec", timer.elapsed()); timer.reset(); From 2ad67fde1f73624eef82c871f19ee632d2cec32c Mon Sep 17 00:00:00 2001 From: Mikhail Karasikov Date: Fri, 16 May 2025 20:50:17 +0200 Subject: [PATCH 02/62] compile on MacOS --- metagraph/CMakeLists.txt | 22 ++++++++++++---------- metagraph/CMakeListsKMC.txt.in | 2 +- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/metagraph/CMakeLists.txt b/metagraph/CMakeLists.txt index b92c3685eb..e3a957e35b 100644 --- a/metagraph/CMakeLists.txt +++ b/metagraph/CMakeLists.txt @@ -352,14 +352,6 @@ add_library(sdust STATIC target_compile_options(sdust PRIVATE -Wall -Wc++-compat -Wno-error) -include(ExternalProject) -ExternalProject_Add(HTSLIB - SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external-libraries/htslib - CONFIGURE_COMMAND 
autoreconf -i ${CMAKE_CURRENT_SOURCE_DIR}/external-libraries/htslib && ${CMAKE_CURRENT_SOURCE_DIR}/external-libraries/htslib/configure --prefix=${CMAKE_BINARY_DIR}/external-libraries/htslib --disable-libcurl --disable-bz2 --disable-lzma -) -link_directories(${CMAKE_BINARY_DIR}/external-libraries/htslib/lib) -include_directories(${CMAKE_BINARY_DIR}/external-libraries/htslib/include) - set(OTHER_COMPILER_FLAGS "") get_directory_property(DIRECTORY_DEFINITIONS COMPILE_DEFINITIONS) foreach(DEF ${DIRECTORY_DEFINITIONS}) @@ -403,7 +395,17 @@ file(GLOB metagraph_cli_files "src/cli/*/*/*.cpp" ) add_library(metagraph-cli STATIC ${metagraph_cli_files}) -add_dependencies(metagraph-core HTSLIB) + +if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") + include(ExternalProject) + ExternalProject_Add(HTSLIB + SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external-libraries/htslib + CONFIGURE_COMMAND autoreconf -i ${CMAKE_CURRENT_SOURCE_DIR}/external-libraries/htslib && ${CMAKE_CURRENT_SOURCE_DIR}/external-libraries/htslib/configure --prefix=${CMAKE_BINARY_DIR}/external-libraries/htslib --disable-libcurl --disable-bz2 --disable-lzma + ) + link_directories(${CMAKE_BINARY_DIR}/external-libraries/htslib/lib) + include_directories(${CMAKE_BINARY_DIR}/external-libraries/htslib/include) + add_dependencies(metagraph-core HTSLIB) +endif() add_executable(metagraph "src/main.cpp") set_target_properties(metagraph PROPERTIES OUTPUT_NAME "metagraph_${CMAKE_DBG_ALPHABET}") @@ -561,7 +563,7 @@ list(FILTER benchmark_files EXCLUDE REGEX ".*\\._.*") add_executable(benchmarks ${benchmark_files}) target_compile_definitions(benchmarks PRIVATE TEST_DATA_DIR="${PROJECT_SOURCE_DIR}/tests/data") -target_link_libraries(benchmarks benchmark_main benchmark metagraph-core metagraph-cli -lsdsl) +target_link_libraries(benchmarks benchmark_main benchmark metagraph-core metagraph-cli) target_compile_options(benchmarks PRIVATE) diff --git a/metagraph/CMakeListsKMC.txt.in b/metagraph/CMakeListsKMC.txt.in index 
880710cea4..1263c6249c 100644 --- a/metagraph/CMakeListsKMC.txt.in +++ b/metagraph/CMakeListsKMC.txt.in @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.8.12) +cmake_minimum_required(VERSION 3.5) project(KMC) From 977199e2295d0b459699488df2a5ec54a54e73aa Mon Sep 17 00:00:00 2001 From: Mikhail Karasikov Date: Fri, 16 May 2025 21:03:23 +0200 Subject: [PATCH 03/62] keep column positions in small vectors --- .../src/annotation/binary_matrix/base/binary_matrix.hpp | 2 +- .../src/annotation/binary_matrix/row_diff/row_diff.hpp | 6 +++--- metagraph/src/graph/alignment/annotation_buffer.cpp | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/metagraph/src/annotation/binary_matrix/base/binary_matrix.hpp b/metagraph/src/annotation/binary_matrix/base/binary_matrix.hpp index b23c2c7323..888eba5465 100644 --- a/metagraph/src/annotation/binary_matrix/base/binary_matrix.hpp +++ b/metagraph/src/annotation/binary_matrix/base/binary_matrix.hpp @@ -20,7 +20,7 @@ class BinaryMatrix { typedef uint64_t Row; typedef uint64_t Column; - typedef Vector SetBitPositions; + typedef SmallVector SetBitPositions; typedef std::function RowCallback; typedef std::function ValueCallback; diff --git a/metagraph/src/annotation/binary_matrix/row_diff/row_diff.hpp b/metagraph/src/annotation/binary_matrix/row_diff/row_diff.hpp index 67382f5a12..f74b5d7ac1 100644 --- a/metagraph/src/annotation/binary_matrix/row_diff/row_diff.hpp +++ b/metagraph/src/annotation/binary_matrix/row_diff/row_diff.hpp @@ -108,7 +108,7 @@ class RowDiff : public IRowDiff, public BinaryMatrix { BaseMatrix& diffs() { return diffs_; } private: - static void add_diff(const Vector &diff, Vector *row); + static void add_diff(const SetBitPositions &diff, SetBitPositions *row); BaseMatrix diffs_; }; @@ -210,14 +210,14 @@ void RowDiff::serialize(std::ostream &f) const { } template -void RowDiff::add_diff(const Vector &diff, Vector *row) { +void RowDiff::add_diff(const SetBitPositions &diff, SetBitPositions *row) { 
assert(std::is_sorted(row->begin(), row->end())); assert(std::is_sorted(diff.begin(), diff.end())); if (diff.empty()) return; - Vector result; + SetBitPositions result; result.reserve(row->size() + diff.size()); std::set_symmetric_difference(row->begin(), row->end(), diff.begin(), diff.end(), diff --git a/metagraph/src/graph/alignment/annotation_buffer.cpp b/metagraph/src/graph/alignment/annotation_buffer.cpp index a644bf2933..5c51847e19 100644 --- a/metagraph/src/graph/alignment/annotation_buffer.cpp +++ b/metagraph/src/graph/alignment/annotation_buffer.cpp @@ -181,7 +181,7 @@ void AnnotationBuffer::fetch_queued_annotations() { } else { for (auto&& labels : annotator_.get_matrix().get_rows(queued_rows)) { std::sort(labels.begin(), labels.end()); - push_node_labels(node_it++, row_it++, std::move(labels)); + push_node_labels(node_it++, row_it++, Columns(labels.begin(), labels.end())); } } From 3ec88ce09c5476640704c38cb4acc2be1b58ff02 Mon Sep 17 00:00:00 2001 From: Mikhail Karasikov Date: Fri, 16 May 2025 21:49:38 +0200 Subject: [PATCH 04/62] always compile htslib --- metagraph/CMakeLists.txt | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/metagraph/CMakeLists.txt b/metagraph/CMakeLists.txt index e3a957e35b..086cc94a74 100644 --- a/metagraph/CMakeLists.txt +++ b/metagraph/CMakeLists.txt @@ -396,16 +396,14 @@ file(GLOB metagraph_cli_files ) add_library(metagraph-cli STATIC ${metagraph_cli_files}) -if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") - include(ExternalProject) - ExternalProject_Add(HTSLIB - SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external-libraries/htslib - CONFIGURE_COMMAND autoreconf -i ${CMAKE_CURRENT_SOURCE_DIR}/external-libraries/htslib && ${CMAKE_CURRENT_SOURCE_DIR}/external-libraries/htslib/configure --prefix=${CMAKE_BINARY_DIR}/external-libraries/htslib --disable-libcurl --disable-bz2 --disable-lzma - ) - link_directories(${CMAKE_BINARY_DIR}/external-libraries/htslib/lib) - 
include_directories(${CMAKE_BINARY_DIR}/external-libraries/htslib/include) - add_dependencies(metagraph-core HTSLIB) -endif() +include(ExternalProject) +ExternalProject_Add(HTSLIB + SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external-libraries/htslib + CONFIGURE_COMMAND autoreconf -i ${CMAKE_CURRENT_SOURCE_DIR}/external-libraries/htslib && ${CMAKE_CURRENT_SOURCE_DIR}/external-libraries/htslib/configure --prefix=${CMAKE_BINARY_DIR}/external-libraries/htslib --disable-libcurl --disable-bz2 --disable-lzma +) +link_directories(${CMAKE_BINARY_DIR}/external-libraries/htslib/lib) +include_directories(${CMAKE_BINARY_DIR}/external-libraries/htslib/include) +add_dependencies(metagraph-core HTSLIB) add_executable(metagraph "src/main.cpp") set_target_properties(metagraph PROPERTIES OUTPUT_NAME "metagraph_${CMAKE_DBG_ALPHABET}") From 5688029160125c12d0f10b76d5a794bdbadab15e Mon Sep 17 00:00:00 2001 From: Mikhail Karasikov Date: Fri, 16 May 2025 22:10:18 +0200 Subject: [PATCH 05/62] revert CMakeLists.txt --- metagraph/CMakeLists.txt | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/metagraph/CMakeLists.txt b/metagraph/CMakeLists.txt index 086cc94a74..b92c3685eb 100644 --- a/metagraph/CMakeLists.txt +++ b/metagraph/CMakeLists.txt @@ -352,6 +352,14 @@ add_library(sdust STATIC target_compile_options(sdust PRIVATE -Wall -Wc++-compat -Wno-error) +include(ExternalProject) +ExternalProject_Add(HTSLIB + SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external-libraries/htslib + CONFIGURE_COMMAND autoreconf -i ${CMAKE_CURRENT_SOURCE_DIR}/external-libraries/htslib && ${CMAKE_CURRENT_SOURCE_DIR}/external-libraries/htslib/configure --prefix=${CMAKE_BINARY_DIR}/external-libraries/htslib --disable-libcurl --disable-bz2 --disable-lzma +) +link_directories(${CMAKE_BINARY_DIR}/external-libraries/htslib/lib) +include_directories(${CMAKE_BINARY_DIR}/external-libraries/htslib/include) + set(OTHER_COMPILER_FLAGS "") get_directory_property(DIRECTORY_DEFINITIONS 
COMPILE_DEFINITIONS) foreach(DEF ${DIRECTORY_DEFINITIONS}) @@ -395,14 +403,6 @@ file(GLOB metagraph_cli_files "src/cli/*/*/*.cpp" ) add_library(metagraph-cli STATIC ${metagraph_cli_files}) - -include(ExternalProject) -ExternalProject_Add(HTSLIB - SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external-libraries/htslib - CONFIGURE_COMMAND autoreconf -i ${CMAKE_CURRENT_SOURCE_DIR}/external-libraries/htslib && ${CMAKE_CURRENT_SOURCE_DIR}/external-libraries/htslib/configure --prefix=${CMAKE_BINARY_DIR}/external-libraries/htslib --disable-libcurl --disable-bz2 --disable-lzma -) -link_directories(${CMAKE_BINARY_DIR}/external-libraries/htslib/lib) -include_directories(${CMAKE_BINARY_DIR}/external-libraries/htslib/include) add_dependencies(metagraph-core HTSLIB) add_executable(metagraph "src/main.cpp") @@ -561,7 +561,7 @@ list(FILTER benchmark_files EXCLUDE REGEX ".*\\._.*") add_executable(benchmarks ${benchmark_files}) target_compile_definitions(benchmarks PRIVATE TEST_DATA_DIR="${PROJECT_SOURCE_DIR}/tests/data") -target_link_libraries(benchmarks benchmark_main benchmark metagraph-core metagraph-cli) +target_link_libraries(benchmarks benchmark_main benchmark metagraph-core metagraph-cli -lsdsl) target_compile_options(benchmarks PRIVATE) From 3b8e67d2dd64b07db6726cade58233d050a15128 Mon Sep 17 00:00:00 2001 From: Mikhail Karasikov Date: Fri, 16 May 2025 22:31:22 +0200 Subject: [PATCH 06/62] revert --- metagraph/CMakeListsKMC.txt.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagraph/CMakeListsKMC.txt.in b/metagraph/CMakeListsKMC.txt.in index 1263c6249c..880710cea4 100644 --- a/metagraph/CMakeListsKMC.txt.in +++ b/metagraph/CMakeListsKMC.txt.in @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.5) +cmake_minimum_required(VERSION 2.8.12) project(KMC) From 7cac36145d203489bfe605c6c9ce0104ef2c5a9a Mon Sep 17 00:00:00 2001 From: Mikhail Karasikov Date: Fri, 16 May 2025 22:32:34 +0200 Subject: [PATCH 07/62] back to Vector --- 
metagraph/src/annotation/binary_matrix/base/binary_matrix.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagraph/src/annotation/binary_matrix/base/binary_matrix.hpp b/metagraph/src/annotation/binary_matrix/base/binary_matrix.hpp index 888eba5465..b23c2c7323 100644 --- a/metagraph/src/annotation/binary_matrix/base/binary_matrix.hpp +++ b/metagraph/src/annotation/binary_matrix/base/binary_matrix.hpp @@ -20,7 +20,7 @@ class BinaryMatrix { typedef uint64_t Row; typedef uint64_t Column; - typedef SmallVector SetBitPositions; + typedef Vector SetBitPositions; typedef std::function RowCallback; typedef std::function ValueCallback; From 4b683701d2de44bb89986958adc2e377b69a9fef Mon Sep 17 00:00:00 2001 From: Mikhail Karasikov Date: Fri, 16 May 2025 22:49:57 +0200 Subject: [PATCH 08/62] pass verbose without changing the global variable --- metagraph/src/cli/query.cpp | 7 ++----- .../src/graph/representation/base/sequence_graph.cpp | 10 ++++++---- .../src/graph/representation/base/sequence_graph.hpp | 3 ++- metagraph/src/graph/representation/canonical_dbg.cpp | 7 ++++--- metagraph/src/graph/representation/canonical_dbg.hpp | 3 ++- metagraph/src/graph/representation/masked_graph.cpp | 7 ++++--- metagraph/src/graph/representation/masked_graph.hpp | 3 ++- metagraph/src/graph/representation/succinct/boss.cpp | 8 +++++--- metagraph/src/graph/representation/succinct/boss.hpp | 2 ++ .../src/graph/representation/succinct/dbg_succinct.cpp | 6 ++++-- .../src/graph/representation/succinct/dbg_succinct.hpp | 3 ++- 11 files changed, 35 insertions(+), 24 deletions(-) diff --git a/metagraph/src/cli/query.cpp b/metagraph/src/cli/query.cpp index 44f136e645..e3d97a38e4 100644 --- a/metagraph/src/cli/query.cpp +++ b/metagraph/src/cli/query.cpp @@ -939,17 +939,14 @@ construct_query_graph(const AnnotatedDBG &anno_graph, // pull contigs from query graph std::vector>> contigs; std::mutex seq_mutex; - bool verbose = common::get_verbose(); - // turn off verbose to hide the
contig extraction progress bar - common::set_verbose(false); graph_init->call_sequences([&](const std::string &contig, const auto &) { std::lock_guard lock(seq_mutex); contigs.emplace_back(contig, std::vector{}); }, num_threads, // pull only primary contigs when building canonical query graph - full_dbg.get_mode() == DeBruijnGraph::CANONICAL); - common::set_verbose(verbose); + full_dbg.get_mode() == DeBruijnGraph::CANONICAL, + false); logger->trace("[Query graph construction] Contig extraction took {} sec", timer.elapsed()); timer.reset(); diff --git a/metagraph/src/graph/representation/base/sequence_graph.cpp b/metagraph/src/graph/representation/base/sequence_graph.cpp index 043e135790..3214d5a78b 100644 --- a/metagraph/src/graph/representation/base/sequence_graph.cpp +++ b/metagraph/src/graph/representation/base/sequence_graph.cpp @@ -341,7 +341,8 @@ void call_sequences(const DeBruijnGraph &graph, size_t num_threads, bool call_unitigs, uint64_t min_tip_size, - bool kmers_in_single_form) { + bool kmers_in_single_form, + bool verbose = common::get_verbose()) { // TODO: port over the implementation from BOSS std::ignore = num_threads; @@ -351,7 +352,7 @@ void call_sequences(const DeBruijnGraph &graph, ProgressBar progress_bar(visited.size() - sdsl::util::cnt_one_bits(visited), "Traverse graph", - std::cerr, !common::get_verbose()); + std::cerr, !verbose); auto call_paths_from = [&](node_index node) { call_sequences_from(graph, @@ -417,8 +418,9 @@ void call_sequences(const DeBruijnGraph &graph, void DeBruijnGraph::call_sequences(const CallPath &callback, size_t num_threads, - bool kmers_in_single_form) const { - ::mtg::graph::call_sequences(*this, callback, num_threads, false, 0, kmers_in_single_form); + bool kmers_in_single_form, + bool verbose) const { + ::mtg::graph::call_sequences(*this, callback, num_threads, false, 0, kmers_in_single_form, verbose); } void DeBruijnGraph::call_unitigs(const CallPath &callback, diff --git 
a/metagraph/src/graph/representation/base/sequence_graph.hpp b/metagraph/src/graph/representation/base/sequence_graph.hpp index 5561bf33db..c0bf4bfb57 100644 --- a/metagraph/src/graph/representation/base/sequence_graph.hpp +++ b/metagraph/src/graph/representation/base/sequence_graph.hpp @@ -191,7 +191,8 @@ class DeBruijnGraph : public SequenceGraph { */ virtual void call_sequences(const CallPath &callback, size_t num_threads = 1, - bool kmers_in_single_form = false) const; + bool kmers_in_single_form = false, + bool verbose = common::get_verbose()) const; /** * Call all unitigs except short tips, where tips are * the unitigs with InDegree(first) + OutDegree(last) < 2. diff --git a/metagraph/src/graph/representation/canonical_dbg.cpp b/metagraph/src/graph/representation/canonical_dbg.cpp index c4aeeec359..4bade1c820 100644 --- a/metagraph/src/graph/representation/canonical_dbg.cpp +++ b/metagraph/src/graph/representation/canonical_dbg.cpp @@ -424,12 +424,13 @@ size_t CanonicalDBG::indegree(node_index node) const { void CanonicalDBG::call_sequences(const CallPath &callback, size_t num_threads, - bool kmers_in_single_form) const { + bool kmers_in_single_form, + bool verbose) const { if (kmers_in_single_form) { - graph_->call_sequences(callback, num_threads, true); + graph_->call_sequences(callback, num_threads, true, verbose); } else { // TODO: port over implementation from DBGSuccinct to DeBruijnGraph - DeBruijnGraph::call_sequences(callback, num_threads, false); + DeBruijnGraph::call_sequences(callback, num_threads, false, verbose); } } diff --git a/metagraph/src/graph/representation/canonical_dbg.hpp b/metagraph/src/graph/representation/canonical_dbg.hpp index 4762b71d7e..c0841d6eec 100644 --- a/metagraph/src/graph/representation/canonical_dbg.hpp +++ b/metagraph/src/graph/representation/canonical_dbg.hpp @@ -84,7 +84,8 @@ class CanonicalDBG : public DBGWrapper { virtual void call_sequences(const CallPath &callback, size_t num_threads = 1, - bool 
kmers_in_single_form = false) const override final; + bool kmers_in_single_form = false, + bool verbose = common::get_verbose()) const override final; virtual void call_unitigs(const CallPath &callback, size_t num_threads = 1, diff --git a/metagraph/src/graph/representation/masked_graph.cpp b/metagraph/src/graph/representation/masked_graph.cpp index 2af9d9f108..cc3dd0787f 100644 --- a/metagraph/src/graph/representation/masked_graph.cpp +++ b/metagraph/src/graph/representation/masked_graph.cpp @@ -106,7 +106,8 @@ bit_vector_stat get_boss_mask(const DBGSuccinct &dbg_succ, void MaskedDeBruijnGraph::call_sequences(const CallPath &callback, size_t num_threads, - bool kmers_in_single_form) const { + bool kmers_in_single_form, + bool verbose) const { if (auto *dbg_succ = dynamic_cast(graph_.get())) { bit_vector_stat mask = get_boss_mask(*dbg_succ, *kmers_in_graph_, only_valid_nodes_in_mask_); @@ -117,10 +118,10 @@ void MaskedDeBruijnGraph::call_sequences(const CallPath &callback, } callback(sequence, path); - }, num_threads, kmers_in_single_form, &mask); + }, num_threads, kmers_in_single_form, verbose, &mask); } else { - DeBruijnGraph::call_sequences(callback, num_threads, kmers_in_single_form); + DeBruijnGraph::call_sequences(callback, num_threads, kmers_in_single_form, verbose); } } diff --git a/metagraph/src/graph/representation/masked_graph.hpp b/metagraph/src/graph/representation/masked_graph.hpp index 2d2fdd2e62..192c88017c 100644 --- a/metagraph/src/graph/representation/masked_graph.hpp +++ b/metagraph/src/graph/representation/masked_graph.hpp @@ -75,7 +75,8 @@ class MaskedDeBruijnGraph : public DBGWrapper { virtual void call_sequences(const CallPath &callback, size_t num_threads = 1, - bool kmers_in_single_form = false) const override; + bool kmers_in_single_form = false, + bool verbose = common::get_verbose()) const override; virtual void call_unitigs(const CallPath &callback, size_t num_threads = 1, diff --git 
a/metagraph/src/graph/representation/succinct/boss.cpp b/metagraph/src/graph/representation/succinct/boss.cpp index f55e7b6a52..41f64b19ae 100644 --- a/metagraph/src/graph/representation/succinct/boss.cpp +++ b/metagraph/src/graph/representation/succinct/boss.cpp @@ -2081,6 +2081,7 @@ void BOSS::call_paths(Call &&, std::vector && size_t num_threads, bool split_to_unitigs, bool kmers_in_single_form, + bool verbose, const bitmap *subgraph_mask, bool trim_sentinels) const { assert(!subgraph_mask || subgraph_mask->size() == W_->size()); @@ -2106,7 +2107,7 @@ void BOSS::call_paths(Call &&, std::vector && ProgressBar progress_bar(visited.size() - sdsl::util::cnt_one_bits(visited), "Traverse BOSS", - std::cerr, !common::get_verbose()); + std::cerr, !verbose); ThreadPool thread_pool(num_threads ? num_threads : 1, TASK_POOL_SIZE); bool async = true; @@ -2693,6 +2694,7 @@ call_path(const BOSS &boss, void BOSS::call_sequences(Call&&> callback, size_t num_threads, bool kmers_in_single_form, + bool verbose, const bitmap *subgraph_mask) const { call_paths([&](std::vector&& edges, std::vector&& path) { assert(path.size() >= k_ + 1); @@ -2705,7 +2707,7 @@ void BOSS::call_sequences(Call&&> callbac callback(std::move(sequence), std::move(edges)); - }, num_threads, false, kmers_in_single_form, subgraph_mask, true); + }, num_threads, false, kmers_in_single_form, verbose, subgraph_mask, true); } // Reach all k-mers that merge into anchor |edge| by following their diff paths. 
@@ -3046,7 +3048,7 @@ void BOSS::call_unitigs(Call&&> callback, // this is not a tip callback(std::move(sequence), std::move(edges)); - }, num_threads, true, kmers_in_single_form, subgraph_mask, true); + }, num_threads, true, kmers_in_single_form, common::get_verbose(), subgraph_mask, true); } /** diff --git a/metagraph/src/graph/representation/succinct/boss.hpp b/metagraph/src/graph/representation/succinct/boss.hpp index dce2d6c568..a6e51fbe71 100644 --- a/metagraph/src/graph/representation/succinct/boss.hpp +++ b/metagraph/src/graph/representation/succinct/boss.hpp @@ -136,6 +136,7 @@ class BOSS { size_t num_threads = 1, bool unitigs = false, bool kmers_in_single_form = false, + bool verbose = common::get_verbose(), const bitmap *subgraph_mask = NULL, bool trim_sentinels = false) const; @@ -145,6 +146,7 @@ class BOSS { void call_sequences(Call&&> callback, size_t num_threads = 1, bool kmers_in_single_form = false, + bool verbose = common::get_verbose(), const bitmap *subgraph_mask = NULL) const; /** diff --git a/metagraph/src/graph/representation/succinct/dbg_succinct.cpp b/metagraph/src/graph/representation/succinct/dbg_succinct.cpp index 20c6d17fc8..4b9f480eae 100644 --- a/metagraph/src/graph/representation/succinct/dbg_succinct.cpp +++ b/metagraph/src/graph/representation/succinct/dbg_succinct.cpp @@ -499,7 +499,8 @@ void DBGSuccinct::map_to_nodes(std::string_view sequence, void DBGSuccinct::call_sequences(const CallPath &callback, size_t num_threads, - bool kmers_in_single_form) const { + bool kmers_in_single_form, + bool verbose) const { assert(boss_graph_.get()); boss_graph_->call_sequences( [&](std::string&& seq, auto&& path) { @@ -509,7 +510,8 @@ void DBGSuccinct::call_sequences(const CallPath &callback, callback(std::move(seq), std::move(path)); }, num_threads, - kmers_in_single_form + kmers_in_single_form, + verbose ); } diff --git a/metagraph/src/graph/representation/succinct/dbg_succinct.hpp 
b/metagraph/src/graph/representation/succinct/dbg_succinct.hpp index 940c26313c..2f243189f4 100644 --- a/metagraph/src/graph/representation/succinct/dbg_succinct.hpp +++ b/metagraph/src/graph/representation/succinct/dbg_succinct.hpp @@ -68,7 +68,8 @@ class DBGSuccinct : public DeBruijnGraph { virtual void call_sequences(const CallPath &callback, size_t num_threads = 1, - bool kmers_in_single_form = false) const override final; + bool kmers_in_single_form = false, + bool verbose = common::get_verbose()) const override final; virtual void call_unitigs(const CallPath &callback, size_t num_threads = 1, From 11d6002a50c164d18908f8967b96de8090ea9ac3 Mon Sep 17 00:00:00 2001 From: Mikhail Karasikov Date: Fri, 16 May 2025 23:56:01 +0200 Subject: [PATCH 09/62] keep rows in small vectors --- metagraph/src/annotation/binary_matrix/base/binary_matrix.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagraph/src/annotation/binary_matrix/base/binary_matrix.hpp b/metagraph/src/annotation/binary_matrix/base/binary_matrix.hpp index b23c2c7323..888eba5465 100644 --- a/metagraph/src/annotation/binary_matrix/base/binary_matrix.hpp +++ b/metagraph/src/annotation/binary_matrix/base/binary_matrix.hpp @@ -20,7 +20,7 @@ class BinaryMatrix { typedef uint64_t Row; typedef uint64_t Column; - typedef Vector SetBitPositions; + typedef SmallVector SetBitPositions; typedef std::function RowCallback; typedef std::function ValueCallback; From f5d7feec61707eacedacc3bd5d6db66a6d91f75a Mon Sep 17 00:00:00 2001 From: Mikhail Karasikov Date: Sat, 17 May 2025 00:06:02 +0200 Subject: [PATCH 10/62] use uint32_t for column indexes --- .../src/annotation/annotation_converters.cpp | 6 +++--- .../src/annotation/annotation_converters.hpp | 2 +- .../binary_matrix/base/binary_matrix.hpp | 2 +- .../binary_matrix/column_sparse/column_major.cpp | 2 +- .../binary_matrix/multi_brwt/brwt_builders.cpp | 14 +++++++------- .../binary_matrix/multi_brwt/brwt_builders.hpp | 2 +- 
.../binary_matrix/multi_brwt/clustering.cpp | 4 ++-- .../binary_matrix/multi_brwt/clustering.hpp | 2 +- metagraph/src/cli/transform_annotation.cpp | 7 +++---- metagraph/src/common/range_partition.cpp | 16 ++++++---------- metagraph/src/common/range_partition.hpp | 4 ++-- .../src/graph/alignment/aligner_labeled.cpp | 2 +- .../src/graph/annotated_graph_algorithm.cpp | 2 +- metagraph/tests/annotation/test_annotation.cpp | 8 +++++--- .../tests/annotation/test_matrix_helpers.cpp | 2 +- 15 files changed, 36 insertions(+), 39 deletions(-) diff --git a/metagraph/src/annotation/annotation_converters.cpp b/metagraph/src/annotation/annotation_converters.cpp index f7a567b9e7..1022d5bd64 100644 --- a/metagraph/src/annotation/annotation_converters.cpp +++ b/metagraph/src/annotation/annotation_converters.cpp @@ -420,7 +420,7 @@ void convert_to_row_diff( std::unique_ptr convert_to_BRWT( - const std::vector> &linkage, + const std::vector> &linkage, size_t num_parallel_nodes, size_t num_threads, const fs::path &tmp_path, @@ -456,7 +456,7 @@ convert_to_BRWT( template <> std::unique_ptr convert_to_BRWT( const std::vector &annotation_files, - const std::vector> &linkage, + const std::vector> &linkage, size_t num_parallel_nodes, size_t num_threads, const fs::path &tmp_path) { @@ -487,7 +487,7 @@ std::unique_ptr convert_to_BRWT( template<> std::unique_ptr convert_to_BRWT(const std::vector &annotation_files, - const std::vector> &linkage, + const std::vector> &linkage, size_t num_parallel_nodes, size_t num_threads, const fs::path &tmp_path) { diff --git a/metagraph/src/annotation/annotation_converters.hpp b/metagraph/src/annotation/annotation_converters.hpp index 977d461dbc..29f16ac6fb 100644 --- a/metagraph/src/annotation/annotation_converters.hpp +++ b/metagraph/src/annotation/annotation_converters.hpp @@ -60,7 +60,7 @@ convert_to_greedy_BRWT(RowDiffColumnAnnotator &&annotation, template std::unique_ptr convert_to_BRWT(const std::vector &annotation_files, - const std::vector> 
&linkage_matrix, + const std::vector> &linkage_matrix, size_t num_parallel_nodes = 1, size_t num_threads = 1, const std::filesystem::path &tmp_dir = ""); diff --git a/metagraph/src/annotation/binary_matrix/base/binary_matrix.hpp b/metagraph/src/annotation/binary_matrix/base/binary_matrix.hpp index 888eba5465..34b54fc935 100644 --- a/metagraph/src/annotation/binary_matrix/base/binary_matrix.hpp +++ b/metagraph/src/annotation/binary_matrix/base/binary_matrix.hpp @@ -18,7 +18,7 @@ namespace matrix { class BinaryMatrix { public: typedef uint64_t Row; - typedef uint64_t Column; + typedef uint32_t Column; typedef SmallVector SetBitPositions; typedef std::function RowCallback; diff --git a/metagraph/src/annotation/binary_matrix/column_sparse/column_major.cpp b/metagraph/src/annotation/binary_matrix/column_sparse/column_major.cpp index 5dfcb9e582..4ddc6c4dc6 100644 --- a/metagraph/src/annotation/binary_matrix/column_sparse/column_major.cpp +++ b/metagraph/src/annotation/binary_matrix/column_sparse/column_major.cpp @@ -156,7 +156,7 @@ ColumnMajor::sum_rows(const std::vector> &index_counts, if (total_sum_count < min_count) return {}; - std::vector> result; + std::vector> result; result.reserve(num_columns()); for (size_t j = 0; j < num_columns(); ++j) { diff --git a/metagraph/src/annotation/binary_matrix/multi_brwt/brwt_builders.cpp b/metagraph/src/annotation/binary_matrix/multi_brwt/brwt_builders.cpp index ea409bb008..fb4065480f 100644 --- a/metagraph/src/annotation/binary_matrix/multi_brwt/brwt_builders.cpp +++ b/metagraph/src/annotation/binary_matrix/multi_brwt/brwt_builders.cpp @@ -63,7 +63,7 @@ BRWT BRWTBottomUpBuilder::concatenate(std::vector&& submatrices, uint64_t num_columns = 0; Partition partition; for (const BRWT &submatrix : submatrices) { - partition.push_back(utils::arange(num_columns, submatrix.num_columns())); + partition.push_back(utils::arange>(num_columns, submatrix.num_columns())); num_columns += submatrix.num_columns(); } parent.assignments_ = 
RangePartition(std::move(partition)); @@ -130,7 +130,7 @@ BRWT BRWTBottomUpBuilder::concatenate_sparse(std::vector&& submatrices, uint64_t num_columns = 0; Partition partition; for (const BRWT &submatrix : submatrices) { - partition.push_back(utils::arange(num_columns, submatrix.num_columns())); + partition.push_back(utils::arange>(num_columns, submatrix.num_columns())); num_columns += submatrix.num_columns(); } parent.assignments_ = RangePartition(std::move(partition)); @@ -162,7 +162,7 @@ BRWT BRWTBottomUpBuilder::concatenate_sparse(std::vector&& submatrices, template std::vector subset(std::vector *vector, - const std::vector indexes) { + const std::vector indexes) { assert(vector); std::vector result; @@ -195,7 +195,7 @@ BRWT BRWTBottomUpBuilder::build(std::vector>&& colum // linkage[c] = {} for each c < num_columns BRWT BRWTBottomUpBuilder::build( const std::function &get_columns, - const std::vector> &linkage, + const std::vector> &linkage, const std::filesystem::path &tmp_path, size_t num_nodes_parallel, size_t num_threads) { @@ -322,7 +322,7 @@ BRWT BRWTBottomUpBuilder::build( ThreadPool thread_pool(num_threads, 100'000 * num_threads); - std::vector> stored_columns(linkage.size()); + std::vector> stored_columns(linkage.size()); #pragma omp parallel for num_threads(num_nodes_parallel) schedule(dynamic) for (size_t i = num_leaves; i < linkage.size(); ++i) { @@ -454,7 +454,7 @@ BRWT BRWTBottomUpBuilder::merge(std::vector&& nodes, uint64_t num_columns_total = 0; Partition current_partition; for (const BRWT &node : nodes) { - current_partition.push_back(utils::arange(num_columns_total, node.num_columns())); + current_partition.push_back(utils::arange>(num_columns_total, node.num_columns())); num_columns_total += node.num_columns(); } @@ -572,7 +572,7 @@ void BRWTOptimizer::reassign(size_t node_rank, BRWT *parent, size_t num_threads) BRWT &node = dynamic_cast(*parent->child_nodes_.at(node_rank)); - std::vector column_arrangement; + std::vector column_arrangement; 
std::vector group_sizes; for (size_t g = 0; g < parent->assignments_.num_groups(); ++g) { if (g == node_rank) diff --git a/metagraph/src/annotation/binary_matrix/multi_brwt/brwt_builders.hpp b/metagraph/src/annotation/binary_matrix/multi_brwt/brwt_builders.hpp index 5d0d68fd70..9aca6a4b78 100644 --- a/metagraph/src/annotation/binary_matrix/multi_brwt/brwt_builders.hpp +++ b/metagraph/src/annotation/binary_matrix/multi_brwt/brwt_builders.hpp @@ -33,7 +33,7 @@ class BRWTBottomUpBuilder { = std::function&&)>; static BRWT build(const std::function &get_columns, - const std::vector> &linkage, + const std::vector> &linkage, const std::filesystem::path &tmp_dir, size_t num_nodes_parallel = 1, size_t num_threads = 1); diff --git a/metagraph/src/annotation/binary_matrix/multi_brwt/clustering.cpp b/metagraph/src/annotation/binary_matrix/multi_brwt/clustering.cpp index af5afcf7da..daf74ff517 100644 --- a/metagraph/src/annotation/binary_matrix/multi_brwt/clustering.cpp +++ b/metagraph/src/annotation/binary_matrix/multi_brwt/clustering.cpp @@ -16,7 +16,7 @@ namespace matrix { using mtg::common::logger; -typedef std::vector> Partition; +typedef std::vector> Partition; typedef std::vector VectorPtrs; @@ -258,7 +258,7 @@ Partition greedy_matching(const std::vector &columns, size_t num_threads) { ++progress_bar; } - for (size_t i = 0; i < columns.size(); ++i) { + for (uint32_t i = 0; i < columns.size(); ++i) { if (!matched[i]) partition.push_back({ i }); } diff --git a/metagraph/src/annotation/binary_matrix/multi_brwt/clustering.hpp b/metagraph/src/annotation/binary_matrix/multi_brwt/clustering.hpp index e56b449e3e..85c8b8bc01 100644 --- a/metagraph/src/annotation/binary_matrix/multi_brwt/clustering.hpp +++ b/metagraph/src/annotation/binary_matrix/multi_brwt/clustering.hpp @@ -25,7 +25,7 @@ struct SparseColumn { // `SparseColumn` storing the column size and the positions of its set bits. // Output: a set of greedily matched column pairs. 
template -std::vector> +std::vector> greedy_matching(const std::vector &columns, size_t num_threads = 1); // Format resembling the Z matrix from scipy.cluster.hierarchy.linkage diff --git a/metagraph/src/cli/transform_annotation.cpp b/metagraph/src/cli/transform_annotation.cpp index 6b5ac12c18..f63222659a 100644 --- a/metagraph/src/cli/transform_annotation.cpp +++ b/metagraph/src/cli/transform_annotation.cpp @@ -168,11 +168,11 @@ matrix::LinkageMatrix compute_linkage(const std::vector &files, } } -std::vector> +std::vector> parse_linkage_matrix(const std::string &filename) { std::ifstream in(filename); - std::vector> linkage; + std::vector> linkage; std::string line; while (std::getline(in, line)) { std::vector parts = utils::split_string(line, " "); @@ -858,8 +858,7 @@ int transform_annotation(Config *config) { logger->trace("Generated new linkage and saved to {}", config->linkage_file); } - std::vector> linkage - = parse_linkage_matrix(config->linkage_file); + auto linkage = parse_linkage_matrix(config->linkage_file); logger->trace("Linkage loaded from {}", config->linkage_file); auto brwt_annotator = convert_to_BRWT( diff --git a/metagraph/src/common/range_partition.cpp b/metagraph/src/common/range_partition.cpp index e0bc1e23a5..365ac161d4 100644 --- a/metagraph/src/common/range_partition.cpp +++ b/metagraph/src/common/range_partition.cpp @@ -5,7 +5,7 @@ #include "common/serialization.hpp" -RangePartition::RangePartition(const std::vector &arrangement, +RangePartition::RangePartition(const std::vector &arrangement, const std::vector &group_sizes) { size_t offset = 0; for (size_t group_size : group_sizes) { @@ -19,15 +19,11 @@ RangePartition::RangePartition(const std::vector &arrangement, initialize_groups_and_ranks(); } -RangePartition::RangePartition(std::vector>&& partition) { - partition_.reserve(partition.size()); - for (auto &group : partition) { - assert(group.size() && "partition blocks must not be empty"); - partition_.emplace_back(group.begin(), 
group.end()); - group.clear(); - } - partition.clear(); - +RangePartition::RangePartition(std::vector>&& partition) + : partition_(std::move(partition)) { + assert(std::all_of(partition_.begin(), partition_.end(), + [](const auto &group) { return !group.empty(); }) + && "partition blocks must not be empty"); assert(initialize_groups_and_ranks()); initialize_groups_and_ranks(); } diff --git a/metagraph/src/common/range_partition.hpp b/metagraph/src/common/range_partition.hpp index f1bb020649..d6676e4e4b 100644 --- a/metagraph/src/common/range_partition.hpp +++ b/metagraph/src/common/range_partition.hpp @@ -16,9 +16,9 @@ class RangePartition { typedef uint32_t R; RangePartition() {} - RangePartition(const std::vector &arrangement, + RangePartition(const std::vector &arrangement, const std::vector &group_sizes); - explicit RangePartition(std::vector>&& partition); + explicit RangePartition(std::vector>&& partition); // get group that contains value inline G group(T value) const; diff --git a/metagraph/src/graph/alignment/aligner_labeled.cpp b/metagraph/src/graph/alignment/aligner_labeled.cpp index cc3c5f4779..bef5b3738d 100644 --- a/metagraph/src/graph/alignment/aligner_labeled.cpp +++ b/metagraph/src/graph/alignment/aligner_labeled.cpp @@ -17,7 +17,7 @@ typedef AnnotationBuffer::Columns Columns; typedef DeBruijnGraph::node_index node_index; // dummy index for an unfetched annotations -static constexpr size_t nannot = std::numeric_limits::max(); +static constexpr Column nannot = std::numeric_limits::max(); template bool overlap_with_diff(const T1 &tuple1, const T2 &tuple2, int64_t diff) { diff --git a/metagraph/src/graph/annotated_graph_algorithm.cpp b/metagraph/src/graph/annotated_graph_algorithm.cpp index 056f367028..b782e3422b 100644 --- a/metagraph/src/graph/annotated_graph_algorithm.cpp +++ b/metagraph/src/graph/annotated_graph_algorithm.cpp @@ -365,7 +365,7 @@ construct_diff_label_count_vector(const AnnotatedDBG &anno_graph, 
code_to_indicator[label_encoder.encode(label_out)] |= 2; } - std::vector label_codes; + std::vector label_codes; label_codes.reserve(code_to_indicator.size()); for (const auto &[code, indicator] : code_to_indicator) { label_codes.push_back(code); diff --git a/metagraph/tests/annotation/test_annotation.cpp b/metagraph/tests/annotation/test_annotation.cpp index 868c96bf62..6b197c845f 100644 --- a/metagraph/tests/annotation/test_annotation.cpp +++ b/metagraph/tests/annotation/test_annotation.cpp @@ -45,8 +45,10 @@ TYPED_TEST(AnnotatorPresetTest, GetLabels) { } TYPED_TEST(AnnotatorPresetTest, CountLabels) { + using Column = mtg::annot::matrix::BinaryMatrix::Column; + EXPECT_EQ( - convert_to_set(std::vector>({ + convert_to_set(std::vector>({ {0, 1}, {3, 2}, {1, 4}, {2, 2} })), convert_to_set(this->annotation->get_matrix().sum_rows( @@ -57,7 +59,7 @@ TYPED_TEST(AnnotatorPresetTest, CountLabels) { ); EXPECT_EQ( - convert_to_set(std::vector>({ + convert_to_set(std::vector>({ {0, 1}, {3, 2}, {1, 4}, {2, 2} })), convert_to_set(this->annotation->get_matrix().sum_rows( @@ -68,7 +70,7 @@ TYPED_TEST(AnnotatorPresetTest, CountLabels) { ); EXPECT_EQ( - convert_to_set(std::vector>({ + convert_to_set(std::vector>({ {3, 2}, {1, 4}, {2, 2} })), convert_to_set(this->annotation->get_matrix().sum_rows( diff --git a/metagraph/tests/annotation/test_matrix_helpers.cpp b/metagraph/tests/annotation/test_matrix_helpers.cpp index 6dad0863df..5df9ff366a 100644 --- a/metagraph/tests/annotation/test_matrix_helpers.cpp +++ b/metagraph/tests/annotation/test_matrix_helpers.cpp @@ -203,7 +203,7 @@ void test_matrix(const TypeParam &matrix, const BitVectorPtrArray &columns) { for (size_t m : { size_t(0), size_t(matrix.num_columns() / 2), size_t(matrix.num_columns()) }) { - std::vector indices(m); + std::vector indices(m); std::iota(indices.begin(), indices.end(), 0); std::vector> column_map(m); From c6bd1243ad6c3485f97c76354066f8e7bd02a7a7 Mon Sep 17 00:00:00 2001 From: Mikhail Karasikov Date: Sun, 18 
May 2025 00:38:33 +0200 Subject: [PATCH 11/62] minor --- metagraph/src/common/range_partition.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/metagraph/src/common/range_partition.cpp b/metagraph/src/common/range_partition.cpp index 365ac161d4..da54e3346e 100644 --- a/metagraph/src/common/range_partition.cpp +++ b/metagraph/src/common/range_partition.cpp @@ -1,5 +1,6 @@ #include "range_partition.hpp" +#include #include #include "common/serialization.hpp" From 30102c08e1284970a163188b99366ed850ea57aa Mon Sep 17 00:00:00 2001 From: Mikhail Karasikov Date: Sun, 18 May 2025 14:03:57 +0200 Subject: [PATCH 12/62] minor --- .../row_compressed/annotate_row_compressed.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/metagraph/src/annotation/representation/row_compressed/annotate_row_compressed.cpp b/metagraph/src/annotation/representation/row_compressed/annotate_row_compressed.cpp index 80661e1f1a..2c2632db59 100644 --- a/metagraph/src/annotation/representation/row_compressed/annotate_row_compressed.cpp +++ b/metagraph/src/annotation/representation/row_compressed/annotate_row_compressed.cpp @@ -37,8 +37,7 @@ RowCompressed