Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion metagraph/src/graph/representation/canonical_dbg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ ::map_to_nodes_sequentially(std::string_view sequence,
path.reserve(sequence.size() - get_k() + 1);

if (const auto sshash = std::dynamic_pointer_cast<const DBGSSHash>(graph_)) {
sshash->map_to_nodes_with_rc<>(sequence, [&](node_index node, bool orientation) {
sshash->map_to_nodes_with_rc<true>(sequence, [&](node_index node, bool orientation) {
Comment thread
adamant-pwn marked this conversation as resolved.
callback(node && orientation ? reverse_complement(node) : node);
}, terminate);
return;
Expand Down
46 changes: 35 additions & 11 deletions metagraph/src/graph/representation/hash/dbg_sshash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "common/seq_tools/reverse_complement.hpp"
#include "common/threads/threading.hpp"
#include "common/logger.hpp"
#include "common/algorithms.hpp"
#include "kmer/kmer_extractor.hpp"


Expand Down Expand Up @@ -99,6 +100,36 @@ void DBGSSHash::add_sequence(std::string_view sequence,
throw std::logic_error("adding sequences not supported");
}

void DBGSSHash
::map_to_nodes_with_rc_advanced(std::string_view sequence,
const std::function<void(sshash::lookup_result)>& callback,
bool with_rc,
const std::function<bool()>& terminate) const {
if (terminate() || sequence.size() < k_)
return;
Comment thread
hmusta marked this conversation as resolved.

std::visit([&](const auto &dict) {
using kmer_t = get_kmer_t<decltype(dict)>;

std::vector<char> seq_encoded;
seq_encoded.reserve(sequence.size());
for (size_t i = 0; i < sequence.size(); ++i) {
seq_encoded.emplace_back(!kmer_t::is_valid(sequence[i]));
}

auto invalid = utils::drag_and_mark_segments(seq_encoded, 1, k_);

kmer_t uint_kmer = sshash::util::string_to_uint_kmer<kmer_t>(sequence.data(), k_ - 1);
uint_kmer.pad_char();
for (size_t i = k_ - 1; i < sequence.size() && !terminate(); ++i) {
uint_kmer.drop_char();
uint_kmer.kth_char_or(k_ - 1, kmer_t::char_to_uint(sequence[i]));
callback(!invalid[i] ? dict.lookup_advanced_uint(uint_kmer, with_rc)
: sshash::lookup_result());
Comment thread
hmusta marked this conversation as resolved.
Outdated
}
}, dict_);
}

template <bool with_rc>
void DBGSSHash::map_to_nodes_with_rc(std::string_view sequence,
const std::function<void(node_index, bool)>& callback,
Expand All @@ -113,18 +144,11 @@ void DBGSSHash::map_to_nodes_with_rc(std::string_view sequence,
return;
}

std::visit([&](const auto &dict) {
using kmer_t = get_kmer_t<decltype(dict)>;
kmer_t uint_kmer = sshash::util::string_to_uint_kmer<kmer_t>(sequence.data(), k_ - 1);
uint_kmer.pad_char();
for (size_t i = k_ - 1; i < sequence.size() && !terminate(); ++i) {
uint_kmer.drop_char();
uint_kmer.kth_char_or(k_ - 1, kmer_t::char_to_uint(sequence[i]));
auto res = dict.lookup_advanced_uint(uint_kmer, with_rc);
callback(sshash_to_graph_index(res.kmer_id), res.kmer_orientation);
}
}, dict_);
map_to_nodes_with_rc_advanced(sequence, [&](sshash::lookup_result res) {
callback(sshash_to_graph_index(res.kmer_id), res.kmer_orientation);
}, with_rc, terminate);
}

template
void DBGSSHash::map_to_nodes_with_rc<true>(std::string_view,
const std::function<void(node_index, bool)>&,
Expand Down
6 changes: 6 additions & 0 deletions metagraph/src/graph/representation/hash/dbg_sshash.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,12 @@ class DBGSSHash : public DeBruijnGraph {
size_t num_nodes_;
Mode mode_;

void map_to_nodes_with_rc_advanced(
Comment thread
adamant-pwn marked this conversation as resolved.
Outdated
std::string_view sequence,
const std::function<void(sshash::lookup_result)>& callback,
bool with_rc,
const std::function<bool()>& terminate = []() { return false; }) const;

size_t dict_size() const;
};

Expand Down
7 changes: 3 additions & 4 deletions metagraph/tests/annotation/test_annotated_dbg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,12 @@
#include "gtest/gtest.h"

#include "../test_helpers.hpp"
#include "../graph/all/test_dbg_helpers.hpp"

#include "common/threads/threading.hpp"
#include "common/vectors/bit_vector_dyn.hpp"
#include "common/vectors/vector_algorithm.hpp"
#include "annotation/representation/column_compressed/annotate_column_compressed.hpp"
#include "graph/representation/bitmap/dbg_bitmap.hpp"
#include "graph/representation/hash/dbg_hash_string.hpp"
#include "graph/representation/hash/dbg_hash_ordered.hpp"
#include "graph/representation/hash/dbg_hash_fast.hpp"

#define protected public
#define private public
Expand Down Expand Up @@ -987,6 +984,7 @@ typedef ::testing::Types<std::pair<DBGBitmap, annot::ColumnCompressed<>>,
std::pair<DBGHashOrdered, annot::ColumnCompressed<>>,
std::pair<DBGHashFast, annot::ColumnCompressed<>>,
std::pair<DBGSuccinct, annot::ColumnCompressed<>>,
std::pair<DBGSSHash, annot::ColumnCompressed<>>,
std::pair<DBGBitmap, annot::RowFlatAnnotator>,
std::pair<DBGHashString, annot::RowFlatAnnotator>,
std::pair<DBGHashOrdered, annot::RowFlatAnnotator>,
Expand Down Expand Up @@ -1016,6 +1014,7 @@ class AnnotatedDBGNoNTest : public ::testing::Test {};
typedef ::testing::Types<std::pair<DBGBitmap, annot::ColumnCompressed<>>,
std::pair<DBGHashOrdered, annot::ColumnCompressed<>>,
std::pair<DBGHashFast, annot::ColumnCompressed<>>,
std::pair<DBGSSHash, annot::ColumnCompressed<>>,
std::pair<DBGBitmap, annot::RowFlatAnnotator>,
std::pair<DBGHashOrdered, annot::RowFlatAnnotator>,
std::pair<DBGHashFast, annot::RowFlatAnnotator>,
Expand Down
1 change: 1 addition & 0 deletions metagraph/tests/annotation/test_annotated_dbg_helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ template std::unique_ptr<AnnotatedDBG> build_anno_graph<DBGBitmap, ColumnCompres
template std::unique_ptr<AnnotatedDBG> build_anno_graph<DBGHashOrdered, ColumnCompressed<>>(uint64_t, const std::vector<std::string> &, const std::vector<std::string>&, DeBruijnGraph::Mode, bool);
template std::unique_ptr<AnnotatedDBG> build_anno_graph<DBGHashFast, ColumnCompressed<>>(uint64_t, const std::vector<std::string> &, const std::vector<std::string>&, DeBruijnGraph::Mode, bool);
template std::unique_ptr<AnnotatedDBG> build_anno_graph<DBGHashString, ColumnCompressed<>>(uint64_t, const std::vector<std::string> &, const std::vector<std::string>&, DeBruijnGraph::Mode, bool);
template std::unique_ptr<AnnotatedDBG> build_anno_graph<DBGSSHash, ColumnCompressed<>>(uint64_t, const std::vector<std::string> &, const std::vector<std::string>&, DeBruijnGraph::Mode, bool);

template std::unique_ptr<AnnotatedDBG> build_anno_graph<DBGSuccinct, RowFlatAnnotator>(uint64_t, const std::vector<std::string> &, const std::vector<std::string>&, DeBruijnGraph::Mode, bool);
template std::unique_ptr<AnnotatedDBG> build_anno_graph<DBGBitmap, RowFlatAnnotator>(uint64_t, const std::vector<std::string> &, const std::vector<std::string>&, DeBruijnGraph::Mode, bool);
Expand Down
7 changes: 6 additions & 1 deletion metagraph/tests/graph/all/test_dbg_helpers.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
#include "test_dbg_helpers.hpp"

#include "../../annotation/test_annotated_dbg_helpers.hpp"
#include "annotation/representation/column_compressed/annotate_column_compressed.hpp"

#include "gtest/gtest.h"
#include "graph/annotated_dbg.hpp"
Comment thread
hmusta marked this conversation as resolved.
Outdated
#include "graph/representation/canonical_dbg.hpp"
#include "graph/representation/succinct/boss.hpp"
#include "graph/representation/succinct/boss_construct.hpp"
Expand Down Expand Up @@ -146,6 +150,7 @@ void writeFastaFile(const std::vector<std::string>& sequences, const std::string

fastaFile.close();
}

template <>
std::shared_ptr<DeBruijnGraph>
build_graph<DBGSSHash>(uint64_t k,
Expand All @@ -155,7 +160,7 @@ build_graph<DBGSSHash>(uint64_t k,
return std::make_shared<DBGSSHash>(k, mode);

// use DBGHashString to get contigs for SSHash
Comment thread
hmusta marked this conversation as resolved.
Outdated
auto string_graph = build_graph<DBGHashString>(k, sequences, mode);
auto string_graph = build_graph<DBGHashFast>(k, sequences, mode);

std::vector<std::string> contigs;
size_t num_kmers = 0;
Expand Down