From c544fceea0a3cbbb3ee894e71131f44ad7602463 Mon Sep 17 00:00:00 2001 From: Harun Mustafa Date: Mon, 8 Aug 2022 11:34:53 +0200 Subject: [PATCH 001/410] Support lazy loading of annotation columns in differential assembly --- metagraph/integration_tests/test_assemble.py | 9 +- .../column_compressed_lazy.hpp | 51 +++++++++++ metagraph/src/cli/assemble.cpp | 24 +++++- metagraph/src/cli/config/config.cpp | 1 + .../src/cli/load/load_annotated_graph.cpp | 45 ++++++---- .../src/graph/annotated_graph_algorithm.cpp | 84 +++++++++++-------- 6 files changed, 161 insertions(+), 53 deletions(-) create mode 100644 metagraph/src/annotation/representation/column_compressed/column_compressed_lazy.hpp diff --git a/metagraph/integration_tests/test_assemble.py b/metagraph/integration_tests/test_assemble.py index a4a6ae4c30..0c9eb7f67f 100644 --- a/metagraph/integration_tests/test_assemble.py +++ b/metagraph/integration_tests/test_assemble.py @@ -42,6 +42,7 @@ } GFAs = [name for name, _ in gfa_tests.items()] +LOAD_TYPES = ['load', 'stream'] NUM_THREADS = 4 @@ -266,11 +267,13 @@ def check_suffix(anno_repr, suffix): no_anchor_opt ) - def test_diff_assembly(self): + @parameterized.expand(LOAD_TYPES) + def test_diff_assembly(self, load_type): assemble_command = f'{METAGRAPH} assemble -p {NUM_THREADS} \ -a {self.tempdir.name}/annotation{anno_file_extension[self.anno_repr]} \ -o {self.tempdir.name}/diff_contigs \ --diff-assembly-rules {TEST_DATA_DIR}/example.diff.json \ + {"--separately" if load_type == "stream" else ""} \ {self.tempdir.name}/graph{graph_file_extension[self.graph_repr]}' res = subprocess.run([assemble_command], shell=True) self.assertEqual(res.returncode, 0) @@ -296,11 +299,13 @@ def test_diff_assembly(self): self.assertEqual(results['>metasub_by_kmer'][0], 'CTTGGATCACACTCTTCTCAGAGCCCAGGCCAGGGGCCCCCAAGAAAGGCTCTGGTGGAGAACCTGTGCATGAAGGCTGTCAACCAGTCCATAGGCAGGGCCATCAGGCACCAAAGGGATTCTGCCAGCATAGTGCTCCTGGACCAGTGATACACCCGGCACCCTGTCCTGGACATGCTGTTGGCCTGGATCTGAGCCCTCGTGGAGGTCAAAGCCACCTTTGGTTCTGCCATTGCTGCTGTGTGGAAGTTCACTCAAGTAGGCCTCTTCCTG') self.assertEqual(results['>metasub_sym_diff'][0], 'TGGAAGTTCACTCAAGTAGGCCTCTTCCTGACAGGCAGCTGCACCACTGCCTGGCGCTGTGCCCTTCCTTTGCTCTGCCCGCTGGAGACGGTGTTTGTCATGGGCCTGGTCTGCAGG') - def test_diff_assembly_simple(self): + @parameterized.expand(LOAD_TYPES) + def test_diff_assembly_simple(self, load_type): assemble_command = f'{METAGRAPH} assemble -p {NUM_THREADS} \ -a {self.tempdir.name}/annotation{anno_file_extension[self.anno_repr]} \ -o {self.tempdir.name}/diff_contigs \ --diff-assembly-rules {TEST_DATA_DIR}/example_simple.diff.json \ + {"--separately" if load_type == "stream" else ""} \ {self.tempdir.name}/graph{graph_file_extension[self.graph_repr]}' res = subprocess.run([assemble_command], shell=True) self.assertEqual(res.returncode, 0) diff --git a/metagraph/src/annotation/representation/column_compressed/column_compressed_lazy.hpp b/metagraph/src/annotation/representation/column_compressed/column_compressed_lazy.hpp new file mode 100644 index 0000000000..9b8aa61e50 --- /dev/null +++ b/metagraph/src/annotation/representation/column_compressed/column_compressed_lazy.hpp @@ -0,0 +1,51 @@ +#ifndef __COLUMN_COMPRESSED_LAZY_HPP__ +#define __COLUMN_COMPRESSED_LAZY_HPP__ + + +#include "annotation/representation/column_compressed/annotate_column_compressed.hpp" + +namespace mtg { +namespace annot { + +template +class ColumnCompressedLazy : public MultiLabelEncoded