Skip to content

Commit 4e0ec24

Browse files
committed
added more info to test
1 parent 7eb1b67 commit 4e0ec24

2 files changed

Lines changed: 25 additions & 14 deletions

File tree

src/koza/io/writer/tsv_writer.py

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -84,24 +84,22 @@ def get_new_fh_path(base_dir, filename, category):
8484
if split:
8585
base_dir, filename = Path(fh.name).parent, getattr(self, f"{record_type}s_file_name").name
8686
if record_type == "node":
87-
category = record.get("category", ["UnknownNodeCategory"])[0].split(":")[-1]
87+
category = record.get("category", [""])[0].split(":")[-1]
8888
else:
8989
subject_category = (
90-
record.get("subject_category", "UnknownSubjectCategory").split(":")[-1]
90+
record.get("subject_category", "").split(":")[-1]
9191
if record.get("subject_category")
92-
else "UnknownSubjectCategory"
92+
else "UnknownCategory"
9393
)
9494

9595
object_category = (
96-
record.get("object_category", "UnknownObjectCategory").split(":")[-1]
96+
record.get("object_category", "").split(":")[-1]
9797
if record.get("object_category")
98-
else "UnknownObjectCategory"
98+
else "UnknownCategory"
9999
)
100100

101101
edge_category = (
102-
record.get("category", ["UnknownEdgeCategory"])[0].split(":")[-1]
103-
if record.get("category")
104-
else "UnknownEdgeCategory"
102+
record.get("category", [""])[0].split(":")[-1] if record.get("category") else "UnknownCategory"
105103
)
106104

107105
category = subject_category + edge_category + object_category

tests/unit/test_tsvwriter_node_and_edge.py

Lines changed: 19 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -40,17 +40,19 @@ def test_tsv_writer_split():
4040
"""
4141
Writes a test tsv file
4242
"""
43-
g1 = Gene(id="HGNC:11603", name="TBX4")
44-
d1 = Disease(id="MONDO:0005002", name="chronic obstructive pulmonary disease")
43+
g1 = Gene(id="HGNC:11603", name="TBX4", category=["biolink:Gene"])
44+
d1 = Disease(id="MONDO:0005002", name="chronic obstructive pulmonary disease", category=["biolink:Disease"])
4545
a1 = GeneToDiseaseAssociation(
4646
id="uuid:5b06e86f-d768-4cd9-ac27-abe31e95ab1e",
4747
subject=g1.id,
4848
object=d1.id,
4949
predicate="biolink:contributes_to",
5050
knowledge_level="not_provided",
5151
agent_type="not_provided",
52+
subject_category="biolink:Gene",
53+
object_category="biolink:Disease",
5254
)
53-
g2 = Gene(id="HGNC:11604", name="TBX5")
55+
g2 = Gene(id="HGNC:11604", name="TBX5", category=["biolink:Gene"])
5456
d2 = Disease(id="MONDO:0005003", name="asthma")
5557
a2 = GeneToDiseaseAssociation(
5658
id="uuid:5b06e86f-d768-4cd9-ac27-abe31e95ab1f",
@@ -61,7 +63,7 @@ def test_tsv_writer_split():
6163
agent_type="not_provided",
6264
)
6365
g3 = Gene(id="HGNC:11605", name="TBX6")
64-
d3 = Disease(id="MONDO:0005004", name="lung cancer")
66+
d3 = Disease(id="MONDO:0005004", name="lung cancer", category=["biolink:Disease"])
6567
a3 = GeneToDiseaseAssociation(
6668
id="uuid:5b06e86f-d768-4cd9-ac27-abe31e95ab1g",
6769
subject=g3.id,
@@ -70,14 +72,25 @@ def test_tsv_writer_split():
7072
knowledge_level="not_provided",
7173
agent_type="not_provided",
7274
)
73-
ents = [[g1, d1, a1], [g2, d2, a2], [g3, d3, a3]]
75+
g4 = Gene(id="HGNC:11606", name="TBX7")
76+
d4 = Disease(id="MONDO:0005005", name="pulmonary fibrosis")
77+
a4 = GeneToDiseaseAssociation(
78+
id="uuid:5b06e86f-d768-4cd9-ac27-abe31e95ab1h",
79+
subject=g4.id,
80+
object=d4.id,
81+
predicate="biolink:contributes_to",
82+
knowledge_level="not_provided",
83+
agent_type="not_provided",
84+
)
85+
86+
ents = [[g1, d1, a1], [g2, d2, a2], [g3, d3, a3], [g4, d4, a4]]
7487

7588
node_properties = ["id", "category", "symbol", "in_taxon", "provided_by", "source"]
7689
edge_properties = ["id", "subject", "predicate", "object", "category" "qualifiers", "publications", "provided_by"]
7790

7891
outdir = "output/tests/split-examples"
7992
outfile = "tsvwriter"
80-
split_edge_file_substring = "UnknownSubjectCategoryGeneToDiseaseAssociationUnknownObjectCategory"
93+
split_edge_file_substring = "UnknownCategoryGeneToDiseaseAssociationUnknownCategory"
8194

8295
t = TSVWriter(outdir, outfile, node_properties, edge_properties)
8396
for ent in ents:

0 commit comments

Comments
 (0)