-
Notifications
You must be signed in to change notification settings - Fork 22
feat: Return chromsizes tileset info #158
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
cfd7968
1e31b7a
55e2327
2276c65
624fd7f
3133dde
249de68
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
| from typing import List, Optional | ||
|
|
||
| from pydantic import BaseModel | ||
|
|
||
|
|
||
| class TilesetInfo(BaseModel): | ||
| max_width: int | ||
| min_pos: List[int] | ||
| max_pos: List[int] | ||
| chromsizes: Optional[List] | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,35 +1,61 @@ | ||
| import csv | ||
| import logging | ||
| from smart_open import open | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| def get_tsv_chromsizes(filename): | ||
| def tileset_info(filename: str) -> dict: | ||
| """Return a standard higlass tileset info object that contains | ||
| chromsizes as an element. | ||
|
|
||
| The chromsizes in the returned object will be a list of [name, size] | ||
| tuples. | ||
|
|
||
| [ | ||
| ['chr1', 1000], | ||
| ['chr2', 2000] | ||
| ] | ||
| """ | ||
| chromsizes = get_tsv_chromsizes(filename) | ||
|
|
||
| max_width = sum([int(c[1]) for c in chromsizes]) | ||
| return { | ||
| "max_width": max_width, | ||
| "chromsizes": [[c[0], int(c[1])] for c in chromsizes], | ||
| "min_pos": [0], | ||
| "max_pos": [max_width], | ||
| } | ||
|
|
||
|
|
||
| def get_tsv_chromsizes(file): | ||
| """ | ||
| Get a list of chromosome sizes from this [presumably] tsv | ||
| chromsizes file file. | ||
| chromsizes file. | ||
|
|
||
| Parameters: | ||
| ----------- | ||
| filename: string | ||
| The filename of the tsv file | ||
| file: string or file-like object | ||
| A file-like object | ||
|
|
||
| Returns | ||
| ------- | ||
| chromsizes: [(name:string, size:int), ...] | ||
| An ordered list of chromosome names and sizes | ||
| """ | ||
| if isinstance(file, str): | ||
| file = open(file, "rb") | ||
|
|
||
| try: | ||
| with open(filename, "r") as f: | ||
| reader = csv.reader(f, delimiter="\t") | ||
| file.seek(0) | ||
| binary_data = file.read() | ||
| text_data = binary_data.decode("utf-8") | ||
|
|
||
| data = [] | ||
| for row in reader: | ||
| data.append(row) | ||
| lines = text_data.split("\n") | ||
| data = [line.strip().split("\t") for line in lines if line.strip()] | ||
| return data | ||
| except Exception as ex: | ||
| logger.error(ex) | ||
|
|
||
| err_msg = "WHAT?! Could not load file %s. 😤 (%s)" % (filename, ex) | ||
| err_msg = "WHAT?! Could not load file %s." % (ex) | ||
|
|
||
| raise Exception(err_msg) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,22 @@ | ||
| import os.path as op | ||
|
|
||
| import clodius.tiles.chromsizes as ctcs | ||
| from clodius.models.tileset_info import TilesetInfo | ||
|
|
||
|
|
||
| def test_get_tileset_info(): | ||
| filename = op.join("data", "chromSizes.tsv") | ||
|
|
||
| # Test loading tileset info using a filename | ||
| tsinfo = TilesetInfo(**ctcs.tileset_info(filename)) | ||
|
|
||
| assert tsinfo.max_width > 100 | ||
| assert len(tsinfo.chromsizes) > 2 | ||
| # TODO: Do something with the return value | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The parsing with the pydantic model is a kind of assertion here; is that what you were thinking with respect to the "return value"?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oh, I meant to remove that line. Removing in the next commit. |
||
|
|
||
| with open(filename, "rb") as f: | ||
| # Test loading using a file-like object | ||
| tsinfo = TilesetInfo(**ctcs.tileset_info(f)) | ||
|
|
||
| assert tsinfo.max_width > 100 | ||
| assert len(tsinfo.chromsizes) > 2 | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see this is only used in the test. Should it live only in the test module, or is it intended to be part of the public API?