From 5fe4a75d18ad2e4658dcf017df36546356ddc29f Mon Sep 17 00:00:00 2001 From: Peter Kerpedjiev Date: Wed, 1 Apr 2026 22:02:41 -0700 Subject: [PATCH 1/3] Move pysam import in cram.py into function where it's actually used --- clodius/tiles/cram.py | 6 +- notebooks/Aggregating an array.ipynb | 61 ++++++++++++++----- .../ENSEMBL annotations and RNAseq.ipynb | 31 +++++++--- notebooks/h37rv gene annotations.ipynb | 21 +++++-- 4 files changed, 89 insertions(+), 30 deletions(-) diff --git a/clodius/tiles/cram.py b/clodius/tiles/cram.py index a9bc6133..d0728b33 100644 --- a/clodius/tiles/cram.py +++ b/clodius/tiles/cram.py @@ -1,10 +1,10 @@ -import pysam - from clodius.tiles.bam import alignment_tileset_info from clodius.tiles.bam import alignment_tiles def tileset_info(filename, chromsizes): + import pysam + samfile = pysam.AlignmentFile(filename, "rc") return alignment_tileset_info(samfile, chromsizes) @@ -13,6 +13,8 @@ def tileset_info(filename, chromsizes): def tiles( filename, tile_ids, index_filename=None, chromsizes=None, max_tile_width=None ): + import pysam + samfile = pysam.AlignmentFile(filename, "rc", index_filename=index_filename) return alignment_tiles( diff --git a/notebooks/Aggregating an array.ipynb b/notebooks/Aggregating an array.ipynb index 15eb3520..0925bec2 100644 --- a/notebooks/Aggregating an array.ipynb +++ b/notebooks/Aggregating an array.ipynb @@ -4,7 +4,10 @@ "cell_type": "code", "execution_count": 1, "metadata": { - "collapsed": true + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } }, "outputs": [], "source": [ @@ -15,7 +18,10 @@ "cell_type": "code", "execution_count": 2, "metadata": { - "collapsed": true + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } }, "outputs": [], "source": [ @@ -59,7 +65,10 @@ "cell_type": "code", "execution_count": 24, "metadata": { - "collapsed": true + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } }, "outputs": [], "source": [ @@ -295,7 +304,10 @@ "cell_type": "code", "execution_count": 186, "metadata": { - "collapsed": true + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } }, "outputs": [], "source": [ @@ -329,7 +341,10 @@ "cell_type": "code", "execution_count": 190, "metadata": { - "collapsed": true + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } }, "outputs": [], "source": [ @@ -487,7 +502,10 @@ "cell_type": "code", "execution_count": 5, "metadata": { - "collapsed": true + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } }, "outputs": [], "source": [ @@ -627,7 +645,10 @@ "cell_type": "code", "execution_count": 43, "metadata": { - "collapsed": true + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } }, "outputs": [], "source": [ @@ -1051,7 +1072,10 @@ "cell_type": "code", "execution_count": 279, "metadata": { - "collapsed": true + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } }, "outputs": [], "source": [ @@ -1145,7 +1169,10 @@ { "cell_type": "markdown", "metadata": { - "collapsed": true + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } }, "source": [ "## Example with n x c data" @@ -1331,7 +1358,10 @@ "cell_type": "code", "execution_count": 127, "metadata": { - "collapsed": true + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } }, "outputs": [], "source": [ @@ -1384,7 +1414,10 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } }, "outputs": [], "source": [] @@ -1392,7 +1425,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1406,9 +1439,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.1" + "version": "3.10.15" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/notebooks/ENSEMBL annotations and RNAseq.ipynb b/notebooks/ENSEMBL annotations and RNAseq.ipynb index 068e6c5f..76d83166 100644 --- a/notebooks/ENSEMBL annotations and RNAseq.ipynb +++ b/notebooks/ENSEMBL annotations and RNAseq.ipynb @@ -4,7 +4,10 @@ "cell_type": "code", "execution_count": 1, "metadata": { - "collapsed": true + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } }, "outputs": [], "source": [ @@ -39,7 +42,10 @@ "cell_type": "code", "execution_count": 3, "metadata": { - "collapsed": true + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } }, "outputs": [], "source": [ @@ -50,7 +56,10 @@ "cell_type": "code", "execution_count": 5, "metadata": { - "collapsed": true + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } }, "outputs": [], "source": [ @@ -229,7 +238,10 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } }, "outputs": [], "source": [] @@ -238,7 +250,10 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } }, "outputs": [], "source": [] @@ -246,7 +261,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -260,9 +275,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.2" + "version": "3.10.15" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/notebooks/h37rv gene annotations.ipynb b/notebooks/h37rv gene annotations.ipynb index aeade26f..a252ef6b 100644 --- a/notebooks/h37rv gene annotations.ipynb +++ b/notebooks/h37rv gene annotations.ipynb @@ -4,7 +4,10 @@ "cell_type": "code", "execution_count": 78, "metadata": { - "collapsed": true + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } }, "outputs": [], "source": [ @@ -209,7 +212,10 @@ "cell_type": "code", "execution_count": 76, "metadata": { - "collapsed": true + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } }, "outputs": [], "source": [ @@ -220,7 +226,10 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } }, "outputs": [], "source": [] @@ -228,7 +237,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -242,9 +251,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.2" + "version": "3.10.15" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } From 41ca8860fc76f0f59bc8957b20aaef1320f86d66 Mon Sep 17 00:00:00 2001 From: Peter Kerpedjiev Date: Sat, 4 Apr 2026 07:54:34 -0700 Subject: [PATCH 2/3] Avoid overflow in cooler files when genomes are larger than 2**31 --- clodius/tiles/cooler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clodius/tiles/cooler.py b/clodius/tiles/cooler.py index 016c806e..bc8c4451 100644 --- a/clodius/tiles/cooler.py +++ b/clodius/tiles/cooler.py @@ -514,7 +514,7 @@ def make_mats(filepath): # get the genome size resolution = list(f["resolutions"].keys())[0] - genome_length = int(sum(f["resolutions"][resolution]["chroms"]["length"])) + genome_length = int(np.sum(f["resolutions"][resolution]["chroms"]["length"][:].astype(np.int64))) info["max_pos"] = [genome_length, genome_length] info["min_pos"] = [1, 1] From 3c0c19c694bbd1ccaf8ac32f382f0a9f1e6b49bb Mon Sep 17 00:00:00 2001 From: Peter Kerpedjiev Date: Sat, 4 Apr 2026 07:58:04 -0700 Subject: [PATCH 3/3] Fix linting errors --- clodius/tiles/vcf.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clodius/tiles/vcf.py b/clodius/tiles/vcf.py index 57f68ec0..623360e3 100644 --- a/clodius/tiles/vcf.py +++ b/clodius/tiles/vcf.py @@ -7,7 +7,6 @@ from clodius.utils import TILE_OPTIONS_CHAR - def grouper(n, iterable): it = iter(iterable) while True: @@ -51,7 +50,7 @@ def regions(filename, chromsizes, offset, limit): limit: The total number of entries to fetch """ from pysam import VariantFile - + vcf = VariantFile(filename) # auto-detect input format fetcher = vcf.fetch()