diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 1c40547b..d8e79e04 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.20.3
+current_version = 0.20.4
 tag = True
 commit = True
 
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000..d3f7daa4
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,20 @@
+# Binary genomics data files tracked with Git LFS
+*.cool filter=lfs diff=lfs merge=lfs -text
+*.mv5 filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.hdf5 filter=lfs diff=lfs merge=lfs -text
+*.bam filter=lfs diff=lfs merge=lfs -text
+*.bai filter=lfs diff=lfs merge=lfs -text
+*.beddb filter=lfs diff=lfs merge=lfs -text
+*.bb filter=lfs diff=lfs merge=lfs -text
+*.bigWig filter=lfs diff=lfs merge=lfs -text
+*.hitile filter=lfs diff=lfs merge=lfs -text
+data/*.fna filter=lfs diff=lfs merge=lfs -text
+data/*.gff.gz filter=lfs diff=lfs merge=lfs -text
+data/*.vcf.gz filter=lfs diff=lfs merge=lfs -text
+data/*.bed.gz filter=lfs diff=lfs merge=lfs -text
+data/*.bed.1.gz filter=lfs diff=lfs merge=lfs -text
+data/*.gz.tbi filter=lfs diff=lfs merge=lfs -text
+data/*.multires filter=lfs diff=lfs merge=lfs -text
+data/*.gff filter=lfs diff=lfs merge=lfs -text
+data/SRR1770413.sorted.short.bam.bai filter=lfs diff=lfs merge=lfs -text
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 45cce637..bba0d059 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -14,7 +14,7 @@ jobs:
     - uses: actions/checkout@v3
     - uses: actions/setup-python@v4
       with:
-        python-version: '3.10'
+        python-version: '3.12'
 
     - name: Install Dependencies
       run: |
@@ -29,21 +29,12 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.9', '3.10']
+        python-version: ['3.12']
 
     steps:
     - uses: actions/checkout@v3
-
-    - name: Cache Fixtures
-      id: cache-fixtures
-      uses: actions/cache@v3
       with:
-        path: data/
-        key: ${{ runner.os }}-{{ hashFiles('get_test_data.sh') }}-{{ hashFiles('.gitignore') }}
-
-    - name: Download Fixtures
-      if: steps.cache-fixtures.outputs.cache-hit != 'true'
-      run: ./get_test_data.sh
+        lfs: true
 
     - name: Set Up Python ${{ matrix.python-version }}
       uses: actions/setup-python@v4
diff --git a/.gitignore b/.gitignore
index 8875b9cd..de97046d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,8 @@
 notebooks/Scratch.ipynb
 notebooks/VCF.ipynb
 
+settings.local.json
+
 *.py[cod]
 __pycache__
 *~
@@ -40,7 +42,36 @@ Thumbs.db
 old
 tmp
 checkpoint
-data/
+data/*
+!data/Dixon2012-J1-NcoI-R1-filtered.100kb.multires.cool
+!data/hic-resolutions.cool
+!data/sample_htime.json
+!data/gene_annotations.short.db
+!data/wgEncodeCaltechRnaSeqHuvecR1x75dTh1014IlnaPlusSignalRep2.bigWig
+!data/points_density.h5
+!data/corrected.geneListwithStrand.bed.multires
+!data/labels.h5
+!data/SRR1770413.sorted.short.bam
+!data/SRR1770413.sorted.short.bam.bai
+!data/SRR1770413.different_index_filename.bai
+!data/SRR1770413.mismatched_bai.bam
+!data/geneAnnotationsExonUnions.1000.bed.v3.beddb
+!data/masterlist_DHSs_733samples_WM20180608_all_mean_signal_colorsMax.bed.bb
+!data/GCA_000350705.1_Esch_coli_KTE11_V1_genomic.short.fna.fai
+!data/GCA_002918705.1_ASM291870v1_genomic.gff.gz
+!data/genomic.10k.gff
+!data/genomic.10k.gff.gz
+!data/chm13v1.chrom.sizes
+!data/hg38.chrom.sizes
+!data/test.1.vcf.gz
+!data/no_item_rgb.bed
+!data/regions.valid.bed.1.gz
+!data/regions.valid.bed
+!data/regions.valid.bed.gz
+!data/regions.valid.bed.gz.tbi
+!data/regions.spaces.bed
+!data/genomic.10k.gff.gz.tbi
+!data/GCA_000350705.1_Esch_coli_KTE11_V1_genomic.short.fna
 output/
 COMMANDS
 npm-debug.log
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 916c2ef8..0eb736e1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,11 @@
+v0.21.0
+
+- Huge set of changes to support file-pointer based tileset functions
+
+v0.20.4
+
+- Fix overflow issue in cooler files
+
 v0.20.3
 
 - Add chromsizes tileset_info function
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 00000000..ff9c6d05
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,43 @@
+# Clodius
+
+A Python library and CLI tool for aggregating large genomic datasets into tile-based formats for display at multiple resolutions (used by [HiGlass](https://higlass.io)).
+
+## Project Structure
+
+- `clodius/` — main package
+  - `cli/` — Click-based CLI commands (`aggregate.py`, `convert.py`)
+  - `tiles/` — tile generation modules per file type (bigwig, cooler, bed, etc.)
+  - `models/` — Pydantic data models
+- `test/` — pytest tests mirroring the source layout
+- `test/sample_data/` — small sample files used by tests
+
+## Development Setup
+
+```shell
+pip install -e ".[dev]"
+```
+
+## Common Commands
+
+Run all tests:
+```shell
+pytest
+```
+
+Run a specific test:
+```shell
+pytest test/cli_test.py::test_clodius_aggregate_bedgraph
+```
+
+Lint:
+```shell
+flake8 clodius
+```
+
+## Key Conventions
+
+- **Linting**: flake8 (configured via `pyproject.toml`)
+- **Tests**: pytest with coverage (`pytest --cov=clodius`)
+- **Build**: hatchling
+- **Main branch**: `develop` (use this as the base for PRs)
+- **Python packaging**: `pyproject.toml` (no `setup.py`)
diff --git a/README.md b/README.md
index e5d5c756..aadca63b 100644
--- a/README.md
+++ b/README.md
@@ -55,6 +55,40 @@ install `clodius` with develop mode:
 pip install -e ".[dev]"
 ```
 
+## Test Fixtures (Git LFS)
+
+Test data files in `data/` are stored in [Git LFS](https://git-lfs.com/). They are downloaded automatically when you clone the repository with LFS enabled:
+
+```shell
+git lfs install  # once per machine
+git clone <repo>  # LFS files downloaded automatically
+# or, in an existing clone:
+git lfs pull
+```
+
+### Adding a new test fixture
+
+1. **Check if the file type is already tracked** — open [.gitattributes](.gitattributes) and look for a matching pattern (e.g. `data/*.gff.gz`, `*.bam`). If not, add a new tracking rule:
+
+   ```shell
+   git lfs track "data/*.ext"   # adds a line to .gitattributes
+   git add .gitattributes
+   ```
+
+2. **Allow the file through `.gitignore`** — `data/*` is ignored by default. Add a negation line for your file:
+
+   ```
+   !data/your_new_file.ext
+   ```
+
+3. **Stage and commit as normal:**
+
+   ```shell
+   git add data/your_new_file.ext
+   git commit -m "Add test fixture: your_new_file.ext"
+   git push  # LFS objects are uploaded automatically
+   ```
+
 ## Testing
 
 
diff --git a/clodius/__init__.py b/clodius/__init__.py
index 8815fb52..13844a7b 100644
--- a/clodius/__init__.py
+++ b/clodius/__init__.py
@@ -1 +1 @@
-__version__ = "0.20.3"
+__version__ = "0.32.0"
diff --git a/clodius/alignment.py b/clodius/alignment.py
new file mode 100644
index 00000000..aff65c34
--- /dev/null
+++ b/clodius/alignment.py
@@ -0,0 +1,344 @@
+import subprocess
+from collections import Counter
+from Bio import SeqIO, AlignIO, pairwise2
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+from Bio.Align import MultipleSeqAlignment
+from typing import Literal, Optional
+import tempfile
+import os
+from Bio import Align
+import numpy as np
+
+DNA_ALPHABET = ["-", "A", "C", "G", "T"]
+PROTEIN_ALPHABET = [
+    "-",
+    "A",
+    "C",
+    "D",
+    "E",
+    "F",
+    "G",
+    "H",
+    "I",
+    "K",
+    "L",
+    "M",
+    "N",
+    "P",
+    "Q",
+    "R",
+    "S",
+    "T",
+    "V",
+    "W",
+    "Y",
+]
+
+
+def align_sequences(seq1, seq2):
+    """Return the first alignment of seq1 and seq2 under a fixed scoring scheme."""
+    scoring = {
+        "match_score": 1,
+        "mismatch_score": -4,
+        "open_gap_score": -6,
+        "extend_gap_score": -1,
+    }
+    aligner = Align.PairwiseAligner()
+    for attribute, value in scoring.items():
+        setattr(aligner, attribute, value)
+
+    return aligner.align(seq1, seq2)[0]
+
+
+def alignment_to_subs(alignment):
+    """Convert a BioPython alignment object into "subs" that are
+    compatible with the higlass pileup track.
+
+    :param alignment: A BioPython alignment (aligned, target, query are read)
+    :returns: A (start, end, subs) tuple. start and end are the target
+        coordinates of the first aligned block's start and the last aligned
+        block's end, each plus one. subs is a list of dicts of the form:
+
+        {
+            "pos": # offset of the modification from the alignment start
+            "type": # the type of modification, X for a mismatch,
+                    # D for deletion and I for insertion
+            "length": # the length of the modification
+            "base": # The target residue at the position. Only present for
+                substitutions, not insertions or deletions.
+            "variant": # The query residue at the position. Only present for
+                substitutions, not insertions or deletions.
+        }
+    """
+    parts = []
+    ttrue = 0  # offset from the alignment start, in target coordinates
+    tpos = 0  # target position just past the previous aligned block
+    qpos = 0  # query position just past the previous aligned block
+
+    start = 0
+    end = 0
+    aligneds = list(zip(alignment.aligned[0], alignment.aligned[1]))
+
+    for i, ((ts, te), (qs, qe)) in enumerate(aligneds):
+        ts, te, qs, qe = int(ts), int(te), int(qs), int(qe)
+
+        if i == 0:
+            # first block: the alignment starts here and "pos" offsets count from it
+            start = ts
+            tpos = ts
+            ttrue = 0
+        if i == len(aligneds) - 1:
+            # last block: te is the exclusive target end of the alignment
+            end = te
+
+        if ts > tpos:
+            parts += [{"pos": ttrue, "type": "D", "length": ts - tpos}]
+            ttrue += ts - tpos
+        if qs > qpos:
+            parts += [{"pos": ttrue, "type": "I", "length": qs - qpos}]
+        for i in range(te - ts):  # NOTE: reuses the outer loop's name i
+            if alignment.target[ts + i] != alignment.query[qs + i]:
+                parts += [
+                    {
+                        "pos": ttrue + i,
+                        "type": "X",
+                        "length": 1,
+                        "base": alignment.target[ts + i],
+                        "variant": alignment.query[qs + i],
+                    }
+                ]
+
+        ttrue += te - ts
+        tpos = te
+        qpos = qe
+
+    # Query residues left over after the last aligned block are an insertion
+    query_len = len(alignment.query)
+    if qpos < query_len:
+        parts += [{"pos": ttrue, "type": "I", "length": query_len - qpos}]
+
+    return start + 1, end + 1, parts
+
+
+def run_clustal_omega(sequences, seq_ids=None, seqtype="dna"):
+    """
+    Align sequences with Clustal Omega.
+
+    Args:
+        sequences (list of str): Input sequences (unaligned).
+        seq_ids (list of str, optional): IDs for sequences (default: numbered).
+        seqtype (str, optional): Upper-cased and passed to clustalo as
+            ``--seqtype``; ``None`` omits the flag.
+
+    Returns:
+        alignment (MultipleSeqAlignment): Biopython alignment object.
+
+    Raises:
+        subprocess.CalledProcessError: If clustalo exits with a non-zero status.
+    """
+    if seq_ids is None:
+        seq_ids = [f"seq{i}" for i in range(len(sequences))]
+
+    records = [
+        SeqRecord(Seq(seq), id=sid, description="")
+        for seq, sid in zip(sequences, seq_ids)
+    ]
+
+    # delete=False keeps the input on disk after it is closed so that
+    # clustalo can read it by name.
+    fasta_file = tempfile.NamedTemporaryFile("w", delete=False)
+    input_fasta = fasta_file.name
+    output_fasta = input_fasta + "_aligned.fasta"
+
+    try:
+        with fasta_file:
+            SeqIO.write(records, fasta_file, "fasta")
+
+        params = [
+            "clustalo",
+            "-i",
+            input_fasta,
+            "-o",
+            output_fasta,
+            "--force",
+            "--outfmt=fasta",
+        ]
+        if seqtype is not None:
+            params += [f"--seqtype={seqtype.upper()}"]
+
+        subprocess.run(params, check=True)
+        return AlignIO.read(output_fasta, "fasta")
+    finally:
+        # Clean up both temp files even when clustalo or parsing fails
+        for path in (input_fasta, output_fasta):
+            if os.path.exists(path):
+                os.remove(path)
+
+
+def refseq_alignment(sequences, refseq, seq_ids=None, seqtype=None):
+    """
+    Align sequences to a reference sequence using pairwise alignment.
+
+    Each sequence is globally aligned to ``refseq`` and only the columns where
+    the aligned reference is not a gap are kept, i.e. insertions relative to
+    the reference are dropped.
+
+    Args:
+        sequences (list of str): Input sequences to align to reference.
+        refseq (str): Reference sequence.
+        seq_ids (list of str, optional): IDs for sequences (default: numbered).
+        seqtype (str, optional): "dna" selects a match score of 2, any other
+            value a match score of 1. When None, "dna" is chosen only if the
+            reference and all sequences consist solely of A, C, G and T.
+
+    Returns:
+        alignment (MultipleSeqAlignment): The reference (id "refseq") followed
+            by one record per input sequence.
+    """
+    if seq_ids is None:
+        seq_ids = [f"seq{i}" for i in range(len(sequences))]
+
+    if seqtype is None:
+        dna_letters = set("ACGT")
+        candidates = [refseq] + sequences
+        is_dna = all(set(s.upper()) <= dna_letters for s in candidates)
+        seqtype = "dna" if is_dna else "protein"
+
+    # The two scoring schemes differ only in the match reward; mismatch (-1),
+    # gap open (-2) and gap extend (-0.5) penalties are shared.
+    match_score = 2 if seqtype == "dna" else 1
+
+    # The reference sequence is always the first record
+    records = [SeqRecord(Seq(refseq), id="refseq", description="")]
+
+    for seq_id, seq in zip(seq_ids, sequences):
+        best = pairwise2.align.globalms(
+            refseq, seq, match_score, -1, -2, -0.5, one_alignment_only=True
+        )[0]
+
+        # Project onto reference coordinates: keep the query character of
+        # every column in which the reference is not a gap.
+        projected = "".join(
+            query_char
+            for ref_char, query_char in zip(best.seqA, best.seqB)
+            if ref_char != "-"
+        )
+
+        records.append(SeqRecord(Seq(projected), id=seq_id, description=""))
+
+    return MultipleSeqAlignment(records)
+
+
+def make_pwm_from_alignment(
+    alignment, pseudocount=0, seqtype: Literal["dna", "protein"] = "dna"
+):
+    """
+    Build a PWM from an aligned set of sequences.
+
+    Args:
+        alignment (MultipleSeqAlignment): Aligned sequences.
+        pseudocount (int): Pseudocount added to every symbol in every column.
+        seqtype: "dna" or "protein"; selects the alphabet. If None, "dna" is
+            used when the alignment contains only A, C, G, T and "-".
+
+    Returns:
+        (pwm, seqs): ``pwm`` maps each alphabet symbol to its per-column
+            probabilities; ``seqs`` are the upper-cased aligned sequences.
+
+    Raises:
+        ZeroDivisionError: If pseudocount is 0 and a column contains no
+            symbol of the alphabet.
+    """
+    # Upper-case once so that type detection, counting and the returned
+    # sequences all see the same text.
+    seqs = [str(record.seq).upper() for record in alignment]
+
+    if seqtype is None:
+        all_chars = set().union(*seqs)
+        seqtype = "dna" if all_chars <= set("ACGT-") else "protein"
+
+    alphabet = DNA_ALPHABET if seqtype == "dna" else PROTEIN_ALPHABET
+    pwm = {base: [] for base in alphabet}
+
+    for pos in range(alignment.get_alignment_length()):
+        # The alphabets are upper-case, so count the upper-cased residues;
+        # counting record.seq directly would ignore lower-case input.
+        counts = Counter(seq[pos] for seq in seqs)
+
+        total = sum(counts.get(base, 0) + pseudocount for base in alphabet)
+        for base in alphabet:
+            prob = (counts.get(base, 0) + pseudocount) / total
+            pwm[base].append(prob)
+
+    return pwm, seqs
+
+
+def generate_pwm_from_sequences(
+    sequences,
+    seq_ids=None,
+    pseudocount=0,
+    seqtype: Optional[Literal["dna", "protein"]] = None,
+    refseq=None,
+):
+    """
+    Align sequences and derive a position weight matrix (PWM) from the result.
+
+    Args:
+        sequences (list of str): Input sequences (unaligned).
+        seq_ids (list of str, optional): Sequence IDs (default: numbered seq0, seq1, ...).
+        pseudocount (int, optional): Pseudocount for PWM smoothing.
+        seqtype: "dna", "protein" or None; forwarded to the aligner and to
+            make_pwm_from_alignment.
+        refseq: Reference sequence. When truthy, sequences are aligned to it with
+            refseq_alignment; otherwise run_clustal_omega builds an MSA.
+
+    Returns:
+        pwm (dict): PWM as dictionary {base: [probabilities]}.
+        seqs: The aligned sequences
+    """
+    if not refseq:
+        alignment = run_clustal_omega(sequences, seq_ids, seqtype=seqtype)
+    else:
+        alignment = refseq_alignment(sequences, refseq, seq_ids, seqtype)
+
+    return make_pwm_from_alignment(
+        alignment, pseudocount=pseudocount, seqtype=seqtype
+    )
+
+
+def create_distance_matrix(sequences, seq_length=150):
+    """Return a symmetric (n, n) array of pairwise sequence distances.
+
+    Each distance is ``seq_length`` minus the pair's ``globalxx`` alignment
+    score. ``seq_length`` defaults to 150, the value previously hard-coded.
+    """
+    from tqdm import tqdm
+
+    n = len(sequences)
+    distance_matrix = np.zeros((n, n))
+    score_pair = pairwise2.align.globalxx
+    for i in tqdm(range(n)):
+        for j in range(i + 1, n):
+            score = score_pair(sequences[i], sequences[j], score_only=True)
+            # Convert similarity to distance; the diagonal stays 0
+            distance_matrix[i, j] = distance_matrix[j, i] = seq_length - score
+    return distance_matrix
+
+
+def cluster_sequences(sequences):
+    """Average-linkage cluster sequences; return (linkage matrix, leaf order)."""
+    from scipy.cluster.hierarchy import dendrogram, linkage
+    from scipy.spatial.distance import squareform
+
+    # linkage() treats a 2-D array as observation vectors, so the square
+    # distance matrix must be converted to condensed form first.
+    condensed = squareform(create_distance_matrix(sequences))
+    linkage_matrix = linkage(condensed, method="average")
+
+    return linkage_matrix, dendrogram(linkage_matrix, no_plot=True)["leaves"]
+
+
+def order_by_clustering(sequences):
+    """Return sequences reordered by the leaf order of their clustering dendrogram."""
+    return [sequences[leaf] for leaf in cluster_sequences(sequences)[1]]
diff --git a/clodius/chromosomes.py b/clodius/chromosomes.py
index 2e14c3e7..bbc64a13 100644
--- a/clodius/chromosomes.py
+++ b/clodius/chromosomes.py
@@ -1,4 +1,52 @@
 import negspy.coordinates as nc
+import numpy as np
+import pandas as pd
+from smart_open import open
+
+
+def chromsizes_array_to_series(chromsizes):
+    """
+    Build a pandas Series of integer chromosome sizes indexed by chromosome
+    name from a sequence of [chrname, size, ...] rows.
+    """
+    names = [row[0] for row in chromsizes]
+    sizes = np.array([int(row[1]) for row in chromsizes])
+
+    return pd.Series(sizes, index=names)
+
+
+def chromsizes_as_array(chromsizes_filename):
+    """Load chromosome sizes as a list of [name, size, ...] rows.
+
+    A str argument is opened via smart_open and closed here; an open binary file
+    object is read and left open. Invalid lines raise ValueError.
+    """
+    if isinstance(chromsizes_filename, str):
+        with open(chromsizes_filename, "rb") as f:
+            return chromsizes_as_array(f)
+    chromsizes = []
+    for line in chromsizes_filename:
+        row = line.decode("utf8").strip().split("\t")
+        if len(row) < 2:
+            raise ValueError(f"Invalid chromsizes line, only 1 tsv column: {line}")
+        try:
+            row[1] = int(row[1])
+        except ValueError as err:
+            msg = f"Invalid chromsizes line, no integer in second column: {line}"
+            raise ValueError(msg) from err
+        chromsizes.append(row)
+    return chromsizes
+
+
+def chromsizes_as_series(chromsizes_filename):
+    """Read a tab-separated chromsizes file into a pandas Series.
+
+    Each stripped line is split on tabs and the rows are handed to
+    chromsizes_array_to_series.
+    """
+    with open(chromsizes_filename) as handle:
+        rows = [raw.strip().split("\t") for raw in handle]
+    return chromsizes_array_to_series(rows)
 
 
 def load_chromsizes(chromsizes_filename, assembly=None):
@@ -20,9 +68,7 @@ def load_chromsizes(chromsizes_filename, assembly=None):
         chrom_sizes = [chrom_info.chrom_lengths[c] for c in chrom_info.chrom_order]
     else:
         if assembly is None:
-            raise ValueError(
-                "No assembly or chromsizes specified. Please specify an assembly using the --assembly parameter or a set of chromsizes using the --chromsizes-filename parameter"
-            )
+            raise ValueError("No assembly or chromsizes specified")
 
         chrom_info = nc.get_chrominfo(assembly)
         chrom_names = nc.get_chromorder(assembly)
diff --git a/clodius/cli/aggregate.py b/clodius/cli/aggregate.py
index 9def0d47..15c632cd 100644
--- a/clodius/cli/aggregate.py
+++ b/clodius/cli/aggregate.py
@@ -1,29 +1,29 @@
 # -*- coding: utf-8 -*-
 from __future__ import division, print_function
 
+from . import cli
+
+import click
+import clodius.chromosomes as cch
+import clodius.multivec as cmv
+import clodius.array as ct
 import collections as col
-import gzip
-import json
+import h5py
 import math
+import negspy.coordinates as nc
+import numpy as np
 import os
 import os.path as op
 import random
+import scipy.misc as sm
+import slugid
 import sqlite3
 import sys
 import time
+import gzip
+import json
+from smart_open import open
 
-import h5py
-import numpy as np
-
-import click
-import clodius.array as ct
-import clodius.chromosomes as cch
-import clodius.multivec as cmv
-import negspy.coordinates as nc
-import scipy.misc as sm
-import slugid
-
-from . import cli
 from .utils import get_tile_pos_from_lng_lat, transaction
 
 
@@ -324,7 +324,7 @@ def line_to_dict(line):
         chrom_sizes=chrom_sizes,
         tile_size=tile_size,
         max_zoom=max_zoom,
-        max_width=tile_size * 2 ** max_zoom,
+        max_width=tile_size * 2**max_zoom,
         version=BED2DDB_VERSION,
     )
 
@@ -369,7 +369,7 @@ def line_to_dict(line):
 
     tile_counts = col.defaultdict(lambda: col.defaultdict(lambda: col.defaultdict(int)))
     # Sort from high to low importance
-    entries.sort(key=lambda x: -x["importance"])
+    entries = sorted(entries, key=lambda x: -x["importance"])
 
     interval_inserts = []
     position_index_inserts = []
@@ -466,7 +466,11 @@ def _bedfile(
     delimiter,
     chromsizes_filename,
     offset,
+    print_freq=1000,
 ):
+    """
+    :param print_freq: Print a status every print_freq lines. If 0, turn off status printing.
+    """
     BEDDB_VERSION = 3
 
     if output_file is None:
@@ -624,7 +628,7 @@ def line_to_np_array(line):
         chrom_sizes=chrom_sizes,
         tile_size=tile_size,
         max_zoom=max_zoom,
-        max_width=tile_size * 2 ** max_zoom,
+        max_width=tile_size * 2**max_zoom,
         header=header,
         version=BEDDB_VERSION,
     )
@@ -757,8 +761,9 @@ def line_to_np_array(line):
                     ),
                 )
 
-                if counter % 1000 == 0:
-                    print("counter:", counter, value["endPos"] - value["startPos"])
+                if print_freq:
+                    if counter % print_freq == 0:
+                        print("counter:", counter, value["endPos"] - value["startPos"])
 
                 exec_statement = "INSERT INTO position_index VALUES (?,?,?,?,?)"
                 c.execute(
@@ -827,7 +832,7 @@ def _bedgraph(
 
     tile_size = tile_size
     # how many values to read in at once while tiling
-    chunk_size = tile_size * 2 ** chunk_size
+    chunk_size = tile_size * 2**chunk_size
 
     dsets = []  # data sets at each zoom level
     nan_dsets = []  # store nan values
@@ -839,8 +844,8 @@ def _bedgraph(
     data_buffers = [[]]
     nan_data_buffers = [[]]
 
-    while assembly_size / 2 ** z > tile_size:
-        dset_length = math.ceil(assembly_size / 2 ** z)
+    while assembly_size / 2**z > tile_size:
+        dset_length = math.ceil(assembly_size / 2**z)
         dsets += [
             f.create_dataset(
                 "values_" + str(z), (dset_length,), dtype="f", compression="gzip"
@@ -873,7 +878,7 @@ def _bedgraph(
     d.attrs["max-zoom"] = max_zoom = math.ceil(
         math.log(d.attrs["max-length"] / tile_size) / math.log(2)
     )
-    d.attrs["max-width"] = tile_size * 2 ** max_zoom
+    d.attrs["max-width"] = tile_size * 2**max_zoom
     d.attrs["max-position"] = 0
 
     print("assembly size (max-length)", d.attrs["max-length"])
@@ -935,11 +940,9 @@ def add_values_to_data_buffers(buffers_to_add, nan_buffers_to_add):
 
             # aggregate and store aggregated values in the next zoom_level's
             # data
-            data_buffers[curr_zoom + 1] += list(
-                ct.aggregate(curr_chunk, 2 ** zoom_step)
-            )
+            data_buffers[curr_zoom + 1] += list(ct.aggregate(curr_chunk, 2**zoom_step))
             nan_data_buffers[curr_zoom + 1] += list(
-                ct.aggregate(nan_curr_chunk, 2 ** zoom_step)
+                ct.aggregate(nan_curr_chunk, 2**zoom_step)
             )
 
             data_buffers[curr_zoom] = data_buffers[curr_zoom][chunk_size:]
@@ -1050,9 +1053,9 @@ def add_values_to_data_buffers(buffers_to_add, nan_buffers_to_add):
         nan_dsets[curr_zoom][curr_pos : curr_pos + chunk_size] = nan_curr_chunk
 
         # aggregate and store aggregated values in the next zoom_level's data
-        data_buffers[curr_zoom + 1] += list(ct.aggregate(curr_chunk, 2 ** zoom_step))
+        data_buffers[curr_zoom + 1] += list(ct.aggregate(curr_chunk, 2**zoom_step))
         nan_data_buffers[curr_zoom + 1] += list(
-            ct.aggregate(nan_curr_chunk, 2 ** zoom_step)
+            ct.aggregate(nan_curr_chunk, 2**zoom_step)
         )
 
         data_buffers[curr_zoom] = data_buffers[curr_zoom][chunk_size:]
@@ -1438,7 +1441,10 @@ def bedgraph(
     "with .multires.bed",
 )
 @click.option(
-    "--assembly", "-a", help="The genome assembly that this file was created against",
+    "--assembly",
+    "-a",
+    help="The genome assembly that this file was created against",
+    default="hg19",
 )
 @click.option(
     "--importance-column",
@@ -1526,6 +1532,7 @@ def bedfile(
     "-a",
     help="The genome assembly that this file was created against",
     type=str,
+    default="hg19",
     show_default=True,
 )
 @click.option(
@@ -1542,6 +1549,7 @@ def bedfile(
 @click.option(
     "--has-header/--no-header",
     help="Does this file have a header that we should ignore",
+    type=bool,
     default=False,
     show_default=True,
 )
diff --git a/clodius/cli/convert.py b/clodius/cli/convert.py
index 734c12b8..6e051892 100644
--- a/clodius/cli/convert.py
+++ b/clodius/cli/convert.py
@@ -1,22 +1,35 @@
 import ast
+import logging
 import math
 import os
 import os.path as op
 import tempfile
+from tempfile import TemporaryDirectory
+from dataclasses import dataclass
 
 import h5py
 import numpy as np
-from tqdm import tqdm
+import json
+import hashlib
 
-import bbi
 import click
 import clodius.chromosomes as cch
 import clodius.multivec as cmv
 import negspy.coordinates as nc
 import scipy.misc as sm
+from clodius.tiles.bam import get_cigar_substitutions
+from clodius.tiles.utils import calc_max_width
+from collections import defaultdict
+import random
+from typing import List
+
+from typing import Optional
+import time
 
 from . import cli
 
+logger = logging.getLogger(__name__)
+
 
 def epilogos_bedline_to_vector(bedlines, row_infos=None):
     """
@@ -303,37 +316,38 @@ def agg(x):
     "--output-file",
     "-o",
     default=None,
-    help="The default output file name to use. If this isn't "
-    "specified, clodius will replace the current extension "
-    "with .hitile",
+    help="The default output file name to use. If this isn't"
+    "specified, clodius will replace the current extension"
+    "with .multivec",
 )
 @click.option(
     "--assembly",
     "-a",
     help="The genome assembly that this file was created against",
     type=click.Choice(nc.available_chromsizes()),
+    default="hg19",
 )
 @click.option(
     "--chromosome-col",
-    help="The column number (1-based) which contains the chromosome name",
+    help="The column number (1-based) which contains the chromosome " "name",
     default=1,
     type=int,
 )
 @click.option(
     "--from-pos-col",
-    help="The column number (1-based) which contains the starting position",
+    help="The column number (1-based) which contains the starting " "position",
     default=2,
     type=int,
 )
 @click.option(
     "--to-pos-col",
-    help="The column number (1-based) which contains the ending position",
+    help="The column number (1-based) which contains the ending " "position",
     default=3,
     type=int,
 )
 @click.option(
     "--value-col",
-    help="The column number (1-based) which contains the actual value",
+    help="The column number (1-based) which contains the actual value",
     default=4,
     type=int,
 )
@@ -367,8 +381,8 @@ def agg(x):
 @click.option(
     "--format",
     type=click.Choice(["default", "epilogos", "states"]),
-    help="'default':chr start end state1_value state2_value, etc; "
-    "'epilogos': chr start end [[state1_value, state1_num],[state2_value, state2_num],[etc]]; "
+    help="'default':chr start end state1_value state2_value, etc; "
+    "'epilogos': chr start end [[state1_value, state1_num],[state2_value, state2_num],[etc]]; "
     "'states': chr start end state_name",
     default="default",
 )
@@ -431,21 +445,262 @@ def bedfile_to_multivec(
     )
 
 
+@dataclass
+class ImportanceLine:  # one BED line paired with the score used to rank it
+    line: List[str]  # tab-separated fields of the line
+    importance: float  # sort key; larger values are placed first
+
+
+def line_tiles(importance_line: ImportanceLine, chrom_info):
+    """
+    Return one (chrom, zoom_level, tile_pos, importance, line) tuple for each tile from start // tile_size to end // tile_size.
+    """
+    line = importance_line.line
+
+    chrom = line[0]
+    chrom_len = calc_max_width(chrom_info.chrom_lengths[chrom])  # NOTE(review): calc_max_width is defined elsewhere; confirm what width it returns
+    interval_len = int(line[2]) - int(line[1])  # a zero-length interval raises ZeroDivisionError below
+    zoom_level = math.floor(math.log(chrom_len / interval_len) / math.log(2))  # floor(log2(chrom_len / interval_len))
+
+    tile_size = int(chrom_len / 2**zoom_level)
+    tile_start = int(line[1]) // tile_size
+    tile_end = int(line[2]) // tile_size
+
+    return [
+        (chrom, zoom_level, tile_pos, importance_line.importance, line)
+        for tile_pos in range(tile_start, tile_end + 1)
+    ]
+
+
+def line_hash(line: List[str]) -> str:
+    return hashlib.md5("\t".join(line).encode("utf8")).hexdigest()  # hex MD5 of the tab-joined fields
+
+
+def promote_tiles(tiled_lines, max_per_tile=5):
+    """Move each tiled line up to parent tiles (zoom_level - 1, tile_pos // 2) while
+    the parent holds fewer than max_per_tile lines. Returns the re-tiled tuples;
+    copies whose line is already in the tile they reach are dropped. Raises
+    ValueError if a tile ends up with more than max_per_tile lines."""
+    new_tiled_lines = []
+    tile_counts = defaultdict(int)
+    tile_hashes = defaultdict(set)
+
+    for chrom, zoom_level, tile_pos, importance, line in tiled_lines:
+        _line_hash = line_hash(line)
+
+        # Entries that start out at zoom level 0 are dropped, not emitted.
+        # NOTE(review): confirm this is intended rather than an oversight.
+        if zoom_level == 0:
+            continue
+
+        # Set when a tile reached during promotion already holds this line.
+        line_found = False
+
+        while (
+            zoom_level > 0
+            and tile_counts[(chrom, zoom_level - 1, tile_pos // 2)] < max_per_tile
+        ):
+            # step up one level: the parent of tile_pos is tile_pos // 2
+            zoom_level -= 1
+            tile_pos //= 2
+
+            if _line_hash in tile_hashes[(chrom, zoom_level, tile_pos)]:
+                # this line is already in a tile
+                line_found = True
+                break
+
+        if line_found:
+            continue
+
+        new_tiled_lines.append((chrom, zoom_level, tile_pos, importance, line))
+
+        tile_counts[(chrom, zoom_level, tile_pos)] += 1
+        tile_hashes[(chrom, zoom_level, tile_pos)].add(_line_hash)
+
+        # A line that could not be promoted may overfill the tile it started in.
+        if tile_counts[(chrom, zoom_level, tile_pos)] > max_per_tile:
+            raise ValueError(
+                f"Too many items in this tile: {chrom}, {zoom_level}, {tile_pos}, "
+                f"{tile_counts[(chrom, zoom_level, tile_pos)]}"
+            )
+
+    return new_tiled_lines
+
+
+def dump_chunk_to_file(root, chrom, zoom_level, chunk, max_per_tile):
+    """Write the (index, text) pairs in chunk to root["values"][chrom][str(zoom_level)].
+
+    The chrom group and a gzip-compressed UTF-8 string dataset with
+    2**zoom_level * max_per_tile slots are created if missing."""
+    logger.info("========= Dumping chunk: %d", zoom_level)
+
+    if chrom not in root["values"]:
+        root["values"].create_group(chrom)
+
+    if str(zoom_level) not in root["values"][chrom]:
+        logger.info(
+            "Creating new dataset for chrom %s zoom_level %d", chrom, zoom_level
+        )
+        dt = h5py.string_dtype(encoding="utf-8")
+        num_tiles = 2**zoom_level
+        root["values"][chrom].create_dataset(
+            str(zoom_level),
+            shape=(num_tiles * max_per_tile,),
+            dtype=dt,
+            compression="gzip",
+        )
+
+    chunk = sorted(chunk)  # tuples compare by their first element, the index
+
+    ixs = [c[0] for c in chunk]
+    lines = [c[1] for c in chunk]
+
+    root["values"][chrom][str(zoom_level)][ixs] = lines
+
+
+def _bedfile_to_hibed(
+    filepath: str,
+    output_file: Optional[str] = None,
+    assembly: Optional[str] = "hg19",
+    chromsizes_filename: Optional[str] = None,
+    max_per_tile: int = 1024,
+    importance_column: Optional[int] = None,
+    method="random",
+):
+    """Tile a tab-separated BED file into an HDF5 file of per-chromosome zoom levels.
+
+    Lines are ranked by importance ("random", interval "size", or the 1-based
+    importance_column for "column"), tiled with line_tiles, promoted with
+    promote_tiles and stored as JSON strings under values/<chrom>/<zoom_level>.
+    output_file defaults to filepath with its extension replaced by ".hibed".
+    Raises ValueError for an unknown method, a missing importance_column or an overfull tile."""
+    logging.basicConfig(level=logging.INFO)
+
+    if method not in ["random", "size", "column"]:
+        raise ValueError(
+            f"Unknown method {method}. Options are 'random' or 'size' or 'column'"
+        )
+
+    if method == "column" and importance_column is None:
+        raise ValueError(
+            'If method is "column", then importance_column must be specified'
+        )
+    (chrom_info, chrom_names, chrom_sizes) = cch.load_chromsizes(
+        chromsizes_filename, assembly
+    )
+
+    if output_file is None:
+        output_file = op.splitext(filepath)[0] + ".hibed"
+
+    with open(filepath) as f:
+        parts = [line.strip().split("\t") for line in f]
+        if method == "size":
+            interval_lens = [int(p[2]) - int(p[1]) for p in parts]
+            importance_lines = [
+                ImportanceLine(importance=importance, line=line)
+                for (importance, line) in zip(interval_lens, parts)
+            ]
+        elif method == "random":
+            importance_lines = [
+                ImportanceLine(importance=random.random(), line=line) for line in parts
+            ]
+        elif method == "column":
+            importance_lines = [
+                ImportanceLine(importance=float(line[importance_column - 1]), line=line)
+                for line in parts
+            ]
+
+    importance_lines = sorted(
+        importance_lines, key=lambda x: x.importance, reverse=True
+    )
+
+    logger.info(
+        "Tiling on %d lines with max_per_tile of %d.",
+        len(importance_lines),
+        max_per_tile,
+    )
+
+    # add zoom level and tile position to each line
+    tiled_lines = []
+    for importance_line in importance_lines:
+        tiled_lines += line_tiles(importance_line, chrom_info)
+
+    new_tiled_lines = promote_tiles(tiled_lines, max_per_tile=max_per_tile)
+    max_zoom_level = max([zoom_level for chrom, zoom_level, *_ in new_tiled_lines])
+    logger.info("Max zoom: %d", max_zoom_level)
+
+    if op.exists(output_file):
+        os.remove(output_file)
+
+    with h5py.File(output_file, mode="w") as root:
+        info = root.create_group("info")
+        root.create_group("values")
+
+        info.attrs["max_per_tile"] = max_per_tile
+        info.attrs["max_zoom"] = max_zoom_level
+
+        tile_counts = defaultdict(int)
+
+        chunks = defaultdict(list)
+        max_chunk_size = 100
+        prev_zoom_levels = dict()
+
+        new_tiled_lines = sorted(new_tiled_lines)
+
+        t1 = time.time()
+        for chrom, zoom_level, tile_pos, importance, line in new_tiled_lines:
+            ix = tile_pos * max_per_tile + tile_counts[(chrom, zoom_level, tile_pos)]
+
+            # Flush the pending chunk before switching zoom level so every chunk is
+            # written to the dataset of the zoom level its indices were computed for.
+            if (
+                chrom in prev_zoom_levels and zoom_level != prev_zoom_levels[chrom]
+            ) or len(chunks[chrom]) > max_chunk_size:
+                dump_chunk_to_file(
+                    root,
+                    chrom,
+                    prev_zoom_levels[chrom],
+                    chunks[chrom],
+                    max_per_tile=max_per_tile,
+                )
+                chunks[chrom] = []
+
+            if tile_counts[(chrom, zoom_level, tile_pos)] == max_per_tile:
+                raise ValueError(
+                    f"Too many items in this tile: {chrom}, {zoom_level}, {tile_pos}, {tile_counts[(chrom, zoom_level, tile_pos)]}"
+                )
+
+            chunks[chrom] += [
+                (ix, json.dumps({"importance": importance, "line": "\t".join(line)}))
+            ]
+            tile_counts[(chrom, zoom_level, tile_pos)] += 1
+            prev_zoom_levels[chrom] = zoom_level
+
+        for chrom in chrom_names:
+            if chrom in prev_zoom_levels:
+                dump_chunk_to_file(
+                    root, chrom, prev_zoom_levels[chrom], chunks[chrom], max_per_tile
+                )
+
+    logger.info("Finished writing to file: %f", time.time() - t1)
+
+
 @convert.command()
-@click.argument("filepaths", metavar="FILEPATHS", nargs=-1)
+@click.argument("filepath")
 @click.option(
     "--output-file",
     "-o",
     default=None,
-    help="The default output file name to use. If this isn't "
-    "specified, clodius will replace the current extension "
-    "with .hitile",
+    help="The default output file name to use. If this isn't "
+    "specified, clodius will replace the current extension "
+    "with .hibed",
 )
 @click.option(
     "--assembly",
     "-a",
     help="The genome assembly that this file was created against",
     type=click.Choice(nc.available_chromsizes()),
+    default="hg19",
 )
 @click.option(
     "--chromsizes-filename",
@@ -453,91 +708,154 @@ def bedfile_to_multivec(
     default=None,
 )
 @click.option(
-    "--row-infos-filename",
-    help="A file containing the names of the rows in the multivec file",
+    "--max-per-tile",
+    "-t",
+    default=256,
+    help="The maximum number of items in each tile.",
+)
+@click.option(
+    "--importance-column",
     default=None,
+    type=int,
+    help="The column (1-based) containing the importance values.",
 )
 @click.option(
-    "--tile-size",
-    "-t",
-    default=256,
-    help="The number of data points in each tile."
-    "Used to determine the number of zoom levels"
-    "to create.",
+    "--method",
+    "-m",
+    default="random",
+    type=click.Choice(["random", "size", "column"]),
+    help="The method to use for tile promotion: random (the default), size or column",
 )
-def bigwigs_to_multivec(
-    filepaths,
+def bedfile_to_hibed(
+    filepath,
     output_file,
     assembly,
     chromsizes_filename,
-    row_infos_filename,
-    tile_size,
+    max_per_tile,
+    importance_column,
+    method,
 ):
-    with tempfile.TemporaryDirectory() as td:
-        print("temporary dir:", td)
+    _bedfile_to_hibed(
+        filepath,
+        output_file,
+        assembly,
+        chromsizes_filename,
+        max_per_tile,
+        importance_column,
+        method,
+    )
 
-        temp_file = op.join(td, "temp.mv5")
-        f_out = h5py.File(temp_file, "w")
 
-        (chrom_info, chrom_names, chrom_lengths) = cch.load_chromsizes(
-            chromsizes_filename, assembly
-        )
+def reads_to_array(f_in, h_out, ref, chrom_len):
+    """Convert BAM file reads to an HDF5 array.
 
-        if row_infos_filename is not None:
-            with open(row_infos_filename, "r") as f:
-                row_infos = [line.strip().encode("utf8") for line in f]
+    Arguments:
 
-        else:
-            row_infos = None
+    f_in: The pysam AlignmentFile handle
+    h_out: An hdf5 file handle to store the output arrays
+    ref: The chromosome name
+    chrom_len: The length of the chromosome
 
-        starting_resolution = 1
-        resolution = starting_resolution
-        for chrom in chrom_info.chrom_order:
-            f_out.create_dataset(
-                chrom,
-                (
-                    math.ceil(chrom_info.chrom_lengths[chrom] / starting_resolution),
-                    len(filepaths),
-                ),
-                fillvalue=np.nan,
-                compression="gzip",
-            )
-
-        # Fill in data for each bigwig file.
-        for bw_index, bw_file in tqdm(list(enumerate(filepaths)), desc="bigwigs"):
-            if bbi.is_bigwig(bw_file):
-                chromsizes = bbi.chromsizes(bw_file)
-                matching_chromosomes = set(chromsizes.keys()).intersection(
-                    set(chrom_names)
-                )
-
-                # Fill in data for each resolution of a bigwig file.
-                for chr_name in matching_chromosomes:
-                    print("chr_name:", chr_name, resolution)
-                    chr_len = chrom_info.chrom_lengths[chr_name]
-                    chr_shape = (math.ceil(chr_len / resolution), len(filepaths))
-                    arr = bbi.fetch(
-                        bw_file, chr_name, 0, chr_len, chr_shape[0], summary="sum"
-                    )
-                    f_out[chr_name][:, bw_index] = arr
+    """
+    logger.info("Creating array for chrom: %s with length: %d", ref, chrom_len)
+    reads = f_in.fetch(ref, 0, chrom_len)
+
+    subs = {
+        "A": np.zeros((chrom_len,)),
+        "C": np.zeros((chrom_len,)),
+        "G": np.zeros((chrom_len,)),
+        "T": np.zeros((chrom_len,)),
+        "S": np.zeros((chrom_len,)),
+        "M": np.zeros((chrom_len,)),
+        "I": np.zeros((chrom_len,)),
+        "D": np.zeros((chrom_len,)),
+        "H": np.zeros((chrom_len,)),
+        "N": np.zeros((chrom_len,)),
+    }
+
+    for read in reads:
+        ap = [
+            p
+            for p in read.get_aligned_pairs(with_seq=True, matches_only=True)
+            if p[2].islower()
+        ]
+        #     print("read", read.reference_start)
+        subs["M"][read.reference_start + 1 : read.reference_end + 1] += 1
+
+        for start, cigar_op, oplen in get_cigar_substitutions(read):
+            if cigar_op == "I":
+                subs[cigar_op][start + 1] += 1
             else:
-                print(f"{bw_file} not is_bigwig")
+                subs[cigar_op][start + 1 : start + 1 + oplen] += 1
+
+        for p in ap:
+            subs["M"][p[1] + 1] -= 1
+            subs[read.query_sequence[p[0]]][p[1] + 1] += 1
+
+    arr = np.array(
+        [
+            subs["A"],
+            subs["T"],
+            subs["G"],
+            subs["C"],
+            subs["S"],
+            subs["M"],
+            subs["I"],
+            subs["D"],
+        ]
+    ).T
+    logger.info("Dumping array with shape: %s", str(arr.shape))
+
+    h_out.create_dataset(ref, data=arr, compression="gzip")
+    pass
 
-        f_out.flush()
 
-        f_out.close()
-        tf = temp_file
-        f_in = h5py.File(tf, "r")
+def sum_agg(x):
+    return np.nansum(x.T.reshape((x.shape[1], -1, 2)), axis=2).T
+
+
+@convert.command()
+@click.argument("filepath")
+@click.option("--index-filepath", "-i", default=None)
+@click.option(
+    "--output-file",
+    "-o",
+    default=None,
+    help="The default output file name to use. If this isn't "
+    "specified, clodius will replace the current extension "
+    "with .bam.mv5",
+)
+def bamfile_to_multivec(filepath, index_filepath, output_file):
+    """Convert a BAM file to a multivec representation."""
+    import pysam
+
+    logging.basicConfig(level=logging.INFO)
+
+    if index_filepath is None:
+        index_filepath = filepath + ".bai"
+
+    if output_file is None:
+        output_file = op.splitext(filepath)[0] + ".bam.mv5"
+    logger.info("Output file: %s", output_file)
+
+    f = pysam.AlignmentFile(filepath, index_filename=index_filepath)
+
+    logger.info("Loaded alignment file")
+
+    with TemporaryDirectory() as tmp_dir:
+        h_mid = h5py.File(op.join(tmp_dir, "mid.h5"), "w")
 
-        def agg(x):
-            return x.T.reshape((x.shape[1], -1, 2)).sum(axis=2).T
+        for ref, chrom_len in zip(f.references, f.lengths):
+            reads_to_array(f, h_mid, ref, chrom_len)
 
+        logger.info("Creating multivec array")
         cmv.create_multivec_multires(
-            f_in,
-            chromsizes=zip(chrom_names, chrom_lengths),
-            agg=agg,
-            starting_resolution=starting_resolution,
-            tile_size=tile_size,
+            h_mid,
+            zip(f.references, f.lengths),
+            agg=sum_agg,
+            #     agg=log_sum_exp_agg,
+            starting_resolution=1,
+            row_infos=["a", "t", "g", "c", "s", "m", "i", "d", "h", "n"],
             output_file=output_file,
-            row_infos=row_infos,
+            tile_size=256,
         )
diff --git a/clodius/db_tiles.py b/clodius/db_tiles.py
index 2d5a2a9a..53ef2cc9 100644
--- a/clodius/db_tiles.py
+++ b/clodius/db_tiles.py
@@ -1,54 +1,67 @@
 import collections as col
 import math
-import sqlite3
+import sosqlite
+import apsw
 
+sovfs = sosqlite.SmartOpenVFS(name="so-vfs")
 
-def get_tileset_info(db_file):
-    conn = sqlite3.connect(db_file)
-    c = conn.cursor()
-
-    row = c.execute("SELECT * from tileset_info").fetchone()
-    if row is not None and len(row) == 9:
-        header = row[8]
-    else:
-        header = ""
-
-    tileset_info = {
-        "zoom_step": row[0],
-        "max_length": row[1],
-        "assembly": row[2],
-        "chrom_names": row[3],
-        "chrom_sizes": row[4],
-        "tile_size": row[5],
-        "max_zoom": row[6],
-        "max_width": row[7],
-        "min_pos": [1],
-        "max_pos": [row[1]],
-        "header": header,
-    }
-    conn.close()
 
-    return tileset_info
+def get_tileset_info(db_file):
+    with apsw.Connection(
+        db_file, vfs=sovfs.name, flags=apsw.SQLITE_OPEN_READONLY
+    ) as conn:
+        c = conn.cursor()
+
+        row = c.execute("SELECT * from tileset_info").fetchone()
+        if row is not None and len(row) == 9:
+            header = row[8]
+        else:
+            header = ""
+
+        tileset_info = {
+            "zoom_step": row[0],
+            "max_length": row[1],
+            "assembly": row[2],
+            "chrom_names": row[3],
+            "chrom_sizes": row[4],
+            "tile_size": row[5],
+            "max_zoom": row[6],
+            "max_width": row[7],
+            "min_pos": [1],
+            "max_pos": [row[1]],
+            "header": header,
+        }
+
+        return tileset_info
 
 
 def get_2d_tileset_info(db_file):
-    conn = sqlite3.connect(db_file)
-    c = conn.cursor()
-
-    row = c.execute("SELECT * from tileset_info").fetchone()
-    tileset_info = {
-        "zoom_step": row[0],
-        "max_length": row[1],
-        "assembly": row[2],
-        "chrom_names": row[3],
-        "chrom_sizes": row[4],
-        "tile_size": row[5],
-        "max_zoom": row[6],
-        "max_width": row[7],
-        "min_pos": [1, 1],
-        "max_pos": [row[1], row[1]],
-    }
-    conn.close()
+    with apsw.Connection(
+        db_file, vfs=sovfs.name, flags=apsw.SQLITE_OPEN_READONLY
+    ) as conn:
+        c = conn.cursor()
+
+        row = c.execute("SELECT * from tileset_info").fetchone()
+        if row is not None and len(row) == 9:
+            header = row[8]
+        else:
+            header = ""
+
+        tileset_info = {
+            "zoom_step": row[0],
+            "max_length": row[1],
+            "assembly": row[2],
+            "chrom_names": row[3],
+            "chrom_sizes": row[4],
+            "tile_size": row[5],
+            "max_zoom": row[6],
+            "max_width": row[7],
+            "min_pos": [1, 1],
+            "max_pos": [row[1], row[1]],
+            "header": header,
+        }
+
+        return tileset_info
 
     return tileset_info
 
@@ -74,62 +87,62 @@ def get_tiles(db_file, zoom, tile_x_pos, num_tiles=1):
         A set of tiles, indexed by position
     """
     tileset_info = get_tileset_info(db_file)
-    conn = sqlite3.connect(db_file)
+    with apsw.Connection(
+        db_file, vfs=sovfs.name, flags=apsw.SQLITE_OPEN_READONLY
+    ) as conn:
+        c = conn.cursor()
 
-    c = conn.cursor()
+        tile_width = tileset_info["max_width"] / 2 ** zoom
 
-    tile_width = tileset_info["max_width"] / 2 ** zoom
+        tile_start_pos = tile_width * tile_x_pos
+        tile_end_pos = tile_start_pos + num_tiles * tile_width
 
-    tile_start_pos = tile_width * tile_x_pos
-    tile_end_pos = tile_start_pos + num_tiles * tile_width
+        query = """
+        SELECT startPos, endPos, chrOffset, importance, fields, uid
+        FROM intervals,position_index
+        WHERE
+            intervals.id=position_index.id AND
+            zoomLevel <= {} AND
+            rEndPos >= {} AND
+            rStartPos <= {}
+        """.format(
+            zoom, tile_start_pos, tile_end_pos
+        )
 
-    query = """
-    SELECT startPos, endPos, chrOffset, importance, fields, uid
-    FROM intervals,position_index
-    WHERE
-        intervals.id=position_index.id AND
-        zoomLevel <= {} AND
-        rEndPos >= {} AND
-        rStartPos <= {}
-    """.format(
-        zoom, tile_start_pos, tile_end_pos
-    )
+        rows = c.execute(query).fetchall()
 
-    rows = c.execute(query).fetchall()
+        new_rows = col.defaultdict(list)
 
-    new_rows = col.defaultdict(list)
+        for r in rows:
+            try:
+                uid = r[5].decode("utf-8")
+            except AttributeError:
+                uid = r[5]
 
-    for r in rows:
-        try:
-            uid = r[5].decode("utf-8")
-        except AttributeError:
-            uid = r[5]
+            tile_pos = tile_x_pos + math.floor((r[0] - tile_start_pos) / tile_width)
 
-        tile_pos = tile_x_pos + math.floor((r[0] - tile_start_pos) / tile_width)
+            x_start = r[0]
+            x_end = r[1]
 
-        x_start = r[0]
-        x_end = r[1]
-
-        for i in range(tile_x_pos, tile_x_pos + num_tiles):
-            tile_x_start = i * tile_width
-            tile_x_end = (i + 1) * tile_width
-            tile_pos = i
+            for i in range(tile_x_pos, tile_x_pos + num_tiles):
+                tile_x_start = i * tile_width
+                tile_x_end = (i + 1) * tile_width
+                tile_pos = i
 
-            if x_start < tile_x_end and x_end >= tile_x_start:
-                new_rows[tile_pos] += [
-                    # add the position offset to the returned values
-                    {
-                        "xStart": r[0],
-                        "xEnd": r[1],
-                        "chrOffset": r[2],
-                        "importance": r[3],
-                        "uid": uid,
-                        "fields": r[4].split("\t"),
-                    }
-                ]
-    conn.close()
+                if x_start < tile_x_end and x_end >= tile_x_start:
+                    new_rows[tile_pos] += [
+                        # add the position offset to the returned values
+                        {
+                            "xStart": r[0],
+                            "xEnd": r[1],
+                            "chrOffset": r[2],
+                            "importance": r[3],
+                            "uid": uid,
+                            "fields": r[4].split("\t"),
+                        }
+                    ]
 
-    return new_rows
+        return new_rows
 
 
 def get_2d_tiles(db_file, zoom, tile_x_pos, tile_y_pos, numx=1, numy=1):
@@ -157,77 +170,76 @@ def get_2d_tiles(db_file, zoom, tile_x_pos, tile_y_pos, numx=1, numy=1):
         A set of tiles, indexed by position
     """
     tileset_info = get_tileset_info(db_file)
-
-    conn = sqlite3.connect(db_file)
-
-    c = conn.cursor()
-    tile_width = tileset_info["max_width"] / 2 ** zoom
-
-    tile_x_start_pos = tile_width * tile_x_pos
-    tile_x_end_pos = tile_x_start_pos + (numx * tile_width)
-
-    tile_y_start_pos = tile_width * tile_y_pos
-    tile_y_end_pos = tile_y_start_pos + (numy * tile_width)
-
-    query = """
-    SELECT
-        fromX, toX, fromY, toY, chrOffset, importance, fields, uid, intervals.id
-    FROM
-        intervals, position_index
-    WHERE
-        intervals.id=position_index.id AND
-        zoomLevel <= {} AND
-        rToX >= {} AND
-        rFromX <= {} AND
-        rToY >= {} AND
-        rFromY <= {}
-    """.format(
-        zoom, tile_x_start_pos, tile_x_end_pos, tile_y_start_pos, tile_y_end_pos
-    )
-
-    rows = c.execute(query).fetchall()
-
-    new_rows = col.defaultdict(list)
-
-    for r in rows:
-        try:
-            uid = r[7].decode("utf-8")
-        except AttributeError:
-            uid = r[7]
-
-        x_start = r[0]
-        x_end = r[1]
-        y_start = r[2]
-        y_end = r[3]
-
-        for i in range(tile_x_pos, tile_x_pos + numx):
-            for j in range(tile_y_pos, tile_y_pos + numy):
-                tile_x_start = i * tile_width
-                tile_x_end = (i + 1) * tile_width
-
-                tile_y_start = j * tile_width
-                tile_y_end = (j + 1) * tile_width
-
-                if (
-                    x_start < tile_x_end
-                    and x_end >= tile_x_start
-                    and y_start < tile_y_end
-                    and y_end >= tile_y_start
-                ):
-                    # add the position offset to the returned values
-                    new_rows[(i, j)] += [
-                        {
-                            "xStart": r[0],
-                            "xEnd": r[1],
-                            "yStart": r[2],
-                            "yEnd": r[3],
-                            "chrOffset": r[4],
-                            "importance": r[5],
-                            "uid": uid,
-                            "id": r[8],
-                            "fields": r[6].split("\t"),
-                        }
-                    ]
-    conn.close()
-
-    return new_rows
+    with apsw.Connection(
+        db_file, vfs=sovfs.name, flags=apsw.SQLITE_OPEN_READONLY
+    ) as conn:
+        c = conn.cursor()
+        tile_width = tileset_info["max_width"] / 2 ** zoom
+
+        tile_x_start_pos = tile_width * tile_x_pos
+        tile_x_end_pos = tile_x_start_pos + (numx * tile_width)
+
+        tile_y_start_pos = tile_width * tile_y_pos
+        tile_y_end_pos = tile_y_start_pos + (numy * tile_width)
+
+        query = """
+        SELECT
+            fromX, toX, fromY, toY, chrOffset, importance, fields, uid, intervals.id
+        FROM
+            intervals, position_index
+        WHERE
+            intervals.id=position_index.id AND
+            zoomLevel <= {} AND
+            rToX >= {} AND
+            rFromX <= {} AND
+            rToY >= {} AND
+            rFromY <= {}
+        """.format(
+            zoom, tile_x_start_pos, tile_x_end_pos, tile_y_start_pos, tile_y_end_pos
+        )
+
+        rows = c.execute(query).fetchall()
+
+        new_rows = col.defaultdict(list)
+
+        for r in rows:
+            try:
+                uid = r[7].decode("utf-8")
+            except AttributeError:
+                uid = r[7]
+
+            x_start = r[0]
+            x_end = r[1]
+            y_start = r[2]
+            y_end = r[3]
+
+            for i in range(tile_x_pos, tile_x_pos + numx):
+                for j in range(tile_y_pos, tile_y_pos + numy):
+                    tile_x_start = i * tile_width
+                    tile_x_end = (i + 1) * tile_width
+
+                    tile_y_start = j * tile_width
+                    tile_y_end = (j + 1) * tile_width
+
+                    if (
+                        x_start < tile_x_end
+                        and x_end >= tile_x_start
+                        and y_start < tile_y_end
+                        and y_end >= tile_y_start
+                    ):
+                        # add the position offset to the returned values
+                        new_rows[(i, j)] += [
+                            {
+                                "xStart": r[0],
+                                "xEnd": r[1],
+                                "yStart": r[2],
+                                "yEnd": r[3],
+                                "chrOffset": r[4],
+                                "importance": r[5],
+                                "uid": uid,
+                                "id": r[8],
+                                "fields": r[6].split("\t"),
+                            }
+                        ]
+
+        return new_rows
diff --git a/clodius/higlass_getter.py b/clodius/higlass_getter.py
index 242eae2d..51dbf7ce 100644
--- a/clodius/higlass_getter.py
+++ b/clodius/higlass_getter.py
@@ -29,7 +29,6 @@ def absCoord2bin(c, pos):
 
 
 def getData(FILEPATH, zoomLevel, startPos1, endPos1, startPos2, endPos2):
-
     groupname = str(zoomLevel)
 
     with h5py.File(FILEPATH, "r") as f:
@@ -47,7 +46,6 @@ def getData(FILEPATH, zoomLevel, startPos1, endPos1, startPos2, endPos2):
 
 
 def getData2(cooler_matrix, zoomLevel, startPos1, endPos1, startPos2, endPos2):
-
     c = cooler_matrix["cooler"]
     matrix = cooler_matrix["matrix"]
 
@@ -81,7 +79,7 @@ def getData3(cooler_matrix, zoomLevel, startPos1, endPos1, startPos2, endPos2):
     if (i1 - i0) == 0 or (j1 - j0) == 0:
         return pd.DataFrame(columns=["genome_start", "genome_end", "balanced"])
 
-    pixels = c.matrix(as_pixels=True, max_chunk=np.inf)[i0:i1, j0:j1]
+    pixels = c.matrix(as_pixels=True)[i0:i1, j0:j1]
 
     if not len(pixels):
         return pd.DataFrame(columns=["genome_start", "genome_end", "balanced"])
@@ -99,7 +97,6 @@ def getData3(cooler_matrix, zoomLevel, startPos1, endPos1, startPos2, endPos2):
 
 
 def getInfo(FILEPATH):
-
     with h5py.File(FILEPATH, "r") as f:
         total_length = int(cumul_lengths[-1])
         binsize = int(f["0"].attrs["bin-size"])
@@ -107,7 +104,7 @@ def getInfo(FILEPATH):
         n_tiles = total_length / binsize / TILESIZE
         print("total_length:", total_length, binsize, TILESIZE)
         n_zooms = int(np.ceil(np.log2(n_tiles)))
-        max_width = binsize * TILESIZE * 2 ** n_zooms
+        max_width = binsize * TILESIZE * 2**n_zooms
 
         info = {
             "min_pos": [0.0, 0.0],
diff --git a/clodius/models/gff_models.py b/clodius/models/gff_models.py
new file mode 100644
index 00000000..ea4aebb6
--- /dev/null
+++ b/clodius/models/gff_models.py
@@ -0,0 +1,181 @@
+from typing import List, Optional, Union, Literal
+from pydantic import BaseModel, Field
+
+
+class BaseGFFEntity(BaseModel):
+    """Base class for all GFF entities"""
+
+    type: str
+    id: str
+    chrom: str
+    start: int
+    end: int
+    strand: Optional[Literal["+", "-", "."]] = None
+    score: Optional[float] = None
+    phase: Optional[int] = None
+    attributes: Optional[dict] = None
+
+
+class Exon(BaseGFFEntity):
+    """Exon entity - can be child of any transcript type"""
+
+    type: Literal["exon"] = "exon"
+
+
+class CDS(BaseGFFEntity):
+    """Coding sequence entity - child of mRNA"""
+
+    type: Literal["CDS"] = "CDS"
+
+
+class Gene(BaseGFFEntity):
+    """Root gene entity"""
+
+    type: Literal["gene"] = "gene"
+    gene_biotype: Optional[str] = None
+    pseudo: Optional[bool] = False
+
+
+class mRNA(BaseGFFEntity):
+    """Protein-coding transcript"""
+
+    type: Literal["mRNA"] = "mRNA"
+    parent_gene_id: str
+    exons: List[Exon] = Field(default_factory=list)
+    cds: List[CDS] = Field(default_factory=list)
+
+
+class lnc_RNA(BaseGFFEntity):
+    """Long non-coding RNA transcript"""
+
+    type: Literal["lnc_RNA"] = "lnc_RNA"
+    parent_gene_id: str
+    exons: List[Exon] = Field(default_factory=list)
+
+
+class miRNA(BaseGFFEntity):
+    """Mature microRNA"""
+
+    type: Literal["miRNA"] = "miRNA"
+    parent_transcript_id: str
+    exons: List[Exon] = Field(default_factory=list)
+
+
+class primary_transcript(BaseGFFEntity):
+    """Precursor RNA transcript"""
+
+    type: Literal["primary_transcript"] = "primary_transcript"
+    parent_gene_id: str
+    exons: List[Exon] = Field(default_factory=list)
+    mirnas: List[miRNA] = Field(default_factory=list)
+
+
+class antisense_RNA(BaseGFFEntity):
+    """Antisense RNA transcript"""
+
+    type: Literal["antisense_RNA"] = "antisense_RNA"
+    parent_gene_id: str
+    exons: List[Exon] = Field(default_factory=list)
+
+
+class snoRNA(BaseGFFEntity):
+    """Small nucleolar RNA transcript"""
+
+    type: Literal["snoRNA"] = "snoRNA"
+    parent_gene_id: str
+    exons: List[Exon] = Field(default_factory=list)
+
+
+class tRNA(BaseGFFEntity):
+    """Transfer RNA transcript"""
+
+    type: Literal["tRNA"] = "tRNA"
+    parent_gene_id: str
+    exons: List[Exon] = Field(default_factory=list)
+
+
+class rRNA(BaseGFFEntity):
+    """Ribosomal RNA transcript"""
+
+    type: Literal["rRNA"] = "rRNA"
+    parent_gene_id: str
+    exons: List[Exon] = Field(default_factory=list)
+
+
+class snRNA(BaseGFFEntity):
+    """Small nuclear RNA transcript"""
+
+    type: Literal["snRNA"] = "snRNA"
+    parent_gene_id: str
+    exons: List[Exon] = Field(default_factory=list)
+
+
+class SRP_RNA(BaseGFFEntity):
+    """Signal recognition particle RNA"""
+
+    type: Literal["SRP_RNA"] = "SRP_RNA"
+    parent_gene_id: str
+    exons: List[Exon] = Field(default_factory=list)
+
+
+class RNase_P_RNA(BaseGFFEntity):
+    """RNase P RNA transcript"""
+
+    type: Literal["RNase_P_RNA"] = "RNase_P_RNA"
+    parent_gene_id: str
+    exons: List[Exon] = Field(default_factory=list)
+
+
+class RNase_MRP_RNA(BaseGFFEntity):
+    """RNase MRP RNA transcript"""
+
+    type: Literal["RNase_MRP_RNA"] = "RNase_MRP_RNA"
+    parent_gene_id: str
+    exons: List[Exon] = Field(default_factory=list)
+
+
+class Pseudogene(BaseGFFEntity):
+    """Non-functional gene copy"""
+
+    type: Literal["pseudogene"] = "pseudogene"
+    pseudo: bool = True
+    exons: List[Exon] = Field(default_factory=list)
+
+
+# Union type for all transcript types
+TranscriptType = Union[
+    mRNA,
+    lnc_RNA,
+    primary_transcript,
+    antisense_RNA,
+    snoRNA,
+    tRNA,
+    rRNA,
+    snRNA,
+    SRP_RNA,
+    RNase_P_RNA,
+    RNase_MRP_RNA,
+]
+
+
+class GeneModel(BaseModel):
+    """Complete gene model with all associated transcripts"""
+
+    gene: Gene
+    transcripts: List[TranscriptType] = Field(default_factory=list)
+
+    class Config:
+        arbitrary_types_allowed = True
+
+
+class PseudogeneModel(BaseModel):
+    """Pseudogene model"""
+
+    pseudogene: Pseudogene
+
+    class Config:
+        arbitrary_types_allowed = True
+
+
+# Union type for all gene forms
+GeneForm = Union[GeneModel, PseudogeneModel]
diff --git a/clodius/multivec.py b/clodius/multivec.py
index b95d943e..3c30b3cb 100644
--- a/clodius/multivec.py
+++ b/clodius/multivec.py
@@ -1,6 +1,7 @@
 from __future__ import print_function
 
 import gzip
+import json
 import logging
 import math
 import os
@@ -201,14 +202,21 @@ def create_multivec_multires(
     chroms, lengths = zip(*chromsizes)
     chrom_array = np.array(chroms, dtype="S")
 
-    # row_infos = None
-    if "row_infos" in array_data.attrs:
-        row_infos = array_data.attrs["row_infos"]
+    try:
+        if "row_infos" in array_data.attrs:
+            row_infos = array_data.attrs["row_infos"]
+    except AttributeError:
+        # array data probably isn't an HDF5 file
+        pass
 
-    # add the chromosome information
+    # add the row_info information
     if row_infos is not None:
-        f["resolutions"][str(curr_resolution)].attrs.create("row_infos", row_infos)
+        # Convert bytes to strings if necessary
+        if isinstance(row_infos, (list, tuple)):
+            row_infos = [r.decode('utf-8') if isinstance(r, bytes) else r for r in row_infos]
+        f["info"].create_dataset("row_infos", data=json.dumps(row_infos))
 
+    # add the chromosome information
     f["resolutions"][str(curr_resolution)].create_group("chroms")
     f["resolutions"][str(curr_resolution)].create_group("values")
     f["resolutions"][str(curr_resolution)]["chroms"].create_dataset(
@@ -276,10 +284,6 @@ def create_multivec_multires(
         curr_resolution = prev_resolution * 2
         f["resolutions"].create_group(str(curr_resolution))
 
-        # add information about each of the rows
-        if row_infos is not None:
-            f["resolutions"][str(curr_resolution)].attrs.create("row_infos", row_infos)
-
         f["resolutions"][str(curr_resolution)].create_group("chroms")
         f["resolutions"][str(curr_resolution)].create_group("values")
         f["resolutions"][str(curr_resolution)]["chroms"].create_dataset(
@@ -351,4 +355,5 @@ def create_multivec_multires(
                 start += int(min(standard_chunk_size, len(chrom_data) - start))
 
         prev_resolution = curr_resolution
+
     return f
diff --git a/clodius/tiles/bam.py b/clodius/tiles/bam.py
index 23e182f5..a4111707 100644
--- a/clodius/tiles/bam.py
+++ b/clodius/tiles/bam.py
@@ -1,65 +1,350 @@
+import io
 import json
 import math
 
 import numpy as np
-import pysam
+import pandas as pd
 
 import clodius.tiles.bigwig as ctbw
 from clodius.tiles.tabix import est_query_size_ix, load_bai_index
 from clodius.tiles.utils import abs2genomic
+from clodius.utils import TILE_OPTIONS_CHAR
+import logging
 
+import oxbow as ox
+import polars as pl
 
-def get_cigar_substitutions(read):
+logger = logging.getLogger(__name__)
+
+
+def get_cigar_substitutions(pos, query_length, cigartuples):
     subs = []
     curr_pos = 0
 
-    cigartuples = read.cigartuples
-    readstart = read.pos
-    readend = read.pos + read.query_length
+    cigartuples = cigartuples
+    readstart = pos
+    readend = pos + query_length
 
     for ctuple in cigartuples:
-        if ctuple[0] == pysam.CDIFF:
+        if ctuple[0] == "X":
             subs.append((readstart + curr_pos, "X", ctuple[1]))
             curr_pos += ctuple[1]
-        elif ctuple[0] == pysam.CINS:
+        elif ctuple[0] == "I":
             subs.append((readstart + curr_pos, "I", ctuple[1]))
-        elif ctuple[0] == pysam.CDEL:
+        elif ctuple[0] == "D":
             subs.append((readstart + curr_pos, "D", ctuple[1]))
             curr_pos += ctuple[1]
-        elif ctuple[0] == pysam.CREF_SKIP:
+        elif ctuple[0] == "N":
             subs.append((readstart + curr_pos, "N", ctuple[1]))
             curr_pos += ctuple[1]
-        elif ctuple[0] == pysam.CEQUAL or ctuple[0] == pysam.CMATCH:
+        elif ctuple[0] == "M" or ctuple[0] == "=":
             curr_pos += ctuple[1]
 
     if len(cigartuples):
         first_ctuple = cigartuples[0]
         last_ctuple = cigartuples[-1]
 
-        if first_ctuple[0] == pysam.CSOFT_CLIP:
+        if first_ctuple[0] == "S":
             subs.append((readstart - first_ctuple[1], "S", first_ctuple[1]))
-        if first_ctuple[0] == pysam.CHARD_CLIP:
+        if first_ctuple[0] == "H":
             subs.append((readstart - first_ctuple[1], "H", first_ctuple[1]))
 
-        if last_ctuple[0] == pysam.CSOFT_CLIP:
-            subs.append((readend - last_ctuple[1], "S", last_ctuple[1]))
-        if last_ctuple[0] == pysam.CHARD_CLIP:
+        if last_ctuple[0] == "S":
+            subs.append((readend + 1, "S", last_ctuple[1]))
+        if last_ctuple[0] == "H":
             subs.append((readend, "H", last_ctuple[1]))
 
     return subs
 
 
-def load_reads(
-    samfile, start_pos, end_pos, chromsizes=None, index_filename=None, cache=None
-):
+def parse_cigar_string(cigar):
+    """Split a CIGAR string into ``(op, length)`` tuples; non-str/empty -> []."""
+    if not cigar or not isinstance(cigar, str):
+        return []
+    parts, curr = [], 0
+    for c in cigar:
+        if c.isdecimal():  # isnumeric() also accepts e.g. "½", which int() rejects
+            curr = curr * 10 + int(c)
+        else:
+            parts.append((c, curr))
+            curr = 0
+    return parts
+
+
+def reconstruct_ref(seq, md, cigar):
+    """Rebuild the gapped reference/query alignment of a read from CIGAR and MD.
+
+    The two returned strings are aligned column by column so that they can
+    be handed straight to ``variants_list``:
+
+    * an insertion (``I``) is ``-`` in the reference and the inserted bases in
+      the query. The MD tag knows nothing about insertions, which is why they
+      have to be placed using the CIGAR string,
+    * a deletion (``D``) is the deleted reference bases (taken from the MD
+      tag) in the reference and ``-`` in the query,
+    * aligned bases (``M``, ``=``, ``X``) appear in both strings,
+    * soft-clipped bases (``S``) are dropped from both strings,
+    * every other operation (``H``, ``N``, ``P``) is ignored.
+
+    Parameters:
+    -----------
+    seq: str
+        The read sequence, including any soft-clipped bases
+    md: str
+        The value of the read's MD tag, e.g. ``"3C0"`` or ``"2^TT2"``
+    cigar: str
+        The read's CIGAR string, e.g. ``"2M1I2M"``
+
+    Returns:
+    --------
+    (ref, query): (str, str)
+        The gapped reference and the gapped query sequence
+
+    Raises:
+    -------
+    IndexError
+        If the MD tag refers to a reference base beyond the columns laid
+        out by the CIGAR string
+    """
+    i_seq = 0
+    num = 0
+    query_parts = []
+    ref_seq = []
+
+    # First pass: lay out the alignment columns from the CIGAR string. Aligned
+    # reference bases are provisionally copied from the read; the MD pass
+    # below overwrites the ones that differ.
+    for c in cigar:
+        if c.isnumeric():
+            # getting the number of bases the upcoming operation applies to
+            num = num * 10 + int(c)
+            continue
+
+        if c == "S":
+            i_seq += num
+        elif c == "I":
+            ref_seq += ["-"] * num
+            query_parts.append(seq[i_seq : i_seq + num])
+            i_seq += num
+        elif c in ("M", "=", "X"):
+            # "=" and "X" consume query and reference exactly like "M"
+            aligned = seq[i_seq : i_seq + num]
+            query_parts.append(aligned)
+            ref_seq += list(aligned)
+            i_seq += num
+        elif c == "D":
+            # placeholder bases, replaced by the "^..." run of the MD tag
+            ref_seq += ["N"] * num
+            query_parts.append("-" * num)
+
+        num = 0
+
+    # The MD tag only counts reference bases, so it has to be walked over the
+    # columns that hold one. Indexing ref_seq directly would place every
+    # mismatch that follows an insertion too far to the left.
+    ref_cols = [i for i, base in enumerate(ref_seq) if base != "-"]
+
+    i_ref = 0
+    match_count = 0
+
+    # Second pass: let's iterate over the entire md string
+    for c in md:
+        if c.isnumeric():
+            # a (possibly multi-digit) run of matching bases
+            match_count = match_count * 10 + int(c)
+            continue
+
+        # Skip the matches that we've gone over. If we've been going over a
+        # deletion or mismatches, then match_count will be 0
+        i_ref += match_count
+        match_count = 0
+
+        if c != "^":
+            # A letter is either a deleted reference base (after "^") or a
+            # mismatch; in both cases it is the base the reference has here
+            ref_seq[ref_cols[i_ref]] = c
+            i_ref += 1
+
+    return "".join(ref_seq), "".join(query_parts)
+
+
+def variants_list(ref, seq):
+    """Get a list of the substitutions that are in seq relative to ref
+
+    ref and seq are equally long gapped strings; a "-" in ref marks an
+    insertion, a "-" in seq a deletion. Gap columns are never reported.
+
+    Returns:
+        A list of 0-based (query_pos, ref_pos, query_base, ref_base) tuples.
+
+    Raises:
+        ValueError: If ref and seq differ in length.
+    """
+    if len(seq) != len(ref):
+        raise ValueError("ref and seq must have the same length")
+
+    variants = []
+    ref_pos = seq_pos = 0
+    for ref_base, seq_base in zip(ref, seq):
+        if ref_base == "-":
+            seq_pos += 1
+        elif seq_base == "-":
+            ref_pos += 1
+        else:
+            if seq_base != ref_base:
+                variants.append((seq_pos, ref_pos, seq_base, ref_base))
+            seq_pos += 1
+            ref_pos += 1
+    return variants
+
+
+def get_reads_df(file, index_file, chromosome, start, end):
+    """Read the primary alignments in a chromosome range into a pandas DataFrame.
+
+    Both file objects are rewound before reading and the range is passed to
+    oxbow as the region string ``chromosome:start-end``. Secondary (0x100) and
+    supplementary (0x800) alignments are dropped, and two columns are added:
+    ``is_paired`` (flag & 1) and ``id`` (``qname_rname_pos_end``).
+    """
+    from time import time
+
+    logger.info("Getting reads for %s:%d-%d", chromosome, start, end)
+    file.seek(0)
+    index_file.seek(0)
+
+    region = f"{chromosome}:{start}-{end}"
+    logger.debug("region %s", region)
+
+    t1 = time()
+    ipc = ox.read_bam(file, region, index=index_file, fields='*', compressed=True)
+    t2 = time()
+    logger.info("Reading BAM: %.2f", t2 - t1)
+    reads_df = pl.read_ipc(io.BytesIO(ipc)).to_pandas()
+
+    # Exclude secondary and supplementary alignments
+    # When we decide to handle them, we'll need to fetch
+    # the primary read for secondary alignments in order
+    # to get the "seq" field which is omitted in secondary
+    # alignments. Copy so that the column assignments below
+    # don't write into a slice of the unfiltered frame.
+    secondary_or_supplementary = (reads_df["flag"] & (0x100 | 0x800)) > 0
+    reads_df = reads_df[~secondary_or_supplementary].copy()
+
+    reads_df["is_paired"] = reads_df["flag"] & 1
+    reads_df["id"] = (
+        reads_df["qname"].astype(str)
+        + "_"
+        + reads_df["rname"].astype(str)
+        + "_"
+        + reads_df["pos"].astype(str)
+        + "_"
+        + reads_df["end"].astype(str)
+    )
+    return reads_df
+
+
+def get_paired_reads(file, index_file, chromosome, start, end):
+    """Get reads and their mates for a chromosome range.
+
+    All mate pairs have to be on the same chromosome. Mates that are on different
+    chromosomes will be ignored.
+
+    Reads are first fetched from a window widened by MATE_EXTENSION on both
+    sides. For every read that overlaps [start - 1, end + 1] and whose mate is
+    still missing, the mate's position (rnext, pnext) is then queried once.
+    The result has the columns produced by get_reads_df.
+
+    Parameters:
+    -----------
+    file: file-like
+        The opened BAM file
+    index_file: file-like
+        The index file
+    chromosome: str
+        The name of the chromosome to fetch reads from
+    start: int
+        The start of the range
+    end: int
+        The end of the range
+    """
+    logger.info("getting paired reads: %s %d %d", chromosome, start, end)
+
+    # Iterative mate resolution takes 1.44s
+    MATE_EXTENSION = 500
+
+    # Get the single ended reads in a slightly wider interval
+    # so that we can pick up mates in one go
+    df_all = get_reads_df(
+        file, index_file, chromosome, max(1, start - MATE_EXTENSION), end + MATE_EXTENSION
+    )
+
+    df = df_all[(df_all["pos"] <= end + 1) & (df_all["end"] >= start - 1)]
+
+    qnames = set(df["qname"])
+    df = df_all[df_all["qname"].isin(qnames)]
+
+    # Find which reads we have the first and last mates for
+    firsts = set(df[df["flag"] & 64 > 0]["qname"])
+    lasts = set(df[df["flag"] & 128 > 0]["qname"])
+
+    # We're only going to get mates that are on the same chromosome
+    needs_mates = df[
+        (~df["qname"].isin(firsts & lasts)) & (df["rnext"].astype(str) == chromosome)
+    ]
+
+    fetched = set()
+
+    # Number of mate regions that are actually fetched below
+    counter = 0
+    while len(needs_mates):
+        row = needs_mates.iloc[0]
+
+        to_fetch = (row["rnext"], row["pnext"], row["pnext"] + 1)
+
+        if to_fetch in fetched:
+            # We've already tried fetching this region and didn't find a mate
+            needs_mates = needs_mates[needs_mates['qname'] != row['qname']]
+            continue
+
+        # Fetch the mate for this read. This will fetch a bunch of other
+        # reads in the mate's interval as well
+        new_reads = get_reads_df(file, index_file, *to_fetch)
+        fetched.add(to_fetch)
+        counter += 1
+
+        # In order to filter out the reads that we are not expecting
+        # we'll calculate the current set of incomplete reads as the
+        # reads that we have either a first or last but not both
+        incomplete_reads = firsts ^ lasts
+
+        # We'll keep the reads that match our list of incomplete read names
+        to_keep = new_reads[new_reads["qname"].isin(incomplete_reads)]
+
+        # Add the new reads to the list of firsts and lasts
+        firsts = firsts | set(to_keep[to_keep["flag"] & 64 > 0]["qname"])
+        lasts = lasts | set(to_keep[to_keep["flag"] & 128 > 0]["qname"])
+
+        df = pd.concat([df, to_keep])
+
+        needs_mates = df[
+            (~df["qname"].isin(firsts & lasts))
+            & (df["rnext"].astype(str) == chromosome)
+        ]
+
+    logger.info("Number of paired end refetches: %d", counter)
+
+    return df
+
+
+def load_reads(file, start_pos, end_pos, chromsizes=None, index_file=None, cache=None):
     """
     Sample reads from the specified region, assuming that the chromosomes
-    are ordered in some fashion. Returns an list of pysam reads
+    are ordered in some fashion. Returns a list of reads
 
     Parameters:
     -----------
-    samfile: pysam.AlignmentFile
-        A pysam entry into an indexed bam file
+    file: file-like
+        The opened BAM file
     start_pos: int
         The start position of the sampled region
     end_pos: int
@@ -67,6 +352,8 @@ def load_reads(
     chromsize: pandas.Series
         A listing of chromosome sizes. If not provided, the chromosome
         list will be extracted from the the bam file header
+    index_file: file-like
+        The index file
     cache:
         An object that implements the `get`, `set` and `exists` methods
         for caching data
@@ -78,6 +365,10 @@ def load_reads(
     """
     # if chromorder is not None...
     # specify the chromosome order for the fetched reads
+    if isinstance(file, str):
+        file = open(file, "rb")
+    if index_file and isinstance(index_file, str):
+        index_file = open(index_file, "rb")
 
     if chromsizes is not None:
         chromsizes_list = []
@@ -85,15 +376,21 @@ def load_reads(
         for chrom, size in chromsizes.items():
             chromsizes_list += [[chrom, int(size)]]
     else:
-        references = np.array(samfile.references)
-        lengths = np.array(samfile.lengths)
+        def _bam_src():
+            file.seek(0)
+            return file
+
+        chrom_sizes = ox.from_bam(_bam_src, tag_defs=[]).chrom_sizes
+        ref_lengths = dict(chrom_sizes)
 
         # we're going to create a natural ordering for references
         # e.g. (chr1, chr2,..., chr10, chr11...chr22,chrX, chrY, chrM...)
-        references = ctbw.natsorted(references)
+        references = ctbw.natsorted([name for name, _ in chrom_sizes])
+        lengths = [ref_lengths[r] for r in references]
         chromsizes_list = list(zip(references, [int(length) for length in lengths]))
 
     lengths = [r[1] for r in chromsizes_list]
+
     abs_chrom_offsets = np.r_[0, np.cumsum(lengths)]
 
     results = {
@@ -104,10 +401,6 @@ def load_reads(
         "chrName": [],
         "chrOffset": [],
         "cigar": [],
-        "m1From": [],
-        "m1To": [],
-        "m2From": [],
-        "m2To": [],
         "mapq": [],
         "tags.HP": [],
         "strand": [],
@@ -117,17 +410,20 @@ def load_reads(
 
     strands = {True: "-", False: "+"}
 
-    idx = load_bai_index(index_filename)
+    index_file.seek(0)
+    idx = load_bai_index(index_file)
 
     total_size = 0
     # check the size of the file to load to get an approximation
     # of whether we're going to return too much data
     for cid, start, end in abs2genomic(lengths, start_pos, end_pos):
+        if cid >= len(chromsizes_list):
+            continue
         total_size += est_query_size_ix(idx[cid], start, end)
 
     MAX_SIZE = 4e6
     if total_size > MAX_SIZE:
-        return {"error": "Tile encompasses too much data: {total_size}"}
+        return {"error": f"Tile encompasses too much data: {total_size}"}
 
     for cid, start, end in abs2genomic(lengths, start_pos, end_pos):
         chr_offset = int(abs_chrom_offsets[cid])
@@ -136,95 +432,64 @@ def load_reads(
             continue
 
         seq_name = f"{chromsizes_list[cid][0]}"
-        reads = samfile.fetch(seq_name, start, end)
-        for read in reads:
-            if read.is_unmapped:
-                continue
-            # query_seq = read.query_sequence
-
-            # differences = []
-
-            # try:
-            #     for counter, (qpos, rpos, ref_base) in enumerate(read.get_aligned_pairs(with_seq=True)):
-            #         # inferred from the pysam source code:
-            #         # https://github.com/pysam-developers/pysam/blob/3defba98911d99abf8c14a483e979431f069a9d2/pysam/libcalignedsegment.pyx
-            #         # and GitHub issue:
-            #         # https://github.com/pysam-developers/pysam/issues/163
-            #         #print('qpos, rpos, ref_base', qpos, rpos, ref_base)
-            #         if rpos is None:
-            #             differences += [(qpos, 'I')]
-            #         elif qpos is None:
-            #             differences += [(counter, 'D')]
-            #         elif ref_base.islower():
-            #             differences += [(qpos, query_seq[qpos], ref_base)]
-            # except ValueError as ve:
-            #     # probably lacked an MD string
-            #     pass
-            try:
-                id_suffix = ""
-                if read.is_paired:
-                    if read.is_read1:
-                        id_suffix = "_1"
-                    if read.is_read2:
-                        id_suffix = "_2"
-
-                read_id = read.query_name + id_suffix
-                results["id"] += [read_id]
-                results["from"] += [int(read.reference_start + chr_offset)]
-                results["to"] += [int(read.reference_end + chr_offset)]
-                results["chrName"] += [read.reference_name]
-                results["chrOffset"] += [chr_offset]
-                results["cigar"] += [read.cigarstring]
-                results["mapq"] += [read.mapq]
-                # aligned_pairs = read.get_aligned_pairs(with_seq=True)
-
-                # For ONT reads retrieving the variants can be a lengthy
-                # procedure. We can try to cache them
-                use_cache = read.query_length > 40000
-                if use_cache:
-                    variants = get_cached_variants(cache, read_id)
-                else:
-                    variants = None
-                # variants = None
-
-                if not variants:
-                    if read.query_sequence:
-                        # read.get_aligned_pairs(with_seq=True, matches_only=True)
-                        try:
-                            variants = [
-                                (r[0], r[1], read.query_sequence[r[0]])
-                                for r in read.get_aligned_pairs(
-                                    with_seq=True, matches_only=True
-                                )
-                                if start <= r[1] <= end
-                                and r[2] is not None
-                                and r[2].islower()
-                            ]
-                        except ValueError:
-                            # Probably MD tag not present
-                            variants = []
-
-                        if use_cache:
-                            set_cached_variants(cache, read_id, variants)
-
-                        results["variants"] += [variants]
-                    else:
-                        results["variants"] += []
-                else:
-                    results["variants"] += [variants]
+        if start == 0:
+            start = 1
+
+        reads_df = get_paired_reads(
+            file=file, index_file=index_file, chromosome=seq_name, start=start, end=end
+        )
+        # We can drastically speed these functions up by coding them in Rust in oxbow
+        results["cigars"] = [
+            get_cigar_substitutions(pos - 1, end - pos, parse_cigar_string(cigar))
+            for pos, end, cigar in zip(
+                reads_df["pos"], reads_df["end"], reads_df["cigar"]
+            )
+        ]
+
+        num_reads = len(reads_df)
+
+        strands = {0: '+', 16: '-'}
+
+        results["first_seq"] = list(reads_df["flag"] & 64)
+        results["last_seq"] = list(reads_df["flag"] & 128)
+        results["is_paired"] = list(reads_df["flag"] & 1)
+        results["from"] = list(reads_df["pos"] - 1)
+        results["to"] = list(reads_df["end"])
+        results["chrName"] = list(reads_df["rname"])
+        results["chrOffset"] = [chr_offset] * num_reads
+        results["readName"] = list(reads_df["qname"])
+        results['mapq'] = list(reads_df['mapq'])
+        results['strand'] = [strands[x] for x in list(reads_df['flag'] & 16)]
+
+        results["id"] = [
+            name if not is_paired else (f"{name}_1" if first else f"{name}_2")
+            for name, first, is_paired in zip(
+                reads_df["qname"], results["first_seq"], results["is_paired"]
+            )
+        ]
 
-                results["cigars"] += [get_cigar_substitutions(read)]
-                tags = dict(read.tags)
-                results["tags.HP"] += [tags.get("HP", 0)]
-                results["strand"] += [strands[read.is_reverse]]
-            except:
-                raise
+        if "HP" not in reads_df:
+            results["tags.HP"] = [0] * num_reads
+        else:
+            results["tags.HP"] = reads_df["HP"]
 
-            try:
-                results["md"] += [read.get_tag("MD")]
-            except KeyError:
-                results["md"] += [""]
-                continue
+        if "MD" not in reads_df:
+            results["md"] = [""] * num_reads
+            results["variants"] = []
+        else:
+            results["md"] = list(reads_df["MD"])
+            results["variants"] = [
+                (
+                    variants_list(
+                        *reconstruct_ref(iseq, imd, icigar)
+                    )
+                    if imd
+                    else []
+                )
+                for iseq, imd, ipos, icigar in zip(
+                    reads_df["seq"], reads_df["MD"], reads_df["pos"], reads_df["cigar"]
+                )
+            ]
 
     return results
 
@@ -249,7 +514,7 @@ def set_cached_variants(cache, read_id, variants):
         cache.set(cache_id, json.dumps(variants))
 
 
-def alignment_tileset_info(samfile, chromsizes):
+def alignment_tileset_info(file, chromsizes):
     """
     Get the tileset info for a bam file
 
@@ -266,6 +531,9 @@ def alignment_tileset_info(samfile, chromsizes):
                     'max_zoom': 7
                     }
     """
+    if isinstance(file, str):
+        file = open(file, "rb")
+
     if chromsizes is not None:
         chromsizes_list = []
 
@@ -274,13 +542,14 @@ def alignment_tileset_info(samfile, chromsizes):
 
         total_length = sum([c[1] for c in chromsizes_list])
     else:
-        total_length = sum(samfile.lengths)
-
-        references = np.array(samfile.references)
-        lengths = np.array(samfile.lengths)
+        def _bam_src():
+            file.seek(0)
+            return file
 
-        ref_lengths = dict(zip(references, lengths))
-        references = ctbw.natsorted(references)
+        chrom_sizes = ox.from_bam(_bam_src, tag_defs=[]).chrom_sizes
+        total_length = sum(length for _, length in chrom_sizes)
+        ref_lengths = dict(chrom_sizes)
+        references = ctbw.natsorted([name for name, _ in chrom_sizes])
 
         lengths = [ref_lengths[r] for r in references]
         chromsizes_list = list(zip(references, [int(length) for length in lengths]))
@@ -294,7 +563,7 @@ def alignment_tileset_info(samfile, chromsizes):
     tileset_info = {
         "min_pos": [0],
         "max_pos": [total_length],
-        "max_width": tile_size * 2 ** max_zoom,
+        "max_width": tile_size * 2**max_zoom,
         "tile_size": tile_size,
         "chromsizes": chromsizes_list,
         "max_zoom": max_zoom,
@@ -305,9 +574,9 @@ def alignment_tileset_info(samfile, chromsizes):
 
 
 def alignment_tiles(
-    samfile,
+    file,
     tile_ids,
-    index_filename=None,
+    index_file=None,
     chromsizes=None,
     max_tile_width=None,
     cache=None,
@@ -335,14 +604,22 @@ def alignment_tiles(
     tile_list: [(tile_id, tile_data),...]
         A list of tile_id, tile_data tuples
     """
+    if index_file is None:
+        if isinstance(file, str):
+            index_file = file + ".bai"
+        else:
+            raise ValueError(
+                "A file pointer is provided without an index file. "
+                "Please specify an index file"
+            )
     generated_tiles = []
-    tsinfo = alignment_tileset_info(samfile, chromsizes)
+    tsinfo = alignment_tileset_info(file, chromsizes)
 
     for tile_id in tile_ids:
-        tile_id_parts = tile_id.split("|")[0].split(".")
+        tile_id_parts = tile_id.split(TILE_OPTIONS_CHAR)[0].split(".")
         tile_position = list(map(int, tile_id_parts[1:3]))
 
-        tile_width = tsinfo["max_width"] / 2 ** int(tile_position[0])
+        tile_width = tsinfo["max_width"] // 2 ** int(tile_position[0])
 
         if max_tile_width and tile_width >= max_tile_width:
             # this tile is larger than the max allowed
@@ -359,11 +636,11 @@ def alignment_tiles(
             end_pos = start_pos + tile_width
 
             tile_value = load_reads(
-                samfile,
+                file,
                 start_pos=start_pos,
                 end_pos=end_pos,
                 chromsizes=chromsizes,
-                index_filename=index_filename,
+                index_file=index_file,
                 cache=cache,
             )
             generated_tiles += [(tile_id, tile_value)]
@@ -371,28 +648,23 @@ def alignment_tiles(
     return generated_tiles
 
 
-def tileset_info(filename, chromsizes=None):
-    samfile = pysam.AlignmentFile(filename)
-
-    return alignment_tileset_info(samfile, chromsizes)
+def tileset_info(file, chromsizes=None):
+    return alignment_tileset_info(file, chromsizes)
 
 
 def tiles(
-    filename,
+    file,
     tile_ids,
+    index_file=None,
     index_filename=None,
     chromsizes=None,
     max_tile_width=None,
     cache=None,
 ):
-    if not index_filename:
-        index_filename = f"{filename}.bai"
-    samfile = pysam.AlignmentFile(filename, index_filename=index_filename)
-
     return alignment_tiles(
-        samfile,
+        file,
         tile_ids,
-        index_filename=index_filename,
+        index_file=index_filename or index_file,
         chromsizes=chromsizes,
         max_tile_width=None,
         cache=cache,
diff --git a/clodius/tiles/bam_pysam.py b/clodius/tiles/bam_pysam.py
new file mode 100644
index 00000000..1dfc25d2
--- /dev/null
+++ b/clodius/tiles/bam_pysam.py
@@ -0,0 +1,403 @@
+import json
+import math
+
+import numpy as np
+
+import clodius.tiles.bigwig as ctbw
+import pysam
+from clodius.tiles.tabix import est_query_size_ix, load_bai_index
+from clodius.tiles.utils import abs2genomic
+from clodius.utils import TILE_OPTIONS_CHAR
+
+
+def get_cigar_substitutions(read):
+    """Return the CIGAR events of ``read`` as (position, code, length) tuples.
+
+    "X", "I", "D" and "N" events are positioned by walking the CIGAR from
+    ``read.pos``; "S" and "H" clips come from the first/last operation only.
+    """
+    ops = read.cigartuples
+    first_base = read.pos
+    last_base = read.pos + read.query_length
+
+    # operations that are reported, mapped to the code they are reported as
+    reported = {
+        pysam.CDIFF: "X",
+        pysam.CINS: "I",
+        pysam.CDEL: "D",
+        pysam.CREF_SKIP: "N",
+    }
+    # operations that move the running offset (insertions do not)
+    advancing = (pysam.CDIFF, pysam.CDEL, pysam.CREF_SKIP, pysam.CEQUAL, pysam.CMATCH)
+
+    events = []
+    offset = 0
+    for op, length in ops:
+        if op in reported:
+            events.append((first_base + offset, reported[op], length))
+        if op in advancing:
+            offset += length
+
+    if len(ops):
+        (head_op, head_len), (tail_op, tail_len) = ops[0], ops[-1]
+        clip_codes = {pysam.CSOFT_CLIP: "S", pysam.CHARD_CLIP: "H"}
+        if head_op in clip_codes:
+            events.append((first_base - head_len, clip_codes[head_op], head_len))
+        if tail_op == pysam.CSOFT_CLIP:
+            events.append((last_base - tail_len, "S", tail_len))
+        if tail_op == pysam.CHARD_CLIP:
+            events.append((last_base, "H", tail_len))
+    return events
+
+
+def load_reads(
+    samfile, start_pos, end_pos, chromsizes=None, index_filename=None, cache=None
+):
+    """
+    Load the reads overlapping a region given in absolute coordinates,
+    i.e. positions along the concatenation of all chromosomes. Returns a
+    dictionary of per-read attribute lists.
+
+    Parameters:
+    -----------
+    samfile: pysam.AlignmentFile
+        A pysam entry into an indexed bam file
+    start_pos: int
+        The start position of the sampled region
+    end_pos: int
+        The end position of the sampled region
+    chromsizes: pandas.Series
+        A listing of chromosome sizes. If not provided, the chromosome
+        list will be extracted from the bam file header
+    index_filename: str
+        Path of the bam index file. It is read to estimate the amount
+        of data in the region before any read is fetched
+    cache:
+        An object that implements the `get`, `set` and `exists` methods
+        for caching data
+
+    Returns
+    -------
+    reads: dict
+        One list per attribute ("id", "from", "to", "cigar", ...), each
+        holding one entry per returned read; the "m1From", "m1To",
+        "m2From" and "m2To" lists are never filled here. If the region
+        is estimated to span too much data, `{"error": message}` is
+        returned instead
+
+    Notes
+    -----
+    Reads flagged as unmapped are skipped. "from" and "to" are absolute
+    positions (reference position plus the chromosome offset). The
+    "variants" entry of a read lists (query_pos, ref_pos, query_base)
+    for aligned positions within the fetched interval whose reference
+    base is reported in lower case by pysam; it is empty when the read
+    has no stored sequence or the aligned pairs cannot be computed.
+    """
+    if chromsizes is not None:
+        chromsizes_list = []
+
+        for chrom, size in chromsizes.items():
+            chromsizes_list.append([chrom, int(size)])
+    else:
+        references = np.array(samfile.references)
+        lengths = np.array(samfile.lengths)
+
+        ref_lengths = dict(zip(references, lengths))
+
+        # we're going to create a natural ordering for references
+        # e.g. (chr1, chr2,..., chr10, chr11...chr22,chrX, chrY, chrM...)
+        references = ctbw.natsorted(references)
+        chromsizes_list = [(ref, int(ref_lengths[ref])) for ref in references]
+
+    lengths = [size for _, size in chromsizes_list]
+    abs_chrom_offsets = np.r_[0, np.cumsum(lengths)]
+
+    # column-oriented output: every list gets one entry per returned read
+    # (except the m1*/m2* lists, which stay empty)
+    results = {
+        "id": [],
+        "from": [],
+        "to": [],
+        "md": [],
+        "chrName": [],
+        "chrOffset": [],
+        "cigar": [],
+        "m1From": [],
+        "m1To": [],
+        "m2From": [],
+        "m2To": [],
+        "mapq": [],
+        "tags.HP": [],
+        "strand": [],
+        "variants": [],
+        "cigars": [],
+    }
+
+    # pysam's is_reverse flag -> strand symbol
+    strands = {True: "-", False: "+"}
+
+    # close the index file as soon as it has been parsed
+    with open(index_filename, "rb") as index_handle:
+        idx = load_bai_index(index_handle)
+
+    total_size = 0
+    # check the size of the file to load to get an approximation
+    # of whether we're going to return too much data
+    for cid, start, end in abs2genomic(lengths, start_pos, end_pos):
+        if cid >= len(chromsizes_list):
+            break
+        # NOTE(review): idx is looked up by position in `chromsizes_list`;
+        # presumably that order has to match the order of the references
+        # in the index - confirm
+        total_size += est_query_size_ix(idx[cid], start, end)
+
+    # upper bound for the estimate above (presumably in bytes - TODO confirm)
+    MAX_SIZE = 4e6
+    if total_size > MAX_SIZE:
+        return {"error": f"Tile encompasses too much data: {total_size}"}
+
+    for cid, start, end in abs2genomic(lengths, start_pos, end_pos):
+        if cid >= len(chromsizes_list):
+            continue
+
+        # only look up the offset once cid is known to be a valid index
+        chr_offset = int(abs_chrom_offsets[cid])
+        seq_name = f"{chromsizes_list[cid][0]}"
+
+        for read in samfile.fetch(seq_name, start, end):
+            if read.is_unmapped:
+                continue
+
+            # suffix the name of paired reads with their mate number
+            id_suffix = ""
+            if read.is_paired:
+                if read.is_read1:
+                    id_suffix = "_1"
+                if read.is_read2:
+                    id_suffix = "_2"
+
+            read_id = read.query_name + id_suffix
+            results["id"].append(read_id)
+            results["from"].append(int(read.reference_start + chr_offset))
+            results["to"].append(int(read.reference_end + chr_offset))
+            results["chrName"].append(read.reference_name)
+            results["chrOffset"].append(chr_offset)
+            results["cigar"].append(read.cigarstring)
+            results["mapq"].append(read.mapq)
+
+            # For ONT reads retrieving the variants can be a lengthy
+            # procedure. We can try to cache them
+            use_cache = read.query_length > 40000
+            if use_cache:
+                variants = get_cached_variants(cache, read_id)
+            else:
+                variants = None
+
+            if not variants:
+                if read.query_sequence:
+                    try:
+                        # with_seq=True yields lower-case reference bases
+                        # at mismatching positions
+                        variants = [
+                            (r[0], r[1], read.query_sequence[r[0]])
+                            for r in read.get_aligned_pairs(
+                                with_seq=True, matches_only=True
+                            )
+                            if start <= r[1] <= end
+                            and r[2] is not None
+                            and r[2].islower()
+                        ]
+                    except ValueError:
+                        # Probably MD tag not present
+                        variants = []
+
+                    if use_cache:
+                        set_cached_variants(cache, read_id, variants)
+                else:
+                    # no sequence stored for this read: still record an
+                    # empty entry so that every list in `results` keeps
+                    # exactly one item per read
+                    variants = []
+
+            results["variants"].append(variants)
+            results["cigars"].append(get_cigar_substitutions(read))
+            results["tags.HP"].append(dict(read.tags).get("HP", 0))
+            results["strand"].append(strands[read.is_reverse])
+
+            # reads without an MD tag get an empty string
+            try:
+                md_tag = read.get_tag("MD")
+            except KeyError:
+                md_tag = ""
+            results["md"].append(md_tag)
+
+    return results
+
+
+def get_cached_variants(cache, read_id):
+    """Look up previously stored variants for `read_id`.
+
+    Returns the JSON-decoded entry stored under "variants.<read_id>", or
+    None when there is no usable cache or the entry does not exist.
+    """
+    if not cache:
+        return None
+    key = f"variants.{read_id}"
+    found = cache.exists(key)
+    return json.loads(cache.get(key)) if found else None
+
+
+def set_cached_variants(cache, read_id, variants):
+    """Store `variants` as JSON under "variants.<read_id>" if a cache is given."""
+    if not cache:
+        return
+    cache.set(f"variants.{read_id}", json.dumps(variants))
+
+
+def alignment_tileset_info(samfile, chromsizes):
+    """
+    Get the tileset info for a bam file
+
+    Parameters
+    ----------
+    samfile: pysam.AlignmentFile
+        The bam file; its header is only read when `chromsizes` is None
+    chromsizes: pandas.Series
+        A listing of chromosome sizes. If None, the references of the
+        bam file header are used in natural sort order
+
+    Returns
+    -------
+    tileset_info: dict
+        With the keys 'min_pos', 'max_pos', 'max_width', 'tile_size',
+        'chromsizes', 'max_zoom' and 'max_tile_width'
+    """
+    if chromsizes is not None:
+        chromsizes_list = [[chrom, int(size)] for chrom, size in chromsizes.items()]
+        total_length = sum(size for _, size in chromsizes_list)
+    else:
+        total_length = sum(samfile.lengths)
+
+        references = np.array(samfile.references)
+        ref_lengths = dict(zip(references, np.array(samfile.lengths)))
+        chromsizes_list = [
+            (ref, int(ref_lengths[ref])) for ref in ctbw.natsorted(references)
+        ]
+
+    tile_size = 256
+    # Number of doublings of `tile_size` needed to cover the whole genome.
+    # log2 is exact for powers of two, where log(x) / log(2) can overshoot
+    # and add a spurious zoom level. Genomes that fit into a single tile
+    # get zoom 0 rather than a negative level (or a math domain error for
+    # a total length of 0).
+    if total_length > tile_size:
+        max_zoom = math.ceil(math.log2(total_length / tile_size))
+    else:
+        max_zoom = 0
+
+    # this should eventually be a configurable option
+    MAX_TILE_WIDTH = 100000
+
+    return {
+        "min_pos": [0],
+        "max_pos": [total_length],
+        "max_width": tile_size * 2**max_zoom,
+        "tile_size": tile_size,
+        "chromsizes": chromsizes_list,
+        "max_zoom": max_zoom,
+        "max_tile_width": MAX_TILE_WIDTH,
+    }
+
+
+def alignment_tiles(
+    samfile,
+    tile_ids,
+    index_filename=None,
+    chromsizes=None,
+    max_tile_width=None,
+    cache=None,
+):
+    """
+    Generate tiles from a bam file.
+
+    Parameters
+    ----------
+    samfile: pysam.AlignmentFile
+        The bam file that the reads should be retrieved from
+    tile_ids: [str,...]
+        A list of tile_ids (e.g. xyx.0.0) identifying the tiles
+        to be retrieved
+    index_filename: str
+        The name of the file containing the index
+    chromsizes: pandas.Series
+        A listing of chromosome sizes (default: the bam file header)
+    max_tile_width: int
+        How wide can each tile be before we return no data. This
+        can be used to limit the amount of data returned.
+    cache:
+        An object that implements the `get`, `set` and `exists` methods
+        for caching data
+    Returns
+    -------
+    tile_list: [(tile_id, tile_data),...]
+        A list of tile_id, tile_data tuples, one per requested tile id;
+        tile_data is `{"error": message}` for tiles that are too wide
+    """
+    generated_tiles = []
+    tsinfo = alignment_tileset_info(samfile, chromsizes)
+
+    for tile_id in tile_ids:
+        # anything after TILE_OPTIONS_CHAR (the tile options) is ignored
+        tile_id_parts = tile_id.split(TILE_OPTIONS_CHAR)[0].split(".")
+        tile_position = list(map(int, tile_id_parts[1:3]))
+
+        tile_width = tsinfo["max_width"] // 2 ** tile_position[0]
+
+        if max_tile_width and tile_width >= max_tile_width:
+            # this tile is larger than the max allowed: report it and go on
+            # so that the other requested tiles are not dropped
+            too_large = {
+                "error": f"Tile too large, no data returned. Max tile size: {max_tile_width}"
+            }
+            generated_tiles.append((tile_id, too_large))
+            continue
+
+        start_pos = tile_position[1] * tile_width
+        tile_value = load_reads(
+            samfile,
+            start_pos=start_pos,
+            end_pos=start_pos + tile_width,
+            chromsizes=chromsizes,
+            index_filename=index_filename,
+            cache=cache,
+        )
+        generated_tiles.append((tile_id, tile_value))
+
+    return generated_tiles
+
+
+def tileset_info(filename, chromsizes=None):
+    """Open the bam file `filename` and return its tileset info."""
+    with pysam.AlignmentFile(filename) as samfile:
+        return alignment_tileset_info(samfile, chromsizes)
+
+
+def tiles(
+    filename,
+    tile_ids,
+    index_filename=None,
+    chromsizes=None,
+    max_tile_width=None,
+    cache=None,
+):
+    """Open the bam file `filename` and generate the tiles in `tile_ids`."""
+    with pysam.AlignmentFile(filename, index_filename=index_filename) as samfile:
+        return alignment_tiles(
+            samfile,
+            tile_ids,
+            index_filename=index_filename,
+            chromsizes=chromsizes,
+            max_tile_width=max_tile_width,  # honour the caller's limit
+            cache=cache,
+        )
diff --git a/clodius/tiles/bed2ddb.py b/clodius/tiles/bed2ddb.py
index 23c27174..133a3b4b 100644
--- a/clodius/tiles/bed2ddb.py
+++ b/clodius/tiles/bed2ddb.py
@@ -1,29 +1,33 @@
 import collections as col
-import sqlite3
+import sosqlite
+import apsw
 
 from .utils import tiles_wrapper_2d
 
+sovfs = sosqlite.SmartOpenVFS(name="so-vfs")
+
 
 def tileset_info(filepath):
-    conn = sqlite3.connect(filepath)
-    c = conn.cursor()
-
-    row = c.execute("SELECT * from tileset_info").fetchone()
-    tileset_info = {
-        "zoom_step": row[0],
-        "max_length": row[1],
-        "assembly": row[2],
-        "chrom_names": row[3],
-        "chrom_sizes": row[4],
-        "tile_size": row[5],
-        "max_zoom": row[6],
-        "max_width": row[7],
-        "min_pos": [1, 1],
-        "max_pos": [row[1], row[1]],
-    }
-    conn.close()
-
-    return tileset_info
+    with apsw.Connection(
+        filepath, vfs=sovfs.name, flags=apsw.SQLITE_OPEN_READONLY
+    ) as conn:
+        c = conn.cursor()
+
+        row = c.execute("SELECT * from tileset_info").fetchone()
+        tileset_info = {
+            "zoom_step": row[0],
+            "max_length": row[1],
+            "assembly": row[2],
+            "chrom_names": row[3],
+            "chrom_sizes": row[4],
+            "tile_size": row[5],
+            "max_zoom": row[6],
+            "max_width": row[7],
+            "min_pos": [1, 1],
+            "max_pos": [row[1], row[1]],
+        }
+
+        return tileset_info
 
 
 # Deprecated. Use `tileset_info()`
@@ -35,7 +39,7 @@ def tiles(filepath, tile_ids):
     if len(tile_ids) == 0:
         return []
 
-    is_1d = len(tile_ids[0].split(".")) < 4
+    is_1d = len(list(tile_ids)[0].split(".")) < 4
 
     if is_1d:
         return tiles_1d(filepath, tile_ids)
@@ -88,68 +92,69 @@ def get_1d_tiles(filepath, zoom: int, tile_x_pos: int, num_tiles: int = 1):
     """
     ts_info = tileset_info(filepath)
 
-    conn = sqlite3.connect(filepath)
-    c = conn.cursor()
-
-    tile_width = ts_info["max_width"] / 2 ** zoom
-
-    tile_x_start_pos = tile_width * tile_x_pos
-    tile_x_end_pos = tile_x_start_pos + (tile_width * num_tiles)
-
-    query = f"""
-    SELECT fromX, toX, fromY, toY, chrOffset, importance, fields, uid
-    FROM intervals, position_index
-    WHERE
-        intervals.id=position_index.id AND
-        zoomLevel <= {zoom} AND
-        rToX >= {tile_x_start_pos} AND
-        rFromX <= {tile_x_end_pos}
-    UNION
-    SELECT fromX, toX, fromY, toY, chrOffset, importance, fields, uid
-    FROM intervals, position_index
-    WHERE
-        intervals.id=position_index.id AND
-        zoomLevel <= {zoom} AND
-        rToY >= {tile_x_start_pos} AND
-        rFromY <= {tile_x_end_pos}
-    """
+    with apsw.Connection(
+        filepath, vfs=sovfs.name, flags=apsw.SQLITE_OPEN_READONLY
+    ) as conn:
+        c = conn.cursor()
+
+        tile_width = ts_info["max_width"] / 2 ** zoom
+
+        tile_x_start_pos = tile_width * tile_x_pos
+        tile_x_end_pos = tile_x_start_pos + (tile_width * num_tiles)
+
+        query = f"""
+        SELECT fromX, toX, fromY, toY, chrOffset, importance, fields, uid
+        FROM intervals, position_index
+        WHERE
+            intervals.id=position_index.id AND
+            zoomLevel <= {zoom} AND
+            rToX >= {tile_x_start_pos} AND
+            rFromX <= {tile_x_end_pos}
+        UNION
+        SELECT fromX, toX, fromY, toY, chrOffset, importance, fields, uid
+        FROM intervals, position_index
+        WHERE
+            intervals.id=position_index.id AND
+            zoomLevel <= {zoom} AND
+            rToY >= {tile_x_start_pos} AND
+            rFromY <= {tile_x_end_pos}
+        """
+
+        rows = c.execute(query).fetchall()
+
+        new_rows = col.defaultdict(list)
+
+        for r in rows:
+            try:
+                uid = r[7].decode("utf-8")
+            except AttributeError:
+                uid = r[7]
+
+            x_start = r[0]
+            x_end = r[1]
+            y_start = r[2]
+            y_end = r[3]
+
+            for i in range(tile_x_pos, tile_x_pos + num_tiles):
+                tile_x_start = i * tile_width
+                tile_x_end = (i + 1) * tile_width
 
-    rows = c.execute(query).fetchall()
-
-    new_rows = col.defaultdict(list)
-
-    for r in rows:
-        try:
-            uid = r[7].decode("utf-8")
-        except AttributeError:
-            uid = r[7]
-
-        x_start = r[0]
-        x_end = r[1]
-        y_start = r[2]
-        y_end = r[3]
-
-        for i in range(tile_x_pos, tile_x_pos + num_tiles):
-            tile_x_start = i * tile_width
-            tile_x_end = (i + 1) * tile_width
-
-            if x_start < tile_x_end and x_end >= tile_x_start:
-                # add the position offset to the returned values
-                new_rows[i] += [
-                    {
-                        "xStart": x_start,
-                        "xEnd": x_end,
-                        "yStart": y_start,
-                        "yEnd": y_end,
-                        "chrOffset": r[4],
-                        "importance": r[5],
-                        "uid": uid,
-                        "fields": r[6].split("\t"),
-                    }
-                ]
-    conn.close()
+                if x_start < tile_x_end and x_end >= tile_x_start:
+                    # add the position offset to the returned values
+                    new_rows[i] += [
+                        {
+                            "xStart": x_start,
+                            "xEnd": x_end,
+                            "yStart": y_start,
+                            "yEnd": y_end,
+                            "chrOffset": r[4],
+                            "importance": r[5],
+                            "uid": uid,
+                            "fields": r[6].split("\t"),
+                        }
+                    ]
 
-    return new_rows
+        return new_rows
 
 
 def get_1D_tiles(*args):
@@ -188,74 +193,75 @@ def get_2d_tiles(db_file, zoom, tile_x_pos, tile_y_pos, numx=1, numy=1):
     """
     tileset_info = get_2d_tileset_info(db_file)
 
-    conn = sqlite3.connect(db_file)
-
-    c = conn.cursor()
-    tile_width = tileset_info["max_width"] / 2 ** zoom
-
-    tile_x_start_pos = tile_width * tile_x_pos
-    tile_x_end_pos = tile_x_start_pos + (numx * tile_width)
-
-    tile_y_start_pos = tile_width * tile_y_pos
-    tile_y_end_pos = tile_y_start_pos + (numy * tile_width)
-
-    query = """
-    SELECT fromX, toX, fromY, toY, chrOffset, importance, fields, uid
-    FROM intervals,position_index
-    WHERE
-        intervals.id=position_index.id AND
-        zoomLevel <= {} AND
-        rToX >= {} AND
-        rFromX <= {} AND
-        rToY >= {} AND
-        rFromY <= {}
-    """.format(
-        zoom, tile_x_start_pos, tile_x_end_pos, tile_y_start_pos, tile_y_end_pos
-    )
-
-    rows = c.execute(query).fetchall()
-
-    new_rows = col.defaultdict(list)
-
-    for r in rows:
-        try:
-            uid = r[7].decode("utf-8")
-        except AttributeError:
-            uid = r[7]
-
-        x_start = r[0]
-        x_end = r[1]
-        y_start = r[2]
-        y_end = r[3]
-
-        for i in range(tile_x_pos, tile_x_pos + numx):
-            for j in range(tile_y_pos, tile_y_pos + numy):
-                tile_x_start = i * tile_width
-                tile_x_end = (i + 1) * tile_width
-
-                tile_y_start = j * tile_width
-                tile_y_end = (j + 1) * tile_width
-
-                if (
-                    x_start < tile_x_end
-                    and x_end >= tile_x_start
-                    and y_start < tile_y_end
-                    and y_end >= tile_y_start
-                ):
-                    # add the position offset to the returned values
-                    new_rows[(i, j)] += [
-                        {
-                            "xStart": r[0],
-                            "xEnd": r[1],
-                            "yStart": r[2],
-                            "yEnd": r[3],
-                            "chrOffset": r[4],
-                            "importance": r[5],
-                            "uid": uid,
-                            "fields": r[6].split("\t"),
-                        }
-                    ]
-    conn.close()
+    with apsw.Connection(
+        db_file, vfs=sovfs.name, flags=apsw.SQLITE_OPEN_READONLY
+    ) as conn:
+
+        c = conn.cursor()
+        tile_width = tileset_info["max_width"] / 2 ** zoom
+
+        tile_x_start_pos = tile_width * tile_x_pos
+        tile_x_end_pos = tile_x_start_pos + (numx * tile_width)
+
+        tile_y_start_pos = tile_width * tile_y_pos
+        tile_y_end_pos = tile_y_start_pos + (numy * tile_width)
+
+        query = """
+        SELECT fromX, toX, fromY, toY, chrOffset, importance, fields, uid
+        FROM intervals,position_index
+        WHERE
+            intervals.id=position_index.id AND
+            zoomLevel <= {} AND
+            rToX >= {} AND
+            rFromX <= {} AND
+            rToY >= {} AND
+            rFromY <= {}
+        """.format(
+            zoom, tile_x_start_pos, tile_x_end_pos, tile_y_start_pos, tile_y_end_pos
+        )
+
+        rows = c.execute(query).fetchall()
+
+        new_rows = col.defaultdict(list)
+
+        for r in rows:
+            try:
+                uid = r[7].decode("utf-8")
+            except AttributeError:
+                uid = r[7]
+
+            x_start = r[0]
+            x_end = r[1]
+            y_start = r[2]
+            y_end = r[3]
+
+            for i in range(tile_x_pos, tile_x_pos + numx):
+                for j in range(tile_y_pos, tile_y_pos + numy):
+                    tile_x_start = i * tile_width
+                    tile_x_end = (i + 1) * tile_width
+
+                    tile_y_start = j * tile_width
+                    tile_y_end = (j + 1) * tile_width
+
+                    if (
+                        x_start < tile_x_end
+                        and x_end >= tile_x_start
+                        and y_start < tile_y_end
+                        and y_end >= tile_y_start
+                    ):
+                        # add the position offset to the returned values
+                        new_rows[(i, j)] += [
+                            {
+                                "xStart": r[0],
+                                "xEnd": r[1],
+                                "yStart": r[2],
+                                "yEnd": r[3],
+                                "chrOffset": r[4],
+                                "importance": r[5],
+                                "uid": uid,
+                                "fields": r[6].split("\t"),
+                            }
+                        ]
 
     return new_rows
 
diff --git a/clodius/tiles/beddb.py b/clodius/tiles/beddb.py
index c119b553..b35baa26 100644
--- a/clodius/tiles/beddb.py
+++ b/clodius/tiles/beddb.py
@@ -1,18 +1,34 @@
-import sqlite3
+from time import time
+import sosqlite
+import apsw
+import logging
+
+from clodius.utils import TILE_OPTIONS_CHAR
+
+logger = logging.getLogger(__name__)
+
+t1 = time()
+
+sovfs = sosqlite.SmartOpenVFS(name="so-vfs")
 
 
 def tileset_info(db_file):
-    conn = sqlite3.connect(db_file)
-    cursor = conn.cursor()
+    with apsw.Connection(
+        db_file, vfs=sovfs.name, flags=apsw.SQLITE_OPEN_READONLY
+    ) as conn:
+        cursor = conn.cursor()
 
-    row = cursor.execute("SELECT * from tileset_info").fetchone()
+        row = cursor.execute("SELECT * from tileset_info").fetchone()
 
-    colnames = next(zip(*cursor.description))
+        colnames = next(zip(*cursor.description))
 
     if "version" not in colnames:
         version = 1
     else:
-        version = int(row[colnames.index("version")])
+        try:
+            version = int(row[colnames.index("version")])
+        except ValueError:
+            version = row[colnames.index("version")]
 
     if "header" not in colnames:
         header = ""
@@ -33,7 +49,6 @@ def tileset_info(db_file):
         "chromsizes": list(
             zip(row[3].split("\t"), [int(cs) for cs in row[4].split("\t")])
         ),
-        "info_version": "2",
     }
     conn.close()
 
@@ -59,8 +74,7 @@ def tiles(filepath, tile_ids):
     to_return = []
 
     for tile_id in tile_ids:
-        # tile_option_parts = tile_id.split('|')[1:]
-        tile_no_options = tile_id.split("|")[0]
+        tile_no_options = tile_id.split(TILE_OPTIONS_CHAR)[0]
         parts = tile_no_options.split(".")
 
         zoom = int(parts[1])
@@ -70,14 +84,13 @@ def tiles(filepath, tile_ids):
         new_rows = {}
         new_rows = []
 
-        for j in range(2 ** extra_zoom):
+        for j in range(2**extra_zoom):
             # the old rows are indexed by the higher
             # resolution tile numbers
-            higher_xpos = 2 ** extra_zoom * xpos + j
+            higher_xpos = 2**extra_zoom * xpos + j
             old_rows = get_1D_tiles(filepath, zoom + extra_zoom, higher_xpos)
             new_rows += old_rows
 
-        # print("new_rows length", len(new_rows))
         to_return += [(tile_id, new_rows)]
 
     return to_return
@@ -106,59 +119,67 @@ def get_1D_tiles(db_file, zoom, tile_x_pos, num_tiles=1):
     ts_info = tileset_info(db_file)
     version = ts_info["version"]
 
-    conn = sqlite3.connect(db_file)
+    with apsw.Connection(
+        db_file, vfs=sovfs.name, flags=apsw.SQLITE_OPEN_READONLY
+    ) as conn:
+        c = conn.cursor()
 
-    c = conn.cursor()
+        tile_width = ts_info["max_width"] / 2**zoom
 
-    tile_width = ts_info["max_width"] / 2 ** zoom
+        tile_start_pos = tile_width * tile_x_pos
+        tile_end_pos = tile_start_pos + num_tiles * tile_width
 
-    tile_start_pos = tile_width * tile_x_pos
-    tile_end_pos = tile_start_pos + num_tiles * tile_width
-
-    query = """
-    SELECT startPos, endPos, chrOffset, importance, fields, uid
-    FROM intervals,position_index
-    WHERE
-        intervals.id=position_index.id AND
-        zoomLevel <= {} AND
-        rEndPos >= {} AND
-        rStartPos <= {}
-    """.format(
-        zoom, tile_start_pos, tile_end_pos
-    )
-
-    if version == 2:
         query = """
         SELECT startPos, endPos, chrOffset, importance, fields, uid
         FROM intervals,position_index
         WHERE
             intervals.id=position_index.id AND
-            rStartZoomLevel <= {} AND
-            rEndZoomLevel >= 0 AND
+            zoomLevel <= {} AND
             rEndPos >= {} AND
             rStartPos <= {}
         """.format(
             zoom, tile_start_pos, tile_end_pos
         )
 
-    if version == 3:
-        query = """
-        SELECT startPos, endPos, chrOffset, importance, fields, uid, name
-        FROM intervals,position_index
-        WHERE
-            intervals.id=position_index.id AND
-            rStartZoomLevel <= {} AND
-            rEndZoomLevel >= 0 AND
-            rEndPos >= {} AND
-            rStartPos <= {}
-        """.format(
-            zoom, tile_start_pos, tile_end_pos
-        )
+        if version == 2:
+            query = """
+            SELECT startPos, endPos, chrOffset, importance, fields, uid
+            FROM intervals,position_index
+            WHERE
+                intervals.id=position_index.id AND
+                rStartZoomLevel <= {} AND
+                rEndZoomLevel >= 0 AND
+                rEndPos >= {} AND
+                rStartPos <= {}
+            """.format(
+                zoom, tile_start_pos, tile_end_pos
+            )
 
-    # import time
-    # t1 = time.time()
-    rows = c.execute(query).fetchall()
-    # t2 = time.time()
+        if version == 3:
+            query = """
+            SELECT startPos, endPos, chrOffset, importance, fields, uid, name
+            FROM intervals,position_index
+            WHERE
+                intervals.id=position_index.id AND
+                rStartZoomLevel <= {} AND
+                rEndZoomLevel >= 0 AND
+                rEndPos >= {} AND
+                rStartPos <= {}
+            """.format(
+                zoom, tile_start_pos, tile_end_pos
+            )
+
+        if version == "3t":
+            tile_id = sum([2**x for x in range(zoom)]) + tile_x_pos
+            query = f"""
+            SELECT startPos, endPos, chrOffset, importance, fields, uid, name
+            FROM intervals, tiles
+            WHERE
+                tiles.id = {tile_id} AND
+                tiles.intervalId = intervals.id
+            """
+
+        rows = c.execute(query).fetchall()
 
     new_rows = []
 
@@ -189,7 +210,6 @@ def get_1D_tiles(db_file, zoom, tile_x_pos, num_tiles=1):
                     to_add["name"] = r[6]
 
                 new_rows += [to_add]
-    conn.close()
 
     return new_rows
 
@@ -212,78 +232,79 @@ def list_items(db_file, start, end, max_entries=None):
     ts_info = tileset_info(db_file)
     version = ts_info["version"]
 
-    conn = sqlite3.connect(db_file)
+    with apsw.Connection(
+        db_file, vfs=sovfs.name, flags=apsw.SQLITE_OPEN_READONLY
+    ) as conn:
 
-    c = conn.cursor()
+        c = conn.cursor()
 
-    # some large number because we want to extract all entries
-    zoom = 100000
+        # some large number because we want to extract all entries
+        zoom = 100000
 
-    query = """
-    SELECT startPos, endPos, chrOffset, importance, fields, uid
-    FROM intervals,position_index
-    WHERE
-        intervals.id=position_index.id AND
-        zoomLevel <= {} AND
-        rEndPos >= {} AND
-        rStartPos <= {}
-    """.format(
-        zoom, start, end
-    )
-
-    if version == 2:
         query = """
         SELECT startPos, endPos, chrOffset, importance, fields, uid
         FROM intervals,position_index
         WHERE
             intervals.id=position_index.id AND
-            rStartZoomLevel <= {} AND
-            rEndZoomLevel >= 0 AND
+            zoomLevel <= {} AND
             rEndPos >= {} AND
             rStartPos <= {}
         """.format(
             zoom, start, end
         )
 
-    if version == 3:
-        query = """
-        SELECT startPos, endPos, chrOffset, importance, fields, uid, name
-        FROM intervals,position_index
-        WHERE
-            intervals.id=position_index.id AND
-            rStartZoomLevel <= {} AND
-            rEndZoomLevel >= 0 AND
-            rEndPos >= {} AND
-            rStartPos <= {}
-        """.format(
-            zoom, start, end
-        )
-    if max_entries is not None:
-        query += " LIMIT {}".format(max_entries)
+        if version == 2:
+            query = """
+            SELECT startPos, endPos, chrOffset, importance, fields, uid
+            FROM intervals,position_index
+            WHERE
+                intervals.id=position_index.id AND
+                rStartZoomLevel <= {} AND
+                rEndZoomLevel >= 0 AND
+                rEndPos >= {} AND
+                rStartPos <= {}
+            """.format(
+                zoom, start, end
+            )
 
-    rows = c.execute(query).fetchall()
+        if version == 3:
+            query = """
+            SELECT startPos, endPos, chrOffset, importance, fields, uid, name
+            FROM intervals,position_index
+            WHERE
+                intervals.id=position_index.id AND
+                rStartZoomLevel <= {} AND
+                rEndZoomLevel >= 0 AND
+                rEndPos >= {} AND
+                rStartPos <= {}
+            """.format(
+                zoom, start, end
+            )
+        if max_entries is not None:
+            query += " LIMIT {}".format(max_entries)
+
+        rows = c.execute(query).fetchall()
 
-    new_rows = []
+        new_rows = []
 
-    for r in rows:
-        try:
-            uid = r[5].decode("utf-8")
-        except AttributeError:
-            uid = r[5]
+        for r in rows:
+            try:
+                uid = r[5].decode("utf-8")
+            except AttributeError:
+                uid = r[5]
 
-        to_add = {
-            "xStart": r[0],
-            "xEnd": r[1],
-            "chrOffset": r[2],
-            "importance": r[3],
-            "uid": uid,
-            "fields": r[4].split("\t"),
-        }
+            to_add = {
+                "xStart": r[0],
+                "xEnd": r[1],
+                "chrOffset": r[2],
+                "importance": r[3],
+                "uid": uid,
+                "fields": r[4].split("\t"),
+            }
 
-        if version == 3:
-            to_add["name"] = r[6]
+            if version == 3:
+                to_add["name"] = r[6]
 
-        new_rows += [to_add]
-    conn.close()
+            new_rows += [to_add]
 
-    return new_rows
+        return new_rows
diff --git a/clodius/tiles/bedfile.py b/clodius/tiles/bedfile.py
index 3f1bef6a..97489791 100644
--- a/clodius/tiles/bedfile.py
+++ b/clodius/tiles/bedfile.py
@@ -1,4 +1,57 @@
-def tileset_info(filename):
+import functools as ft
+import hashlib
+import math
+import os
+import random
+
+import pandas as pd
+from pydantic import BaseModel
+import io
+import json
+
+import clodius.tiles.tabix as ctt
+import logging
+
+from smart_open import open
+
+# import pysam
+from clodius.tiles.vcf import generic_regions
+from clodius.utils import get_file_compression, TILE_OPTIONS_CHAR
+
+logger = logging.getLogger(__name__)
+
+cache = []
+
+
+class LRUCache:  # least-recently-used cache bounded to `capacity` entries
+    def __init__(self, capacity):
+        self.capacity = capacity
+        self.tm = 0  # logical clock, bumped on every cache hit and every set
+        self.cache = {}  # key -> cached value
+        self.lru = {}  # key -> clock value of the most recent access
+
+    def get(self, key):
+        if key not in self.cache:
+            return None  # a miss leaves the clock untouched
+        self.lru[key] = self.tm
+        self.tm += 1
+        return self.cache[key]
+
+    def set(self, key, value):
+        # Evict only when a *new* key would overflow the cache: overwriting an
+        # existing key must not throw away an unrelated entry.
+        if key not in self.cache and len(self.cache) >= self.capacity:
+            old_key = min(self.lru, key=self.lru.get)  # least recently used
+            del self.cache[old_key], self.lru[old_key]
+        self.cache[key] = value
+        self.lru[key] = self.tm
+        self.tm += 1
+
+
+cache = LRUCache(1)
+
+
+def tileset_info(filename, chromsizes=None, index_filename=None):
     """
 
     Return the bounds of this tileset. The bounds should encompass the entire
@@ -9,4 +62,301 @@ def tileset_info(filename):
     browser to pass in a set of chromsizes
     """
 
-    pass
+    # do this so that we can serialize the int64s in the numpy array
+    chromsizes_list = []
+
+    if chromsizes is None:
+        return {
+            "error": "No chromsizes found. Make sure the project has a chromsizes file or the assembly: tag is set"
+        }
+    for chrom, size in chromsizes.items():
+        chromsizes_list += [[chrom, int(size)]]
+
+    max_width = sum([c[1] for c in chromsizes_list])
+
+    if not index_filename:
+        if isinstance(filename, str):
+            filesize = os.stat(filename).st_size
+        else:
+            # We're going to record the current position in the file
+            # seek to the end to see how big it is and then seek back to
+            # the original position
+            orig_pos = filename.tell()
+            filename.seek(0, io.SEEK_END)
+            filesize = filename.tell()
+            filename.seek(orig_pos)
+
+        if filesize > 20e6:
+            return {"error": "File too large (>20Mb), please index"}
+
+    return {
+        "max_width": max_width,
+        "max_zoom": int(math.log(max_width) / math.log(2)),
+        "chromsizes": chromsizes_list,
+        "min_pos": [0],
+        "max_pos": [max_width],
+    }
+
+
+def row_to_bedlike(row, css, orig_columns):
+    """Build a beddb-style entry for one dataframe row.
+    The "importance" value is a fresh random number on every call."""
+    return {
+        "uid": row["ix"],
+        "xStart": row["xStart"],
+        "xEnd": row["xEnd"],
+        "chrOffset": css[row[0]],
+        "importance": random.random(),
+        "fields": list(row[orig_columns]),
+    }
+
+
+def ts_hash(filename, chromsizes):
+    digest = hashlib.md5(str(chromsizes).encode("utf-8")).hexdigest()  # of str(chromsizes)
+    return "{}.{}".format(filename, digest)
+
+
+def single_indexed_tile(file, index, chromsizes, tsinfo, z, x, tbx_index, settings):
+    """Fetch tile (z, x) from an indexed bedfile as a list of bedlike dicts."""
+    from clodius.tiles.tabix import dataframe_tabix_fetcher
+
+    css = chromsizes.cumsum().shift().fillna(0).to_dict()  # label -> sum of preceding sizes
+
+    # Delegate the lookup to the tabix module; its result is read via the "raw" column.
+    df = ctt.single_indexed_tile(
+        file,
+        index,
+        chromsizes,
+        tsinfo,
+        z,
+        x,
+        tbx_index=tbx_index,
+        fetcher=dataframe_tabix_fetcher,
+        max_results=settings.get("MAX_BEDFILE_ENTRIES"),
+    )
+
+    res = [x.split("\t") for x in df["raw"]]
+    # NOTE(review): a ValueError handler returning {"error": str(err)} used to wrap
+    # the two statements above; it is disabled, so such errors reach the caller.
+
+    formatted = []
+
+    if "error" in res:
+        # NOTE(review): res is a list of lists here, so this guard looks unreachable
+        return res
+
+    for row in res:
+        parts = row
+        if settings.get("filetype") == "vcf":
+            xEnd = css[parts[0]] + int(parts[1]) + len(parts[3])  # start + len(4th field)
+        else:
+            xEnd = css[parts[0]] + int(parts[2])
+
+        ret = {
+            "uid": hashlib.md5("\t".join(row).encode("utf-8")).hexdigest(),
+            "xStart": css[parts[0]] + int(parts[1]),
+            "xEnd": xEnd,
+            "chrOffset": css[parts[0]],
+            "importance": random.random(),
+            "fields": parts,
+        }
+
+        formatted += [ret]
+
+    return formatted
+
+
+def get_bedfile_values(filename, chromsizes, settings):
+    """Return the parsed bedfile as a dict with "rows" (the dataframe as JSON),
+    "orig_columns" and "css" (chromsizes index label -> cumulative offset)."""
+    cache = settings.get("cache")
+    identifier = settings.get("filename")
+    hash_ = None
+    val = None
+
+    logger.info("bedfiles identifier: %s", identifier)
+
+    if identifier:
+        hash_ = ts_hash(identifier, chromsizes)
+        # the processed table is cached as JSON so that we don't have to read the
+        # entire file and calculate cumulative start and end positions again
+        val = cache.get(hash_) if cache else None
+        val = json.loads(val) if val else None
+
+    if val is not None:
+        return val
+
+    # Paths are opened here (raw, so the compression can be detected below) and
+    # closed again once parsed; file objects remain owned by the caller.
+    opened_here = isinstance(filename, str)
+    if opened_here:
+        filename = open(filename, "rb", compression="disable")
+    try:
+        # A file-like object may already have been read: rewind to the beginning
+        filename.seek(0)
+        # Then we have to figure out how it's compressed because we expect a
+        # file pointer with no compression enabled
+        compression = get_file_compression(filename)
+
+        t = pd.read_csv(
+            filename,
+            header=None,
+            delimiter="\t",
+            encoding="ISO-8859-1",
+            comment="#",
+            compression=compression,
+        )
+    finally:
+        if opened_here:
+            filename.close()
+
+    orig_columns = list(t.columns)
+    css = chromsizes.cumsum().shift().fillna(0).to_dict()
+
+    # xStart and xEnd are cumulative start and end positions calculated
+    # as if the chromosomes are concatenated from end to end
+    t["chromStart"] = t[0].map(lambda x: css[x])
+    t["xStart"] = t["chromStart"] + t[1]
+
+    if settings.get("filetype") == "vcf":
+        # per-row length of column 3 (len(t[3]) would be the number of rows)
+        t["xEnd"] = t["chromStart"] + t[1] + t[3].astype(str).str.len()
+    else:
+        t["xEnd"] = t["chromStart"] + t[2]
+    t["ix"] = t.index
+
+    val = {"rows": t.to_json(), "orig_columns": orig_columns, "css": css}
+    if cache and hash_:
+        cache.set(hash_, json.dumps(val))
+
+    return val
+
+
+def single_tile(filename, chromsizes, tsinfo, z, x, settings=None):
+    """
+    Return the bedfile entries overlapping tile `x` at zoom level `z`.
+
+    Available settings:
+        MAX_BEDFILE_ENTRIES: int, larger tiles are randomly subsampled to it
+    """
+    settings = {} if settings is None else settings
+
+    try:
+        bed = get_bedfile_values(filename, chromsizes, settings)
+    except KeyError as ke:
+        message = f"Key error: (bedfile tab separated? correct chromsizes?) {str(ke)}"
+        return {"error": message}
+
+    frame = pd.read_json(io.StringIO(bed["rows"]))
+    # pandas 2.x converts integer column names to strings during JSON round-trip
+    frame.columns = [
+        int(name) if isinstance(name, str) and name.isdigit() else name
+        for name in frame.columns
+    ]
+    to_entry = ft.partial(
+        row_to_bedlike, css=bed["css"], orig_columns=bed["orig_columns"]
+    )
+
+    # tile bounds in cumulative (genome-wide) coordinates
+    tile_start = x * tsinfo["max_width"] / 2**z
+    tile_end = (x + 1) * tsinfo["max_width"] / 2**z
+    in_tile = frame.query(f"xEnd >= {tile_start} & xStart <= {tile_end}")
+
+    limit = settings.get("MAX_BEDFILE_ENTRIES") or 1024  # default when unset or 0
+    if len(in_tile) > limit:
+        in_tile = in_tile.sample(limit)
+
+    # one row_to_bedlike dict per remaining row
+    return list(in_tile.apply(to_entry, axis=1).values)
+
+
+def tiles(
+    filename, tile_ids, chromsizes, index_filename, settings=None, single_tile_func=None
+):
+    """Return a list of (tile_id, values) pairs, one per entry of `tile_ids`.
+
+    With a truthy `index_filename` tiles come from single_indexed_tile, otherwise
+    from `single_tile_func` (single_tile unless another function is passed).
+    """
+    fetch_unindexed = single_tile if single_tile_func is None else single_tile_func
+    tsinfo = tileset_info(filename, chromsizes, index_filename)
+    settings = {} if settings is None else settings
+
+    if not isinstance(filename, str):
+        # we were handed a file object: use it as it is
+        file = filename
+    elif index_filename:
+        # If the file is indexed we need to disable compression so that
+        # tabix indexing can retrieve the correct positions
+        file = open(filename, "rb", compression="disable")
+    else:
+        # If the file isn't indexed it is loaded into a dataframe
+        # and that requires the compression to be resolved
+        file = open(filename, "rb")
+
+    if index_filename:
+        tbx_index = ctt.load_tbi_idx(index_filename)
+
+    tile_values = []
+    for tile_id in tile_ids:
+        # drop the options suffix and the leading id part, keep z and x
+        position = tile_id.split(TILE_OPTIONS_CHAR)[0].split(".")[1:3]
+        position = [int(part) for part in position]
+
+        if len(position) < 2:
+            raise IndexError("Not enough tile info present")
+        z, x = position
+
+        if not index_filename:
+            values = fetch_unindexed(
+                file, chromsizes, tsinfo, z, x, settings=settings
+            )
+        else:
+            values = single_indexed_tile(
+                file,
+                index_filename,
+                chromsizes,
+                tsinfo,
+                z,
+                x,
+                tbx_index=tbx_index,
+                settings=settings,
+            )
+
+        tile_values.append((tile_id, values))
+
+    return tile_values
+
+
+class BedfileEntry(BaseModel):  # pydantic model with the fields chrom, start and end
+    chrom: str
+    start: int
+    end: int
+
+
+def regions(filename, chromsizes, offset, limit, settings=None):
+    """Return a list of regions in the range.
+
+    Arguments:
+        filename: The name of the file
+        chromsizes: The chromosome sizes (get_bedfile_values calls .cumsum() on them)
+        offset: The offset from the beginning of the file at which to start fetching
+        limit: The total number of entries to fetch
+        settings: Optional settings for get_bedfile_values (default: no settings)
+    """
+    settings = {} if settings is None else settings  # no shared mutable default
+    vals = get_bedfile_values(filename, chromsizes, settings=settings)
+
+    def row_iterator():
+        _df = pd.read_json(io.StringIO(vals["rows"]))
+        _df.columns = [int(c) if isinstance(c, str) and c.isdigit() else c for c in _df.columns]
+        for _, row in _df.iterrows():
+            yield {
+                "uid": row["ix"],
+                "chrOffset": row["chromStart"],
+                "xStart": row["xStart"],
+                "xEnd": row["xEnd"],
+                "fields": list(row[vals["orig_columns"]].array),
+            }
+
+    return generic_regions(row_iterator(), offset, limit)
diff --git a/clodius/tiles/bedpe.py b/clodius/tiles/bedpe.py
new file mode 100644
index 00000000..0f91dc5a
--- /dev/null
+++ b/clodius/tiles/bedpe.py
@@ -0,0 +1,272 @@
+import functools as ft
+import hashlib
+import math
+import random
+
+import pandas as pd
+from pandas.errors import EmptyDataError
+
+from clodius.utils import TILE_OPTIONS_CHAR
+from clodius.utils import get_file_compression
+
+cache = []
+
+
+class LRUCache:  # least-recently-used cache bounded to `capacity` entries
+    def __init__(self, capacity):
+        self.capacity = capacity
+        self.tm = 0  # logical clock, bumped on every cache hit and every set
+        self.cache = {}  # key -> cached value
+        self.lru = {}  # key -> clock value of the most recent access
+
+    def get(self, key):
+        if key not in self.cache:
+            return None  # a miss leaves the clock untouched
+        self.lru[key] = self.tm
+        self.tm += 1
+        return self.cache[key]
+
+    def set(self, key, value):
+        # Evict only when a *new* key would overflow the cache: overwriting an
+        # existing key must not throw away an unrelated entry.
+        if key not in self.cache and len(self.cache) >= self.capacity:
+            old_key = min(self.lru, key=self.lru.get)  # least recently used
+            del self.cache[old_key], self.lru[old_key]
+        self.cache[key] = value
+        self.lru[key] = self.tm
+        self.tm += 1
+
+
+cache = LRUCache(1)
+
+
+def tileset_info(filename, chromsizes=None):
+    """
+    Return the bounds of this tileset. The bounds should encompass the entire
+    width of this dataset, i.e. the sum of all chromosome sizes.
+
+    `filename` is a path or a binary file object. The result holds "max_width",
+    "max_zoom", "chromsizes", "min_pos", "max_pos" and "header" (the first row
+    when it looks like a header, else ""), or a single "error" message when no
+    chromsizes are passed in or the file is larger than 20e6 bytes.
+    """
+    if isinstance(filename, str):
+        # open paths ourselves so that the handle is closed again on return
+        with open(filename, "rb") as f:
+            return tileset_info(f, chromsizes)
+
+    compression = get_file_compression(filename)
+
+    # peek at the first two rows, with and without the very first one, to
+    # find out whether the file starts with a header row
+    t = pd.read_csv(
+        filename, nrows=2, sep="\t", comment="#", header=None, compression=compression
+    )
+
+    header = ""
+
+    try:
+        filename.seek(0)
+        t_head = pd.read_csv(
+            filename,
+            nrows=2,
+            sep="\t",
+            comment="#",
+            header=None,
+            skiprows=1,
+            compression=compression,
+        )
+
+        # the first row counts as a header when dropping it changes the dtypes
+        if not (t.dtypes == t_head.dtypes).all():
+            header = "\t".join(t.head().values[0])
+    except EmptyDataError:
+        # nothing to parse once the first row is skipped: keep the empty header
+        pass
+
+    if chromsizes is None:
+        return {
+            "error": "No chromsizes found. Make sure the project has a chromsizes file or the assembly: tag is set"
+        }
+
+    # convert with int() so that we can serialize the int64s in the numpy array
+    chromsizes_list = [[chrom, int(size)] for chrom, size in chromsizes.items()]
+
+    max_width = sum(size for _, size in chromsizes_list)
+
+    # measure the file by seeking to its end (whence=2), then rewind
+    filename.seek(0, 2)
+    filesize = filename.tell()
+    filename.seek(0)
+
+    if filesize > 20e6:
+        return {"error": "File too large (>20Mb), please index"}
+
+    return {
+        "max_width": max_width,
+        "max_zoom": int(math.log(max_width) / math.log(2)),
+        "chromsizes": chromsizes_list,
+        "min_pos": [0, 0],
+        "max_pos": [max_width, max_width],
+        "header": header,
+    }
+
+
+def row_to_bedlike(row, css, orig_columns):
+    """Build a bedpe-style entry (x and y intervals) for one dataframe row.
+    The "importance" value is a fresh random number on every call."""
+    return {
+        "uid": row["ix"],
+        "xStart": row["xStart"],
+        "xEnd": row["xEnd"],
+        "yStart": row["yStart"],
+        "yEnd": row["yEnd"],
+        "xChrOffset": css[str(row[0])],
+        "yChrOffset": css[str(row[3])],
+        "importance": random.random(),
+        "fields": list(row[orig_columns]),
+    }
+
+
+def ts_hash(filename, chromsizes):
+    digest = hashlib.md5(str(chromsizes).encode("utf-8")).hexdigest()  # of str(chromsizes)
+    return "{}.{}".format(filename, digest)
+
+
+def bedpe_to_df(filename, chromsizes, tsinfo):
+    """Prepare the bedpe file so that we can query it.
+
+    Returns a dict with "rows" (the dataframe plus cumulative x/y start and end
+    columns), "orig_columns" and "css" (chromsizes index label -> cumulative
+    offset). The result is stored in the module-level cache.
+    """
+    hash_ = ts_hash(filename, chromsizes)
+
+    # reuse the processed table so that we don't have to read the entire thing
+    # and calculate cumulative start and end positions again
+    val = cache.get(hash_)
+    if val is not None:
+        return val
+
+    # if this file has a header, skip the first row
+    skiprows = 1 if len(tsinfo["header"]) else 0
+
+    def read_table(handle):
+        return pd.read_csv(
+            handle,
+            header=None,
+            comment="#",
+            sep="\t",
+            skiprows=skiprows,
+            compression=get_file_compression(handle),
+        )
+
+    if isinstance(filename, str):
+        # open paths ourselves so that the handle is closed again afterwards
+        with open(filename, "rb") as handle:
+            t = read_table(handle)
+    else:
+        t = read_table(filename)
+
+    # Only the fully processed result is cached below: caching the raw table
+    # here would poison the cache if one of the following lookups raises.
+    orig_columns = t.columns
+    css = chromsizes.cumsum().shift().fillna(0).to_dict()
+
+    # xStart and xEnd are cumulative start and end positions calculated
+    # as if the chromosomes are concatenated from end to end
+    t["xChromStart"] = [css[str(x)] for x in t[0].values]
+    t["yChromStart"] = [css[str(x)] for x in t[3].values]
+
+    t["xStart"] = t["xChromStart"] + t[1]
+    t["xEnd"] = t["xChromStart"] + t[2]
+
+    t["yStart"] = t["yChromStart"] + t[4]
+    t["yEnd"] = t["yChromStart"] + t[5]
+
+    t["ix"] = t.index
+
+    val = {"rows": t, "orig_columns": orig_columns, "css": css}
+    cache.set(hash_, val)
+
+    return val
+
+
+def single_2d_tile(filename, chromsizes, tsinfo, z, x, y):
+    """Return the bedpe entries overlapping tile (x, y) at zoom level z,
+    randomly subsampled to at most 512 entries."""
+    max_per_tile = 512
+    bedpe = bedpe_to_df(filename, chromsizes, tsinfo)
+    to_entry = ft.partial(
+        row_to_bedlike, css=bedpe["css"], orig_columns=bedpe["orig_columns"]
+    )
+
+    # tile bounds along both axes, in cumulative coordinates
+    x_lo = x * tsinfo["max_width"] / 2**z
+    x_hi = (x + 1) * tsinfo["max_width"] / 2**z
+    y_lo = y * tsinfo["max_width"] / 2**z
+    y_hi = (y + 1) * tsinfo["max_width"] / 2**z
+
+    hits = bedpe["rows"].query(
+        f"xEnd >= {x_lo} & xStart <= {x_hi} & "
+        f"yEnd >= {y_lo} & yStart <= {y_hi}"
+    )
+
+    if len(hits) > max_per_tile:
+        hits = hits.sample(max_per_tile)
+
+    # one row_to_bedlike dict per remaining row
+    return list(hits.apply(to_entry, axis=1).values)
+
+
+def single_1d_tile(filename, chromsizes, tsinfo, z, x):
+    """Return the bedpe entries with either end overlapping tile x at zoom
+    level z, randomly subsampled to at most 512 entries."""
+    max_per_tile = 512
+    bedpe = bedpe_to_df(filename, chromsizes, tsinfo)
+    to_entry = ft.partial(
+        row_to_bedlike, css=bedpe["css"], orig_columns=bedpe["orig_columns"]
+    )
+
+    lo = x * tsinfo["max_width"] / 2**z
+    hi = (x + 1) * tsinfo["max_width"] / 2**z
+
+    # keep a row when its x interval or its y interval overlaps [lo, hi]
+    hits = bedpe["rows"].query(
+        f"xEnd >= {lo} & xStart <= {hi} | yEnd >= {lo} & yStart <= {hi}"
+    )
+
+    if len(hits) > max_per_tile:
+        hits = hits.sample(max_per_tile)
+
+    # one row_to_bedlike dict per remaining row
+    return list(hits.apply(to_entry, axis=1).values)
+
+
+def tiles(filename, tile_ids, chromsizes):
+    """Return a list of (tile_id, values) pairs, one per entry of `tile_ids`.
+
+    Tile ids carrying z and x yield 1D tiles, ids carrying z, x and y yield
+    2D tiles; anything shorter raises IndexError.
+    """
+    tsinfo = tileset_info(filename, chromsizes)
+    tile_values = []
+
+    for tile_id in tile_ids:
+        # drop the options suffix and the leading id part, keep up to z.x.y
+        position = tile_id.split(TILE_OPTIONS_CHAR)[0].split(".")[1:4]
+        position = [int(part) for part in position]
+
+        if len(position) < 2:
+            raise IndexError("Not enough tile info present (z.x[.y])")
+
+        if len(position) == 2:
+            z, x = position
+            values = single_1d_tile(filename, chromsizes, tsinfo, z, x)
+        else:
+            z, x, y = position
+            values = single_2d_tile(filename, chromsizes, tsinfo, z, x, y)
+
+        tile_values.append((tile_id, values))
+
+    return tile_values
diff --git a/clodius/tiles/bigbed.py b/clodius/tiles/bigbed.py
index ffa54cf2..da8618e3 100644
--- a/clodius/tiles/bigbed.py
+++ b/clodius/tiles/bigbed.py
@@ -1,13 +1,10 @@
-import bbi
-import functools as ft
 import logging
-import numpy as np
-import pandas as pd
+import numpy.random as nr
 import random
-import clodius.tiles.bigwig as hgbi
-from .utils import abs2genomic, get_quadtree_depth
-
-from concurrent.futures import ThreadPoolExecutor
+import clodius.tiles.bigwig as hgbw
+from clodius.utils import TILE_OPTIONS_CHAR
+import pybigtools
+from hashlib import md5
 
 DEFAULT_RANGE_MODE = "significant"
 MIN_ELEMENTS = 1
@@ -21,159 +18,168 @@
 
 
 def tileset_info(bbpath, chromsizes=None):
-    ti = hgbi.tileset_info(bbpath, chromsizes)
+    ti = hgbw.tileset_info(bbpath, chromsizes)
     ti["range_modes"] = range_modes
     return ti
 
 
-def fetch_data(a):
-    (
-        bbpath,
-        binsize,
-        chromsizes,
-        range_mode,
-        min_elements,
-        max_elements,
-        cid,
-        start,
-        end,
-    ) = a
-
-    """
-    Retrieve tile data from a bigbed file.
-
-    This approach currently returns a subset of intervals within the bounds of the specified
-    query range.
-
-    The subset is determined, at this time, by using the population of scores in the score
-    column of the BED data to generate a quantile value that would allow, at most, a maximum
-    number of elements (either a default or specified value). Because intervals are discrete
-    elements, it is possible for a quantile to allow a few more elements than the desired
-    limit; in this case, a uniformly-random sample is drawn from the thresholded set without
-    replacement.
-
-    Parameters
-    ----------
-    bbpath: string
-        The path to the bigBed media file
-    binsize: integer
-        Resolution of a bin at a particular zoom level
-    chromsizes: [[chrom, size],...]
-        A 2d array containing chromosome names and sizes. Overrides the
-        chromsizes in chromsizes_map
-    range_mode: string or None
-        If specified, determines what rule is applied to intervals retrieved
-        over the specified chromosome, start, and end range
-    min_elements: integer
-        For fetched intervals, return no fewer than the specified number
-    max_elements: integer
-        For fetched intervals, return no more than the specified number
-    cid: integer
-        Index of chromosome associated with chromsizes
-    start: integer
-        Start position of interval query (relative to chromosome)
-    end: integer
-        End position of interval query (relative to chromosome)
-
-    Returns
-    -------
-    intervals: [{'chrOffset': integer, 'importance': integer, 'fields': [interval]}, ... ]
-        A list of beddb-like gene annotation objects
-    """
-
-    try:
-        chrom = chromsizes.index[cid]
-
-        fetch_factory = ft.partial(bbi.fetch_intervals, bbpath, chrom, start, end)
-
-        if range_mode == "significant":
-            intervals, intervals2 = fetch_factory(), fetch_factory()
-        else:
-            intervals, intervals2 = fetch_factory(), fetch_factory()
-
-    except IndexError:
-        # beyond the range of the available chromosomes
-        # probably means we've requested a range of absolute
-        # coordinates that stretch beyond the end of the genome
-        intervals, intervals2 = None, None
-
-    except KeyError:
-        # probably requested a chromosome that doesn't exist (e.g. chrM)
-        intervals, intervals2 = None, None
-
-    offset = 0
-    offsetIdx = 0
-    chrOffsets = {}
-    for chrSize in chromsizes:
-        chrOffsets[chromsizes.index[offsetIdx]] = offset
-        offset += chrSize
-        offsetIdx += 1
-
-    final_intervals = []
-    intervals_length = 0
-    scores = []
-
-    if not intervals:
-        return final_intervals
-
-    for interval in intervals:
-        try:
-            scores.append(int(interval[4]))
-        except (ValueError, IndexError):
-            scores.append(DEFAULT_SCORE)
-        intervals_length += 1
-
-    # generate beddb-like elements for parsing by the higlass plugin
-    if intervals_length >= min_elements and intervals_length <= max_elements:
-        for interval in intervals2:
-            try:
-                score = int(interval[4])
-                final_intervals.append(
-                    {
-                        "chrOffset": chrOffsets[chrom],
-                        "importance": score,
-                        "fields": interval,
-                    }
-                )
-            except (ValueError, IndexError):
-                final_intervals.append(
-                    {
-                        "chrOffset": chrOffsets[chrom],
-                        "importance": DEFAULT_SCORE,
-                        "fields": interval,
-                    }
-                )
-
-    elif intervals_length > max_elements:
-        thresholded_intervals = []
-        desired_perc = max_elements / intervals_length
-        thresholded_score = int(np.quantile(scores, 1 - desired_perc))
-        for interval in intervals2:
-            try:
-                score = int(interval[4])
-                if score >= thresholded_score:
-                    thresholded_intervals.append(
-                        {
-                            "chrOffset": chrOffsets[chrom],
-                            "importance": score,
-                            "fields": interval,
-                        }
-                    )
-            except (ValueError, IndexError):
-                if DEFAULT_SCORE >= thresholded_score:
-                    thresholded_intervals.append(
-                        {
-                            "chrOffset": chrOffsets[chrom],
-                            "importance": DEFAULT_SCORE,
-                            "fields": interval,
-                        }
-                    )
-        thresholded_intervals_length = len(thresholded_intervals)
-        if thresholded_intervals_length > max_elements:
-            indices = random.sample(range(thresholded_intervals_length), max_elements)
-            final_intervals = [thresholded_intervals[i] for i in sorted(indices)]
-
-    return final_intervals
+# def fetch_data(a):
+#     (
+#         bbpath,
+#         binsize,
+#         chromsizes,
+#         range_mode,
+#         min_elements,
+#         max_elements,
+#         cid,
+#         start,
+#         end,
+#     ) = a
+
+#     """
+#     Retrieve tile data from a bigbed file.
+
+#     This approach currently returns a subset of intervals within the bounds of the specified
+#     query range.
+
+#     The subset is determined, at this time, by using the population of scores in the score
+#     column of the BED data to generate a quantile value that would allow, at most, a maximum
+#     number of elements (either a default or specified value). Because intervals are discrete
+#     elements, it is possible for a quantile to allow a few more elements than the desired
+#     limit; in this case, a uniformly-random sample is drawn from the thresholded set without
+#     replacement.
+
+#     Parameters
+#     ----------
+#     bbpath: string
+#         The path to the bigBed media file
+#     binsize: integer
+#         Resolution of a bin at a particular zoom level
+#     chromsizes: [[chrom, size],...]
+#         A 2d array containing chromosome names and sizes. Overrides the
+#         chromsizes in chromsizes_map
+#     range_mode: string or None
+#         If specified, determines what rule is applied to intervals retrieved
+#         over the specified chromosome, start, and end range
+#     min_elements: integer
+#         For fetched intervals, return no fewer than the specified number
+#     max_elements: integer
+#         For fetched intervals, return no more than the specified number
+#     cid: integer
+#         Index of chromosome associated with chromsizes
+#     start: integer
+#         Start position of interval query (relative to chromosome)
+#     end: integer
+#         End position of interval query (relative to chromosome)
+
+#     Returns
+#     -------
+#     intervals: [{'chrOffset': integer, 'importance': integer, 'fields': [interval]}, ... ]
+#         A list of beddb-like gene annotation objects
+#     """
+
+#     try:
+#         chrom = chromsizes.index[cid]
+
+#         fetch_factory = ft.partial(bbi.fetch_intervals, bbpath, chrom, start, end)
+
+#         if range_mode == "significant":
+#             intervals, intervals2 = fetch_factory(), fetch_factory()
+#         else:
+#             intervals, intervals2 = fetch_factory(), fetch_factory()
+
+#     except IndexError:
+#         # beyond the range of the available chromosomes
+#         # probably means we've requested a range of absolute
+#         # coordinates that stretch beyond the end of the genome
+#         intervals, intervals2 = None, None
+
+#     except KeyError:
+#         # probably requested a chromosome that doesn't exist (e.g. chrM)
+#         intervals, intervals2 = None, None
+
+#     offset = 0
+#     offsetIdx = 0
+#     chrOffsets = {}
+#     for chrSize in chromsizes:
+#         chrOffsets[chromsizes.index[offsetIdx]] = offset
+#         offset += chrSize
+#         offsetIdx += 1
+
+#     final_intervals = []
+#     intervals_length = 0
+#     scores = []
+
+#     return [
+#         {
+#             "chrOffset": chrOffsets[chrom],
+#             "importance": random.random(),
+#             "fields": interval,
+#         }
+#         for interval in intervals2
+#     ]
+
+#     if not intervals:
+#         return final_intervals
+
+#     for interval in intervals:
+#         try:
+#             scores.append(int(interval[4]))
+#         except (ValueError, IndexError):
+#             scores.append(DEFAULT_SCORE)
+#         intervals_length += 1
+
+#     # generate beddb-like elements for parsing by the higlass plugin
+#     if intervals_length >= min_elements and intervals_length <= max_elements:
+#         for interval in intervals2:
+#             try:
+#                 score = int(interval[4])
+#                 final_intervals.append(
+#                     {
+#                         "chrOffset": chrOffsets[chrom],
+#                         "importance": score,
+#                         "fields": interval,
+#                     }
+#                 )
+#             except (ValueError, IndexError):
+#                 final_intervals.append(
+#                     {
+#                         "chrOffset": chrOffsets[chrom],
+#                         "importance": DEFAULT_SCORE,
+#                         "fields": interval,
+#                     }
+#                 )
+
+#     elif intervals_length > max_elements:
+#         thresholded_intervals = []
+#         desired_perc = max_elements / intervals_length
+#         thresholded_score = int(np.quantile(scores, 1 - desired_perc))
+#         for interval in intervals2:
+#             try:
+#                 score = int(interval[4])
+#                 if score >= thresholded_score:
+#                     thresholded_intervals.append(
+#                         {
+#                             "chrOffset": chrOffsets[chrom],
+#                             "importance": score,
+#                             "fields": interval,
+#                         }
+#                     )
+#             except (ValueError, IndexError):
+#                 if DEFAULT_SCORE >= thresholded_score:
+#                     thresholded_intervals.append(
+#                         {
+#                             "chrOffset": chrOffsets[chrom],
+#                             "importance": DEFAULT_SCORE,
+#                             "fields": interval,
+#                         }
+#                     )
+#         thresholded_intervals_length = len(thresholded_intervals)
+#         if thresholded_intervals_length > max_elements:
+#             indices = random.sample(range(thresholded_intervals_length), max_elements)
+#             final_intervals = [thresholded_intervals[i] for i in sorted(indices)]
+
+#     return final_intervals
 
 
 def get_bigbed_tile(
@@ -186,43 +192,73 @@ def get_bigbed_tile(
     min_elements=None,
     max_elements=None,
 ):
+    bbpath.seek(0)
+    f = pybigtools.open(bbpath)
+
     if chromsizes is None:
-        chromsizes = hgbi.get_chromsizes(bbpath)
+        chromsizes = hgbw.get_chromsizes(bbpath)
 
     if min_elements is None:
         min_elements = MIN_ELEMENTS
     if max_elements is None:
         max_elements = MAX_ELEMENTS
 
-    resolutions = hgbi.get_zoom_resolutions(chromsizes)
-    binsize = resolutions[zoom_level]
-
-    cids_starts_ends = list(abs2genomic(chromsizes, start_pos, end_pos))
-
-    with ThreadPoolExecutor(max_workers=16) as e:
-        arrays = list(
-            e.map(
-                fetch_data,
-                [
-                    tuple(
-                        [
-                            bbpath,
-                            binsize,
-                            chromsizes,
-                            range_mode,
-                            min_elements,
-                            max_elements,
-                        ]
-                        + list(c)
-                    )
-                    for c in cids_starts_ends
-                ],
-            )
-        )
+    cids_starts_ends = list(hgbw.abs2genomic(chromsizes, start_pos, end_pos))
 
-    # concatenate bigBed tileset data across chromosomes, so that it looks similar to a beddb response
-    results = [x for x in arrays if x != []]
-    return [item for sublist in results for item in sublist]
+    offset = 0
+    offsetIdx = 0
+    chrOffsets = {}
+    for chrSize in chromsizes:
+        chrOffsets[chromsizes.index[offsetIdx]] = offset
+        offset += chrSize
+        offsetIdx += 1
+
+    intervals = []
+
+    total_length = sum([c[2] - c[1] for c in cids_starts_ends])
+    probs = [(c[2] - c[1]) / total_length for c in cids_starts_ends]
+
+    # If there's a million chromosomes, pick at most 128 ones at random
+    # weighted by their size
+    NUM_TO_PICK = 128
+    if NUM_TO_PICK < len(probs):
+        rnds_ixs = nr.choice(
+            len(cids_starts_ends), NUM_TO_PICK, p=probs, replace=NUM_TO_PICK
+        )
+        chosen_starts_ends = [cids_starts_ends[ix] for ix in rnds_ixs]
+    else:
+        chosen_starts_ends = cids_starts_ends
+
+    for c in chosen_starts_ends:
+        if c[0] >= len(chromsizes):
+            continue
+        # intervals += bbi.fetch_intervals(bbpath, chromsizes.index[c[0]], c[1], c[2])
+        intervals += [
+            # We're going to append the chromosome name to each record
+            (chromsizes.index[c[0]],) + r
+            for r in f.records(chromsizes.index[c[0]], c[1], c[2])
+        ]
+
+    MAX_RET = 100
+
+    if len(intervals) > MAX_RET:
+        chosen_intervals = random.choices(intervals, k=MAX_RET)
+    else:
+        chosen_intervals = intervals
+
+    all_intervals = [
+        {
+            "chrOffset": chrOffsets[interval[0]],
+            "importance": random.random(),
+            "uid": md5("".join(map(str, interval)).encode('utf8')).hexdigest(),
+            "fields": interval,
+            'xStart': chrOffsets[interval[0]] + interval[1],
+            'xEnd': chrOffsets[interval[0]] + interval[2],
+        }
+        for interval in chosen_intervals
+    ]
+
+    return all_intervals
 
 
 def tiles(bbpath, tile_ids, chromsizes_map={}, chromsizes=None):
@@ -255,8 +291,8 @@ def tiles(bbpath, tile_ids, chromsizes_map={}, chromsizes=None):
 
     generated_tiles = []
     for tile_id in tile_ids:
-        tile_option_parts = tile_id.split("|")[1:]
-        tile_no_options = tile_id.split("|")[0]
+        tile_option_parts = tile_id.split(TILE_OPTIONS_CHAR)[1:]
+        tile_no_options = tile_id.split(TILE_OPTIONS_CHAR)[0]
         tile_id_parts = tile_no_options.split(".")
         tile_position = list(map(int, tile_id_parts[1:3]))
         return_value = (
@@ -284,10 +320,8 @@ def tiles(bbpath, tile_ids, chromsizes_map={}, chromsizes=None):
             min_elements = MIN_ELEMENTS
             max_elements = MAX_ELEMENTS
 
-        if chromsizes:
-            chromnames = [c[0] for c in chromsizes]
-            chromlengths = [int(c[1]) for c in chromsizes]
-            chromsizes_to_use = pd.Series(chromlengths, index=chromnames)
+        if chromsizes is not None:
+            chromsizes_to_use = chromsizes
         else:
             chromsizes_id = None
             if "cos" in tile_options:
@@ -303,10 +337,10 @@ def tiles(bbpath, tile_ids, chromsizes_map={}, chromsizes=None):
         # this doesn't combine multiple consequetive ids, which
         # would speed things up
         if chromsizes_to_use is None:
-            chromsizes_to_use = hgbi.get_chromsizes(bbpath)
+            chromsizes_to_use = hgbw.get_chromsizes(bbpath)
 
-        max_depth = get_quadtree_depth(chromsizes_to_use, hgbi.TILE_SIZE)
-        tile_size = hgbi.TILE_SIZE * 2 ** (max_depth - zoom_level)
+        max_depth = hgbw.get_quadtree_depth(chromsizes_to_use)
+        tile_size = hgbw.TILE_SIZE * 2 ** (max_depth - zoom_level)
         start_pos = tile_pos * tile_size
         end_pos = start_pos + tile_size
 
@@ -327,4 +361,4 @@ def tiles(bbpath, tile_ids, chromsizes_map={}, chromsizes=None):
 
 
 def chromsizes(filename):
-    return hgbi.chromsizes(filename)
+    return hgbw.chromsizes(filename)
diff --git a/clodius/tiles/bigwig.py b/clodius/tiles/bigwig.py
index f9d35839..68b6983e 100644
--- a/clodius/tiles/bigwig.py
+++ b/clodius/tiles/bigwig.py
@@ -1,11 +1,13 @@
-import bbi
-import clodius.tiles.format as hgfo
+import functools as ft
 import logging
+import math
+import re
 import numpy as np
 import pandas as pd
-from .utils import get_quadtree_depth, abs2genomic, natsorted
+import pybigtools
 
-from concurrent.futures import ThreadPoolExecutor
+import clodius.tiles.format as hgfo
+from clodius.utils import TILE_OPTIONS_CHAR
 
 MAX_THREADS = 4
 TILE_SIZE = 1024
@@ -17,14 +19,69 @@
 aggregation_modes["min"] = {"name": "Min", "value": "min"}
 aggregation_modes["max"] = {"name": "Max", "value": "max"}
 aggregation_modes["std"] = {"name": "Standard Deviation", "value": "std"}
+aggregation_modes["sum"] = {"name": "Sum", "value": "sum"}
 
 range_modes = {}
 range_modes["minMax"] = {"name": "Min-Max", "value": "minMax"}
 range_modes["whisker"] = {"name": "Whisker", "value": "whisker"}
 
 
+def get_quadtree_depth(chromsizes):
+    tile_size_bp = TILE_SIZE  # tile width in base pairs (no bin-size factor here)
+    min_tile_cover = np.ceil(sum(chromsizes) / tile_size_bp)  # tiles to span the genome
+    return int(np.ceil(np.log2(min_tile_cover)))  # smallest depth with 2**depth >= cover
+
+
 def get_zoom_resolutions(chromsizes):
-    return [2 ** x for x in range(get_quadtree_depth(chromsizes, TILE_SIZE) + 1)][::-1]
+    return [2**x for x in range(get_quadtree_depth(chromsizes) + 1)][::-1]
+
+
+def natsort_key(s, _NS_REGEX=re.compile(r"(\d+)", re.U)):  # regex compiled once as default
+    return tuple([int(x) if x.isdigit() else x for x in _NS_REGEX.split(s) if x])
+
+
+def natcmp(x, y):
+    """Three-way natural-order comparator for chromosome names (-1, 0 or 1)."""
+    if x.find("_") >= 0:
+        x_parts = x.split("_")
+        if y.find("_") >= 0:
+            # chr_1 vs chr_2: compare the parts after the first underscore
+            y_parts = y.split("_")
+
+            return natcmp(x_parts[1], y_parts[1])
+        else:
+            # chr_1 vs chr1: the name without an underscore (chr1) comes first
+            return 1
+    if y.find("_") >= 0:
+        # chr1 vs chr_1: y comes second
+        return -1
+
+    _NS_REGEX = re.compile(r"(\d+)", re.U)
+    x_parts = tuple([int(a) if a.isdigit() else a for a in _NS_REGEX.split(x) if a])
+    y_parts = tuple([int(a) if a.isdigit() else a for a in _NS_REGEX.split(y) if a])
+
+    # keys are listed in the reverse of the intended order: a name containing
+    # "m", "y" or "x" sorts after a name that does not
+    for key in ["m", "y", "x"]:
+        if key in y.lower():
+            return -1
+        if key in x.lower():
+            return 1
+
+    try:
+        if x_parts < y_parts:
+            return -1
+        elif x_parts > y_parts:  # x against y; the old test repeated the branch above
+            return 1
+        else:
+            return 0
+    except TypeError:
+        # int and str segments met at the same position and cannot be ordered
+        return 1
+
+
+def natsorted(iterable):
+    return sorted(iterable, key=ft.cmp_to_key(natcmp))  # ordering is defined by natcmp
 
 
 def get_chromsizes(bwpath):
@@ -34,12 +91,34 @@ def get_chromsizes(bwpath):
     Also, return NaNs from any missing chromosomes in bbi.fetch
 
     """
-    chromsizes = bbi.chromsizes(bwpath)
+    if not isinstance(bwpath, str):
+        # we already have a file pointer
+        bwpath = bwpath
+    else:
+        bwpath = open(bwpath, "rb")
+
+    bwpath.seek(0)
+    f = pybigtools.open(bwpath)
+    chromsizes = f.chroms()
     chromosomes = natsorted(chromsizes.keys())
     chrom_series = pd.Series(chromsizes)[chromosomes]
     return chrom_series
 
 
+def abs2genomic(chromsizes, start_pos, end_pos):
+    abs_chrom_offsets = np.r_[0, np.cumsum(chromsizes.values)]  # cumulative chromosome starts
+    cid_lo, cid_hi = (
+        np.searchsorted(abs_chrom_offsets, [start_pos, end_pos], side="right") - 1
+    )  # indices of the chromosomes containing start_pos and end_pos
+    rel_pos_lo = start_pos - abs_chrom_offsets[cid_lo]
+    rel_pos_hi = end_pos - abs_chrom_offsets[cid_hi]
+    start = rel_pos_lo
+    for cid in range(cid_lo, cid_hi):
+        yield cid, start, chromsizes.iloc[cid]  # from start to the end of this chromosome
+        start = 0  # every following chromosome is covered from its beginning
+    yield cid_hi, start, rel_pos_hi  # final piece stops at end_pos
+
+
 def tileset_info(bwpath, chromsizes=None):
     """
     Get the tileset info for a bigWig file
@@ -61,6 +140,8 @@ def tileset_info(bwpath, chromsizes=None):
                     'max_zoom': 7
                     }
     """
+    TILE_SIZE = 1024
+
     if chromsizes is None:
         chromsizes = get_chromsizes(bwpath)
         chromsizes_list = []
@@ -69,19 +150,19 @@ def tileset_info(bwpath, chromsizes=None):
             chromsizes_list += [[chrom, int(size)]]
     else:
         chromsizes_list = chromsizes
-        chromsizes = [int(c[1]) for c in chromsizes_list]
 
-    max_zoom = get_quadtree_depth(chromsizes, TILE_SIZE)
+    min_tile_cover = np.ceil(sum([int(c[1]) for c in chromsizes_list]) / TILE_SIZE)
+    max_zoom = int(np.ceil(np.log2(min_tile_cover)))
 
     tileset_info = {
         "min_pos": [0],
-        "max_pos": [sum(chromsizes)],
-        "max_width": TILE_SIZE * 2 ** max_zoom,
+        "max_pos": [TILE_SIZE * 2**max_zoom],
+        "max_width": TILE_SIZE * 2**max_zoom,
         "tile_size": TILE_SIZE,
         "max_zoom": max_zoom,
         "chromsizes": chromsizes_list,
-        "aggregation_modes": aggregation_modes,
-        "range_modes": range_modes,
+        "aggregation_modes": list(aggregation_modes.values()),
+        "range_modes": list(range_modes.values()),
     }
     return tileset_info
 
@@ -97,31 +178,45 @@ def fetch_data(a):
     if range_mode == "whisker":
         n_dim = 4
 
+    # print("bwpath", bwpath)
     x = np.zeros((n_bins, n_dim)) if n_dim > 1 else np.zeros(n_bins)
 
+    if not isinstance(bwpath, str):
+        # we already have a file pointer
+        bwpath = bwpath
+    else:
+        bwpath = open(bwpath, "rb")
+
+    bwpath.seek(0)
+    b = pybigtools.open(bwpath)
+
     try:
         chrom = chromsizes.index[cid]
-        clen = chromsizes.values[cid]
 
-        args = [bwpath, chrom, start, end]
-        kwargs = {"bins": n_bins, "missing": np.nan}
+        args = [str(chrom), int(start), int(end), n_bins]
 
-        if range_mode == "minMax":
-            x[:, 0] = bbi.fetch(*args, **dict(kwargs, summary="min"))
-            x[:, 1] = bbi.fetch(*args, **dict(kwargs, summary="max"))
+        try:
+            if range_mode == "minMax":
+                x[:, 0] = b.values(*args, "min")
+                x[:, 1] = b.values(*args, "max")
 
-        elif range_mode == "whisker":
-            x[:, 0] = bbi.fetch(*args, **dict(kwargs, summary="min"))
-            x[:, 1] = bbi.fetch(*args, **dict(kwargs, summary="max"))
-            x[:, 2] = bbi.fetch(*args, **dict(kwargs, summary="mean"))
-            x[:, 3] = bbi.fetch(*args, **dict(kwargs, summary="std"))
+            elif range_mode == "whisker":
+                x[:, 0] = b.values(*args, "min")
+                x[:, 1] = b.values(*args, "max")
+                x[:, 2] = b.values(*args, "mean")
+                x[:, 3] = b.values(*args, "std")
 
-        else:
-            x[:] = bbi.fetch(*args, **dict(kwargs, summary=aggregation_mode))
-
-        # drop the very last bin if it is smaller than the binsize
-        if end == clen and clen % binsize != 0:
-            x = x[:-1]
+            else:
+                # print("args", [a for a in args], "aggregation_mode", aggregation_mode)
+                x[:] = b.values(*args, aggregation_mode)
+        except Exception as ex:
+            if "No chromomsome with name" in str(ex):
+                raise KeyError
+
+        # the following is commented out because it is handled in get_bigwig_tile
+        # # drop the very last bin if it is smaller than the binsize
+        # if end == clen and clen % binsize != 0:
+        #     x = x[:-1]
     except IndexError:
         # beyond the range of the available chromosomes
         # probably means we've requested a range of absolute
@@ -148,23 +243,58 @@ def get_bigwig_tile(
 
     resolutions = get_zoom_resolutions(chromsizes)
     binsize = resolutions[zoom_level]
-
     cids_starts_ends = list(abs2genomic(chromsizes, start_pos, end_pos))
-    with ThreadPoolExecutor(max_workers=16) as e:
-        arrays = list(
-            e.map(
-                fetch_data,
-                [
-                    tuple(
-                        [bwpath, binsize, chromsizes, aggregation_mode, range_mode]
-                        + list(c)
-                    )
-                    for c in cids_starts_ends
-                ],
-            )
+    arrays = [
+        fetch_data(
+            tuple([bwpath, binsize, chromsizes, aggregation_mode, range_mode] + list(c))
         )
+        for c in cids_starts_ends
+    ]
+
+    # with ThreadPoolExecutor(max_workers=1) as e:
+    #     arrays = list(
+    #         e.map(
+    #             fetch_data,
+    #             [
+    #                 tuple(
+    #                     [bwpath, binsize, chromsizes, aggregation_mode, range_mode]
+    #                     + list(c)
+    #                )
+    #                 for c in cids_starts_ends
+    #             ],
+    #         )
+    #     )
+
+    current_data_position = 0
+    current_binned_data_position = 0
+
+    new_arrays = []
+
+    for (cid, start, end), x in zip(cids_starts_ends, arrays):
+        current_data_position += end - start
+
+        start_pos = math.floor(start / binsize)
+        end_pos = math.ceil(end / binsize)
+
+        # print("start", start, "end", end)
+        # print("start_pos", start_pos, "end_pos", end_pos)
+        # print("# bins calc", end_pos - start_pos)
+        # print("# bins actual", len(x))
+
+        if start_pos >= end_pos:
+            continue
+
+        current_binned_data_position += binsize * (end_pos - start_pos)
+        offset = current_binned_data_position - current_data_position
+
+        if offset > binsize:
+            current_binned_data_position -= binsize
+            x = x[:-1]
+
+        new_arrays.append(x)
 
-    return np.concatenate(arrays)
+    ret = np.concatenate(new_arrays)
+    return ret
 
 
 def tiles(bwpath, tile_ids, chromsizes_map={}, chromsizes=None):
@@ -181,7 +311,7 @@ def tiles(bwpath, tile_ids, chromsizes_map={}, chromsizes=None):
     chromsizes_map: {uid: []}
         A set of chromsizes listings corresponding to the parameters of the
         tile_ids. To be used if a chromsizes id is passed in with the tile id
-        with the `|cos:id` tag in the tile id
+        with the `,cos:id` tag in the tile id
     chromsizes: [[chrom, size],...]
         A 2d array containing chromosome names and sizes. Overrides the
         chromsizes in chromsizes_map
@@ -191,10 +321,11 @@ def tiles(bwpath, tile_ids, chromsizes_map={}, chromsizes=None):
     tile_list: [(tile_id, tile_data),...]
         A list of tile_id, tile_data tuples
     """
+    TILE_SIZE = 1024
     generated_tiles = []
     for tile_id in tile_ids:
-        tile_option_parts = tile_id.split("|")[1:]
-        tile_no_options = tile_id.split("|")[0]
+        tile_option_parts = tile_id.split(TILE_OPTIONS_CHAR)[1:]
+        tile_no_options = tile_id.split(TILE_OPTIONS_CHAR)[0]
         tile_id_parts = tile_no_options.split(".")
         tile_position = list(map(int, tile_id_parts[1:3]))
         return_value = tile_id_parts[3] if len(tile_id_parts) > 3 else "mean"
@@ -225,7 +356,7 @@ def tiles(bwpath, tile_ids, chromsizes_map={}, chromsizes=None):
         if chromsizes_to_use is None:
             chromsizes_to_use = get_chromsizes(bwpath)
 
-        max_depth = get_quadtree_depth(chromsizes_to_use, TILE_SIZE)
+        max_depth = get_quadtree_depth(chromsizes_to_use)
         tile_size = TILE_SIZE * 2 ** (max_depth - zoom_level)
         start_pos = tile_pos * tile_size
         end_pos = start_pos + tile_size
diff --git a/clodius/tiles/chromsizes.py b/clodius/tiles/chromsizes.py
index d3307824..ec2bc1b4 100644
--- a/clodius/tiles/chromsizes.py
+++ b/clodius/tiles/chromsizes.py
@@ -4,18 +4,7 @@
 logger = logging.getLogger(__name__)
 
 
-def tileset_info(filename: str) -> dict:
-    """Return a standard higlass tileset info object that contains
-    chromsizes as an element.
-
-    The chromsizes in the returned object will be a list of [name, size]
-    tuples.
-
-    [
-        ['chr1', 1000],
-        ['chr2', 2000]
-    ]
-    """
+def tileset_info(filename):
     chromsizes = get_tsv_chromsizes(filename)
 
     max_width = sum([int(c[1]) for c in chromsizes])
@@ -30,11 +19,11 @@ def tileset_info(filename: str) -> dict:
 def get_tsv_chromsizes(file):
     """
     Get a list of chromosome sizes from this [presumably] tsv
-    chromsizes file.
+    chromsizes file.
 
     Parameters:
     -----------
-    file: string or file-like object
+    file: string
         A file-like object
 
     Returns
diff --git a/clodius/tiles/cooler.py b/clodius/tiles/cooler.py
index 49083444..016c806e 100644
--- a/clodius/tiles/cooler.py
+++ b/clodius/tiles/cooler.py
@@ -1,12 +1,14 @@
 import collections as col
+import itertools as it
+import logging
+
 import cooler
-import clodius.tiles.format as hgfo
-import clodius.tiles.utils as hgut
 import h5py
-import itertools as it
 import numpy as np
+
+import clodius.tiles.format as hgfo
+import clodius.tiles.utils as hgut
 import pandas as pd
-import logging
 
 logger = logging.getLogger(__name__)
 
@@ -131,9 +133,14 @@ def get_data(
     bins = c.bins(convert_enum=False)[cols]
     pixels = cooler.annotate(pixels, bins)
 
+    # t1 = time.time()
     pixels["genome_start1"] = chrom_cum_lengths[pixels["chrom1"]] + pixels["start1"]
+    # t2 = time.time()
     pixels["genome_start2"] = chrom_cum_lengths[pixels["chrom2"]] + pixels["start2"]
+    # t3 = time.time()
 
+    # print(f"genome_start1: {t2 - t1:.2f}")
+    # print(f"genome_start2: {t3 - t2:.2f}")
     bins1 = bins[i0 : i1 + 1]
     bins2 = bins[j0 : j1 + 1]
 
@@ -186,7 +193,7 @@ def _get_info_multi_v1(file_path):
         max_zoom = f.attrs["max-zoom"]
         bin_size = int(f[str(max_zoom)].attrs["bin-size"])
 
-        max_width = bin_size * TILE_SIZE * 2 ** max_zoom
+        max_width = bin_size * TILE_SIZE * 2**max_zoom
 
         # the list of available data transforms
         transforms = {}
@@ -215,10 +222,21 @@ def _get_info_multi_v1(file_path):
     return info
 
 
+def get_quadtree_depth(chromsizes, binsize):
+    """
+    Depth of quad tree necessary to tesselate the concatenated genome with quad
+    tiles such that linear dimension of the tiles is a preset multiple of the
+    genomic resolution.
+
+    """
+    tile_size_bp = TILE_SIZE * binsize
+    min_tile_cover = np.ceil(sum(chromsizes) / tile_size_bp)
+    return int(np.ceil(np.log2(min_tile_cover)))
+
+
 def get_zoom_resolutions(chromsizes, base_res):
     return [
-        base_res * 2 ** x
-        for x in range(hgut.get_quadtree_depth(chromsizes, base_res * TILE_SIZE) + 1)
+        base_res * 2**x for x in range(get_quadtree_depth(chromsizes, base_res) + 1)
     ]
 
 
@@ -278,8 +296,6 @@ def make_tiles(
     # print("resolution:", resolution)
     # print("tile_size:", tile_size)
     # print("transform_type:", transform_type);
-    # print('start1:', start1, end1)
-    # print('start2:', start2, end2)
 
     c = cooler.Cooler(hdf_for_resolution)
     (chroms, chrom_sizes, chrom_cum_lengths) = get_chromosome_names_cumul_lengths(c)
@@ -307,7 +323,6 @@ def make_tiles(
 
     for x_offset in range(0, x_width):
         for y_offset in range(0, y_width):
-
             start1 = (x_pos + x_offset) * tile_size
             end1 = (x_pos + x_offset + 1) * tile_size
             start2 = (y_pos + y_offset) * tile_size
@@ -316,8 +331,8 @@ def make_tiles(
             # print("resolution:", resolution)
             # print("tile_size", tile_size)
             # print("x_pos:", x_pos, "x_offset", x_offset)
-            # print("start1", start1, 'end1', end1)
-            # print("start2", start2, 'end2', end2)
+            # print("start1", start1, "end1", end1)
+            # print("start2", start2, "end2", end2)
 
             df = data[data["genome_start1"] >= start1]
             df = df[df["genome_start1"] < end1]
@@ -330,6 +345,10 @@ def make_tiles(
             j = ((df["genome_start1"].values - start1) // binsize).astype(int)
             i = ((df["genome_start2"].values - start2) // binsize).astype(int)
 
+            # print("df", df)
+            # print("j", j)
+            # print("i", i)
+
             if "balanced" in df:
                 v = np.nan_to_num(df["balanced"].values)
             else:
@@ -495,7 +514,7 @@ def make_mats(filepath):
 
         # get the genome size
         resolution = list(f["resolutions"].keys())[0]
-        genome_length = int(np.sum(f["resolutions"][resolution]["chroms"]["length"]))
+        genome_length = int(sum(f["resolutions"][resolution]["chroms"]["length"]))
 
         info["max_pos"] = [genome_length, genome_length]
         info["min_pos"] = [1, 1]
@@ -653,7 +672,7 @@ def generate_tiles(filepath, tile_ids):
                 # this tile has too high of a zoom level specified
                 continue
             hdf_for_resolution = tileset_file[str(zoom_level)]
-            resolution = (tileset_info["max_width"] / 2 ** zoom_level) / BINS_PER_TILE
+            resolution = (tileset_info["max_width"] / 2**zoom_level) / BINS_PER_TILE
 
         tile_positions = [[int(x) for x in t.split(".")[2:4]] for t in tile_group]
 
diff --git a/clodius/tiles/cram.py b/clodius/tiles/cram.py
new file mode 100644
index 00000000..a9bc6133
--- /dev/null
+++ b/clodius/tiles/cram.py
@@ -0,0 +1,20 @@
+import pysam
+
+from clodius.tiles.bam import alignment_tileset_info
+from clodius.tiles.bam import alignment_tiles
+
+
+def tileset_info(filename, chromsizes):
+    samfile = pysam.AlignmentFile(filename, "rc")  # "rc" opens the file for reading as CRAM
+
+    return alignment_tileset_info(samfile, chromsizes)  # helper imported from clodius.tiles.bam
+
+
+def tiles(
+    filename, tile_ids, index_filename=None, chromsizes=None, max_tile_width=None
+):
+    """Return tiles for a CRAM file, forwarding the caller's options."""
+    samfile = pysam.AlignmentFile(filename, "rc", index_filename=index_filename)
+    # These options were previously hard-coded to None, discarding the arguments.
+    opts = {"chromsizes": chromsizes, "max_tile_width": max_tile_width}
+    return alignment_tiles(samfile, tile_ids, index_filename=index_filename, **opts)
diff --git a/clodius/tiles/csv.py b/clodius/tiles/csv.py
new file mode 100644
index 00000000..f202c3c0
--- /dev/null
+++ b/clodius/tiles/csv.py
@@ -0,0 +1,95 @@
+from clodius.chromosomes import chromsizes_as_array
+import io
+
+
+# @lru_cache
+def csv_sequence_tileset_functions(
+    filename,
+    tile_functions,
+    colname=None,
+    colnum=None,
+    header=True,
+    sep=",",
+    refrow=None,
+    fasta_datafile=None,
+    chromsizes_datafile=None,
+    chromsizes=None,
+):
+    """Read a csv file and return a list of sequences.
+
+    Parameters
+    ----------
+    filename: string
+        The name of the csv file
+    tile_functions:
+        A function that will take a list of sequences as a parameter
+        and return tileset_info and tiles functions
+    colname: Optional[str]
+        The name of the column containing the sequences.
+    colnum: Optional[int]
+        The column number of the sequence logo file. 1-based.
+        Only used if colname is not provided.
+    sep: string
+        The separator used in the csv file
+    refrow: A row to use as a reference sequence when calculating
+        alignments. Should be 1-based
+    fasta_datafile: A fasta file to align the sequences to.
+    """
+    import pandas as pd
+
+    if not header:
+        header = None
+    else:
+        header = 0
+
+    if not colname and not colnum:
+        raise ValueError("No colname or colnum specified")
+
+    df = pd.read_csv(filename, header=header, sep=sep)
+
+    if not colname:
+        colname = df.columns[colnum - 1]
+
+    sequences = df[colname].values
+
+    if refrow:
+        refseqs = [{"id": f"row_{refrow}", "seq": sequences[refrow - 1]}]
+        if chromsizes is None:
+            chromsizes = [[f"row_{refrow}", len(sequences[refrow - 1])]]
+    else:
+        if fasta_datafile:
+            from Bio import SeqIO
+
+            if isinstance(fasta_datafile, str):
+                fasta_handle = open(fasta_datafile, "rb")
+            else:
+                fasta_handle = fasta_datafile
+
+            refseqs = [
+                {"id": record.id, "seq": str(record.seq)}
+                for record in SeqIO.parse(
+                    io.TextIOWrapper(fasta_handle, "utf-8"), "fasta"
+                )
+            ]
+
+            if chromsizes is None:
+                if chromsizes_datafile:
+                    chromsizes = chromsizes_as_array(chromsizes_datafile)
+                else:
+                    chromsizes = [[r["id"], len(r["seq"])] for r in refseqs]
+        else:
+            raise ValueError("No reference row or fasta file provided")
+
+    tf = tile_functions(
+        sequences,
+        refseqs=refseqs,
+        values=df.to_dict(orient="records"),
+        chromsizes=chromsizes,
+    )
+
+    orig_tsinfo = tf["tileset_info"]()
+    # Decorate the tileset info function so that it returns
+    # the column names as well.
+    tf["tileset_info"] = lambda: {"columns": list(df.columns), **orig_tsinfo}
+
+    return tf
diff --git a/clodius/tiles/fasta.py b/clodius/tiles/fasta.py
index 36b23898..2fdecdad 100644
--- a/clodius/tiles/fasta.py
+++ b/clodius/tiles/fasta.py
@@ -1,29 +1,42 @@
-from pyfaidx import Fasta
+import math
+from typing import Any, List, Tuple
+
 import numpy as np
-import pandas as pd
-import logging
-from .utils import natsorted, get_quadtree_depth
 
-logger = logging.getLogger(__name__)
+import clodius.tiles.chromsizes as cts
+from clodius.tiles.format import format_dense_tile
+from clodius.tiles.utils import TilesetInfo, abs2genome_fn, parse_tile_id
+
+# from pysam import FastaFile
 
 TILE_SIZE = 1024
 
 
-def get_chromsizes(fapath):
-    with Fasta(fapath, one_based_attributes=False) as fa:
-        chromsizes = dict((seq, len(fa.records[seq])) for seq in fa.keys())
-        chromosomes = natsorted(fa.keys())
-    return pd.Series(chromsizes)[chromosomes]
+def convert_bases_to_multivec(seq):
+    res = []
+
+    to_append = {
+        "a": [1, 0, 0, 0, 0, 0],
+        "t": [0, 1, 0, 0, 0, 0],
+        "g": [0, 0, 1, 0, 0, 0],
+        "c": [0, 0, 0, 1, 0, 0],
+        "n": [0, 0, 0, 0, 1, 0],
+    }
 
+    for c in seq:
+        res.append(to_append.get(c.lower(), [0, 0, 0, 0, 0, 1]))
 
-def tileset_info(fapath, chromsizes=None):
+    return res
+
+
+def tileset_info(fai_filename):
     """
     Get the tileset info for a FASTA file
 
     Parameters
     ----------
-    fapath: string
-        The path to the FASTA file from which to retrieve data
+    fai_filename: string
+        The path to the FASTA index file from which to retrieve data
     chromsizes: [[chrom, size],...]
         A list of chromosome sizes associated with this tileset.
         Typically passed in to specify in what order data from
@@ -37,163 +50,158 @@ def tileset_info(fapath, chromsizes=None):
                     'max_zoom': 7
                     }
     """
-    if chromsizes is None:
-        chromsizes = get_chromsizes(fapath)
-        chromsizes_list = []
-
-        for chrom, size in chromsizes.items():
-            chromsizes_list += [[chrom, int(size)]]
-    else:
-        chromsizes_list = chromsizes
-        chromsizes = [int(c[1]) for c in chromsizes_list]
-    max_zoom = get_quadtree_depth(chromsizes, TILE_SIZE)
-    tileset_info = {
-        "min_pos": [0],
-        "max_pos": [sum(chromsizes)],
-        "max_width": TILE_SIZE * 2 ** max_zoom,
-        "tile_size": TILE_SIZE,
-        "max_zoom": max_zoom,
-        "chromsizes": chromsizes_list,
-    }
-    return tileset_info
 
-
-def abs2genomic(chromsizes, start_pos, end_pos):
-    """
-    Convert absolute genomic sizes to genomic
-
-    Parameters:
-    -----------
-    chromsizes: [1000,...]
-        An array of the lengths of the chromosomes
-    start_pos: int
-        The starting genomic position
-    end_pos: int
-        The ending genomic position
-    """
-    abs_chrom_offsets = np.r_[0, np.cumsum(chromsizes)]
-    cid_lo, cid_hi = (
-        np.searchsorted(abs_chrom_offsets, [start_pos, end_pos], side="right") - 1
+    tsinfo = cts.tileset_info(fai_filename)
+    tsinfo["max_zoom"] = math.ceil(
+        math.log(tsinfo["max_pos"][0] / TILE_SIZE) / math.log(2)
     )
-    rel_pos_lo = start_pos - abs_chrom_offsets[cid_lo]
-    rel_pos_hi = end_pos - abs_chrom_offsets[cid_hi]
-    start = rel_pos_lo
-    for cid in range(cid_lo, cid_hi):
-        yield cid, start, chromsizes[cid]
-        start = 0
-    yield cid_hi, start, rel_pos_hi
-
-
-def get_fasta_tile(
-    fapath, zoom_level, start_pos, end_pos, chromsizes=None,
-):
-    if chromsizes is None:
-        chromsizes = get_chromsizes(fapath)
-    chrom_names = chromsizes.keys()
-    cids_starts_ends = list(abs2genomic(chromsizes, start_pos, end_pos))
-    with Fasta(fapath, one_based_attributes=False) as fa:
-        # investigate using 4 bits per character (only 16 possible chars)
-        arrays = [
-            fa[chrom_names[cid]][start:end].seq for cid, start, end in cids_starts_ends
-        ]
-    return "".join(arrays)
-
-
-def tiles(fapath, tile_ids, chromsizes_map={}, chromsizes=None, max_tile_width=None):
-    """
-    Generate tiles from a FASTA file.
 
-    Parameters
-    ----------
-    fapath: str
-        The filepath of the FASTA file
-    tile_ids: [str,...]
-        A list of tile_ids (e.g. xyx.0.0) identifying the tiles
-        to be retrieved
-    chromsizes_map: {uid: []}
-        A set of chromsizes listings corresponding to the parameters of the
-        tile_ids. To be used if a chromsizes id is passed in with the tile id
-        with the `|cos:id` tag in the tile id
-    chromsizes: [[chrom, size],...]
-        A 2d array containing chromosome names and sizes. Overrides the
-        chromsizes in chromsizes_map
-    max_tile_width: int
-        How wide can each tile be before we return no data. This
-        can be used to limit the amount of data returned.
-    Returns
-    -------
-    tile_list: [(tile_id, tile_data),...]
-        A list of tile_id, tile_data tuples
+    tsinfo["max_width"] = TILE_SIZE * 2 ** tsinfo["max_zoom"]
+    # tsinfo['bins_per_dimension'] = TILE_SIZE
+    tsinfo["tile_size"] = TILE_SIZE
+    tsinfo["datatype"] = "multivec_singleres_sequence"
+    return tsinfo
+
+
+def sequence_tiles_to_multivec(tiles):
+    """Convert sequence tiles to multivec tiles; error tiles pass through as-is."""
+    new_tiles = []
+    for tile_id, tile in tiles:
+        # sequence_tiles() emits {"error": ...} tiles that carry no sequence.
+        if "sequence" in tile:
+            seq = tile["sequence"]
+            tile = format_dense_tile(np.array(convert_bases_to_multivec(seq)).T)
+            tile["shape"] = [6, len(seq)]
+        new_tiles.append((tile_id, tile))
+    return new_tiles
+
+
+def multivec_tiles(*args, **kwargs):
+    """Fetch sequence tiles (same arguments as sequence_tiles) as multivec tiles."""
+    return sequence_tiles_to_multivec(sequence_tiles(*args, **kwargs))
+
+
+def read_fai(fai_file):
+    """Parse a faidx index into {name: (length, offset, line_blen, line_len)}.
+
+    `fai_file` is a path or a seekable binary file object. A path opened here
+    is closed before returning; a caller-supplied object is left open.
+    """
+    if isinstance(fai_file, str):
+        with open(fai_file, "rb") as opened:
+            return read_fai(opened)
+
+    fai_file.seek(0)
+    fai_index = {}
+    for line in fai_file.read().decode("utf-8").split("\n"):
+        fields = line.strip().split("\t")
+        if fields != [""]:  # skip blank / whitespace-only lines
+            # Columns 2-5 are numeric: length, offset, bases/line, bytes/line.
+            fai_index[fields[0]] = tuple(int(fields[i]) for i in (1, 2, 3, 4))
+    return fai_index
+
+
+def fetch_sequence(fasta_file, fai_index, seq_name, start, end):
+    """Return bases [start, end) of `seq_name` using a faidx-style index.
+
+    `fasta_file` is a path or a seekable binary file object. A path is opened
+    and closed here; a caller-supplied file object is left open.
+
+    Raises ValueError for an unknown sequence, an invalid range, or a file
+    that ends before the requested range has been read.
+    """
+    if isinstance(fasta_file, str):
+        with open(fasta_file, "rb") as opened:
+            return fetch_sequence(opened, fai_index, seq_name, start, end)
+
+    if seq_name not in fai_index:
+        raise ValueError(f"Sequence {seq_name} not found in index")
+
+    seq_length, offset, line_blen, line_len = fai_index[seq_name]
+
+    if start < 0 or end > seq_length or start >= end:
+        raise ValueError(f"Invalid range: {start}-{end} for sequence {seq_name}")
+
+    f = fasta_file
+
+    # Seek to the first requested base: skip whole lines (line_len bytes each,
+    # terminator included), then the remaining bases within the start line.
+    f.seek(offset + (start // line_blen) * line_len + (start % line_blen))
+
+    sequence = []
+    while start < end:
+        # Never read past the end of the current line of bases.
+        chunk = f.read(min(end - start, line_blen - (start % line_blen)))
+        if not chunk:
+            # A truncated file returns b"" forever; fail instead of spinning.
+            raise ValueError(f"Unexpected end of file while reading {seq_name}")
+        sequence.append(chunk.strip().decode("utf8"))
+        start += len(chunk)
+        # Step over the line terminator before reading the next line.
+        f.seek(f.tell() + (line_len - line_blen))
+
+    return "".join(sequence)
+
+
+def sequence_tiles(
+    fasta_filename: str,
+    tile_ids: List[str],
+    index_filename: str,
+    chromsizes_fn: str = None,
+) -> List[Tuple[str, Any]]:
+    """Retrieve higlass tiles.
+
+    Arguments:
+        fasta_filename: The name of the fasta file to load
+        tile_ids: The incoming tile ids (e.g. 'x.0.0')
+        index_filename: The name of the fasta index file (`samtools faidx`)
+        chromsizes_fn: The chromsizes filename to use in case we
+            want a specific chromosome order. Defaults to the index file.
+    Returns:
+        Tile data
     """
+    tsinfo = tileset_info(index_filename)
+    tsinfo = TilesetInfo(**tsinfo)
     generated_tiles = []
+
+    fa_index = read_fai(index_filename)
+
+    if not chromsizes_fn:
+        chromsizes_fn = index_filename
+
     for tile_id in tile_ids:
-        tile_option_parts = tile_id.split("|")[1:]
-        tile_no_options = tile_id.split("|")[0]
-        tile_id_parts = tile_no_options.split(".")
-        tile_position = list(map(int, tile_id_parts[1:3]))
-
-        tile_options = dict([o.split(":") for o in tile_option_parts])
-
-        if chromsizes:
-            chromnames = [c[0] for c in chromsizes]
-            chromlengths = [int(c[1]) for c in chromsizes]
-            chromsizes_to_use = pd.Series(chromlengths, index=chromnames)
-        else:
-            chromsizes_id = None
-            if "cos" in tile_options:
-                chromsizes_id = tile_options["cos"]
-            if chromsizes_id in chromsizes_map:
-                chromsizes_to_use = chromsizes_map[chromsizes_id]
-            else:
-                chromsizes_to_use = None
-
-        zoom_level = tile_position[0]
-        tile_pos = tile_position[1]
-
-        # this doesn't combine multiple consequetive ids, which
-        # would speed things up
-        if chromsizes_to_use is None:
-            chromsizes_to_use = get_chromsizes(fapath)
-
-        max_depth = get_quadtree_depth(chromsizes_to_use, TILE_SIZE)
-        tile_size = TILE_SIZE * 2 ** (max_depth - zoom_level)
-        if max_tile_width and tile_size > max_tile_width:
-            return [
+        tile_info = parse_tile_id(tile_id, tsinfo)
+
+        zoom_diff = tsinfo.max_zoom - tile_info.zoom
+        if zoom_diff > 3:
+            generated_tiles += [
                 (
                     tile_id,
                     {
-                        "error": f"Tile too large, no data returned. Max tile size: {max_tile_width}"
+                        "error": f"Tile too wide (zoom level {tile_info.zoom}). Please zoom in."
                     },
                 )
             ]
-        start_pos = tile_pos * tile_size
-        end_pos = start_pos + tile_size
-        tile = get_fasta_tile(fapath, zoom_level, start_pos, end_pos, chromsizes_to_use)
-        generated_tiles += [(tile_id, {"sequence": tile})]
-    return generated_tiles
+            continue
 
+        seq = ""
 
-def chromsizes(filename):
-    """
-    Get a list of chromosome sizes from this [presumably] fasta
-    file.
+        for chr_interval in abs2genome_fn(
+            chromsizes_fn, tile_info.start[0], tile_info.end[0]
+        ):
+            fs = fetch_sequence(
+                fasta_filename,
+                fa_index,
+                chr_interval.name,
+                chr_interval.start,
+                chr_interval.end,
+            )
+            # fas = fa_file.fetch(chr_interval.name, chr_interval.start, chr_interval.end)
 
-    Parameters:
-    -----------
-    filename: string
-        The filename of the fasta file
+            # assert fs == fas
 
-    Returns
-    -------
-    chromsizes: [(name:string, size:int), ...]
-        An ordered list of chromosome names and sizes
-    """
-    try:
-        chrom_series = get_chromsizes(filename)
-        data = []
-        for chrom, size in chrom_series.items():
-            data.append([chrom, size])
-        return data
-    except Exception as ex:
-        logger.error(ex)
-        raise Exception("Error loading chromsizes from bigwig file: {}".format(ex))
+            seq += fs
+
+        generated_tiles += [(tile_id, {"sequence": seq})]
+
+    return generated_tiles
diff --git a/clodius/tiles/format.py b/clodius/tiles/format.py
index 5a4c5ee7..3e0d32d2 100644
--- a/clodius/tiles/format.py
+++ b/clodius/tiles/format.py
@@ -1,7 +1,6 @@
 import base64
-import warnings
-
 import numpy as np
+import warnings
 
 
 def format_dense_tile(data):
@@ -36,8 +35,8 @@ def format_dense_tile(data):
     tile_data["min_value"] = min_dense if not np.isnan(min_dense) else "NaN"
     tile_data["max_value"] = max_dense if not np.isnan(max_dense) else "NaN"
 
-    min_f16 = np.finfo("float16").min.item()
-    max_f16 = np.finfo("float16").max.item()
+    min_f16 = np.finfo("float16").min
+    max_f16 = np.finfo("float16").max
 
     has_nan = np.sum(np.isnan(data)) > 0
     n_dim = len(data.shape)
diff --git a/clodius/tiles/geo.py b/clodius/tiles/geo.py
index 785c3327..29465563 100644
--- a/clodius/tiles/geo.py
+++ b/clodius/tiles/geo.py
@@ -1,8 +1,10 @@
 import json
 import math
-import os
-import sqlite3
 import collections as col
+import apsw
+import sosqlite
+
+sovfs = sosqlite.SmartOpenVFS(name="so-vfs")
 
 
 def get_tile_box(zoom, x, y):
@@ -20,13 +22,13 @@ def get_lng_lat_from_tile_pos(zoom, x, y):
     (lng, lat) of top-left corner of tile"""
 
     # "map-centric" latitude, in radians:
-    lat_rad = math.pi - 2 * math.pi * y / (2 ** zoom)
+    lat_rad = math.pi - 2 * math.pi * y / (2**zoom)
     # true latitude:
     lat_rad = gudermannian(lat_rad)
     lat = lat_rad * 180.0 / math.pi
 
     # longitude maps linearly to map, so we simply scale:
-    lng = -180.0 + 360.0 * x / (2 ** zoom)
+    lng = -180.0 + 360.0 * x / (2**zoom)
 
     return (lng, lat)
 
@@ -39,8 +41,8 @@ def get_tile_pos_from_lng_lat(lng, lat, zoom):
     # "map-centric" latitude, in radians:
     lat_rad = inv_gudermannian(lat_rad)
 
-    x = 2 ** zoom * (lng + 180.0) / 360.0
-    y = 2 ** zoom * (math.pi - lat_rad) / (2 * math.pi)
+    x = 2**zoom * (lng + 180.0) / 360.0
+    y = 2**zoom * (math.pi - lat_rad) / (2 * math.pi)
 
     return (x, y)
 
@@ -54,21 +56,21 @@ def inv_gudermannian(y):
 
 
 def tileset_info(filepath):
-    if not os.path.isfile(filepath):
-        return {"error": "Tileset info is not available!"}
-
-    db = sqlite3.connect(filepath)
-
-    res = db.execute("SELECT * FROM tileset_info").fetchone()
-
-    o = {
-        "zoom_step": res[0],
-        "tile_size": res[1],
-        "max_zoom": res[2],
-        "min_pos": [res[3], res[5]],
-        "max_pos": [res[4], res[6]],
-        "max_data_length": res[1] * 2 ** res[2],
-    }
+    with apsw.Connection(
+        filepath, vfs=sovfs.name, flags=apsw.SQLITE_OPEN_READONLY
+    ) as conn:
+        c = conn.cursor()
+        c.execute("SELECT * FROM tileset_info")
+        res = c.fetchone()
+        # The row is read positionally; indices follow SELECT * column order.
+        o = {
+            "zoom_step": res[0],
+            "tile_size": res[1],
+            "max_zoom": res[2],
+            "min_pos": [res[3], res[5]],
+            "max_pos": [res[4], res[6]],
+            "max_data_length": res[1] * 2 ** res[2],
+        }
 
     return o
 
@@ -79,8 +81,8 @@ def get_tiles(db_file, zoom, x, y, width=1, height=1):
 
     Parameters
     ----------
-    db_file: str
-        The filename of the sqlite db file
+    db_file: str or file-like object
+        The filename of the sqlite db file or a file-like object
     zoom: int
         The zoom level
     x: int
@@ -97,10 +99,9 @@ def get_tiles(db_file, zoom, x, y, width=1, height=1):
     tiles: {pos: tile_value}
         A set of tiles, indexed by position
     """
-    conn = sqlite3.connect(db_file)
-
-    c = conn.cursor()
+    conn = apsw.Connection(db_file, vfs=sovfs.name, flags=apsw.SQLITE_OPEN_READONLY)
 
+    cursor = conn.cursor()
     lng_from, _, lat_from, _ = get_tile_box(zoom, x, y)
     _, lng_to, _, lat_to = get_tile_box(zoom, x + width - 1, y + height - 1)
 
@@ -125,7 +126,7 @@ def get_tiles(db_file, zoom, x, y, width=1, height=1):
         rMaxLat >= ?
     """
 
-    rows = c.execute(query, (zoom, lng_from, lng_to, lat_from, lat_to)).fetchall()
+    rows = cursor.execute(query, (zoom, lng_from, lng_to, lat_from, lat_to)).fetchall()
 
     new_rows = col.defaultdict(list)
 
diff --git a/clodius/tiles/geopoints.py b/clodius/tiles/geopoints.py
new file mode 100644
index 00000000..52116ef9
--- /dev/null
+++ b/clodius/tiles/geopoints.py
@@ -0,0 +1,49 @@
+import math
+
+import clodius.tiles.geo as ctg
+
+
+def y2lat(a):
+    """Map a Mercator y value (in degrees) back to a latitude in degrees."""
+    # Same operation order as the closed-form expression, so floats match.
+    y_rad = a * math.pi / 180.0
+    lat_rad = 2.0 * math.atan(math.exp(y_rad)) - math.pi / 2.0
+    return 180.0 / math.pi * lat_rad
+
+
+def lat2y(a):
+    """Map a latitude in degrees to its Mercator y value (in degrees)."""
+    # Same operation order as the closed-form expression, so floats match.
+    half_lat_rad = a * (math.pi / 180.0) / 2.0
+    stretched = math.log(math.tan(math.pi / 4.0 + half_lat_rad))
+    return 180.0 / math.pi * stretched
+
+
+def tileset_info(filepath):
+    """Return the geo tileset info with bounds forced to a 360 x 360 square."""
+    tsinfo = ctg.tileset_info(filepath)
+    tsinfo.update(min_pos=[-180, -180], max_pos=[180, 180], max_width=360)
+    # tsinfo is the dict built by clodius.tiles.geo, modified in place above.
+    return tsinfo
+
+
+def get_tiles(filepath, z, x, y, width=1, height=1):
+    """Fetch geo tiles and flatten each feature into a point record."""
+    point_tile = []
+    # geo_tile maps (x, y) tile positions to lists of feature dicts.
+    geo_tile = ctg.get_tiles(filepath, z, x, y, width, height)
+    for (tile_x, tile_y), features in geo_tile.items():
+        points = []
+        for feature in features:
+            coords = feature["geometry"]["coordinates"]
+            points.append(
+                {
+                    "x": coords[0],
+                    # y is the negated lat2y projection of the second coordinate.
+                    "y": -lat2y(coords[1]),
+                    "data": feature["properties"]["SPECIES"],
+                    "uid": feature["uid"],
+                }
+            )
+        point_tile.append(((z, tile_x, tile_y), points))
+    return point_tile
diff --git a/clodius/tiles/gff.py b/clodius/tiles/gff.py
new file mode 100644
index 00000000..26be286f
--- /dev/null
+++ b/clodius/tiles/gff.py
@@ -0,0 +1,403 @@
+import functools as ft
+import random
+
+import clodius.tiles.bedfile as ctb
+import pandas as pd
+import polars as pl
+
+from clodius.utils import get_file_compression
+from clodius.models.gff_models import (
+    Gene, GeneModel, Pseudogene, PseudogeneModel,
+    mRNA, lnc_RNA, primary_transcript, antisense_RNA,
+    snoRNA, tRNA, miRNA, Exon, CDS,
+)
+from clodius.tiles.tabix import df_single_tile
+from clodius.utils import TILE_OPTIONS_CHAR
+from clodius.tiles.tabix import load_tbi_idx, raw_tabix_fetcher, single_indexed_tile
+from smart_open import open
+from uuid import uuid4
+
+
+def gff_chromsizes(filename):
+    """Return a Series of "region" row ends (column 4) indexed by seqid (column 0)."""
+    handle = open(filename, "rb") if isinstance(filename, str) else filename
+    table = pd.read_csv(
+        handle,
+        header=None,
+        delimiter="\t",
+        comment="#",
+        compression=get_file_compression(handle),
+    )
+    # Only rows whose feature type (column 2) is "region" describe a sequence.
+    is_region = table[2] == "region"
+    regions = table[is_region]
+    return pd.Series(regions[4].values, index=regions[0])
+
+
+def row_to_bedlike(row, css, orig_columns):
+    """Convert one GFF row to a bed-like dict; `orig_columns` is unused."""
+    # Split on the first "=" only, since attribute values may contain "=",
+    # and skip items without one (e.g. the empty item after a trailing ";").
+    attrs = dict(x.split("=", 1) for x in row[8].split(";") if "=" in x)
+    return {
+        "uid": row["ix"],
+        "xStart": row["xStart"],
+        "xEnd": row["xEnd"],
+        "chrOffset": css[row[0]],
+        "importance": random.random(),
+        "fields": [row[0], row[3], row[4], attrs["Name"], "-", row[6]],
+    }
+
+
+def tileset_info(filename, chromsizes=None, index_filename=None):
+    """Return the tileset info (bounds) for a GFF file.
+
+    The bounds must span the whole dataset, so chromsizes are required. When
+    the caller passes none, they are derived from the file's "region" rows
+    via gff_chromsizes; the bedfile tileset_info then does the actual work.
+    """
+    effective_chromsizes = chromsizes
+    if effective_chromsizes is None:
+        effective_chromsizes = gff_chromsizes(filename)
+
+    # Positional call: (filename, chromsizes, index_filename).
+    tsinfo = ctb.tileset_info(filename, effective_chromsizes, index_filename)
+    return tsinfo
+
+
+def rows_to_genes(rows, css):
+    """Convert a set of gff rows into gene annotations in BED12+3 format.
+
+    From https://genome.ucsc.edu/FAQ/FAQformat.html#format1.7
+
+    The format consists of the following:
+
+    chrom - Name of the chromosome (or contig, scaffold, etc.).
+    chromStart - The starting position of the feature in the chromosome or scaffold. The first base in a chromosome is numbered 0.
+    chromEnd - The ending position of the feature in the chromosome or scaffold. The chromEnd base is not included in the display of the feature. For example, the first 100 bases of a chromosome are defined as chromStart=0, chromEnd=100, and span the bases numbered 0-99.
+    name - Name given to a region (preferably unique). Use "." if no name is assigned.
+    score - Indicates how dark the peak will be displayed in the browser (0-1000). If all scores were "0" when the data were submitted to the DCC, the DCC assigned scores 1-1000 based on signal value. Ideally the average signalValue per base spread is between 100-1000.
+    strand - +/- to denote strand or orientation (whenever applicable). Use "." if no orientation is assigned.
+    thickStart - The starting position at which the feature is drawn thickly. Not used in gappedPeak type, set to 0.
+    thickEnd - The ending position at which the feature is drawn thickly. Not used in gappedPeak type, set to 0.
+    itemRgb - An RGB value of the form R,G,B (e.g. 255,0,0). Not used in gappedPeak type, set to 0.
+    blockCount - The number of blocks (exons) in the BED line.
+    blockSizes - A comma-separated list of the block sizes. The number of items in this list should correspond to blockCount.
+    blockStarts - A comma-separated list of block starts. The first value must be 0 and all of the blockStart positions should be calculated relative to chromStart. The number of items in this list should correspond to blockCount.
+    signalValue - Measurement of overall (usually, average) enrichment for the region.
+    pValue - Measurement of statistical significance (-log10). Use -1 if no pValue is assigned.
+    qValue - Measurement of statistical significance using false discovery rate (-log10). Use -1 if no qValue is assigned.
+    """
+    # GFF file entries may have Parent=<ID> hierarchies.
+    # TODO: not implemented yet; this function is a stub and returns None.
+    pass
+
+
+def single_tile(filename, chromsizes, tsinfo, z, x, settings=None):
+    if isinstance(filename, str):
+        filename = open(filename, "rb")
+    # Returns a list of bed-like dicts for "gene" rows overlapping tile (z, x).
+    hash_ = ctb.ts_hash(filename, chromsizes)
+
+    if settings is None:
+        settings = {}
+    # Cache the parsed table under that hash so repeat requests skip re-reading
+    # the file and recomputing the cumulative start and end positions.
+    val = ctb.cache.get(hash_)
+
+    if val is None:
+        t = pd.read_csv(
+            filename,
+            comment="#",
+            header=None,
+            delimiter="\t",
+            compression=get_file_compression(filename),
+        )
+        t = t[t[2] == "gene"]
+
+        orig_columns = t.columns
+        css = chromsizes.cumsum().shift().fillna(0).to_dict()
+
+        # xStart and xEnd are cumulative start and end positions calculated
+        # as if the chromosomes are concatenated from end to end
+        t["chromStart"] = t[0].map(lambda x: css[x])
+        t["xStart"] = t["chromStart"] + t[3]
+        t["xEnd"] = t["chromStart"] + t[4]
+        t["ix"] = t.index
+
+        val = {"rows": t, "orig_columns": orig_columns, "css": css}
+        ctb.cache.set(hash_, val)
+
+    t = val["rows"]
+    orig_columns = val["orig_columns"]
+    css = val["css"]
+    # Tile extent in cumulative coordinates: the x-th slice of max_width / 2**z.
+    tileStart = x * tsinfo["max_width"] / 2**z
+    tileEnd = (x + 1) * tsinfo["max_width"] / 2**z
+
+    t = t.query(f"xEnd >= {tileStart} & xStart <= {tileEnd}")
+    MAX_PER_TILE = settings.get("MAX_BEDFILE_ENTRIES") or 1024
+    # Randomly subsample (DataFrame.sample) when the tile exceeds the cap.
+    t = t.sample(MAX_PER_TILE) if len(t) > MAX_PER_TILE else t
+
+    ret = t.apply(
+        ft.partial(row_to_bedlike, css=css, orig_columns=orig_columns), axis=1
+    )
+    return list(ret.values)
+
+
+def convert_raw_to_gff_df(raw_data):
+    """Split each tab-separated line in the 'raw' column into the 9 GFF columns.
+
+    Lines with fewer than 9 fields are dropped; start and end become ints.
+    """
+    columns = (
+        "seqid", "source", "type", "start", "end",
+        "score", "strand", "phase", "attributes",
+    )
+    records = []
+    for item in raw_data.iter_rows(named=True):
+        fields = item["raw"].split("\t")
+        if len(fields) < 9:
+            continue
+        # zip stops after the 9 named columns, ignoring any extra fields.
+        record = dict(zip(columns, fields))
+        record["start"] = int(fields[3])
+        record["end"] = int(fields[4])
+        records.append(record)
+
+    return pl.DataFrame(records)
+
+
+def parse_gff_to_models(filtered_df, settings=None):
+    """Parse GFF rows into (genes+pseudogenes, transcripts) dicts; `settings` is unused."""
+
+    def parse_attributes(attr_str):
+        if attr_str is None:
+            return {}
+        if isinstance(attr_str, dict):
+            return {k: v for k, v in attr_str.items() if v is not None}
+        attrs = {}
+        for item in attr_str.split(";"):
+            if "=" in item:
+                key, value = item.split("=", 1)
+                attrs[key] = value
+        return attrs
+
+    genes = {}
+    transcripts = {}
+    pseudogenes = {}
+
+    for row in filtered_df.iter_rows(named=True):
+        # Row keys used: seqid, type, start, end, score, strand, phase, attributes
+        attrs = parse_attributes(row.get("attributes"))
+
+        entity_data = {
+            "id": attrs.get(
+                "ID",
+                f"{row.get('type')}_{row.get('start')}_{row.get('end')}",
+            ),
+            "chrom": row.get("seqid"),
+            "start": row.get("start"),
+            "end": row.get("end"),
+            "strand": (
+                row.get("strand") if row.get("strand") in ["+", "-", "."] else None
+            ),
+            "score": (
+                float(row.get("score"))
+                if row.get("score") is not None and row.get("score") != "."
+                else None
+            ),
+            "phase": (
+                int(row.get("phase"))
+                if row.get("phase") is not None and row.get("phase") != "."
+                else None
+            ),
+            "attributes": attrs,
+        }
+
+        feature_type = row.get("type")
+
+        if feature_type == "gene":
+            gene = Gene(
+                **entity_data,
+                gene_biotype=attrs.get("gene_biotype"),
+                pseudo=attrs.get("pseudo") == "true",
+            )
+            genes[entity_data["id"]] = GeneModel(gene=gene)
+
+        elif feature_type == "pseudogene":
+            pseudogene = Pseudogene(**entity_data)
+            pseudogenes[entity_data["id"]] = PseudogeneModel(pseudogene=pseudogene)
+
+        elif feature_type == "mRNA":
+            transcript = mRNA(**entity_data, parent_gene_id=attrs.get("Parent", ""))
+            transcripts[entity_data["id"]] = transcript
+
+        elif feature_type == "lnc_RNA":
+            transcript = lnc_RNA(**entity_data, parent_gene_id=attrs.get("Parent", ""))
+            transcripts[entity_data["id"]] = transcript
+
+        elif feature_type == "primary_transcript":
+            transcript = primary_transcript(
+                **entity_data, parent_gene_id=attrs.get("Parent", "")
+            )
+            transcripts[entity_data["id"]] = transcript
+
+        elif feature_type == "antisense_RNA":
+            transcript = antisense_RNA(
+                **entity_data, parent_gene_id=attrs.get("Parent", "")
+            )
+            transcripts[entity_data["id"]] = transcript
+
+        elif feature_type == "snoRNA":
+            transcript = snoRNA(**entity_data, parent_gene_id=attrs.get("Parent", ""))
+            transcripts[entity_data["id"]] = transcript
+
+        elif feature_type in [
+            "tRNA",
+            "rRNA",
+            "snRNA",
+            "SRP_RNA",
+            "RNase_P_RNA",
+            "RNase_MRP_RNA",
+        ]:
+            transcript_class = globals().get(feature_type, tRNA)
+            transcript = transcript_class(
+                **entity_data, parent_gene_id=attrs.get("Parent", "")
+            )
+            transcripts[entity_data["id"]] = transcript
+
+        elif feature_type == "ncRNA":
+            # Generic ncRNA - use lnc_RNA as fallback
+            transcript = lnc_RNA(**entity_data, parent_gene_id=attrs.get("Parent", ""))
+            transcripts[entity_data["id"]] = transcript
+
+        elif feature_type == "miRNA":
+            mirna = miRNA(**entity_data, parent_transcript_id=attrs.get("Parent", ""))
+            parent_id = attrs.get("Parent", "")
+            if parent_id in transcripts and hasattr(transcripts[parent_id], "mirnas"):
+                transcripts[parent_id].mirnas.append(mirna)
+
+        elif feature_type == "exon":
+            exon = Exon(**entity_data)
+            parent_id = attrs.get("Parent", "")
+            if parent_id in transcripts:
+                transcripts[parent_id].exons.append(exon)
+            elif parent_id in pseudogenes:
+                pseudogenes[parent_id].pseudogene.exons.append(exon)
+
+        elif feature_type == "CDS":
+            cds = CDS(**entity_data)
+            parent_id = attrs.get("Parent", "")
+            if parent_id in transcripts and isinstance(transcripts[parent_id], mRNA):
+                transcripts[parent_id].cds.append(cds)
+            else:
+                if not parent_id:
+                    parent_id = str(uuid4())
+                # No mRNA under this id yet: create one (overwrites a non-mRNA entry)
+                transcript_data = entity_data.copy()
+                transcript_data["id"] = parent_id
+                transcript = mRNA(**transcript_data, parent_gene_id="")
+                transcripts[parent_id] = transcript
+                transcripts[parent_id].cds.append(cds)
+                # Create exon from CDS
+                exon = Exon(**entity_data)
+                transcripts[parent_id].exons.append(exon)
+                # Check if gene exists, create if it doesn't
+                gene_id = attrs.get("gene_id") or f"gene_{parent_id}"
+                if gene_id not in genes:
+                    gene_data = entity_data.copy()
+                    gene_data["id"] = gene_id
+                    gene = Gene(**gene_data)
+                    genes[gene_id] = GeneModel(gene=gene)
+                transcripts[parent_id].parent_gene_id = gene_id
+
+        # Skip unmodeled features: mobile_genetic_element, region, sequence_feature
+
+    # Associate transcripts with genes
+    for transcript in transcripts.values():
+        if hasattr(transcript, "parent_gene_id") and transcript.parent_gene_id in genes:
+            genes[transcript.parent_gene_id].transcripts.append(transcript)
+
+    # Combine genes and pseudogenes
+    all_genes = {**genes, **pseudogenes}
+    # Replace each model with its model_dump() output before returning.
+    for key in all_genes:
+        all_genes[key] = all_genes[key].model_dump()
+    for key in transcripts:
+        transcripts[key] = transcripts[key].model_dump()
+
+    return all_genes, transcripts
+
+
+def tiles(filename, tile_ids, chromsizes=None, index_filename=None, settings=None):
+    if chromsizes is None:
+        chromsizes = gff_chromsizes(filename)
+    # Fallback used when no index is given; its `settings` argument is unused.
+    def gff_single_tile_func(filename, chromsizes, tsinfo, z, x, settings=None):
+        df = df_single_tile(
+            filename=filename,
+            chromsizes=chromsizes,
+            tsinfo=tsinfo,
+            z=z,
+            x=x,
+            mode="gff",
+        )
+
+        genes, transcripts = parse_gff_to_models(df)
+        return {"genes": genes, "transcripts": transcripts}
+    # Paths are opened via smart_open with compression disabled; others pass through.
+    if isinstance(filename, str):
+        file = open(filename, "rb", compression="disable")
+    else:
+        file = filename
+
+    if isinstance(index_filename, str):
+        index = open(index_filename, "rb", compression="disable")
+    else:
+        index = index_filename
+
+    tile_values = []
+    tsinfo = tileset_info(filename, chromsizes, index_filename)
+    # NOTE(review): the index is loaded from index_filename, not the `index` handle.
+    if index_filename:
+        tbx_index = load_tbi_idx(index_filename)
+
+    for tile_id in tile_ids:
+        tile_no_options = tile_id.split(TILE_OPTIONS_CHAR)[0]
+        tile_id_parts = tile_no_options.split(".")
+        tile_position = list(map(int, tile_id_parts[1:3]))
+
+        if len(tile_position) < 2:
+            raise IndexError("Not enough tile info present")
+
+        z, x = tile_position
+
+        if index_filename:
+            try:
+                raw_data = single_indexed_tile(
+                    file=file,
+                    index=index,
+                    chromsizes=chromsizes,
+                    tsinfo=tsinfo,
+                    z=z,
+                    x=x,
+                    tbx_index=tbx_index,
+                    fetcher=raw_tabix_fetcher,
+                )
+                if raw_data is None:
+                    genes, transcripts = {}, {}
+                else:
+                    genes, transcripts = parse_gff_to_models(raw_data)
+                values = {"genes": genes, "transcripts": transcripts}
+            except ValueError as ve:
+                values = {"error": str(ve)}
+        else:
+            values = gff_single_tile_func(
+                file, chromsizes, tsinfo, z, x, settings=settings
+            )
+
+        tile_values += [(tile_id, values)]
+
+    return tile_values
diff --git a/clodius/tiles/hibed.py b/clodius/tiles/hibed.py
new file mode 100644
index 00000000..735a2c86
--- /dev/null
+++ b/clodius/tiles/hibed.py
@@ -0,0 +1,173 @@
+import hashlib
+import h5py
+import json
+
+from clodius.tiles.utils import tiles_wrapper_1d
+
+from clodius.tiles.utils import (
+    calc_max_width,
+    interval_to_chrom_tiles,
+    genome_tile_to_intervals,
+)
+
+import math
+from clodius.tiles.utils import TilesetInfo
+
+
+def tile_entries_sorter(x):
+    """Return the ``(zoom, uid)`` sort key of a formatted tile entry."""
+    return (x["zoom"], x["uid"])
+
+
+MAX_PER_TILE = 4096
+
+
+def tileset_info(filename, chromsizes):
+    """Return the tileset info dict for a hibed file.
+
+    ``max_per_tile`` is read from the attributes of the file's ``info`` entry.
+    """
+    zoom_levels = math.ceil(math.log(sum(chromsizes)) / math.log(2))
+    size_pairs = [[name, int(length)] for name, length in chromsizes.items()]
+    with h5py.File(filename, "r") as hibed_file:
+        per_tile_limit = int(hibed_file["info"].attrs["max_per_tile"])
+    return {
+        "max_width": 2**zoom_levels,
+        "max_zoom": int(zoom_levels),
+        "chromsizes": size_pairs,
+        "min_pos": [0],
+        "max_pos": [2**zoom_levels],
+        "max_per_tile": per_tile_limit,
+    }
+
+
+def single_chromosome_tile(
+    filename, chromsizes, tsinfo: dict, chrom: str, z: int, x: int
+):
+    """Return the entries of one chromosome-level tile of a hibed file.
+
+    Rows are collected from the requested zoom level and from every lower
+    zoom level (halving the tile position at each step), reduced to the
+    ones overlapping the tile and sorted with ``tile_entries_sorter``.
+
+    Parameters
+    ----------
+    filename:
+        Passed to ``h5py.File`` and opened read-only.
+    chromsizes:
+        Chromosome sizes; must support ``cumsum().shift().fillna(0)`` and
+        ``to_dict()`` (presumably a pandas Series keyed by chromosome name).
+    tsinfo: dict
+        Tileset info; only ``tsinfo["max_per_tile"]`` is read here.
+    chrom: str
+        Name of the chromosome the tile belongs to.
+    z: int
+        Zoom level within the chromosome.
+    x: int
+        Tile position at zoom level ``z``.
+
+    Returns
+    -------
+    list of dict
+        At most ``max_per_tile`` entries with the keys ``uid``, ``zoom``,
+        ``xStart``, ``xEnd``, ``chrOffset``, ``importance`` and ``fields``.
+        Empty when the file holds no values for ``chrom``.
+    """
+    max_per_tile = tsinfo["max_per_tile"]
+    css = chromsizes.cumsum().shift().fillna(0).to_dict()
+    chrom_len = chromsizes.to_dict()[chrom]
+
+    # Extent of the requested tile in chromosome coordinates.
+    tile_width = calc_max_width(chrom_len) / 2**z
+    tile_start = tile_width * x
+    tile_end = tile_width * (x + 1)
+
+    items = []
+    # The context manager closes the file on every exit path, including
+    # the early return below.
+    with h5py.File(filename, "r") as f:
+        if chrom not in f["values"]:
+            # No entries for this chromosome
+            return []
+
+        chrom_values = f["values"][chrom]
+        tile_pos = x
+        # Walk from the requested zoom level down to 0. Zoom levels that
+        # are not stored in the file are skipped.
+        for zoom in range(z, -1, -1):
+            if str(zoom) in chrom_values:
+                rows = chrom_values[str(zoom)][
+                    tile_pos * max_per_tile : (tile_pos + 1) * max_per_tile
+                ]
+                # Rows of length zero are dropped.
+                items += [(zoom, row) for row in list(rows) if len(row)]
+            tile_pos //= 2
+
+    formatted = []
+    for zoom, raw_row in items:
+        row = json.loads(raw_row.decode("utf8"))
+        parts = row["line"].split("\t")
+        importance = row["importance"]
+        start = int(parts[1])
+        end = int(parts[2])
+
+        if not (end > tile_start and start < tile_end):
+            # doesn't intersect tile
+            continue
+
+        formatted.append(
+            {
+                "uid": hashlib.md5(row["line"].encode("utf-8")).hexdigest(),
+                "zoom": zoom,
+                "xStart": css[parts[0]] + start,
+                "xEnd": css[parts[0]] + end,
+                "chrOffset": css[parts[0]],
+                "importance": importance,
+                "fields": parts,
+            }
+        )
+
+    # Sort first so that the cap keeps the entries that sort lowest.
+    return sorted(formatted, key=tile_entries_sorter)[:max_per_tile]
+
+
+def single_genome_tile(filename, chromsizes, tsinfo, z, x):
+    """Return the entries of one genome-wide tile of a hibed file.
+
+    The genome tile is split into per-chromosome intervals, every interval
+    is mapped onto chromosome-level tiles, and the entries of those tiles
+    are merged, sorted with ``tile_entries_sorter`` and capped at
+    ``MAX_PER_TILE``.
+    """
+    requests = []
+    for interval in genome_tile_to_intervals(
+        filename, chromsizes, TilesetInfo.parse_obj(tsinfo), z, x
+    ):
+        chrom_idx = interval[0]
+        if chrom_idx >= len(tsinfo["chromsizes"]):
+            # Stop at the first interval past the known chromosomes.
+            break
+        name = tsinfo["chromsizes"][chrom_idx][0]
+        size = tsinfo["chromsizes"][chrom_idx][1]
+        for cz, cx in interval_to_chrom_tiles(interval[1], interval[2], size):
+            requests.append((name, cz, cx))
+
+    merged = []
+    for name, cz, cx in requests:
+        merged.extend(
+            single_chromosome_tile(filename, chromsizes, tsinfo, name, cz, cx)
+        )
+    merged.sort(key=tile_entries_sorter)
+    return merged[:MAX_PER_TILE]
+
+
+def tiles(filename, tile_ids, chromsizes):
+    """Return the hibed tiles for ``tile_ids`` using ``tiles_wrapper_1d``."""
+    info = tileset_info(filename, chromsizes)
+    def genome_tile(z, x):
+        return single_genome_tile(filename, chromsizes, info, z, x)
+    return tiles_wrapper_1d(tile_ids, genome_tile)
+
+
+# tileset_info(filename, chromsizes)
+# tile0 = single_genome_tile(filename, chromsizes, tsinfo, 1, 0)
diff --git a/clodius/tiles/mrmatrix.py b/clodius/tiles/mrmatrix.py
index 05696e3b..b783ce69 100644
--- a/clodius/tiles/mrmatrix.py
+++ b/clodius/tiles/mrmatrix.py
@@ -1,49 +1,66 @@
 import numpy as np
+import h5py
+from clodius.tiles.utils import tiles_wrapper_2d
+from clodius.tiles.format import format_dense_tile
 
 
-def tileset_info(f, bounds=None):
-    if "min-pos" in f.attrs:
-        min_pos = f.attrs["min-pos"]
+def tileset_info(file, bounds=None):
+    if isinstance(file, (str, bytes)) or hasattr(file, '__fspath__'):
+        f = h5py.File(file, "r")
+    else:
+        # Already an h5py-like object or mock
+        f = file
+
+    if 'min-pos' in f.attrs:
+        min_pos = f.attrs['min-pos']
     else:
         min_pos = [0, 0]
 
-    if "max-pos" in f.attrs:
-        max_pos = f.attrs["max-pos"]
+    if 'max-pos' in f.attrs:
+        max_pos = f.attrs['max-pos']
     else:
-        max_pos = f["resolutions"]["1"]["values"].shape
+        max_pos = f['resolutions']['1']['values'].shape
 
     return {
-        "min_pos": min_pos,
-        "max_pos": max_pos,
-        "resolutions": [int(r) for r in f["resolutions"]],
-        "mirror_tiles": "false",
-        "bins_per_dimension": 256,
+        'min_pos': min_pos,
+        'max_pos': max_pos,
+        'resolutions': [int(r) for r in f['resolutions']],
+        'mirror_tiles': 'false',
+        'bins_per_dimension': 256,
     }
 
 
-def tiles(f, z, x, y):
-    """
+def single_tile(file, z, x, y):
+    '''
     Return tiles for the given region.
 
     Parameters:
     -----------
-    f: h5py.File
-        File pointer to the hdf5 file containing the matrices
+    file: str | filelike
+        Path or file-like object of the file to load
     z: int
         The zoom level
     x: int
         The tile's x position
     y: int
         The tile's y position
-    """
-    resolutions = sorted(map(int, f["resolutions"].keys()))[::-1]
-    tsinfo = tileset_info(f)
-    n_bins = tsinfo["bins_per_dimension"]
+    '''
+    if isinstance(file, (str, bytes)) or hasattr(file, '__fspath__'):
+        f = h5py.File(file, "r")
+    else:
+        # Already an h5py-like object or mock
+        f = file
+
+    resolutions = sorted(map(int, f['resolutions'].keys()))[::-1]
+    tsinfo = tileset_info(file)
+    n_bins = tsinfo['bins_per_dimension']
 
     if z >= len(resolutions):
-        raise ValueError("Zoom level out of bounds:", z, "resolutions:", resolutions)
+        raise ValueError(
+            'Zoom level out of bounds:', z,
+            "resolutions:", resolutions)
 
-    tile_width = tsinfo["bins_per_dimension"]
+    tile_width = tsinfo['bins_per_dimension']
 
     # Where in the matrix the tile starts
     tile_x_start = x * tile_width
@@ -52,15 +69,23 @@ def tiles(f, z, x, y):
     tile_x_end = tile_x_start + n_bins
     tile_y_end = tile_y_start + n_bins
 
-    mat = f["resolutions"][str(resolutions[z])]["values"]
-    data = mat[tile_y_start:tile_y_end, tile_x_start:tile_x_end]
+    mat = f['resolutions'][str(resolutions[z])]['values']
+    data = mat[tile_y_start:tile_y_end,
+               tile_x_start:tile_x_end]
 
     x_pad = n_bins - data.shape[0]
     y_pad = n_bins - data.shape[1]
 
     if x_pad > 0 or y_pad > 0:
         data = np.pad(
-            data, ((0, x_pad), (0, y_pad)), "constant", constant_values=(np.nan, np.nan)
-        )
+            data, ((0, x_pad), (0, y_pad)), 'constant',
+            constant_values=(np.nan, np.nan))
 
     return data
+
+
+def tiles(filepath, tile_ids):
+    """Return formatted dense tiles for ``tile_ids`` read from ``filepath``."""
+    def dense_tile(z, x, y):
+        return format_dense_tile(single_tile(filepath, z, x, y))
+    return tiles_wrapper_2d(tile_ids, dense_tile)
diff --git a/clodius/tiles/multivec.py b/clodius/tiles/multivec.py
index 11c9e33c..d9b138fa 100644
--- a/clodius/tiles/multivec.py
+++ b/clodius/tiles/multivec.py
@@ -5,7 +5,31 @@
 import h5py
 import numpy as np
 
-from .utils import abs2genomic
+
+def abs2genomic(chromsizes, start_pos, end_pos):
+    """
+    Split an absolute coordinate range into per-chromosome pieces.
+
+    Parameters:
+    -----------
+    chromsizes: [1000,...]
+        An array of the lengths of the chromosomes
+    start_pos: int
+        The absolute start position
+    end_pos: int
+        The absolute end position
+
+    Yields one ``(chromosome index, start, end)`` tuple per chromosome the
+    range touches; ``start`` and ``end`` are relative to that chromosome.
+    """
+    # Absolute start offset of every chromosome, followed by the genome end.
+    offsets = np.r_[0, np.cumsum(chromsizes)]
+    first, last = np.searchsorted(offsets, [start_pos, end_pos], side="right") - 1
+    piece_start = start_pos - offsets[first]
+    for idx in range(first, last):
+        yield idx, piece_start, chromsizes[idx]
+        piece_start = 0
+    yield last, piece_start, end_pos - offsets[last]
 
 
 def tiles(filename, tile_ids):
@@ -18,6 +42,7 @@ def tiles(filename, tile_ids):
         A list of tile_ids (e.g. xyx.0.0) identifying the tiles
         to be retrieved
     """
+    # print("getting tiles", tile_ids)
     f16 = np.finfo("float16")
     f16_min, f16_max = f16.min, f16.max
     generated_tiles = []
@@ -113,7 +138,7 @@ def get_tile(f, chromsizes, resolution, start_pos, end_pos, shape):
         the values for the portion of the genome that is visible.
     """
     binsize = resolution
-    # print('binsize:', binsize)
+    # print("binsize:", binsize)
     # print('start_pos:', start_pos, 'end_pos:', end_pos)
     # print("length:", end_pos - start_pos)
     # print('shape:', shape)
@@ -132,7 +157,7 @@ def get_tile(f, chromsizes, resolution, start_pos, end_pos, shape):
     for cid, start, end in abs2genomic([c[1] for c in chromsizes], start_pos, end_pos):
         n_bins = int(np.ceil((end - start) / binsize))
         total_length += end - start
-        # print('cid', cid, start, end, 'tl:', total_length)
+        # print("cid", cid, start, end, "tl:", total_length)
 
         try:
             # t1 = time.time()
@@ -167,7 +192,7 @@ def get_tile(f, chromsizes, resolution, start_pos, end_pos, shape):
                     continue
             """
 
-            # print("offset:", offset, "start_pos", start_pos, end_pos)
+            # print("start_pos", start_pos, end_pos)
             x = f["resolutions"][str(resolution)]["values"][chrom][start_pos:end_pos]
             current_binned_data_position += binsize * (end_pos - start_pos)
 
@@ -258,7 +283,21 @@ def tileset_info(filename):
         "shape": shape,
     }
 
-    if "row_infos" in f["resolutions"][str(resolutions[0])].attrs:
+    if "info" in f:
+        if "category_infos" in f["info"]:
+            try:
+                tileset_info["category_infos"] = json.loads(
+                    f["info"]["category_infos"][()]
+                )
+            except:
+                tileset_info["category_infos"] = json.loads(
+                    f["info"]["category_infos"][()].decode("utf8")
+                )
+
+    if "row_infos" in f["info"]:
+        row_infos_encoded = f["info"]["row_infos"][()]
+        tileset_info["row_infos"] = json.loads(row_infos_encoded)
+    elif "row_infos" in f["resolutions"][str(resolutions[0])].attrs:
         row_infos = f["resolutions"][str(resolutions[0])].attrs["row_infos"]
 
         if isinstance(row_infos[0], str):
@@ -274,10 +313,6 @@ def tileset_info(filename):
             except json.JSONDecodeError:
                 tileset_info["row_infos"] = [r.decode("utf8") for r in row_infos]
 
-    elif "row_infos" in f["info"]:
-        row_infos_encoded = f["info"]["row_infos"][()]
-        tileset_info["row_infos"] = json.loads(row_infos_encoded)
-
     f.close()
 
     return tileset_info
diff --git a/clodius/tiles/npmatrix.py b/clodius/tiles/npmatrix.py
index a54569a5..c5e335af 100644
--- a/clodius/tiles/npmatrix.py
+++ b/clodius/tiles/npmatrix.py
@@ -1,7 +1,5 @@
 import math
-
 import numpy as np
-
 import clodius.tiles.format as hgfo
 
 
@@ -83,11 +81,13 @@ def tiles(grid, z, x, y, nan_grid=None, bin_size=256):
         The number of values per bin
     """
     max_dim = max(grid.shape)
+    # print("max_dim", max_dim)
 
     max_zoom = math.ceil(math.log(max_dim / bin_size) / math.log(2))
     max_zoom = 0 if max_zoom < 0 else max_zoom
 
     # max_width = 2 ** max_zoom * bin_size
+    # print("max_width:", max_width, 'bin_size:', bin_size, 'max_zoom', max_zoom)
 
     tile_width = 2 ** (max_zoom - z) * bin_size
 
@@ -97,19 +97,26 @@ def tiles(grid, z, x, y, nan_grid=None, bin_size=256):
     x_end = min(grid.shape[0], x_start + tile_width)
     y_end = min(grid.shape[1], y_start + tile_width)
 
+    # print("tile_width", tile_width)
+    # print("x_start:", x_start, x_end)
+    # print("y_start:", y_start, y_end)
+
     num_to_sum = 2 ** (max_zoom - z)
+    # print("num_to_sum", num_to_sum)
 
     data = grid[x_start:x_end, y_start:y_end]
+    # print("data:", data)
 
     # add some data so that the data can be divided into squares
-    # We use max(1, data.shape...) to make avoid the condition where
-    # a narrow matrix yields data.shape[0] or data.shape[1] being zero
-    # and we return a degenerate tile
-    divisible_x_width = num_to_sum * math.ceil(max(1, data.shape[0]) / num_to_sum)
-    divisible_y_width = num_to_sum * math.ceil(max(1, data.shape[1]) / num_to_sum)
+    divisible_x_width = num_to_sum * math.ceil(data.shape[0] / num_to_sum)
+    divisible_y_width = num_to_sum * math.ceil(data.shape[1] / num_to_sum)
 
     divisible_x_pad = divisible_x_width - data.shape[0]
     divisible_y_pad = divisible_y_width - data.shape[1]
+    # print("data.shape", data.shape)
+
+    # print("divisible_x_pad:", divisible_x_pad)
+    # print("divisible_y_pad:", divisible_y_pad)
 
     a = np.pad(
         data,
@@ -121,8 +128,12 @@ def tiles(grid, z, x, y, nan_grid=None, bin_size=256):
     b = np.nansum(a.reshape((a.shape[0], -1, num_to_sum)), axis=2)
     ret_array = np.nansum(b.T.reshape(b.shape[1], -1, num_to_sum), axis=2).T
     ret_array[ret_array == 0.0] = np.nan
+    # print('ret_array:', ret_array)
+
+    # print("sum:", np.nansum(ret_array))
 
     if nan_grid is not None:
+        # print("normalizing")
         # we want to calculate the means of the data points
 
         # NOTE: In the line below, "nan_grid" was originally "not_nan_grid",
@@ -143,6 +154,9 @@ def tiles(grid, z, x, y, nan_grid=None, bin_size=256):
     x_pad = bin_size - ret_array.shape[0]
     y_pad = bin_size - ret_array.shape[1]
 
+    # print("ret_array:", ret_array.shape)
+    # print("x_pad:", x_pad, "y_pad:", y_pad)
+
     return np.pad(
         ret_array,
         ((0, x_pad), (0, y_pad)),
diff --git a/clodius/tiles/npvector.py b/clodius/tiles/npvector.py
index 3c6ccc3a..f7800c70 100644
--- a/clodius/tiles/npvector.py
+++ b/clodius/tiles/npvector.py
@@ -26,13 +26,16 @@ def tileset_info(array, bounds=None, bins_per_dimension=1024):
     """
     Get the tileset info for the array
     """
-    max_dim = max(array.shape)
+    # Handle 1D arrays
+    if len(array.shape) == 1:
+        max_dim = array.shape[0]
+    else:
+        max_dim = array.shape[1]
 
     max_zoom = math.ceil(math.log(max_dim / bins_per_dimension) / math.log(2))
     max_zoom = 0 if max_zoom < 0 else max_zoom
 
-    max_width = 2 ** max_zoom * bins_per_dimension
-    # print('max_zoom:', max_zoom)
+    max_width = 2**max_zoom * bins_per_dimension
 
     scale_up = max_width / max_dim
 
@@ -45,10 +48,14 @@ def tileset_info(array, bounds=None, bins_per_dimension=1024):
         max_width = (max_pos[0] - min_pos[0]) * scale_up
     else:
         min_pos = [0]
-        max_pos = [array.shape[0]]
-
-    if len(array.shape) > 1:
-        raise ValueError("The array shape is not a vector type", array.shape)
+        if len(array.shape) == 1:
+            max_pos = [array.shape[0]]
+        else:
+            max_pos = [array.shape[1]]
+
+    # Now supports nxm arrays, not just nx1
+    # if len(array.shape) > 1:
+    #     raise ValueError("The array shape is not a vector type", array.shape)
     return {
         "max_width": max_width,
         "min_pos": min_pos,
@@ -56,6 +63,7 @@ def tileset_info(array, bounds=None, bins_per_dimension=1024):
         "max_zoom": max_zoom,
         "bins_per_dimension": bins_per_dimension,
         "tile_size": bins_per_dimension,
+        "shape": array.shape,
     }
 
 
@@ -98,7 +106,7 @@ def tiles(array, z, x, not_nan_array=None, bin_size=1024):
     Parameters
     -----------
     array: np.array
-        An nxn array containing values
+        An nxm array containing values
     z: int
         The zoom level (0 corresponds to most zoomed out)
     x: int
@@ -118,9 +126,20 @@ def tiles(array, z, x, not_nan_array=None, bin_size=1024):
     divisible_x_width = num_to_sum * math.ceil(data.shape[0] / num_to_sum)
     divisible_x_pad = divisible_x_width - data.shape[0]
 
-    a = np.pad(data, ((0, divisible_x_pad),), "constant", constant_values=(np.nan,))
+    # Handle nxm arrays by padding along first dimension only
+    if len(data.shape) == 1:
+        pad_width = ((0, divisible_x_pad),)
+    else:
+        pad_width = ((0, divisible_x_pad),) + ((0, 0),) * (len(data.shape) - 1)
+
+    a = np.pad(data, pad_width, "constant", constant_values=(np.nan,))
 
-    ret_array = np.nansum(a.reshape((-1, num_to_sum)), axis=1)
+    # Reshape and sum along first axis, preserving other dimensions
+    if len(a.shape) == 1:
+        ret_array = np.nansum(a.reshape((-1, num_to_sum)), axis=1)
+    else:
+        new_shape = (-1, num_to_sum) + a.shape[1:]
+        ret_array = np.nansum(a.reshape(new_shape), axis=1)
 
     if not_nan_array is None:
         not_nan_data = ~np.isnan(array[x_start:x_end])
@@ -128,13 +147,29 @@ def tiles(array, z, x, not_nan_array=None, bin_size=1024):
         not_nan_data = not_nan_array[x_start:x_end]
 
     # we want to calculate the means of the data points
-    na = np.pad(
-        not_nan_data, ((0, divisible_x_pad)), "constant", constant_values=(np.nan,)
-    )
-    norm_array = np.nansum(na.reshape((-1, num_to_sum)), axis=1)
+    if len(not_nan_data.shape) == 1:
+        na_pad_width = ((0, divisible_x_pad),)
+    else:
+        na_pad_width = ((0, divisible_x_pad),) + ((0, 0),) * (
+            len(not_nan_data.shape) - 1
+        )
+
+    na = np.pad(not_nan_data, na_pad_width, "constant", constant_values=(np.nan,))
+
+    if len(na.shape) == 1:
+        norm_array = np.nansum(na.reshape((-1, num_to_sum)), axis=1)
+    else:
+        na_new_shape = (-1, num_to_sum) + na.shape[1:]
+        norm_array = np.nansum(na.reshape(na_new_shape), axis=1)
+
     ret_array = ret_array / (norm_array + 1)
 
     # determine how much to pad the array
     x_pad = bin_size - ret_array.shape[0]
 
-    return np.pad(ret_array, ((0, x_pad)), "constant", constant_values=(np.nan,))
+    if len(ret_array.shape) == 1:
+        final_pad_width = ((0, x_pad),)
+    else:
+        final_pad_width = ((0, x_pad),) + ((0, 0),) * (len(ret_array.shape) - 1)
+
+    return np.pad(ret_array, final_pad_width, "constant", constant_values=(np.nan,))
diff --git a/clodius/tiles/pileup.py b/clodius/tiles/pileup.py
new file mode 100644
index 00000000..4707e3aa
--- /dev/null
+++ b/clodius/tiles/pileup.py
@@ -0,0 +1,405 @@
+from Bio import Align
+import tempfile
+from clodius.alignment import alignment_to_subs, order_by_clustering
+from clodius.tiles.csv import csv_sequence_tileset_functions
+
+
+def get_subs(alignment):
+    """Return the result of ``alignment_to_subs`` for ``alignment`` unchanged."""
+    return alignment_to_subs(alignment)
+
+
+def get_pileup_alignment_data(refseq, seqs, cluster=None, values=None):
+    """Get pileup alignment data for a reference sequence and a list of sequences."""
+    chromsizes = [("ref", len(refseq))]
+    refseqs = [{"id": "ref", "seq": refseq}]
+    tf = tile_functions(
+        seqs, refseqs, cluster=cluster, values=values, chromsizes=chromsizes
+    )
+    # Tile ids must look like "<prefix>.<z>.<x>": the tile function reads z and
+    # x from the second and third dot-separated fields, so "0.0" cannot work.
+    tiles = tf["tiles"](["x.0.0"])
+
+    return {"type": tf["tileset_info"](), "tiles": dict(tiles)}
+
+
+def calc_chr_offset(chromsizes, chrom_id):
+    """Return the total size of the entries before ``chrom_id``, or None."""
+    for idx, entry in enumerate(chromsizes):
+        if entry[0] == chrom_id:
+            return sum(prior[1] for prior in chromsizes[:idx])
+    return None
+
+
+def get_substitutions(hit, seq):
+    """
+    Convert a mappy alignment into a list of substitution records.
+
+    The ``cs`` difference string of the hit is parsed with these operations:
+    ``:[len]`` (match), ``*[ref][query]`` (substitution), ``+[seq]``
+    (insertion) and ``-[seq]`` (deletion), e.g. ``:10*at:5+cc:2``.
+    Soft clipping (CIGAR op 4) at either end of ``hit.cigar`` is reported
+    as an ``"S"`` record.
+
+    :param hit: mappy.Alignment object (result of a.map())
+    :param seq: The query sequence string
+    :return: list of dicts with ``pos`` (relative to the target start of
+        the hit), ``length``, ``type`` (``"S"``, ``"X"``, ``"I"`` or
+        ``"D"``) and ``variant``; all but ``"S"`` records also carry ``base``
+    """
+    import re
+
+    substitutions = []
+    # mappy.Alignment.cigar is a list of (length, op); it may be empty.
+    cigar = hit.cigar
+
+    # 1. Handle Leading Soft Clipping
+    if cigar and cigar[0][1] == 4:
+        sc_len = cigar[0][0]
+        substitutions.append(
+            {"pos": -sc_len, "length": sc_len, "type": "S", "variant": seq[:sc_len]}
+        )
+
+    # 2. Parse the CS tag for Mismatches, Inserts, and Deletes
+    curr_pos = 0  # Position relative to target start (hit.r_st)
+    cs_parts = re.findall(r"(:[0-9]+|\*[a-z][a-z]|\+[a-z]+|-[a-z]+)", hit.cs)
+
+    for part in cs_parts:
+        op = part[0]
+
+        if op == ":":  # Match
+            curr_pos += int(part[1:])
+
+        elif op == "*":  # Substitution (Mismatch)
+            # part is e.g. '*ag' meaning ref was 'a', read is 'g'
+            substitutions.append(
+                {
+                    "pos": curr_pos,
+                    "length": 1,
+                    "type": "X",
+                    "base": part[1].upper(),  # Original base
+                    "variant": part[2].upper(),  # Mismatched base
+                }
+            )
+            curr_pos += 1
+
+        elif op == "+":  # Insertion
+            val = part[1:].upper()
+            # Inserted bases consume no target positions, so curr_pos stays.
+            substitutions.append(
+                {
+                    "pos": curr_pos,
+                    "length": len(val),
+                    "type": "I",
+                    "base": "",
+                    "variant": val,
+                }
+            )
+
+        elif op == "-":  # Deletion
+            val = part[1:].upper()
+            substitutions.append(
+                {
+                    "pos": curr_pos,
+                    "length": len(val),
+                    "type": "D",
+                    "base": val,  # Original bases that were deleted
+                    "variant": "",  # No variant base in query
+                }
+            )
+            curr_pos += len(val)
+
+    # 3. Handle Trailing Soft Clipping
+    if cigar and cigar[-1][1] == 4:
+        sc_len = cigar[-1][0]
+        substitutions.append(
+            {
+                # The clip starts right after the aligned target span, which
+                # mappy.Alignment documents as r_st/r_en (not ts/te).
+                "pos": hit.r_en - hit.r_st,
+                "length": sc_len,
+                "type": "S",
+                "variant": seq[-sc_len:],
+            }
+        )
+
+    return substitutions
+
+
+def align_sequences(seq1, seq2):
+    """Align two sequences to each other and return the first alignment found."""
+    scoring = (
+        ("match_score", 1),
+        ("mismatch_score", -4),
+        ("open_gap_score", -6),
+        ("extend_gap_score", -1),
+    )
+
+    aligner = Align.PairwiseAligner()
+    for attribute, score in scoring:
+        setattr(aligner, attribute, score)
+    # Take the first alignment of the returned collection.
+    return aligner.align(seq1, seq2)[0]
+
+
+def tile_functions(seqs, refseqs, cluster=None, values=None, chromsizes=None):
+    """Return a dictionary of tile functions for the pileup track.
+
+    Every sequence in ``seqs`` is aligned against every entry of ``refseqs``
+    up front; the rows form one tile served by the returned ``tiles``
+    callable, next to a ``tileset_info`` callable.
+
+    Parameters
+    ----------
+    seqs: sequences to align
+    refseqs: dicts with the keys ``id`` and ``seq``
+    cluster: ``"linkage"`` reorders ``seqs`` with ``order_by_clustering``
+    values: optional per-sequence data stored under ``extra``
+    chromsizes: ``(name, size)`` pairs; needed despite the ``None`` default
+    """
+    total_length = sum(entry[1] for entry in chromsizes)
+    if cluster == "linkage":
+        seqs = order_by_clustering(seqs)
+
+    rows = []
+    for i, seq in enumerate(seqs):
+        for refseq in refseqs:
+            start, end, subs = alignment_to_subs(align_sequences(refseq["seq"], seq))
+            offset = calc_chr_offset(chromsizes, refseq["id"])
+            row = {
+                "id": f"r{i}_{refseq['id']}",
+                "from": start + offset,
+                "to": end + offset,
+                "substitutions": subs,
+                "color": 0,
+            }
+            if values:
+                row["extra"] = values[i]
+            rows.append(row)
+
+    def tileset_info():
+        return {
+            "tile_size": total_length,
+            "resolutions": [1],
+            "max_tile_width": total_length,
+            "format": "subs",
+            "min_pos": [0],
+            "max_pos": [total_length],
+            "chromsizes": chromsizes,
+        }
+
+    def tiles(tile_ids):
+        result = []
+        for tile_id in tile_ids:
+            fields = tile_id.split(".")
+            z, x = int(fields[1]), int(fields[2])
+            # NOTE(review): only ids with z and x both non-zero get an empty
+            # tile; every other id gets all rows. Confirm "and" is intended.
+            result.append((tile_id, rows if z == 0 or x == 0 else []))
+        return result
+
+    return {"tileset_info": tileset_info, "tiles": tiles}
+
+
+def tile_functions_fasta(seqs, refseqs, cluster=None, values=None, chromsizes=None):
+    """Return a dictionary of tile functions for the pileup track using FASTA and mappy.
+
+    The reference sequences are written to a temporary FASTA file that the
+    mappy aligner is built from; the file is removed once the aligner has
+    been constructed. ``chromsizes`` is needed despite its ``None`` default.
+    """
+    import os
+
+    import mappy as mp
+
+    longest_seq = sum([c[1] for c in chromsizes])
+
+    def tileset_info():
+        return {
+            "tile_size": longest_seq,
+            "resolutions": [1],
+            "max_tile_width": longest_seq,
+            "format": "subs",
+            "min_pos": [0],
+            "max_pos": [longest_seq],
+            "chromsizes": chromsizes,
+        }
+
+    if cluster == "linkage":
+        seqs = order_by_clustering(seqs)
+
+    # delete=False keeps the FASTA file on disk after the handle is closed so
+    # that mappy can open it by name; the ``finally`` clause removes it again.
+    tmp_file = tempfile.NamedTemporaryFile(mode="w", suffix=".fasta", delete=False)
+    try:
+        with tmp_file:
+            for refseq in refseqs:
+                tmp_file.write(f">{refseq['id']}\n{refseq['seq']}\n")
+        # Create mappy aligner from temp file
+        aligner = mp.Aligner(tmp_file.name, preset="sr")
+    finally:
+        os.remove(tmp_file.name)
+
+    tile = []
+    for i, seq in enumerate(seqs):
+        for hit in aligner.map(seq, cs=True):
+            # mappy's r_st / r_en, each shifted by one
+            start = hit.r_st + 1
+            end = hit.r_en + 1
+            # Find chromosome offset
+            chr_offset = calc_chr_offset(chromsizes, hit.ctg)
+            substitutions = get_substitutions(hit, seq)
+            tv = {
+                "id": f"r{i}_{hit.ctg}",
+                "from": start + chr_offset,
+                "to": end + chr_offset,
+                "substitutions": substitutions,
+                "color": 0,
+            }
+            if values:
+                tv["extra"] = values[i]
+            tile.append(tv)
+
+    def tiles(tile_ids):
+        tiles = []
+        for tile_id in tile_ids:
+            parts = tile_id.split(".")
+            z = int(parts[1])
+            x = int(parts[2])
+            # Only ids with z and x both non-zero get an empty tile.
+            tiles += [(tile_id, [] if z != 0 and x != 0 else tile)]
+        return tiles
+
+    return {"tileset_info": tileset_info, "tiles": tiles}
+
+
+def csv_tileset_info(filename, *csv_args, **csv_kwargs):
+    """Get the pileup tileset info for the sequences in a csv file; extra
+    arguments are forwarded to ``csv_sequence_tileset_functions``.
+
+    Parameters
+    ----------
+    filename: string
+        The name of the csv file
+    colname: Optional[str]
+        The name of the column containing the sequences.
+    colnum: Optional[int]
+        The column number of the sequence column. 0-based.
+        Only used if colname is not provided.
+    header: bool
+        Whether to assume that a header is present in the csv file
+    sep: string
+        The separator used in the csv file
+    refrow: A row to use as a reference sequence when calculating
+        alignments. Should be 1-based
+    """
+    functions = csv_sequence_tileset_functions(
+        filename, *csv_args, tile_functions=tile_functions, **csv_kwargs
+    )
+    return functions["tileset_info"]()
+
+
+def csv_tiles(filename, tile_ids, *csv_args, **csv_kwargs):
+    """Return the pileup tiles ``tile_ids`` for the sequences in a csv file."""
+    functions = csv_sequence_tileset_functions(
+        filename, *csv_args, tile_functions=tile_functions, **csv_kwargs
+    )
+    return functions["tiles"](tile_ids)
+
+
+def _chromsizes_from_fasta(fasta_file):
+    """Compute chromsizes list from a FASTA file.
+
+    Parameters
+    ----------
+    fasta_file: str or file-like
+        Path to the FASTA file or a binary file-like object. A file-like
+        object is read to its end and then rewound to position 0.
+
+    Returns
+    -------
+    list of [str, int]
+        List of [sequence_id, length] pairs.
+    """
+    import io
+
+    from Bio import SeqIO
+
+    if isinstance(fasta_file, str):
+        with open(fasta_file, "rb") as handle:
+            raw = handle.read()
+    else:
+        raw = fasta_file.read()
+        fasta_file.seek(0)
+
+    text = io.TextIOWrapper(io.BytesIO(raw), "utf-8")
+    return [[record.id, len(record.seq)] for record in SeqIO.parse(text, "fasta")]
+
+
+def get_local_tiles(
+    filename, *csv_args, reffile=None, chromsizes_file=None, **csv_kwargs
+):
+    """Get local higlass tiles for a pileup-csv file.
+
+    Parameters
+    ----------
+    filename: str or file-like
+        Path to the CSV file or a file-like object. A file-like object is
+        rewound to position 0 after it has been read for the refrow lookup.
+    reffile: str or file-like, optional
+        Path to a FASTA reference file or a file-like object.
+        Required when refrow is not provided in csv_kwargs.
+    chromsizes_file: str or file-like, optional
+        Path to a chromsizes TSV file or a file-like object.
+        When omitted, chromsizes are computed from reffile or refrow.
+    *csv_args, **csv_kwargs:
+        Additional arguments forwarded to csv_sequence_tileset_functions
+        (e.g. colname, colnum, header, sep, refrow).
+    """
+    import pandas as pd
+
+    chromsizes = None
+    if chromsizes_file is None:
+        if reffile is not None:
+            chromsizes = _chromsizes_from_fasta(reffile)
+        elif "refrow" in csv_kwargs:
+            refrow = csv_kwargs["refrow"]
+            sep = csv_kwargs.get("sep", ",")
+            header = csv_kwargs.get("header", True)
+            colname = csv_kwargs.get("colname")
+            colnum = csv_kwargs.get("colnum")
+            df = pd.read_csv(filename, header=0 if header else None, sep=sep)
+            # read_csv consumed a file-like input; rewind it so that
+            # csv_sequence_tileset_functions below can read it again.
+            if hasattr(filename, "seek"):
+                filename.seek(0)
+            if colname is None and colnum is not None:
+                colname = df.columns[colnum - 1]
+            seq = df[colname].values[refrow - 1]
+            if not isinstance(seq, str):
+                raise TypeError(
+                    f"Expected a string sequence in column '{colname}' (colnum={colnum}), "
+                    f"but got {type(seq).__name__!r} with value {seq!r}. "
+                    f"Available columns are: {list(df.columns)}. "
+                    f"Check that colnum/colname points to the sequence column."
+                )
+            chromsizes = [[f"row_{refrow}", len(seq)]]
+
+    tf = csv_sequence_tileset_functions(
+        filename,
+        *csv_args,
+        tile_functions=tile_functions_fasta if reffile is not None else tile_functions,
+        fasta_datafile=reffile,
+        chromsizes_datafile=chromsizes_file,
+        chromsizes=chromsizes,
+        **csv_kwargs,
+    )
+
+    tsinfo = tf["tileset_info"]()
+    max_resolution = max(tsinfo["resolutions"])
+    # One tile id per position at every resolution, largest value first.
+    tile_ids = []
+    for i, res in enumerate(sorted(tsinfo["resolutions"], key=lambda x: -x)):
+        tile_ids += [f"x.{i}.{j}" for j in range(max_resolution // res)]
+
+    return {"tilesetInfo": {"x": tsinfo}, "tiles": dict(tf["tiles"](tile_ids))}
diff --git a/clodius/tiles/sequence_logos.py b/clodius/tiles/sequence_logos.py
new file mode 100644
index 00000000..c853f689
--- /dev/null
+++ b/clodius/tiles/sequence_logos.py
@@ -0,0 +1,126 @@
+from clodius.alignment import (
+    generate_pwm_from_sequences,
+    DNA_ALPHABET,
+    PROTEIN_ALPHABET,
+)
+from typing import Literal
+from clodius.tiles import npvector
+from clodius.tiles.csv import csv_sequence_tileset_functions
+import numpy as np
+import base64
+from typing import Optional
+
+
+def tile_functions(
+    sequences,
+    seqtype: Optional[Literal["dna", "protein"]] = None,
+    refseq=None,
+    **kwargs,
+):
+    """Build the tileset_info/tiles callables for a sequence logo track.
+
+    A position weight matrix is computed from `sequences` (`refseq` is forwarded
+    to generate_pwm_from_sequences) with one row per alphabet letter. When
+    `seqtype` is None a 4-entry PWM is treated as DNA, anything else as protein.
+    Extra keyword arguments are ignored. Raises ValueError for other seqtypes.
+    """
+    pwm, seqs = generate_pwm_from_sequences(sequences, seqtype=seqtype, refseq=refseq)
+
+    if seqtype is None:
+        seqtype = "dna" if len(pwm) == 4 else "protein"
+    if seqtype == "dna":
+        alphabet = DNA_ALPHABET
+    elif seqtype == "protein":
+        alphabet = PROTEIN_ALPHABET
+    else:
+        raise ValueError(f"Unknown type: {seqtype}. Expected 'dna' or 'protein'.")
+
+    vector = np.array([pwm[b] for b in alphabet])
+
+    bin_size = 512
+    tsinfo = npvector.tileset_info(vector, bins_per_dimension=bin_size)
+    tsinfo["shape"] = [vector.shape[0], bin_size]
+    tsinfo["row_infos"] = alphabet
+    # Largest resolution first, one entry per zoom level.
+    tsinfo["resolutions"] = [2**i for i in range(tsinfo["max_zoom"], -1, -1)]
+    tsinfo["aligned_seqs"] = seqs
+
+    del tsinfo["max_zoom"]
+    del tsinfo["max_width"]
+
+    def tileset_info():
+        return tsinfo
+
+    def tiles(tile_ids):
+        to_ret = []
+        for tile_id in tile_ids:
+            # parts[1] is the zoom level and parts[2] the x position.
+            parts = tile_id.split(".")
+            z, x = int(parts[1]), int(parts[2])
+
+            t = npvector.tiles(vector.T, z, x, bin_size=bin_size)
+            dense = t.T.ravel().astype("float16")
+            to_ret.append(
+                (
+                    tile_id,
+                    {
+                        "dense": base64.b64encode(dense.tobytes()).decode("utf-8"),
+                        "dtype": "float16",
+                        "shape": [vector.shape[0], bin_size],
+                    },
+                )
+            )
+        return to_ret
+
+    return {"tileset_info": tileset_info, "tiles": tiles}
+
+
+def get_local_tiles(filename, colname=None, colnum=None, sep=","):
+    """Get local higlass tiles for the provided file.
+
+    Returns {"tilesetInfo": {"x": tsinfo}, "tiles": {tile_id: tile}} for all zooms.
+    """
+    tsinfo = csv_tileset_info(filename, colname=colname, colnum=colnum, sep=sep)
+    max_resolution = max(tsinfo["resolutions"])
+
+    tile_ids = []
+    for i, res in enumerate(sorted(tsinfo["resolutions"], reverse=True)):
+        # Zoom level i is covered by max_resolution // res tiles.
+        tile_ids.extend(f"x.{i}.{j}" for j in range(max_resolution // res))
+
+    tiles = dict(csv_tiles(filename, tile_ids, colname=colname, colnum=colnum, sep=sep))
+    return {"tilesetInfo": {"x": tsinfo}, "tiles": tiles}
+
+
+def csv_tileset_info(filename, *csv_args, **csv_kwargs):
+    """Get tileset info for a sequence logo track from
+    a csv file.
+
+    Parameters
+    ----------
+    filename: string
+        The name of the csv file
+    colname: Optional[str]
+        The name of the column containing the sequences.
+    colnum: Optional[int]
+        The column number of the sequence column. Documented as 0-based
+        (TODO confirm). Only used if colname is not provided.
+    header: bool
+        Whether to assume that a header is present in the csv file
+    sep: string
+        The separator used in the csv file
+    refrow: int
+        A row to use as a reference sequence for alignments. 1-based.
+    """
+    tf = csv_sequence_tileset_functions(
+        filename, tile_functions=tile_functions, *csv_args, **csv_kwargs
+    )
+    return tf["tileset_info"]()
+
+
+def csv_tiles(filename, tile_ids, *csv_args, **csv_kwargs):
+    """Return the sequence logo tiles `tile_ids` for the csv file `filename`."""
+    tileset = csv_sequence_tileset_functions(
+        filename, *csv_args, tile_functions=tile_functions, **csv_kwargs
+    )
+    return tileset["tiles"](tile_ids)
diff --git a/clodius/tiles/tabix.py b/clodius/tiles/tabix.py
index bc13e69b..894de5d7 100644
--- a/clodius/tiles/tabix.py
+++ b/clodius/tiles/tabix.py
@@ -1,90 +1,102 @@
 import collections as col
 import gzip
 import struct
+import polars as pl
+import pandas as pd
+from typing import Literal
+
+from smart_open import open
 
 from clodius.tiles.bigwig import abs2genomic
+from clodius.utils import get_file_compression
 
 
-def load_bai_index(index_filename):
+def load_bai_index(index_file):
     """Load a reduced version of a bai index so that we can
     go through it and get a sense of how much data will be
     retrieved by a query."""
-    with open(index_filename, "rb") as f:
-        b = bytearray(f.read())
+    f = index_file
+    b = bytearray(f.read())
 
-        [_, _, _, _, n_ref] = struct.unpack("<4cI", b[:8])
-        c = 8
+    [_, _, _, _, n_ref] = struct.unpack("<4cI", b[:8])
+    c = 8
 
-        indeces = []
+    indeces = []
 
-        for i in range(n_ref):
-            n_bin = struct.unpack("<I", b[c : c + 4])[0]
-            c += 4
-            bins = col.defaultdict(list)
-            for j in range(n_bin):
-                [bin_no, n_chunk] = struct.unpack("<II", b[c : c + 8])
-                c += 8
+    for i in range(n_ref):
+        n_bin = struct.unpack("<I", b[c : c + 4])[0]
+        c += 4
+        bins = col.defaultdict(list)
+        for j in range(n_bin):
+            [bin_no, n_chunk] = struct.unpack("<II", b[c : c + 8])
+            c += 8
 
-                bytes_to_read = n_chunk * 2 * 8
-                unpack_str = f"<{2 * n_chunk}Q"
-                bins[bin_no] = struct.unpack(unpack_str, b[c : c + bytes_to_read])
-                c += bytes_to_read
+            bytes_to_read = n_chunk * 2 * 8
+            unpack_str = f"<{2 * n_chunk}Q"
+            bins[bin_no] = struct.unpack(unpack_str, b[c : c + bytes_to_read])
+            c += bytes_to_read
 
-            n_intv = struct.unpack("<I", b[c : c + 4])[0]
-            c += 4 + 8 * n_intv
+        n_intv = struct.unpack("<I", b[c : c + 4])[0]
+        c += 4 + 8 * n_intv
 
-            indeces += [bins]
+        indeces += [bins]
 
-        return indeces
+    return indeces
 
 
 def load_tbi_idx(index_filename):
     """Load a reduced version of a tabix index so that we can
     go through it and get a sense of how much data will be
     retrieved by a query."""
-    with gzip.open(index_filename, "rb") as f:
-        b = bytearray(f.read())
-
-        [
-            _,
-            _,
-            _,
-            _,
-            n_ref,
-            format,
-            col_seq,
-            col_beg,
-            col_end,
-            meta,
-            skip,
-            l_nm,
-        ] = struct.unpack("<4ciiiiiiii", b[:36])
-        c = 36
-
-        names = [n.decode("ascii") for n in b[c : c + l_nm].split(b"\0")]
-        c += l_nm
-
-        indeces = []
-
-        for i in range(n_ref):
-            n_bin = struct.unpack("<i", b[c : c + 4])[0]
-            c += 4
-            bins = col.defaultdict(list)
-            for j in range(n_bin):
-                [bin_no, n_chunk] = struct.unpack("<Ii", b[c : c + 8])
-                c += 8
-
-                bytes_to_read = n_chunk * 2 * 8
-                unpack_str = f"<{2 * n_chunk}Q"
-                bins[bin_no] = struct.unpack(unpack_str, b[c : c + bytes_to_read])
-                c += bytes_to_read
-
-            n_intv = struct.unpack("<i", b[c : c + 4])[0]
-            c += 4 + 8 * n_intv
-
-            indeces += [bins]
-
-        return dict(zip(names, indeces))
+    if isinstance(index_filename, str):
+        f = open(index_filename, "rb")
+    else:
+        f = index_filename
+
+    f.seek(0)
+
+    with gzip.GzipFile(fileobj=f, mode="rb") as gz:
+        b = gz.read()
+
+    [
+        _,
+        _,
+        _,
+        _,
+        n_ref,
+        format,
+        col_seq,
+        col_beg,
+        col_end,
+        meta,
+        skip,
+        l_nm,
+    ] = struct.unpack("<4ciiiiiiii", b[:36])
+    c = 36
+    names = [n.decode("ascii") for n in b[c : c + l_nm].split(b"\0")]
+    c += l_nm
+
+    indeces = []
+
+    for i in range(n_ref):
+        n_bin = struct.unpack("<i", b[c : c + 4])[0]
+        c += 4
+        bins = col.defaultdict(list)
+        for j in range(n_bin):
+            [bin_no, n_chunk] = struct.unpack("<Ii", b[c : c + 8])
+            c += 8
+
+            bytes_to_read = n_chunk * 2 * 8
+            unpack_str = f"<{2 * n_chunk}Q"
+            bins[bin_no] = struct.unpack(unpack_str, b[c : c + bytes_to_read])
+            c += bytes_to_read
+
+        n_intv = struct.unpack("<i", b[c : c + 4])[0]
+        c += 4 + 8 * n_intv
+
+        indeces += [bins]
+
+    return dict(zip(names, indeces))
 
 
 def chunks(lst, n):
@@ -115,13 +127,13 @@ def reg2bins(begin, end, n_lvls=5, min_shift=14):
     """
     begin, end = begin, end
     t, s = 0, min_shift + (n_lvls << 1) + n_lvls
-    for l in range(n_lvls + 1):  # noqa ignore ambiguous variable name
+    for level in range(n_lvls + 1):
         b, e = t + (begin >> s), t + (end >> s)
         n = e - b + 1
         for k in range(b, e + 1):
             yield k
             n += 1
-        t += 1 << ((l << 1) + l)
+        t += 1 << ((level << 1) + level)
         s -= 3
 
 
@@ -149,38 +161,116 @@ def est_query_size(index, name, start, end):
     return est_query_size_ix(ix, start, end)
 
 
+def dataframe_tabix_fetcher(file, index, ref, start, end):
+    """Fetch rows of a tabix indexed BED file overlapping ref:start-end into a polars dataframe."""
+    import oxbow as ox
+
+    if isinstance(index, str):  # index given as a path: open it without decompression
+        index = open(index, "rb", compression="disable")
+
+    if start == 0:  # NOTE(review): presumably region strings are 1-based -- confirm
+        start = 1
+    pos = f"{ref}:{start}-{end}"
+
+    def file_src():  # rewinds and returns the data file each time it is called
+        file.seek(0)
+        return file
+
+    def index_src():  # rewinds and returns the index file each time it is called
+        index.seek(0)
+        return index
+
+    try:
+        df = ox.from_bed(file_src, compression="bgzf", index=index_src).regions(pos).to_polars()
+    except (ValueError, KeyError) as ex:
+        if "missing reference sequence" in str(ex) or "not found in index" in str(ex):
+            return None  # reference reported as missing: signal "no data" instead of failing
+        raise
+
+    # Reconstruct raw column (full tab-separated line) for downstream compatibility
+    rest_col = pl.col("rest").fill_null("")
+    return df.with_columns(
+        (
+            pl.concat_str(
+                [pl.col("chrom"), pl.col("start").cast(pl.String), pl.col("end").cast(pl.String)],
+                separator="\t",
+            )
+            + pl.when(rest_col != "").then(pl.lit("\t") + rest_col).otherwise(pl.lit(""))
+        ).alias("raw")
+    )
+
+
+def raw_tabix_fetcher(file, index, ref, start, end):
+    """Fetch rows of a tabix-indexed GFF file overlapping ref:start-end into a polars dataframe."""
+    import oxbow as ox
+
+    if isinstance(index, str):  # index given as a path: open it without decompression
+        index = open(index, "rb", compression="disable")
+
+    if start == 0:  # NOTE(review): presumably region strings are 1-based -- confirm
+        start = 1
+    pos = f"{ref}:{start}-{end}"
+
+    def file_src():  # rewinds and returns the data file each time it is called
+        file.seek(0)
+        return file
+
+    def index_src():  # rewinds and returns the index file each time it is called
+        index.seek(0)
+        return index
+
+    try:
+        df = (
+            ox.from_gff(
+                file_src,
+                compression="bgzf",
+                index=index_src,
+                attribute_defs=[  # (attribute name, type) pairs passed on to oxbow
+                    ("ID", "String"),
+                    ("Name", "String"),
+                    ("Parent", "String"),
+                    ("gene_biotype", "String"),
+                    ("pseudo", "String"),
+                ],
+            )
+            .regions(pos)
+            .to_polars()
+        )
+    except (ValueError, KeyError) as ex:
+        if "missing reference sequence" in str(ex) or "not found in index" in str(ex):
+            return None  # reference reported as missing: signal "no data" instead of failing
+        raise
+
+    return df
+
+
 def single_indexed_tile(
-    filename,
-    index_filename,
+    file,
+    index,
     chromsizes,
     tsinfo,
     z,
     x,
-    max_tile_width,
     tbx_index,
-    fetcher,
+    fetcher=dataframe_tabix_fetcher,
+    max_tile_width=None,
     max_results=None,
 ):
-    if max_results is None:
-        max_results = 2048
-
-    tile_width = tsinfo["max_width"] / 2 ** z
+    tile_width = tsinfo["max_width"] / 2**z
 
     if max_tile_width and tile_width > max_tile_width:
-        return {"error": "Tile too wide"}
+        raise ValueError(f"Tile too wide {tile_width}. Max width: {max_tile_width}.")
 
     query_size = 0
 
-    start_pos = x * tsinfo["max_width"] / 2 ** z
-    end_pos = (x + 1) * tsinfo["max_width"] / 2 ** z
-
-    # css = chromsizes.cumsum().shift().fillna(0).to_dict()
+    start_pos = x * tsinfo["max_width"] / 2**z
+    end_pos = (x + 1) * tsinfo["max_width"] / 2**z
 
     cids_starts_ends = list(abs2genomic(chromsizes, start_pos, end_pos))
-    ret_vals = []
+    ret_vals = None
 
     if tbx_index:
-        for (cid, start, end) in cids_starts_ends:
+        for cid, start, end in cids_starts_ends:
             if cid >= len(chromsizes):
                 continue
 
@@ -191,17 +281,83 @@ def single_indexed_tile(
     MAX_QUERY_SIZE = 1000000
 
     if query_size > MAX_QUERY_SIZE:
-        return {"error": f"Tile too large {query_size}"}
+        raise ValueError(f"Tile too large {query_size}")
 
-    for (cid, start, end) in cids_starts_ends:
+    for cid, start, end in cids_starts_ends:
         if cid >= len(chromsizes):
             continue
 
         chrom = chromsizes.index[cid]
+        df = fetcher(file, index, str(chrom), int(start), int(end))
+        if df is not None:
+            if ret_vals is None:
+                ret_vals = df
+            else:
+                ret_vals = pl.concat([ret_vals, df])
 
-        ret_vals += fetcher(str(chrom), int(start), int(end))
-
-    if len(ret_vals) > max_results:
-        return {"error": f"Too many values in tile {len(ret_vals)}"}
+    if ret_vals is not None and max_results and len(ret_vals) > max_results:
+        raise ValueError(f"Too many values in tile {len(ret_vals)}")
 
     return ret_vals
+
+
+def df_single_tile(filename, chromsizes, tsinfo, z, x, mode: Literal["gff", "bed"]):
+    """Load the rows of an unindexed tab-separated file that overlap tile (z, x).
+
+    `filename` may be a path or an open binary file object; the whole file is
+    parsed on every call. Returns a polars DataFrame (empty if no interval of
+    the tile maps onto a known chromosome).
+    """
+    tile_width = tsinfo["max_width"] / 2**z
+    start_pos = x * tile_width
+    end_pos = (x + 1) * tile_width
+    cids_starts_ends = list(abs2genomic(chromsizes, start_pos, end_pos))
+
+    if hasattr(filename, "seek"):
+        # File object: rewind and sniff the compression from the magic number.
+        filename.seek(0)
+        compression = get_file_compression(filename)
+    else:
+        # Path: get_file_compression needs tell()/read(), so let pandas infer.
+        compression = "infer"
+
+    df = pl.from_pandas(
+        pd.read_csv(
+            filename, delimiter="\t", header=None, comment="#", compression=compression
+        )
+    )
+
+    if mode == "gff":
+        df.columns = [
+            "seqid",
+            "source",
+            "type",
+            "start",
+            "end",
+            "score",
+            "strand",
+            "phase",
+            "attributes",
+        ]
+        chrom_col, start_col, end_col = "seqid", "start", "end"
+    else:  # bed
+        # NOTE(review): assumes the frame exposes column_1..column_3 -- confirm.
+        chrom_col, start_col, end_col = "column_1", "column_2", "column_3"
+
+    filtered_rows = []
+    for cid, tile_start, tile_end in cids_starts_ends:
+        if cid >= len(chromsizes):
+            continue
+
+        chrom = chromsizes.index[cid]
+
+        mask = (
+            (df[chrom_col] == chrom)
+            & (df[end_col] > tile_start)
+            & (df[start_col] < tile_end)
+        )
+        filtered_rows.append(df.filter(mask))
+
+    if filtered_rows:
+        return pl.concat(filtered_rows)
+    return pl.DataFrame()
diff --git a/clodius/tiles/utils.py b/clodius/tiles/utils.py
index 93fced20..5e57b899 100644
--- a/clodius/tiles/utils.py
+++ b/clodius/tiles/utils.py
@@ -3,10 +3,12 @@
 import re
 from typing import List, Optional
 
+import math
 import numpy as np
 from pydantic import BaseModel, validator
 
 from clodius.chromosomes import load_chromsizes
+from clodius.utils import TILE_OPTIONS_CHAR
 
 
 def partition_by_adjacent_tiles(tile_ids, dimension=2):
@@ -103,6 +105,22 @@ def infer_datatype(filetype):
         return "bedlike"
 
 
+def tiles_wrapper_1d(tile_ids, tiles_function):
+    """Call tiles_function(z, x) for each "<uid>.<z>.<x>" id; return (id, tile) pairs."""
+    tile_values = []
+    for tile_id in tile_ids:
+        parts = tile_id.split(".")
+
+        # parts[1] (zoom) and parts[2] (x) are both read, so three parts are needed.
+        if len(parts) < 3:
+            raise IndexError("Not enough tile info present")
+
+        z, x = int(parts[1]), int(parts[2])
+        tile_values.append((tile_id, tiles_function(z, x)))
+
+    return tile_values
+
+
 def tiles_wrapper_2d(tile_ids, tiles_function):
     tile_values = []
 
@@ -180,7 +198,7 @@ def tile_bounds(tsinfo, z, x, y, width=1, height=1):
 
     max_width = max(max_pos[0] - min_pos[0], max_pos[1] - min_pos[1])
 
-    tile_width = max_width / 2 ** z
+    tile_width = max_width / 2**z
     from_x = min_pos[0] + x * tile_width
     to_x = min_pos[0] + (x + width) * tile_width
 
@@ -195,6 +213,7 @@ class TilesetInfo(BaseModel):
     max_width: int
     max_pos: List[int]
     min_pos: List[int]
+    chromizes: Optional[List] = None
 
     @validator("max_zoom")
     def max_zoom_zero_or_greater(cls, v):
@@ -226,19 +245,17 @@ def zoom_zero_or_greater(cls, v):
         return int(v)
 
 
-def parse_tile_id(tile_id, tsinfo):
-    tile_id_parts = tile_id.split("|")[0].split(".")
-    tile_position = list(map(int, tile_id_parts[1:3]))
-    zoom_level = int(tile_id_parts[1])
+def parse_tile_position(tile_position: List[int], tsinfo: TilesetInfo) -> TileInfo:
+    zoom_level = int(tile_position[0])
 
-    tile_width = tsinfo.max_width / 2 ** int(tile_position[0])
+    tile_width = tsinfo.max_width / 2**zoom_level
 
     starts = [
-        pos * (tsinfo.max_width / 2 ** zoom_level) + tsinfo.min_pos[i]
+        pos * (tsinfo.max_width / 2**zoom_level) + tsinfo.min_pos[i]
         for (i, pos) in enumerate(tile_position[1:])
     ]
     ends = [
-        (pos * (tsinfo.max_width / 2 ** zoom_level) + tsinfo.min_pos[i] + tile_width)
+        (pos * (tsinfo.max_width / 2**zoom_level) + tsinfo.min_pos[i] + tile_width)
         for (i, pos) in enumerate(tile_position[1:])
     ]
 
@@ -251,19 +268,14 @@ def parse_tile_id(tile_id, tsinfo):
     )
 
 
-def abs2genomic(chromsizes, start_pos, end_pos):
-    """
-    Convert absolute coordinates to genomic coordinates
+def parse_tile_id(tile_id: str, tsinfo: TilesetInfo) -> TileInfo:
+    tile_id_parts = tile_id.split(TILE_OPTIONS_CHAR)[0].split(".")
+    tile_position = list(map(int, tile_id_parts[1:3]))
 
-    Parameters:
-    -----------
-    chromsizes: [[chrom, size],...]
-        A list of chromosome sizes associated with this tileset
-    start_pos: int
-        The absolute start coordinate
-    end_pos: int
-        The absolute end coordinate
-    """
+    return parse_tile_position(tile_position, tsinfo)
+
+
+def abs2genomic(chromsizes, start_pos, end_pos):
     abs_chrom_offsets = np.r_[0, np.cumsum(chromsizes)]
     cid_lo, cid_hi = (
         np.searchsorted(abs_chrom_offsets, [start_pos, end_pos], side="right") - 1
@@ -290,7 +302,6 @@ def abs2genome_fn(chromsizes_filename, start, end):
     E.g. (1000,2000) => [('chr1', 1000, 1500), ('chr2', 1500, 2000)]
     """
     (chrom_info, chrom_names, chrom_sizes) = load_chromsizes(chromsizes_filename)
-
     for cid, start, end in abs2genomic(chrom_sizes, start, end):
         try:
             yield ChromosomeInterval(
@@ -363,3 +374,49 @@ def natsorted(iterable):
     Sort an iterable by natural genomic order
     """
     return sorted(iterable, key=ft.cmp_to_key(natcmp))
+
+
+def calc_max_width(length):
+    """Return the smallest power of two >= length (tileset width at a max resolution of 1)."""
+    return 2 ** math.ceil(math.log2(length))
+
+
+def interval_to_chrom_tiles(start, end, chrom_length):
+    """Return the (zoom, position) tiles of one chromosome covering [start, end].
+
+    The zoom level is the deepest one whose tiles are at least as wide as the
+    interval, assuming a base resolution of 1 base pair.
+    """
+    max_width = calc_max_width(chrom_length)
+    width_ratio = max_width / (end - start)
+    zoom_level = math.floor(math.log(width_ratio) / math.log(2))
+    tile_size = int(max_width / 2**zoom_level)
+
+    first_tile = start // tile_size
+    last_tile = end // tile_size
+    return [(zoom_level, pos) for pos in range(first_tile, last_tile + 1)]
+
+
+def genome_tile_to_intervals(filename, chromsizes, tsinfo, z, x):
+    """Translate a genome tile into a set of chromosome intervals.
+
+    Genome / chromosome tiling
+
+        tile 0.0      1.0    1.1    0.0
+    |---------------|------|------|-------|
+        chr1           chr2        chr3
+    |---------------|-------------|-------|
+    |-------------------|-----------------|
+        tile 1.0             tile 1.1
+
+    Algorithm (this function performs step 1 only; `filename` is not used):
+
+    1. Given global [start, end], convert to [(chr, start, end), (chr, start, end), ...] tuples
+    2. Convert each (chr, start, end) to chrom tiles (chrom1, tile1), (chrom1, tile2), (chrom2, tile2)
+    3. Get data for each chrom tile
+    4. Downsample the whole dataset so that there are fewer than MAX_ENTRIES per tile
+    """
+    tile_info = parse_tile_position([z, x], tsinfo)
+    chrom_lengths = chromsizes.array
+    intervals = abs2genomic(chrom_lengths, tile_info.start[0], tile_info.end[0])
+    return intervals
diff --git a/clodius/tiles/vcf.py b/clodius/tiles/vcf.py
new file mode 100644
index 00000000..e0884b1e
--- /dev/null
+++ b/clodius/tiles/vcf.py
@@ -0,0 +1,218 @@
+import itertools
+import math
+import random
+
+import clodius.tiles.tabix as rtt
+from clodius.tiles.bigwig import abs2genomic
+from clodius.utils import TILE_OPTIONS_CHAR
+
+from pysam import VariantFile
+
+
+def grouper(n, iterable):
+    """Yield successive tuples of up to n items from iterable until exhausted."""
+    source = iter(iterable)
+    batch = tuple(itertools.islice(source, n))
+    while batch:
+        yield batch
+        batch = tuple(itertools.islice(source, n))
+
+
+def generic_regions(fetcher, offset, limit):
+    """Return one page of at most `limit` entries from `fetcher`, after skipping `offset`.
+
+    Returns (page, has_next): a tuple of entries and whether more entries follow.
+    `fetcher` must be an iterator; it is consumed.
+    """
+    for _ in range(offset or 0):
+        try:
+            next(fetcher)
+        except StopIteration:
+            # NOTE(review): this early exit returns a dict while the normal
+            # path returns a tuple; kept as is for callers -- confirm intent.
+            return {"offset": offset, "limit": limit, "results": [], "next": False}
+
+    # An exhausted fetcher yields an empty page instead of a stray StopIteration.
+    curr_page = next(grouper(limit, fetcher), ())
+
+    # Peek at a single entry to learn whether another page exists.
+    has_next = next(grouper(1, fetcher), None) is not None
+    return (curr_page, has_next)
+
+
+def regions(filename, chromsizes, offset, limit):
+    """Return one page of the records of a variant file as region dicts.
+
+    Arguments:
+        filename: The name of the file
+        chromsizes: Chromosome sizes; must support cumsum().shift().fillna()
+            (presumably a pandas Series indexed by chromosome name -- confirm)
+        offset: The number of leading records to skip before
+            fetching entries
+        limit: The maximum number of entries to fetch
+    """
+    vcf = VariantFile(filename)  # auto-detect input format
+
+    fetcher = vcf.fetch()
+    css = chromsizes.cumsum().shift().fillna(0).to_dict()  # cumulative size of the preceding chromosomes
+
+    def regions_iterator():
+        for rec in fetcher:
+            yield {
+                "uid": rec.id,
+                "chrOffset": css[rec.chrom],
+                "xStart": css[rec.chrom] + rec.start,
+                "xEnd": css[rec.chrom] + rec.stop,
+                "fields": (rec.chrom, rec.start, rec.stop, str(rec)),
+            }
+
+    return generic_regions(regions_iterator(), offset, limit)
+
+
+def tileset_info(filename, chromsizes):
+    """Describe the bounds of a VCF tileset spanning all of `chromsizes`.
+
+    `chromsizes` is any object whose items() yields (chromosome, size)
+    pairs; `filename` is accepted but not read. The returned dict holds
+    the summed chromosome sizes as max_width and max_pos, the zoom depth
+    derived from that width, the sizes as [name, int] pairs and a fixed
+    max_tile_width of 100000.
+
+    Raises ValueError (from math.log) when the sizes sum to zero.
+    """
+    # Cast to plain ints so that int64 sizes can be serialized.
+    chromsizes_list = [[chrom, int(size)] for chrom, size in chromsizes.items()]
+
+    max_width = sum(size for _, size in chromsizes_list)
+
+    MAX_TILE_WIDTH = 100000
+    # floor(log2(max_width)), computed with natural logarithms.
+    max_zoom = int(math.log(max_width) / math.log(2))
+
+    return {
+        "max_width": max_width,
+        "max_zoom": max_zoom,
+        "chromsizes": chromsizes_list,
+        "min_pos": [0],
+        "max_pos": [max_width],
+        "max_tile_width": MAX_TILE_WIDTH,
+    }
+
+
+# def tiles_wrapper(array, tile_ids, not_nan_array=None):
+#     tile_values = []
+
+#     for tile_id in tile_ids:
+#         parts = tile_id.split(".")
+
+#         if len(parts) < 3:
+#             raise IndexError("Not enough tile info present")
+
+#         z = int(parts[1])
+#         x = int(parts[2])
+
+#         ret_array = tiles(array, z, x, not_nan_array).reshape((-1))
+
+#         tile_values += [(tile_id, ctf.format_dense_tile(ret_array))]
+
+#     return tile_values
+
+
+def single_tile(
+    filename, index_filename, chromsizes, tsinfo, z, x, max_tile_width, tbx_index=None
+):
+    """Fetch the variants overlapping tile (z, x); {"error": ...} is returned for oversized tiles."""
+    # TODO: replace this function with the one in clodius.tiles.tabix
+    tile_width = tsinfo["max_width"] / 2**z
+    if max_tile_width and tile_width > max_tile_width:
+        return {"error": "Tile too wide"}
+
+    start_pos = x * tsinfo["max_width"] / 2**z
+    end_pos = (x + 1) * tsinfo["max_width"] / 2**z
+    css = chromsizes.cumsum().shift().fillna(0).to_dict()
+
+    # Drop intervals past the last chromosome, as clodius.tiles.tabix does;
+    # chromsizes.index[cid] would raise IndexError for them.
+    cids_starts_ends = [
+        (cid, start, end)
+        for cid, start, end in abs2genomic(chromsizes, start_pos, end_pos)
+        if cid < len(chromsizes)
+    ]
+
+    query_size = 0
+    if tbx_index:
+        for cid, start, end in cids_starts_ends:
+            chrom = chromsizes.index[cid]
+            query_size += rtt.est_query_size(tbx_index, chrom, int(start), int(end))
+
+    MAX_QUERY_SIZE = 450000
+    if query_size > MAX_QUERY_SIZE:
+        return {"error": f"Tile too large {query_size}"}
+
+    ret_vals = []
+    # Opened only once a query is certain; the context manager closes the handle.
+    with VariantFile(filename, index_filename=index_filename) as vcf:
+        for cid, start, end in cids_starts_ends:
+            chrom = chromsizes.index[cid]
+            ret_vals += [
+                {
+                    "uid": r.id,
+                    "importance": random.random(),
+                    "xStart": css[chrom] + r.start,
+                    "xEnd": css[chrom] + r.stop,
+                    "chrOffset": css[chrom],
+                    "fields": [r.chrom, r.start, r.stop, str(r)],
+                }
+                for r in vcf.fetch(str(chrom), int(start), int(end))
+            ]
+
+    return ret_vals
+
+
+def tiles(filename, tile_ids, index_filename, chromsizes, max_tile_width=None):
+    """Return (tile_id, values) pairs for the requested VCF tiles.
+
+    Anything after TILE_OPTIONS_CHAR in a tile id is ignored; the second and
+    third dot-separated fields are the zoom level and x position. As soon as a
+    tile is at least `max_tile_width` wide, a one-element list holding an error
+    entry for that tile is returned and every other tile is dropped.
+
+    Raises IndexError if a tile id lacks a zoom level or x position.
+    """
+    tsinfo = tileset_info(filename, chromsizes)
+    # The tabix index is only used to estimate query sizes in single_tile.
+    index = rtt.load_tbi_idx(index_filename) if index_filename else None
+
+    tile_values = []
+
+    for tile_id in tile_ids:
+        position_fields = tile_id.split(TILE_OPTIONS_CHAR)[0].split(".")[1:3]
+        tile_position = [int(field) for field in position_fields]
+
+        if len(tile_position) < 2:
+            raise IndexError("Not enough tile info present")
+
+        z, x = tile_position
+        tile_width = tsinfo["max_width"] / 2**z
+
+        if max_tile_width and tile_width >= max_tile_width:
+            # this tile is larger than the max allowed
+            error = {
+                "error": f"Tile too large, no data returned. Max tile size: {max_tile_width}"
+            }
+            return [(tile_id, error)]
+
+        values = single_tile(
+            filename,
+            index_filename,
+            chromsizes,
+            tsinfo,
+            z,
+            x,
+            max_tile_width,
+            tbx_index=index,
+        )
+
+        tile_values.append((tile_id, values))
+
+    return tile_values
diff --git a/clodius/utils.py b/clodius/utils.py
new file mode 100644
index 00000000..7a40dcf5
--- /dev/null
+++ b/clodius/utils.py
@@ -0,0 +1,105 @@
+
+FILETYPES = {
+    "bam": {
+        "description": "Read mappings",
+        "extensions": [".bam"],
+        "datatypes": ["reads", "alignments"],
+    },
+    "chromsizes-tsv": {
+        "description": "Chromosome sizes",
+        "extensions": [".chromsizes", ".fai", ".chrom.sizes"],
+        "datatypes": ["chromsizes"],
+    },
+    "cooler": {
+        "description": "multi-resolution cooler file",
+        "extensions": [".mcool"],
+        "datatypes": ["matrix"],
+    },
+    "bigwig": {
+        "description": "Genomics focused multi-resolution vector file",
+        "extensions": [".bw", ".bigwig"],
+        "datatypes": ["vector"],
+    },
+    "bedfile": {
+        "description": "BED file",
+        "extensions": [".bed", ".bed.gz", ".bed.bgz"],
+        "datatypes": ["bedlike", "gene-annotations"],
+    },
+    "beddb": {
+        "description": "SQLite-based multi-resolution annotation file",
+        "extensions": [".beddb", ".multires.db"],
+        "datatypes": ["bedlike", "gene-annotations"],
+    },
+    "fasta": {
+        "description": "FASTA sequence file",
+        "extensions": [".fa", ".fna", ".fasta"],
+        "datatypes": ["sequence"],
+    },
+    "gff": {
+        "description": "General feature format",
+        "extensions": [".gff", ".gff.gz", ".gff.bgz"],
+        "datatypes": ["bedlike"],
+    },
+    "hitile": {
+        "description": "Multi-resolution vector file",
+        "extensions": [".hitile"],
+        "datatypes": ["vector"],
+    },
+    "multivec": {
+        "description": "Multi-sample vector file",
+        "extensions": [".multivec"],
+        "datatypes": ["multivec"],
+    },
+    "time-interval-json": {
+        "description": "Time interval notation",
+        "extensions": [".htime"],
+        "datatypes": ["time-interval"],
+    },
+}
+
+
+def infer_filetype(filename):
+    """Return the FILETYPES key for filename's extension (case-insensitive) or None."""
+    lowered = filename.lower()
+    for filetype, meta in FILETYPES.items():
+        if lowered.endswith(tuple(ext.lower() for ext in meta["extensions"])):
+            return filetype
+    return None
+
+
+def infer_datatype(filetype):
+    """Return the first datatype registered for `filetype`, or None if unknown."""
+    if filetype not in FILETYPES:
+        return None
+    return FILETYPES[filetype]["datatypes"][0]
+
+
+def get_file_compression(f) -> "str | None":
+    """Get the compression type for an open file pointer.
+
+    Recognizes 'gzip', 'bz2', 'zip' or 'xz' from the magic number at the
+    current position; the file position is restored before returning.
+    :param f: A seekable file pointer opened in binary mode
+    :returns: The compression type, or None if no magic number matches."""
+    magic_dict = {
+        b"\x1f\x8b\x08": "gzip",
+        b"\x42\x5a\x68": "bz2",
+        b"\x50\x4b\x03\x04": "zip",
+        b"\xfd\x37\x7a\x58\x5a\x00": "xz",
+    }
+
+    max_len = max(len(x) for x in magic_dict)
+
+    # Peek at the leading bytes without disturbing the caller's position.
+    prev_pos = f.tell()
+    file_start = f.read(max_len)
+    f.seek(prev_pos)
+
+    for magic, filetype in magic_dict.items():
+        if file_start.startswith(magic):
+            return filetype
+
+    return None
+
+
+TILE_OPTIONS_CHAR = ","
diff --git a/data/Dixon2012-J1-NcoI-R1-filtered.100kb.multires.cool b/data/Dixon2012-J1-NcoI-R1-filtered.100kb.multires.cool
new file mode 100644
index 00000000..81147bdf
--- /dev/null
+++ b/data/Dixon2012-J1-NcoI-R1-filtered.100kb.multires.cool
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9717cb99bebc402bf12392fdadc70262b6467292a28c6350eb46f97fed5fa11
+size 124660908
diff --git a/data/GCA_000350705.1_Esch_coli_KTE11_V1_genomic.short.fna b/data/GCA_000350705.1_Esch_coli_KTE11_V1_genomic.short.fna
new file mode 100644
index 00000000..c77804e7
--- /dev/null
+++ b/data/GCA_000350705.1_Esch_coli_KTE11_V1_genomic.short.fna
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f05d5850ab43e4ed4c9a9adaadf2c27f33adff7968abcd390ac2be3f85f99a22
+size 2652
diff --git a/data/GCA_000350705.1_Esch_coli_KTE11_V1_genomic.short.fna.fai b/data/GCA_000350705.1_Esch_coli_KTE11_V1_genomic.short.fna.fai
new file mode 100644
index 00000000..13f1ef69
--- /dev/null
+++ b/data/GCA_000350705.1_Esch_coli_KTE11_V1_genomic.short.fna.fai
@@ -0,0 +1,3 @@
+KB732246.1	640	101	80	81
+KB732247.1	240	850	80	81
+KB732249.1	1440	1194	80	81
diff --git a/data/GCA_002918705.1_ASM291870v1_genomic.gff.gz b/data/GCA_002918705.1_ASM291870v1_genomic.gff.gz
new file mode 100644
index 00000000..b4a10019
--- /dev/null
+++ b/data/GCA_002918705.1_ASM291870v1_genomic.gff.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4642e18c0bcc07818f1b011afd5232edeae74a424b8afb51dc349a430d000f52
+size 317529
diff --git a/data/SRR1770413.different_index_filename.bai b/data/SRR1770413.different_index_filename.bai
new file mode 100644
index 00000000..aeeb2761
--- /dev/null
+++ b/data/SRR1770413.different_index_filename.bai
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4b0ea8a7e4f1fe2336f68ccb1217affb4dcd2083ce5baae6d3bc009a82d2d8c
+size 224
diff --git a/data/SRR1770413.mismatched_bai.bam b/data/SRR1770413.mismatched_bai.bam
new file mode 100644
index 00000000..2ad2e6fd
--- /dev/null
+++ b/data/SRR1770413.mismatched_bai.bam
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0294427d827c929fee0abdfccec2ea74394d1c68607e99bd86de9780da9048c
+size 1465908
diff --git a/data/SRR1770413.sorted.short.bam b/data/SRR1770413.sorted.short.bam
new file mode 100644
index 00000000..2ad2e6fd
--- /dev/null
+++ b/data/SRR1770413.sorted.short.bam
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0294427d827c929fee0abdfccec2ea74394d1c68607e99bd86de9780da9048c
+size 1465908
diff --git a/data/SRR1770413.sorted.short.bam.bai b/data/SRR1770413.sorted.short.bam.bai
new file mode 100644
index 00000000..aeeb2761
--- /dev/null
+++ b/data/SRR1770413.sorted.short.bam.bai
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4b0ea8a7e4f1fe2336f68ccb1217affb4dcd2083ce5baae6d3bc009a82d2d8c
+size 224
diff --git a/data/chm13v1.chrom.sizes b/data/chm13v1.chrom.sizes
new file mode 100644
index 00000000..56ff20f8
--- /dev/null
+++ b/data/chm13v1.chrom.sizes
@@ -0,0 +1,24 @@
+chr1	248387497
+chr2	242696747
+chr3	201106605
+chr4	193575430
+chr5	182045437
+chr6	172126870
+chr7	160567423
+chr8	146259322
+chr9	150617274
+chr10	134758122
+chr11	135127772
+chr12	133324781
+chr13	114240146
+chr14	101219177
+chr15	100338308
+chr16	96330493
+chr17	84277185
+chr18	80542536
+chr19	61707359
+chr20	66210247
+chr21	45827691
+chr22	51353906
+chrX	154259625
+chrM	16569
diff --git a/data/corrected.geneListwithStrand.bed.multires b/data/corrected.geneListwithStrand.bed.multires
new file mode 100644
index 00000000..e8d0394f
--- /dev/null
+++ b/data/corrected.geneListwithStrand.bed.multires
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bbba467ee1b6c5fe59567263dec4f7299c339752aa6f0f865f799dfbadc47329
+size 179658752
diff --git a/data/geneAnnotationsExonUnions.1000.bed.v3.beddb b/data/geneAnnotationsExonUnions.1000.bed.v3.beddb
new file mode 100644
index 00000000..679f097e
--- /dev/null
+++ b/data/geneAnnotationsExonUnions.1000.bed.v3.beddb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:330470832b7d6900e69ddb8f5a926073be7d2b0c8642cde61243c448216acd44
+size 352256
diff --git a/data/gene_annotations.short.db b/data/gene_annotations.short.db
new file mode 100644
index 00000000..4fc01340
Binary files /dev/null and b/data/gene_annotations.short.db differ
diff --git a/data/genomic.10k.gff b/data/genomic.10k.gff
new file mode 100644
index 00000000..423e8b9a
--- /dev/null
+++ b/data/genomic.10k.gff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2230c9b753487634542a86d56cd11894a76df58dbdaf81d53e37ce33d3a82a62
+size 2616802
diff --git a/data/genomic.10k.gff.gz b/data/genomic.10k.gff.gz
new file mode 100644
index 00000000..b64c57a9
--- /dev/null
+++ b/data/genomic.10k.gff.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83265d3af45f7c49a1be68aed47f365fa52f273ea2abd7335000e2cb8975306a
+size 151353
diff --git a/data/genomic.10k.gff.gz.tbi b/data/genomic.10k.gff.gz.tbi
new file mode 100644
index 00000000..84fbfeb7
--- /dev/null
+++ b/data/genomic.10k.gff.gz.tbi
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b21f6f5ac4825cc8f2f358d7a751802ca6ed7f7f74b01603d681157e3330f861
+size 1059
diff --git a/data/hg38.chrom.sizes b/data/hg38.chrom.sizes
new file mode 100644
index 00000000..487acb10
--- /dev/null
+++ b/data/hg38.chrom.sizes
@@ -0,0 +1,455 @@
+chr1	248956422
+chr2	242193529
+chr3	198295559
+chr4	190214555
+chr5	181538259
+chr6	170805979
+chr7	159345973
+chr8	145138636
+chr9	138394717
+chr10	133797422
+chr11	135086622
+chr12	133275309
+chr13	114364328
+chr14	107043718
+chr15	101991189
+chr16	90338345
+chr17	83257441
+chr18	80373285
+chr19	58617616
+chr20	64444167
+chr21	46709983
+chr22	50818468
+chrX	156040895
+chrY	57227415
+chrM	16569
+chr15_KI270905v1_alt	5161414
+chr6_GL000256v2_alt	4929269
+chr6_GL000254v2_alt	4827813
+chr6_GL000251v2_alt	4795265
+chr6_GL000253v2_alt	4677643
+chr6_GL000250v2_alt	4672374
+chr6_GL000255v2_alt	4606388
+chr6_GL000252v2_alt	4604811
+chr17_KI270857v1_alt	2877074
+chr16_KI270853v1_alt	2659700
+chr16_KI270728v1_random	1872759
+chr17_GL000258v2_alt	1821992
+chr5_GL339449v2_alt	1612928
+chr14_KI270847v1_alt	1511111
+chr17_KI270908v1_alt	1423190
+chr14_KI270846v1_alt	1351393
+chr5_KI270897v1_alt	1144418
+chr7_KI270803v1_alt	1111570
+chr19_GL949749v2_alt	1091841
+chr19_KI270938v1_alt	1066800
+chr19_GL949750v2_alt	1066390
+chr19_GL949748v2_alt	1064304
+chr19_GL949751v2_alt	1002683
+chr19_GL949746v1_alt	987716
+chr19_GL949752v1_alt	987100
+chr8_KI270821v1_alt	985506
+chr1_KI270763v1_alt	911658
+chr6_KI270801v1_alt	870480
+chr19_GL949753v2_alt	796479
+chr19_GL949747v2_alt	729520
+chr8_KI270822v1_alt	624492
+chr4_GL000257v2_alt	586476
+chr12_KI270904v1_alt	572349
+chr4_KI270925v1_alt	555799
+chr15_KI270852v1_alt	478999
+chr15_KI270727v1_random	448248
+chr9_KI270823v1_alt	439082
+chr15_KI270850v1_alt	430880
+chr1_KI270759v1_alt	425601
+chr12_GL877876v1_alt	408271
+chrUn_KI270442v1	392061
+chr17_KI270862v1_alt	391357
+chr15_GL383555v2_alt	388773
+chr19_GL383573v1_alt	385657
+chr4_KI270896v1_alt	378547
+chr4_GL383528v1_alt	376187
+chr17_GL383563v3_alt	375691
+chr8_KI270810v1_alt	374415
+chr1_GL383520v2_alt	366580
+chr1_KI270762v1_alt	354444
+chr15_KI270848v1_alt	327382
+chr17_KI270909v1_alt	325800
+chr14_KI270844v1_alt	322166
+chr8_KI270900v1_alt	318687
+chr10_GL383546v1_alt	309802
+chr13_KI270838v1_alt	306913
+chr8_KI270816v1_alt	305841
+chr22_KI270879v1_alt	304135
+chr8_KI270813v1_alt	300230
+chr11_KI270831v1_alt	296895
+chr15_GL383554v1_alt	296527
+chr8_KI270811v1_alt	292436
+chr18_GL383567v1_alt	289831
+chrX_KI270880v1_alt	284869
+chr8_KI270812v1_alt	282736
+chr19_KI270921v1_alt	282224
+chr17_KI270729v1_random	280839
+chr17_JH159146v1_alt	278131
+chrX_KI270913v1_alt	274009
+chr6_KI270798v1_alt	271782
+chr7_KI270808v1_alt	271455
+chr22_KI270876v1_alt	263666
+chr15_KI270851v1_alt	263054
+chr22_KI270875v1_alt	259914
+chr1_KI270766v1_alt	256271
+chr19_KI270882v1_alt	248807
+chr3_KI270778v1_alt	248252
+chr15_KI270849v1_alt	244917
+chr4_KI270786v1_alt	244096
+chr12_KI270835v1_alt	238139
+chr17_KI270858v1_alt	235827
+chr19_KI270867v1_alt	233762
+chr16_KI270855v1_alt	232857
+chr8_KI270926v1_alt	229282
+chr5_GL949742v1_alt	226852
+chr3_KI270780v1_alt	224108
+chr17_GL383565v1_alt	223995
+chr2_KI270774v1_alt	223625
+chr4_KI270790v1_alt	220246
+chr11_KI270927v1_alt	218612
+chr19_KI270932v1_alt	215732
+chr11_KI270903v1_alt	214625
+chr2_KI270894v1_alt	214158
+chr14_GL000225v1_random	211173
+chrUn_KI270743v1	210658
+chr11_KI270832v1_alt	210133
+chr7_KI270805v1_alt	209988
+chr4_GL000008v2_random	209709
+chr7_KI270809v1_alt	209586
+chr19_KI270887v1_alt	209512
+chr4_KI270789v1_alt	205944
+chr3_KI270779v1_alt	205312
+chr19_KI270914v1_alt	205194
+chr19_KI270886v1_alt	204239
+chr11_KI270829v1_alt	204059
+chr14_GL000009v2_random	201709
+chr21_GL383579v2_alt	201197
+chr11_JH159136v1_alt	200998
+chr19_KI270930v1_alt	200773
+chrUn_KI270747v1	198735
+chr18_GL383571v1_alt	198278
+chr19_KI270920v1_alt	198005
+chr6_KI270797v1_alt	197536
+chr3_KI270935v1_alt	197351
+chr17_KI270861v1_alt	196688
+chr15_KI270906v1_alt	196384
+chr5_KI270791v1_alt	195710
+chr14_KI270722v1_random	194050
+chr16_GL383556v1_alt	192462
+chr13_KI270840v1_alt	191684
+chr14_GL000194v1_random	191469
+chr11_JH159137v1_alt	191409
+chr19_KI270917v1_alt	190932
+chr7_KI270899v1_alt	190869
+chr19_KI270923v1_alt	189352
+chr10_KI270825v1_alt	188315
+chr19_GL383576v1_alt	188024
+chr19_KI270922v1_alt	187935
+chrUn_KI270742v1	186739
+chr22_KI270878v1_alt	186262
+chr19_KI270929v1_alt	186203
+chr11_KI270826v1_alt	186169
+chr6_KB021644v2_alt	185823
+chr17_GL000205v2_random	185591
+chr1_KI270765v1_alt	185285
+chr19_KI270916v1_alt	184516
+chr19_KI270890v1_alt	184499
+chr3_KI270784v1_alt	184404
+chr12_GL383551v1_alt	184319
+chr20_KI270870v1_alt	183433
+chrUn_GL000195v1	182896
+chr1_GL383518v1_alt	182439
+chr22_KI270736v1_random	181920
+chr10_KI270824v1_alt	181496
+chr14_KI270845v1_alt	180703
+chr3_GL383526v1_alt	180671
+chr13_KI270839v1_alt	180306
+chr22_KI270733v1_random	179772
+chrUn_GL000224v1	179693
+chr10_GL383545v1_alt	179254
+chrUn_GL000219v1	179198
+chr5_KI270792v1_alt	179043
+chr17_KI270860v1_alt	178921
+chr19_GL000209v2_alt	177381
+chr11_KI270830v1_alt	177092
+chr9_KI270719v1_random	176845
+chrUn_GL000216v2	176608
+chr22_KI270928v1_alt	176103
+chr1_KI270712v1_random	176043
+chr6_KI270800v1_alt	175808
+chr1_KI270706v1_random	175055
+chr2_KI270776v1_alt	174166
+chr18_KI270912v1_alt	174061
+chr3_KI270777v1_alt	173649
+chr5_GL383531v1_alt	173459
+chr3_JH636055v2_alt	173151
+chr14_KI270725v1_random	172810
+chr5_KI270796v1_alt	172708
+chr9_GL383541v1_alt	171286
+chr19_KI270885v1_alt	171027
+chr19_KI270919v1_alt	170701
+chr19_KI270889v1_alt	170698
+chr19_KI270891v1_alt	170680
+chr19_KI270915v1_alt	170665
+chr19_KI270933v1_alt	170537
+chr19_KI270883v1_alt	170399
+chr19_GL383575v2_alt	170222
+chr19_KI270931v1_alt	170148
+chr12_GL383550v2_alt	169178
+chr13_KI270841v1_alt	169134
+chrUn_KI270744v1	168472
+chr18_KI270863v1_alt	167999
+chr18_GL383569v1_alt	167950
+chr12_GL877875v1_alt	167313
+chr21_KI270874v1_alt	166743
+chr3_KI270924v1_alt	166540
+chr1_KI270761v1_alt	165834
+chr3_KI270937v1_alt	165607
+chr22_KI270734v1_random	165050
+chr18_GL383570v1_alt	164789
+chr5_KI270794v1_alt	164558
+chr4_GL383527v1_alt	164536
+chrUn_GL000213v1	164239
+chr3_KI270936v1_alt	164170
+chr3_KI270934v1_alt	163458
+chr9_GL383539v1_alt	162988
+chr3_KI270895v1_alt	162896
+chr22_GL383582v2_alt	162811
+chr3_KI270782v1_alt	162429
+chr1_KI270892v1_alt	162212
+chrUn_GL000220v1	161802
+chr2_KI270767v1_alt	161578
+chr2_KI270715v1_random	161471
+chr2_KI270893v1_alt	161218
+chrUn_GL000218v1	161147
+chr18_GL383572v1_alt	159547
+chr8_KI270817v1_alt	158983
+chr4_KI270788v1_alt	158965
+chrUn_KI270749v1	158759
+chr7_KI270806v1_alt	158166
+chr7_KI270804v1_alt	157952
+chr18_KI270911v1_alt	157710
+chrUn_KI270741v1	157432
+chr17_KI270910v1_alt	157099
+chr19_KI270884v1_alt	157053
+chr19_GL383574v1_alt	155864
+chr19_KI270888v1_alt	155532
+chr3_GL000221v1_random	155397
+chr11_GL383547v1_alt	154407
+chr2_KI270716v1_random	153799
+chr12_GL383553v2_alt	152874
+chr6_KI270799v1_alt	152148
+chr22_KI270731v1_random	150754
+chrUn_KI270751v1	150742
+chrUn_KI270750v1	148850
+chr8_KI270818v1_alt	145606
+chrX_KI270881v1_alt	144206
+chr21_KI270873v1_alt	143900
+chr2_GL383521v1_alt	143390
+chr8_KI270814v1_alt	141812
+chr12_GL383552v1_alt	138655
+chrUn_KI270519v1	138126
+chr2_KI270775v1_alt	138019
+chr17_KI270907v1_alt	137721
+chrUn_GL000214v1	137718
+chr8_KI270901v1_alt	136959
+chr2_KI270770v1_alt	136240
+chr16_KI270854v1_alt	134193
+chr8_KI270819v1_alt	133535
+chr17_GL383564v2_alt	133151
+chr2_KI270772v1_alt	133041
+chr8_KI270815v1_alt	132244
+chr5_KI270795v1_alt	131892
+chr5_KI270898v1_alt	130957
+chr20_GL383577v2_alt	128386
+chr1_KI270708v1_random	127682
+chr7_KI270807v1_alt	126434
+chr5_KI270793v1_alt	126136
+chr6_GL383533v1_alt	124736
+chr2_GL383522v1_alt	123821
+chr19_KI270918v1_alt	123111
+chr12_GL383549v1_alt	120804
+chr2_KI270769v1_alt	120616
+chr4_KI270785v1_alt	119912
+chr12_KI270834v1_alt	119498
+chr7_GL383534v2_alt	119183
+chr20_KI270869v1_alt	118774
+chr21_GL383581v2_alt	116689
+chr3_KI270781v1_alt	113034
+chr17_KI270730v1_random	112551
+chrUn_KI270438v1	112505
+chr4_KI270787v1_alt	111943
+chr18_KI270864v1_alt	111737
+chr2_KI270771v1_alt	110395
+chr1_GL383519v1_alt	110268
+chr2_KI270768v1_alt	110099
+chr1_KI270760v1_alt	109528
+chr3_KI270783v1_alt	109187
+chr17_KI270859v1_alt	108763
+chr11_KI270902v1_alt	106711
+chr18_GL383568v1_alt	104552
+chr22_KI270737v1_random	103838
+chr13_KI270843v1_alt	103832
+chr22_KI270877v1_alt	101331
+chr5_GL383530v1_alt	101241
+chr11_KI270721v1_random	100316
+chr22_KI270738v1_random	99375
+chr22_GL383583v2_alt	96924
+chr2_GL582966v2_alt	96131
+chrUn_KI270748v1	93321
+chrUn_KI270435v1	92983
+chr5_GL000208v1_random	92689
+chrUn_KI270538v1	91309
+chr17_GL383566v1_alt	90219
+chr16_GL383557v1_alt	89672
+chr17_JH159148v1_alt	88070
+chr5_GL383532v1_alt	82728
+chr21_KI270872v1_alt	82692
+chrUn_KI270756v1	79590
+chr6_KI270758v1_alt	76752
+chr12_KI270833v1_alt	76061
+chr6_KI270802v1_alt	75005
+chr21_GL383580v2_alt	74653
+chr22_KB663609v1_alt	74013
+chr22_KI270739v1_random	73985
+chr9_GL383540v1_alt	71551
+chrUn_KI270757v1	71251
+chr2_KI270773v1_alt	70887
+chr17_JH159147v1_alt	70345
+chr11_KI270827v1_alt	67707
+chr1_KI270709v1_random	66860
+chrUn_KI270746v1	66486
+chr16_KI270856v1_alt	63982
+chr21_GL383578v2_alt	63917
+chrUn_KI270753v1	62944
+chr19_KI270868v1_alt	61734
+chr9_GL383542v1_alt	60032
+chr20_KI270871v1_alt	58661
+chr12_KI270836v1_alt	56134
+chr19_KI270865v1_alt	52969
+chr1_KI270764v1_alt	50258
+chrUn_KI270589v1	44474
+chr14_KI270726v1_random	43739
+chr19_KI270866v1_alt	43156
+chr22_KI270735v1_random	42811
+chr1_KI270711v1_random	42210
+chrUn_KI270745v1	41891
+chr1_KI270714v1_random	41717
+chr22_KI270732v1_random	41543
+chr1_KI270713v1_random	40745
+chrUn_KI270754v1	40191
+chr1_KI270710v1_random	40176
+chr12_KI270837v1_alt	40090
+chr9_KI270717v1_random	40062
+chr14_KI270724v1_random	39555
+chr9_KI270720v1_random	39050
+chr14_KI270723v1_random	38115
+chr9_KI270718v1_random	38054
+chrUn_KI270317v1	37690
+chr13_KI270842v1_alt	37287
+chrY_KI270740v1_random	37240
+chrUn_KI270755v1	36723
+chr8_KI270820v1_alt	36640
+chr1_KI270707v1_random	32032
+chrUn_KI270579v1	31033
+chrUn_KI270752v1	27745
+chrUn_KI270512v1	22689
+chrUn_KI270322v1	21476
+chrUn_GL000226v1	15008
+chrUn_KI270311v1	12399
+chrUn_KI270366v1	8320
+chrUn_KI270511v1	8127
+chrUn_KI270448v1	7992
+chrUn_KI270521v1	7642
+chrUn_KI270581v1	7046
+chrUn_KI270582v1	6504
+chrUn_KI270515v1	6361
+chrUn_KI270588v1	6158
+chrUn_KI270591v1	5796
+chrUn_KI270522v1	5674
+chrUn_KI270507v1	5353
+chrUn_KI270590v1	4685
+chrUn_KI270584v1	4513
+chrUn_KI270320v1	4416
+chrUn_KI270382v1	4215
+chrUn_KI270468v1	4055
+chrUn_KI270467v1	3920
+chrUn_KI270362v1	3530
+chrUn_KI270517v1	3253
+chrUn_KI270593v1	3041
+chrUn_KI270528v1	2983
+chrUn_KI270587v1	2969
+chrUn_KI270364v1	2855
+chrUn_KI270371v1	2805
+chrUn_KI270333v1	2699
+chrUn_KI270374v1	2656
+chrUn_KI270411v1	2646
+chrUn_KI270414v1	2489
+chrUn_KI270510v1	2415
+chrUn_KI270390v1	2387
+chrUn_KI270375v1	2378
+chrUn_KI270420v1	2321
+chrUn_KI270509v1	2318
+chrUn_KI270315v1	2276
+chrUn_KI270302v1	2274
+chrUn_KI270518v1	2186
+chrUn_KI270530v1	2168
+chrUn_KI270304v1	2165
+chrUn_KI270418v1	2145
+chrUn_KI270424v1	2140
+chrUn_KI270417v1	2043
+chrUn_KI270508v1	1951
+chrUn_KI270303v1	1942
+chrUn_KI270381v1	1930
+chrUn_KI270529v1	1899
+chrUn_KI270425v1	1884
+chrUn_KI270396v1	1880
+chrUn_KI270363v1	1803
+chrUn_KI270386v1	1788
+chrUn_KI270465v1	1774
+chrUn_KI270383v1	1750
+chrUn_KI270384v1	1658
+chrUn_KI270330v1	1652
+chrUn_KI270372v1	1650
+chrUn_KI270548v1	1599
+chrUn_KI270580v1	1553
+chrUn_KI270387v1	1537
+chrUn_KI270391v1	1484
+chrUn_KI270305v1	1472
+chrUn_KI270373v1	1451
+chrUn_KI270422v1	1445
+chrUn_KI270316v1	1444
+chrUn_KI270338v1	1428
+chrUn_KI270340v1	1428
+chrUn_KI270583v1	1400
+chrUn_KI270334v1	1368
+chrUn_KI270429v1	1361
+chrUn_KI270393v1	1308
+chrUn_KI270516v1	1300
+chrUn_KI270389v1	1298
+chrUn_KI270466v1	1233
+chrUn_KI270388v1	1216
+chrUn_KI270544v1	1202
+chrUn_KI270310v1	1201
+chrUn_KI270412v1	1179
+chrUn_KI270395v1	1143
+chrUn_KI270376v1	1136
+chrUn_KI270337v1	1121
+chrUn_KI270335v1	1048
+chrUn_KI270378v1	1048
+chrUn_KI270379v1	1045
+chrUn_KI270329v1	1040
+chrUn_KI270419v1	1029
+chrUn_KI270336v1	1026
+chrUn_KI270312v1	998
+chrUn_KI270539v1	993
+chrUn_KI270385v1	990
+chrUn_KI270423v1	981
+chrUn_KI270392v1	971
+chrUn_KI270394v1	970
diff --git a/data/hic-resolutions.cool b/data/hic-resolutions.cool
new file mode 100644
index 00000000..b94c9d64
--- /dev/null
+++ b/data/hic-resolutions.cool
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f9a34c5445d38f22b2ef48b74c8b2e25055fb35d17da4e6b659a38b9002bb92
+size 7258763
diff --git a/data/labels.h5 b/data/labels.h5
new file mode 100644
index 00000000..72782c04
--- /dev/null
+++ b/data/labels.h5
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aeb6aa62f2f0c684a04bbafa17f485bcbd0debe6c5f1bcced29ea62e7d66e014
+size 5457264
diff --git a/data/masterlist_DHSs_733samples_WM20180608_all_mean_signal_colorsMax.bed.bb b/data/masterlist_DHSs_733samples_WM20180608_all_mean_signal_colorsMax.bed.bb
new file mode 100644
index 00000000..88bfa931
--- /dev/null
+++ b/data/masterlist_DHSs_733samples_WM20180608_all_mean_signal_colorsMax.bed.bb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0500645485bfb4deef92ef9ca9fe82c09a09a7cb4716a686eb4a893d72ea7c53
+size 132274337
diff --git a/data/no_item_rgb.bed b/data/no_item_rgb.bed
new file mode 100644
index 00000000..8df2533c
--- /dev/null
+++ b/data/no_item_rgb.bed
@@ -0,0 +1,4 @@
+chr1	100037575	100039165	Peak_101205	233	.	3.16783	23.38528	21.26926	772
+chr1	100037575	100039165	Peak_174032	77	.	2.07128	7.76076	5.92736	78
+chr1	100037575	100039165	Peak_37247	1000	.	6.60981	101.97690	99.34118	1269
+chr1	100037575	100039165	Peak_5433	1000	.	15.84791	421.48083	417.87122	486
diff --git a/data/points_density.h5 b/data/points_density.h5
new file mode 100644
index 00000000..f431279b
--- /dev/null
+++ b/data/points_density.h5
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7457c266dc9cd18e744e6bc42202719a0ede13ea2ffc0ac4a8ba9ec71f5514e4
+size 7091318
diff --git a/data/regions.spaces.bed b/data/regions.spaces.bed
new file mode 100644
index 00000000..0415d790
--- /dev/null
+++ b/data/regions.spaces.bed
@@ -0,0 +1,32 @@
+chr1	129815520	129818520
+chr1	138517920	138518920
+chr1	138692220	138694220
+chr1	141936595	141941595
+chr2	92897773	92899773
+chr3	95760806	95775806
+chr4	55040250	55045250
+chr4	55070950	55073450
+chr5	50718690	50721690
+chr6	59234372	59239372
+chr6	60381572	60408572
+chr7	62483284	62484784
+chr8	45611701	45614701
+chr8	45736713	45743713
+chr10	40597845	40601245
+chr10	41271445	41272045
+chr11	53139401	53144101
+chr12   35580616        35587616
+chr12	36145316	36146316
+chr13	14710236	14717936
+chr13	16820436	16832036
+chr16	35874660	35878260
+chr18	17829529	17848429
+chr18	18217229	18261529
+chr18	18403029	18432829
+chr18	18833129	18837129
+chr19   25559300        25612200
+chr19	26463081	26467081
+chr19   28531801        28532201
+chr20	31857665	31862765
+chrX	58590632	58593132
+chrX	59827732	59830732
diff --git a/data/regions.valid.bed b/data/regions.valid.bed
new file mode 100644
index 00000000..19b055bf
--- /dev/null
+++ b/data/regions.valid.bed
@@ -0,0 +1,32 @@
+chr1	129815520	129818520
+chr1	138517920	138518920
+chr1	138692220	138694220
+chr1	141936595	141941595
+chr2	92897773	92899773
+chr3	95760806	95775806
+chr4	55040250	55045250
+chr4	55070950	55073450
+chr5	50718690	50721690
+chr6	59234372	59239372
+chr6	60381572	60408572
+chr7	62483284	62484784
+chr8	45611701	45614701
+chr8	45736713	45743713
+chr10	40597845	40601245
+chr10	41271445	41272045
+chr11	53139401	53144101
+chr12	35580616	35587616
+chr12	36145316	36146316
+chr13	14710236	14717936
+chr13	16820436	16832036
+chr16	35874660	35878260
+chr18	17829529	17848429
+chr18	18217229	18261529
+chr18	18403029	18432829
+chr18	18833129	18837129
+chr19	25559300	25612200
+chr19	26463081	26467081
+chr19	28531801	28532201
+chr20	31857665	31862765
+chrX	58590632	58593132
+chrX	59827732	59830732
diff --git a/data/regions.valid.bed.1.gz b/data/regions.valid.bed.1.gz
new file mode 100644
index 00000000..0c661d14
--- /dev/null
+++ b/data/regions.valid.bed.1.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a133374ce443cd9bedc4af9b549353e5857ca539a1abdc15a5f24da800733864
+size 351
diff --git a/data/regions.valid.bed.gz b/data/regions.valid.bed.gz
new file mode 100644
index 00000000..0dc281da
--- /dev/null
+++ b/data/regions.valid.bed.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ab70b46d586fe78339a8381b25be6958128868649695fa70e95351afe012550
+size 367
diff --git a/data/regions.valid.bed.gz.tbi b/data/regions.valid.bed.gz.tbi
new file mode 100644
index 00000000..83598ef1
--- /dev/null
+++ b/data/regions.valid.bed.gz.tbi
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8af2a866f8ac1a09582151cf1ca3ffdd5e0008753caa48b8e802d3190ce08962
+size 1493
diff --git a/data/sample_htime.json b/data/sample_htime.json
new file mode 100644
index 00000000..1f0eaad2
--- /dev/null
+++ b/data/sample_htime.json
@@ -0,0 +1 @@
+{"start": 0.0, "end": 66452.47751554489, "len": 6645370}
\ No newline at end of file
diff --git a/data/test.1.vcf.gz b/data/test.1.vcf.gz
new file mode 100644
index 00000000..a38a241c
--- /dev/null
+++ b/data/test.1.vcf.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:511057b19a99a8ccef6eee96e75e2ba26580abe60252414110350f88bdde354e
+size 839
diff --git a/data/wgEncodeCaltechRnaSeqHuvecR1x75dTh1014IlnaPlusSignalRep2.bigWig b/data/wgEncodeCaltechRnaSeqHuvecR1x75dTh1014IlnaPlusSignalRep2.bigWig
new file mode 100644
index 00000000..dc4ea1c9
--- /dev/null
+++ b/data/wgEncodeCaltechRnaSeqHuvecR1x75dTh1014IlnaPlusSignalRep2.bigWig
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e9eb37b991c710e56eafc6c4a036dbb9c6b9287f459001c95852d5366c5e204
+size 7940112
diff --git a/get_test_data.sh b/get_test_data.sh
deleted file mode 100755
index 246f6b84..00000000
--- a/get_test_data.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-wget -q -NP data/ https://s3.amazonaws.com/pkerp/public/chromSizes.tsv
-wget -q -NP data/ https://s3.amazonaws.com/pkerp/public/all.KL.bed.multires.mv5
-wget -q -NP data/ https://s3.amazonaws.com/pkerp/public/Dixon2012-J1-NcoI-R1-filtered.100kb.multires.cool
-wget -q -NP data/ https://s3.amazonaws.com/pkerp/public/hic-resolutions.cool
-wget -q -NP data/ https://s3.amazonaws.com/pkerp/public/sample_htime.json
-wget -q -NP data/ https://s3.amazonaws.com/pkerp/public/gene_annotations.short.db
-wget -q -NP data/ https://s3.amazonaws.com/pkerp/public/wgEncodeCaltechRnaSeqHuvecR1x75dTh1014IlnaPlusSignalRep2.bigWig
-wget -q -NP data/ https://s3.amazonaws.com/pkerp/public/points_density.h5
-wget -q -NP data/ https://s3.amazonaws.com/pkerp/public/int_matrices.hdf5
-wget -q -NP data/ https://s3.amazonaws.com/pkerp/public/corrected.geneListwithStrand.bed.multires
-wget -q -NP data/ https://s3.amazonaws.com/pkerp/public/labels.h5
-wget -q -NP data/ https://s3.amazonaws.com/pkerp/public/SRR1770413.sorted.short.bam
-wget -q -NP data/ https://s3.amazonaws.com/pkerp/public/SRR1770413.different_index_filename.bai
-wget -q -NP data/ https://s3.amazonaws.com/pkerp/public/SRR1770413.sorted.short.bam.bai
-wget -q -NP data/ https://s3.amazonaws.com/pkerp/public/SRR1770413.mismatched_bai.bam
-wget -q -NP data/ https://s3.amazonaws.com/pkerp/public/geneAnnotationsExonUnions.1000.bed.v3.beddb
-wget -q -NP data/ https://s3.amazonaws.com/areynolds/public/masterlist_DHSs_733samples_WM20180608_all_mean_signal_colorsMax.bed.bb
-wget -q -NP data/ https://resgen-public.s3.amazonaws.com/clodius/test-data/states_format_input_testfile.100.bed.multires.mv5
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index d6f737ef..774c2423 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "clodius"
-version = "0.20.3"
+version = "0.20.4"
 description = "Tile generation for big data"
 authors = [
     { name = "Peter Kerpedjiev", email = "pkerpedjiev@gmail.com" },
@@ -25,8 +25,17 @@ dependencies = [
     "slugid",
     "sortedcontainers",
     "tqdm",
-    "smart_open"
+    "smart_open",
+    "polars",
+    "oxbow>=0.7.0",
+    "apsw",
+    "sosqlite>=0.3.1",
+    "biopython",
+    "shortuuid",
+    "pybigtools",
+    "scipy"
 ]
+requires-python = ">=3.12"
 license = { text = "MIT" }
 readme = "README.md"
 urls = { homepage = "https://github.com/higlass/clodius" }
diff --git a/scripts/tsv_to_mrmatrix.py b/scripts/tsv_to_mrmatrix.py
new file mode 100644
index 00000000..65fe5dc0
--- /dev/null
+++ b/scripts/tsv_to_mrmatrix.py
@@ -0,0 +1,126 @@
+#!/usr/bin/python
+
+import dask.array as da
+import h5py
+import math
+import numpy as np
+import sys
+import argparse
+import time
+
+
+def coarsen(f, tile_size=256):
+    '''
+    Create data pyramid: store a 2x2 nansum-coarsened copy of the previous
+    level under f['resolutions'][<resolution>]['values'] for each zoom step.
+    '''
+    grid = f['resolutions']['1']['values']
+    top_n = grid.shape[0]
+    max_zoom = math.ceil(math.log(top_n / tile_size) / math.log(2))
+
+    chunk_size = tile_size * 16
+    curr_size = grid.shape
+    dask_dset = da.from_array(grid, chunks=(chunk_size, chunk_size))
+    r = f['resolutions']
+    curr_resolution = 1
+
+    while curr_resolution < 2 ** max_zoom:
+        # Floor-divide so the shape stays integral; "/" would produce
+        # float dimensions for the dataset shape.
+        curr_size = tuple(int(n) // 2 for n in curr_size)
+        print('coarsening')
+        curr_resolution *= 2
+        print("curr_size:", curr_size)
+        g = r.create_group(str(curr_resolution))
+        values = g.require_dataset(
+            'values', curr_size, dtype='f4',
+            compression='lzf', fillvalue=np.nan)
+
+        dask_dset = dask_dset.rechunk((chunk_size, chunk_size))
+        dask_dset = da.coarsen(np.nansum, dask_dset, {0: 2, 1: 2})
+        da.store(dask_dset, values)
+
+
+def parse(input_handle, output_hdf5, top_n=None):
+    '''
+    Convert a labelled tab-separated matrix into multi-resolution HDF5.
+    '''
+    first_line = next(input_handle)
+    parts = first_line.strip().split('\t')
+    # TODO: Use the python built-in csv module, instead of parsing by hand?
+
+    if top_n is None:
+        top_n = len(parts) - 1
+        # TODO: So if it's taller than it is wide, it will be truncated to a square,
+        # unless an explicit top_n is provided? That doesn't seem right.
+
+    labels = parts[1:top_n + 1]
+    tile_size = 256
+    # Clamp at 0: a negative zoom would make max_width a fractional float.
+    max_zoom = max(0, math.ceil(math.log(top_n / tile_size) / math.log(2)))
+    max_width = tile_size * 2 ** max_zoom
+
+    output_hdf5.create_dataset(
+        'labels', data=np.array(labels, dtype=h5py.special_dtype(vlen=str)),
+        compression='lzf')
+
+    g = output_hdf5.create_group('resolutions')
+    g1 = g.create_group('1')
+    ds = g1.create_dataset(
+        'values', (max_width, max_width),
+        dtype='f4', compression='lzf', fillvalue=np.nan)
+    g1.create_dataset(
+        'nan_values', (max_width, max_width),
+        dtype='f4', compression='lzf', fillvalue=0)
+    # TODO: We don't write to this... Is it necessary?
+
+    start_time = time.time()
+    counter = 0
+    for line in input_handle:
+        parts = line.strip().split('\t')[1:top_n + 1]
+        x = np.array([float(p) for p in parts])
+        ds[counter, :len(x)] = x
+        counter += 1
+        if counter == top_n:
+            break
+
+        time_per_entry = (time.time() - start_time) / counter
+        time_remaining = time_per_entry * (top_n - counter)
+        print("counter:", counter, "sum(x):", sum(x), "time remaining: {:d} seconds".format(int(time_remaining)))
+
+    coarsen(output_hdf5)
+    output_hdf5.close()
+
+
+def main():
+    '''
+    Command-line entry point: convert a TSV matrix (or stdin, given "-")
+    into a multi-resolution HDF5 file and print the first row's nansum.
+    '''
+    parser = argparse.ArgumentParser(description="""
+
+    python tsv-dense-to-sparse
+""")
+
+    parser.add_argument('input_file')
+    parser.add_argument('output_file')
+    parser.add_argument('-n', '--first-n', type=int, default=None,
+                        help="Only use the first n entries in the matrix")
+
+    args = parser.parse_args()
+    top_n = args.first_n
+
+    if args.input_file == '-':
+        parse(sys.stdin, h5py.File(args.output_file, 'w'), top_n)
+    else:
+        # Close the input file even if parsing fails.
+        with open(args.input_file, 'r') as f_in:
+            parse(f_in, h5py.File(args.output_file, 'w'), top_n)
+
+    # parse() closed the output handle; reopen read-only for the summary.
+    with h5py.File(args.output_file, 'r') as f:
+        print("sum1:", np.nansum(f['resolutions']['1']['values'][0]))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/test/alignment_test.py b/test/alignment_test.py
new file mode 100644
index 00000000..02115b28
--- /dev/null
+++ b/test/alignment_test.py
@@ -0,0 +1,21 @@
+from clodius.alignment import align_sequences, alignment_to_subs
+
+
+def test_alignment_to_subs():
+    """Check alignment_to_subs for equal-length and unequal-length pairs."""
+    same_length = alignment_to_subs(align_sequences("TTTTT", "TTATT"))
+
+    assert same_length[0] == 1
+    assert same_length[1] == 6
+    assert same_length[2][0]["pos"] == 2  # subs are 0-based
+    assert same_length[2][0]["base"] == "T"
+    assert same_length[2][0]["variant"] == "A"
+
+    # The second sequence is one base longer than the first.
+    longer = alignment_to_subs(align_sequences("TTTTT", "TTATTT"))
+
+    assert longer[0] == 1
+    assert longer[1] == 6
+    assert longer[2][0]["pos"] == 2
+    assert longer[2][0]["type"] == "I"
+    assert longer[2][0]["length"] == 1
diff --git a/test/bed2ddb_test.py b/test/bed2ddb_test.py
new file mode 100644
index 00000000..dd324990
--- /dev/null
+++ b/test/bed2ddb_test.py
@@ -0,0 +1,94 @@
+from __future__ import print_function
+
+import clodius.db_tiles as cdt
+import clodius.cli.aggregate as cca
+import os
+import os.path as op
+import sys
+import tempfile
+
+sys.path.append("scripts")
+
+testdir = op.realpath(op.dirname(__file__))
+
+
+def test_clodius_aggregate_bedpe():
+    """Aggregate a bedpe file and smoke-test the tile queries.
+
+    Nothing is asserted about the returned tiles or tileset info; the
+    calls only need to complete without raising.
+    """
+    source = op.join(testdir, "sample_data", "isidro.bedpe")
+    options = dict(
+        importance_column=None,
+        chromosome=None,
+        max_per_tile=100,
+        tile_size=1024,
+        has_header=True,
+    )
+
+    with tempfile.TemporaryDirectory() as workdir:
+        target = op.join(workdir, "isidro.bed2ddb")
+        cca._bedpe(source, target, "b37", **options)
+
+        """
+        runner = clt.CliRunner()
+        result = runner.invoke(
+                cca.bedpe,
+                [input_file,
+                '--output-file', output_file,
+                '--importance-column', 'random',
+                '--has-header',
+                '--assembly', 'b37'])
+
+        # print('output:', result.output, result)
+        assert(result.exit_code == 0)
+        """
+
+        # Only checks that the queries run; results are not inspected.
+        cdt.get_2d_tiles(target, 0, 0, 0)
+
+        cdt.get_tileset_info(target)
+        # TODO: Make assertions about result
+
+        cdt.get_2d_tiles(target, 1, 0, 0, numx=2, numy=2)
+        # TODO: Make assertions about result
+
+        cdt.get_tileset_info(target)
+        # TODO: Make assertion
+
+
+def test_clodius_aggregate_bedpe2():
+    """Aggregate galGal6.bed with an explicit chromsizes file.
+
+    Compares max_length and the output size with the expected database.
+    """
+    sample_dir = op.join(testdir, "sample_data")
+    source = op.join(sample_dir, "galGal6.bed")
+    reference = op.join(sample_dir, "galGal6.bed.multires.db")
+    options = dict(
+        chr1_col=1,
+        chr2_col=1,
+        from1_col=2,
+        from2_col=2,
+        to1_col=3,
+        to2_col=3,
+        importance_column=None,
+        chromosome=None,
+        chromsizes_filename=op.join(sample_dir, "galGal6.chrom.sizes"),
+        max_per_tile=100,
+        tile_size=1024,
+        has_header=True,
+    )
+
+    with tempfile.TemporaryDirectory() as workdir:
+        target = op.join(workdir, "blah.bed2ddb")
+        # this call must complete without raising
+        cca._bedpe(source, target, None, **options)
+
+        tsinfo = cdt.get_tileset_info(target)
+        produced_size = os.stat(target).st_size
+        expected_size = os.stat(reference).st_size
+
+        assert tsinfo["max_length"] == 1065365426
+        assert produced_size == expected_size
diff --git a/test/bedpe_test.py b/test/bedpe_test.py
index dd324990..df755f42 100644
--- a/test/bedpe_test.py
+++ b/test/bedpe_test.py
@@ -1,94 +1,42 @@
-from __future__ import print_function
-
-import clodius.db_tiles as cdt
-import clodius.cli.aggregate as cca
-import os
 import os.path as op
-import sys
-import tempfile
-
-sys.path.append("scripts")
-
-testdir = op.realpath(op.dirname(__file__))
-
 
-def test_clodius_aggregate_bedpe():
-    input_file = op.join(testdir, "sample_data", "isidro.bedpe")
+import pytest
 
-    with tempfile.TemporaryDirectory() as tmpdirname:
-        output_file = op.join(tmpdirname, "isidro.bed2ddb")
+import clodius.chromosomes as cs
+import clodius.tiles.bedpe as ctbp
 
-        cca._bedpe(
-            input_file,
-            output_file,
-            "b37",
-            importance_column=None,
-            chromosome=None,
-            max_per_tile=100,
-            tile_size=1024,
-            has_header=True,
-        )
-
-        """
-        runner = clt.CliRunner()
-        result = runner.invoke(
-                cca.bedpe,
-                [input_file,
-                '--output-file', output_file,
-                '--importance-column', 'random',
-                '--has-header',
-                '--assembly', 'b37'])
-
-        # print('output:', result.output, result)
-        assert(result.exit_code == 0)
-        """
-
-        cdt.get_2d_tiles(output_file, 0, 0, 0)
-        # print("entries:", entries)
-
-        cdt.get_tileset_info(output_file)
-        # TODO: Make assertions about result
-        # print('tileset_info', tileset_info)
+testdir = op.realpath(op.dirname(__file__))
 
-        cdt.get_2d_tiles(output_file, 1, 0, 0, numx=2, numy=2)
-        # TODO: Make assertions about result
-        # print("entries:", entries)
 
-        cdt.get_tileset_info(output_file)
-        # TODO: Make assertion
+@pytest.mark.parametrize(
+    "filename,header",
+    [
+        (
+            "isidro.bedpe",
+            "chrom1\tstart1\tend1\tchrom2\tstart2\tend2\tsv_id\tpe_support\tstrand1\tstrand2\tsvclass\tsvmethod",
+        ),
+        ("hg19_myc.bedpe", ""),  # an empty header string is expected for this file
+    ],
+)
+def test_bedpe_tileset_info(filename, header):  # runs once per (filename, header) pair
+    input_file = op.join(testdir, "sample_data", filename)
+    chromsizes_fn = op.join(testdir, "sample_data", "b37.chrom.sizes")
 
+    chromsizes = cs.chromsizes_as_series(chromsizes_fn)
+    tileset_info = ctbp.tileset_info(input_file, chromsizes)
 
-def test_clodius_aggregate_bedpe2():
-    """Use galGal6 chromsizes file"""
-    input_file = op.join(testdir, "sample_data", "galGal6.bed")
-    chromsizes_file = op.join(testdir, "sample_data", "galGal6.chrom.sizes")
-    expected_file = op.join(testdir, "sample_data", "galGal6.bed.multires.db")
+    assert "max_width" in tileset_info
+    assert tileset_info["header"] == header  # reported header must match exactly
 
-    with tempfile.TemporaryDirectory() as tmpdirname:
-        output_file = op.join(tmpdirname, "blah.bed2ddb")
-        # the test is here to ensure that this doesn't raise an error
-        cca._bedpe(
-            input_file,
-            output_file,
-            None,
-            chr1_col=1,
-            chr2_col=1,
-            from1_col=2,
-            from2_col=2,
-            to1_col=3,
-            to2_col=3,
-            importance_column=None,
-            chromosome=None,
-            chromsizes_filename=chromsizes_file,
-            max_per_tile=100,
-            tile_size=1024,
-            has_header=True,
-        )
 
-        tsinfo = cdt.get_tileset_info(output_file)
+@pytest.mark.parametrize(
+    "filename", ["hg19_myc.bedpe", "hg19_myc.1.bedpe.gz"],
+)
+def test_bedpe_tiles(filename):  # runs once for the plain and once for the gzipped BEDPE
+    input_file = op.join(testdir, "sample_data", filename)
+    chromsizes_fn = op.join(testdir, "sample_data", "b37.chrom.sizes")
 
-        stat_output = os.stat(output_file)
-        stat_expected = os.stat(expected_file)
+    chromsizes = cs.chromsizes_as_series(chromsizes_fn)
 
-        assert tsinfo["max_length"] == 1065365426
-        assert stat_output.st_size == stat_expected.st_size
+    tiles = ctbp.tiles(input_file, ["x.0.0.0"], chromsizes)
+    assert len(tiles) > 0  # only checks that something is returned for tile "x.0.0.0"
diff --git a/test/cli_test.py b/test/cli_test.py
index f3c1088d..96db8a80 100644
--- a/test/cli_test.py
+++ b/test/cli_test.py
@@ -1,16 +1,15 @@
 from __future__ import print_function
 
-import os.path as op
-import sys
-
-import h5py
-import numpy as np
-
-import click.testing as clt
-import clodius.cli.aggregate as cca
 import clodius.db_tiles as cdt
 import clodius.hdf_tiles as cht
+import click.testing as clt
+import clodius.cli.aggregate as cca
+import h5py
 import negspy.coordinates as nc
+import numpy as np
+import os.path as op
+import sys
+
 from clodius.tiles import bed2ddb
 
 sys.path.append("scripts")
@@ -24,27 +23,6 @@ def test_clodius_aggregate_bedfile():
     )
     output_file = "/tmp/geneAnnotationsExonsUnions.hg19.short.bed"
 
-    # make sure that running a command without an assembly
-    # throws an error
-    runner = clt.CliRunner()
-    result = runner.invoke(
-        cca.bedfile,
-        [
-            input_file,
-            "--max-per-tile",
-            20,
-            "--importance-column",
-            5,
-            "--output-file",
-            output_file,
-            "--delimiter",
-            "\t",
-        ],
-    )
-
-    a, b, tb = result.exc_info
-    assert result.exit_code == 1
-
     runner = clt.CliRunner()
     result = runner.invoke(
         cca.bedfile,
@@ -221,32 +199,6 @@ def test_clodius_aggregate_bedpe():
     input_file = op.join(testdir, "sample_data", "Rao_RepA_GM12878_Arrowhead.txt")
     output_file = "/tmp/bedpe.db"
 
-    # make sure that aggregating without an assembly throws
-    # doesn't succeed
-    runner = clt.CliRunner()
-    result = runner.invoke(
-        cca.bedpe,
-        [
-            input_file,
-            "--output-file",
-            output_file,
-            "--chr1-col",
-            "1",
-            "--from1-col",
-            "2",
-            "--to1-col",
-            "3",
-            "--chr2-col",
-            "1",
-            "--from2-col",
-            "2",
-            "--to2-col",
-            "3",
-        ],
-    )
-
-    assert result.exit_code == 1
-
     runner = clt.CliRunner()
     result = runner.invoke(
         cca.bedpe,
@@ -282,11 +234,11 @@ def test_clodius_aggregate_bedpe():
 
     assert "\n" not in tiles[(0, 0)][0]["fields"][2]
 
-    tiles_2d = bed2ddb.tiles(output_file, ["x.0.0.0"])
+    tiles_2d = bed2ddb.tiles(output_file, ['x.0.0.0'])
 
     assert len(tiles_2d[0][1][0]["fields"]) == 3
 
-    tiles_1d = bed2ddb.tiles(output_file, ["x.0.0"])
+    tiles_1d = bed2ddb.tiles(output_file, ['x.0.0'])
 
     assert len(tiles_1d[0][1][0]["fields"]) == 3
 
diff --git a/test/fasta_test.py b/test/fasta_test.py
new file mode 100644
index 00000000..df133326
--- /dev/null
+++ b/test/fasta_test.py
@@ -0,0 +1,38 @@
+import os.path as op
+
+import clodius.tiles.fasta as ctf
+
+fasta_filename = op.join("data", "GCA_000350705.1_Esch_coli_KTE11_V1_genomic.short.fna")
+fai_filename = op.join(
+    "data", "GCA_000350705.1_Esch_coli_KTE11_V1_genomic.short.fna.fai"
+)
+
+
+def test_tileset_info():
+    tsinfo = ctf.tileset_info(fai_filename)
+    # built from the .fai index path alone; only the presence of the keys is checked
+    assert "max_zoom" in tsinfo
+    assert "max_width" in tsinfo
+
+
+def test_multivec_tiles():
+    tiles = ctf.multivec_tiles(
+        fasta_filename, index_filename=fai_filename, tile_ids=["x.0.0"]
+    )
+    # NOTE(review): tiles[0][1] is presumably the data half of a (tile_id, data) pair - confirm
+    assert "shape" in tiles[0][1]
+
+
+def test_sequence_tiles():
+    """Check the sequence length returned for tiles "x.2.0" and "x.0.0"."""
+    tsinfo = ctf.tileset_info(fai_filename)
+
+    tiles = ctf.sequence_tiles(
+        fasta_filename, index_filename=fai_filename, tile_ids=["x.2.0"]
+    )
+    assert len(tiles[0][1]["sequence"]) == ctf.TILE_SIZE
+    # tile "x.0.0" must return as many characters as tsinfo["max_pos"][0]
+    tiles = ctf.sequence_tiles(
+        fasta_filename, index_filename=fai_filename, tile_ids=["x.0.0"]
+    )
+    assert len(tiles[0][1]["sequence"]) == tsinfo["max_pos"][0]
diff --git a/test/gff_comprehensive_test.py b/test/gff_comprehensive_test.py
new file mode 100644
index 00000000..703777ab
--- /dev/null
+++ b/test/gff_comprehensive_test.py
@@ -0,0 +1,32 @@
+import polars as pl
+from clodius.tiles.gff import parse_gff_to_models
+
+
+def test_parse_gff_comprehensive():
+    """Test parsing the first 5000 rows of genomic.10k.gff into gene/transcript models."""
+    # single-element list: only genomic.10k.gff is exercised (genomic.gff is not read here)
+    for gff_file in ["data/genomic.10k.gff"]:
+        df = pl.read_csv(
+            gff_file,
+            separator='\t',
+            comment_prefix='#',
+            has_header=False,
+            new_columns=['seqid', 'source', 'type', 'start', 'end', 'score', 'strand', 'phase', 'attributes'],
+            n_rows=5000  # Test subset for performance
+        )
+
+        genes, transcripts = parse_gff_to_models(df)
+
+        # Basic assertions: both results must be dicts
+        assert isinstance(genes, dict)
+        assert isinstance(transcripts, dict)
+
+        # Should have some genes if there are gene features in the data
+        gene_features = df.filter(pl.col('type') == 'gene')
+        if len(gene_features) > 0:
+            assert len(genes) > 0, f"No genes parsed from {gff_file}"
+
+        # Should have transcripts if any of the listed RNA feature types is present
+        transcript_features = df.filter(pl.col('type').is_in(['mRNA', 'lnc_RNA', 'tRNA', 'rRNA', 'snoRNA']))
+        if len(transcript_features) > 0:
+            assert len(transcripts) > 0, f"No transcripts parsed from {gff_file}"
diff --git a/test/gff_models_parsing_test.py b/test/gff_models_parsing_test.py
new file mode 100644
index 00000000..9775e33b
--- /dev/null
+++ b/test/gff_models_parsing_test.py
@@ -0,0 +1,68 @@
+import polars as pl
+from clodius.tiles.gff import parse_gff_to_models
+
+
+def test_load_and_parse_gff_positions():
+    """Parse positions 879-5039 of contig NC_004353.4 from genomic.10k.gff.
+
+    Checks the gene, transcript, exon and CDS models built from that slice.
+    """
+    # Load the GFF file
+    gff_file = "data/genomic.10k.gff"
+
+    # Read the whole GFF file; contig/position filtering happens below
+    df = pl.read_csv(
+        gff_file,
+        separator='\t',
+        comment_prefix='#',
+        has_header=False,
+        new_columns=['seqid', 'source', 'type', 'start', 'end', 'score', 'strand', 'phase', 'attributes']
+    )
+
+    # Filter for NC_004353.4 contig and position range 879-5039 (JYalpha gene)
+    filtered_df = df.filter(
+        (pl.col('seqid') == 'NC_004353.4') &
+        (pl.col('start') >= 879) &
+        (pl.col('end') <= 5039)
+    )
+
+    assert len(filtered_df) > 0, "No entries found in the specified range"
+
+    # Parse the filtered dataframe into models
+    genes, transcripts = parse_gff_to_models(filtered_df)
+
+    # Assertions to verify parsing worked correctly
+    assert len(genes) > 0, "No genes were parsed"
+    assert len(transcripts) > 0, "No transcripts were parsed"
+
+    # Check specific gene exists (JYalpha gene should be in this range)
+    jyalpha_gene = None
+    for gene_model in genes.values():
+        if 'JYalpha' in gene_model.get('gene', {}).get('attributes', {}).get('Name', ''):
+            jyalpha_gene = gene_model
+            break
+
+    assert jyalpha_gene is not None, "JYalpha gene not found in parsed results"
+    assert jyalpha_gene['gene']['start'] == 879, "JYalpha gene start position incorrect"
+    assert jyalpha_gene['gene']['end'] == 5039, "JYalpha gene end position incorrect"
+    assert len(jyalpha_gene['transcripts']) > 0, "JYalpha gene should have transcripts"
+
+    # Check that at least one transcript has exons
+    transcript_with_exons = None
+    for transcript in transcripts.values():
+        if len(transcript.get('exons', [])) > 0:
+            transcript_with_exons = transcript
+            break
+
+    assert transcript_with_exons is not None, "No transcripts with exons found"
+    assert len(transcript_with_exons['exons']) >= 1, "Transcript should have at least one exon"
+
+    # Check that at least one transcript (of any type) carries CDS entries
+    mrna_with_cds = None
+    for transcript in transcripts.values():
+        if len(transcript.get('cds', [])) > 0:
+            mrna_with_cds = transcript
+            break
+
+    assert mrna_with_cds is not None, "No mRNA transcripts with CDS found"
+    assert len(mrna_with_cds['cds']) >= 1, "mRNA transcript should have at least one CDS"
diff --git a/test/gff_test.py b/test/gff_test.py
new file mode 100644
index 00000000..6c531db1
--- /dev/null
+++ b/test/gff_test.py
@@ -0,0 +1,42 @@
+from __future__ import print_function
+
+import os.path as op
+
+import clodius.tiles.gff as ctg
+
+testdir = op.realpath(op.dirname(__file__))
+
+
+def test_tileset_info():
+    filename = op.join("data", "GCA_002918705.1_ASM291870v1_genomic.gff.gz")
+    # only the .gff.gz path is passed; no index file is given
+    tsinfo = ctg.tileset_info(filename)
+
+    assert "max_zoom" in tsinfo
+
+
+def test_tiles():
+    filename = op.join("data", "GCA_002918705.1_ASM291870v1_genomic.gff.gz")
+
+    tiles = ctg.tiles(filename, ["x.0.0"])
+    # exactly one result is expected, and its first element is the requested tile id
+    assert len(tiles) == 1
+    assert tiles[0][0] == "x.0.0"
+
+    assert len(tiles[0][1]["genes"].keys()) > 20
+    # tile "x.1.0" must contain fewer genes than tile "x.0.0"
+    tiles1 = ctg.tiles(filename, ["x.1.0"])
+    assert len(tiles1[0][1]["genes"].keys()) < len(tiles[0][1]["genes"].keys())
+
+
+def test_indexed_tiles():
+    filename = op.join("data", "genomic.10k.gff.gz")
+    index = op.join("data", "genomic.10k.gff.gz.tbi")
+    # the .tbi file is passed explicitly as index_filename
+    tiles = ctg.tiles(filename, ["x.0.0"], index_filename=index)
+    assert len(tiles) == 1
+
+    # the tile must contain more than 10 genes
+    assert len(tiles[0][1]["genes"].keys()) > 10
+    # ... and more than 10 transcripts
+    assert len(tiles[0][1]["transcripts"].keys()) > 10
diff --git a/test/mrmatrix_test.py b/test/mrmatrix_test.py
index 4ff96edb..c7f180e6 100644
--- a/test/mrmatrix_test.py
+++ b/test/mrmatrix_test.py
@@ -3,10 +3,11 @@
 import numpy as np
 from numpy.testing import assert_array_equal
 
-from clodius.tiles.mrmatrix import tiles, tileset_info
+from clodius.tiles.mrmatrix import tileset_info, single_tile
 
 
-class AttrDict(dict):
+class MockHdf5(dict):
+    # By wrapping a dict in our own class, we can add arbitrary attributes.
     pass
 
 
@@ -14,20 +15,20 @@ class TilesetInfoTest(unittest.TestCase):
     def setUp(self):
         tileset_stub = {"resolutions": {"1": {"values": np.array([[1, 2], [3, 4]])}}}
 
-        self.tileset = AttrDict(tileset_stub)
+        self.tileset = MockHdf5(tileset_stub)
         self.tileset.attrs = {}
 
-        self.tileset_min = AttrDict(tileset_stub)
+        self.tileset_min = MockHdf5(tileset_stub)
         self.tileset_min.attrs = {"min-pos": (1, 1)}
 
-        self.tileset_max = AttrDict(tileset_stub)
+        self.tileset_max = MockHdf5(tileset_stub)
         self.tileset_max.attrs = {"max-pos": (9, 9)}
 
         self.info = {
             "bins_per_dimension": 256,
-            "max_pos": (2, 2),  # TODO: Nothing uses these...
-            "min_pos": [0, 0],  # ...
-            "mirror_tiles": "false",  # Can we remove them?
+            "max_pos": (2, 2),
+            "min_pos": [0, 0],
+            "mirror_tiles": "false",
             "resolutions": [1],
         }
 
@@ -49,34 +50,35 @@ def test_with_max(self):
 class TilesTest(unittest.TestCase):
     def test_zoom_out_of_bounds(self):
         def should_fail():
-            tileset_stub = AttrDict(
+            tileset_stub = MockHdf5(
                 {"resolutions": {"1": {"values": np.array([[1, 2], [3, 4]])}}}
             )
             tileset_stub.attrs = {}
-            tiles(tileset_stub, 2, 0, 0)
+            single_tile(tileset_stub, 2, 0, 0)
 
         self.assertRaisesRegex(ValueError, r"Zoom level out of bounds", should_fail)
 
     def test_padding(self):
-        tileset = AttrDict(
+        tileset = MockHdf5(
             {
                 "resolutions": {
                     "1": {
                         "values": np.array([[1.0, 2], [3, 4]])
                         # It's important that there is a float value:
-                        # If there isn't, np.nan will be converted to a large negative integer.
+                        # If there isn't, np.nan will be converted
+                        # to a large negative integer.
                     }
                 }
             }
         )
         tileset.attrs = {}
-        zoomed = tiles(tileset, 0, 0, 0)
+        zoomed = single_tile(tileset, 0, 0, 0)
         self.assertEqual(zoomed.shape, (256, 256))
         assert_array_equal(zoomed[0:2, 0:2], [[1, 2], [3, 4]])
         assert_array_equal(zoomed[2:256, 0], [np.nan for x in range(254)])
 
     def test_bins(self):
-        tileset = AttrDict(
+        tileset = MockHdf5(
             {
                 "resolutions": {
                     "1": {
@@ -89,13 +91,14 @@ def test_bins(self):
         )
         tileset.attrs = {}
 
-        zoomed_0 = tiles(tileset, 0, 0, 0)
+        zoomed_0 = single_tile(tileset, 0, 0, 0)
         self.assertEqual(zoomed_0.shape, (256, 256))
         self.assertEqual(zoomed_0[0, 0], 0)
 
-        zoomed_1 = tiles(tileset, 0, 1, 1)
+        zoomed_1 = single_tile(tileset, 0, 1, 1)
         self.assertEqual(zoomed_1.shape, (256, 256))
         self.assertEqual(zoomed_1[0, 0], 256)
+
         self.assertEqual(zoomed_1[1, 0], 256)  # Constant dimension
         self.assertEqual(zoomed_1[0, 1], 257)  # Changing dimension
         self.assertEqual(zoomed_1[0, 256 - 13], 499)
@@ -103,24 +106,22 @@ def test_bins(self):
         # Plain assertEqual gave: nan != nan
 
     def test_zoom(self):
-        tileset = AttrDict(
+        tileset = MockHdf5(
             {
                 "resolutions": {
-                    # TODO: It's not actually enforced that zoom levels be sequential integers?
-                    # TODO: Should we check that the sizes are reasonable during initialization?
-                    "1": {"values": np.array([[1.0, 2.0], [3.0, 4.0]])},
-                    "5": {"values": np.array([[3.0, 4.0], [5.0, 6.0]])},
-                    "11": {"values": np.array([[5.0, 6.0], [7.0, 8.0]])},
+                    "1": {"values": np.array([[1.0, 2], [3, 4]])},
+                    "5": {"values": np.array([[3.0, 4], [5, 6]])},
+                    "11": {"values": np.array([[5.0, 6], [7, 8]])},
                 }
             }
         )
         tileset.attrs = {}
 
-        zoomed_0 = tiles(tileset, 0, 0, 0)
+        zoomed_0 = single_tile(tileset, 0, 0, 0)
         assert_array_equal(zoomed_0[0:2, 0:2], [[5, 6], [7, 8]])
 
-        zoomed_1 = tiles(tileset, 1, 0, 0)
+        zoomed_1 = single_tile(tileset, 1, 0, 0)
         assert_array_equal(zoomed_1[0:2, 0:2], [[3, 4], [5, 6]])
 
-        zoomed_2 = tiles(tileset, 2, 0, 0)
+        zoomed_2 = single_tile(tileset, 2, 0, 0)
         assert_array_equal(zoomed_2[0:2, 0:2], [[1, 2], [3, 4]])
diff --git a/test/sample_data/b37.chrom.sizes b/test/sample_data/b37.chrom.sizes
new file mode 100644
index 00000000..6a627d19
--- /dev/null
+++ b/test/sample_data/b37.chrom.sizes
@@ -0,0 +1,25 @@
+1	249250621
+2	243199373
+3	198022430
+4	191154276
+5	180915260
+6	171115067
+7	159138663
+8	146364022
+9	141213431
+10	135534747
+11	135006516
+12	133851895
+13	115169878
+14	107349540
+15	102531392
+16	90354753
+17	81195210
+18	78077248
+19	59128983
+20	63025520
+21	48129895
+22	51304566
+X	155270560
+Y	59373566
+MT	16569
diff --git a/test/sample_data/hg19.chrom.sizes b/test/sample_data/hg19.chrom.sizes
new file mode 100644
index 00000000..e80528d8
--- /dev/null
+++ b/test/sample_data/hg19.chrom.sizes
@@ -0,0 +1,93 @@
+chr1	249250621
+chr2	243199373
+chr3	198022430
+chr4	191154276
+chr5	180915260
+chr6	171115067
+chr7	159138663
+chr8	146364022
+chr9	141213431
+chr10	135534747
+chr11	135006516
+chr12	133851895
+chr13	115169878
+chr14	107349540
+chr15	102531392
+chr16	90354753
+chr17	81195210
+chr18	78077248
+chr19	59128983
+chr20	63025520
+chr21	48129895
+chr22	51304566
+chrX	155270560
+chrY	59373566
+chrM	16571
+chr6_ssto_hap7	4928567
+chr6_mcf_hap5	4833398
+chr6_cox_hap2	4795371
+chr6_mann_hap4	4683263
+chr6_apd_hap1	4622290
+chr6_qbl_hap6	4611984
+chr6_dbb_hap3	4610396
+chr17_ctg5_hap1	1680828
+chr4_ctg9_hap1	590426
+chr1_gl000192_random	547496
+chrUn_gl000225	211173
+chr4_gl000194_random	191469
+chr4_gl000193_random	189789
+chr9_gl000200_random	187035
+chrUn_gl000222	186861
+chrUn_gl000212	186858
+chr7_gl000195_random	182896
+chrUn_gl000223	180455
+chrUn_gl000224	179693
+chrUn_gl000219	179198
+chr17_gl000205_random	174588
+chrUn_gl000215	172545
+chrUn_gl000216	172294
+chrUn_gl000217	172149
+chr9_gl000199_random	169874
+chrUn_gl000211	166566
+chrUn_gl000213	164239
+chrUn_gl000220	161802
+chrUn_gl000218	161147
+chr19_gl000209_random	159169
+chrUn_gl000221	155397
+chrUn_gl000214	137718
+chrUn_gl000228	129120
+chrUn_gl000227	128374
+chr1_gl000191_random	106433
+chr19_gl000208_random	92689
+chr9_gl000198_random	90085
+chr17_gl000204_random	81310
+chrUn_gl000233	45941
+chrUn_gl000237	45867
+chrUn_gl000230	43691
+chrUn_gl000242	43523
+chrUn_gl000243	43341
+chrUn_gl000241	42152
+chrUn_gl000236	41934
+chrUn_gl000240	41933
+chr17_gl000206_random	41001
+chrUn_gl000232	40652
+chrUn_gl000234	40531
+chr11_gl000202_random	40103
+chrUn_gl000238	39939
+chrUn_gl000244	39929
+chrUn_gl000248	39786
+chr8_gl000196_random	38914
+chrUn_gl000249	38502
+chrUn_gl000246	38154
+chr17_gl000203_random	37498
+chr8_gl000197_random	37175
+chrUn_gl000245	36651
+chrUn_gl000247	36422
+chr9_gl000201_random	36148
+chrUn_gl000235	34474
+chrUn_gl000239	33824
+chr21_gl000210_random	27682
+chrUn_gl000231	27386
+chrUn_gl000229	19913
+chrUn_gl000226	15008
+chr18_gl000207_random	4262
diff --git a/test/sample_data/hg19_myc.1.bedpe.gz b/test/sample_data/hg19_myc.1.bedpe.gz
new file mode 100644
index 00000000..fb246d6b
Binary files /dev/null and b/test/sample_data/hg19_myc.1.bedpe.gz differ
diff --git a/test/sample_data/hg19_myc.bedpe b/test/sample_data/hg19_myc.bedpe
new file mode 100644
index 00000000..9ca3f0e6
--- /dev/null
+++ b/test/sample_data/hg19_myc.bedpe
@@ -0,0 +1,26 @@
+#columns color=11;thickness=12
+8	127310000	127320000	8	127820000	127830000	.	.	.	.	0,0,150	2	
+8	127880000	127890000	8	128310000	128320000	.	.	.	.	0,0,150	2	
+8	127880000	127890000	8	130550000	130560000	.	.	.	.	0,0,150	2	
+8	127885000	127890000	8	128745000	128750000	.	.	.	.	0,0,150	2	
+8	127890000	127900000	8	128180000	128190000	.	.	.	.	0,0,150	2	
+8	128180000	128190000	8	128740000	128750000	.	.	.	.	0,0,150	2	
+8	128190000	128200000	8	129080000	129090000	.	.	.	.	0,0,150	2	
+8	128220000	128225000	8	128310000	128315000	.	.	.	.	0,0,150	2	
+8	128220000	128225000	8	128575000	128580000	.	.	.	.	0,0,150	2	
+8	128220000	128225000	8	128745000	128750000	.	.	.	.	0,0,150	2	
+8	128220000	128230000	8	130560000	130570000	.	.	.	.	0,0,150	2	
+8	128310000	128315000	8	128575000	128580000	.	.	.	.	0,0,150	2	
+8	128310000	128315000	8	128745000	128750000	.	.	.	.	0,0,150	2	
+8	128310000	128315000	8	128805000	128810000	.	.	.	.	0,0,150	2	
+8	128310000	128315000	8	130560000	130565000	.	.	.	.	0,0,150	2	
+8	128575000	128580000	8	128805000	128810000	.	.	.	.	0,0,150	2	
+8	128740000	128750000	8	129660000	129670000	.	.	.	.	0,0,150	2	
+8	128745000	128750000	8	129870000	129875000	.	.	.	.	0,0,150	2	
+8	128745000	128750000	8	130555000	130560000	.	.	.	.	0,0,150	2	
+8	130030000	130040000	8	130690000	130700000	.	.	.	.	0,0,150	2	
+8	130035000	130040000	8	130550000	130555000	.	.	.	.	0,0,150	2	
+8	130310000	130320000	8	130690000	130700000	.	.	.	.	0,0,150	2	
+8	130315000	130320000	8	130540000	130545000	.	.	.	.	0,0,150	2	
+8	130830000	130840000	8	131020000	131030000	.	.	.	.	0,0,150	2	
+8	130950000	130955000	8	131025000	131030000	.	.	.	.	0,0,150	2	
diff --git a/test/tiles/bam_test.py b/test/tiles/bam_test.py
new file mode 100644
index 00000000..1c954e12
--- /dev/null
+++ b/test/tiles/bam_test.py
@@ -0,0 +1,55 @@
+from __future__ import print_function
+
+import json
+import os.path as op
+import unittest
+
+import clodius.tiles.bam as ctb
+
+
+class MyTestCase(unittest.TestCase):  # checks for clodius.tiles.bam (imported as ctb)
+    def test_tileset_info(self):
+        filename_matched = op.join("data", "SRR1770413.sorted.short.bam")
+
+        filename_mismatched = op.join("data", "SRR1770413.mismatched_bai.bam")
+
+        tsinfo = ctb.tileset_info(filename_matched)
+        assert "max_zoom" in tsinfo
+        # tileset_info must also succeed for the BAM that test_tiles needs an explicit index for
+        tsinfo = ctb.tileset_info(filename_mismatched)
+        assert "max_zoom" in tsinfo
+
+        assert "chromsizes" in tsinfo
+
+        # the following is in here to make sure no error
+        # gets thrown when dumping to JSON (e.g. from int64)
+        json_str = json.dumps(tsinfo)
+        assert len(json_str)
+
+    def test_tiles(self):
+        filename_matched = op.join("data", "SRR1770413.sorted.short.bam")
+
+        filename_mismatched = op.join("data", "SRR1770413.mismatched_bai.bam")
+
+        index_filename = op.join("data", "SRR1770413.different_index_filename.bai")
+
+        tile = ctb.tiles(filename_matched, ["x.9.0"])
+
+        assert len(tile) > 0
+
+        # without index_file, tiling this BAM must raise FileNotFoundError (missing index)
+        self.assertRaises(FileNotFoundError, ctb.tiles, filename_mismatched, ["x.9.0"])
+        # passing the differently named .bai explicitly makes the same request work
+        tile = ctb.tiles(filename_mismatched, ["x.9.0"], index_file=index_filename)
+
+        assert len(tile) > 0
+        assert len(tile[0][1]["id"]) > 10
+        # the same request with max_tile_width=10 must not produce an "error" entry
+        tile = ctb.tiles(
+            filename_mismatched,
+            ["x.9.0"],
+            index_file=index_filename,
+            max_tile_width=10,
+        )
+
+        assert "error" not in tile[0][1]
diff --git a/test/tiles/beddb_test.py b/test/tiles/beddb_test.py
index 05f96eaf..da91a9c6 100644
--- a/test/tiles/beddb_test.py
+++ b/test/tiles/beddb_test.py
@@ -29,6 +29,6 @@ def test_name_in_tile():
 def test_tileset_info():
     filename = op.join("data", "geneAnnotationsExonUnions.1000.bed.v3.beddb")
 
-    tsinfo = hgbe.tileset_info(filename)
+    tileset_info = hgbe.tileset_info(filename)
 
-    assert "chromsizes" in tsinfo
+    assert len(tileset_info["chromsizes"]) > 4
diff --git a/test/tiles/bedfile_test.py b/test/tiles/bedfile_test.py
new file mode 100644
index 00000000..eca5b633
--- /dev/null
+++ b/test/tiles/bedfile_test.py
@@ -0,0 +1,76 @@
+import os.path as op
+
+import clodius.chromosomes as cc
+import clodius.tiles.bedfile as ctb
+
+
+def test_gzip_tiles():
+    valid_filename = op.join("data", "regions.valid.bed.1.gz")
+    chromsizes_fn = op.join("data", "chm13v1.chrom.sizes")
+    # no index file is supplied for the gzipped BED (index_filename=None)
+    chromsizes = cc.chromsizes_as_series(chromsizes_fn)
+    tiles = ctb.tiles(valid_filename, ["x.0.0"], chromsizes, index_filename=None)
+
+    assert len(tiles) > 0
+
+
+def test_bed_tiles():
+    valid_filename = op.join("data", "regions.valid.bed")
+    invalid_filename = op.join("data", "regions.spaces.bed")
+
+    chromsizes_fn = op.join("data", "chm13v1.chrom.sizes")
+
+    chromsizes = cc.chromsizes_as_series(chromsizes_fn)
+    tiles = ctb.tiles(valid_filename, ["x.0.0"], chromsizes, index_filename=None)
+
+    assert len(tiles) > 0
+    # the invalid file must be reported through an "error" entry in the tile, not an exception
+    tiles = ctb.tiles(invalid_filename, ["x.0.0"], chromsizes, index_filename=None)
+
+    assert "error" in tiles[0][1]
+
+
+class MockCache:  # dict-backed object exposing get(key) / set(key, value)
+    def __init__(self):
+        self.cache = {}
+
+    def get(self, key):
+        return self.cache.get(key)  # None when the key was never set
+
+    def set(self, key, value):
+        self.cache[key] = value
+
+
+def test_bed_regions():
+    valid_filename = op.join("data", "regions.valid.bed")
+    chromsizes_fn = op.join("data", "chm13v1.chrom.sizes")
+    chromsizes = cc.chromsizes_as_series(chromsizes_fn)
+    # NOTE(review): 0 and 10 presumably bound the requested regions - confirm against ctb.regions
+    regions = ctb.regions(valid_filename, chromsizes, 0, 10)
+    assert len(regions[0]) == 10
+    # same request again, now passing a MockCache as the fifth argument
+    regions = ctb.regions(valid_filename, chromsizes, 0, 10, MockCache())
+
+    assert len(regions[0]) == 10
+
+
+def test_no_item_rgb():
+    chromsizes_fn = op.join("data", "chm13v1.chrom.sizes")
+    chromsizes = cc.chromsizes_as_series(chromsizes_fn)
+    filename = op.join("data", "no_item_rgb.bed")
+    # no assertion: the call only has to complete without raising
+    ctb.tiles(filename, ["x.0.0"], chromsizes, index_filename=None)
+
+
+def test_indexed_bedfile_tiles():
+    valid_filename = op.join("data", "regions.valid.bed.gz")
+    index_filename = op.join("data", "regions.valid.bed.gz.tbi")
+    chromsizes_fn = op.join("data", "chm13v1.chrom.sizes")
+    # tiles are requested with the .tbi file passed as index_filename
+    chromsizes = cc.chromsizes_as_series(chromsizes_fn)
+    tiles = ctb.tiles(
+        valid_filename, ["x.0.0"], chromsizes, index_filename=index_filename
+    )
+
+    assert len(tiles) > 0
+    assert "error" not in tiles[0][1]
diff --git a/test/tiles/bigbed_test.py b/test/tiles/bigbed_test.py
index 615cb3d1..af7da753 100644
--- a/test/tiles/bigbed_test.py
+++ b/test/tiles/bigbed_test.py
@@ -1,8 +1,12 @@
-import clodius.tiles.bigbed as hgbb
-import clodius.tiles.utils as hgut
 import os.path as op
 
+import pytest
+
+import clodius.tiles.bigbed as hgbb
+import clodius.tiles.bigwig as hgbw
+
 
+@pytest.mark.skip(reason="Changed the bigbed tile fetching function rendering this test obsolete")
 def test_bigbed_tiles():
     filename = op.join(
         "data", "masterlist_DHSs_733samples_WM20180608_all_mean_signal_colorsMax.bed.bb"
@@ -87,7 +91,7 @@ def test_natsorted():
     ]
 
     for test in chromname_tests:
-        sorted_output = hgut.natsorted(test["input"])
+        sorted_output = hgbw.natsorted(test["input"])
         assert (
             sorted_output == test["expected"]
         ), "Sorted output was %s\nExpected: %s" % (sorted_output, test["expected"])
diff --git a/test/tiles/bigwig_test.py b/test/tiles/bigwig_test.py
index dc62a9f4..507a2d89 100644
--- a/test/tiles/bigwig_test.py
+++ b/test/tiles/bigwig_test.py
@@ -1,5 +1,4 @@
 import clodius.tiles.bigwig as hgbi
-import clodius.tiles.utils as hgut
 import os.path as op
 import numpy as np
 import base64
@@ -106,15 +105,16 @@ def test_tileset_info():
 
     tileset_info = hgbi.tileset_info(filename)
 
-    assert len(tileset_info["aggregation_modes"]) == 4
-    assert tileset_info["aggregation_modes"]["mean"]
-    assert tileset_info["aggregation_modes"]["min"]
-    assert tileset_info["aggregation_modes"]["max"]
-    assert tileset_info["aggregation_modes"]["std"]
+    assert len(tileset_info["aggregation_modes"]) == 5
+
+    assert "mean" in [m["value"] for m in tileset_info["aggregation_modes"]]
+    assert "min" in [m["value"] for m in tileset_info["aggregation_modes"]]
+    assert "max" in [m["value"] for m in tileset_info["aggregation_modes"]]
+    assert "std" in [m["value"] for m in tileset_info["aggregation_modes"]]
 
     assert len(tileset_info["range_modes"]) == 2
-    assert tileset_info["range_modes"]["minMax"]
-    assert tileset_info["range_modes"]["whisker"]
+    assert "minMax" in [m["value"] for m in tileset_info["range_modes"]]
+    assert "whisker" in [m["value"] for m in tileset_info["range_modes"]]
 
 
 def test_natsorted():
@@ -139,7 +139,7 @@ def test_natsorted():
     ]
 
     for test in chromname_tests:
-        sorted_output = hgut.natsorted(test["input"])
+        sorted_output = hgbi.natsorted(test["input"])
         assert (
             sorted_output == test["expected"]
         ), "Sorted output was %s\nExpected: %s" % (sorted_output, test["expected"])
diff --git a/test/tiles/chromsizes_test.py b/test/tiles/chromsizes_test.py
index 5fa5d760..bfaf23d2 100644
--- a/test/tiles/chromsizes_test.py
+++ b/test/tiles/chromsizes_test.py
@@ -5,17 +5,10 @@
 
 
 def test_get_tileset_info():
-    filename = op.join("data", "chromSizes.tsv")
+    filename = op.join("data", "hg38.chrom.sizes")
 
-    # Test loading tileset info using a filename
     tsinfo = TilesetInfo(**ctcs.tileset_info(filename))
 
     assert tsinfo.max_width > 100
     assert len(tsinfo.chromsizes) > 2
-
-    with open(filename, "rb") as f:
-        # Test loading using a file-like object
-        tsinfo = TilesetInfo(**ctcs.tileset_info(f))
-
-        assert tsinfo.max_width > 100
-        assert len(tsinfo.chromsizes) > 2
+    # TODO: Do something with the return value
diff --git a/test/tiles/cooler_test.py b/test/tiles/cooler_test.py
index 0eaadb4b..d801d2f9 100644
--- a/test/tiles/cooler_test.py
+++ b/test/tiles/cooler_test.py
@@ -1,7 +1,9 @@
-import clodius.tiles.cooler as hgco
-import numpy as np
-import os.path as op
 import base64
+import os.path as op
+
+import numpy as np
+
+import clodius.tiles.cooler as hgco
 
 
 def test_cooler_info():
@@ -12,15 +14,10 @@ def test_cooler_info():
 
     tiles = hgco.generate_tiles(filename, ["a.0.0.0"])
 
-    r = base64.b64decode(tiles[0][1]["dense"].encode("utf-8"))
+    r = base64.decodebytes(tiles[0][1]["dense"].encode("utf-8"))
     q = np.frombuffer(r, dtype=np.float32)
 
     q = q.reshape((256, 256))
 
     filename = op.join("data", "hic-resolutions.cool")
     # print(hgco.tileset_info(filename))
-
-
-def test_cooler_tiles():
-    filename = op.join("data", "hic-resolutions.cool")
-    hgco.tiles(filename, ["x.0.0.0"])
diff --git a/test/tiles/geo_test.py b/test/tiles/geo_test.py
new file mode 100644
index 00000000..d91599d2
--- /dev/null
+++ b/test/tiles/geo_test.py
@@ -0,0 +1,115 @@
+import os
+import sqlite3
+import tempfile
+import unittest
+
+from clodius.tiles import geo
+
+
+class GeoTest(unittest.TestCase):
+    def setUp(self):
+        self.db_file = tempfile.NamedTemporaryFile(delete=False, suffix=".db")
+        self.db_file.close()
+        self._create_test_db(self.db_file.name)
+
+    def tearDown(self):
+        if os.path.exists(self.db_file.name):
+            os.unlink(self.db_file.name)
+
+    def _create_test_db(self, filepath):
+        conn = sqlite3.connect(filepath)
+        c = conn.cursor()
+
+        c.execute(
+            """CREATE TABLE tileset_info (
+                zoom_step INTEGER,
+                tile_size INTEGER,
+                max_zoom INTEGER,
+                min_lng REAL,
+                max_lng REAL,
+                min_lat REAL,
+                max_lat REAL
+            )"""
+        )
+        c.execute(
+            "INSERT INTO tileset_info VALUES (1, 256, 10, -180.0, 180.0, -90.0, 90.0)"
+        )
+
+        c.execute(
+            """CREATE TABLE intervals (
+                id INTEGER PRIMARY KEY,
+                minLng REAL,
+                maxLng REAL,
+                maxLat REAL,
+                minLat REAL,
+                uid TEXT,
+                importance REAL,
+                geometry TEXT,
+                properties TEXT
+            )"""
+        )
+        c.execute(
+            """CREATE TABLE position_index (
+                id INTEGER,
+                zoomLevel INTEGER,
+                rMinLng REAL,
+                rMaxLng REAL,
+                rMinLat REAL,
+                rMaxLat REAL
+            )"""
+        )
+
+        c.execute(
+            """INSERT INTO intervals VALUES (
+                1, -122.5, -122.0, 37.8, 37.5, 'test-uid-1', 1.0,
+                '{"type": "Point", "coordinates": [-122.25, 37.65]}',
+                '{"name": "Test Location"}'
+            )"""
+        )
+        c.execute(
+            "INSERT INTO position_index VALUES (1, 10, -122.5, -122.0, 37.5, 37.8)"
+        )
+
+        conn.commit()
+        conn.close()
+
+    def test_tileset_info_with_filepath(self):
+        info = geo.tileset_info(self.db_file.name)
+        self.assertEqual(info["zoom_step"], 1)
+        self.assertEqual(info["tile_size"], 256)
+        self.assertEqual(info["max_zoom"], 10)
+        self.assertEqual(info["min_pos"], [-180.0, -90.0])
+        self.assertEqual(info["max_pos"], [180.0, 90.0])
+
+    def test_tileset_info_with_s3_uri(self):
+        # NOTE(review): uses a local path; no s3:// URI is actually exercised
+        info = geo.tileset_info(self.db_file.name)
+        self.assertEqual(info["zoom_step"], 1)
+        self.assertEqual(info["tile_size"], 256)
+        self.assertEqual(info["max_zoom"], 10)
+
+    def test_get_tiles_with_filepath(self):
+        tiles = geo.get_tiles(self.db_file.name, 5, 5, 6)
+        self.assertIsInstance(tiles, dict)
+
+    def test_get_tiles_with_s3_uri(self):
+        # NOTE(review): uses a local path; no s3:// URI is actually exercised
+        tiles = geo.get_tiles(self.db_file.name, 5, 5, 6)
+        self.assertIsInstance(tiles, dict)
+
+    def test_get_tile_box(self):
+        minlng, maxlng, minlat, maxlat = geo.get_tile_box(0, 0, 0)
+        self.assertAlmostEqual(minlng, -180.0)
+        self.assertAlmostEqual(maxlng, 180.0)
+        self.assertAlmostEqual(minlat, 85.05112877980659, places=5)
+        self.assertAlmostEqual(maxlat, -85.05112877980659, places=5)
+
+    def test_get_lng_lat_from_tile_pos(self):
+        lng, lat = geo.get_lng_lat_from_tile_pos(1, 0, 0)
+        self.assertAlmostEqual(lng, -180.0)
+        self.assertAlmostEqual(lat, 85.05112877980659, places=5)
+
+    def test_get_tile_pos_from_lng_lat(self):
+        x, y = geo.get_tile_pos_from_lng_lat(0, 0, 1)
+        self.assertAlmostEqual(x, 1.0)
+        self.assertAlmostEqual(y, 1.0)
diff --git a/test/tiles/multivec_test.py b/test/tiles/multivec_test.py
index 57ee0a0e..6f55eaed 100644
--- a/test/tiles/multivec_test.py
+++ b/test/tiles/multivec_test.py
@@ -39,16 +39,3 @@ def test_multivec():
         assert (
             base64.b64encode(single_tile.ravel()).decode("utf-8") == tile_value["dense"]
         )
-
-
-def test_states():
-    filename = op.join(
-        "data", "states_format_input_testfile.100.bed.multires.mv5"
-    )
-
-    # make sure we can retrieve the tileset info
-    tsinfo = hgmu.tileset_info(filename)
-    assert 10000000 in tsinfo["resolutions"]
-
-    tiles = hgmu.tiles(filename, ["x.0.0"])
-    assert "shape" in tiles[0][1]
diff --git a/test/tiles/npmatrix_test.py b/test/tiles/npmatrix_test.py
index 70d7f5bc..db461a7b 100644
--- a/test/tiles/npmatrix_test.py
+++ b/test/tiles/npmatrix_test.py
@@ -1,19 +1,10 @@
-import numpy as np
-
 import clodius.tiles.npmatrix as hgnp
+import numpy as np
 
 
 def test_numpy_matrix():
     grid = np.array(np.random.rand(100, 100))
+    # print('grid:', grid)
 
     tile = hgnp.tiles(grid, 0, 0, 0)
     assert tile.shape == (256, 256)
-
-
-def test_numpy_narrow_matrix():
-    grid = np.array(np.random.rand(2, 10000))
-
-    # make sure we can fetch a tile that would be empty
-    # because of the narrowness of the matrix
-    tile = hgnp.tiles(grid, 1, 1, 0)
-    assert tile.shape == (256, 256)
diff --git a/test/tiles/npvector_test.py b/test/tiles/npvector_test.py
index d0de2676..d610770b 100644
--- a/test/tiles/npvector_test.py
+++ b/test/tiles/npvector_test.py
@@ -1,10 +1,9 @@
 import numpy as np
-
 import clodius.tiles.npvector as hgnv
 
 
 def test_npvector():
-    array = np.array([float(f) for f in range(100)])
+    array = np.array([float(i) for i in range(100)])
     # print('ts:', hgnv.tileset_info(array))
     assert "max_width" in hgnv.tileset_info(array)
 
diff --git a/test/tiles/pileup_test.py b/test/tiles/pileup_test.py
new file mode 100644
index 00000000..2d4afbae
--- /dev/null
+++ b/test/tiles/pileup_test.py
@@ -0,0 +1,127 @@
+import os.path as op
+
+import pytest
+
+pytest.importorskip("mappy")
+
+from clodius.tiles.pileup import get_local_tiles  # noqa: E402
+from clodius.alignment import align_sequences, alignment_to_subs  # noqa: E402
+
+
+def test_alignment_to_subs():
+    a = align_sequences("TTTTT", "AAAATTATTAAAA")
+    print("")
+    print(a)
+    s = alignment_to_subs(a)
+
+    print("s", s)
+
+    assert s[2][0]["type"] == "I"
+    assert s[2][0]["pos"] == 0
+    assert s[2][0]["length"] == 4
+
+    assert s[2][-1]["type"] == "I"
+    assert s[2][-1]["pos"] == 5
+    assert s[2][-1]["length"] == 4
+
+    a = align_sequences("TTTTT", "TTATT")
+    s = alignment_to_subs(a)
+
+    # assert 1-based start positions and closed intervals
+    assert s[0] == 1
+    assert s[1] == 6
+    assert s[2][0]["pos"] == 2  # subs are 0-based
+    assert s[2][0]["base"] == "T"
+    assert s[2][0]["variant"] == "A"
+
+    a = align_sequences("TTTTT", "TTATTT")
+    s = alignment_to_subs(a)
+
+    assert s[0] == 1
+    assert s[1] == 6
+    assert s[2][0]["pos"] == 2
+    assert s[2][0]["type"] == "I"
+    assert s[2][0]["length"] == 1
+
+
+CSV_PATH = op.join("data", "pileup_test.csv")
+REF_PATH = op.join("data", "pileup_ref.fa")
+CHROMSIZES_PATH = op.join("data", "pileup_chromsizes.tsv")
+
+
+def _assert_result_structure(result):
+    assert "tilesetInfo" in result
+    assert "tiles" in result
+    tsinfo = result["tilesetInfo"]["x"]
+    assert "resolutions" in tsinfo
+    assert "chromsizes" in tsinfo
+    assert "columns" in tsinfo
+    # The single tile at zoom 0, position 0 should be present
+    assert "x.0.0" in result["tiles"]
+    tile = result["tiles"]["x.0.0"]
+    assert isinstance(tile, list)
+    assert len(tile) > 0
+    for entry in tile:
+        assert "from" in entry
+        assert "to" in entry
+        assert "substitutions" in entry
+
+
+def test_get_local_tiles_with_refrow():
+    """get_local_tiles uses a CSV row as the reference sequence."""
+    result = get_local_tiles(CSV_PATH, colname="seq", refrow=1)
+    _assert_result_structure(result)
+    tsinfo = result["tilesetInfo"]["x"]
+    assert tsinfo["chromsizes"] == [["row_1", 60]]
+
+
+def test_get_local_tiles_with_reffile_path():
+    """get_local_tiles accepts a string filepath for the reference FASTA."""
+    result = get_local_tiles(CSV_PATH, colname="seq", reffile=REF_PATH)
+    _assert_result_structure(result)
+    tsinfo = result["tilesetInfo"]["x"]
+    assert tsinfo["chromsizes"] == [["ref1", 60]]
+
+
+def test_get_local_tiles_with_reffile_object():
+    """get_local_tiles accepts a binary file-like object for the reference FASTA."""
+    with open(REF_PATH, "rb") as f:
+        result = get_local_tiles(CSV_PATH, colname="seq", reffile=f)
+    _assert_result_structure(result)
+
+
+def test_get_local_tiles_with_chromsizes_path():
+    """get_local_tiles accepts a string filepath for the chromsizes file."""
+    result = get_local_tiles(
+        CSV_PATH,
+        colname="seq",
+        reffile=REF_PATH,
+        chromsizes_file=CHROMSIZES_PATH,
+    )
+    _assert_result_structure(result)
+    tsinfo = result["tilesetInfo"]["x"]
+    assert tsinfo["chromsizes"] == [["ref1", 60]]
+
+
+def test_get_local_tiles_with_chromsizes_object():
+    """get_local_tiles accepts a binary file-like object for the chromsizes file."""
+    with open(CHROMSIZES_PATH, "rb") as f:
+        result = get_local_tiles(
+            CSV_PATH,
+            colname="seq",
+            reffile=REF_PATH,
+            chromsizes_file=f,
+        )
+    _assert_result_structure(result)
+    tsinfo = result["tilesetInfo"]["x"]
+    assert tsinfo["chromsizes"] == [["ref1", 60]]
+
+
+def test_get_local_tiles_substitution_detected():
+    """The substitution in sample3 is reflected in the tile data."""
+    result = get_local_tiles(CSV_PATH, colname="seq", reffile=REF_PATH)
+    tile = result["tiles"]["x.0.0"]
+    # At least one entry should have a substitution (sample3 differs at pos 20)
+    all_subs = [s for entry in tile for s in entry["substitutions"]]
+    mismatch_subs = [s for s in all_subs if s.get("type") == "X"]
+    assert len(mismatch_subs) > 0
diff --git a/test/tiles/sequence_logo_tests.py b/test/tiles/sequence_logo_tests.py
new file mode 100644
index 00000000..7e31453f
--- /dev/null
+++ b/test/tiles/sequence_logo_tests.py
@@ -0,0 +1,82 @@
+import unittest
+from unittest.mock import patch
+from clodius.tiles.sequence_logos import tile_functions
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+from Bio.Align import MultipleSeqAlignment
+
+
+class TestSequenceLogos(unittest.TestCase):
+
+    def create_mock_alignment(self, sequences):
+        """Create a mock alignment from sequences"""
+        records = [SeqRecord(Seq(seq), id=f"seq{i}") for i, seq in enumerate(sequences)]
+        return MultipleSeqAlignment(records)
+
+    @patch("clodius.alignment.run_clustal_omega")
+    def test_dna_sequences(self, mock_clustal):
+        """Test tile_functions with DNA sequences"""
+        sequences = ["ATCG", "ATGG", "ACCG"]
+        mock_clustal.return_value = self.create_mock_alignment(sequences)
+        result = tile_functions(sequences, seqtype="dna")
+
+        # Check that we get the expected functions
+        self.assertIn("tileset_info", result)
+        self.assertIn("tiles", result)
+
+        # Test tileset_info
+        tsinfo = result["tileset_info"]()
+        self.assertEqual(tsinfo["shape"], [4, 128])  # 4 DNA bases
+        self.assertEqual(tsinfo["row_infos"], ["A", "C", "G", "T"])
+        self.assertEqual(tsinfo["resolutions"], [1])
+
+        # Test tiles function
+        tile_data = result["tiles"](0, 0)
+        self.assertIn("dense", tile_data)
+        self.assertIn("dtype", tile_data)
+        self.assertIn("shape", tile_data)
+        self.assertEqual(tile_data["dtype"], "float16")
+        self.assertEqual(tile_data["shape"], [4, 128])
+
+    @patch("clodius.alignment.run_clustal_omega")
+    def test_protein_sequences(self, mock_clustal):
+        """Test tile_functions with protein sequences"""
+        sequences = ["ACDE", "ACDF", "ACDG"]
+        mock_clustal.return_value = self.create_mock_alignment(sequences)
+        result = tile_functions(sequences, seqtype="protein")
+
+        # Check that we get the expected functions
+        self.assertIn("tileset_info", result)
+        self.assertIn("tiles", result)
+
+        # Test tileset_info
+        tsinfo = result["tileset_info"]()
+        self.assertEqual(tsinfo["shape"], [20, 128])  # 20 amino acids
+        self.assertEqual(len(tsinfo["row_infos"]), 20)
+
+        # Test tiles function
+        tile_data = result["tiles"](0, 0)
+        self.assertEqual(tile_data["shape"], [20, 128])
+
+    @patch("clodius.alignment.run_clustal_omega")
+    def test_invalid_seqtype(self, mock_clustal):
+        """Test that invalid seqtype raises ValueError"""
+        sequences = ["ATCG"]
+        mock_clustal.return_value = self.create_mock_alignment(sequences)
+        with self.assertRaises(ValueError):
+            tile_functions(sequences, seqtype="invalid")
+
+    @patch("clodius.alignment.run_clustal_omega")
+    def test_empty_sequences(self, mock_clustal):
+        """Test with empty sequences list"""
+        sequences = []
+        mock_clustal.return_value = self.create_mock_alignment(sequences)
+        result = tile_functions(sequences, seqtype="dna")
+
+        # Should still return valid structure
+        self.assertIn("tileset_info", result)
+        self.assertIn("tiles", result)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/test/tiles/vcf_test.py b/test/tiles/vcf_test.py
new file mode 100644
index 00000000..7d5f2a25
--- /dev/null
+++ b/test/tiles/vcf_test.py
@@ -0,0 +1,55 @@
+import os.path as op
+
+import clodius.chromosomes as cc
+import clodius.tiles.bedfile as ctb
+
+import pytest
+
+
+@pytest.mark.parametrize(
+    "file",
+    [
+        # "test.vcf",
+        "test.1.vcf.gz"
+    ],
+)
+def test_vcf_tiles(file):
+    valid_filename = op.join("data", file)
+    chromsizes_fn = op.join("data", "chm13v1.chrom.sizes")
+
+    chromsizes = cc.chromsizes_as_series(chromsizes_fn)
+
+    tiles = ctb.tiles(
+        valid_filename,
+        ["x.0.0"],
+        chromsizes,
+        index_filename=None,
+        settings={"filetype": "vcf"},
+    )
+
+    ends = set()
+    starts = set()
+
+    # Make sure the tile starts are before the tile ends
+    # and keep track of how many different starts and ends
+    # there are
+    for t in tiles[0][1]:
+        starts.add(t["xStart"])
+        ends.add(t["xEnd"])
+
+        assert t["xStart"] < t["xEnd"]
+
+    assert len(ends) > 1
+    assert len(tiles) > 0
+
+    # try as file pointer
+    with open(valid_filename, "rb") as f:
+        tiles = ctb.tiles(
+            f,
+            ["x.0.0"],
+            chromsizes,
+            index_filename=None,
+            settings={"filetype": "vcf"},
+        )
+
+    assert len(tiles) > 0
diff --git a/test/tsv_to_mrmatrix_test.py b/test/tsv_to_mrmatrix_test.py
index 8a54b980..3f4b9b18 100644
--- a/test/tsv_to_mrmatrix_test.py
+++ b/test/tsv_to_mrmatrix_test.py
@@ -7,7 +7,7 @@
 import numpy as np
 from numpy.testing import assert_array_equal
 
-from clodius._tsv_to_mrmatrix import coarsen, parse
+from scripts.tsv_to_mrmatrix import coarsen, parse
 
 
 class CoarsenTest(unittest.TestCase):
@@ -17,45 +17,45 @@ def test_5_layer_pyramid(self):
         max_width = tile_size * 2 ** max_zoom
 
         with TemporaryDirectory() as tmp_dir:
-            hdf5 = h5py.File(tmp_dir + "/temp.hdf5", "w")
-            g = hdf5.create_group("resolutions")
-            g1 = g.create_group("1")
+            hdf5 = h5py.File(tmp_dir + '/temp.hdf5', 'w')
+            g = hdf5.create_group('resolutions')
+            g1 = g.create_group('1')
             ds = g1.create_dataset(
-                "values",
-                (max_width, max_width),
-                dtype="f4",
-                compression="lzf",
-                fillvalue=np.nan,
-            )
+                'values', (max_width, max_width),
+                dtype='f4', compression='lzf', fillvalue=np.nan)
             for y in range(max_width):
                 a = np.array([float(x) for x in range(max_width)])
                 ds[y, :max_width] = a
 
             # before coarsen()
-            self.assertEqual(list(hdf5.keys()), ["resolutions"])
-            self.assertEqual(list(hdf5["resolutions"].keys()), ["1"])
-            self.assertEqual(list(hdf5["resolutions"]["1"].keys()), ["values"])
-            self.assertEqual(list(hdf5["resolutions"]["1"]["values"].shape), [64, 64])
+            self.assertEqual(list(hdf5.keys()), ['resolutions'])
+            self.assertEqual(list(hdf5['resolutions'].keys()), ['1'])
+            self.assertEqual(list(hdf5['resolutions']['1'].keys()), ['values'])
+            self.assertEqual(list(hdf5['resolutions']['1']['values'].shape), [64, 64])
             self.assertEqual(
-                hdf5["resolutions"]["1"]["values"][:].tolist()[0],
-                [float(x) for x in range(64)],
+                hdf5['resolutions']['1']['values'][:].tolist()[0],
+                [float(x) for x in range(64)]
             )
 
             coarsen(hdf5, tile_size=tile_size)
 
             # after coarsen()
-            self.assertEqual(list(hdf5.keys()), ["resolutions"])
-            self.assertEqual(
-                list(hdf5["resolutions"].keys()), ["1", "16", "2", "4", "8"]
-            )
-            self.assertEqual(list(hdf5["resolutions"]["16"].keys()), ["values"])
-            shapes = {"1": 64, "2": 32, "4": 16, "8": 8, "16": 4}
+            self.assertEqual(list(hdf5.keys()), ['resolutions'])
+            self.assertEqual(list(hdf5['resolutions'].keys()), ['1', '16', '2', '4', '8'])
+            self.assertEqual(list(hdf5['resolutions']['16'].keys()), ['values'])
+            shapes = {
+                '1': 64,
+                '2': 32,
+                '4': 16,
+                '8': 8,
+                '16': 4
+            }
             for (k, v) in shapes.items():
-                self.assertEqual(hdf5["resolutions"][k]["values"].shape, (v, v))
+                self.assertEqual(hdf5['resolutions'][k]['values'].shape, (v, v))
             row = [1920, 6016, 10112, 14208]
             self.assertEqual(
-                hdf5["resolutions"]["16"]["values"][:].tolist(), [row, row, row, row]
-            )
+                hdf5['resolutions']['16']['values'][:].tolist(),
+                [row, row, row, row])
             # TODO: Check the math
 
     def test_math(self):
@@ -64,16 +64,12 @@ def test_math(self):
         max_width = tile_size * 2 ** max_zoom
 
         with TemporaryDirectory() as tmp_dir:
-            hdf5 = h5py.File(tmp_dir + "/temp.hdf5", "w")
-            g = hdf5.create_group("resolutions")
-            g1 = g.create_group("1")
+            hdf5 = h5py.File(tmp_dir + '/temp.hdf5', 'w')
+            g = hdf5.create_group('resolutions')
+            g1 = g.create_group('1')
             ds = g1.create_dataset(
-                "values",
-                (max_width, max_width),
-                dtype="f4",
-                compression="lzf",
-                fillvalue=np.nan,
-            )
+                'values', (max_width, max_width),
+                dtype='f4', compression='lzf', fillvalue=np.nan)
             for y in range(max_width):
                 a = np.array([float(x) for x in range(max_width)])
                 ds[y, :max_width] = a
@@ -81,72 +77,78 @@ def test_math(self):
             coarsen(hdf5, tile_size=tile_size)
 
             # after coarsen()
-            self.assertEqual(list(hdf5.keys()), ["resolutions"])
-            self.assertEqual(list(hdf5["resolutions"].keys()), ["1", "2", "4"])
-
-            shapes = {"1": 8, "2": 4, "4": 2}
+            self.assertEqual(list(hdf5.keys()), ['resolutions'])
+            self.assertEqual(list(hdf5['resolutions'].keys()), ['1', '2', '4'])
+
+            shapes = {
+                '1': 8,
+                '2': 4,
+                '4': 2
+            }
             for (k, v) in shapes.items():
-                self.assertEqual(hdf5["resolutions"][k]["values"].shape, (v, v))
+                self.assertEqual(hdf5['resolutions'][k]['values'].shape, (v, v))
 
             row8 = list(range(8))
             assert_array_equal(
-                hdf5["resolutions"]["1"]["values"], [row8 for _ in range(8)]
-            )
+                hdf5['resolutions']['1']['values'],
+                [row8 for _ in range(8)])
 
             row4 = [8 * x + 2 for x in range(4)]
             assert_array_equal(
-                hdf5["resolutions"]["2"]["values"], [row4 for _ in range(4)]
-            )
+                hdf5['resolutions']['2']['values'],
+                [row4 for _ in range(4)])
 
             row2 = [24, 88]
             assert_array_equal(
-                hdf5["resolutions"]["4"]["values"], [row2 for _ in range(2)]
-            )
+                hdf5['resolutions']['4']['values'],
+                [row2 for _ in range(2)])
 
 
 class ParseTest(unittest.TestCase):
     def test_parse(self):
         with TemporaryDirectory() as tmp_dir:
-            csv_path = tmp_dir + "/tmp.csv"
-            with open(csv_path, "w", newline="") as csv_file:
-                writer = csv.writer(csv_file, delimiter="\t")
+            csv_path = tmp_dir + '/tmp.csv'
+            with open(csv_path, 'w', newline='') as csv_file:
+                writer = csv.writer(csv_file, delimiter='\t')
                 # header:
-                labels = ["col-{}".format(x) for x in range(513)]
+                labels = ['col-{}'.format(x) for x in range(513)]
                 writer.writerow(labels)
                 # body:
                 for y in range(0, 3):
-                    writer.writerow(["row-{}".format(y)] + [0] * 512)
+                    writer.writerow(['row-{}'.format(y)] + [0] * 512)
                 for y in range(3, 6):
-                    writer.writerow(["row-{}".format(y)] + [1] * 512)
+                    writer.writerow(['row-{}'.format(y)] + [1] * 512)
                 for y in range(6, 9):
-                    writer.writerow(["row-{}".format(y)] + [1, -1] * 256)
-            csv_handle = open(csv_path, "r")
+                    writer.writerow(['row-{}'.format(y)] + [1, -1] * 256)
+            csv_handle = open(csv_path, 'r')
 
-            hdf5_path = tmp_dir + "tmp.hdf5"
-            hdf5_write_handle = h5py.File(hdf5_path, "w")
+            hdf5_path = tmp_dir + 'tmp.hdf5'
+            hdf5_write_handle = h5py.File(hdf5_path, 'w')
 
             parse(csv_handle, hdf5_write_handle)
 
-            hdf5 = h5py.File(hdf5_path, "r")
-            self.assertEqual(list(hdf5.keys()), ["labels", "resolutions"])
-            self.assertEqual([h.decode("utf8") for h in hdf5["labels"]], labels[1:])
+            hdf5 = h5py.File(hdf5_path, 'r')
 
-            self.assertEqual(list(hdf5["resolutions"].keys()), ["1", "2"])
+            def decode_if_possible(keys):
+                return [x.decode() if hasattr(x, 'decode') else x for x in keys]
 
-            self.assertEqual(
-                list(hdf5["resolutions"]["1"].keys()), ["nan_values", "values"]
-            )
+            self.assertEqual(decode_if_possible(list(hdf5.keys())), ['labels', 'resolutions'])
+            self.assertEqual(decode_if_possible(list(hdf5['labels'])), labels[1:])
+
+            self.assertEqual(decode_if_possible(list(hdf5['resolutions'].keys())), ['1', '2'])
+
+            self.assertEqual(decode_if_possible(list(hdf5['resolutions']['1'].keys())), ['nan_values', 'values'])
             assert_array_equal(
-                hdf5["resolutions"]["1"]["nan_values"], [[0] * 512] * 512
+                hdf5['resolutions']['1']['nan_values'], [[0] * 512] * 512
             )
-            res_1 = hdf5["resolutions"]["1"]["values"]
+            res_1 = hdf5['resolutions']['1']['values']
             assert_array_equal(res_1[0], [0] * 512)
             assert_array_equal(res_1[3], [1] * 512)
             assert_array_equal(res_1[6], [1, -1] * 256)
             assert_array_equal(res_1[9], [nan] * 512)
 
-            self.assertEqual(list(hdf5["resolutions"]["2"].keys()), ["values"])
-            res_2 = hdf5["resolutions"]["2"]["values"]
+            self.assertEqual(decode_if_possible(list(hdf5['resolutions']['2'].keys())), ['values'])
+            res_2 = hdf5['resolutions']['2']['values']
             assert_array_equal(res_2[0], [0] * 256)
             assert_array_equal(res_2[1], [2] * 256)  # Stradles the 0 and 1 rows
             assert_array_equal(res_2[2], [4] * 256)
diff --git a/test/utils_test.py b/test/utils_test.py
new file mode 100644
index 00000000..39cbb75c
--- /dev/null
+++ b/test/utils_test.py
@@ -0,0 +1,67 @@
+from __future__ import print_function
+
+import os.path as op
+
+import clodius.utils as cu
+from clodius.tiles.utils import (
+    abs2genome_fn,
+    parse_tile_id,
+    parse_tile_position,
+    TilesetInfo,
+)
+
+
+def test_infer_filetype():
+    assert cu.infer_filetype("blah.gff") == "gff"
+    assert cu.infer_filetype("blah.gff.gz") == "gff"
+    assert cu.infer_filetype("blah.xyz") is None
+    assert cu.infer_filetype("blah.bam") == "bam"
+    assert cu.infer_filetype("blah.bed.bgz") == "bedfile"
+    assert cu.infer_filetype("blah.bed") == "bedfile"
+
+
+def test_infer_datatype():
+    assert cu.infer_datatype("gff") == "bedlike"
+    assert cu.infer_datatype("cooler") == "matrix"
+    assert cu.infer_datatype("bedfile") == "bedlike"
+    assert cu.infer_datatype("bam") == "reads"
+
+
+def test_abs2genome_fn():
+    fai_filename = op.join(
+        "data", "GCA_000350705.1_Esch_coli_KTE11_V1_genomic.short.fna.fai"
+    )
+    sections = list(abs2genome_fn(fai_filename, 0, 1000))
+
+    assert len(sections) == 3
+    assert sections[0].end == 640
+
+
+def test_parse_tile_position():
+    tsinfo = TilesetInfo(
+        max_width=2**16,
+        max_zoom=4,
+        min_pos=[0, 0],
+        max_pos=[2**15 + 10, 2**15 + 10],
+    )
+
+    x = parse_tile_position([1, 2], tsinfo)
+
+    assert x.zoom == 1
+    assert x.position[0] == 2
+    assert x.start[0] == 65536
+    assert x.end[0] == 98304
+
+
+def test_parse_tile_id():
+    tsinfo = TilesetInfo(
+        max_width=2**16,
+        max_zoom=4,
+        min_pos=[0, 0],
+        max_pos=[2**15 + 10, 2**15 + 10],
+    )
+    x = parse_tile_id("uid.1.2", tsinfo)
+
+    assert x.zoom == 1
+    assert x.position[0] == 2
+    assert x.start[0] == 65536