Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
a7eed18
first pass at implementing liftover requests
jennifer-bowser May 28, 2026
aa75e7d
update 'retrieve_allele_by_expression' in HTTP client to prevent 'res…
jennifer-bowser May 28, 2026
6de4087
add definition of 'retrieve_allele_by_id' for Python client
jennifer-bowser May 28, 2026
4ed709d
fix HTTP implementation of 'get_liftover_variation_id' and add Python…
jennifer-bowser May 28, 2026
4601609
fix tests
jennifer-bowser May 28, 2026
7e1c189
clean up 'get_caf' to handle edge cases where variant/variant id are …
jennifer-bowser May 29, 2026
e7a446e
fix some failing tests
jennifer-bowser Jun 3, 2026
0e9dcb9
update import to align w/ latest AnyVar variable naming
jennifer-bowser Jun 17, 2026
e6d7fb1
update python client's 'get_liftover_variation_id' to use correct syn…
jennifer-bowser Jun 17, 2026
1fbcf8d
fix failing test
jennifer-bowser Jun 17, 2026
3fca09f
fix failing test: 'test_variant_counts_endpoint_anyvar_unavailable'
jennifer-bowser Jun 17, 2026
c760414
fix failing test
jennifer-bowser Jun 17, 2026
e20a8a9
rename tests + fixtures to reflect new function name
jennifer-bowser Jun 17, 2026
ff9a4c0
fix cassette
jennifer-bowser Jun 17, 2026
4ea0079
bring back accidentally nuked cassette + rename it
jennifer-bowser Jun 17, 2026
d5aa375
revert change to 'Nucleotide' that would've allowed multiple Nucleoti…
jennifer-bowser Jun 24, 2026
b46ea3d
use enum instead of text matching
jennifer-bowser Jun 24, 2026
5f36d3a
rename 'get_caf_*' to 'get_cafs' + fix '_validate_allele' logic
jennifer-bowser Jun 24, 2026
7104877
remove id check in '_retrieve_cafs_with_resolved_alleles', since '_va…
jennifer-bowser Jun 24, 2026
fe697f9
fix HTTP client's 'get_liftover_variation_id'
jennifer-bowser Jun 24, 2026
fd47157
rename cassettes folder from '*get_caf' to '*get_cafs'
jennifer-bowser Jun 24, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -220,3 +220,7 @@ uv.lock

# Mac things:
.DS_Store


# UTA download
uta_20241220.pgd.gz
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ classifiers = [
dependencies = [
"ga4gh.vrs>=2.3.0,<3.0",
"ga4gh.va_spec~=0.4.3",
"anyvar>=1.0,<2.0",
"anyvar @ git+https://github.com/biocommons/anyvar.git@79cf327aa9f50d4236664d76ad840cc007498549", # Points to the most recent AnyVar commit on `main` as of 06-17-26. TODO: Replace with the latest version of AnyVar once we do the next release
"fastapi>=0.95.0",
"python-multipart", # required for fastapi file uploads
"uvicorn",
Expand Down
26 changes: 23 additions & 3 deletions src/anyvlm/anyvar/base_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import abc
from collections.abc import Iterable, Sequence

from anyvar.core.objects import SupportedVrsVariation
from anyvar.mapping.liftover import ReferenceAssembly
from ga4gh.vrs.models import Allele

Expand All @@ -22,18 +23,26 @@ class BaseAnyVarClient(abc.ABC):
"""Interface elements for an AnyVar client"""

@abc.abstractmethod
def get_registered_allele(
def retrieve_allele_by_id(self, vrs_id: str) -> SupportedVrsVariation | None:
"""Retrieve VRS Allele for given VRS ID

:param vrs_id: The ID to dereference
:return: The VRS Allele, or `None` if unable to retrieve the Allele.
"""

@abc.abstractmethod
def retrieve_allele_by_expression(
self, expression: str, assembly: ReferenceAssembly = ReferenceAssembly.GRCH38
) -> Allele | None:
"""Retrieve registered VRS Allele for given allele expression
"""Retrieve VRS Allele for given allele expression

Currently, only expressions supported by the VRS-Python translator are supported.
This could change depending on the AnyVar implementation, though, and probably
can't be validated on the AnyVLM side.

:param expression: variation expression to get VRS Allele for
:param assembly: reference assembly used in expression
:return: VRS Allele if translation succeeds and VRS Allele has already been registered, else `None`
:return: VRS Allele if translation succeeds, else `None`
"""

@abc.abstractmethod
Expand All @@ -54,6 +63,17 @@ def put_allele_expressions(
else `None`, for the i'th expression
"""

@abc.abstractmethod
def get_liftover_variation_id(
self, vrs_id: str, starting_assembly: ReferenceAssembly
) -> str | None:
"""Get the VRS ID of the lifted-over equivalent of a variation.

Abstract interface method: it has no body here, so the actual lookup is
supplied by each concrete AnyVar client.

:param vrs_id: The VRS ID of the variation to lift over
:param starting_assembly: The assembly to liftover FROM (i.e., the assembly of the starting variant)
:return: The VRS ID of the lifted-over variation, or `None` if liftover is unsuccessful
"""

@abc.abstractmethod
def close(self) -> None:
"""Clean up AnyVar connection."""
68 changes: 53 additions & 15 deletions src/anyvlm/anyvar/http_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,23 @@
from typing import Literal

import requests
from anyvar.core.metadata import VariationMapping
from anyvar.mapping.liftover import ReferenceAssembly
from anyvar.restapi.schema import GetObjectResponse, RegisterVariationResponse
from anyvar.restapi.schema import (
GetMappingResponse,
GetObjectResponse,
RegisterVariationResponse,
)
from ga4gh.vrs import VrsType, models
from requests.models import Response

from anyvlm.anyvar.base_client import (
AnyVarClientConnectionError,
AnyVarClientError,
BaseAnyVarClient,
)
from anyvlm.utils.exceptions import LiftoverError
from anyvlm.utils.functions import validate_allele

_logger = logging.getLogger(__name__)

Expand All @@ -36,9 +44,11 @@ def __init__(

def _make_http_request(
self,
method: Literal[HTTPMethod.POST] | Literal[HTTPMethod.PUT],
method: Literal[HTTPMethod.POST]
| Literal[HTTPMethod.PUT]
| Literal[HTTPMethod.GET],
url: str,
payload: dict | list,
payload: dict | list | None = None,
) -> requests.Response:
"""Issue an HTTP request to an AnyVar server.

Expand Down Expand Up @@ -71,18 +81,29 @@ def _make_http_request(
raise
return response

def get_registered_allele(
def retrieve_allele_by_id(self, vrs_id: str) -> models.Allele | None:
    """Dereference a VRS ID into its VRS Allele via the AnyVar REST API.

    Issues a GET against ``{hostname}/object/{vrs_id}``, parses the JSON body
    as a ``GetObjectResponse`` and hands its ``data`` to ``validate_allele``.

    :param vrs_id: The ID to dereference
    :return: The VRS Allele, or `None` if unable to retrieve the Allele.
    """
    # NOTE(review): whether a non-Allele payload yields `None` or an exception
    # is decided by `validate_allele`, which is defined elsewhere - confirm.
    object_response = self._make_http_request(
        method=HTTPMethod.GET, url=f"{self.hostname}/object/{vrs_id}"
    )
    parsed: GetObjectResponse = GetObjectResponse(**object_response.json())
    return validate_allele(allele=parsed.data)

def retrieve_allele_by_expression(
self, expression: str, assembly: ReferenceAssembly = ReferenceAssembly.GRCH38
) -> models.Allele | None:
"""Retrieve registered VRS Allele for given allele expression
"""Retrieve VRS Allele for given allele expression

Currently, only expressions supported by the VRS-Python translator are supported.
This could change depending on the AnyVar implementation, though, and probably
can't be validated on the AnyVLM side.

:param expression: variation expression to get VRS Allele for
:param assembly: reference assembly used in expression
:return: VRS Allele if translation succeeds and VRS Allele has already been registered, else `None`
:return: VRS Allele if translation succeeds, else `None`
"""
url = f"{self.hostname}/variation"
payload = {
Expand All @@ -91,21 +112,16 @@ def get_registered_allele(
"input_type": VrsType.ALLELE.value,
}
try:
response = self._make_http_request(HTTPMethod.POST, url, payload)
response: Response = self._make_http_request(HTTPMethod.PUT, url, payload)
except requests.HTTPError as e:
if e.response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY:
_logger.debug(
"Translation failed for variant expression '%s'", expression
)
return None
return None

if e.response.status_code == HTTPStatus.NOT_FOUND:
_logger.debug("No variation found for expression '%s'", expression)
return None
raise AnyVarClientError from e

validated_response = GetObjectResponse(**response.json())
return validated_response.data # type: ignore (input_type=Allele guarantees return type)
validated_response = RegisterVariationResponse(**response.json())
return validated_response.object

def put_allele_expressions(
self,
Expand Down Expand Up @@ -139,6 +155,28 @@ def put_allele_expressions(
raise AnyVarClientError from e
return [RegisterVariationResponse(**r).object_id for r in response.json()]

def get_liftover_variation_id(
    self, vrs_id: str, starting_assembly: ReferenceAssembly
) -> str | None:
    """Get the VRS ID for the lifted-over equivalent of the variation specified by the provided VRS ID.

    Queries the ``liftover_to`` mappings of the object. A GRCh37 starting
    variant is looked up as the mapping *source* and the mapping's
    destination ID is returned; for any other starting assembly the variant
    is looked up as the mapping *destination* and the source ID is returned.

    :param vrs_id: The VRS ID of the variation to lift over
    :param starting_assembly: The assembly to liftover FROM (i.e., the assembly of the starting variant)
    :return: The VRS ID of the lifted-over variation, or `None` if liftover is unsuccessful
    :raises LiftoverError: if more than one liftover mapping is returned
    """
    as_source: bool = starting_assembly == ReferenceAssembly.GRCH37
    url: str = f"{self.hostname}/object/{vrs_id}/mappings/liftover_to?as_source={as_source}"
    response = self._make_http_request(HTTPMethod.GET, url)
    validated_response: GetMappingResponse = GetMappingResponse(**response.json())

    variation_mappings: list[VariationMapping] = list(validated_response.mappings)
    if not variation_mappings:
        # No liftover mapping recorded: report "unsuccessful" as `None`
        # instead of failing with IndexError on the empty list.
        return None
    if len(variation_mappings) > 1:
        error_message: str = "Multiple liftover mappings found"
        raise LiftoverError(error_message)

    mapping_result: VariationMapping = variation_mappings[0]
    return mapping_result.dest_id if as_source else mapping_result.source_id

def close(self) -> None:
"""Clean up AnyVar connection.

Expand Down
39 changes: 36 additions & 3 deletions src/anyvlm/anyvar/python_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from collections.abc import Iterable, Sequence

from anyvar import AnyVar
from anyvar.core.metadata import VariationMapping, VariationMappingType
from anyvar.core.objects import SupportedVrsVariation
from anyvar.mapping.liftover import ReferenceAssembly
from anyvar.restapi.schema import SupportedVariationType
from anyvar.storage.base import Storage
Expand Down Expand Up @@ -53,18 +55,26 @@ def _translate_allele_expression(
_logger.exception("Failed to translate expression: %s", expression)
return translated_variation # type: ignore

def get_registered_allele(
def retrieve_allele_by_id(self, vrs_id: str) -> SupportedVrsVariation | None:
    """Dereference a VRS ID into its VRS Allele using the in-process AnyVar instance.

    :param vrs_id: The ID to dereference
    :return: The VRS Allele, or `None` if unable to retrieve the Allele.
    """
    # NOTE(review): behaviour for an unknown ID (None vs. exception) is
    # whatever `AnyVar.get_object` does - confirm against AnyVar.
    stored_object = self.av.get_object(object_id=vrs_id, object_type=Allele)
    return stored_object

def retrieve_allele_by_expression(
self, expression: str, assembly: ReferenceAssembly = ReferenceAssembly.GRCH38
) -> Allele | None:
"""Retrieve registered VRS Allele for given allele expression
"""Retrieve VRS Allele for given allele expression

Currently, only expressions supported by the VRS-Python translator are supported.
This could change depending on the AnyVar implementation, though, and probably
can't be validated on the AnyVLM side.

:param expression: variation expression to get VRS Allele for
:param assembly: reference assembly used in expression
:return: VRS Allele if translation succeeds and VRS Allele has already been registered, else `None`
:return: VRS Allele if translation succeeds, else `None`
"""
translated_variation = self._translate_allele_expression(expression, assembly)
if not translated_variation:
Expand Down Expand Up @@ -109,6 +119,29 @@ def put_allele_expressions(
results.append(None)
return results

def get_liftover_variation_id(
    self, vrs_id: str, starting_assembly: ReferenceAssembly
) -> str | None:
    """Look up the VRS ID of the lifted-over counterpart of a variation.

    Only the first ``LIFTOVER_TO`` mapping returned by AnyVar is considered.

    :param vrs_id: The VRS ID of the variation to lift over
    :param starting_assembly: The assembly to liftover FROM (i.e., the assembly of the starting variant)
    :return: The VRS ID of the lifted-over variation, or `None` if liftover is unsuccessful
    """
    # A GRCh37 variant is queried as the mapping source; anything else is
    # queried as the mapping destination.
    from_grch37: bool = starting_assembly == ReferenceAssembly.GRCH37
    mappings: Iterable[VariationMapping] = self.av.get_object_mappings(
        object_id=vrs_id,
        mapping_type=VariationMappingType.LIFTOVER_TO,
        as_source=from_grch37,
    )

    first_mapping: VariationMapping | None = next(iter(mappings), None)
    if not first_mapping:
        return None
    if from_grch37:
        return first_mapping.dest_id
    return first_mapping.source_id

def close(self) -> None:
"""Clean up AnyVar instance."""
_logger.info("Closing AnyVar client.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
import logging

from ga4gh.core.models import iriReference
from ga4gh.vrs.models import Allele

from anyvlm.anyvar.base_client import BaseAnyVarClient
from anyvlm.storage.base_storage import Storage
from anyvlm.utils.functions import validate_allele
from anyvlm.utils.types import (
ASSEMBLY_MAP,
AnyVlmCohortAlleleFrequencyResult,
Expand All @@ -18,11 +20,27 @@
_logger = logging.getLogger(__name__)


class VariantNotRegisteredError(Exception):
"""Raised when a variant is not registered in the AnyVar client"""
def _retrieve_cafs_with_resolved_alleles(
variation: Allele, anyvlm_storage: Storage
) -> list[AnyVlmCohortAlleleFrequencyResult]:
"""Retrieve CAF data for a resolved allele.

:param variation: The allele to retrieve CAF data for
:param anyvlm_storage: The storage for this AnyVLM instance
:return: A list of AnyVlmCohortAlleleFrequencyResult objects
"""
cafs: list[AnyVlmCohortAlleleFrequencyResult] = (
anyvlm_storage.get_cafs_by_vrs_allele_id(vrs_allele_id=variation.id) # pyright: ignore[reportArgumentType]
)

for caf in cafs:
if isinstance(caf.focusAllele, iriReference):

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this necessary? it should always be one type, right?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was part of the original get_caf function. I don't think we prevent people from submitting Alleles with iriReferences, so I think we need this to make sure we catch any of those?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah interesting. I think these come directly from storage so they should always be IRIs, but if it's not new code then probably not worth figuring out

caf.focusAllele = variation

return cafs

def get_caf(

def get_cafs(
anyvar_client: BaseAnyVarClient,
anyvlm_storage: Storage,
assembly_id: GrcAssemblyId | UcscAssemblyBuild,
Expand All @@ -43,7 +61,7 @@ def get_caf(
:param reference_bases: Single genomic base (A/G/C/T)
:param alternate_bases: Single genomic base (A/G/C/T)
:raises ValueError: if unsupported assembly ID is provided
:raises VariantNotRegisteredError: if variant is not registered in AnyVar
:raises VariantLookupError: if variant is not registered in AnyVar
:return: list of AnyVlmCohortAlleleFrequencyResult objects for the provided variant
"""
gnomad_vcf: str = f"{reference_name}-{start}-{reference_base}-{alternate_base}"
Expand All @@ -53,18 +71,32 @@ def get_caf(
msg = "Unsupported assembly ID: {assembly_id}"
raise ValueError(msg) from e

vrs_variation = anyvar_client.get_registered_allele(gnomad_vcf, assembly)
if not vrs_variation:
msg = f"Variant {assembly.value} {gnomad_vcf} is not registered in AnyVar"
_logger.debug(msg)
raise VariantNotRegisteredError(msg)
vrs_variation: Allele = validate_allele(
allele=anyvar_client.retrieve_allele_by_expression(gnomad_vcf, assembly)
)

cafs: list[AnyVlmCohortAlleleFrequencyResult] = (
anyvlm_storage.get_caf_by_vrs_allele_id(vrs_variation.id) # type: ignore
_retrieve_cafs_with_resolved_alleles(
variation=vrs_variation, anyvlm_storage=anyvlm_storage
)
)

for caf in cafs:
if isinstance(caf.focusAllele, iriReference):
caf.focusAllele = vrs_variation
liftover_vrs_id: str | None = anyvar_client.get_liftover_variation_id(
vrs_id=vrs_variation.id, # type: ignore
starting_assembly=assembly,
)

if liftover_vrs_id:
liftover_variation: Allele = validate_allele(
allele=anyvar_client.retrieve_allele_by_id(vrs_id=liftover_vrs_id)
)

liftover_cafs: list[AnyVlmCohortAlleleFrequencyResult] = (
_retrieve_cafs_with_resolved_alleles(
variation=liftover_variation, anyvlm_storage=anyvlm_storage
)
)

cafs.extend(liftover_cafs)

return cafs
7 changes: 4 additions & 3 deletions src/anyvlm/restapi/vlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,12 @@

from anyvlm.anyvar.base_client import AnyVarClientConnectionError, BaseAnyVarClient
from anyvlm.functions.build_vlm_response import build_vlm_response
from anyvlm.functions.get_caf import VariantNotRegisteredError, get_caf
from anyvlm.functions.get_cafs import get_cafs
from anyvlm.functions.ingest_vcf import VcfAfColumnsError
from anyvlm.functions.ingest_vcf import ingest_vcf as ingest_vcf_function
from anyvlm.schemas.vlm import VlmResponse
from anyvlm.storage.base_storage import Storage
from anyvlm.utils.exceptions import VariantLookupError
from anyvlm.utils.types import (
AnyVlmCohortAlleleFrequencyResult,
ChromosomeName,
Expand Down Expand Up @@ -313,7 +314,7 @@ def variant_counts(
anyvlm_storage: Storage = request.app.state.anyvlm_storage

try:
caf_data: list[AnyVlmCohortAlleleFrequencyResult] = get_caf(
caf_data: list[AnyVlmCohortAlleleFrequencyResult] = get_cafs(
anyvar_client,
anyvlm_storage,
assemblyId,
Expand All @@ -322,7 +323,7 @@ def variant_counts(
referenceBases,
alternateBases,
)
except VariantNotRegisteredError:
except VariantLookupError:
caf_data = []
except AnyVarClientConnectionError as e:
raise HTTPException(
Expand Down
Loading
Loading