diff --git a/.gitignore b/.gitignore index ff290af..2e2f92a 100644 --- a/.gitignore +++ b/.gitignore @@ -220,3 +220,7 @@ uv.lock # Mac things: .DS_Store + + +# UTA download +uta_20241220.pgd.gz diff --git a/pyproject.toml b/pyproject.toml index ddfcc1c..4f2071c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ classifiers = [ dependencies = [ "ga4gh.vrs>=2.3.0,<3.0", "ga4gh.va_spec~=0.4.3", - "anyvar>=1.0,<2.0", + "anyvar @ git+https://github.com/biocommons/anyvar.git@79cf327aa9f50d4236664d76ad840cc007498549", # Points to the most recent AnyVar commit on `main` as of 06-17-26. TODO: Replace with the latest version of AnyVar once we do the next release "fastapi>=0.95.0", "python-multipart", # required for fastapi file uploads "uvicorn", diff --git a/src/anyvlm/anyvar/base_client.py b/src/anyvlm/anyvar/base_client.py index 2679aae..6ac83e5 100644 --- a/src/anyvlm/anyvar/base_client.py +++ b/src/anyvlm/anyvar/base_client.py @@ -3,6 +3,7 @@ import abc from collections.abc import Iterable, Sequence +from anyvar.core.objects import SupportedVrsVariation from anyvar.mapping.liftover import ReferenceAssembly from ga4gh.vrs.models import Allele @@ -22,10 +23,18 @@ class BaseAnyVarClient(abc.ABC): """Interface elements for an AnyVar client""" @abc.abstractmethod - def get_registered_allele( + def retrieve_allele_by_id(self, vrs_id: str) -> SupportedVrsVariation | None: + """Retrieve VRS Allele for given VRS ID + + :param vrs_id: The ID to dereference + :return: The VRS Allele, or `None` if unable to retrieve the Allele. + """ + + @abc.abstractmethod + def retrieve_allele_by_expression( self, expression: str, assembly: ReferenceAssembly = ReferenceAssembly.GRCH38 ) -> Allele | None: - """Retrieve registered VRS Allele for given allele expression + """Retrieve VRS Allele for given allele expression Currently, only expressions supported by the VRS-Python translator are supported. This could change depending on the AnyVar implementation, though, and probably @@ -33,7 +42,7 @@ def get_registered_allele( :param expression: variation expression to get VRS Allele for :param assembly: reference assembly used in expression - :return: VRS Allele if translation succeeds and VRS Allele has already been registered, else `None` + :return: VRS Allele if translation succeeds, else `None` """ @abc.abstractmethod @@ -54,6 +63,17 @@ def put_allele_expressions( else `None`, for the i'th expression """ + @abc.abstractmethod + def get_liftover_variation_id( + self, vrs_id: str, starting_assembly: ReferenceAssembly + ) -> str | None: + """Get the VRS ID for the lifted-over equivalent of the variation specified by the provided VRS ID. + + :param vrs_id: The VRS ID of the variation to lift over + :param starting_assembly: The assembly to liftover FROM (i.e., the assembly of the starting variant) + :return: The VRS ID of the lifted-over variation, or `None` if liftover is unsuccessful + """ + @abc.abstractmethod def close(self) -> None: """Clean up AnyVar connection.""" diff --git a/src/anyvlm/anyvar/http_client.py b/src/anyvlm/anyvar/http_client.py index b5ce003..e3054d2 100644 --- a/src/anyvlm/anyvar/http_client.py +++ b/src/anyvlm/anyvar/http_client.py @@ -6,15 +6,23 @@ from typing import Literal import requests +from anyvar.core.metadata import VariationMapping from anyvar.mapping.liftover import ReferenceAssembly -from anyvar.restapi.schema import GetObjectResponse, RegisterVariationResponse +from anyvar.restapi.schema import ( + GetMappingResponse, + GetObjectResponse, + RegisterVariationResponse, +) from ga4gh.vrs import VrsType, models +from requests.models import Response from anyvlm.anyvar.base_client import ( AnyVarClientConnectionError, AnyVarClientError, BaseAnyVarClient, ) +from anyvlm.utils.exceptions import LiftoverError +from anyvlm.utils.functions import validate_allele _logger = logging.getLogger(__name__) @@ -36,9 +44,11 @@ def __init__( def _make_http_request( self, - method: Literal[HTTPMethod.POST] | Literal[HTTPMethod.PUT], + method: Literal[HTTPMethod.POST] + | Literal[HTTPMethod.PUT] + | Literal[HTTPMethod.GET], url: str, - payload: dict | list, + payload: dict | list | None = None, ) -> requests.Response: """Issue an HTTP request to an AnyVar server. @@ -71,10 +81,21 @@ def _make_http_request( raise return response - def get_registered_allele( + def retrieve_allele_by_id(self, vrs_id: str) -> models.Allele | None: + """Retrieve VRS Allele for given VRS ID + + :param vrs_id: The ID to dereference + :return: The VRS Allele, or `None` if unable to retrieve the Allele. + """ + url = f"{self.hostname}/object/{vrs_id}" + response = self._make_http_request(method=HTTPMethod.GET, url=url) + validated_response: GetObjectResponse = GetObjectResponse(**response.json()) + return validate_allele(allele=validated_response.data) + + def retrieve_allele_by_expression( self, expression: str, assembly: ReferenceAssembly = ReferenceAssembly.GRCH38 ) -> models.Allele | None: - """Retrieve registered VRS Allele for given allele expression + """Retrieve VRS Allele for given allele expression Currently, only expressions supported by the VRS-Python translator are supported. This could change depending on the AnyVar implementation, though, and probably @@ -82,7 +103,7 @@ def get_registered_allele( :param expression: variation expression to get VRS Allele for :param assembly: reference assembly used in expression - :return: VRS Allele if translation succeeds and VRS Allele has already been registered, else `None` + :return: VRS Allele if translation succeeds, else `None` """ url = f"{self.hostname}/variation" payload = { @@ -91,21 +112,16 @@ def get_registered_allele( "input_type": VrsType.ALLELE.value, } try: - response = self._make_http_request(HTTPMethod.POST, url, payload) + response: Response = self._make_http_request(HTTPMethod.PUT, url, payload) except requests.HTTPError as e: if e.response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY: _logger.debug( "Translation failed for variant expression '%s'", expression ) - return None + return None - if e.response.status_code == HTTPStatus.NOT_FOUND: - _logger.debug("No variation found for expression '%s'", expression) - return None - raise AnyVarClientError from e - - validated_response = GetObjectResponse(**response.json()) - return validated_response.data # type: ignore (input_type=Allele guarantees return type) + validated_response = RegisterVariationResponse(**response.json()) + return validated_response.object def put_allele_expressions( self, @@ -139,6 +155,28 @@ def put_allele_expressions( raise AnyVarClientError from e return [RegisterVariationResponse(**r).object_id for r in response.json()] + def get_liftover_variation_id( + self, vrs_id: str, starting_assembly: ReferenceAssembly + ) -> str | None: + """Get the VRS ID for the lifted-over equivalent of the variation specified by the provided VRS ID. + + :param vrs_id: The VRS ID of the variation to lift over + :param starting_assembly: The assembly to liftover FROM (i.e., the assembly of the starting variant) + :return: The VRS ID of the lifted-over variation, or `None` if liftover is unsuccessful + """ + as_source: bool = starting_assembly == ReferenceAssembly.GRCH37 + url: str = f"{self.hostname}/object/{vrs_id}/mappings/liftover_to?as_source={as_source}" + response = self._make_http_request(HTTPMethod.GET, url) + validated_response: GetMappingResponse = GetMappingResponse(**response.json()) + + variation_mappings: list[VariationMapping] = list(validated_response.mappings) + if len(variation_mappings) > 1: + error_message: str = "Multiple liftover mappings found" + raise LiftoverError(error_message) + + mapping_result: VariationMapping = variation_mappings[0] + return mapping_result.dest_id if as_source else mapping_result.source_id + def close(self) -> None: """Clean up AnyVar connection. diff --git a/src/anyvlm/anyvar/python_client.py b/src/anyvlm/anyvar/python_client.py index 2a3d2a8..85ea297 100644 --- a/src/anyvlm/anyvar/python_client.py +++ b/src/anyvlm/anyvar/python_client.py @@ -4,6 +4,8 @@ from collections.abc import Iterable, Sequence from anyvar import AnyVar +from anyvar.core.metadata import VariationMapping, VariationMappingType +from anyvar.core.objects import SupportedVrsVariation from anyvar.mapping.liftover import ReferenceAssembly from anyvar.restapi.schema import SupportedVariationType from anyvar.storage.base import Storage @@ -53,10 +55,18 @@ def _translate_allele_expression( _logger.exception("Failed to translate expression: %s", expression) return translated_variation # type: ignore - def get_registered_allele( + def retrieve_allele_by_id(self, vrs_id: str) -> SupportedVrsVariation | None: + """Retrieve VRS Allele for given VRS ID + + :param vrs_id: The ID to dereference + :return: The VRS Allele, or `None` if unable to retrieve the Allele. + """ + return self.av.get_object(object_id=vrs_id, object_type=Allele) + + def retrieve_allele_by_expression( self, expression: str, assembly: ReferenceAssembly = ReferenceAssembly.GRCH38 ) -> Allele | None: - """Retrieve registered VRS Allele for given allele expression + """Retrieve VRS Allele for given allele expression Currently, only expressions supported by the VRS-Python translator are supported. This could change depending on the AnyVar implementation, though, and probably @@ -64,7 +74,7 @@ def get_registered_allele( :param expression: variation expression to get VRS Allele for :param assembly: reference assembly used in expression - :return: VRS Allele if translation succeeds and VRS Allele has already been registered, else `None` + :return: VRS Allele if translation succeeds, else `None` """ translated_variation = self._translate_allele_expression(expression, assembly) if not translated_variation: @@ -109,6 +119,29 @@ def put_allele_expressions( results.append(None) return results + def get_liftover_variation_id( + self, vrs_id: str, starting_assembly: ReferenceAssembly + ) -> str | None: + """Get the VRS ID for the lifted-over equivalent of the variation specified by the provided VRS ID. + + :param vrs_id: The VRS ID of the variation to lift over + :param starting_assembly: The assembly to liftover FROM (i.e., the assembly of the starting variant) + :return: The VRS ID of the lifted-over variation, or `None` if liftover is unsuccessful + """ + as_source: bool = starting_assembly == ReferenceAssembly.GRCH37 + liftover_mappings: Iterable[VariationMapping] = self.av.get_object_mappings( + object_id=vrs_id, + mapping_type=VariationMappingType.LIFTOVER_TO, + as_source=as_source, + ) + liftover_mapping: VariationMapping | None = next(iter(liftover_mappings), None) + + return ( + (liftover_mapping.dest_id if as_source else liftover_mapping.source_id) + if liftover_mapping + else None + ) + def close(self) -> None: """Clean up AnyVar instance.""" _logger.info("Closing AnyVar client.") diff --git a/src/anyvlm/functions/get_caf.py b/src/anyvlm/functions/get_cafs.py similarity index 54% rename from src/anyvlm/functions/get_caf.py rename to src/anyvlm/functions/get_cafs.py index b62994c..bf3a55b 100644 --- a/src/anyvlm/functions/get_caf.py +++ b/src/anyvlm/functions/get_cafs.py @@ -3,9 +3,11 @@ import logging from ga4gh.core.models import iriReference +from ga4gh.vrs.models import Allele from anyvlm.anyvar.base_client import BaseAnyVarClient from anyvlm.storage.base_storage import Storage +from anyvlm.utils.functions import validate_allele from anyvlm.utils.types import ( ASSEMBLY_MAP, AnyVlmCohortAlleleFrequencyResult, @@ -18,11 +20,27 @@ _logger = logging.getLogger(__name__) -class VariantNotRegisteredError(Exception): - """Raised when a variant is not registered in the AnyVar client""" +def _retrieve_cafs_with_resolved_alleles( + variation: Allele, anyvlm_storage: Storage +) -> list[AnyVlmCohortAlleleFrequencyResult]: + """Retrieve CAF data for a resolved allele. + + :param variation: The allele to retrieve CAF data for + :param anyvlm_storage: The storage for this AnyVLM instance + :return: A list of AnyVlmCohortAlleleFrequencyResult objects + """ + cafs: list[AnyVlmCohortAlleleFrequencyResult] = ( + anyvlm_storage.get_cafs_by_vrs_allele_id(vrs_allele_id=variation.id) # pyright: ignore[reportArgumentType] + ) + for caf in cafs: + if isinstance(caf.focusAllele, iriReference): + caf.focusAllele = variation + + return cafs -def get_caf( + +def get_cafs( anyvar_client: BaseAnyVarClient, anyvlm_storage: Storage, assembly_id: GrcAssemblyId | UcscAssemblyBuild, @@ -43,7 +61,7 @@ def get_caf( :param reference_bases: Single genomic base (A/G/C/T) :param alternate_bases: Single genomic base (A/G/C/T) :raises ValueError: if unsupported assembly ID is provided - :raises VariantNotRegisteredError: if variant is not registered in AnyVar + :raises VariantLookupError: if variant is not registered in AnyVar :return: list of AnyVlmCohortAlleleFrequencyResult objects for the provided variant """ gnomad_vcf: str = f"{reference_name}-{start}-{reference_base}-{alternate_base}" @@ -53,18 +71,32 @@ def get_caf( msg = "Unsupported assembly ID: {assembly_id}" raise ValueError(msg) from e - vrs_variation = anyvar_client.get_registered_allele(gnomad_vcf, assembly) - if not vrs_variation: - msg = f"Variant {assembly.value} {gnomad_vcf} is not registered in AnyVar" - _logger.debug(msg) - raise VariantNotRegisteredError(msg) + vrs_variation: Allele = validate_allele( + allele=anyvar_client.retrieve_allele_by_expression(gnomad_vcf, assembly) + ) cafs: list[AnyVlmCohortAlleleFrequencyResult] = ( - anyvlm_storage.get_caf_by_vrs_allele_id(vrs_variation.id) # type: ignore + _retrieve_cafs_with_resolved_alleles( + variation=vrs_variation, anyvlm_storage=anyvlm_storage + ) ) - for caf in cafs: - if isinstance(caf.focusAllele, iriReference): - caf.focusAllele = vrs_variation + liftover_vrs_id: str | None = anyvar_client.get_liftover_variation_id( + vrs_id=vrs_variation.id, # type: ignore + starting_assembly=assembly, + ) + + if liftover_vrs_id: + liftover_variation: Allele = validate_allele( + allele=anyvar_client.retrieve_allele_by_id(vrs_id=liftover_vrs_id) + ) + + liftover_cafs: list[AnyVlmCohortAlleleFrequencyResult] = ( + _retrieve_cafs_with_resolved_alleles( + variation=liftover_variation, anyvlm_storage=anyvlm_storage + ) + ) + + cafs.extend(liftover_cafs) return cafs diff --git a/src/anyvlm/restapi/vlm.py b/src/anyvlm/restapi/vlm.py index fdb7e4a..4c7f18e 100644 --- a/src/anyvlm/restapi/vlm.py +++ b/src/anyvlm/restapi/vlm.py @@ -20,11 +20,12 @@ from anyvlm.anyvar.base_client import AnyVarClientConnectionError, BaseAnyVarClient from anyvlm.functions.build_vlm_response import build_vlm_response -from anyvlm.functions.get_caf import VariantNotRegisteredError, get_caf +from anyvlm.functions.get_cafs import get_cafs from anyvlm.functions.ingest_vcf import VcfAfColumnsError from anyvlm.functions.ingest_vcf import ingest_vcf as ingest_vcf_function from anyvlm.schemas.vlm import VlmResponse from anyvlm.storage.base_storage import Storage +from anyvlm.utils.exceptions import VariantLookupError from anyvlm.utils.types import ( AnyVlmCohortAlleleFrequencyResult, ChromosomeName, @@ -313,7 +314,7 @@ def variant_counts( anyvlm_storage: Storage = request.app.state.anyvlm_storage try: - caf_data: list[AnyVlmCohortAlleleFrequencyResult] = get_caf( + caf_data: list[AnyVlmCohortAlleleFrequencyResult] = get_cafs( anyvar_client, anyvlm_storage, assemblyId, @@ -322,7 +323,7 @@ def variant_counts( referenceBases, alternateBases, ) - except VariantNotRegisteredError: + except VariantLookupError: caf_data = [] except AnyVarClientConnectionError as e: raise HTTPException( diff --git a/src/anyvlm/storage/base_storage.py b/src/anyvlm/storage/base_storage.py index 3d78196..c31a262 100644 --- a/src/anyvlm/storage/base_storage.py +++ b/src/anyvlm/storage/base_storage.py @@ -39,7 +39,7 @@ def add_allele_frequencies( """ @abstractmethod - def get_caf_by_vrs_allele_id( + def get_cafs_by_vrs_allele_id( self, vrs_allele_id: str ) -> list[AnyVlmCohortAlleleFrequencyResult]: """Retrieve cohort allele frequency study results by VRS Allele ID diff --git a/src/anyvlm/storage/postgres.py b/src/anyvlm/storage/postgres.py index 07d9544..0be1571 100644 --- a/src/anyvlm/storage/postgres.py +++ b/src/anyvlm/storage/postgres.py @@ -70,7 +70,7 @@ def add_allele_frequencies( with self.session_factory() as session, session.begin(): session.execute(stmt, [entity.to_dict() for entity in db_entities]) - def get_caf_by_vrs_allele_id( + def get_cafs_by_vrs_allele_id( self, vrs_allele_id: str ) -> list[AnyVlmCohortAlleleFrequencyResult]: """Retrieve cohort allele frequency study results by VRS Allele ID diff --git a/src/anyvlm/utils/exceptions.py b/src/anyvlm/utils/exceptions.py new file mode 100644 index 0000000..5ee08ae --- /dev/null +++ b/src/anyvlm/utils/exceptions.py @@ -0,0 +1,17 @@ +"""Defines custom exceptions for AnyVLM""" + + +class IncompleteVariantError(Exception): + """Raised when a variant is missing one or more properties required by AnyVLM""" + + +class LiftoverError(Exception): + """Raised when an error occurs while attempting to lift over a variant""" + + +class UnexpectedVariantTypeError(Exception): + """Raised when .type is not of the type expected by AnyVLM""" + + +class VariantLookupError(Exception): + """Raised when a variant cannot be retrieved from AnyVar""" diff --git a/src/anyvlm/utils/functions.py b/src/anyvlm/utils/functions.py new file mode 100644 index 0000000..a2614f8 --- /dev/null +++ b/src/anyvlm/utils/functions.py @@ -0,0 +1,32 @@ +"""Defines utility functions for use throughout AnyVLM""" + +from typing import cast + +from anyvar.core.objects import SupportedVrsObject +from ga4gh.vrs.models import Allele + +from anyvlm.utils.exceptions import ( + IncompleteVariantError, + UnexpectedVariantTypeError, + VariantLookupError, +) + + +def validate_allele(allele: SupportedVrsObject | None) -> Allele: + """Validates that the provided object is indeed a VRS Allele, with all the properties AnyVLM requires + + :param allele: The allele we're validating + :return: A VRS Allele object + """ + if not allele: + raise VariantLookupError + + if not allele.id: + raise IncompleteVariantError + + try: + validated_allele: Allele = cast(Allele, allele) + except (ValueError, TypeError) as e: + raise UnexpectedVariantTypeError from e + + return validated_allele diff --git a/tests/integration/functions/cassettes/test_get_caf/test_get_caf_no_results_returned.yaml b/tests/integration/functions/cassettes/test_get_cafs/test_get_cafs_no_results_returned.yaml similarity index 100% rename from tests/integration/functions/cassettes/test_get_caf/test_get_caf_no_results_returned.yaml rename to tests/integration/functions/cassettes/test_get_cafs/test_get_cafs_no_results_returned.yaml diff --git a/tests/integration/functions/cassettes/test_get_caf/test_get_caf_results_returned.yaml b/tests/integration/functions/cassettes/test_get_cafs/test_get_cafs_results_returned.yaml similarity index 100% rename from tests/integration/functions/cassettes/test_get_caf/test_get_caf_results_returned.yaml rename to tests/integration/functions/cassettes/test_get_cafs/test_get_cafs_results_returned.yaml diff --git a/tests/integration/functions/cassettes/test_get_caf/test_get_caf_variant_not_registered.yaml b/tests/integration/functions/cassettes/test_get_cafs/test_get_cafs_variant_not_registered.yaml similarity index 100% rename from tests/integration/functions/cassettes/test_get_caf/test_get_caf_variant_not_registered.yaml rename to tests/integration/functions/cassettes/test_get_cafs/test_get_cafs_variant_not_registered.yaml diff --git a/tests/integration/functions/test_get_caf.py b/tests/integration/functions/test_get_cafs.py similarity index 62% rename from tests/integration/functions/test_get_caf.py rename to tests/integration/functions/test_get_cafs.py index 29a7008..6aa8cb8 100644 --- a/tests/integration/functions/test_get_caf.py +++ b/tests/integration/functions/test_get_cafs.py @@ -1,12 +1,13 @@ -"""Test that get_caf function works correctly""" +"""Test that get_cafs function works correctly""" import pytest from deepdiff import DeepDiff from helpers import EXPECTED_VRS_ID, TEST_VARIANT, build_caf from anyvlm.anyvar.python_client import PythonAnyVarClient -from anyvlm.functions.get_caf import VariantNotRegisteredError, get_caf +from anyvlm.functions.get_cafs import get_cafs from anyvlm.storage.postgres import PostgresObjectStore +from anyvlm.utils.exceptions import VariantLookupError from anyvlm.utils.types import AnyVlmCohortAlleleFrequencyResult @@ -20,13 +21,13 @@ def expected_cafs(caf_iri: AnyVlmCohortAlleleFrequencyResult, alleles: dict): @pytest.mark.vcr -def test_get_caf_results_returned( +def test_get_cafs_results_returned( anyvar_populated_python_client: PythonAnyVarClient, populated_postgres_storage: PostgresObjectStore, expected_cafs: list[AnyVlmCohortAlleleFrequencyResult], ): - """Test get_caf when variants are registered and results are expected""" - cafs = get_caf( + """Test get_cafs when variants are registered and results are expected""" + cafs = get_cafs( anyvar_populated_python_client, populated_postgres_storage, TEST_VARIANT.assembly, @@ -44,12 +45,12 @@ def test_get_caf_results_returned( @pytest.mark.vcr -def test_get_caf_no_results_returned( +def test_get_cafs_no_results_returned( anyvar_populated_python_client: PythonAnyVarClient, postgres_storage: PostgresObjectStore, ): - """Test get_caf when variants are registered but no results are expected""" - cafs = get_caf( + """Test get_cafs when variants are registered but no results are expected""" + cafs: list[AnyVlmCohortAlleleFrequencyResult] = get_cafs( anyvar_populated_python_client, postgres_storage, TEST_VARIANT.assembly, @@ -62,21 +63,18 @@ def test_get_caf_no_results_returned( @pytest.mark.vcr -def test_get_caf_variant_not_registered( +def test_get_cafs_variant_not_registered( anyvar_minimal_populated_python_client: PythonAnyVarClient, populated_postgres_storage: PostgresObjectStore, ): - """Test get_caf raises exception due to variant not being registered""" - with pytest.raises( - VariantNotRegisteredError, - match="Variant GRCh38 chrY-2781761-C-A is not registered in AnyVar", - ): - get_caf( - anyvar_minimal_populated_python_client, - populated_postgres_storage, - TEST_VARIANT.assembly, - TEST_VARIANT.chromosome, - TEST_VARIANT.position, - TEST_VARIANT.ref, - TEST_VARIANT.alt, + """Test get_cafs raises exception due to variant not being registered""" + with pytest.raises(VariantLookupError): + get_cafs( + anyvar_client=anyvar_minimal_populated_python_client, + anyvlm_storage=populated_postgres_storage, + assembly_id=TEST_VARIANT.assembly, + reference_name=TEST_VARIANT.chromosome, + start=TEST_VARIANT.position, + reference_base=TEST_VARIANT.ref, + alternate_base=TEST_VARIANT.alt, ) diff --git a/tests/unit/anyvar/cassettes/test_clients/test_get_registered_allele_expressions_populated[anyvar_populated_http_client].yaml b/tests/unit/anyvar/cassettes/test_clients/test_retrieve_allele_by_expression_populated[anyvar_populated_http_client].yaml similarity index 63% rename from tests/unit/anyvar/cassettes/test_clients/test_get_registered_allele_expressions_populated[anyvar_populated_http_client].yaml rename to tests/unit/anyvar/cassettes/test_clients/test_retrieve_allele_by_expression_populated[anyvar_populated_http_client].yaml index c88330c..c2fdfa3 100644 --- a/tests/unit/anyvar/cassettes/test_clients/test_get_registered_allele_expressions_populated[anyvar_populated_http_client].yaml +++ b/tests/unit/anyvar/cassettes/test_clients/test_retrieve_allele_by_expression_populated[anyvar_populated_http_client].yaml @@ -68,11 +68,11 @@ interactions: body: '{"definition": "7-140753336-A-T", "assembly_name": "GRCh38", "input_type": "Allele"}' headers: {} - method: POST + method: PUT uri: http://localhost:8000/variation response: body: - string: '{"messages":[],"data":{"id":"ga4gh:VA.Otc5ovrw906Ack087o1fhegB4jDRqCAe","type":"Allele","digest":"Otc5ovrw906Ack087o1fhegB4jDRqCAe","location":{"id":"ga4gh:SL.nhul5x5P_fKjGEpY9PEkMIekJfZaKom2","type":"SequenceLocation","digest":"nhul5x5P_fKjGEpY9PEkMIekJfZaKom2","sequenceReference":{"type":"SequenceReference","refgetAccession":"SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul"},"start":140753335,"end":140753336},"state":{"type":"LiteralSequenceExpression","sequence":"T"}}}' + string: '{"input_variation":{"definition":"7-140753336-A-T","input_type":"Allele","assembly_name":"GRCh38"},"messages":[],"object":{"id":"ga4gh:VA.Otc5ovrw906Ack087o1fhegB4jDRqCAe","type":"Allele","digest":"Otc5ovrw906Ack087o1fhegB4jDRqCAe","location":{"id":"ga4gh:SL.nhul5x5P_fKjGEpY9PEkMIekJfZaKom2","type":"SequenceLocation","digest":"nhul5x5P_fKjGEpY9PEkMIekJfZaKom2","sequenceReference":{"type":"SequenceReference","refgetAccession":"SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul"},"start":140753335,"end":140753336},"state":{"type":"LiteralSequenceExpression","sequence":"T"}},"object_id":"ga4gh:VA.Otc5ovrw906Ack087o1fhegB4jDRqCAe"}' headers: {} status: code: 200 @@ -81,11 +81,11 @@ interactions: body: '{"definition": "Y-2781704-G-G", "assembly_name": "GRCh38", "input_type": "Allele"}' headers: {} - method: POST + method: PUT uri: http://localhost:8000/variation response: body: - string: '{"messages":[],"data":{"id":"ga4gh:VA.0ydMeUtVfU9ttSoziRnp0Nv8OMN359HC","type":"Allele","digest":"0ydMeUtVfU9ttSoziRnp0Nv8OMN359HC","location":{"id":"ga4gh:SL.JqeJ3V-75edWj03xbzw1gtSw3qPQVV2D","type":"SequenceLocation","digest":"JqeJ3V-75edWj03xbzw1gtSw3qPQVV2D","sequenceReference":{"type":"SequenceReference","refgetAccession":"SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5"},"start":2781703,"end":2781704},"state":{"type":"ReferenceLengthExpression","length":1,"sequence":"G","repeatSubunitLength":1}}}' + string: '{"input_variation":{"definition":"Y-2781704-G-G","input_type":"Allele","assembly_name":"GRCh38"},"messages":[],"object":{"id":"ga4gh:VA.0ydMeUtVfU9ttSoziRnp0Nv8OMN359HC","type":"Allele","digest":"0ydMeUtVfU9ttSoziRnp0Nv8OMN359HC","location":{"id":"ga4gh:SL.JqeJ3V-75edWj03xbzw1gtSw3qPQVV2D","type":"SequenceLocation","digest":"JqeJ3V-75edWj03xbzw1gtSw3qPQVV2D","sequenceReference":{"type":"SequenceReference","refgetAccession":"SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5"},"start":2781703,"end":2781704},"state":{"type":"ReferenceLengthExpression","length":1,"sequence":"G","repeatSubunitLength":1}},"object_id":"ga4gh:VA.0ydMeUtVfU9ttSoziRnp0Nv8OMN359HC"}' headers: {} status: code: 200 @@ -94,11 +94,11 @@ interactions: body: '{"definition": "Y-2781761-C-C", "assembly_name": "GRCh38", "input_type": "Allele"}' headers: {} - method: POST + method: PUT uri: http://localhost:8000/variation response: body: - string: '{"messages":[],"data":{"id":"ga4gh:VA.R4kbmdsn5VldGrBiAaByO5N9zM3qCSFw","type":"Allele","digest":"R4kbmdsn5VldGrBiAaByO5N9zM3qCSFw","location":{"id":"ga4gh:SL.sYiBcbbgF-1CANNCTfQ6zwZOU0iHhymR","type":"SequenceLocation","digest":"sYiBcbbgF-1CANNCTfQ6zwZOU0iHhymR","sequenceReference":{"type":"SequenceReference","refgetAccession":"SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5"},"start":2781760,"end":2781761},"state":{"type":"ReferenceLengthExpression","length":1,"sequence":"C","repeatSubunitLength":1}}}' + string: '{"input_variation":{"definition":"Y-2781761-C-C","input_type":"Allele","assembly_name":"GRCh38"},"messages":[],"object":{"id":"ga4gh:VA.R4kbmdsn5VldGrBiAaByO5N9zM3qCSFw","type":"Allele","digest":"R4kbmdsn5VldGrBiAaByO5N9zM3qCSFw","location":{"id":"ga4gh:SL.sYiBcbbgF-1CANNCTfQ6zwZOU0iHhymR","type":"SequenceLocation","digest":"sYiBcbbgF-1CANNCTfQ6zwZOU0iHhymR","sequenceReference":{"type":"SequenceReference","refgetAccession":"SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5"},"start":2781760,"end":2781761},"state":{"type":"ReferenceLengthExpression","length":1,"sequence":"C","repeatSubunitLength":1}},"object_id":"ga4gh:VA.R4kbmdsn5VldGrBiAaByO5N9zM3qCSFw"}' headers: {} status: code: 200 @@ -107,11 +107,11 @@ interactions: body: '{"definition": "Y-2781761-C-A", "assembly_name": "GRCh38", "input_type": "Allele"}' headers: {} - method: POST + method: PUT uri: http://localhost:8000/variation response: body: - string: '{"messages":[],"data":{"id":"ga4gh:VA.9VDxL0stMBOZwcTKw3yb3UoWQkpaI9OD","type":"Allele","digest":"9VDxL0stMBOZwcTKw3yb3UoWQkpaI9OD","location":{"id":"ga4gh:SL.sYiBcbbgF-1CANNCTfQ6zwZOU0iHhymR","type":"SequenceLocation","digest":"sYiBcbbgF-1CANNCTfQ6zwZOU0iHhymR","sequenceReference":{"type":"SequenceReference","refgetAccession":"SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5"},"start":2781760,"end":2781761},"state":{"type":"LiteralSequenceExpression","sequence":"A"}}}' + string: '{"input_variation":{"definition":"Y-2781761-C-A","input_type":"Allele","assembly_name":"GRCh38"},"messages":[],"object":{"id":"ga4gh:VA.9VDxL0stMBOZwcTKw3yb3UoWQkpaI9OD","type":"Allele","digest":"9VDxL0stMBOZwcTKw3yb3UoWQkpaI9OD","location":{"id":"ga4gh:SL.sYiBcbbgF-1CANNCTfQ6zwZOU0iHhymR","type":"SequenceLocation","digest":"sYiBcbbgF-1CANNCTfQ6zwZOU0iHhymR","sequenceReference":{"type":"SequenceReference","refgetAccession":"SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5"},"start":2781760,"end":2781761},"state":{"type":"LiteralSequenceExpression","sequence":"A"}},"object_id":"ga4gh:VA.9VDxL0stMBOZwcTKw3yb3UoWQkpaI9OD"}' headers: {} status: code: 200 @@ -120,11 +120,11 @@ interactions: body: '{"definition": "Y-2781761-CA-C", "assembly_name": "GRCh38", "input_type": "Allele"}' headers: {} - method: POST + method: PUT uri: http://localhost:8000/variation response: body: - string: '{"messages":[],"data":{"id":"ga4gh:VA.yi7A2l0uIUMaInQaJnHU_B2Cf_OuZRJg","type":"Allele","digest":"yi7A2l0uIUMaInQaJnHU_B2Cf_OuZRJg","location":{"id":"ga4gh:SL.JsFGLKlUDocinf7oWTXAvVT2WOso7R9u","type":"SequenceLocation","digest":"JsFGLKlUDocinf7oWTXAvVT2WOso7R9u","sequenceReference":{"type":"SequenceReference","refgetAccession":"SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5"},"start":2781761,"end":2781785},"state":{"type":"ReferenceLengthExpression","length":23,"sequence":"AAAAAAAAAAAAAAAAAAAAAAA","repeatSubunitLength":1}}}' + string: '{"input_variation":{"definition":"Y-2781761-CA-C","input_type":"Allele","assembly_name":"GRCh38"},"messages":[],"object":{"id":"ga4gh:VA.yi7A2l0uIUMaInQaJnHU_B2Cf_OuZRJg","type":"Allele","digest":"yi7A2l0uIUMaInQaJnHU_B2Cf_OuZRJg","location":{"id":"ga4gh:SL.JsFGLKlUDocinf7oWTXAvVT2WOso7R9u","type":"SequenceLocation","digest":"JsFGLKlUDocinf7oWTXAvVT2WOso7R9u","sequenceReference":{"type":"SequenceReference","refgetAccession":"SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5"},"start":2781761,"end":2781785},"state":{"type":"ReferenceLengthExpression","length":23,"sequence":"AAAAAAAAAAAAAAAAAAAAAAA","repeatSubunitLength":1}},"object_id":"ga4gh:VA.yi7A2l0uIUMaInQaJnHU_B2Cf_OuZRJg"}' headers: {} status: code: 200 diff --git a/tests/unit/anyvar/cassettes/test_clients/test_get_registered_allele_expressions_populated[anyvar_populated_python_client].yaml b/tests/unit/anyvar/cassettes/test_clients/test_retrieve_allele_by_expression_populated[anyvar_populated_python_client].yaml similarity index 100% rename from tests/unit/anyvar/cassettes/test_clients/test_get_registered_allele_expressions_populated[anyvar_populated_python_client].yaml rename to tests/unit/anyvar/cassettes/test_clients/test_retrieve_allele_by_expression_populated[anyvar_populated_python_client].yaml diff --git a/tests/unit/anyvar/cassettes/test_clients/test_get_registered_allele_expressions_unpopulated[anyvar_http_client].yaml b/tests/unit/anyvar/cassettes/test_clients/test_retrieve_allele_by_expression_unpopulated[anyvar_http_client].yaml similarity index 95% rename from tests/unit/anyvar/cassettes/test_clients/test_get_registered_allele_expressions_unpopulated[anyvar_http_client].yaml rename to tests/unit/anyvar/cassettes/test_clients/test_retrieve_allele_by_expression_unpopulated[anyvar_http_client].yaml index 681e180..d9da21a 100644 --- a/tests/unit/anyvar/cassettes/test_clients/test_get_registered_allele_expressions_unpopulated[anyvar_http_client].yaml +++ b/tests/unit/anyvar/cassettes/test_clients/test_retrieve_allele_by_expression_unpopulated[anyvar_http_client].yaml @@ -3,7 +3,7 @@ interactions: body: '{"definition": "7-140753336-A-T", "assembly_name": "GRCh38", "input_type": "Allele"}' headers: {} - method: POST + method: PUT uri: http://localhost:8000/variation response: body: @@ -17,7 +17,7 @@ interactions: body: '{"definition": "Y-2781704-G-G", "assembly_name": "GRCh38", "input_type": "Allele"}' headers: {} - method: POST + method: PUT uri: http://localhost:8000/variation response: body: @@ -31,7 +31,7 @@ interactions: body: '{"definition": "Y-2781761-C-C", "assembly_name": "GRCh38", "input_type": "Allele"}' headers: {} - method: POST + method: PUT uri: http://localhost:8000/variation response: body: @@ -45,7 +45,7 @@ interactions: body: '{"definition": "Y-2781761-C-A", "assembly_name": "GRCh38", "input_type": "Allele"}' headers: {} - method: POST + method: PUT uri: http://localhost:8000/variation response: body: @@ -59,7 +59,7 @@ interactions: body: '{"definition": "Y-2781761-CA-C", "assembly_name": "GRCh38", "input_type": "Allele"}' headers: {} - method: POST + method: PUT uri: http://localhost:8000/variation response: body: diff --git a/tests/unit/anyvar/cassettes/test_clients/test_get_registered_allele_expressions_unpopulated[anyvar_python_client].yaml b/tests/unit/anyvar/cassettes/test_clients/test_retrieve_allele_by_expression_unpopulated[anyvar_python_client].yaml similarity index 100% rename from tests/unit/anyvar/cassettes/test_clients/test_get_registered_allele_expressions_unpopulated[anyvar_python_client].yaml rename to tests/unit/anyvar/cassettes/test_clients/test_retrieve_allele_by_expression_unpopulated[anyvar_python_client].yaml diff --git a/tests/unit/anyvar/test_clients.py b/tests/unit/anyvar/test_clients.py index 0e8fca4..76d6ca6 100644 --- a/tests/unit/anyvar/test_clients.py +++ b/tests/unit/anyvar/test_clients.py @@ -54,29 +54,31 @@ def anyvar_client(request): @pytest.mark.vcr @pytest.mark.parametrize("anyvar_client", UNPOPULATED_CLIENTS, indirect=True) -def test_get_registered_allele_expressions_unpopulated( +def test_retrieve_allele_by_expression_unpopulated( anyvar_client: BaseAnyVarClient, alleles: dict ): - """Test `get_registered_allele_expressions` for an unpopulated client""" + """Test `retrieve_allele_by_expression` for an unpopulated client""" for allele_fixture in alleles.values(): if "vcf_expression" not in allele_fixture: continue assert ( - anyvar_client.get_registered_allele(allele_fixture["vcf_expression"]) + anyvar_client.retrieve_allele_by_expression( + allele_fixture["vcf_expression"] + ) is None ) @pytest.mark.vcr @pytest.mark.parametrize("anyvar_client", POPULATED_CLIENTS, indirect=True) -def test_get_registered_allele_expressions_populated( +def test_retrieve_allele_by_expression_populated( anyvar_client: BaseAnyVarClient, alleles: dict ): - """Test `get_registered_allele_expressions` for a populated client""" + """Test `retrieve_allele_by_expression` for a populated client""" for allele_fixture in alleles.values(): if "vcf_expression" not in allele_fixture: continue - assert anyvar_client.get_registered_allele( + assert anyvar_client.retrieve_allele_by_expression( allele_fixture["vcf_expression"] ) == models.Allele(**allele_fixture["variation"]) diff --git a/tests/unit/functions/test_ingest_vcf.py b/tests/unit/functions/test_ingest_vcf.py index b0e071b..dae30f2 100644 --- a/tests/unit/functions/test_ingest_vcf.py +++ b/tests/unit/functions/test_ingest_vcf.py @@ -2,7 +2,9 @@ from pathlib import Path import pytest +from anyvar.core.objects import SupportedVrsVariation from anyvar.mapping.liftover import ReferenceAssembly +from ga4gh.vrs.models import Allele from anyvlm.anyvar.base_client import BaseAnyVarClient from anyvlm.functions.ingest_vcf import VcfAfColumnsError, ingest_vcf @@ -44,11 +46,18 @@ def stub_anyvar_client(): } class TestAnyVarClient(BaseAnyVarClient): - def get_registered_allele( + def retrieve_allele_by_id( + self, + vrs_id: str, + starting_assembly: ReferenceAssembly = ReferenceAssembly.GRCH38, + ) -> SupportedVrsVariation | None: + raise NotImplementedError + + def retrieve_allele_by_expression( self, expression: str, assembly: ReferenceAssembly = ReferenceAssembly.GRCH38, - ): + ) -> Allele | None: raise NotImplementedError def put_allele_expressions( @@ -61,6 +70,11 @@ def put_allele_expressions( for expr in expressions ] + def get_liftover_variation_id( + self, vrs_id: str, starting_assembly: ReferenceAssembly + ) -> str | None: + raise NotImplementedError + def close(self) -> None: """Clean up AnyVar connection.""" diff --git a/tests/unit/test_variant_counts_endpoint.py b/tests/unit/test_variant_counts_endpoint.py index c1af0c8..85a2b7b 100644 --- a/tests/unit/test_variant_counts_endpoint.py +++ b/tests/unit/test_variant_counts_endpoint.py @@ -221,14 +221,14 @@ def test_variant_counts_endpoint_anyvar_unavailable( ): """Test case where AnyVarClientConnectionError is raised""" - def mock_get_registered_allele(*args, **kwargs): + def retrieve_allele_by_expression(*args, **kwargs): raise AnyVarClientConnectionError anyvar_client = client_with_populated_dbs.app.state.anyvar_client monkeypatch.setattr( anyvar_client, - "get_registered_allele", - mock_get_registered_allele, + "retrieve_allele_by_expression", + retrieve_allele_by_expression, ) response = client_with_populated_dbs.get(ENDPOINT, params=TEST_QUERY)