From d20dd9e5f9dc9b61674f513d8511006ba700c601 Mon Sep 17 00:00:00 2001 From: Valentina Date: Thu, 25 Jun 2026 14:52:58 +0200 Subject: [PATCH] fix&update: added tests on viewer + fix single entity viz and internal/external anchors in a hrefs --- lode/reader/config/owl.yaml | 5 +- lode/reader/loader.py | 4 + lode/templates/_entity_card.html | 26 +- lode/templates/viewer.html | 23 +- lode/viewer/base_viewer.py | 35 ++- lode/viewer/owl_viewer.py | 1 - pyproject.toml | 2 +- tests/test_owl_viewer.py | 429 +++++++++++++++++++++++++++++++ uv.lock | 2 +- 9 files changed, 511 insertions(+), 16 deletions(-) create mode 100644 tests/test_owl_viewer.py diff --git a/lode/reader/config/owl.yaml b/lode/reader/config/owl.yaml index 62fc2db..f2d67b7 100644 --- a/lode/reader/config/owl.yaml +++ b/lode/reader/config/owl.yaml @@ -587,7 +587,10 @@ mapper: # ========== REWRITE BASE ========== - # empty for now + rdfs:isDefinedBy: + is: predicate + target_classes: [Resource] + setters: [set_is_defined_by: Individual] # involves in punning # ====================== diff --git a/lode/reader/loader.py b/lode/reader/loader.py index 0d250fa..606c926 100644 --- a/lode/reader/loader.py +++ b/lode/reader/loader.py @@ -32,6 +32,10 @@ def load(self, source: str) -> None: if self._is_url(source): self._load_from_url_with_content_negotiation(source) + + # ADD BY VALE TO FIX WINDOWS LOCAL BUG - COMMENTED FOR PRODUCTION + # elif os.path.isfile(source): + # self._load_from_local_file(source) else: # A value carrying a URL scheme that is not http(s) (file:, ftp:, ...) # must not be silently treated as a local path. Bare local paths diff --git a/lode/templates/_entity_card.html b/lode/templates/_entity_card.html index cb24963..6d43000 100644 --- a/lode/templates/_entity_card.html +++ b/lode/templates/_entity_card.html @@ -24,10 +24,14 @@ {{ v.text }} {% elif v.link in anchor_map %} {{ v.text }} - {% else %} - 
{{ v.text }}
+ {% elif is_static %} + {{ v.text }} + {% else %} + {{ v.text }} {% endif %} {% if v_type in type_map %} @@ -61,6 +65,8 @@

{% endif %}

+ + {% if not single_resource %} back to ToC @@ -69,6 +75,7 @@

{{type_map[item.type.lower()].singular}} ToC {% endif %} + {% endif %}
@@ -188,8 +195,12 @@
{{ part.text }} {% elif part.link in anchor_map %} {{ part.text }} - {% else %} + {% elif part.is_external %} {{ part.text }} + {% elif is_static %} + {{ part.text }} + {% else %} + {{ part.text }} {% endif %} {% if v_type in type_map %} @@ -227,11 +238,16 @@
{{ rel_item.text }} {% elif rel_item.link in anchor_map %} {{ rel_item.text }} - {% else %} + {% elif rel_item.is_external %} {{ rel_item.text }} + {% if is_md %}data-md="{{ rel_item.text | e }}"{% endif %}>{{ rel_item.text }} + {% elif is_static %} + {{ rel_item.text }} + {% else %} + {{ rel_item.text }} {% endif %} + {% if v_type in type_map %} {{ type_map[v_type].abb }} {% endif %} diff --git a/lode/templates/viewer.html b/lode/templates/viewer.html index 3561275..bbda8d4 100644 --- a/lode/templates/viewer.html +++ b/lode/templates/viewer.html @@ -61,6 +61,22 @@ {% endif %} {% if metadata %} + {% if single_resource %} +
+ +

+ {% if metadata.title %} + {% for title in metadata.title %}{{ title.text }}{% endfor %} + {% elif metadata.label %}{{ metadata.label[0].text }}{% endif %} +

+ ontology context — click to expand +
+

{{ metadata.uri[0].link }}

+ +
+
+
+ {% else %}

@@ -79,6 +95,7 @@

+ {% endif %}
{% for key, val_list in metadata.items() %} @@ -137,7 +154,7 @@

{% endif %}

-
+ {% if single_resource %}
{% else %}
{% endif %} {% endif %} {% if grouped_view and sections %} @@ -202,7 +219,7 @@

Namespace Declarations @@ -220,7 +237,7 @@

- ← Back to Index + ← Back to full documentation {% endif %} diff --git a/lode/viewer/base_viewer.py b/lode/viewer/base_viewer.py index ced4d50..e94f69f 100644 --- a/lode/viewer/base_viewer.py +++ b/lode/viewer/base_viewer.py @@ -14,6 +14,18 @@ class BaseViewer: def __init__(self, reader): self.reader = reader self._cache = reader._instance_cache # it uses + self._internal_iris = None + + def _is_internal(self, uri) -> bool: + if not uri: + return False + if self._internal_iris is None: + self._internal_iris = { + str(i.get_has_identifier()) + for i in self.get_toc_instances() # <-- NON get_all_instances() + if hasattr(i, 'get_has_identifier') and i.get_has_identifier() + } + return str(uri) in self._internal_iris def get_all_instances(self) -> List: """Ottiene tutte le istanze (esclusi literal).""" @@ -106,6 +118,16 @@ def get_view_data(self, resource_uri: Optional[str] = None, language: Optional[s 'entities': self._format_entities(all_instances, language) } + def _is_toc_entity(self, instance) -> bool: + """Browsable (own card + clickable link) iff its type is in get_toc_config. + Resources not listed in get_o_config are shown when mentioned as plain text + in cards (external-ref), just never linked.""" + toc_keys = {key for key, _id, _title in self.get_toc_config()} + return type(instance).__name__ in toc_keys + + def get_toc_instances(self) -> List: + return [i for i in self.get_all_instances() if self._is_toc_entity(i)] + def _handle_single_resource(self, resource_uri: str, language: Optional[str] = None) -> Dict: """ Standard logic for displaying a single resource. 
@@ -117,6 +139,9 @@ def _handle_single_resource(self, resource_uri: str, language: Optional[str] = N return {'error': f'Resource {resource_uri} not found'} instances = list(instance_set) if isinstance(instance_set, set) else [instance_set] + instances = [i for i in instances if self._is_toc_entity(i)] + if not instances: + return {'error': f'{resource_uri} is not a browsable entity'} return { 'single_resource': True, @@ -384,14 +409,15 @@ def _resolve_resource_value(self, obj, language=None) -> dict: 'lan': None, 'parts': None, # This key is for restrictions 'type': None, - 'is_deprecated': False + 'is_deprecated': False, + 'is_external': False, } if not obj: return handler_dic # --- 1. INTERCEPT RESTRICTIONS --- restriction_types = ["Restriction", "PropertyConceptRestriction", "Quantifier", "Cardinality", "TruthFunction", - "OneOf", "Value"] + "OneOf", "Value", "DatatypeRestriction"] obj_type = type(obj).__name__ if obj_type in restriction_types: @@ -406,6 +432,7 @@ def _resolve_resource_value(self, obj, language=None) -> dict: handler_dic['parts'] = parts handler_dic['text'] = "".join([p['text'] for p in parts if p.get('text')]) handler_dic['link'] = None # Forces Jinja to ignore the blank node URI + handler_dic['is_external'] = not self._is_internal(handler_dic['link']) handler_dic['type'] = obj_type return handler_dic @@ -483,7 +510,7 @@ def _resolve_resource_value(self, obj, language=None) -> dict: # --- 5. 
Normal Resource Handling (Concepts, Properties, Individuals) --- if hasattr(obj, 'get_has_identifier'): handler_dic['link'] = obj.get_has_identifier() - + handler_dic['is_external'] = not self._is_internal(handler_dic['link']) is_dep = getattr(obj, 'get_is_deprecated')() if hasattr(obj, 'get_is_deprecated') else getattr(obj, 'is_deprecated', False) @@ -687,7 +714,7 @@ def _get(instance, prop_name, default=None): resolved = self._resolve_resource_value(obj, language) if resolved.get('text'): - return [{'text': resolved['text'], 'link': resolved.get('link'), 'type': resolved.get('type')}] + return [{'text': resolved['text'], 'link': resolved.get('link'), 'type': resolved.get('type'), 'is_external': resolved.get('is_external', False)}] return [] diff --git a/lode/viewer/owl_viewer.py b/lode/viewer/owl_viewer.py index 2cff6fc..0fabdae 100644 --- a/lode/viewer/owl_viewer.py +++ b/lode/viewer/owl_viewer.py @@ -11,7 +11,6 @@ class OwlViewer(BaseViewer): 'attribute': {'singular': 'Data Property', 'plural': 'Data Properties', 'abb': 'dp'}, 'annotation': {'singular': 'Annotation Property', 'plural': 'Annotation Properties', 'abb': 'ap'}, 'individual': {'singular': 'Named Individual', 'plural': 'Named Individuals', 'abb': 'ni'}, - 'model': {'singular': 'Ontology', 'plural': 'Ontologies', 'abb': 'o'}, } def get_toc_config(self): diff --git a/pyproject.toml b/pyproject.toml index 7970848..7dbcb3d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "lode" -version = "0.3.0" +version = "0.3.1" description = "New reengineered version of LODE, maintained by OpenCitations" authors = [{name = "Valentina Pasqual, Silvio Peroni", email = "valentina.pasqual2@unibo.it"}] readme = "README.md" diff --git a/tests/test_owl_viewer.py b/tests/test_owl_viewer.py new file mode 100644 index 0000000..7cec7c4 --- /dev/null +++ b/tests/test_owl_viewer.py @@ -0,0 +1,429 @@ +""" +test_owl_viewer.py +Tests for OwlViewer / BaseViewer. 
+ +Two layers: + - Unit: a fake reader with a hand-built _instance_cache, so we exercise + the viewer in isolation (no RDF parsing, no network). These pin + the ToC-filtering / internal-vs-external / single-resource + contract precisely. + - Integration: parametrized over the ontologies.json corpus (same pattern as + test_owl_integrity), asserting viewer invariants on real data. + Skips an ontology if it cannot be loaded. + +Run with: + pytest test_owl_viewer.py -v +""" + +import json +import os +from pathlib import Path + +import pytest + +from lode.models import ( + Concept, Relation, Attribute, Annotation, Individual, + Model, Statement, Literal, +) +from lode.viewer import OwlViewer +import re + + +# --------------------------------------------------------------------------- +# Unit-test scaffolding: a minimal fake reader the viewer can consume. +# --------------------------------------------------------------------------- + +class _FakeReader: + """Mimics the parts of Reader that BaseViewer touches: an instance cache, + a provenance subgraph hook, and a graph for namespace binding.""" + + def __init__(self, cache): + self._instance_cache = cache + self._graph = _FakeGraph() + + def get_provenance_subgraph(self, instance): + return _FakeGraph() + + +class _FakeGraph: + def namespaces(self): + return iter(()) + + def serialize(self, format="turtle"): + return "" + + def __iter__(self): + return iter(()) + + +def _entity(cls, uri, label=None): + inst = cls() + inst.set_has_identifier(str(uri)) + if label is not None: + lit = Literal() + lit.set_has_value(label) + lit.set_has_language("en") + inst.set_has_label(lit) + return inst + + +def _viewer(*instances): + """Cache keyed by URI -> set of instances (handles punning: same key, + multiple instances).""" + cache = {} + for inst in instances: + key = inst.get_has_identifier() + cache.setdefault(key, set()).add(inst) + return OwlViewer(_FakeReader(cache)) + +# rdflib BNode ids look like 'N' + 32 hex chars, or 'n' 
for parsed +# blank nodes. Anything matching means a raw blank node leaked into the view. +_BNODE_RE = re.compile(r'^(_:)?[Nn][0-9a-fA-F]{8,}$') + +def _looks_like_bnode(value) -> bool: + if not value: + return False + return bool(_BNODE_RE.match(str(value).strip())) + +def _iter_resolved_values(data): + """Yield every resolved value dict in an entity view (relations + statements), + flattening restriction parts.""" + for section in data.get("sections", []): + for ent in section["entities"]: + buckets = list(ent.get("relations", {}).values()) + list(ent.get("statements", {}).values()) + for values in buckets: + for v in (values if isinstance(values, list) else [values]): + for part in (v.get("parts") or [v]): + yield ent["uri"], part + + +C = "http://ex.org/MyClass" +P = "http://ex.org/myProp" +I = "http://ex.org/myIndividual" +ONTO = "http://ex.org/ontology" +EXT = "http://purl.org/spar/fabio/ProceedingsPaper" # not in our cache + + +# --------------------------------------------------------------------------- +# _is_toc_entity — the single source of truth for "is this browsable?" 
+# --------------------------------------------------------------------------- + +class TestIsTocEntity: + + def test_concept_is_toc(self): + v = _viewer() + assert v._is_toc_entity(_entity(Concept, C)) is True + + def test_relation_is_toc(self): + v = _viewer() + assert v._is_toc_entity(_entity(Relation, P)) is True + + def test_model_is_not_toc(self): + """An ontology (Model) is never a ToC entity: Model is not in get_toc_config().""" + v = _viewer() + assert v._is_toc_entity(_entity(Model, ONTO)) is False + + def test_statement_is_not_toc(self): + v = _viewer() + assert v._is_toc_entity(_entity(Statement, "http://ex.org/stmt")) is False + + def test_config_drives_membership(self): + """The decision is purely config-driven: every key in get_toc_config() + is accepted, nothing else is.""" + v = _viewer() + toc_keys = {key for key, _id, _title in v.get_toc_config()} + assert "Concept" in toc_keys + assert "Model" not in toc_keys + + +# --------------------------------------------------------------------------- +# get_toc_instances — only ToC entities surface +# --------------------------------------------------------------------------- + +class TestGetTocInstances: + + def test_excludes_model(self): + v = _viewer(_entity(Concept, C), _entity(Model, ONTO)) + types = {type(i).__name__ for i in v.get_toc_instances()} + assert "Concept" in types + assert "Model" not in types + + def test_keeps_all_toc_kinds(self): + v = _viewer( + _entity(Concept, C), + _entity(Relation, P), + _entity(Attribute, "http://ex.org/attr"), + _entity(Annotation, "http://ex.org/anno"), + _entity(Individual, I), + ) + types = {type(i).__name__ for i in v.get_toc_instances()} + assert types == {"Concept", "Relation", "Attribute", "Annotation", "Individual"} + + def test_punned_individual_still_surfaces(self): + """If an IRI is punned Model+Individual, the Individual is a legitimate + ToC entity (config-driven); only the Model facet is dropped.""" + onto = _entity(Model, ONTO) + indiv = 
_entity(Individual, ONTO) + v = _viewer(onto, indiv) + instances = v.get_toc_instances() + assert any(type(i).__name__ == "Individual" for i in instances) + assert not any(type(i).__name__ == "Model" for i in instances) + + +# --------------------------------------------------------------------------- +# _is_internal — internal == present among ToC entities +# --------------------------------------------------------------------------- + +class TestIsInternal: + + def test_known_concept_is_internal(self): + v = _viewer(_entity(Concept, C)) + assert v._is_internal(C) is True + + def test_external_uri_is_not_internal(self): + v = _viewer(_entity(Concept, C)) + assert v._is_internal(EXT) is False + + def test_punned_ontology_iri_is_internal_as_individual(self): + v = _viewer(_entity(Concept, C), _entity(Model, ONTO), _entity(Individual, ONTO)) + assert v._is_internal(ONTO) is True + + def test_none_is_not_internal(self): + v = _viewer(_entity(Concept, C)) + assert v._is_internal(None) is False + + +# --------------------------------------------------------------------------- +# _resolve_resource_value — is_external flag is set correctly +# --------------------------------------------------------------------------- + +class TestResolveResourceValue: + + def test_internal_resource_not_external(self): + v = _viewer(_entity(Concept, C, "My Class")) + target = _entity(Concept, C, "My Class") + d = v._resolve_resource_value(target) + assert d["link"] == C + assert d["is_external"] is False + + def test_external_resource_is_external(self): + v = _viewer(_entity(Concept, C)) + target = _entity(Concept, EXT, "Proceedings Paper") + d = v._resolve_resource_value(target) + assert d["link"] == EXT + assert d["is_external"] is True + + def test_punned_ontology_individual_is_internal(self): + v = _viewer(_entity(Concept, C), _entity(Model, ONTO), _entity(Individual, ONTO)) + target = _entity(Individual, ONTO, "fabio") + d = v._resolve_resource_value(target) + assert d["is_external"] 
is False + + +# --------------------------------------------------------------------------- +# _handle_single_resource — non-ToC URIs are not browsable +# --------------------------------------------------------------------------- + +class TestHandleSingleResource: + + def test_browsable_concept_returns_entities(self): + v = _viewer(_entity(Concept, C, "My Class")) + data = v._handle_single_resource(C) + assert data.get("single_resource") is True + assert len(data["entities"]) == 1 + assert data["entities"][0]["uri"] == C + + def test_ontology_iri_is_not_browsable(self): + """Opening ?resource= must NOT dump the whole model.""" + v = _viewer(_entity(Model, ONTO)) + data = v._handle_single_resource(ONTO) + assert "error" in data + assert "entities" not in data + + def test_punned_ontology_drops_model_keeps_individual(self): + v = _viewer(_entity(Model, ONTO), _entity(Individual, ONTO, "thing")) + data = v._handle_single_resource(ONTO) + assert data.get("single_resource") is True + types = {e["type"] for e in data["entities"]} + assert "Model" not in types + assert "Individual" in types + + def test_unknown_uri_returns_error(self): + v = _viewer(_entity(Concept, C)) + data = v._handle_single_resource("http://ex.org/nope") + assert "error" in data + + +# --------------------------------------------------------------------------- +# Grouped view — sections come only from ToC config, Model excluded +# --------------------------------------------------------------------------- + +class TestGroupedView: + + def test_model_absent_from_sections(self): + v = _viewer( + _entity(Concept, C, "Class"), + _entity(Individual, I, "Indiv"), + _entity(Model, ONTO), + ) + data = v.get_view_data() # no resource_uri -> grouped + section_titles = {s["title"] for s in data["sections"]} + assert "Concept" in section_titles + # Model has no section because it is not in get_toc_config() + assert all("Ontolog" not in t for t in section_titles) + + def 
test_each_section_entity_is_toc_kind(self): + v = _viewer( + _entity(Concept, C, "Class"), + _entity(Relation, P, "prop"), + _entity(Model, ONTO), + ) + data = v.get_view_data() + toc_keys = {key for key, _id, _title in v.get_toc_config()} + for section in data["sections"]: + for ent in section["entities"]: + assert ent["type"] in toc_keys + + +# =========================================================================== +# Integration: real corpus, same loader pattern as test_owl_integrity. +# =========================================================================== + +ONTOLOGIES_PATH = Path(__file__).parent / "ontologies_spar.json" + + +def _load_uris(): + single = os.environ.get("TEST_ONTOLOGY_URI") + if single: + return [single] + if not ONTOLOGIES_PATH.exists(): + return [] + with open(ONTOLOGIES_PATH) as f: + data = json.load(f) + return [entry["uri"] for entry in data["uris"]] + + +@pytest.fixture(scope="module", params=_load_uris()) +def viewer(request): + from lode.reader import Reader + reader = Reader() + try: + reader.load_instances(request.param, "owl") + except Exception as e: + pytest.skip(f"Could not load {request.param}: {e}") + v = reader.get_viewer() + yield v + reader.clear_cache() + + +class TestCorpusInvariants: + + def test_no_model_in_toc_instances(self, viewer): + """No ToC entity is a Model, on any real ontology.""" + assert all(type(i).__name__ != "Model" for i in viewer.get_toc_instances()) + + def test_grouped_sections_only_toc_kinds(self, viewer): + data = viewer.get_view_data() + toc_keys = {key for key, _id, _title in viewer.get_toc_config()} + for section in data.get("sections", []): + for ent in section["entities"]: + assert ent["type"] in toc_keys + + def test_single_resource_roundtrip(self, viewer): + """Opening any ToC entity by URI returns exactly that entity, browsable.""" + toc = viewer.get_toc_instances() + if not toc: + pytest.skip("no ToC entities in this ontology") + target = toc[0] + uri = 
str(target.get_has_identifier()) + data = viewer.get_view_data(resource_uri=uri) + assert "error" not in data + assert any(e["uri"] == uri for e in data["entities"]) + + def test_opening_ontology_iri_shows_no_model_facet(self, viewer): + """Opening the ontology IRI never surfaces the Model facet (only its + ToC facets, if any).""" + from rdflib.namespace import RDF, OWL + from rdflib import URIRef + for onto in viewer.reader._graph.subjects(RDF.type, OWL.Ontology): + if not isinstance(onto, URIRef): + continue + data = viewer.get_view_data(resource_uri=str(onto)) + if "error" not in data: + assert all(e["type"] != "Model" for e in data["entities"]) + break + + def test_no_raw_bnode_in_entity_links(self, viewer): + """No resolved relation/statement value points to a raw BNode id: + unprocessed blank nodes must never become clickable links.""" + data = viewer.get_view_data() + offenders = [] + for section in data.get("sections", []): + for ent in section["entities"]: + for rel_name, values in ent["relations"].items(): + for v in (values if isinstance(values, list) else [values]): + # restriction parts carry their own sub-links + for part in (v.get("parts") or [v]): + if _looks_like_bnode(part.get("link")): + offenders.append((ent["uri"], rel_name, part.get("link"))) + assert not offenders, f"raw BNode ids leaked as links: {offenders[:10]}" + + def test_no_raw_bnode_as_entity_uri(self, viewer): + """No ToC entity is itself a raw BNode (every card must have a real IRI).""" + bad = [ + str(i.get_has_identifier()) + for i in viewer.get_toc_instances() + if _looks_like_bnode(i.get_has_identifier()) + ] + assert not bad, f"ToC entities with BNode identifier: {bad[:10]}" + + def test_no_raw_bnode_in_displayed_text(self, viewer): + """No resolved value shows a raw BNode id as its visible text. 
+ Restrictions are allowed (their text is composed), so we check leaf + values only — a leaf whose text equals a BNode id is a leak.""" + data = viewer.get_view_data() + offenders = [] + for section in data.get("sections", []): + for ent in section["entities"]: + for rel_name, values in ent["relations"].items(): + for v in (values if isinstance(values, list) else [values]): + if v.get("parts"): + continue # composed restriction text is fine + if _looks_like_bnode(v.get("text")): + offenders.append((ent["uri"], rel_name, v.get("text"))) + assert not offenders, f"raw BNode ids leaked as text: {offenders[:10]}" + + def test_external_links_are_not_clickable(self, viewer): + """Any value whose link is NOT a ToC entity (external URL, uncached + import, raw bnode) must be is_external=True, so the template renders it + as plain text — never as an /extract?resource= link. Regression for the + nlm.nih.gov 'see also' that wrongly reloaded the API.""" + data = viewer.get_view_data() + offenders = [] + for ent_uri, val in _iter_resolved_values(data): + link = val.get("link") + if not link: + continue + # ground truth: is this link a real ToC entity? 
+ is_toc = viewer._is_internal(link) + if not is_toc and not val.get("is_external", False): + offenders.append((ent_uri, link)) + assert not offenders, ( + f"external/non-ToC links not marked is_external " + f"(would become clickable /extract links): {offenders[:10]}" + ) + + def test_toc_links_stay_internal(self, viewer): + """Mirror invariant: a link that IS a ToC entity must be is_external=False + (so internal navigation still works).""" + data = viewer.get_view_data() + offenders = [] + for ent_uri, val in _iter_resolved_values(data): + link = val.get("link") + if not link: + continue + if viewer._is_internal(link) and val.get("is_external", True): + offenders.append((ent_uri, link)) + assert not offenders, ( + f"ToC links wrongly marked external (lose internal navigation): {offenders[:10]}" + ) \ No newline at end of file diff --git a/uv.lock b/uv.lock index e7b3580..01cc53d 100644 --- a/uv.lock +++ b/uv.lock @@ -418,7 +418,7 @@ wheels = [ [[package]] name = "lode" -version = "0.3.0" +version = "0.3.1" source = { editable = "." } dependencies = [ { name = "fastapi" },