diff --git a/.gitignore b/.gitignore index 8f9453c..2b28c9e 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,6 @@ htmlcov/ docs/node_modules/ docs/.astro/ docs/dist/ + +# saved files +spool/ diff --git a/README.md b/README.md index 25741f0..b656eb2 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,8 @@ result = example_function() print(result) ``` + + ## Documentation Full documentation is available at: https://opencitations.github.io/lode/ diff --git a/lode/api.py b/lode/api.py index 3f94cd6..fb9768e 100644 --- a/lode/api.py +++ b/lode/api.py @@ -10,6 +10,10 @@ import os import traceback import logging +from urllib.parse import urlencode +from functools import lru_cache +from uuid import uuid4 +import hashlib # Configura logging logging.basicConfig( @@ -48,6 +52,88 @@ class ReadAsFormat(str, Enum): "text/n3": ("n3", "text/n3", "n3"), } +_EXT_TO_SERIALIZATION = { + "ttl": ("turtle", "text/turtle", "ttl"), + "rdf": ("xml", "application/rdf+xml", "rdf"), + "n3": ("n3", "text/n3", "n3"), +} + +import time +SPOOL_DIR = os.path.join(os.path.dirname(__file__), "spool") +os.makedirs(SPOOL_DIR, exist_ok=True) +_SPOOL_TTL = 60 * 60 + +def _spool_path(token: str) -> str: + return os.path.join(SPOOL_DIR, f"{token}.rdf") + +def _prune_spool(): + cutoff = time.time() - _SPOOL_TTL + for name in os.listdir(SPOOL_DIR): + p = os.path.join(SPOOL_DIR, name) + try: + if os.path.getmtime(p) < cutoff: + os.unlink(p) + except OSError: + pass + +# ---------------------------------------------------------- +# HELPERS FOR \extract endpoints using cache from the reader +# ---------------------------------------------------------- + +def _nav_qs(read_as: str, url, upload_id, lang) -> str: + p = {"read_as": read_as, "lang": lang or ""} + p["upload_id" if upload_id else "url"] = upload_id or (url or "") + return urlencode(p) + +def _render_view(request, reader, *, resource, lang, source_url, upload_id, read_as): + viewer = reader.get_viewer() + data = viewer.get_view_data(resource_uri=resource, language=lang) + data["warnings"] = reader.get_warnings() + return templates.TemplateResponse("viewer.html", { + "request": request, + "source_url": source_url, + "upload_id": upload_id, + "nav_qs": _nav_qs(read_as, source_url, upload_id, lang), + **data, + }) + +def _url_token(url, read_as, imported, closure) -> str: + key = f"{url}|{read_as}|{imported}|{closure}".encode() + return "url_" + hashlib.sha256(key).hexdigest()[:32] + +def _load_url(url, read_as, imported, closure, warnings): + _prune_spool() + token = _url_token(url, read_as, imported, closure) + path = _spool_path(token) + if os.path.exists(path): + # cache hit: ricostruisci dal Turtle salvato + reader = Reader() + reader.load_instances(path, read_as, imported=imported, closure=closure, warnings=warnings) + return reader + # cache miss: scarica e processa dalla URL + reader = Reader() + reader.load_instances(url, read_as, imported=imported, closure=closure, warnings=warnings) + # persisti il grafo normalizzato per i prossimi hit + try: + with open(path, "wb") as f: + f.write(reader._graph.serialize(format="turtle").encode("utf-8")) + except OSError: + pass + return reader + +def _resolve_reader(read_as: str, url, upload_id, imported, closure, warnings): + if upload_id: + path = _spool_path(upload_id) + if not os.path.exists(path): + raise ArtefactValidationError("Upload expired, please re-upload", + context={"upload_id": upload_id}) + reader = Reader() + reader.load_instances(path, read_as, imported=imported, closure=closure, warnings=warnings) + return reader + if url: + return _load_url(url, read_as, imported, closure, warnings) + raise ArtefactValidationError("Missing 'url' or 'upload_id'") + # ---------------------------------------------------------- # ERROR RENDERING # ---------------------------------------------------------- @@ -105,39 +191,40 @@ async def limit_upload_size(request: Request, call_next): async def extract_get( request: Request, read_as: ReadAsFormat, - url: str, + url: Optional[str] = None, + upload_id: Optional[str] = None, resource: Optional[str] = None, lang: Optional[str] = None, imported: Optional[bool] = None, - closure: Optional[bool] = None, + closure: Optional[bool] = None, + format: Optional[str] = None, warnings: bool = False ): _check_format_enabled(read_as) - - reader = Reader() - reader.load_instances(url, read_as.value, imported=imported, closure=closure, warnings=warnings) - + + reader = _resolve_reader(read_as.value, url, upload_id, imported, closure, warnings) + + # Content negotiation accept = request.headers.get("accept", "text/html") - if accept in _ACCEPT_TO_SERIALIZATION: - rdflib_fmt, mime_type, ext = _ACCEPT_TO_SERIALIZATION[accept] - serialized = reader._graph.serialize(format=rdflib_fmt) - filename = url.rstrip("/").split("/")[-1] or "graph" - return Response( - content=serialized, - media_type=mime_type, - headers={"Content-Disposition": f'attachment; filename="{filename}.{ext}"'} - ) - - viewer = reader.get_viewer() - data = viewer.get_view_data(resource_uri=resource, language=lang) - data['warnings'] = reader.get_warnings() + serial = None + if format and format.lower() in _EXT_TO_SERIALIZATION: + serial = _EXT_TO_SERIALIZATION[format.lower()] + elif accept in _ACCEPT_TO_SERIALIZATION: + serial = _ACCEPT_TO_SERIALIZATION[accept] + if serial: + rdflib_fmt, mime_type, ext = serial + if resource: + serialized = reader.get_viewer().export_resource(resource, rdflib_fmt) + filename = resource.rstrip("/").split("#")[-1].split("/")[-1] or "resource" + else: + serialized = reader._graph.serialize(format=rdflib_fmt) + filename = (url.rstrip("/").split("/")[-1] if url else "graph") or "graph" + return Response(content=serialized, media_type=mime_type, + headers={"Content-Disposition": f'inline; filename="{filename}.{ext}"'}) logger.info(f"=== REQUEST SUCCESS ===") - return templates.TemplateResponse("viewer.html", { - "request": request, - "source_url": url, - **data - }) + return _render_view(request, reader, resource=resource, lang=lang, + source_url=url, upload_id=upload_id, read_as=read_as.value) @app.post("/extract", response_class=HTMLResponse) async def extract_post( @@ -149,46 +236,29 @@ async def extract_post( imported: Optional[str] = Form(None), closure: Optional[str] = Form(None), warnings: bool = False, - ): """Visualizza semantic artefact da file.""" - temp_file_path = None - - try: - logger.info(f"=== FILE UPLOAD START ===") - logger.info(f"Filename: {file.filename}") - logger.info(f"Format: {read_as.value}") + logger.info(f"=== FILE UPLOAD START ===") + logger.info(f"Filename: {file.filename}") + logger.info(f"Format: {read_as.value}") - # SECURITY CHECKS: validate before writing to disk - _check_format_enabled(read_as) - security.check_extension(file.filename) - content = await security.read_upload_capped(file) # chunked read + size cap (anti-DoS) - security.check_is_text(content) # binary rejection - security.check_safe_xml(content.decode("utf-8-sig")) # XXE / billion laughs + # SECURITY CHECKS + _check_format_enabled(read_as) + security.check_extension(file.filename) + content = await security.read_upload_capped(file) + security.check_is_text(content) + security.check_safe_xml(content.decode("utf-8-sig")) - # Write temp file (valid input only) - with tempfile.NamedTemporaryFile(delete=False, suffix=".rdf") as tmp: - tmp.write(content) - temp_file_path = tmp.name + _prune_spool() + token = uuid4().hex + path = _spool_path(token) + with open(path, "wb") as f: + f.write(content) - # Initialise Reader and calls the Loader -> Reader -> Viewer and populates it - reader = Reader() - reader.load_instances(temp_file_path, read_as.value, imported=imported, closure=closure, warnings=warnings) - viewer = reader.get_viewer() - data = viewer.get_view_data(resource_uri=resource, language=lang) - data['warnings'] = reader.get_warnings() - - logger.info(f"=== UPLOAD SUCCESS ===") - return templates.TemplateResponse("viewer.html", { - "request": request, - "source_url": None, - **data - }) - - finally: - # Security: Flushes the temp file once its is loaded - if temp_file_path and os.path.exists(temp_file_path): - os.unlink(temp_file_path) + reader = Reader() + reader.load_instances(path, read_as.value, imported=imported, closure=closure, warnings=warnings) + return _render_view(request, reader, resource=resource, lang=lang, + source_url=None, upload_id=token, read_as=read_as.value) @app.get("/", response_class=HTMLResponse) async def input_web_interface(request: Request): diff --git a/lode/templates/_entity_card.html b/lode/templates/_entity_card.html index 23722e4..cb24963 100644 --- a/lode/templates/_entity_card.html +++ b/lode/templates/_entity_card.html @@ -74,12 +74,11 @@

IRI: {{ item.uri }} - {% if is_static %} - - {% else %} - - {% endif %} + {% if is_static %} + + {% else %} + + {% endif %}
diff --git a/lode/templates/_toc_sidebar.html b/lode/templates/_toc_sidebar.html index 9733678..c1a9c2a 100644 --- a/lode/templates/_toc_sidebar.html +++ b/lode/templates/_toc_sidebar.html @@ -1,30 +1,7 @@ {# _toc_sidebar.html — TOC sidebar shared by owl/skos/rdf viewers. - Requires `sections` in context: - section.id, section.title, section.entities[].{label,anchor_id,uri} - Optional: - section.tree -> nested [{label, anchor_id, uri, children:[...]}, ...] - type_map (viewer.html) #} - -{# Recursive macro: renders one tree node + its children #} -{% macro render_node(node) %} -
  • - {% if node.children %} -
    - - {{ node.label }} - -
      - {% for child in node.children %} - {{ render_node(child) }} - {% endfor %} -
    -
    - {% else %} - {{ node.label }} - {% endif %} -
  • -{% endmacro %} - + Requires `sections` in context (same shape used by viewer templates): + section.id, section.title, section.entities[].label, section.entities[].anchor_id + Optional: type_map (viewer.html). Falls back to section.title raw. #} {% if grouped_view and sections %}

    {{ section.entities|length }} - - {% if section.tree %} - - {% else %} - - {% endif %} + {% endfor %} @@ -88,6 +56,7 @@
    Table of Contents
    document.body.classList.toggle('toc-sidebar-closed', !open); } + // Default: open. Persist user choice across pages. const stored = localStorage.getItem('lode.tocSidebar'); apply(stored === null ? true : stored === 'open'); @@ -97,6 +66,7 @@
    Table of Contents
    localStorage.setItem('lode.tocSidebar', willOpen ? 'open' : 'closed'); }); + // Highlight currently-visible section/entity while scrolling. const links = sidebar.querySelectorAll('a[href^="#"]'); const byId = new Map(); links.forEach(a => byId.set(a.getAttribute('href').slice(1), a)); @@ -118,17 +88,6 @@
    Table of Contents
    }, { rootMargin: '-30% 0px -60% 0px', threshold: 0 }); targets.forEach(t => io.observe(t)); } - - // Auto-open ancestor
    when a child link is clicked - sidebar.addEventListener('click', function (ev) { - const a = ev.target.closest('a[href^="#"]'); - if (!a) return; - let n = a.parentElement; - while (n && n !== sidebar) { - if (n.tagName === 'DETAILS') n.open = true; - n = n.parentElement; - } - }); })(); {% endif %} \ No newline at end of file diff --git a/lode/templates/index.html b/lode/templates/index.html index 5b8caef..0bc0b39 100644 --- a/lode/templates/index.html +++ b/lode/templates/index.html @@ -18,7 +18,7 @@
    -
    +
    @@ -103,12 +103,14 @@
    - + - + + +
    @@ -132,7 +134,7 @@
    - +
    @@ -455,51 +457,16 @@

    Examples

    // ── POST (File mode) ── const file = fileInput.files[0]; - // Client-side pre-check (UX only; the server enforces the real limits). const preCheck = validateFile(file); if (preCheck) { showFileError(preCheck); return; } - const formData = new FormData(); - formData.append('file', file); - formData.append('read_as', readAsSelect.value); - - const resource = document.getElementById('resource').value.trim(); - if (resource) formData.append('resource', resource); - - const language = document.getElementById('language').value.trim(); - if (language) formData.append('lang', language); - - if (document.getElementById('warnings').checked) formData.append('warnings', 'true'); - - if (readAsSelect.value === 'owl') { - if (document.getElementById('imported').checked) formData.append('imported', 'true'); - if (document.getElementById('closure').checked) formData.append('closure', 'true'); - } - - submitBtn.disabled = true; - submitBtn.textContent = 'Processing...'; - - try { - const response = await fetch('/extract', { method: 'POST', body: formData }); - // The server renders an HTML page both on success (viewer.html) and on a - // handled error (error.html): render whatever it returns. - const html = await response.text(); - if (html && html.trim()) { - document.open(); - document.write(html); - document.close(); - } else { - showFileError('The upload could not be processed. Please try again.'); - } - } catch (error) { - showFileError('Network error during upload. Please try again.'); - } finally { - submitBtn.disabled = false; - submitBtn.textContent = 'Generate Documentation'; - } + // Validazione ok: submit nativo multipart. + // Il server fa PRG (303) e atterra su GET /extract?upload_id=... + // cosi' URL in barra e history sono corretti, e "back" torna all'index. + form.submit(); }); diff --git a/lode/templates/viewer.html b/lode/templates/viewer.html index b640c73..3561275 100644 --- a/lode/templates/viewer.html +++ b/lode/templates/viewer.html @@ -116,8 +116,9 @@

    WebVOWL Source - {% if source_url %} -
    Export graph
    + {% if source_url or upload_id %} + {% set _res = request.query_params.get('resource', '') %} +
    Export {% if _res %}subgraph{% else %}graph{% endif %}
    {% for mime, label, icon, ext in [ ("text/turtle", "Turtle", "bi-filetype-raw", "ttl"), @@ -127,7 +128,7 @@

    {% if not request.is_static %} + onclick="exportGraph('{{ mime }}', '{{ ext }}', '{{ nav_qs }}', '{{ _res }}'); return false;" {{ label }} {% endif %} @@ -219,7 +220,7 @@

    - ← Back to Index + ← Back to Index

    {% endif %} @@ -282,16 +283,23 @@

    r.blob()) - .then(blob => { - window.open(URL.createObjectURL(blob), "_blank"); - }); + function exportGraph(mime, ext, navQs, resource) { + let qs = navQs; + if (resource) qs += "&resource=" + encodeURIComponent(resource); + fetch(`/extract?${qs}`, { headers: { "Accept": mime } }) + .then(r => { if (!r.ok) throw new Error(r.status); return r.blob(); }) + .then(blob => { + const a = document.createElement("a"); + const slug = resource ? resource.split(/[#/]/).pop() : "ontology"; + a.href = URL.createObjectURL(blob); + a.download = `${slug}.${ext}`; + a.click(); + URL.revokeObjectURL(a.href); + }) + .catch(e => alert("Export failed: " + e.message)); } +

    \ No newline at end of file diff --git a/lode/viewer/base_viewer.py b/lode/viewer/base_viewer.py index 5225a9f..ced4d50 100644 --- a/lode/viewer/base_viewer.py +++ b/lode/viewer/base_viewer.py @@ -813,4 +813,17 @@ def _safe_serialize(g, fmt: str) -> str: try: return g.serialize(format=fmt) except Exception as e: - return f"# serialization error ({fmt}): {e}" \ No newline at end of file + return f"# serialization error ({fmt}): {e}" + + def export_resource(self, resource_uri: str, fmt: str = "turtle") -> str: + """Serializza il subgraph della singola risorsa (unione su punning).""" + g = Graph() + for prefix, ns in self.reader._graph.namespaces(): + g.bind(prefix, ns) + instances = self.get_instances_from_single_resource(resource_uri) + if instances: + inst_iter = instances if isinstance(instances, set) else [instances] + for inst in inst_iter: + for t in self.reader.get_provenance_subgraph(inst): # già esiste + g.add(t) + return self._safe_serialize(g, fmt) \ No newline at end of file diff --git a/lode/warnings.json b/lode/warnings.json new file mode 100644 index 0000000..07ae7e0 --- /dev/null +++ b/lode/warnings.json @@ -0,0 +1,52 @@ +[ + { + "code": "property_type_mismatch_hierarchy", + "subject": "http://purl.org/dc/terms/creator", + "message": "§5.8.1: Relation http://purl.org/dc/terms/creator subPropertyOf Annotation http://purl.org/dc/elements/1.1/creator" + }, + { + "code": "property_type_conflict", + "subject": "http://purl.org/dc/terms/creator", + "message": "§5.8.1: http://purl.org/dc/terms/creator has multiple property types: ['Annotation', 'Relation']" + }, + { + "code": "property_type_mismatch_hierarchy", + "subject": "http://purl.org/dc/terms/issued", + "message": "§5.8.1: Attribute http://purl.org/dc/terms/issued subPropertyOf Annotation http://purl.org/dc/elements/1.1/date" + }, + { + "code": "property_type_conflict", + "subject": "http://purl.org/dc/terms/issued", + "message": "§5.8.1: http://purl.org/dc/terms/issued has multiple property types: ['Annotation', 'Attribute']" + }, + { + "code": "property_type_mismatch_hierarchy", + "subject": "http://purl.org/dc/terms/modified", + "message": "§5.8.1: Attribute http://purl.org/dc/terms/modified subPropertyOf Annotation http://purl.org/dc/elements/1.1/date" + }, + { + "code": "property_type_conflict", + "subject": "http://purl.org/dc/terms/modified", + "message": "§5.8.1: http://purl.org/dc/terms/modified has multiple property types: ['Annotation', 'Attribute']" + }, + { + "code": "property_type_mismatch_hierarchy", + "subject": "http://purl.org/dc/terms/language", + "message": "§5.8.1: Relation http://purl.org/dc/terms/language subPropertyOf Annotation http://purl.org/dc/elements/1.1/language" + }, + { + "code": "property_type_conflict", + "subject": "http://purl.org/dc/terms/language", + "message": "§5.8.1: http://purl.org/dc/terms/language has multiple property types: ['Annotation', 'Relation']" + }, + { + "code": "property_type_mismatch_hierarchy", + "subject": "http://purl.org/dc/terms/license", + "message": "§5.8.1: Relation http://purl.org/dc/terms/license subPropertyOf Annotation http://purl.org/dc/elements/1.1/rights" + }, + { + "code": "property_type_conflict", + "subject": "http://purl.org/dc/terms/license", + "message": "§5.8.1: http://purl.org/dc/terms/license has multiple property types: ['Annotation', 'Relation']" + } +] \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index ea7c4df..5de1df7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "lode" -version = "0.2.2" +version = "0.2.5" description = "New reengineered version of LODE, maintained by OpenCitations" authors = [{name = "Valentina Pasqual, Silvio Peroni", email = "valentina.pasqual2@unibo.it"}] readme = "README.md" diff --git a/tests/test_api.py b/tests/test_api.py new file mode 100644 index 0000000..115b626 --- /dev/null +++ b/tests/test_api.py @@ -0,0 +1,145 @@ +# tests/test_api.py +""" +Integration tests for the /extract web layer: +content negotiation (Accept + ?format), full vs subgraph serialization, +upload/URL reader cache. + +fabio ontology is loaded ONCE per session and reused everywhere (patched into the +URL path, serialized to a temp file for the upload path), so the default +suite makes a single network call. +""" +import re +from unittest.mock import patch + +import pytest +from fastapi.testclient import TestClient + +from lode.api import app, _load_url +from lode.reader import Reader + +# raise_server_exceptions=False -> custom exception handlers render error.html +client = TestClient(app, raise_server_exceptions=False) + +# Reference ontology for the deterministic suite +FABIO_URL = "https://w3id.org/spar/fabio" +FABIO_NS = "http://purl.org/spar/fabio/" + + +# --- Fixtures ---------------------------------------------------------------- +@pytest.fixture(scope="session") +def fabio_reader(): + """Load fabio once (network). Swap FABIO_URL for a local file to go offline.""" + r = Reader() + r.load_instances(FABIO_URL, "owl") + return r + + +@pytest.fixture(scope="session") +def fabio_ttl(fabio_reader, tmp_path_factory): + p = tmp_path_factory.mktemp("fix") / "fabio.ttl" + p.write_text(fabio_reader._graph.serialize(format="turtle"), encoding="utf-8") + return p + +@pytest.fixture +def patched_url(fabio_reader): + """GET-url tests reuse the cached fabio reader instead of hitting the net.""" + with patch("lode.api._load_url", return_value=fabio_reader) as m: + yield m + +# --- POST (file) ------------------------------------------------------------- +def test_post_file_html(fabio_ttl): + with open(fabio_ttl, "rb") as f: + resp = client.post( + "/extract", + data={"read_as": "owl"}, + files={"file": ("fabio.ttl", f, "text/turtle")}, + ) + assert resp.status_code == 200 + assert "Abstract" in resp.text + # token cablato nei link di navigazione + assert "upload_id" in resp.text + + +def test_upload_then_navigate(fabio_ttl): + """End-to-end: upload caches the Reader, navigation via token works + without the original file (and without network).""" + with open(fabio_ttl, "rb") as f: + resp = client.post( + "/extract", + data={"read_as": "owl"}, + files={"file": ("fabio.ttl", f, "text/turtle")}, + ) + token = re.search(r"upload_id=([0-9a-f]{32})", resp.text) + assert token, "no upload_id token in the rendered page" + + resp2 = client.get( + "/extract", + params={"read_as": "owl", "upload_id": token.group(1), + "resource": FABIO_NS + "Abstract"}, + ) + assert resp2.status_code == 200 + assert "Abstract" in resp2.text + + +# --- GET content negotiation ------------------------------------------------- +def test_get_turtle_full(patched_url): + resp = client.get("/extract", + params={"read_as": "owl", "url": FABIO_URL, "format": "ttl"}) + assert resp.status_code == 200 + assert resp.headers["content-type"].startswith("text/turtle") + assert "fabio" in resp.text.lower() + +@pytest.mark.parametrize("accept,ctype", [ + ("text/turtle", "text/turtle"), + ("application/rdf+xml", "application/rdf+xml"), + ("text/n3", "text/n3"), +]) +def test_get_accept_negotiation(patched_url, accept, ctype): + resp = client.get("/extract", + params={"read_as": "owl", "url": FABIO_URL}, + headers={"Accept": accept}) + assert resp.status_code == 200 + assert resp.headers["content-type"].startswith(ctype) + + +def test_get_subgraph_only(patched_url): + resp = client.get("/extract", params={ + "read_as": "owl", "url": FABIO_URL, "format": "ttl", + "resource": FABIO_NS + "Abstract"}) + assert resp.status_code == 200 + assert "Abstract" in resp.text + # subgraph: non deve contenere entità scorrelate + assert "AcademicProceedings" not in resp.text + + +def test_disposition_filename(patched_url): + resp = client.get("/extract", params={ + "read_as": "owl", "url": FABIO_URL, "format": "ttl", + "resource": FABIO_NS + "Abstract"}) + cd = resp.headers["content-disposition"] + assert "inline" in cd + assert "Abstract.ttl" in cd + + +def test_get_single_resource_html(patched_url): + resp = client.get("/extract", params={ + "read_as": "owl", "url": FABIO_URL, "resource": FABIO_NS + "Abstract"}) + assert resp.status_code == 200 + assert "Abstract" in resp.text + +def test_format_wins_over_accept(patched_url): + # format=ttl deve vincere anche se Accept chiede rdf+xml + resp = client.get("/extract", + params={"read_as": "owl", "url": FABIO_URL, "format": "ttl"}, + headers={"Accept": "application/rdf+xml"}) + assert resp.headers["content-type"].startswith("text/turtle") + +# --- URL cache --------------------------------------------------------------- +def test_url_cache_single_load(): + """Same url+params -> Reader parsed once, served from lru_cache after.""" + _load_url.cache_clear() + with patch.object(Reader, "load_instances") as m: + _load_url("u", "owl", None, None, False) + _load_url("u", "owl", None, None, False) + assert m.call_count == 1 + _load_url.cache_clear() \ No newline at end of file diff --git a/tests/test_reader.py b/tests/test_reader.py index de666ff..5b3821d 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -2,6 +2,9 @@ from pathlib import Path from lode.reader import Reader +@pytest.fixture +def reader(): + return Reader() class TestReaderInitialization: def test_reader_creates_instance(self, reader): @@ -205,25 +208,31 @@ def test_property_has_domain_range_triples(self, reader_with_triples): assert any('range' in pred for pred in predicates), "Manca rdfs:range" def test_all_instances_have_triples(self, reader_with_triples): - """Verifica che ogni istanza abbia almeno una tripla""" + + # top sintetici creati da LODE come radici implicite: niente triple di backing + SYNTHETIC_TOP = { + "http://www.w3.org/2002/07/owl#Thing", + "http://www.w3.org/2000/01/rdf-schema#Literal", + } + viewer = reader_with_triples.get_viewer() - all_instances = viewer.get_all_instances() - triples_map = reader_with_triples._logic._triples_map - instances_without_triples = [] - - for instance in all_instances: - triples = triples_map.get(instance, set()) - if len(triples) == 0: - instances_without_triples.append(instance) - - if instances_without_triples: - print(f"\n=== INSTANCES SENZA TRIPLE ===") - for instance in instances_without_triples: - print(f" {type(instance).__name__}: {instance}") - - assert len(instances_without_triples) == 0, \ - f"Trovate {len(instances_without_triples)} istanze senza triple" + graph = reader_with_triples._logic.graph + + orphans = [] + for inst in viewer.get_all_instances(): + if triples_map.get(inst): + continue + ident = inst.get_has_identifier() + if ident in SYNTHETIC_TOP: # <-- aggiungi questo + continue + # referenced-only (datatype XSD / classe esterna): lecito se appare come oggetto + from rdflib import URIRef + if ident and (None, None, URIRef(ident)) in graph: + continue + orphans.append(inst) + + assert not orphans, f"Istanze orfane reali: {[(type(i).__name__, i.get_has_identifier()) for i in orphans]}" def test_triples_map_keys_are_instances(self, reader_with_triples): """Verifica che le chiavi della triples_map siano istanze Python""" diff --git a/uv.lock b/uv.lock index 2c81d1c..33dff77 100644 --- a/uv.lock +++ b/uv.lock @@ -418,7 +418,7 @@ wheels = [ [[package]] name = "lode" -version = "0.2.2" +version = "0.2.5" source = { editable = "." } dependencies = [ { name = "fastapi" },