Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,6 @@ htmlcov/
docs/node_modules/
docs/.astro/
docs/dist/

# saved files
spool/
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ result = example_function()
print(result)
```



## Documentation

Full documentation is available at: https://opencitations.github.io/lode/
Expand Down
188 changes: 129 additions & 59 deletions lode/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
import os
import traceback
import logging
from urllib.parse import urlencode
from functools import lru_cache
from uuid import uuid4
import hashlib

# Configure logging
logging.basicConfig(
Expand Down Expand Up @@ -48,6 +52,88 @@
"text/n3": ("n3", "text/n3", "n3"),
}

_EXT_TO_SERIALIZATION = {
"ttl": ("turtle", "text/turtle", "ttl"),
"rdf": ("xml", "application/rdf+xml", "rdf"),
"n3": ("n3", "text/n3", "n3"),
}

# On-disk spool: a "spool" directory next to this module, created at import
# time if it does not exist yet. Files in it are addressed by token through
# _spool_path().
import time
SPOOL_DIR = os.path.join(os.path.dirname(__file__), "spool")
os.makedirs(SPOOL_DIR, exist_ok=True)
# Maximum age, in seconds (one hour), of a spool file before _prune_spool()
# deletes it.
_SPOOL_TTL = 60 * 60

def _spool_path(token: str) -> str:
    """Return the path of the spool file associated with ``token``.

    The file sits directly inside ``SPOOL_DIR`` and is named ``<token>.rdf``.
    The token is not validated here.
    """
    filename = "{}.rdf".format(token)
    return os.path.join(SPOOL_DIR, filename)

def _prune_spool():
    """Delete every entry in ``SPOOL_DIR`` whose mtime is older than ``_SPOOL_TTL``.

    Pruning is best effort: an entry that cannot be stat'ed or removed
    (for example because another request deleted it first) is skipped.
    """
    oldest_allowed = time.time() - _SPOOL_TTL
    for entry in os.listdir(SPOOL_DIR):
        candidate = os.path.join(SPOOL_DIR, entry)
        try:
            expired = os.path.getmtime(candidate) < oldest_allowed
            if expired:
                os.unlink(candidate)
        except OSError:
            # Deliberately ignored: a failure on one entry must not abort
            # the sweep or the request that triggered it.
            continue

# ----------------------------------------------------------
# HELPERS FOR /extract endpoints using cache from the reader
# ----------------------------------------------------------

def _nav_qs(read_as: str, url, upload_id, lang) -> str:
p = {"read_as": read_as, "lang": lang or ""}
p["upload_id" if upload_id else "url"] = upload_id or (url or "")
return urlencode(p)

def _render_view(request, reader, *, resource, lang, source_url, upload_id, read_as):
    """Render ``viewer.html`` for a reader that has already been loaded.

    The template context holds the request, the source reference
    (``source_url`` / ``upload_id``), the navigation query string built by
    ``_nav_qs`` and every key of the viewer's view data, including the
    reader's warnings under ``"warnings"``. View-data keys take precedence
    over the fixed context keys when names clash.
    """
    view_data = reader.get_viewer().get_view_data(resource_uri=resource, language=lang)
    view_data["warnings"] = reader.get_warnings()

    context = {
        "request": request,
        "source_url": source_url,
        "upload_id": upload_id,
        "nav_qs": _nav_qs(read_as, source_url, upload_id, lang),
    }
    context.update(view_data)
    return templates.TemplateResponse("viewer.html", context)

def _url_token(url, read_as, imported, closure) -> str:
key = f"{url}|{read_as}|{imported}|{closure}".encode()
return "url_" + hashlib.sha256(key).hexdigest()[:32]

def _load_url(url, read_as, imported, closure, warnings):
    """Return a ``Reader`` loaded from ``url``, using the on-disk spool as a cache.

    The cache key is the token produced by ``_url_token`` for
    ``(url, read_as, imported, closure)``.

    * Cache hit: the reader is loaded from the spooled file instead of the
      network.
    * Cache miss: the reader is loaded from ``url`` and its graph is then
      saved to the spool, serialized as Turtle, for later requests.

    Saving is best effort: an ``OSError`` while writing is ignored and the
    freshly loaded reader is still returned. Errors raised while loading
    propagate to the caller.

    NOTE(review): the cached file contains Turtle but is stored with the
    ``.rdf`` suffix from ``_spool_path`` and reloaded with the caller's
    ``read_as``. Confirm that the loader detects the syntax from content
    rather than from the file extension.
    """
    _prune_spool()
    path = _spool_path(_url_token(url, read_as, imported, closure))

    if os.path.exists(path):
        # Cache hit: rebuild the reader from the previously saved graph.
        reader = Reader()
        reader.load_instances(path, read_as, imported=imported, closure=closure, warnings=warnings)
        return reader

    # Cache miss: download and process the artefact from the URL.
    reader = Reader()
    reader.load_instances(url, read_as, imported=imported, closure=closure, warnings=warnings)

    # Persist the normalized graph for subsequent hits. The data is written
    # to a unique temporary name and moved into place with os.replace(), so
    # a failed or partial write never leaves an empty/truncated file at
    # `path` that a later request would treat as a valid cache entry, and a
    # concurrent request never reads a half-written file.
    tmp_path = f"{path}.{uuid4().hex}.tmp"
    try:
        # Serialize before creating any file: if this raises, nothing is
        # left behind on disk.
        payload = reader._graph.serialize(format="turtle").encode("utf-8")
        with open(tmp_path, "wb") as f:
            f.write(payload)
        os.replace(tmp_path, path)
    except OSError:
        # Caching is optional; drop the temporary file if it was created.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
    return reader

def _resolve_reader(read_as: str, url, upload_id, imported, closure, warnings):
    """Return a loaded ``Reader`` for either a spooled upload or a remote URL.

    ``upload_id`` takes precedence over ``url`` when both are given.

    Args:
        read_as: Format name forwarded to ``Reader.load_instances``.
        url: Remote location of the artefact, or a falsy value.
        upload_id: Token of a previously uploaded file in the spool, or a
            falsy value.
        imported, closure, warnings: Forwarded unchanged to the loader.

    Raises:
        ArtefactValidationError: If ``upload_id`` is malformed, if the
            referenced upload is no longer in the spool, or if neither
            ``url`` nor ``upload_id`` is provided.
    """
    import re  # local import: only needed for upload_id validation

    if upload_id:
        # Security: `upload_id` comes straight from the request and is used
        # to build a filesystem path. Accept only the characters that spool
        # tokens are made of (hex digits, plus the "url_" prefix of URL
        # cache tokens) so that values such as "../../etc/passwd" can never
        # escape SPOOL_DIR.
        if not re.fullmatch(r"[A-Za-z0-9_]+", upload_id):
            raise ArtefactValidationError("Invalid 'upload_id'",
                                          context={"upload_id": upload_id})
        path = _spool_path(upload_id)
        if not os.path.exists(path):
            # The file was pruned after its TTL, or it never existed.
            raise ArtefactValidationError("Upload expired, please re-upload",
                                          context={"upload_id": upload_id})
        reader = Reader()
        reader.load_instances(path, read_as, imported=imported, closure=closure, warnings=warnings)
        return reader
    if url:
        return _load_url(url, read_as, imported, closure, warnings)
    raise ArtefactValidationError("Missing 'url' or 'upload_id'")

# ----------------------------------------------------------
# ERROR RENDERING
# ----------------------------------------------------------
Expand Down Expand Up @@ -105,39 +191,40 @@
async def extract_get(
request: Request,
read_as: ReadAsFormat,
url: str,
url: Optional[str] = None,
upload_id: Optional[str] = None,
resource: Optional[str] = None,
lang: Optional[str] = None,
imported: Optional[bool] = None,
closure: Optional[bool] = None,
closure: Optional[bool] = None,
format: Optional[str] = None,
warnings: bool = False
):
_check_format_enabled(read_as)

reader = Reader()
reader.load_instances(url, read_as.value, imported=imported, closure=closure, warnings=warnings)

reader = _resolve_reader(read_as.value, url, upload_id, imported, closure, warnings)

# Content negotiation
accept = request.headers.get("accept", "text/html")
if accept in _ACCEPT_TO_SERIALIZATION:
rdflib_fmt, mime_type, ext = _ACCEPT_TO_SERIALIZATION[accept]
serialized = reader._graph.serialize(format=rdflib_fmt)
filename = url.rstrip("/").split("/")[-1] or "graph"
return Response(
content=serialized,
media_type=mime_type,
headers={"Content-Disposition": f'attachment; filename="{filename}.{ext}"'}
)

viewer = reader.get_viewer()
data = viewer.get_view_data(resource_uri=resource, language=lang)
data['warnings'] = reader.get_warnings()
serial = None
if format and format.lower() in _EXT_TO_SERIALIZATION:
serial = _EXT_TO_SERIALIZATION[format.lower()]
elif accept in _ACCEPT_TO_SERIALIZATION:
serial = _ACCEPT_TO_SERIALIZATION[accept]
if serial:
rdflib_fmt, mime_type, ext = serial
if resource:
serialized = reader.get_viewer().export_resource(resource, rdflib_fmt)
filename = resource.rstrip("/").split("#")[-1].split("/")[-1] or "resource"
else:
serialized = reader._graph.serialize(format=rdflib_fmt)
filename = (url.rstrip("/").split("/")[-1] if url else "graph") or "graph"
return Response(content=serialized, media_type=mime_type,
headers={"Content-Disposition": f'inline; filename="{filename}.{ext}"'})

logger.info(f"=== REQUEST SUCCESS ===")
return templates.TemplateResponse("viewer.html", {
"request": request,
"source_url": url,
**data
})
return _render_view(request, reader, resource=resource, lang=lang,
source_url=url, upload_id=upload_id, read_as=read_as.value)

@app.post("/extract", response_class=HTMLResponse)
async def extract_post(
Expand All @@ -149,46 +236,29 @@
imported: Optional[str] = Form(None),
closure: Optional[str] = Form(None),
warnings: bool = False,

):
"""Visualizza semantic artefact da file."""
temp_file_path = None

try:
logger.info(f"=== FILE UPLOAD START ===")
logger.info(f"Filename: {file.filename}")
logger.info(f"Format: {read_as.value}")
logger.info(f"=== FILE UPLOAD START ===")
logger.info(f"Filename: {file.filename}")
logger.info(f"Format: {read_as.value}")

# SECURITY CHECKS: validate before writing to disk
_check_format_enabled(read_as)
security.check_extension(file.filename)
content = await security.read_upload_capped(file) # chunked read + size cap (anti-DoS)
security.check_is_text(content) # binary rejection
security.check_safe_xml(content.decode("utf-8-sig")) # XXE / billion laughs
# SECURITY CHECKS
_check_format_enabled(read_as)
security.check_extension(file.filename)
content = await security.read_upload_capped(file)
security.check_is_text(content)
security.check_safe_xml(content.decode("utf-8-sig"))

# Write temp file (valid input only)
with tempfile.NamedTemporaryFile(delete=False, suffix=".rdf") as tmp:
tmp.write(content)
temp_file_path = tmp.name
_prune_spool()
token = uuid4().hex
path = _spool_path(token)
with open(path, "wb") as f:
f.write(content)

# Initialise Reader and calls the Loader -> Reader -> Viewer and populates it
reader = Reader()
reader.load_instances(temp_file_path, read_as.value, imported=imported, closure=closure, warnings=warnings)
viewer = reader.get_viewer()
data = viewer.get_view_data(resource_uri=resource, language=lang)
data['warnings'] = reader.get_warnings()

logger.info(f"=== UPLOAD SUCCESS ===")
return templates.TemplateResponse("viewer.html", {
"request": request,
"source_url": None,
**data
})

finally:
# Security: remove the temp file once it has been loaded
if temp_file_path and os.path.exists(temp_file_path):
os.unlink(temp_file_path)
reader = Reader()
reader.load_instances(path, read_as.value, imported=imported, closure=closure, warnings=warnings)
return _render_view(request, reader, resource=resource, lang=lang,
source_url=None, upload_id=token, read_as=read_as.value)

@app.get("/", response_class=HTMLResponse)
async def input_web_interface(request: Request):
Expand Down
11 changes: 5 additions & 6 deletions lode/templates/_entity_card.html
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,11 @@ <h4 class="fw-bold mb-0">
<div class="card-body px-1 pb-1">
<div class="mb-2">
<small class="text-muted d-block mb-1">IRI: <code class="text-break bg-light p-2 rounded flex-grow-1 border">{{ item.uri }}</code>
{% if is_static %}
<a href="{{ resource_url(item.uri, item.type.lower() ~ 's') }}" class="ms-2" title="Detailed View">↗</a>
{% else %}
<a href="?read_as={{ request.query_params.read_as }}&url={{ request.query_params.url }}&lang={{ request.query_params.lang }}&resource={{ item.uri | urlencode }}"
class="ms-2" title="Detailed View">↗</a>
{% endif %}
{% if is_static %}
<a href="{{ resource_url(item.uri, item.type.lower() ~ 's') }}" class="ms-2" title="Detailed View">↗</a>
{% else %}
<a href="/extract?{{ nav_qs }}&resource={{ item.uri | urlencode }}" class="ms-2" title="Detailed View">↗</a>
{% endif %}
</small>
</div>

Expand Down
65 changes: 12 additions & 53 deletions lode/templates/_toc_sidebar.html
Original file line number Diff line number Diff line change
@@ -1,30 +1,7 @@
{# _toc_sidebar.html — TOC sidebar shared by owl/skos/rdf viewers.
Requires `sections` in context:
section.id, section.title, section.entities[].{label,anchor_id,uri}
Optional:
section.tree -> nested [{label, anchor_id, uri, children:[...]}, ...]
type_map (viewer.html) #}

{# Recursive macro: renders one tree node + its children #}
{% macro render_node(node) %}
<li class="toc-node">
{% if node.children %}
<details class="toc-tree-branch" open>
<summary class="toc-tree-summary">
<a href="#{{ node.anchor_id }}" title="{{ node.uri }}">{{ node.label }}</a>
</summary>
<ul class="toc-tree-children">
{% for child in node.children %}
{{ render_node(child) }}
{% endfor %}
</ul>
</details>
{% else %}
<a class="toc-tree-leaf" href="#{{ node.anchor_id }}" title="{{ node.uri }}">{{ node.label }}</a>
{% endif %}
</li>
{% endmacro %}

Requires `sections` in context (same shape used by viewer templates):
section.id, section.title, section.entities[].label, section.entities[].anchor_id
Optional: type_map (viewer.html). Falls back to section.title raw. #}
{% if grouped_view and sections %}
<aside id="toc-sidebar" class="toc-sidebar open" aria-label="Table of Contents">
<button type="button" class="toc-sidebar-toggle" aria-controls="toc-sidebar" aria-expanded="true"
Expand All @@ -50,22 +27,13 @@ <h6 class="toc-sidebar-title mb-0">Table of Contents</h6>
</span>
<span class="toc-sidebar-section-count">{{ section.entities|length }}</span>
</summary>

{% if section.tree %}
<ul class="toc-tree-root">
{% for node in section.tree %}
{{ render_node(node) }}
{% endfor %}
</ul>
{% else %}
<ul class="toc-sidebar-list">
{% for item in section.entities %}
<li>
<a href="#{{ item.anchor_id }}" title="{{ item.uri }}">{{ item.label }}</a>
</li>
{% endfor %}
</ul>
{% endif %}
<ul class="toc-sidebar-list">
{% for item in section.entities %}
<li>
<a href="#{{ item.anchor_id }}" title="{{ item.uri }}">{{ item.label }}</a>
</li>
{% endfor %}
</ul>
</details>
{% endfor %}
</nav>
Expand All @@ -88,6 +56,7 @@ <h6 class="toc-sidebar-title mb-0">Table of Contents</h6>
document.body.classList.toggle('toc-sidebar-closed', !open);
}

// Default: open. Persist user choice across pages.
const stored = localStorage.getItem('lode.tocSidebar');
apply(stored === null ? true : stored === 'open');

Expand All @@ -97,6 +66,7 @@ <h6 class="toc-sidebar-title mb-0">Table of Contents</h6>
localStorage.setItem('lode.tocSidebar', willOpen ? 'open' : 'closed');
});

// Highlight currently-visible section/entity while scrolling.
const links = sidebar.querySelectorAll('a[href^="#"]');
const byId = new Map();
links.forEach(a => byId.set(a.getAttribute('href').slice(1), a));
Expand All @@ -118,17 +88,6 @@ <h6 class="toc-sidebar-title mb-0">Table of Contents</h6>
}, { rootMargin: '-30% 0px -60% 0px', threshold: 0 });
targets.forEach(t => io.observe(t));
}

// Auto-open ancestor <details> when a child link is clicked
sidebar.addEventListener('click', function (ev) {
const a = ev.target.closest('a[href^="#"]');
if (!a) return;
let n = a.parentElement;
while (n && n !== sidebar) {
if (n.tagName === 'DETAILS') n.open = true;
n = n.parentElement;
}
});
})();
</script>
{% endif %}
Loading
Loading