From 1b9eef61c208301e984ad0f560d40e068f5049e3 Mon Sep 17 00:00:00 2001 From: rempairamore Date: Tue, 23 Jun 2026 15:14:56 +0200 Subject: [PATCH 1/3] fixing security --- lode/api.py | 13 +++++- lode/reader/loader.py | 36 +++++++++++++++- lode/reader/security.py | 24 +++++++---- lode/templates/index.html | 26 +++++++++--- tests/test_security.py | 86 ++++++++++++++++++++++++++++++++++++++- 5 files changed, 168 insertions(+), 17 deletions(-) diff --git a/lode/api.py b/lode/api.py index fb9768e..2bc68a1 100644 --- a/lode/api.py +++ b/lode/api.py @@ -59,12 +59,18 @@ class ReadAsFormat(str, Enum): } import time -SPOOL_DIR = os.path.join(os.path.dirname(__file__), "spool") +SPOOL_DIR = os.path.realpath(os.path.join(os.path.dirname(__file__), "spool")) os.makedirs(SPOOL_DIR, exist_ok=True) _SPOOL_TTL = 60 * 60 def _spool_path(token: str) -> str: - return os.path.join(SPOOL_DIR, f"{token}.rdf") + # Spool tokens are opaque IDs we mint ourselves (uuid4 hex / "url_"+sha256). + # Resolve and confirm the path stays inside SPOOL_DIR, so a crafted upload_id + # cannot traverse out of it (path injection). + path = os.path.realpath(os.path.join(SPOOL_DIR, f"{token}.rdf")) + if os.path.commonpath((SPOOL_DIR, path)) != SPOOL_DIR: + raise ArtefactValidationError("Invalid upload token", context={"token": token}) + return path def _prune_spool(): cutoff = time.time() - _SPOOL_TTL @@ -102,6 +108,9 @@ def _url_token(url, read_as, imported, closure) -> str: return "url_" + hashlib.sha256(key).hexdigest()[:32] def _load_url(url, read_as, imported, closure, warnings): + # Enforce http(s)://host up front: a non-URL value (local path, file://, ...) + # must never reach the loader and be opened as a local file. + security.check_url_safe(url) _prune_spool() token = _url_token(url, read_as, imported, closure) path = _spool_path(token) diff --git a/lode/reader/loader.py b/lode/reader/loader.py index 230abfc..0d250fa 100644 --- a/lode/reader/loader.py +++ b/lode/reader/loader.py @@ -9,7 +9,7 @@ import lode.reader.modules as modules from lode.reader import security -from lode.exceptions import ArtefactLoadError, ArtefactNotFoundError +from lode.exceptions import ArtefactLoadError, ArtefactNotFoundError, ArtefactValidationError class Loader: @@ -33,6 +33,15 @@ def load(self, source: str) -> None: if self._is_url(source): self._load_from_url_with_content_negotiation(source) else: + # A value carrying a URL scheme that is not http(s) (file:, ftp:, ...) + # must not be silently treated as a local path. Bare local paths + # (no scheme) are still allowed here for the CLI. + scheme = urlparse(source).scheme + if scheme: + raise ArtefactValidationError( + "URL scheme not allowed; use http(s)://host", + context={"scheme": scheme}, + ) self._load_from_local_file(source) if len(self.graph) == 0: @@ -191,6 +200,9 @@ def _fetch_following_redirects(self, url: str, headers: dict, max_redirects: int security.check_url_safe(current) response = requests.get(current, headers=headers, timeout=10, stream=True, allow_redirects=False) + # Validate the IP we ACTUALLY connected to, before reading anything: + # defeats DNS rebinding between check_url_safe above and this connect. + self._verify_peer_ip(response) if response.status_code in (301, 302, 303, 307, 308): location = response.headers.get("Location") response.close() @@ -201,4 +213,26 @@ def _fetch_following_redirects(self, url: str, headers: dict, max_redirects: int return response raise ArtefactLoadError("Too many redirects", context={"url": url}) + def _verify_peer_ip(self, response) -> None: + """Re-check the SSRF policy against the socket's real peer IP. If it + cannot be determined (e.g. mocked in tests) fall back to the per-hop + check_url_safe already done before connecting.""" + ip = self._peer_ip(response) + if ip is None: + return + try: + security.check_ip_safe(ip) + except ArtefactValidationError: + response.close() + raise + + @staticmethod + def _peer_ip(response): + """Best-effort extraction of the connected peer IP from a streamed + requests response (reaches into urllib3 internals, hence defensive).""" + try: + return response.raw._connection.sock.getpeername()[0] + except Exception: + return None + diff --git a/lode/reader/security.py b/lode/reader/security.py index d677737..cf7067e 100644 --- a/lode/reader/security.py +++ b/lode/reader/security.py @@ -78,6 +78,22 @@ def check_extension(name: str) -> None: if ext not in ALLOWED_EXTENSIONS: raise ArtefactValidationError("Extension not allowed", context={"ext": ext}) +def check_ip_safe(ip_str: str) -> None: + """Reject an IP that points at an internal/non-routable range (SSRF). + + Reused both before connecting (each resolved IP) and after connecting (the + real peer IP), so a DNS rebinding between the two cannot reach an internal + host. + """ + ip = ipaddress.ip_address(ip_str) + # IPv4-mapped IPv6 (e.g. ::ffff:127.0.0.1) would bypass the v4 checks below. + if getattr(ip, "ipv4_mapped", None): + ip = ip.ipv4_mapped + if (ip.is_private or ip.is_loopback or ip.is_link_local + or ip.is_reserved or ip.is_multicast or ip.is_unspecified): + raise ArtefactValidationError("Blocked address", context={"ip": str(ip)}) + + def check_url_safe(url: str) -> None: """Block non-http schemes and SSRF toward private/internal hosts.""" parsed = urlparse(url) @@ -91,13 +107,7 @@ def check_url_safe(url: str) -> None: except socket.gaierror: raise ArtefactValidationError("Cannot resolve host", context={"host": host}) for info in infos: - ip = ipaddress.ip_address(info[4][0]) - # IPv4-mapped IPv6 (e.g. ::ffff:127.0.0.1) would bypass the v4 checks below. - if getattr(ip, "ipv4_mapped", None): - ip = ip.ipv4_mapped - if (ip.is_private or ip.is_loopback or ip.is_link_local - or ip.is_reserved or ip.is_multicast or ip.is_unspecified): - raise ArtefactValidationError("Blocked address", context={"host": host, "ip": str(ip)}) + check_ip_safe(info[4][0]) def check_is_text(data: bytes) -> None: """RDF serializations are text. Reject binary blobs.""" diff --git a/lode/templates/index.html b/lode/templates/index.html index 0bc0b39..6825d5b 100644 --- a/lode/templates/index.html +++ b/lode/templates/index.html @@ -18,7 +18,8 @@
-
+ +
@@ -28,14 +29,15 @@
- +
@@ -354,6 +356,7 @@

Examples

const fileSection = document.getElementById('fileSection'); const urlInput = document.getElementById('semanticArtefactUrl'); const fileInput = document.getElementById('semanticArtefactFile'); + const urlError = document.getElementById('urlError'); let isFileMode = false; @@ -424,6 +427,10 @@

Examples

return null; } fileInput.addEventListener('change', clearFileError); + urlInput.addEventListener('input', () => { + urlInput.classList.remove('is-invalid'); + urlError.textContent = ''; + }); form.addEventListener('submit', async (e) => { e.preventDefault(); @@ -431,6 +438,13 @@

Examples

// ── GET (URL mode) ── if (!isFileMode) { const url = urlInput.value.trim(); + // Mirror the server-side check: only http(s)://host is accepted. + if (!/^https?:\/\/.+/i.test(url)) { + urlInput.classList.add('is-invalid'); + urlError.textContent = 'Enter a full URL starting with http:// or https://'; + return; + } + urlInput.classList.remove('is-invalid'); const params = new URLSearchParams({ url }); diff --git a/tests/test_security.py b/tests/test_security.py index f0a006f..fc40113 100644 --- a/tests/test_security.py +++ b/tests/test_security.py @@ -16,6 +16,7 @@ """ import asyncio import io +import os import socket import pytest @@ -27,14 +28,23 @@ # ---------------------------------------------------------------------- # Helpers / fakes # ---------------------------------------------------------------------- +class _PeerRaw: + """Mimics urllib3 response internals so Loader._peer_ip can read the IP.""" + def __init__(self, ip): + sock = type("Sock", (), {"getpeername": staticmethod(lambda: (ip, 443))})() + self._connection = type("Conn", (), {"sock": sock})() + + class FakeResponse: """Minimal stand-in for requests.Response (stream=True).""" - def __init__(self, status_code=200, headers=None, body=b"", url="http://1.2.3.4/x"): + def __init__(self, status_code=200, headers=None, body=b"", url="http://1.2.3.4/x", peer_ip=None): self.status_code = status_code self.headers = headers or {} self.url = url self._body = body self.closed = False + # Only set when a test needs Loader._verify_peer_ip to see a peer IP. + self.raw = _PeerRaw(peer_ip) if peer_ip else None def iter_content(self, chunk_size): for i in range(0, len(self._body), chunk_size): @@ -501,3 +511,77 @@ def test_extra_internal_addresses_rejected(self, monkeypatch, ip): ) with pytest.raises(ArtefactValidationError): security.check_url_safe("http://whatever.example.org/x") + + +# ---------------------------------------------------------------------- +# api._spool_path (path injection hardening on upload_id) +# ---------------------------------------------------------------------- +class TestSpoolPathTraversal: + + @pytest.mark.parametrize("token", [ + "../../../etc/passwd", + "/etc/passwd", + "../secret", + ]) + def test_traversal_token_rejected(self, token): + from lode import api + with pytest.raises(ArtefactValidationError): + api._spool_path(token) + + def test_legit_token_stays_in_spool(self): + from lode import api + p = api._spool_path("0123abcd") + assert p.endswith("0123abcd.rdf") + assert os.path.commonpath((api.SPOOL_DIR, p)) == api.SPOOL_DIR + + +# ---------------------------------------------------------------------- +# URL input must be http(s)://host (blocks local-path / file:// as "url") +# ---------------------------------------------------------------------- +class TestUrlMustBeHttp: + + @pytest.mark.parametrize("bad", [ + "/etc/passwd", + "file:///etc/passwd", + "ftp://example.org/x", + "not-a-url", + ]) + def test_resolve_reader_rejects_non_http_url(self, bad): + from lode import api + with pytest.raises(ArtefactValidationError): + api._resolve_reader("owl", bad, None, None, None, False) + + def test_loader_rejects_non_http_scheme(self): + with pytest.raises(ArtefactValidationError): + Loader().load("file:///etc/passwd") + + +# ---------------------------------------------------------------------- +# Loader._verify_peer_ip (DNS-rebinding: validate the real connected IP) +# ---------------------------------------------------------------------- +class TestPeerIpRebinding: + + def _public_dns(self, monkeypatch): + monkeypatch.setattr( + "lode.reader.security.socket.getaddrinfo", + lambda host, port, *a, **k: [ + (socket.AF_INET, socket.SOCK_STREAM, 6, "", ("93.184.216.34", 0)) + ], + ) + + def test_peer_internal_ip_blocked(self, monkeypatch): + # Pre-check passes (host resolves public) but the socket actually + # connected to an internal IP (rebinding) -> abort before reading. + self._public_dns(monkeypatch) + resp = FakeResponse(status_code=200, peer_ip="169.254.169.254") + monkeypatch.setattr("lode.reader.loader.requests.get", make_fake_get([resp])) + with pytest.raises(ArtefactValidationError): + Loader()._fetch_following_redirects("http://sneaky.example.org/o.ttl", headers={}) + assert resp.closed is True + + def test_peer_public_ip_allowed(self, monkeypatch): + self._public_dns(monkeypatch) + resp = FakeResponse(status_code=200, peer_ip="93.184.216.34") + monkeypatch.setattr("lode.reader.loader.requests.get", make_fake_get([resp])) + out = Loader()._fetch_following_redirects("http://ok.example.org/o.ttl", headers={}) + assert out is resp From e590bd5f037c7a3a9a700007d0be88b9a3a64a4d Mon Sep 17 00:00:00 2001 From: rempairamore Date: Tue, 23 Jun 2026 15:35:26 +0200 Subject: [PATCH 2/3] add caching --- lode/api.py | 55 +++++++++++++++++++++++++++++++-------- lode/templates/index.html | 14 ++++++++++ tests/test_api.py | 33 ++++++++++++++++------- tests/test_security.py | 40 ++++++++++++++++++++++++++++ 4 files changed, 122 insertions(+), 20 deletions(-) diff --git a/lode/api.py b/lode/api.py index 2bc68a1..431b7ee 100644 --- a/lode/api.py +++ b/lode/api.py @@ -61,7 +61,8 @@ class ReadAsFormat(str, Enum): import time SPOOL_DIR = os.path.realpath(os.path.join(os.path.dirname(__file__), "spool")) os.makedirs(SPOOL_DIR, exist_ok=True) -_SPOOL_TTL = 60 * 60 +_SPOOL_TTL = 4 * 60 * 60 # entries are cached for 4 hours +_SPOOL_MAX_BYTES = 1024 ** 3 # 1 GB total budget shared by uploads + URLs def _spool_path(token: str) -> str: # Spool tokens are opaque IDs we mint ourselves (uuid4 hex / "url_"+sha256). @@ -73,12 +74,36 @@ def _spool_path(token: str) -> str: return path def _prune_spool(): + """Evict expired entries, then enforce the total-size budget by deleting the + oldest (by cache-write time) until back under the cap. Uploads and URL caches + share the same budget. Best-effort across workers (races caught via OSError). + """ cutoff = time.time() - _SPOOL_TTL + survivors = [] # (mtime, size, path) of entries still within the TTL for name in os.listdir(SPOOL_DIR): p = os.path.join(SPOOL_DIR, name) try: - if os.path.getmtime(p) < cutoff: + st = os.stat(p) + except OSError: + continue + if st.st_mtime < cutoff: + try: os.unlink(p) + except OSError: + pass + continue + survivors.append((st.st_mtime, st.st_size, p)) + + total = sum(size for _, size, _ in survivors) + if total <= _SPOOL_MAX_BYTES: + return + survivors.sort() # oldest cache-write time first + for _, size, p in survivors: + if total <= _SPOOL_MAX_BYTES: + break + try: + os.unlink(p) + total -= size except OSError: pass @@ -107,19 +132,25 @@ def _url_token(url, read_as, imported, closure) -> str: key = f"{url}|{read_as}|{imported}|{closure}".encode() return "url_" + hashlib.sha256(key).hexdigest()[:32] -def _load_url(url, read_as, imported, closure, warnings): +def _load_url(url, read_as, imported, closure, warnings, use_cache=True): # Enforce http(s)://host up front: a non-URL value (local path, file://, ...) # must never reach the loader and be opened as a local file. security.check_url_safe(url) _prune_spool() token = _url_token(url, read_as, imported, closure) path = _spool_path(token) - if os.path.exists(path): + if use_cache and os.path.exists(path): # cache hit: ricostruisci dal Turtle salvato reader = Reader() reader.load_instances(path, read_as, imported=imported, closure=closure, warnings=warnings) return reader - # cache miss: scarica e processa dalla URL + if not use_cache: + # cache=false: drop the stale copy so the fresh fetch replaces it + try: + os.unlink(path) + except OSError: + pass + # cache miss (or forced refresh): scarica e processa dalla URL reader = Reader() reader.load_instances(url, read_as, imported=imported, closure=closure, warnings=warnings) # persisti il grafo normalizzato per i prossimi hit @@ -130,8 +161,9 @@ def _load_url(url, read_as, imported, closure, warnings): pass return reader -def _resolve_reader(read_as: str, url, upload_id, imported, closure, warnings): +def _resolve_reader(read_as: str, url, upload_id, imported, closure, warnings, use_cache=True): if upload_id: + # Uploads are not re-fetched, so the cache flag does not apply to them. path = _spool_path(upload_id) if not os.path.exists(path): raise ArtefactValidationError("Upload expired, please re-upload", @@ -140,7 +172,7 @@ def _resolve_reader(read_as: str, url, upload_id, imported, closure, warnings): reader.load_instances(path, read_as, imported=imported, closure=closure, warnings=warnings) return reader if url: - return _load_url(url, read_as, imported, closure, warnings) + return _load_url(url, read_as, imported, closure, warnings, use_cache=use_cache) raise ArtefactValidationError("Missing 'url' or 'upload_id'") # ---------------------------------------------------------- @@ -206,12 +238,13 @@ async def extract_get( lang: Optional[str] = None, imported: Optional[bool] = None, closure: Optional[bool] = None, - format: Optional[str] = None, - warnings: bool = False + format: Optional[str] = None, + warnings: bool = False, + cache: bool = True ): _check_format_enabled(read_as) - - reader = _resolve_reader(read_as.value, url, upload_id, imported, closure, warnings) + + reader = _resolve_reader(read_as.value, url, upload_id, imported, closure, warnings, use_cache=cache) # Content negotiation accept = request.headers.get("accept", "text/html") diff --git a/lode/templates/index.html b/lode/templates/index.html index 6825d5b..f9ac170 100644 --- a/lode/templates/index.html +++ b/lode/templates/index.html @@ -325,6 +325,19 @@

Parameters

closure=True + + cache + GET + boolean + No + + URL only. When true (default), the processed URL is served + from a 4-hour server-side cache when available. Set to false to + discard the cached copy and re-fetch the URL, so updates to the ontology are + picked up immediately; the fresh result then replaces the cache. + + cache=false + @@ -337,6 +350,7 @@

Examples

/extract?url=http://purl.org/spar/fabio&read_as=owl&imported=true&lang=en /extract?url=http://purl.org/spar/fabio&read_as=owl&closure=true /extract?url=http://purl.org/spar/fabio&read_as=owl&resource=http://purl.org/spar/fabio/Abstract&lang=it +/extract?url=http://purl.org/spar/fabio&read_as=owl&cache=false
diff --git a/tests/test_api.py b/tests/test_api.py index 115b626..d35147c 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -14,7 +14,7 @@ import pytest from fastapi.testclient import TestClient -from lode.api import app, _load_url +from lode.api import app from lode.reader import Reader # raise_server_exceptions=False -> custom exception handlers render error.html @@ -135,11 +135,26 @@ def test_format_wins_over_accept(patched_url): assert resp.headers["content-type"].startswith("text/turtle") # --- URL cache --------------------------------------------------------------- -def test_url_cache_single_load(): - """Same url+params -> Reader parsed once, served from lru_cache after.""" - _load_url.cache_clear() - with patch.object(Reader, "load_instances") as m: - _load_url("u", "owl", None, None, False) - _load_url("u", "owl", None, None, False) - assert m.call_count == 1 - _load_url.cache_clear() \ No newline at end of file +def test_url_spool_cache_and_bypass(tmp_path, monkeypatch): + """First load fetches from the URL and writes the spool; a second load is + served from the spool file; cache=false drops it and fetches again.""" + import os + from rdflib import Graph + from lode import api + + monkeypatch.setattr(api.security, "check_url_safe", lambda u: None) + monkeypatch.setattr(api, "SPOOL_DIR", os.path.realpath(str(tmp_path))) + + seen = [] + def fake_load(self, graph_path, read_as, **kw): + seen.append(graph_path) + self._graph = Graph() + monkeypatch.setattr(Reader, "load_instances", fake_load) + + api._load_url("http://x/o", "owl", None, None, False) # miss -> URL + api._load_url("http://x/o", "owl", None, None, False) # hit -> spool file + api._load_url("http://x/o", "owl", None, None, False, use_cache=False) # bypass -> URL + + assert seen[0] == "http://x/o" + assert seen[1].endswith(".rdf") and seen[1] != "http://x/o" # served from spool + assert seen[2] == "http://x/o" # cache=false refetched \ No newline at end of file diff --git a/tests/test_security.py b/tests/test_security.py index fc40113..7c25ff7 100644 --- a/tests/test_security.py +++ b/tests/test_security.py @@ -585,3 +585,43 @@ def test_peer_public_ip_allowed(self, monkeypatch): monkeypatch.setattr("lode.reader.loader.requests.get", make_fake_get([resp])) out = Loader()._fetch_following_redirects("http://ok.example.org/o.ttl", headers={}) assert out is resp + + +# ---------------------------------------------------------------------- +# api._prune_spool (4h TTL + 1GB budget, evict oldest) +# ---------------------------------------------------------------------- +class TestSpoolQuota: + + def _spool(self, tmp_path, monkeypatch): + from lode import api + monkeypatch.setattr(api, "SPOOL_DIR", os.path.realpath(str(tmp_path))) + return api + + def test_prune_evicts_oldest_over_budget(self, tmp_path, monkeypatch): + import time + api = self._spool(tmp_path, monkeypatch) + monkeypatch.setattr(api, "_SPOOL_MAX_BYTES", 300) + monkeypatch.setattr(api, "_SPOOL_TTL", 10_000) # don't expire by TTL here + now = time.time() + for i in range(4): # 4 x 100 bytes = 400 > 300 budget + p = os.path.join(api.SPOOL_DIR, f"f{i}.rdf") + with open(p, "wb") as fh: + fh.write(b"x" * 100) + os.utime(p, (now - (40 - i * 10), now - (40 - i * 10))) # f0 oldest, f3 newest + api._prune_spool() + names = {n for n in os.listdir(api.SPOOL_DIR) if n.endswith(".rdf")} + assert "f0.rdf" not in names # oldest evicted first + assert len(names) == 3 # back under budget + + def test_prune_expires_by_ttl(self, tmp_path, monkeypatch): + import time + api = self._spool(tmp_path, monkeypatch) + monkeypatch.setattr(api, "_SPOOL_TTL", 60) + for name in ("old.rdf", "new.rdf"): + with open(os.path.join(api.SPOOL_DIR, name), "wb") as fh: + fh.write(b"x") + os.utime(os.path.join(api.SPOOL_DIR, "old.rdf"), + (time.time() - 3600, time.time() - 3600)) # 1h old > 60s TTL + api._prune_spool() + names = set(os.listdir(api.SPOOL_DIR)) + assert "old.rdf" not in names and "new.rdf" in names From f0e79574bc0d6e23797b4f92db8b72f440947c0f Mon Sep 17 00:00:00 2001 From: rempairamore Date: Tue, 23 Jun 2026 15:50:31 +0200 Subject: [PATCH 3/3] dockerfile as non root user --- .dockerignore | 29 +++++++++++++++++++++++++++++ Dockerfile | 45 ++++++++++++++++++++++++--------------------- pyproject.toml | 2 +- uv.lock | 2 +- 4 files changed, 55 insertions(+), 23 deletions(-) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..11ebf31 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,29 @@ +# Python caches / build artifacts +__pycache__/ +*.py[cod] +*.so +build/ +dist/ +*.egg-info/ + +# Virtual environments — must NOT clobber the image-built venv +.venv/ + +# Testing +.pytest_cache/ +.coverage +htmlcov/ + +# Runtime spool cache +**/spool/ + +# VCS / CI / IDE +.git/ +.github/ +.vscode/ +.idea/ + +# Docs site build artifacts +docs/node_modules/ +docs/.astro/ +docs/dist/ diff --git a/Dockerfile b/Dockerfile index 84d18b4..af5ab1d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,41 +1,44 @@ # Base image: Python slim for a lightweight container FROM python:3.11-slim -# Define environment variables with default values -# These can be overridden during container runtime +# Environment defaults (overridable at runtime) ENV BASE_URL="lode.opencitations.net" - -# Ensure Python output is unbuffered ENV PYTHONUNBUFFERED=1 -# Install system dependencies + uv +# System dependencies + uv (installed to a system path so a non-root user can use it) RUN apt-get update && \ - apt-get install -y \ - git \ - python3-dev \ - build-essential \ - curl && \ - curl -LsSf https://astral.sh/uv/install.sh | sh && \ + apt-get install -y --no-install-recommends \ + git \ + python3-dev \ + build-essential \ + curl && \ + curl -LsSf https://astral.sh/uv/install.sh | \ + env UV_INSTALL_DIR=/usr/local/bin INSTALLER_NO_MODIFY_PATH=1 sh && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -# Make uv available in PATH -ENV PATH="/root/.local/bin:$PATH" +# Non-root user that owns the app and runs the server +RUN useradd --create-home --uid 10001 appuser -# Set the working directory for our application WORKDIR /website +RUN chown appuser:appuser /website -# Copy dependency files first for better Docker layer caching -COPY pyproject.toml uv.lock README.md ./ +# Drop privileges before building and running: the venv and the runtime spool +# directory end up owned by appuser, so no root is needed at any point. +USER appuser -# Install dependencies (frozen = use exact lockfile versions) +# Install dependencies first for better layer caching (frozen = exact lockfile) +COPY --chown=appuser:appuser pyproject.toml uv.lock README.md ./ RUN uv sync --frozen --no-dev --no-install-project -# Copy application code -COPY . . +# Application code +COPY --chown=appuser:appuser . . + +# At runtime uv must only launch the entrypoint, never re-sync the prebuilt env +ENV UV_NO_SYNC=1 -# Expose the port that our service will listen on +# Service port (>1024, bindable by a non-root user) EXPOSE 8080 # Start the application with gunicorn via uv -CMD ["uv", "run", "gunicorn", "-c", "gunicorn.conf.py", "lode.api:app"] \ No newline at end of file +CMD ["uv", "run", "gunicorn", "-c", "gunicorn.conf.py", "lode.api:app"] diff --git a/pyproject.toml b/pyproject.toml index 5de1df7..7970848 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "lode" -version = "0.2.5" +version = "0.3.0" description = "New reengineered version of LODE, maintained by OpenCitations" authors = [{name = "Valentina Pasqual, Silvio Peroni", email = "valentina.pasqual2@unibo.it"}] readme = "README.md" diff --git a/uv.lock b/uv.lock index 33dff77..e7b3580 100644 --- a/uv.lock +++ b/uv.lock @@ -418,7 +418,7 @@ wheels = [ [[package]] name = "lode" -version = "0.2.5" +version = "0.3.0" source = { editable = "." } dependencies = [ { name = "fastapi" },