diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dc0c6b7..9fbfbb6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -51,3 +51,16 @@ jobs: run: | npm install --no-save @bjorn3/browser_wasi_shim@^0.4.2 node ../examples/node-demo.mjs + + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Test the Python wrapper against the freshly built runtime + env: + # Use the runtime just built by build.sh instead of downloading it. + GEOLIBRE_WASM: ${{ github.workspace }}/npm/geolibre-cli.wasm + run: | + python -m pip install --upgrade pip + python -m pip install ./python pytest + pytest python/tests -q diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2a7b8ea..7c64a19 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -46,6 +46,10 @@ jobs: - name: Upgrade npm for Trusted Publishing run: npm install -g npm@latest + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Build both WASM artifacts (library + WASI runner) run: ./build.sh @@ -60,6 +64,31 @@ jobs: working-directory: npm run: npm publish --provenance --access public + # Keep the Python package version and the runtime version it downloads in + # lockstep with the tag, so a tag is the single source of truth. + - name: Sync Python package version to tag + if: startsWith(github.ref, 'refs/tags/v') + run: | + ver="${GITHUB_REF_NAME#v}" + sed -i "s/^version = .*/version = \"$ver\"/" python/pyproject.toml + sed -i "s/^__version__ = .*/__version__ = \"$ver\"/" python/src/geolibre_wasm/__init__.py + sed -i "s|^RUNTIME_VERSION = .*|RUNTIME_VERSION = \"v$ver\"|" python/src/geolibre_wasm/_core.py + + - name: Build Python wheel + sdist + run: | + python -m pip install --upgrade build + python -m build --outdir python-dist python + + # No token needed: PyPI authenticates via the OIDC id-token from the + # configured trusted publisher. 
Requires a PyPI trusted publisher for + # project "geolibre-wasm" (owner opengeos, repo geolibre-rust, workflow + # release.yml); for the first release, configure it as a pending publisher. + - name: Publish to PyPI (Trusted Publishing) + if: startsWith(github.ref, 'refs/tags/v') + uses: pypa/gh-action-pypi-publish@release/v1 + with: + packages-dir: python-dist + - name: Stage release assets if: startsWith(github.ref, 'refs/tags/v') run: | @@ -72,6 +101,8 @@ jobs: cp npm/geolibre-cli.wasm dist/geolibre-cli-${GITHUB_REF_NAME}.wasm cp npm/tools.mjs dist/tools-${GITHUB_REF_NAME}.mjs cp npm/tools.d.ts dist/tools-${GITHUB_REF_NAME}.d.ts + # Python wheel + sdist + cp python-dist/*.whl python-dist/*.tar.gz dist/ (cd dist && sha256sum * > SHA256SUMS.txt) - name: Attach to GitHub Release diff --git a/.gitignore b/.gitignore index 55f4e89..57447a3 100644 --- a/.gitignore +++ b/.gitignore @@ -10,5 +10,11 @@ npm/geolibre_wasm.d.ts npm/geolibre_wasm_bg.wasm.d.ts npm/snippets/ *.wasm-opt.wasm +# Python wrapper build artifacts +__pycache__/ +*.egg-info/ +python/build/ +python/dist/ +.pytest_cache/ # Cargo.lock is committed: this workspace ships a binary, and the lock pins the # exact whitebox_next_gen fork commit for reproducible WASI builds. 
diff --git a/README.md b/README.md index e2eebc3..06402d2 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # geolibre-rust [![npm version](https://img.shields.io/npm/v/geolibre-wasm.svg)](https://www.npmjs.com/package/geolibre-wasm) +[![PyPI version](https://img.shields.io/pypi/v/geolibre-wasm.svg)](https://pypi.org/project/geolibre-wasm/) [![npm downloads](https://img.shields.io/npm/dm/geolibre-wasm.svg)](https://www.npmjs.com/package/geolibre-wasm) [![CI](https://github.com/opengeos/geolibre-rust/actions/workflows/ci.yml/badge.svg)](https://github.com/opengeos/geolibre-rust/actions/workflows/ci.yml) [![license](https://img.shields.io/npm/l/geolibre-wasm.svg)](https://github.com/opengeos/geolibre-rust#license) @@ -22,9 +23,10 @@ The published npm package (`geolibre-wasm`) ships two layers: GeoLibre's own tools**, run over an in-memory `/work` filesystem via [`@bjorn3/browser_wasi_shim`](https://github.com/bjorn3/browser_wasi_shim). -No server, no Python, no native install. New tools live in the `geolibre-tools` -crate and are registered alongside whitebox's, so GeoLibre sees them through the -same interface as the built-ins. +No server, no GDAL, no native install. Use it from JavaScript (npm +`geolibre-wasm`) or Python (PyPI `geolibre-wasm`). New tools live in the +`geolibre-tools` crate and are registered alongside whitebox's, so GeoLibre sees +them through the same interface as the built-ins. ## Try it in the browser @@ -160,6 +162,10 @@ When `kdtree 0.8.1` (or later) is published, delete `vendor/kdtree/` and the > Note: the repository is `geolibre-rust` (the Rust source), but the published > npm package is **`geolibre-wasm`** (the WASM artifact), mirroring `whitebox-wasm`. 
+```bash +npm install geolibre-wasm +``` + Browser library (the `.` export) -- typed GeoTIFF/projection/vector/LiDAR APIs: ```js @@ -185,6 +191,37 @@ const { files } = await runTool("slope", { const slopeCog = files["slope.tif"]; // Uint8Array (COG GeoTIFF) ``` +## Use from Python + +The `python/` package (`geolibre-wasm` on PyPI, `import geolibre_wasm`) runs the +same WASI tool runner in-process via `wasmtime`, mirroring the JS `./tools` API. +No native install, GDAL, or server. + +```bash +pip install geolibre-wasm +``` + +```python +import geolibre_wasm as gl + +tools = gl.list_tools() # every tool id +manifests = gl.list_manifests() # schemas + "source": geolibre|whitebox + +res = gl.run_tool( + "slope", + # Paths in `args` refer to the tool's sandbox (/work), NOT your host disk. + # `input` files are placed at /work/<name>; `res.files` keys are relative + # to /work. Mixing in host paths (e.g. /content on Colab) will not work. + args=["--input=/work/dem.tif", "--output=/work/slope.tif", "--units=degrees"], + input={"dem.tif": open("dem.tif", "rb").read()}, # -> /work/dem.tif +) +assert res.exit_code == 0, res.stdout # surfaces tool errors +open("slope.tif", "wb").write(res.files["slope.tif"]) # key is relative to /work +``` + +The runtime `.wasm` is downloaded from the matching release on first use (or set +`GEOLIBRE_WASM`). See [`python/README.md`](python/README.md) for details. + ## GeoLibre integration The interface is byte-compatible with the existing `whitebox-wasm/tools` client: diff --git a/python/README.md b/python/README.md new file mode 100644 index 0000000..777de49 --- /dev/null +++ b/python/README.md @@ -0,0 +1,96 @@ +# geolibre-wasm (Python) + +Run the [`geolibre-rust`](https://github.com/opengeos/geolibre-rust) geospatial +tool suite (the `whitebox_next_gen` tools plus GeoLibre's own) from Python. 
The +tools are a single WebAssembly (WASI) module executed in-process via +[`wasmtime`](https://github.com/bytecodealliance/wasmtime-py), so there is **no +native install, no GDAL, and no server** — just `pip install`. + +This mirrors the JavaScript `geolibre-wasm/tools` API (`list_tools`, +`list_manifests`, `run_tool`), so the two stay in sync. + +> The import package is `geolibre_wasm` (the distribution is `geolibre-wasm`), +> matching the npm package name and avoiding a clash with the separate +> `geolibre` application package. + +## Install + +```bash +pip install geolibre-wasm +``` + +On first use the runtime (`geolibre-cli.wasm`, ~20 MB) is downloaded from the +matching GitHub release and cached under `~/.cache/geolibre/` (or `$XDG_CACHE_HOME/geolibre/` when that variable is set). To use a local +copy instead, set `GEOLIBRE_WASM=/path/to/geolibre-cli.wasm` or pass +`wasm_path=` to any call. + +## Usage + +Inputs are passed as `bytes` under `/work`; the tool reads/writes there and any +new files come back as `bytes`. + +> **Paths are sandboxed.** Every path inside `args` refers to the tool's `/work` +> filesystem, **not** your host disk. `input` files are placed at `/work/<name>`, +> and `res.files` keys are paths relative to `/work`. Do your own `open()` / +> `write()` against host paths (e.g. `/content/...` on Colab) on the Python side, +> never inside `args`. 
+ +```python +import geolibre_wasm as gl + +# Discover tools (each manifest carries a "source": "geolibre" | "whitebox") +tools = gl.list_tools() +manifests = gl.list_manifests() + +# Raster: compute slope from a DEM +dem = open("dem.tif", "rb").read() # read from host disk +res = gl.run_tool( + "slope", + args=["--input=/work/dem.tif", "--output=/work/slope.tif", "--units=degrees"], + input={"dem.tif": dem}, # -> /work/dem.tif +) +assert res.exit_code == 0, res.stdout # surfaces tool errors +open("slope.tif", "wb").write(res.files["slope.tif"]) # key is relative to /work + +# Reproject (warp) to a target EPSG +res = gl.run_tool( + "reproject_raster", + args=["--input=/work/dem.tif", "--output=/work/wgs84.tif", "--epsg=4326"], + input={"dem.tif": dem}, +) + +# Vector: GeoJSON -> GeoParquet (Hilbert-sorted, bbox covering, ZSTD by default) +gj = open("cities.geojson", "rb").read() +res = gl.run_tool( + "write_geoparquet", + args=["--input=/work/in.geojson", "--output=/work/out.parquet"], + input={"in.geojson": gj}, +) +open("cities.parquet", "wb").write(res.files["out.parquet"]) +``` + +Tools that write a directory tree (e.g. `raster_to_tiles`) return nested keys: + +```python +res = gl.run_tool( + "raster_to_tiles", + args=["--input=/work/dem.tif", "--output_dir=/work/tiles", "--min_zoom=16", "--max_zoom=18"], + input={"dem.tif": dem}, +) +for path, data in res.files.items(): + # e.g. "tiles/16/9559/32767.png" + ... 
+``` + +## API + +- `list_tools(wasm_path=None) -> list[str]` +- `list_manifests(wasm_path=None) -> list[dict]` +- `run_tool(tool, args=None, input=None, wasm_path=None) -> ToolResult` +- `ToolResult(exit_code: int, stdout: list[str], files: dict[str, bytes])` +- `runtime_path(wasm_path=None) -> str` — resolve the runtime (explicit > `GEOLIBRE_WASM` > cached download) +- `download_runtime(dest=None) -> str` — fetch the runtime ahead of time + +## License + +MIT diff --git a/python/pyproject.toml b/python/pyproject.toml new file mode 100644 index 0000000..629d251 --- /dev/null +++ b/python/pyproject.toml @@ -0,0 +1,28 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "geolibre-wasm" +version = "0.4.0" +description = "Run the GeoLibre / whitebox_next_gen geospatial tool suite (WASM/WASI) from Python." +readme = "README.md" +requires-python = ">=3.9" +license = "MIT" +authors = [{ name = "Qiusheng Wu", email = "giswqs@gmail.com" }] +keywords = ["geospatial", "gis", "wasm", "wasi", "raster", "vector", "geoparquet", "whitebox", "geolibre"] +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Topic :: Scientific/Engineering :: GIS", +] +dependencies = ["wasmtime>=20,<50"] + +[project.urls] +Homepage = "https://github.com/opengeos/geolibre-rust" +Repository = "https://github.com/opengeos/geolibre-rust" +Issues = "https://github.com/opengeos/geolibre-rust/issues" + +[tool.hatch.build.targets.wheel] +packages = ["src/geolibre_wasm"] diff --git a/python/src/geolibre_wasm/__init__.py b/python/src/geolibre_wasm/__init__.py new file mode 100644 index 0000000..9567a93 --- /dev/null +++ b/python/src/geolibre_wasm/__init__.py @@ -0,0 +1,40 @@ +"""GeoLibre: run the whitebox_next_gen + GeoLibre geospatial tool suite from Python. 
+
+The tools are compiled to a single WebAssembly (WASI) module and executed
+in-process via wasmtime, so there is no native install, GDAL, or server. The API
+mirrors the JavaScript ``geolibre-wasm/tools`` package.
+
+Example:
+    >>> import geolibre_wasm as gl
+    >>> dem = open("dem.tif", "rb").read()
+    >>> result = gl.run_tool(
+    ...     "slope",
+    ...     args=["--input=/work/dem.tif", "--output=/work/slope.tif", "--units=degrees"],
+    ...     input={"dem.tif": dem},
+    ... )
+    >>> result.exit_code
+    0
+    >>> open("slope.tif", "wb").write(result.files["slope.tif"])
+"""
+
+from ._core import (
+    RUNTIME_VERSION,
+    ToolResult,
+    download_runtime,
+    list_manifests,
+    list_tools,
+    run_tool,
+    runtime_path,
+)
+
+__all__ = [
+    "RUNTIME_VERSION",
+    "ToolResult",
+    "download_runtime",
+    "list_manifests",
+    "list_tools",
+    "run_tool",
+    "runtime_path",
+]
+
+__version__ = "0.4.0"
diff --git a/python/src/geolibre_wasm/_core.py b/python/src/geolibre_wasm/_core.py
new file mode 100644
index 0000000..b4c925d
--- /dev/null
+++ b/python/src/geolibre_wasm/_core.py
@@ -0,0 +1,303 @@
+"""Run the GeoLibre WASI tool runner from Python via an in-process wasmtime.
+
+The tools are the single ``geolibre-cli.wasm`` (a ``wasm32-wasip1`` module). Each
+call runs it over a private temporary directory preopened as ``/work``: input
+files are written there, the tool reads/writes via ordinary ``std::fs``, and any
+new files are returned as ``bytes``. This mirrors the JavaScript ``tools.mjs``
+API (``list_tools`` / ``list_manifests`` / ``run_tool``) so the two stay in sync.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import shutil
+import tempfile
+import urllib.request
+from collections.abc import Mapping, Sequence
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Optional, Union
+
+import wasmtime
+
+#: Magic header of every WebAssembly module ("\0asm").
+_WASM_MAGIC = b"\x00asm"
+#: Network timeout (seconds) for the one-time runtime download.
+_DOWNLOAD_TIMEOUT = 120
+
+#: Release whose ``geolibre-cli.wasm`` asset this wrapper downloads by default.
+#: Kept in sync with the package version's ``vMAJOR.MINOR.PATCH`` tag.
+RUNTIME_VERSION = "v0.4.0"
+_ASSET = f"geolibre-cli-{RUNTIME_VERSION}.wasm"
+_RELEASE_URL = (
+    "https://github.com/opengeos/geolibre-rust/releases/download/"
+    f"{RUNTIME_VERSION}/{_ASSET}"
+)
+_STDOUT_CAPTURE = ".geolibre-stdout"
+
+PathLike = Union[str, os.PathLike]
+
+
+@dataclass
+class ToolResult:
+    """Result of running a tool.
+
+    Attributes:
+        exit_code: Process exit code (0 = success).
+        stdout: Captured stdout/stderr lines.
+        files: New files the tool wrote, keyed by path relative to ``/work``.
+            Tools that write a tree (e.g. ``raster_to_tiles``) use nested keys
+            such as ``"tiles/15/4779/16383.png"``.
+    """
+
+    exit_code: int
+    stdout: list[str]
+    files: dict[str, bytes]
+
+
+_engine: Optional[wasmtime.Engine] = None
+_module_cache: dict[str, "wasmtime.Module"] = {}
+
+
+def _get_engine() -> "wasmtime.Engine":
+    """Return the process-wide engine, creating it on first use."""
+    global _engine
+    if _engine is None:
+        _engine = wasmtime.Engine()
+    return _engine
+
+
+def _cache_path() -> Path:
+    """Return where the downloaded runtime for ``RUNTIME_VERSION`` is cached."""
+    base = os.environ.get("XDG_CACHE_HOME") or (Path.home() / ".cache")
+    return Path(base) / "geolibre" / _ASSET
+
+
+def download_runtime(dest: Optional[PathLike] = None) -> str:
+    """Download the ``geolibre-cli.wasm`` runtime from the GitHub release.
+
+    Args:
+        dest: Where to write the file. Defaults to the per-user cache
+            (``$XDG_CACHE_HOME/geolibre/`` or ``~/.cache/geolibre/``).
+
+    Returns:
+        The path to the downloaded runtime.
+
+    Raises:
+        RuntimeError: If the downloaded payload is not a WebAssembly module.
+    """
+    target = Path(dest) if dest is not None else _cache_path()
+    target.parent.mkdir(parents=True, exist_ok=True)
+    with urllib.request.urlopen(  # noqa: S310 (trusted release URL)
+        _RELEASE_URL, timeout=_DOWNLOAD_TIMEOUT
+    ) as response:
+        data = response.read()
+    # Guard against truncated or error-page downloads: every wasm module starts
+    # with the "\0asm" magic.
+    if not data.startswith(_WASM_MAGIC):
+        raise RuntimeError(
+            f"downloaded runtime from {_RELEASE_URL} is not a valid WASM module"
+        )
+    # Stage under a per-process name so concurrent first-use downloads cannot
+    # clobber (or rename away) each other's staging file.
+    tmp = target.with_name(f"{target.name}.{os.getpid()}.download")
+    try:
+        tmp.write_bytes(data)
+        tmp.replace(target)
+    finally:
+        # No-op after a successful replace; removes a partial file on failure.
+        tmp.unlink(missing_ok=True)
+    return str(target)
+
+
+def runtime_path(wasm_path: Optional[PathLike] = None) -> str:
+    """Resolve the runtime ``.wasm`` to use.
+
+    Resolution order: the explicit ``wasm_path`` argument, the ``GEOLIBRE_WASM``
+    environment variable, then the cached download (fetched on first use).
+
+    Args:
+        wasm_path: Explicit path to a ``geolibre-cli.wasm``; takes precedence.
+
+    Returns:
+        Filesystem path to the runtime module.
+    """
+    if wasm_path is not None:
+        return str(wasm_path)
+    env = os.environ.get("GEOLIBRE_WASM")
+    if env:
+        return env
+    cache = _cache_path()
+    if not cache.exists():
+        download_runtime(cache)
+    return str(cache)
+
+
+def _load_module(path: str) -> "wasmtime.Module":
+    """Compile the module at ``path`` once and reuse it on later calls."""
+    module = _module_cache.get(path)
+    if module is None:
+        module = wasmtime.Module.from_file(_get_engine(), path)
+        _module_cache[path] = module
+    return module
+
+
+def _stage_inputs(work: Path, inputs: Mapping[str, bytes]) -> set[str]:
+    """Write ``inputs`` under ``work`` and return their ``/work``-relative names.
+
+    Raises:
+        ValueError: If a name resolves outside ``work`` (absolute path or
+            ``..`` traversal), which would write to the host filesystem.
+    """
+    staged: set[str] = set()
+    for name, data in inputs.items():
+        dest = (work / name).resolve()
+        if dest == work or not dest.is_relative_to(work):
+            raise ValueError(f"input name {name!r} is not a path inside /work")
+        dest.parent.mkdir(parents=True, exist_ok=True)
+        dest.write_bytes(data)
+        staged.add(dest.relative_to(work).as_posix())
+    return staged
+
+
+def _exec(
+    argv: Sequence[str],
+    inputs: Optional[Mapping[str, bytes]] = None,
+    wasm_path: Optional[PathLike] = None,
+) -> ToolResult:
+    """Run the runner with ``argv`` over a throwaway ``/work`` directory.
+
+    Args:
+        argv: Arguments after the program name (subcommand or tool id first).
+        inputs: Files to place under ``/work`` before the run.
+        wasm_path: Optional explicit runtime path (see :func:`runtime_path`).
+
+    Returns:
+        The exit code, captured output lines, and files created by the run.
+
+    Raises:
+        ValueError: If an ``inputs`` name would land outside ``/work``.
+    """
+    inputs = inputs or {}
+    module = _load_module(runtime_path(wasm_path))
+    engine = _get_engine()
+    # Resolve symlinks (e.g. /var -> /private/var on macOS) so the containment
+    # check on input names compares like with like.
+    work = Path(tempfile.mkdtemp(prefix="geolibre-")).resolve()
+    try:
+        input_names = _stage_inputs(work, inputs)
+
+        stdout_file = work / _STDOUT_CAPTURE
+        store = wasmtime.Store(engine)
+        wasi = wasmtime.WasiConfig()
+        wasi.argv = ["geolibre", *argv]
+        wasi.preopen_dir(str(work), "/work")
+        # Merge stdout + stderr into one capture, matching tools.mjs.
+        # NOTE(review): both streams open the same path; confirm wasmtime does
+        # not give them independent offsets, or output may overwrite itself.
+        wasi.stdout_file = str(stdout_file)
+        wasi.stderr_file = str(stdout_file)
+        store.set_wasi(wasi)
+
+        linker = wasmtime.Linker(engine)
+        linker.define_wasi()
+        instance = linker.instantiate(store, module)
+        start = instance.exports(store)["_start"]
+
+        exit_code = 0
+        try:
+            start(store)  # WASI returns by calling proc_exit -> ExitTrap
+        except wasmtime.ExitTrap as exit_trap:
+            exit_code = exit_trap.code
+
+        stdout: list[str] = []
+        if stdout_file.exists():
+            # Decode explicitly as UTF-8: relying on the platform default
+            # (e.g. cp1252 on Windows) makes the captured text locale-dependent.
+            stdout = stdout_file.read_text(
+                encoding="utf-8", errors="replace"
+            ).splitlines()
+
+        files: dict[str, bytes] = {}
+        for path in sorted(work.rglob("*")):
+            if not path.is_file():
+                continue
+            rel = path.relative_to(work).as_posix()
+            if rel == _STDOUT_CAPTURE or rel in input_names:
+                continue
+            files[rel] = path.read_bytes()
+
+        return ToolResult(exit_code=exit_code, stdout=stdout, files=files)
+    finally:
+        shutil.rmtree(work, ignore_errors=True)
+
+
+def _require_success(result: ToolResult, command: str) -> None:
+    """Raise ``RuntimeError`` carrying the output if ``command`` failed."""
+    if result.exit_code != 0:
+        output = "\n".join(result.stdout)
+        raise RuntimeError(
+            f"'geolibre {command}' exited with code {result.exit_code}:\n{output}"
+        )
+
+
+def list_tools(wasm_path: Optional[PathLike] = None) -> list[str]:
+    """List every available tool id.
+
+    Args:
+        wasm_path: Optional explicit runtime path (see :func:`runtime_path`).
+
+    Returns:
+        The tool ids, one per registered tool.
+
+    Raises:
+        RuntimeError: If the runner exits with a non-zero code.
+    """
+    result = _exec(["list"], wasm_path=wasm_path)
+    _require_success(result, "list")
+    return [line.strip() for line in result.stdout if line.strip()]
+
+
+def list_manifests(wasm_path: Optional[PathLike] = None) -> list[dict]:
+    """Fetch every tool manifest (id, parameters, category, provenance, ...).
+
+    Args:
+        wasm_path: Optional explicit runtime path (see :func:`runtime_path`).
+
+    Returns:
+        A list of manifest dicts, each including a ``"source"`` field of
+        ``"geolibre"`` or ``"whitebox"``.
+
+    Raises:
+        RuntimeError: If the runner exits with a non-zero code.
+    """
+    result = _exec(["manifests"], wasm_path=wasm_path)
+    _require_success(result, "manifests")
+    return json.loads("".join(result.stdout))
+
+
+def run_tool(
+    tool: str,
+    args: Optional[Sequence[str]] = None,
+    input: Optional[Mapping[str, bytes]] = None,
+    wasm_path: Optional[PathLike] = None,
+) -> ToolResult:
+    """Run one tool over a private, temporary ``/work`` directory.
+
+    Args:
+        tool: Tool id, e.g. ``"slope"`` (see :func:`list_tools`).
+        args: CLI args, e.g.
+            ``["--input=/work/dem.tif", "--output=/work/slope.tif", "--units=degrees"]``.
+        input: Files placed under ``/work`` before the run, keyed by filename
+            (values are ``bytes``).
+        wasm_path: Optional explicit runtime path (see :func:`runtime_path`).
+
+    Returns:
+        A :class:`ToolResult` with the exit code, captured output, and any new
+        files the tool wrote (keyed by path relative to ``/work``).
+
+    Raises:
+        ValueError: If an ``input`` name would land outside ``/work``.
+    """
+    return _exec([tool, *(args or [])], inputs=input, wasm_path=wasm_path)
diff --git a/python/tests/test_smoke.py b/python/tests/test_smoke.py
new file mode 100644
index 0000000..7a76e00
--- /dev/null
+++ b/python/tests/test_smoke.py
@@ -0,0 +1,73 @@
+"""Smoke tests for the geolibre Python wrapper.
+
+These run the real WASI tool runner. Point ``GEOLIBRE_WASM`` at a local
+``geolibre-cli.wasm`` to avoid a network download (the build stages one into
+``npm/``); otherwise the runtime is fetched from the GitHub release on first use.
+"""
+
+from __future__ import annotations
+
+import json
+
+import geolibre_wasm as geolibre
+import pytest
+
+
+def test_list_tools():
+    tools = geolibre.list_tools()
+    assert len(tools) > 100
+    # The GeoLibre-authored tools are registered alongside whitebox's.
+    for tool in ("slope", "reproject_raster", "write_geoparquet", "read_geoparquet"):
+        assert tool in tools
+
+
+def test_manifests_carry_provenance():
+    manifests = geolibre.list_manifests()
+    by_id = {m["id"]: m for m in manifests}
+    assert by_id["reproject_raster"]["source"] == "geolibre"
+    assert by_id["slope"]["source"] == "whitebox"
+
+
+def test_geoparquet_roundtrip():
+    geojson = json.dumps(
+        {
+            "type": "FeatureCollection",
+            "features": [
+                {
+                    "type": "Feature",
+                    "properties": {"name": "A", "val": 1},
+                    "geometry": {"type": "Point", "coordinates": [-122.3, 47.6]},
+                },
+                {
+                    "type": "Feature",
+                    "properties": {"name": "B", "val": 2},
+                    "geometry": {"type": "Point", "coordinates": [-122.4, 47.7]},
+                },
+            ],
+        }
+    ).encode()
+
+    written = geolibre.run_tool(
+        "write_geoparquet",
+        args=["--input=/work/in.geojson", "--output=/work/out.parquet"],
+        input={"in.geojson": geojson},
+    )
+    assert written.exit_code == 0
+    parquet = written.files["out.parquet"]
+    assert parquet[:4] == b"PAR1"
+
+    back = geolibre.run_tool(
+        "read_geoparquet",
+        args=["--input=/work/x.parquet", "--output=/work/back.geojson"],
+        input={"x.parquet": parquet},
+    )
+    assert back.exit_code == 0
+    fc = json.loads(back.files["back.geojson"])
+    assert len(fc["features"]) == 2
+
+
+def test_input_names_cannot_escape_the_sandbox():
+    # Absolute paths and ".." would otherwise be written to the host disk.
+    for name in ("../escape.bin", "/escape.bin"):
+        with pytest.raises(ValueError):
+            geolibre.run_tool("slope", input={name: b""})