Skip to content
Draft
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
5f38216
Add prototype paramdb api that allows conversion of ECMWF shortname t…
awarde96 Mar 19, 2026
d64ad19
Add tests for new paramDB api
awarde96 Mar 19, 2026
c2b7ee8
Merge branch 'develop' into feature/paramdb-api
awarde96 Mar 19, 2026
a8b8e31
Move generated yaml files from pymetkit to share
awarde96 Apr 8, 2026
dabc222
Add initial code for caching params when using online mode
awarde96 Apr 8, 2026
192e642
Allow users to pass in a yaml file to be used as paramDB instead of o…
awarde96 Apr 8, 2026
9c374d6
Address Copilot PR #185 review comments
awarde96 Apr 10, 2026
63f51f7
Allow context when converting from shortname to id, using center or t…
awarde96 May 18, 2026
0276e49
Merge branch 'develop' into feature/paramdb-api
awarde96 May 22, 2026
5bfdf4e
Allow use of origin to avoid parameter id clashes from shortname, def…
awarde96 May 22, 2026
8998c5f
Merge branch 'feature/paramdb-api' of github.com:ecmwf/metkit into fe…
awarde96 May 22, 2026
a5370ef
Add schema for parameter entries, add workflow for clashing variables…
awarde96 Jun 1, 2026
abbc236
feat: symlink parameter_metadata.yaml from share/metkit into package
HCookie Jun 12, 2026
53d5ccc
Make ParamDB importable when C library is unavailable; add benchmark …
awarde96 Jun 16, 2026
81f5c18
Suppress noisy print() on missing symbols; collect count into CFFIMod…
awarde96 Jun 16, 2026
9885271
Add offline vs online per-method comparison table to benchmark when u…
awarde96 Jun 16, 2026
e622f7f
Add json version of parameters for quicker loading, update paramdb to…
awarde96 Jun 17, 2026
917d33b
Merge pull request #225 from ecmwf/feature/paramdb-yaml-symlink
awarde96 Jun 17, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,10 @@ requires-python = ">=3.10"
dependencies = [
"cffi",
"metkitlib",
"findlibs"
"findlibs",
"pyyaml",
"requests",
"platformdirs",
]

[tool.setuptools.dynamic]
Expand All @@ -39,7 +42,8 @@ zip-safe = false
[tool.setuptools.package-data]
"pymetkit" = [
"VERSION",
"metkit_c.h"
"metkit_c.h",
"parameter_metadata.yaml"
]

Copilot AI Apr 10, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

parameter_metadata.yaml is declared as package data for the pymetkit package, but the repository’s YAML lives under share/metkit/parameter_metadata.yaml (there is no python/pymetkit/src/pymetkit/parameter_metadata.yaml). As a result, installed wheels/sdists will likely not contain the bundled YAML and offline ParamDB() will fail. Either move/copy the YAML into the package directory at build time, or adjust packaging and the loader to use importlib.resources/pkgutil to read the bundled file reliably.

Suggested change
"metkit_c.h",
"parameter_metadata.yaml"
]
"metkit_c.h"
]
[tool.setuptools.data-files]
"share/metkit" = [
"share/metkit/parameter_metadata.yaml"
]

Copilot uses AI. Check for mistakes.
[project.optional-dependencies]
Expand Down
1 change: 1 addition & 0 deletions python/pymetkit/src/pymetkit/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .pymetkit import *
from .pymetkit import ParamDB
144 changes: 144 additions & 0 deletions python/pymetkit/src/pymetkit/generate_parameter_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
"""
Standalone script to generate:
- parameter_metadata.yaml — one entry per ECMWF parameter
- unit_metadata.yaml — one entry per ECMWF unit

Usage
-----
python -m pymetkit.generate_parameter_metadata
# or directly:
python generate_parameter_metadata.py
"""

import requests
import yaml
from pathlib import Path

# REST endpoints of the ECMWF parameter database queried by this script.
PARAM_URL = "https://codes.ecmwf.int/parameter-database/api/v1/param/"
UNIT_URL = "https://codes.ecmwf.int/parameter-database/api/v1/unit/"

# Generated files are written as siblings of this module.
PARAM_OUTPUT = Path(__file__).with_name("parameter_metadata.yaml")
UNIT_OUTPUT = Path(__file__).with_name("unit_metadata.yaml")

Comment on lines +18 to +34

Copilot AI Apr 10, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The generator writes parameter_metadata.yaml and unit_metadata.yaml next to the Python module (Path(__file__).parent), but the PR adds the YAML under share/metkit/ and ParamDB’s fallback search also expects share/metkit/parameter_metadata.yaml. Regenerating with this script will therefore write to a different location than the committed data. Align the output paths with the repository’s canonical YAML location (or update the rest of the codebase to consume the module-adjacent files).

Copilot uses AI. Check for mistakes.

# ---------------------------------------------------------------------------
# Units
# ---------------------------------------------------------------------------


def fetch_units(
    url: str = UNIT_URL, timeout: float = 30.0
) -> tuple[list[dict], dict[int, str]]:
    """
    Fetch all units from the ECMWF parameter database API.

    Parameters
    ----------
    url : str
        Endpoint that returns the full unit list as JSON.
    timeout : float
        Timeout in seconds passed to ``requests.get`` so that a stalled
        endpoint cannot block the script indefinitely.

    Returns
    -------
    units : list[dict]
        Normalised unit records ready to be written to unit_metadata.yaml,
        sorted by ascending id. Every record starts with ``id`` and always
        carries a ``name`` key.
    unit_map : dict[int, str]
        Mapping of unit id -> unit name string for use in parameter enrichment.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    requests.Timeout
        If the server does not respond within ``timeout`` seconds.
    """
    print(f"Fetching units from {url} ...")
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    raw_units = response.json()
    print(f" Received {len(raw_units)} units.")

    units = []
    unit_map: dict[int, str] = {}

    for raw in raw_units:
        uid = int(raw["id"])
        # The API may use 'name', 'symbol', or 'label' for the unit string
        name = raw.get("name") or raw.get("symbol") or raw.get("label") or ""

        # Preserve all fields the API returns, but ensure id comes first
        entry = {"id": uid}
        entry.update((key, value) for key, value in raw.items() if key != "id")

        # Always emit a canonical name field so unit_metadata.yaml has a
        # stable schema, whichever of the alternative keys the API used.
        entry["name"] = name

        units.append(entry)
        unit_map[uid] = name

    units.sort(key=lambda e: e["id"])
    return units, unit_map


def write_unit_yaml(units: list[dict], output_path: Path = UNIT_OUTPUT) -> None:
    """Write the unit list to a YAML file.

    Parameters
    ----------
    units : list[dict]
        Unit records to serialise; key order within each record is kept.
    output_path : Path
        Destination file. It is overwritten if it already exists.
    """
    # allow_unicode=True makes PyYAML emit non-ASCII characters verbatim, so
    # the file is opened as UTF-8 explicitly instead of relying on the locale
    # default encoding, which may be unable to represent them.
    with output_path.open("w", encoding="utf-8") as fh:
        yaml.dump(
            units,
            fh,
            default_flow_style=False,
            allow_unicode=True,
            sort_keys=False,
        )
    print(f"Written {len(units)} units to {output_path}")


# ---------------------------------------------------------------------------
# Parameters
# ---------------------------------------------------------------------------


def fetch_parameters(
    url: str = PARAM_URL,
    unit_map: dict[int, str] | None = None,
    timeout: float = 30.0,
) -> list[dict]:
    """Fetch all parameters from the ECMWF parameter database API.

    Parameters
    ----------
    url : str
        Endpoint that returns the full parameter list as JSON.
    unit_map : dict[int, str] | None
        Mapping of unit id -> unit name string. When omitted or empty, the
        ``units`` field of every parameter is left as an empty string.
    timeout : float
        Timeout in seconds passed to ``requests.get`` so that a stalled
        endpoint cannot block the script indefinitely.

    Returns
    -------
    list[dict]
        One record per parameter with the keys ``id``, ``shortname``,
        ``longname``, ``units`` and ``description``, sorted by ascending id.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    requests.Timeout
        If the server does not respond within ``timeout`` seconds.
    """
    print(f"Fetching parameters from {url} ...")
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    params = response.json()
    print(f" Received {len(params)} parameters.")

    result = []
    for raw in params:
        # Resolve short name (API may return 'shortName', 'short_name', or 'shortname')
        shortname = (
            raw.get("shortname") or raw.get("shortName") or raw.get("short_name") or ""
        )

        # Resolve units via unit_map if available; unknown ids map to ""
        unit_id = raw.get("unit_id")
        if unit_map and unit_id is not None:
            units = unit_map.get(int(unit_id), "")
        else:
            units = ""

        result.append(
            {
                "id": int(raw["id"]),
                "shortname": shortname,
                "longname": raw.get("name", ""),
                "units": units,
                "description": raw.get("description", ""),
            }
        )

    result.sort(key=lambda e: e["id"])
    return result


def write_param_yaml(params: list[dict], output_path: Path = PARAM_OUTPUT) -> None:
    """Write the parameter list to a YAML file.

    Parameters
    ----------
    params : list[dict]
        Parameter records to serialise; key order within each record is kept.
    output_path : Path
        Destination file. It is overwritten if it already exists.
    """
    # allow_unicode=True makes PyYAML emit non-ASCII characters verbatim, so
    # the file is opened as UTF-8 explicitly instead of relying on the locale
    # default encoding, which may be unable to represent them.
    with output_path.open("w", encoding="utf-8") as fh:
        yaml.dump(
            params,
            fh,
            default_flow_style=False,
            allow_unicode=True,
            sort_keys=False,
        )
    print(f"Written {len(params)} parameters to {output_path}")


# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------

def main() -> None:
    """Regenerate both metadata files: units first, then parameters."""
    # Units come first because their id -> name map is passed on to
    # fetch_parameters to fill in each parameter's "units" field.
    unit_records, unit_names = fetch_units()
    write_unit_yaml(unit_records)

    write_param_yaml(fetch_parameters(unit_map=unit_names))


if __name__ == "__main__":
    main()
Loading
Loading