From 155d4e2dc672cd689b00732f5cf8eee8357e7c66 Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Wed, 6 Nov 2024 18:07:41 +0100 Subject: [PATCH 1/9] start with vector data cube support --- environment.yml | 1 + pyproject.toml | 1 + rtd-environment.yml | 1 + xcube/core/store/datatype.py | 6 +++ xcube/core/store/fs/impl/vectordatacube.py | 55 ++++++++++++++++++++++ xcube/plugin.py | 2 + 6 files changed, 66 insertions(+) create mode 100644 xcube/core/store/fs/impl/vectordatacube.py diff --git a/environment.yml b/environment.yml index 6b5113c6d..ea8b3da5e 100644 --- a/environment.yml +++ b/environment.yml @@ -50,3 +50,4 @@ dependencies: - pytest-cov >=2.6 - requests-mock >=1.8 - werkzeug + - xvec diff --git a/pyproject.toml b/pyproject.toml index 4f52fbd2c..b0e58df2d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,7 @@ dependencies = [ "tornado>=6.0", "urllib3>=1.26", "xarray>=2022.6,<=2024.6", + "xvec", "zarr>=2.11" ] classifiers = [ diff --git a/rtd-environment.yml b/rtd-environment.yml index f345b8083..94139829a 100644 --- a/rtd-environment.yml +++ b/rtd-environment.yml @@ -44,6 +44,7 @@ dependencies: - urllib3 >=1.26 - werkzeug <2.2 # >=2.2 slows down S3 tests (deps: moto->flask->werkzeug) - xarray >=2022.6, <= 2024.6 + - xvec - zarr >=2.11 # Required by Coiled # These are very likely transitive deps anyway diff --git a/xcube/core/store/datatype.py b/xcube/core/store/datatype.py index 764fabc11..2f19a2ff6 100644 --- a/xcube/core/store/datatype.py +++ b/xcube/core/store/datatype.py @@ -198,6 +198,12 @@ def __next__(self) -> T: ["gdfiter", "DataIterator[geopandas.GeoDataFrame]"], ) +VECTOR_DATA_CUBE_TYPE = DataType(xarray.Dataset, ["vectordatacube", "xarray.Dataset"]) + +VECTOR_DATA_CUBE_ITERATOR_TYPE = DataType( + DataIterator, # Unfortunately we cannot use DataIterator[xarray.Dataset] + ["vdciter", "DataIterator[xarray.Dataset]"], +) def register_default_data_types(): for data_type in [ diff --git a/xcube/core/store/fs/impl/vectordatacube.py b/xcube/core/store/fs/impl/vectordatacube.py new file mode 100644 index 000000000..5ff76ee43 --- /dev/null +++ b/xcube/core/store/fs/impl/vectordatacube.py @@ -0,0 +1,55 @@ +# Copyright (c) 2024 by xcube team and contributors +# Permissions are hereby granted under the terms of the MIT License: +# https://opensource.org/licenses/MIT. + +import xarray as xr +import xvec + +from xcube.util.assertions import assert_instance +from ...datatype import DataType +from ...datatype import VECTOR_DATA_CUBE_TYPE + +from .dataset import DatasetZarrFsDataAccessor +from .dataset import DatasetNetcdfFsDataAccessor + + +class VectorDataCubeZarrFsDataAccessor(DatasetZarrFsDataAccessor): + """Opener/writer extension name: 'vectordatacube:zarr:'.""" + + @classmethod + def get_data_type(cls) -> DataType: + return VECTOR_DATA_CUBE_TYPE + + def open_data(self, data_id: str, **open_params) -> xr.Dataset: + dataset = super().open_data(data_id, **open_params) + return dataset.xvec.decode_cf() + + def write_data( + self, data: xr.Dataset, data_id: str, replace=False, **write_params + ) -> str: + assert_instance(data, xr.Dataset, name="data") + data_id = super().write_data( + data.xvec.encode_cf(), data_id, replace, **write_params + ) + return data_id + + +class VectorDataCubeNetcdfFsDataAccessor(DatasetNetcdfFsDataAccessor): + """Opener/writer extension name: 'vectordatacube:netcdf:'.""" + + @classmethod + def get_data_type(cls) -> DataType: + return VECTOR_DATA_CUBE_TYPE + + def open_data(self, data_id: str, **open_params) -> xr.Dataset: + dataset = super().open_data(data_id, **open_params) + return dataset.xvec.decode_cf() + + def write_data( + self, data: xr.Dataset, data_id: str, replace=False, **write_params + ) -> str: + assert_instance(data, xr.Dataset, name="data") + data_id = super().write_data( + data.xvec.encode_cf(), data_id, replace, **write_params + ) + return data_id diff --git a/xcube/plugin.py b/xcube/plugin.py index 5d20d0df0..49415f0f1 100644 --- a/xcube/plugin.py +++ b/xcube/plugin.py @@ -104,6 +104,8 @@ def _register_dataset_ios(ext_registry: extension.ExtensionRegistry): ), ("geodataframe", "shapefile", "gpd.GeoDataFrame in ESRI Shapefile format"), ("geodataframe", "geojson", "gpd.GeoDataFrame in GeoJSON format"), + ("vectordatacube", "netcdf", "xarray.Dataset in NetCDF format"), + ("vectordatacube", "zarr", "xarray.Dataset in Zarr format"), ) _FS_DATA_OPENER_ITEMS = _FS_DATA_ACCESSOR_ITEMS From e490dd94bdd058fac5aa01f53566a1627b6b77bd Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Mon, 11 Nov 2024 08:38:39 +0100 Subject: [PATCH 2/9] work on integrating vector data cubes --- .../core/store/fs/impl/test_vectordatacube.py | 32 +++ xcube/core/new.py | 211 ++++++++++++++++++ 2 files changed, 243 insertions(+) create mode 100644 test/core/store/fs/impl/test_vectordatacube.py diff --git a/test/core/store/fs/impl/test_vectordatacube.py b/test/core/store/fs/impl/test_vectordatacube.py new file mode 100644 index 000000000..4ba9f604a --- /dev/null +++ b/test/core/store/fs/impl/test_vectordatacube.py @@ -0,0 +1,32 @@ +# Copyright (c) 2024 by xcube team and contributors +# Permissions are hereby granted under the terms of the MIT License: +# https://opensource.org/licenses/MIT. + +import unittest + +from xcube.core.new import new_vector_data_cube + +from xcube.core.store.datatype import VECTOR_DATA_CUBE_TYPE +from xcube.core.store.fs.impl.vectordatacube import VectorDataCubeZarrFsDataAccessor +from xcube.core.store.fs.impl.vectordatacube import VectorDataCubeNetcdfFsDataAccessor + + +class VectorDataCubeZarrFsDataAccessorTest(unittest.TestCase): + + @classmethod + def setUp(self): + self.accessor = VectorDataCubeZarrFsDataAccessor() + # self.vdc = new_vector_data_cube() + + def test_get_data_type(self): + self.assertEqual(VECTOR_DATA_CUBE_TYPE, self.accessor.get_data_type()) + + def test_get_format_id(self) -> str: + self.assertEqual("zarr", self.accessor.get_format_id()) + + def test_write_and_open_data(self): + vdc = new_vector_data_cube() + data_id = self.accessor.write_data( + vdc, data_id="vdc_test.zarr", protocol="file", root="/test_data/" + ) + self.assertIsNotNone(data_id) diff --git a/xcube/core/new.py b/xcube/core/new.py index 0680f29f3..c1a9d4288 100644 --- a/xcube/core/new.py +++ b/xcube/core/new.py @@ -7,6 +7,8 @@ import numpy as np import pandas as pd import pyproj +import random +from shapely import Point import xarray as xr @@ -266,3 +268,212 @@ def my_func(time: int, y: int, x: int) -> Union[bool, int, float] data_vars[crs_name] = xr.DataArray(0, attrs=crs.to_cf()) return xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs) + + +def new_vector_data_cube( + title="Test Vector Data Cube", + geometries=None, + num_geometries=20, + geometry_name="geometry", + bbox=(-180.0, -90.0, 180.0, 90.0), + time_name="time", + time_dtype="datetime64[ns]", + time_units="seconds since 1970-01-01T00:00:00", + time_calendar="proleptic_gregorian", + time_periods=5, + time_freq="D", + time_start="2010-01-01T00:00:00", + use_cftime=False, + drop_bounds=False, + variables=None, + crs="WGS 84", + crs_name=None, + time_encoding_dtype="int64", +): + """Create a new empty vector data cube. Useful for creating vector data cube + templates with predefined coordinate variables and metadata. The function is also + used by xcube's unit tests. + + The values of the *variables* dictionary can be either constants, + array-like objects, or functions that compute their return value from + passed coordinate indexes. The expected signature is::: + + def my_func(time: int, y: int, x: int) -> Union[bool, int, float] + + Args: + title: A title. Defaults to 'Test Cube'. + width: Horizontal number of grid cells. Defaults to 360. + height: Vertical number of grid cells. Defaults to 180. + x_name: Name of the x coordinate variable. Defaults to 'lon'. + y_name: Name of the y coordinate variable. Defaults to 'lat'. + x_dtype: Data type of x coordinates. Defaults to 'float64'. + y_dtype: Data type of y coordinates. Defaults to 'float64'. + x_units: Units of the x coordinates. Defaults to 'degrees_east'. + y_units: Units of the y coordinates. Defaults to + 'degrees_north'. + x_start: Minimum x value. Defaults to -180. + y_start: Minimum y value. Defaults to -90. + x_res: Spatial resolution in x-direction. Defaults to 1.0. + y_res: Spatial resolution in y-direction. Defaults to 1.0. + inverse_y: Whether to create an inverse y axis. Defaults to + False. + time_name: Name of the time coordinate variable. Defaults to + 'time'. + time_periods: Number of time steps. Defaults to 5. + time_freq: Duration of each time step. Defaults to `1D'. + time_start: First time value. Defaults to '2010-01-01T00:00:00'. + time_dtype: Numpy data type for time coordinates. Defaults to + 'datetime64[s]'. If used, parameter 'use_cftime' must be + False. + time_units: Units for time coordinates. Defaults to 'seconds + since 1970-01-01T00:00:00'. + time_calendar: Calender for time coordinates. Defaults to + 'proleptic_gregorian'. + use_cftime: If True, the time will be given as data types + according to the 'cftime' package. If used, the + time_calendar parameter must be also be given with an + appropriate value such as 'gregorian' or 'julian'. If used, + parameter 'time_dtype' must be None. + drop_bounds: If True, coordinate bounds variables are not + created. Defaults to False. + variables: Dictionary of data variables to be added. None by + default. + crs: pyproj-compatible CRS string or instance of pyproj.CRS or + None + crs_name: Name of the variable that will hold the CRS + information. Ignored, if *crs* is not given. + time_encoding_dtype: data type used to encode the time variable + when serializing the dataset + + Returns: + A cube instance + """ + # y_dtype = y_dtype if y_dtype is not None else y_dtype + # y_res = y_res if y_res is not None else x_res + # if width < 0 or height < 0 or x_res <= 0.0 or y_res <= 0.0: + # if num_geometries < 0: + # raise ValueError() + + if isinstance(crs, str): + crs = pyproj.CRS.from_string(crs) + + if isinstance(crs, pyproj.CRS): + crs_name = crs_name or "crs" + + if geometries: + num_geometries = len(geometries) + else: + if num_geometries < 0: + raise ValueError() + geometries = [] + for _ in range(num_geometries): + x = random.uniform(bbox[0], bbox[2]) + y = random.uniform(bbox[1], bbox[3]) + geometries.append(Point(x, y)) + np_array = np.asarray(geometries, dtype=object) + + if time_periods < 0: + raise ValueError() + + if use_cftime and time_dtype is not None: + raise ValueError('If "use_cftime" is True,' ' "time_dtype" must not be set.') + + geometry_var = xr.DataArray(np_array, dims=geometry_name) + + if use_cftime: + time_data_p1 = xr.cftime_range( + start=time_start, + periods=time_periods + 1, + freq=time_freq, + calendar=time_calendar, + ).values + else: + time_data_p1 = pd.date_range( + start=time_start, periods=time_periods + 1, freq=time_freq + ).values + time_data_p1 = time_data_p1.astype(dtype=time_dtype) + + time_delta = time_data_p1[1] - time_data_p1[0] + time_data = time_data_p1[0:-1] + time_delta // 2 + time_var = xr.DataArray(time_data, dims=time_name) + time_var.encoding["units"] = time_units + time_var.encoding["calendar"] = time_calendar + time_var.encoding["dtype"] = time_encoding_dtype + + coords = {geometry_name: geometry_var, time_name: time_var} + if not drop_bounds: + time_bnds_name = f"{time_name}_bnds" + + bnds_dim = "bnds" + + time_bnds_data = np.zeros((time_periods, 2), dtype=time_data_p1.dtype) + time_bnds_data[:, 0] = time_data_p1[:-1] + time_bnds_data[:, 1] = time_data_p1[1:] + time_bnds_var = xr.DataArray(time_bnds_data, dims=(time_name, bnds_dim)) + time_bnds_var.encoding["units"] = time_units + time_bnds_var.encoding["calendar"] = time_calendar + time_bnds_var.encoding["dtype"] = time_encoding_dtype + + time_var.attrs["bounds"] = time_bnds_name + + coords.update( + { + time_bnds_name: time_bnds_var, + } + ) + + attrs = dict( + Conventions="CF-1.7", + title=title, + time_coverage_start=str(time_data_p1[0]), + time_coverage_end=str(time_data_p1[-1]), + ) + + if crs.to_epsg() == 4326: + attrs.update( + dict( + geospatial_lon_min=bbox[0], + geospatial_lon_max=bbox[2], + geospatial_lat_min=bbox[1], + geospatial_lat_max=bbox[3], + geospatial_units="degree", + + ) + ) + + data_vars = {} + if variables: + dims = (time_name, geometry_name) + shape = (time_periods, num_geometries) + size = time_periods * num_geometries + for var_name, data in variables.items(): + if isinstance(data, xr.DataArray): + data_vars[var_name] = data + elif ( + isinstance(data, int) + or isinstance(data, float) + or isinstance(data, bool) + ): + data_vars[var_name] = xr.DataArray(np.full(shape, data), dims=dims) + elif callable(data): + func = data + data = np.zeros(shape) + for index in itertools.product(*map(range, shape)): + data[index] = func(*index) + data_vars[var_name] = xr.DataArray(data, dims=dims) + elif data is None: + data_vars[var_name] = xr.DataArray( + np.random.uniform(0.0, 1.0, size).reshape(shape), dims=dims + ) + else: + data_vars[var_name] = xr.DataArray(data, dims=dims) + + if isinstance(crs, pyproj.CRS): + for v in data_vars.values(): + v.attrs["grid_mapping"] = crs_name + data_vars[crs_name] = xr.DataArray(0, attrs=crs.to_cf()) + + ds = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs) + ds = ds.xvec.set_geom_indexes(geometry_name, crs=crs) + # ds = ds.xvec.encode_cf() + return ds From 969f1c4b9aac69cad510999a4b461d1163c6e2ef Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Mon, 18 Nov 2024 08:15:12 +0100 Subject: [PATCH 3/9] work on integrating vector data cubes --- .../core/store/fs/impl/test_vectordatacube.py | 11 +++++++++++ xcube/core/mldataset/abc.py | 19 +++++++++++++++++++ xcube/core/store/datatype.py | 2 ++ xcube/core/store/fs/store.py | 6 +++++- 4 files changed, 37 insertions(+), 1 deletion(-) diff --git a/test/core/store/fs/impl/test_vectordatacube.py b/test/core/store/fs/impl/test_vectordatacube.py index 4ba9f604a..7f46d765b 100644 --- a/test/core/store/fs/impl/test_vectordatacube.py +++ b/test/core/store/fs/impl/test_vectordatacube.py @@ -6,11 +6,22 @@ from xcube.core.new import new_vector_data_cube +from xcube.core.store import new_data_store from xcube.core.store.datatype import VECTOR_DATA_CUBE_TYPE from xcube.core.store.fs.impl.vectordatacube import VectorDataCubeZarrFsDataAccessor from xcube.core.store.fs.impl.vectordatacube import VectorDataCubeNetcdfFsDataAccessor +class VectorDataCubeStoreTest(unittest.TestCase): + + def test_write_to_and_read_from_store(self): + store = new_data_store("file") + vdc = new_vector_data_cube() + data_id = store.write_data( + vdc, data_id="vdc_test.zarr" + ) + self.assertIsNotNone(data_id) + class VectorDataCubeZarrFsDataAccessorTest(unittest.TestCase): @classmethod diff --git a/xcube/core/mldataset/abc.py b/xcube/core/mldataset/abc.py index 14450baf1..faa192581 100644 --- a/xcube/core/mldataset/abc.py +++ b/xcube/core/mldataset/abc.py @@ -9,6 +9,7 @@ from collections.abc import Sequence import xarray as xr +import xvec from xcube.core.gridmapping import GridMapping from xcube.core.tilingscheme import TilingScheme @@ -148,3 +149,21 @@ def get_level_for_resolution(self, xy_res: ScalarOrPair[float]) -> int: if x_res > given_x_res and y_res > given_y_res: return max(0, level - 1) return self.num_levels - 1 + + +class VectorDataCube(xr.Dataset): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + for coord_name, coord_values in self.coords.items(): + if isinstance(coord_values.to_index(), xvec.GeometryIndex): + return + raise ValueError(f"Dataset has no Coordinate with an xvec.GeometryIndex.") + + @classmethod + def from_dataset(cls, dataset): + return cls( + data_vars=dataset.data_vars, + coords=dataset.coords, + attrs=dataset.attrs + ) diff --git a/xcube/core/store/datatype.py b/xcube/core/store/datatype.py index 2f19a2ff6..0d1ba5846 100644 --- a/xcube/core/store/datatype.py +++ b/xcube/core/store/datatype.py @@ -214,6 +214,8 @@ def register_default_data_types(): MULTI_LEVEL_DATASET_ITERATOR_TYPE, GEO_DATA_FRAME_TYPE, GEO_DATA_FRAME_ITERATOR_TYPE, + VECTOR_DATA_CUBE_TYPE, + VECTOR_DATA_CUBE_ITERATOR_TYPE, ]: DataType.register_data_type(data_type) diff --git a/xcube/core/store/fs/store.py b/xcube/core/store/fs/store.py index 2852130ab..0e400e6c3 100644 --- a/xcube/core/store/fs/store.py +++ b/xcube/core/store/fs/store.py @@ -51,6 +51,7 @@ from ..datatype import DataTypeLike from ..datatype import GEO_DATA_FRAME_TYPE from ..datatype import MULTI_LEVEL_DATASET_TYPE +from ..datatype import VECTOR_DATA_CUBE_TYPE from ..descriptor import DataDescriptor from ..descriptor import new_data_descriptor from ..error import DataStoreError @@ -443,7 +444,10 @@ def _guess_writer_id(self, data, data_id: str = None): data_type = accessor_id_parts[0] format_id = accessor_id_parts[1] if isinstance(data, xr.Dataset): - data_type = DATASET_TYPE.alias + if len(data.xvec.geom_coords) > 0: + data_type = VECTOR_DATA_CUBE_TYPE.alias + else: + data_type = DATASET_TYPE.alias format_id = format_id or "zarr" elif isinstance(data, MultiLevelDataset): data_type = MULTI_LEVEL_DATASET_TYPE.alias From 20078d87d68cfe902959866d7bb29dcf71822c3e Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Mon, 18 Nov 2024 18:33:09 +0100 Subject: [PATCH 4/9] work on supporting vectordatacubes --- test/core/store/fs/impl/test_vectordatacube.py | 3 ++- xcube/core/mldataset/abc.py | 10 +++++----- xcube/core/new.py | 9 ++++++--- xcube/core/store/fs/registry.py | 4 ++++ 4 files changed, 17 insertions(+), 9 deletions(-) diff --git a/test/core/store/fs/impl/test_vectordatacube.py b/test/core/store/fs/impl/test_vectordatacube.py index 7f46d765b..9147969ca 100644 --- a/test/core/store/fs/impl/test_vectordatacube.py +++ b/test/core/store/fs/impl/test_vectordatacube.py @@ -18,9 +18,10 @@ def test_write_to_and_read_from_store(self): store = new_data_store("file") vdc = new_vector_data_cube() data_id = store.write_data( - vdc, data_id="vdc_test.zarr" + vdc, data_id="vdc_test.zarr", writer_id="vectordatacube:zarr:file" ) self.assertIsNotNone(data_id) + store.delete_data(data_id) class VectorDataCubeZarrFsDataAccessorTest(unittest.TestCase): diff --git a/xcube/core/mldataset/abc.py b/xcube/core/mldataset/abc.py index faa192581..4021740ad 100644 --- a/xcube/core/mldataset/abc.py +++ b/xcube/core/mldataset/abc.py @@ -153,12 +153,12 @@ def get_level_for_resolution(self, xy_res: ScalarOrPair[float]) -> int: class VectorDataCube(xr.Dataset): - def __init__(self, *args, **kwargs): + def __init__( + self, geometry_column: str = "geometry", crs: str = "crs", *args, **kwargs + ): super().__init__(*args, **kwargs) - for coord_name, coord_values in self.coords.items(): - if isinstance(coord_values.to_index(), xvec.GeometryIndex): - return - raise ValueError(f"Dataset has no Coordinate with an xvec.GeometryIndex.") + self.xvec.set_geom_indexes(geometry_column, crs=crs) + self.xvec.encode_cf() @classmethod def from_dataset(cls, dataset): diff --git a/xcube/core/new.py b/xcube/core/new.py index c1a9d4288..f2eb76db9 100644 --- a/xcube/core/new.py +++ b/xcube/core/new.py @@ -11,6 +11,8 @@ from shapely import Point import xarray as xr +from xcube.core.mldataset.abc import VectorDataCube + def new_cube( title="Test Cube", @@ -473,7 +475,8 @@ def my_func(time: int, y: int, x: int) -> Union[bool, int, float] v.attrs["grid_mapping"] = crs_name data_vars[crs_name] = xr.DataArray(0, attrs=crs.to_cf()) - ds = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs) - ds = ds.xvec.set_geom_indexes(geometry_name, crs=crs) - # ds = ds.xvec.encode_cf() + ds = VectorDataCube( + geometry_column=geometry_name, crs=crs, data_vars=data_vars, + coords=coords, attrs=attrs + ) return ds diff --git a/xcube/core/store/fs/registry.py b/xcube/core/store/fs/registry.py index 724c55914..ae16f7e67 100644 --- a/xcube/core/store/fs/registry.py +++ b/xcube/core/store/fs/registry.py @@ -23,6 +23,8 @@ from .impl.geotiff import MultiLevelDatasetGeoTiffFsDataAccessor from .impl.mldataset import DatasetLevelsFsDataAccessor from .impl.mldataset import MultiLevelDatasetLevelsFsDataAccessor +from .impl.vectordatacube import VectorDataCubeNetcdfFsDataAccessor +from .impl.vectordatacube import VectorDataCubeZarrFsDataAccessor from .store import FsDataStore from ..assertions import assert_valid_params from ..error import DataStoreError @@ -120,6 +122,8 @@ def register_fs_data_accessor_class(fs_data_accessor_class: type[FsDataAccessor] MultiLevelDatasetLevelsFsDataAccessor, GeoDataFrameShapefileFsDataAccessor, GeoDataFrameGeoJsonFsDataAccessor, + VectorDataCubeNetcdfFsDataAccessor, + VectorDataCubeZarrFsDataAccessor ): register_fs_data_accessor_class(cls) From 90c3cbee65beecb6af42419264928b43a7d36f77 Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Wed, 27 Nov 2024 08:41:27 +0100 Subject: [PATCH 5/9] work on integrating vector data cubes --- .../core/store/fs/impl/test_vectordatacube.py | 38 ++++++++++++++++++- xcube/core/mldataset/abc.py | 12 ++++-- xcube/core/store/fs/impl/vectordatacube.py | 7 +++- 3 files changed, 51 insertions(+), 6 deletions(-) diff --git a/test/core/store/fs/impl/test_vectordatacube.py b/test/core/store/fs/impl/test_vectordatacube.py index 9147969ca..b8a10b6aa 100644 --- a/test/core/store/fs/impl/test_vectordatacube.py +++ b/test/core/store/fs/impl/test_vectordatacube.py @@ -16,13 +16,49 @@ class VectorDataCubeStoreTest(unittest.TestCase): def test_write_to_and_read_from_store(self): store = new_data_store("file") - vdc = new_vector_data_cube() + vdc = new_vector_data_cube( + variables=dict( + precipitation=0.5, + soilmoisture=1.0 + ) + ) data_id = store.write_data( vdc, data_id="vdc_test.zarr", writer_id="vectordatacube:zarr:file" ) self.assertIsNotNone(data_id) + ds = store.open_data(data_id, opener_id="vectordatacube:zarr:file") + self.assertIsNotNone(ds) store.delete_data(data_id) + def test_read_from_store_s3(self): + store = new_data_store( + "s3", + root="doors-cubes/vdcs", + max_depth=1, + storage_options=dict( + anon=False + ) + ) + ds1 = store.open_data( + 'flowfm_his_ds.zarr', opener_id="vectordatacube:zarr:s3" + ) + self.assertIsNotNone(ds1) + # store.get + # print(store.list_data_ids()) + # vdc = new_vector_data_cube( + # variables=dict( + # precipitation=0.5, + # soilmoisture=1.0 + # ) + # ) + # data_id = store.write_data( + # vdc, data_id="vdc_test.zarr", writer_id="vectordatacube:zarr:file" + # ) + # self.assertIsNotNone(data_id) + # ds = store.open_data(data_id, opener_id="vectordatacube:zarr:file") + # self.assertIsNotNone(ds) + # store.delete_data(data_id) + class VectorDataCubeZarrFsDataAccessorTest(unittest.TestCase): @classmethod diff --git a/xcube/core/mldataset/abc.py b/xcube/core/mldataset/abc.py index 4021740ad..19bbae8e2 100644 --- a/xcube/core/mldataset/abc.py +++ b/xcube/core/mldataset/abc.py @@ -154,11 +154,17 @@ def get_level_for_resolution(self, xy_res: ScalarOrPair[float]) -> int: class VectorDataCube(xr.Dataset): def __init__( - self, geometry_column: str = "geometry", crs: str = "crs", *args, **kwargs + self, geometry_column: str = "geometry", crs = None, *args, **kwargs ): super().__init__(*args, **kwargs) - self.xvec.set_geom_indexes(geometry_column, crs=crs) - self.xvec.encode_cf() + # self.assign_coords( + # dict( + # geometry_column=self.get(geometry_column) + # ) + # ) + # {geometry_column= self[geometry_name]}) + # self.xvec.set_geom_indexes(geometry_column, crs=crs) + # self.xvec.encode_cf() @classmethod def from_dataset(cls, dataset): diff --git a/xcube/core/store/fs/impl/vectordatacube.py b/xcube/core/store/fs/impl/vectordatacube.py index 5ff76ee43..f882037b8 100644 --- a/xcube/core/store/fs/impl/vectordatacube.py +++ b/xcube/core/store/fs/impl/vectordatacube.py @@ -5,6 +5,7 @@ import xarray as xr import xvec +from xcube.core.mldataset.abc import VectorDataCube from xcube.util.assertions import assert_instance from ...datatype import DataType from ...datatype import VECTOR_DATA_CUBE_TYPE @@ -22,7 +23,9 @@ def get_data_type(cls) -> DataType: def open_data(self, data_id: str, **open_params) -> xr.Dataset: dataset = super().open_data(data_id, **open_params) - return dataset.xvec.decode_cf() + dataset = dataset.xvec.decode_cf() + dataset = VectorDataCube.from_dataset(dataset) + return dataset def write_data( self, data: xr.Dataset, data_id: str, replace=False, **write_params @@ -41,7 +44,7 @@ class VectorDataCubeNetcdfFsDataAccessor(DatasetNetcdfFsDataAccessor): def get_data_type(cls) -> DataType: return VECTOR_DATA_CUBE_TYPE - def open_data(self, data_id: str, **open_params) -> xr.Dataset: + def open_data(self, data_id: str, **open_params) -> VectorDataCube: dataset = super().open_data(data_id, **open_params) return dataset.xvec.decode_cf() From fe83f3454e9864946817c91d9297b6196b8fd517 Mon Sep 17 00:00:00 2001 From: Tonio Fincke Date: Fri, 24 Jan 2025 18:49:31 +0100 Subject: [PATCH 6/9] added type to init.py --- xcube/core/store/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xcube/core/store/__init__.py b/xcube/core/store/__init__.py index c567c5f97..b3ee5859a 100644 --- a/xcube/core/store/__init__.py +++ b/xcube/core/store/__init__.py @@ -20,6 +20,7 @@ from .datatype import DataTypeLike from .datatype import GEO_DATA_FRAME_TYPE from .datatype import MULTI_LEVEL_DATASET_TYPE +from .datatype import VECTOR_DATA_CUBE_TYPE from .descriptor import DataDescriptor from .descriptor import DatasetDescriptor from .descriptor import GeoDataFrameDescriptor From 612519861dbd2e41639bbf417074a109415943da Mon Sep 17 00:00:00 2001 From: TonioF Date: Fri, 7 Feb 2025 16:59:39 +0100 Subject: [PATCH 7/9] fix --- xcube/core/store/fs/registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xcube/core/store/fs/registry.py b/xcube/core/store/fs/registry.py index 74b4fe586..6004116a1 100644 --- a/xcube/core/store/fs/registry.py +++ b/xcube/core/store/fs/registry.py @@ -32,7 +32,7 @@ DatasetLevelsFsDataAccessor, MultiLevelDatasetLevelsFsDataAccessor, ) -from impl.vectordatacube import ( +from .impl.vectordatacube import ( VectorDataCubeNetcdfFsDataAccessor, VectorDataCubeZarrFsDataAccessor ) From 9dbfedd23b8039cd9e9515a42dddec13821f344e Mon Sep 17 00:00:00 2001 From: TonioF Date: Fri, 7 Feb 2025 17:01:48 +0100 Subject: [PATCH 8/9] added test --- xcube/core/mldataset/abc.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xcube/core/mldataset/abc.py b/xcube/core/mldataset/abc.py index 4f09f66d5..efe25af03 100644 --- a/xcube/core/mldataset/abc.py +++ b/xcube/core/mldataset/abc.py @@ -10,6 +10,7 @@ import xarray as xr import xvec +from jinja2.lexer import TOKEN_DOT from xcube.core.gridmapping import GridMapping from xcube.core.tilingscheme import TilingScheme @@ -156,6 +157,7 @@ def __init__( self, geometry_column: str = "geometry", crs = None, *args, **kwargs ): super().__init__(*args, **kwargs) + # TODO add check that dataset qualifies as vector data cube # self.assign_coords( # dict( # geometry_column=self.get(geometry_column) From cd46e14eabd24534e9b5fe74e73759b3d4edab03 Mon Sep 17 00:00:00 2001 From: TonioF Date: Fri, 7 Feb 2025 17:02:02 +0100 Subject: [PATCH 9/9] adjusted tests --- .../core/store/fs/impl/test_vectordatacube.py | 33 ++----------------- 1 file changed, 2 insertions(+), 31 deletions(-) diff --git a/test/core/store/fs/impl/test_vectordatacube.py b/test/core/store/fs/impl/test_vectordatacube.py index b8a10b6aa..068ef4c8a 100644 --- a/test/core/store/fs/impl/test_vectordatacube.py +++ b/test/core/store/fs/impl/test_vectordatacube.py @@ -6,10 +6,10 @@ from xcube.core.new import new_vector_data_cube +from xcube.core.mldataset.abc import VectorDataCube from xcube.core.store import new_data_store from xcube.core.store.datatype import VECTOR_DATA_CUBE_TYPE from xcube.core.store.fs.impl.vectordatacube import VectorDataCubeZarrFsDataAccessor -from xcube.core.store.fs.impl.vectordatacube import VectorDataCubeNetcdfFsDataAccessor class VectorDataCubeStoreTest(unittest.TestCase): @@ -28,43 +28,14 @@ def test_write_to_and_read_from_store(self): self.assertIsNotNone(data_id) ds = store.open_data(data_id, opener_id="vectordatacube:zarr:file") self.assertIsNotNone(ds) + self.assertIsInstance(ds, VectorDataCube) store.delete_data(data_id) - def test_read_from_store_s3(self): - store = new_data_store( - "s3", - root="doors-cubes/vdcs", - max_depth=1, - storage_options=dict( - anon=False - ) - ) - ds1 = store.open_data( - 'flowfm_his_ds.zarr', opener_id="vectordatacube:zarr:s3" - ) - self.assertIsNotNone(ds1) - # store.get - # print(store.list_data_ids()) - # vdc = new_vector_data_cube( - # variables=dict( - # precipitation=0.5, - # soilmoisture=1.0 - # ) - # ) - # data_id = store.write_data( - # vdc, data_id="vdc_test.zarr", writer_id="vectordatacube:zarr:file" - # ) - # self.assertIsNotNone(data_id) - # ds = store.open_data(data_id, opener_id="vectordatacube:zarr:file") - # self.assertIsNotNone(ds) - # store.delete_data(data_id) - class VectorDataCubeZarrFsDataAccessorTest(unittest.TestCase): @classmethod def setUp(self): self.accessor = VectorDataCubeZarrFsDataAccessor() - # self.vdc = new_vector_data_cube() def test_get_data_type(self): self.assertEqual(VECTOR_DATA_CUBE_TYPE, self.accessor.get_data_type())