From 192b64d4ae55441f9e62a878caa34fe4ac479c83 Mon Sep 17 00:00:00 2001 From: TonioF Date: Thu, 13 Mar 2025 10:16:25 +0100 Subject: [PATCH 1/6] work on supporting writing to geotiff --- test/core/store/fs/impl/test_geotiff.py | 24 +++++++++++++++++--- xcube/core/store/fs/impl/dataset.py | 29 ++++++++++++++++++++++++- 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/test/core/store/fs/impl/test_geotiff.py b/test/core/store/fs/impl/test_geotiff.py index b534bc7a3..c9bc1c436 100644 --- a/test/core/store/fs/impl/test_geotiff.py +++ b/test/core/store/fs/impl/test_geotiff.py @@ -147,11 +147,11 @@ def test_read_geotiff(self): class DatasetGeoTiffFsDataAccessorTest(unittest.TestCase): """ - A Test class to test opening a GeoTIFF as multilevel dataset or + A Test class to test opening and writing a GeoTIFF as multilevel dataset or as normal dataset """ - def test_ml_to_dataset(self): + def test_open_ml_to_dataset(self): fs, cog_path = GeoTIFFMultiLevelDatasetTest.get_params("sample-cog.tif") data_accessor = DatasetGeoTiffFsDataAccessor() self.assertIsInstance(data_accessor, DatasetGeoTiffFsDataAccessor) @@ -164,7 +164,7 @@ def test_ml_to_dataset(self): data_accessor.get_open_data_params_schema(cog_path), JsonSchema ) - def test_dataset(self): + def test_open_dataset(self): fs, tiff_path = GeoTIFFMultiLevelDatasetTest.get_params("sample-geotiff.tif") data_accessor = DatasetGeoTiffFsDataAccessor() self.assertIsInstance(data_accessor, DatasetGeoTiffFsDataAccessor) @@ -178,6 +178,24 @@ def test_dataset(self): ) self.assertIsInstance(dataset, xarray.Dataset) + def test_write_dataset(self): + from xcube.core.new import new_cube + fs, _ = GeoTIFFMultiLevelDatasetTest.get_params("sample-geotiff.tif") + ds = new_cube(variables=dict(xxx=1, yyy=2)) + data_accessor = DatasetGeoTiffFsDataAccessor() + tiff_path = "geotiff_write_test.tif" + data_accessor.write_data( + ds, tiff_path, fs=fs + ) + dataset = data_accessor.open_data( + data_id=tiff_path, + fs=fs, + root=None, + tile_size=[256, 256], + overview_level=None, + ) + self.assertIsNotNone(dataset) + class ObjectStorageMultiLevelDatasetTest(S3Test): """ diff --git a/xcube/core/store/fs/impl/dataset.py b/xcube/core/store/fs/impl/dataset.py index e2d119f11..af13a865f 100644 --- a/xcube/core/store/fs/impl/dataset.py +++ b/xcube/core/store/fs/impl/dataset.py @@ -13,6 +13,7 @@ import zarr from rasterio.session import AWSSession +from xcube.core.gridmapping import GridMapping # Note, we need the following reference to register the # xarray property accessor # noinspection PyUnresolvedReferences @@ -413,7 +414,33 @@ def get_write_data_params_schema(self) -> JsonObjectSchema: def write_data( self, data: xr.Dataset, data_id: str, replace=False, **write_params ) -> str: - raise NotImplementedError("Writing of GeoTIFF not yet supported") + assert_instance(data, xr.Dataset, name="data") + assert_instance(data_id, str, name="data_id") + fs, root, write_params = self.load_fs(write_params) + + gm = GridMapping.from_dataset(data) + + try: + with fs.open(data_id, "wb") as fobj: + with rasterio.open( + fobj, + "w", + driver="GTiff", + height=gm.height, + width=gm.height, + count=len(data.data_vars) * len(data.time), + dtype=data[list(data.data_vars.keys())[0]].dtype, + crs=data.rio.crs, + transform=data.rio.transform(), + ) as dst: + band_index = 1 + for var in data.data_vars: + for time in data.time.values: + dst.write(data[var].sel(time=time).values, band_index) + band_index += 1 + except ValueError as e: + raise DataStoreError(f"Failed to write dataset {data_id!r}: {e}") from e + return data_id @classmethod def _sanitize_dataset_attrs(cls, dataset): From 30a135ddb47189867b3f021e3ebbd6637886baf2 Mon Sep 17 00:00:00 2001 From: TonioF Date: Mon, 24 Mar 2025 12:13:29 +0100 Subject: [PATCH 2/6] support kml as vector data format --- test/core/store/fs/test_registry.py | 154 ++++++++++++++++++++++- xcube/core/store/fs/impl/geodataframe.py | 86 ++++++++++++- xcube/core/store/fs/registry.py | 4 +- xcube/core/store/fs/store.py | 4 +- xcube/plugin.py | 1 + 5 files changed, 245 insertions(+), 4 deletions(-) diff --git a/test/core/store/fs/test_registry.py b/test/core/store/fs/test_registry.py index 8e68ea285..85a70bc7c 100644 --- a/test/core/store/fs/test_registry.py +++ b/test/core/store/fs/test_registry.py @@ -12,6 +12,7 @@ from typing import Any, Callable, Optional, Union import fsspec +import geopandas as gpd import numpy as np import pytest import xarray as xr @@ -25,6 +26,7 @@ DataDescriptor, DatasetDescriptor, DataStoreError, + GeoDataFrameDescriptor, MultiLevelDatasetDescriptor, MutableDataStore, ) @@ -69,6 +71,16 @@ def new_cube_data(): return cube.chunk(dict(time=1, y=90, x=180)) +def new_geodataframe(): + return gpd.GeoDataFrame( + {"placename": ["Place A", "Place B"], + "state": ["Active", "Disabled"], + "var_x": [10, 20], + "var_y": [0.5, 2.0]}, + geometry=gpd.points_from_xy([8.0, 8.1], [50.0, 50.1]), + crs="EPSG:4326" + ) + class NewCubeDataTestMixin(unittest.TestCase): path = f"{DATA_PATH}/data.zarr" @@ -225,7 +237,29 @@ def test_dataset_levels(self): assert_data_ok=self._assert_zarr_store_direct_ok, ) - # TODO: add assertGeoDataFrameSupport + def test_geodataframe_geojson(self): + data_store = self.create_data_store() + self._assert_geodataframe_supported( + data_store, + filename_ext=".geojson", + requested_dtype_alias="geodataframe", + expected_dtype_aliases={"geodataframe"}, + expected_return_type=gpd.GeoDataFrame, + expected_descriptor_type=GeoDataFrameDescriptor, + assert_data_ok = self._assert_geodataframe_ok + ) + + def test_geodataframe_kml(self): + data_store = self.create_data_store() + self._assert_geodataframe_supported( + data_store, + filename_ext=".kml", + requested_dtype_alias="geodataframe", + expected_dtype_aliases={"geodataframe"}, + expected_return_type=gpd.GeoDataFrame, + expected_descriptor_type=GeoDataFrameDescriptor, + assert_data_ok=self._assert_geodataframe_ok + ) def _assert_multi_level_dataset_format_supported(self, data_store: FsDataStore): self._assert_dataset_supported( @@ -332,6 +366,14 @@ def _assert_multi_level_dataset_data_ok(self, ml_dataset): ) self.assertNotIsInstance(dataset.zarr_store.get(), GenericZarrStore) + def _assert_geodataframe_ok(self, gdf: gpd.GeoDataFrame): + self.assertIn("placename", gdf.columns) + self.assertIn("var_x", gdf.columns) + self.assertIn("var_y", gdf.columns) + self.assertIn("geometry", gdf.columns) + self.assertEqual("int", str(gdf.var_x.dtype)[:3]) + self.assertEqual("float64", str(gdf.var_y.dtype)) + def _assert_multi_level_dataset_format_with_tile_size( self, data_store: FsDataStore ): @@ -481,6 +523,116 @@ def _assert_dataset_supported( self.assertNotIn(data_id, set(data_store.get_data_ids())) self.assertNotIn(data_id, data_store.list_data_ids()) + def _assert_geodataframe_supported( + self, + data_store: FsDataStore, + filename_ext: str, + requested_dtype_alias: Optional[str], + expected_dtype_aliases: set[str], + expected_return_type: type[gpd.GeoDataFrame], + expected_descriptor_type: Optional[type[GeoDataFrameDescriptor]] = None, + opener_id: str = None, + write_params: Optional[dict[str, Any]] = None, + open_params: Optional[dict[str, Any]] = None, + assert_data_ok: Optional[Callable[[Any], Any]] = None, + assert_warnings: bool = False, + warning_msg: str = None, + ): + """Call all DataStore operations to ensure data of type + gpd.GeoDataFrame is supported by *data_store*. + + Args: + data_store: The filesystem data store instance. + filename_ext: Filename extension that identifies + a supported dataset format. + expected_data_type_alias: The expected data type alias. + expected_return_type: The expected data type. + expected_descriptor_type: The expected data descriptor type. + opener_id: Optional opener identifier + write_params: Optional write parameters + open_params: Optional open parameters + assert_data_ok: Optional function to assert read data is ok + assert_warnings: Optional boolean if test may check for warnings + warning_msg: Optional warning message to be checked if + assert_warnings is True + """ + + data_id = f"{DATA_PATH}/ds{filename_ext}" + + write_params = write_params or {} + open_params = open_params or {} + + self.assertIsInstance(data_store, MutableDataStore) + + self.assertEqual( + {"dataset", "mldataset", "geodataframe"}, set(data_store.get_data_types()) + ) + + with pytest.raises( + DataStoreError, match=f'Data resource "{data_id}" does not exist in store' + ): + data_store.get_data_types_for_data(data_id) + self.assertEqual(False, data_store.has_data(data_id)) + self.assertNotIn(data_id, set(data_store.get_data_ids())) + self.assertNotIn(data_id, data_store.list_data_ids()) + + data = new_geodataframe() + written_data_id = data_store.write_data(data, data_id, **write_params) + self.assertEqual(data_id, written_data_id) + + self.assertEqual( + expected_dtype_aliases, set(data_store.get_data_types_for_data(data_id)) + ) + self.assertEqual(True, data_store.has_data(data_id)) + self.assertIn(data_id, set(data_store.get_data_ids())) + self.assertIn(data_id, data_store.list_data_ids()) + + if expected_descriptor_type is not None: + data_descriptors = list( + data_store.search_data(data_type=expected_return_type) + ) + self.assertEqual(1, len(data_descriptors)) + self.assertIsInstance(data_descriptors[0], DataDescriptor) + self.assertIsInstance(data_descriptors[0], expected_descriptor_type) + + if assert_warnings: + with warnings.catch_warnings(record=True) as w: + data = data_store.open_data( + data_id, + opener_id=opener_id, + data_type=requested_dtype_alias, + **open_params, + ) + # if "s3" data store is tested, warnings from other + # libraries like botocore occur + if data_store.protocol != "s3": + self.assertEqual(1, len(w)) + self.assertEqual(w[0].category, UserWarning) + self.assertEqual(warning_msg, w[0].message.args[0]) + else: + data = data_store.open_data( + data_id, + opener_id=opener_id, + data_type=requested_dtype_alias, + **open_params, + ) + self.assertIsInstance(data, expected_return_type) + if assert_data_ok is not None: + assert_data_ok(data) + + try: + data_store.delete_data(data_id) + except PermissionError as e: # May occur on win32 due to fsspec + warnings.warn(f"{e}") + return + with pytest.raises( + DataStoreError, + match=f'Data resource "{data_id}" does not exist in store', + ): + data_store.get_data_types_for_data(data_id) + self.assertEqual(False, data_store.has_data(data_id)) + self.assertNotIn(data_id, set(data_store.get_data_ids())) + self.assertNotIn(data_id, data_store.list_data_ids()) class FileFsDataStoresTest(FsDataStoresTestMixin, unittest.TestCase): def create_data_store(self) -> FsDataStore: diff --git a/xcube/core/store/fs/impl/geodataframe.py b/xcube/core/store/fs/impl/geodataframe.py index 7066a127a..b8d1b6b0c 100644 --- a/xcube/core/store/fs/impl/geodataframe.py +++ b/xcube/core/store/fs/impl/geodataframe.py @@ -6,12 +6,14 @@ import geopandas as gpd import pandas as pd +import simplekml from xcube.util.assertions import assert_instance from xcube.util.fspath import is_local_fs from xcube.util.jsonschema import JsonObjectSchema from xcube.util.temp import new_temp_file +from ... import DataStoreError from ...datatype import GEO_DATA_FRAME_TYPE, DataType from ..accessor import FsDataAccessor @@ -62,13 +64,17 @@ def write_data(self, data: gpd.GeoDataFrame, data_id: str, **write_params) -> st assert_instance(data, (gpd.GeoDataFrame, pd.DataFrame), "data") fs, root, write_params = self.load_fs(write_params) is_local = is_local_fs(fs) + replace = write_params.pop("replace", False) if is_local: file_path = data_id + if not replace and fs.exists(file_path): + raise DataStoreError(f"Data '{data_id}' already exists.") else: _, file_path = new_temp_file() data.to_file(file_path, driver=self.get_driver_name(), **write_params) if not is_local: - fs.put_file(file_path, data_id) + mode = "overwrite" if replace else "create" + fs.put_file(file_path, data_id, mode=mode) return data_id @@ -94,3 +100,81 @@ def get_format_id(cls) -> str: @classmethod def get_driver_name(cls) -> str: return "GeoJSON" + + +class GeoDataFrameKmlFsDataAccessor(GeoDataFrameFsDataAccessor): + """Extension name: 'geodataframe:kml:'.""" + + @classmethod + def get_format_id(cls) -> str: + return "kml" + + @classmethod + def get_driver_name(cls) -> str: + return "KML" + + def open_data(self, data_id: str, **open_params) -> gpd.GeoDataFrame: + gdf = super().open_data(data_id, **open_params) + kml_nan_columns = [ + "Name", "description", "timestamp", "begin", "end", "altitudeMode", + "drawOrder", "icon" + ] + kml_number_columns = { + "tessellate": -1, + "extrude": 0, + "visibility": -1, + } + for col in gdf.columns: + if ((col in kml_nan_columns and pd.isna(gdf[col]).all()) or + (col in kml_number_columns.keys() and + len(gdf[col].unique()) == 1 and + gdf[col].unique()[0] == kml_number_columns[col])): + del gdf[col] + continue + if col not in ["geometry"]: + try: + gdf[col] = pd.to_numeric(gdf[col]) + except ValueError: + if gdf[col].str.lower().isin(["true", "false"]).all(): + gdf[col] = gdf[col].map({"true": True, "false": False}) + else: + gdf[col] = gdf[col].astype(str) + return gdf + + def write_data(self, data: gpd.GeoDataFrame, data_id: str, **write_params) -> str: + assert_instance(data, (gpd.GeoDataFrame, pd.DataFrame), "data") + fs, root, write_params = self.load_fs(write_params) + is_local = is_local_fs(fs) + replace = write_params.pop("replace", False) + if is_local: + file_path = data_id + if not replace and fs.exists(file_path): + raise DataStoreError(f"Data '{data_id}' already exists.") + else: + _, file_path = new_temp_file() + + kml = simplekml.Kml() + + for _, row in data.iterrows(): + geom = row.geometry + + if geom.geom_type == "Point": + entry = kml.newpoint(coords=[(geom.x, geom.y)]) + elif geom.geom_type == "LineString": + entry = kml.newlinestring(coords=list(geom.coords)) + elif geom.geom_type == "Polygon": + entry = kml.newpolygon(outerboundaryis=list(geom.exterior.coords)) + else: + continue + if geom.geom_type in ["Point", "LineString", "Polygon"]: + for col in data.columns: + if col != "geometry": + if isinstance((row[col]), bool): + entry.extendeddata.newdata(col, str(row[col])) + else: + entry.extendeddata.newdata(col, row[col]) + kml.save(file_path) + if not is_local: + mode = "overwrite" if replace else "create" + fs.put_file(file_path, data_id, mode=mode) + return data_id diff --git a/xcube/core/store/fs/registry.py b/xcube/core/store/fs/registry.py index ebfe804d1..26d0cd9cd 100644 --- a/xcube/core/store/fs/registry.py +++ b/xcube/core/store/fs/registry.py @@ -25,6 +25,7 @@ ) from .impl.geodataframe import ( GeoDataFrameGeoJsonFsDataAccessor, + GeoDataFrameKmlFsDataAccessor, GeoDataFrameShapefileFsDataAccessor, ) from .impl.geotiff import MultiLevelDatasetGeoTiffFsDataAccessor @@ -125,8 +126,9 @@ def register_fs_data_accessor_class(fs_data_accessor_class: type[FsDataAccessor] DatasetLevelsFsDataAccessor, MultiLevelDatasetGeoTiffFsDataAccessor, MultiLevelDatasetLevelsFsDataAccessor, - GeoDataFrameShapefileFsDataAccessor, GeoDataFrameGeoJsonFsDataAccessor, + GeoDataFrameKmlFsDataAccessor, + GeoDataFrameShapefileFsDataAccessor, ): register_fs_data_accessor_class(cls) diff --git a/xcube/core/store/fs/store.py b/xcube/core/store/fs/store.py index 843a7311d..687bfb671 100644 --- a/xcube/core/store/fs/store.py +++ b/xcube/core/store/fs/store.py @@ -70,8 +70,9 @@ ".tif": "geotiff", ".tiff": "geotiff", ".geotiff": "geotiff", - ".shp": "shapefile", ".geojson": "geojson", + ".kml": "kml", + ".shp": "shapefile", } _FORMAT_TO_DATA_TYPE_ALIASES = { @@ -80,6 +81,7 @@ "levels": (MULTI_LEVEL_DATASET_TYPE.alias, DATASET_TYPE.alias), "geotiff": (DATASET_TYPE.alias, MULTI_LEVEL_DATASET_TYPE.alias), "geojson": (GEO_DATA_FRAME_TYPE.alias,), + "kml": (GEO_DATA_FRAME_TYPE.alias,), "shapefile": (GEO_DATA_FRAME_TYPE.alias,), } diff --git a/xcube/plugin.py b/xcube/plugin.py index 93af1ab8c..bff68974c 100644 --- a/xcube/plugin.py +++ b/xcube/plugin.py @@ -106,6 +106,7 @@ def _register_dataset_ios(ext_registry: extension.ExtensionRegistry): ), ("geodataframe", "shapefile", "gpd.GeoDataFrame in ESRI Shapefile format"), ("geodataframe", "geojson", "gpd.GeoDataFrame in GeoJSON format"), + ("geodataframe", "kml", "gpd.GeoDataFrame in KML format"), ) _FS_DATA_OPENER_ITEMS = _FS_DATA_ACCESSOR_ITEMS From a394bb920eab24d33f6d2a51c59be38482a8eedb Mon Sep 17 00:00:00 2001 From: TonioF Date: Mon, 24 Mar 2025 12:18:09 +0100 Subject: [PATCH 3/6] edited changelog --- CHANGES.md | 6 ++++++ environment.yml | 1 + xcube/version.py | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 851fa26f5..73c523175 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,9 @@ +## Changes in 1.9.1 (in development) + +### Enhancements + +* xcube supports reading from and writing to `kml` + ## Changes in 1.9.0 ### Enhancements diff --git a/environment.yml b/environment.yml index a025afb80..8bde43121 100644 --- a/environment.yml +++ b/environment.yml @@ -38,6 +38,7 @@ dependencies: - s3fs >=2021.6 - setuptools >=41.0 - shapely >=1.6 + - simplekml - tabulate >=0.9 - tornado >=6.0 - urllib3 >=2.0 diff --git a/xcube/version.py b/xcube/version.py index 8d49cf0d0..530305bee 100644 --- a/xcube/version.py +++ b/xcube/version.py @@ -2,4 +2,4 @@ # Permissions are hereby granted under the terms of the MIT License: # https://opensource.org/licenses/MIT. -version = "1.9.0" +version = "1.9.1.dev0" From e5557f33504702196736229bee2f441dcf1d5c2e Mon Sep 17 00:00:00 2001 From: TonioF Date: Mon, 24 Mar 2025 15:39:22 +0100 Subject: [PATCH 4/6] improved kml writing and reading --- test/core/store/fs/test_registry.py | 20 +++++++++---- xcube/core/store/fs/impl/geodataframe.py | 36 ++++++++++++++++-------- 2 files changed, 39 insertions(+), 17 deletions(-) diff --git a/test/core/store/fs/test_registry.py b/test/core/store/fs/test_registry.py index 85a70bc7c..414f360b9 100644 --- a/test/core/store/fs/test_registry.py +++ b/test/core/store/fs/test_registry.py @@ -14,6 +14,7 @@ import fsspec import geopandas as gpd import numpy as np +import pandas as pd import pytest import xarray as xr @@ -72,10 +73,12 @@ def new_cube_data(): def new_geodataframe(): + time_data = pd.date_range(start="2010-01-01T00:00:00", periods=2, freq="D").values return gpd.GeoDataFrame( - {"placename": ["Place A", "Place B"], - "state": ["Active", "Disabled"], - "var_x": [10, 20], + {"place_name": ["Place A", "Place B"], + "is_active": [True, False], + "timestamp": time_data, + "salinity [‰]": [10, 20], "var_y": [0.5, 2.0]}, geometry=gpd.points_from_xy([8.0, 8.1], [50.0, 50.1]), crs="EPSG:4326" @@ -367,12 +370,17 @@ def _assert_multi_level_dataset_data_ok(self, ml_dataset): self.assertNotIsInstance(dataset.zarr_store.get(), GenericZarrStore) def _assert_geodataframe_ok(self, gdf: gpd.GeoDataFrame): - self.assertIn("placename", gdf.columns) - self.assertIn("var_x", gdf.columns) + self.assertIn("place_name", gdf.columns) + self.assertIn("is_active", gdf.columns) + self.assertIn("salinity [‰]", gdf.columns) self.assertIn("var_y", gdf.columns) self.assertIn("geometry", gdf.columns) - self.assertEqual("int", str(gdf.var_x.dtype)[:3]) + self.assertIn("timestamp", gdf.columns) + self.assertEqual("object", str(gdf.place_name.dtype)) + self.assertEqual("bool", str(gdf.is_active.dtype)) + self.assertEqual("int32", gdf["salinity [‰]"].dtype) self.assertEqual("float64", str(gdf.var_y.dtype)) + self.assertTrue(pd.api.types.is_datetime64_any_dtype(gdf.timestamp)) def _assert_multi_level_dataset_format_with_tile_size( self, data_store: FsDataStore diff --git a/xcube/core/store/fs/impl/geodataframe.py b/xcube/core/store/fs/impl/geodataframe.py index b8d1b6b0c..085cbabbb 100644 --- a/xcube/core/store/fs/impl/geodataframe.py +++ b/xcube/core/store/fs/impl/geodataframe.py @@ -132,13 +132,13 @@ def open_data(self, data_id: str, **open_params) -> gpd.GeoDataFrame: del gdf[col] continue if col not in ["geometry"]: + if pd.api.types.is_datetime64_any_dtype(gdf[col]): + continue try: gdf[col] = pd.to_numeric(gdf[col]) except ValueError: - if gdf[col].str.lower().isin(["true", "false"]).all(): - gdf[col] = gdf[col].map({"true": True, "false": False}) - else: - gdf[col] = gdf[col].astype(str) + if gdf[col].isin(["True", "False"]).all(): + gdf[col] = gdf[col].map({"True": True, "False": False}) return gdf def write_data(self, data: gpd.GeoDataFrame, data_id: str, **write_params) -> str: @@ -154,6 +154,8 @@ def write_data(self, data: gpd.GeoDataFrame, data_id: str, **write_params) -> st _, file_path = new_temp_file() kml = simplekml.Kml() + kml_schema = kml.newschema(name="kmlschema") + append_cols = {} for _, row in data.iterrows(): geom = row.geometry @@ -166,14 +168,26 @@ def write_data(self, data: gpd.GeoDataFrame, data_id: str, **write_params) -> st entry = kml.newpolygon(outerboundaryis=list(geom.exterior.coords)) else: continue - if geom.geom_type in ["Point", "LineString", "Polygon"]: - for col in data.columns: - if col != "geometry": - if isinstance((row[col]), bool): - entry.extendeddata.newdata(col, str(row[col])) - else: - entry.extendeddata.newdata(col, row[col]) + schema = simplekml.SchemaData("kmlschema") + for col in data.columns: + if col != "geometry": + schema.newsimpledata(col, str(row[col])) + if col not in append_cols: + dtype_str = str(data[col].dtype) + if dtype_str == "object" or dtype_str == "bool": + dtype_str = "string" + elif dtype_str.startswith("int"): + dtype_str = "int" + elif dtype_str.startswith("float"): + dtype_str = "float" + append_cols[col] = dtype_str + entry.extendeddata.schemadata = schema + + for col, typ in append_cols.items(): + kml_schema.newsimplefield(col, type=typ) + kml.save(file_path) + if not is_local: mode = "overwrite" if replace else "create" fs.put_file(file_path, data_id, mode=mode) From 1e493e4076f40eb5c4d818837683026101920fba Mon Sep 17 00:00:00 2001 From: TonioF Date: Mon, 24 Mar 2025 16:26:20 +0100 Subject: [PATCH 5/6] updated docs --- docs/source/dataaccess.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/dataaccess.md b/docs/source/dataaccess.md index f927336fb..565883aff 100644 --- a/docs/source/dataaccess.md +++ b/docs/source/dataaccess.md @@ -250,7 +250,7 @@ The following `storage_options` can be used for the `abfs` data store: All filesystem data stores can open datasets from various data formats. Datasets in Zarr, GeoTIFF / COG, or NetCDF format will be provided either by [xarray.Dataset] or xcube [MultiLevelDataset] instances. -Datasets stored in GeoJSON or ESRI Shapefile will yield +Datasets stored in GeoJSON, KML or ESRI Shapefile will yield [geopandas.GeoDataFrame] instances. Common parameters for opening [xarray.Dataset] instances: From 5e5dedae33646b1ea646a6babd84a74fd53bff17 Mon Sep 17 00:00:00 2001 From: TonioF Date: Mon, 24 Mar 2025 16:34:42 +0100 Subject: [PATCH 6/6] revert erroneously committed changes --- test/core/store/fs/impl/test_geotiff.py | 24 +++----------------- xcube/core/store/fs/impl/dataset.py | 29 +------------------------ 2 files changed, 4 insertions(+), 49 deletions(-) diff --git a/test/core/store/fs/impl/test_geotiff.py b/test/core/store/fs/impl/test_geotiff.py index c9bc1c436..b534bc7a3 100644 --- a/test/core/store/fs/impl/test_geotiff.py +++ b/test/core/store/fs/impl/test_geotiff.py @@ -147,11 +147,11 @@ def test_read_geotiff(self): class DatasetGeoTiffFsDataAccessorTest(unittest.TestCase): """ - A Test class to test opening and writing a GeoTIFF as multilevel dataset or + A Test class to test opening a GeoTIFF as multilevel dataset or as normal dataset """ - def test_open_ml_to_dataset(self): + def test_ml_to_dataset(self): fs, cog_path = GeoTIFFMultiLevelDatasetTest.get_params("sample-cog.tif") data_accessor = DatasetGeoTiffFsDataAccessor() self.assertIsInstance(data_accessor, DatasetGeoTiffFsDataAccessor) @@ -164,7 +164,7 @@ def test_open_ml_to_dataset(self): data_accessor.get_open_data_params_schema(cog_path), JsonSchema ) - def test_open_dataset(self): + def test_dataset(self): fs, tiff_path = GeoTIFFMultiLevelDatasetTest.get_params("sample-geotiff.tif") data_accessor = DatasetGeoTiffFsDataAccessor() self.assertIsInstance(data_accessor, DatasetGeoTiffFsDataAccessor) @@ -178,24 +178,6 @@ def test_open_dataset(self): ) self.assertIsInstance(dataset, xarray.Dataset) - def test_write_dataset(self): - from xcube.core.new import new_cube - fs, _ = GeoTIFFMultiLevelDatasetTest.get_params("sample-geotiff.tif") - ds = new_cube(variables=dict(xxx=1, yyy=2)) - data_accessor = DatasetGeoTiffFsDataAccessor() - tiff_path = "geotiff_write_test.tif" - data_accessor.write_data( - ds, tiff_path, fs=fs - ) - dataset = data_accessor.open_data( - data_id=tiff_path, - fs=fs, - root=None, - tile_size=[256, 256], - overview_level=None, - ) - self.assertIsNotNone(dataset) - class ObjectStorageMultiLevelDatasetTest(S3Test): """ diff --git a/xcube/core/store/fs/impl/dataset.py b/xcube/core/store/fs/impl/dataset.py index af13a865f..e2d119f11 100644 --- a/xcube/core/store/fs/impl/dataset.py +++ b/xcube/core/store/fs/impl/dataset.py @@ -13,7 +13,6 @@ import zarr from rasterio.session import AWSSession -from xcube.core.gridmapping import GridMapping # Note, we need the following reference to register the # xarray property accessor # noinspection PyUnresolvedReferences @@ -414,33 +413,7 @@ def get_write_data_params_schema(self) -> JsonObjectSchema: def write_data( self, data: xr.Dataset, data_id: str, replace=False, **write_params ) -> str: - assert_instance(data, xr.Dataset, name="data") - assert_instance(data_id, str, name="data_id") - fs, root, write_params = self.load_fs(write_params) - - gm = GridMapping.from_dataset(data) - - try: - with fs.open(data_id, "wb") as fobj: - with rasterio.open( - fobj, - "w", - driver="GTiff", - height=gm.height, - width=gm.height, - count=len(data.data_vars) * len(data.time), - dtype=data[list(data.data_vars.keys())[0]].dtype, - crs=data.rio.crs, - transform=data.rio.transform(), - ) as dst: - band_index = 1 - for var in data.data_vars: - for time in data.time.values: - dst.write(data[var].sel(time=time).values, band_index) - band_index += 1 - except ValueError as e: - raise DataStoreError(f"Failed to write dataset {data_id!r}: {e}") from e - return data_id + raise NotImplementedError("Writing of GeoTIFF not yet supported") @classmethod def _sanitize_dataset_attrs(cls, dataset):