From 214f9d991443eff110ffc0fb3fb73aa1bfcdc8b5 Mon Sep 17 00:00:00 2001 From: Rolf Hut Date: Wed, 29 May 2024 07:25:14 +0200 Subject: [PATCH 1/8] change units of caravan forcing to match NETCDF CF and CMIP --- src/ewatercycle/_forcings/caravan.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/ewatercycle/_forcings/caravan.py b/src/ewatercycle/_forcings/caravan.py index e2f24de5..2d9bff79 100644 --- a/src/ewatercycle/_forcings/caravan.py +++ b/src/ewatercycle/_forcings/caravan.py @@ -209,6 +209,12 @@ def generate( # type: ignore[override] for temp in ["tas", "tasmin", "tasmax"]: ds_basin_time[temp].attrs.update({"height": "2m"}) + #convert units to Kelvin for compatiabillity with NetCDF-CF conventions + ds_basin_time[temp].pint.to(a=temp, u = "K") + + for var in ["evspsblpot", "pr"]: + #convert units to kg m-2 s-1 for compatiabillity with NetCDF-CF conventions + ds_basin_time[var].pint.to(a=var, u = "kg m-2 s-1") start_time_name = start_time[:10] end_time_name = end_time[:10] From 24587e49f404b4127218d3ddc4714238d9912c10 Mon Sep 17 00:00:00 2001 From: Rolf Hut Date: Wed, 29 May 2024 08:17:53 +0200 Subject: [PATCH 2/8] unit change caravan forcing fixed --- src/ewatercycle/_forcings/caravan.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/ewatercycle/_forcings/caravan.py b/src/ewatercycle/_forcings/caravan.py index 2d9bff79..bfcc094a 100644 --- a/src/ewatercycle/_forcings/caravan.py +++ b/src/ewatercycle/_forcings/caravan.py @@ -210,11 +210,15 @@ def generate( # type: ignore[override] for temp in ["tas", "tasmin", "tasmax"]: ds_basin_time[temp].attrs.update({"height": "2m"}) #convert units to Kelvin for compatiabillity with NetCDF-CF conventions - ds_basin_time[temp].pint.to(a=temp, u = "K") + if (ds_basin[temp].attrs["unit"]) == "°C": + ds_basin[temp].values = ds_basin[temp].values + 273.15 + ds_basin[temp].attrs["unit"] = "°K" for var in ["evspsblpot", "pr"]: #convert units to kg m-2 s-1 for compatiabillity with NetCDF-CF conventions - ds_basin_time[var].pint.to(a=var, u = "kg m-2 s-1") + if (ds_basin[var].attrs["unit"]) == "mm": + ds_basin[var].values = ds_basin[temp].values / (1000 * 86400) #NOTE THAT THIS CONVERSION ASSUMES DAILY DATA + ds_basin[var].attrs["unit"] = "kg m-2 s-1" start_time_name = start_time[:10] end_time_name = end_time[:10] From d20359a35ed6567edfa90b66087c2c309fca515f Mon Sep 17 00:00:00 2001 From: Rolf Hut Date: Wed, 29 May 2024 08:50:56 +0200 Subject: [PATCH 3/8] oops, unit conversion is hard --- src/ewatercycle/_forcings/caravan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ewatercycle/_forcings/caravan.py b/src/ewatercycle/_forcings/caravan.py index bfcc094a..615b0ce2 100644 --- a/src/ewatercycle/_forcings/caravan.py +++ b/src/ewatercycle/_forcings/caravan.py @@ -217,7 +217,7 @@ def generate( # type: ignore[override] for var in ["evspsblpot", "pr"]: #convert units to kg m-2 s-1 for compatiabillity with NetCDF-CF conventions if (ds_basin[var].attrs["unit"]) == "mm": - ds_basin[var].values = ds_basin[temp].values / (1000 * 86400) #NOTE THAT THIS CONVERSION ASSUMES DAILY DATA + ds_basin[var].values = ds_basin[temp].values / (86400) #NOTE THAT THIS CONVERSION ASSUMES DAILY DATA ds_basin[var].attrs["unit"] = "kg m-2 s-1" start_time_name = start_time[:10] From 83c6b9575e834fa83844ab1b52442f5ac90a9468 Mon Sep 17 00:00:00 2001 From: Rolf Hut Date: Tue, 18 Jun 2024 19:51:23 +0200 Subject: [PATCH 4/8] added support for data_source and version --- src/ewatercycle/_forcings/caravan.py | 33 +++++++++++++++++++++------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/src/ewatercycle/_forcings/caravan.py b/src/ewatercycle/_forcings/caravan.py index 615b0ce2..71e0d296 100644 --- a/src/ewatercycle/_forcings/caravan.py +++ b/src/ewatercycle/_forcings/caravan.py @@ -13,7 +13,8 @@ from ewatercycle.util import get_time COMMON_URL = "ca13056c-c347-4a27-b320-930c2a4dd207" -OPENDAP_URL = f"https://opendap.4tu.nl/thredds/dodsC/data2/djht/{COMMON_URL}/1/" +VERSION = "2" +OPENDAP_URL = f"https://opendap.4tu.nl/thredds/dodsC/data2/djht/{COMMON_URL}/{VERSION}/" SHAPEFILE_URL = ( f"https://data.4tu.nl/file/{COMMON_URL}/bbe94526-cf1a-4b96-8155-244f20094719" ) @@ -174,6 +175,16 @@ def generate( # type: ignore[override] name of a dataset in Caravan (for example, "camels" or "camelsgb"). For more information do `help(CaravanForcing.get_basin_id)` or see https://www.ewatercycle.org/caravan-map/. + data_source: The ID of the data source to be used. For some datasets multiple + datasources are available. currently this is only implemented for the + (basins in the) "camels" (ie. camels US) dataset. If "data_sources" is not + specified, it defaults to b'era5_land' (the default for caravan). Options for + Camels are: + - b'nldas' + - b'maurer' + - b'daymet' + See the documentation of Camels for details on the differences between these + data sources: https://dx.doi.org/10.5065/D6MW2F4D """ if "basin_id" not in kwargs: msg = ( @@ -182,10 +193,16 @@ def generate( # type: ignore[override] ) raise ValueError(msg) basin_id = str(kwargs["basin_id"]) + + if "data_source" not in kwargs + date_source = b'era5_land' + elif: + date_source = str(kwargs["data_source"]) dataset: str = basin_id.split("_")[0] ds = cls.get_dataset(dataset) - ds_basin = ds.sel(basin_id=basin_id.encode()) + ds_data_source = ds.sel(data_source = date_source.encode()) + ds_basin = ds_data_source.sel(basin_id=basin_id.encode()) ds_basin_time = crop_ds(ds_basin, start_time, end_time) if shape is None: @@ -210,15 +227,15 @@ def generate( # type: ignore[override] for temp in ["tas", "tasmin", "tasmax"]: ds_basin_time[temp].attrs.update({"height": "2m"}) #convert units to Kelvin for compatiabillity with NetCDF-CF conventions - if (ds_basin[temp].attrs["unit"]) == "°C": - ds_basin[temp].values = ds_basin[temp].values + 273.15 - ds_basin[temp].attrs["unit"] = "°K" + if (ds_basin_time[temp].attrs["unit"]) == "°C": + ds_basin_time[temp].values = ds_basin_time[temp].values + 273.15 + ds_basin_time[temp].attrs["unit"] = "°K" for var in ["evspsblpot", "pr"]: #convert units to kg m-2 s-1 for compatiabillity with NetCDF-CF conventions - if (ds_basin[var].attrs["unit"]) == "mm": - ds_basin[var].values = ds_basin[temp].values / (86400) #NOTE THAT THIS CONVERSION ASSUMES DAILY DATA - ds_basin[var].attrs["unit"] = "kg m-2 s-1" + if (ds_basin_time[var].attrs["unit"]) == "mm": + ds_basin_time[var].values = ds_basin_time[var].values / (86400) #NOTE THAT THIS CONVERSION ASSUMES DAILY DATA + ds_basin_time[var].attrs["unit"] = "kg m-2 s-1" start_time_name = start_time[:10] end_time_name = end_time[:10] From 7a009e43839540da9848c1f7e0f68c2065a3650f Mon Sep 17 00:00:00 2001 From: Rolf Hut Date: Wed, 19 Jun 2024 16:17:57 +0200 Subject: [PATCH 5/8] fixed basic errors, run into naming of variable error --- src/ewatercycle/_forcings/caravan.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/ewatercycle/_forcings/caravan.py b/src/ewatercycle/_forcings/caravan.py index 71e0d296..e6cee0d7 100644 --- a/src/ewatercycle/_forcings/caravan.py +++ b/src/ewatercycle/_forcings/caravan.py @@ -194,10 +194,15 @@ def generate( # type: ignore[override] raise ValueError(msg) basin_id = str(kwargs["basin_id"]) - if "data_source" not in kwargs - date_source = b'era5_land' - elif: + if "data_source" not in kwargs: + date_source = 'era5_land' + elif kwargs["data_source"] in ['era5_land', 'nldas', 'maurer', 'daymet']: date_source = str(kwargs["data_source"]) + else: + msg = ( + "If 'data_source' is provided it needs to be one of: 'era5_land', 'nldas', 'maurer', 'daymet'" + ) + raise ValueError(msg) dataset: str = basin_id.split("_")[0] ds = cls.get_dataset(dataset) From ce82b871dde783e094dab15378e4b0080d5ab8ec Mon Sep 17 00:00:00 2001 From: Rolf Hut Date: Thu, 20 Jun 2024 15:47:35 +0200 Subject: [PATCH 6/8] still doesn't work --- src/ewatercycle/_forcings/caravan.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/ewatercycle/_forcings/caravan.py b/src/ewatercycle/_forcings/caravan.py index e6cee0d7..5dc2eb39 100644 --- a/src/ewatercycle/_forcings/caravan.py +++ b/src/ewatercycle/_forcings/caravan.py @@ -195,9 +195,9 @@ def generate( # type: ignore[override] basin_id = str(kwargs["basin_id"]) if "data_source" not in kwargs: - date_source = 'era5_land' + data_source = 'era5_land' elif kwargs["data_source"] in ['era5_land', 'nldas', 'maurer', 'daymet']: - date_source = str(kwargs["data_source"]) + data_source = str(kwargs["data_source"]) else: msg = ( "If 'data_source' is provided it needs to be one of: 'era5_land', 'nldas', 'maurer', 'daymet'" @@ -206,7 +206,7 @@ def generate( # type: ignore[override] dataset: str = basin_id.split("_")[0] ds = cls.get_dataset(dataset) - ds_data_source = ds.sel(data_source = date_source.encode()) + ds_data_source = ds.sel(data_source = data_source.encode()) ds_basin = ds_data_source.sel(basin_id=basin_id.encode()) ds_basin_time = crop_ds(ds_basin, start_time, end_time) @@ -226,7 +226,12 @@ def generate( # type: ignore[override] for prop in properties: ds_basin_time.coords.update({prop: ds_basin_time[prop].to_numpy()}) - ds_basin_time = ds_basin_time.rename(RENAME_ERA5) + if data_source == 'era5_land': + for key,var in set(RENAME_ERA5.values()).intersection(ds_basin_time.data_vars.keys()): + ds_basin_time = ds_basin_time.drop_vars(var) + + ds_basin_time = ds_basin_time.rename(RENAME_ERA5) + variables = tuple([RENAME_ERA5[var] for var in variable_names]) for temp in ["tas", "tasmin", "tasmax"]: From a2c969f5852bb03faf066eb955a6e9684fc4158c Mon Sep 17 00:00:00 2001 From: Bart Schilperoort Date: Mon, 8 Jul 2024 14:40:14 +0200 Subject: [PATCH 7/8] Fix (some) issues --- src/ewatercycle/_forcings/caravan.py | 29 ++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/src/ewatercycle/_forcings/caravan.py b/src/ewatercycle/_forcings/caravan.py index a34bed92..54ef4da5 100644 --- a/src/ewatercycle/_forcings/caravan.py +++ b/src/ewatercycle/_forcings/caravan.py @@ -196,18 +196,29 @@ def generate( # type: ignore[override] if "data_source" not in kwargs: data_source = 'era5_land' - elif kwargs["data_source"] in ['era5_land', 'nldas', 'maurer', 'daymet']: + elif kwargs["data_source"] in ['era5_land', 'nldas', 'maurer', 'daymet']: data_source = str(kwargs["data_source"]) else: msg = ( - "If 'data_source' is provided it needs to be one of: 'era5_land', 'nldas', 'maurer', 'daymet'" + "If 'data_source' is provided it needs to be one of: 'era5_land', " + "'nldas', 'maurer', 'daymet'" ) raise ValueError(msg) - + dataset: str = basin_id.split("_")[0] ds = cls.get_dataset(dataset) - ds_data_source = ds.sel(data_source = data_source.encode()) - ds_basin = ds_data_source.sel(basin_id=basin_id.encode()) + + if dataset != "camels": + if data_source != "era5_land": + msg = ( + "Alternative data sources are only implemented for the camels " + "(USA) dataset" + ) + raise ValueError(msg) + else: + ds = ds.sel(data_source=data_source.encode()) + + ds_basin = ds.sel(basin_id=basin_id.encode()) ds_basin_time = crop_ds(ds_basin, start_time, end_time) if shape is None: @@ -227,9 +238,11 @@ def generate( # type: ignore[override] ds_basin_time.coords.update({prop: ds_basin_time[prop].to_numpy()}) if data_source == 'era5_land': - for _, var in set(RENAME_ERA5.values()).intersection(ds_basin_time.data_vars.keys()): - ds_basin_time = ds_basin_time.drop_vars(var) - + duplicate_vars = set(RENAME_ERA5.values()).intersection( + ds_basin_time.data_vars + ) + + ds_basin_time = ds_basin_time.drop_vars(duplicate_vars) ds_basin_time = ds_basin_time.rename(RENAME_ERA5) variables = tuple([RENAME_ERA5[var] for var in variable_names]) From bf1134a5a0eeb28a76b3662a8de75363bda0bdc0 Mon Sep 17 00:00:00 2001 From: Rolf Hut Date: Fri, 2 Aug 2024 11:22:35 +0200 Subject: [PATCH 8/8] fixed issue with different data sources having different variable names --- src/ewatercycle/_forcings/caravan.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/ewatercycle/_forcings/caravan.py b/src/ewatercycle/_forcings/caravan.py index 54ef4da5..a67a9d24 100644 --- a/src/ewatercycle/_forcings/caravan.py +++ b/src/ewatercycle/_forcings/caravan.py @@ -1,5 +1,6 @@ import shutil import zipfile +import time from pathlib import Path from typing import Type @@ -237,26 +238,26 @@ def generate( # type: ignore[override] for prop in properties: ds_basin_time.coords.update({prop: ds_basin_time[prop].to_numpy()}) - if data_source == 'era5_land': - duplicate_vars = set(RENAME_ERA5.values()).intersection( - ds_basin_time.data_vars - ) +# if data_source == 'era5_land': + duplicate_vars = set(RENAME_ERA5.values()).intersection( + ds_basin_time.data_vars + ) - ds_basin_time = ds_basin_time.drop_vars(duplicate_vars) - ds_basin_time = ds_basin_time.rename(RENAME_ERA5) + ds_basin_time = ds_basin_time.drop_vars(duplicate_vars) + ds_basin_time = ds_basin_time.rename(RENAME_ERA5) variables = tuple([RENAME_ERA5[var] for var in variable_names]) - # convert units to Kelvin for compatibility with CMOR MIP table units + # convert units from Celcius to Kelvin for compatibility with CMOR MIP table units for temp in ["tas", "tasmin", "tasmax"]: ds_basin_time[temp].attrs.update({"height": "2m"}) if (ds_basin_time[temp].attrs["unit"]) == "°C": ds_basin_time[temp].values = ds_basin_time[temp].values + 273.15 ds_basin_time[temp].attrs["unit"] = "K" + # convert units from mm/day to "kg m-2 s-1" for compatibility with CMOR MIP table units for var in ["evspsblpot", "pr"]: if (ds_basin_time[var].attrs["unit"]) == "mm": - # mm/day --> kg m-2 s-1 ds_basin_time[var].values = ds_basin_time[var].values / (86400) ds_basin_time[var].attrs["unit"] = "kg m-2 s-1" @@ -302,6 +303,7 @@ def get_shapefiles(directory: Path, basin_id: str) -> Path: with zipfile.ZipFile(zip_path) as myzip: myzip.extractall(path=directory) + time.sleep(5) extract_basin_shapefile(basin_id, combined_shapefile_path, shape_path) @@ -340,7 +342,7 @@ def extract_basin_shapefile( # kind of clunky but it works: select filtered polygon if i == basin_index: geom = feat.geometry - assert geom.type == "Polygon" + assert geom.type in ["Polygon","MultiPolygon"] # Add the signed area of the polygon and a timestamp # to the feature properties map.