Change units caravan forcing, added support for data_sources, changed version #433

Open
wants to merge 9 commits into base: main
52 changes: 47 additions & 5 deletions src/ewatercycle/_forcings/caravan.py
@@ -1,5 +1,6 @@
import shutil
import time
import zipfile
from pathlib import Path
from typing import Type

@@ -13,7 +14,8 @@
from ewatercycle.util import get_time

COMMON_URL = "ca13056c-c347-4a27-b320-930c2a4dd207"
OPENDAP_URL = f"https://opendap.4tu.nl/thredds/dodsC/data2/djht/{COMMON_URL}/1/"
VERSION = "2"
OPENDAP_URL = f"https://opendap.4tu.nl/thredds/dodsC/data2/djht/{COMMON_URL}/{VERSION}/"
SHAPEFILE_URL = (
f"https://data.4tu.nl/file/{COMMON_URL}/bbe94526-cf1a-4b96-8155-244f20094719"
)
@@ -174,6 +176,16 @@ def generate( # type: ignore[override]
name of a dataset in Caravan (for example, "camels" or "camelsgb").
For more information do `help(CaravanForcing.get_basin_id)` or see
https://www.ewatercycle.org/caravan-map/.
data_source: The ID of the data source to use. For some datasets multiple
data sources are available. Currently this is only implemented for (basins
in) the "camels" (i.e. CAMELS US) dataset. If "data_source" is not
specified, it defaults to b'era5_land' (the Caravan default). Options for
CAMELS are:
- b'nldas'
- b'maurer'
- b'daymet'
See the CAMELS documentation for details on the differences between these
data sources: https://dx.doi.org/10.5065/D6MW2F4D
"""
if "basin_id" not in kwargs:
msg = (
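For context, a minimal usage sketch of the new keyword, assuming `CaravanForcing` is reachable through the `ewatercycle.forcing.sources` registry as usual; the basin id, period and directory below are placeholders:

```python
from pathlib import Path

from ewatercycle.forcing import sources

# Placeholder basin id, period and directory; adjust to your own setup.
forcing = sources["CaravanForcing"].generate(
    start_time="1990-01-01T00:00:00Z",
    end_time="1995-12-31T00:00:00Z",
    directory=Path("./forcing/camels_01022500"),
    basin_id="camels_01022500",  # a CAMELS (US) basin
    data_source="daymet",        # new keyword introduced by this PR
)
```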
@@ -182,9 +194,31 @@ def generate( # type: ignore[override]
)
raise ValueError(msg)
basin_id = str(kwargs["basin_id"])

if "data_source" not in kwargs:
data_source = "era5_land"
elif kwargs["data_source"] in ["era5_land", "nldas", "maurer", "daymet"]:
data_source = str(kwargs["data_source"])
else:
msg = (
"If 'data_source' is provided it needs to be one of: 'era5_land', "
"'nldas', 'maurer', 'daymet'"
)
raise ValueError(msg)

dataset: str = basin_id.split("_")[0]
ds = cls.get_dataset(dataset)

if dataset != "camels":
if data_source != "era5_land":
msg = (
"Alternative data sources are only implemented for the camels "
"(USA) dataset"
)
raise ValueError(msg)
else:
ds = ds.sel(data_source=data_source.encode())

ds_basin = ds.sel(basin_id=basin_id.encode())
ds_basin_time = crop_ds(ds_basin, start_time, end_time)

@@ -204,19 +238,26 @@ def generate( # type: ignore[override]
for prop in properties:
ds_basin_time.coords.update({prop: ds_basin_time[prop].to_numpy()})

duplicate_vars = set(RENAME_ERA5.values()).intersection(
ds_basin_time.data_vars
)

ds_basin_time = ds_basin_time.drop_vars(duplicate_vars)
ds_basin_time = ds_basin_time.rename(RENAME_ERA5)

variables = tuple([RENAME_ERA5[var] for var in variable_names])

# convert units to Kelvin for compatibility with CMOR MIP table units
# convert units from Celsius to Kelvin for compatibility with CMOR MIP table units
for temp in ["tas", "tasmin", "tasmax"]:
ds_basin_time[temp].attrs.update({"height": "2m"})
if ds_basin_time[temp].attrs["unit"] == "°C":
ds_basin_time[temp].values = ds_basin_time[temp].values + 273.15
ds_basin_time[temp].attrs["unit"] = "K"

# convert units from mm/day to "kg m-2 s-1" for compatibility with CMOR MIP table units
for var in ["evspsblpot", "pr"]:
if ds_basin_time[var].attrs["unit"] == "mm":
# mm/day --> kg m-2 s-1
ds_basin_time[var].values = ds_basin_time[var].values / 86400
ds_basin_time[var].attrs["unit"] = "kg m-2 s-1"

@@ -262,6 +303,7 @@ def get_shapefiles(directory: Path, basin_id: str) -> Path:

with zipfile.ZipFile(zip_path) as myzip:
myzip.extractall(path=directory)
time.sleep(5)  # wait briefly so the extracted files are fully written before they are read

extract_basin_shapefile(basin_id, combined_shapefile_path, shape_path)

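The added `time.sleep(5)` presumably gives the extraction time to finish before the shapefile is read. If a fixed wait ever proves flaky, a hypothetical polling helper (not part of this PR) would make the wait explicit; `combined_shapefile_path` here stands in for whatever file must exist before continuing:

```python
import time
from pathlib import Path

def wait_for_file(path: Path, timeout: float = 30.0, poll: float = 0.5) -> None:
    """Block until `path` exists, or raise after `timeout` seconds."""
    deadline = time.monotonic() + timeout
    while not path.exists():
        if time.monotonic() > deadline:
            raise TimeoutError(f"{path} did not appear within {timeout} s")
        time.sleep(poll)

# e.g. wait_for_file(combined_shapefile_path) instead of a fixed sleep
```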
@@ -300,7 +342,7 @@ def extract_basin_shapefile(
# kind of clunky but it works: select filtered polygon
if i == basin_index:
geom = feat.geometry
assert geom.type == "Polygon"
assert geom.type in ["Polygon", "MultiPolygon"]

# Add the signed area of the polygon and a timestamp
# to the feature properties map.
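The relaxed assert now admits both geometry types, since some basin outlines presumably come as MultiPolygons (e.g. catchments with disjoint parts). A standalone shapely sketch (illustrative only, not part of the diff) of the two GeoJSON geometry types involved:

```python
from shapely.geometry import shape

single = {"type": "Polygon", "coordinates": [[[0, 0], [1, 0], [1, 1], [0, 0]]]}
multi = {
    "type": "MultiPolygon",
    "coordinates": [
        [[[0, 0], [1, 0], [1, 1], [0, 0]]],
        [[[2, 2], [3, 2], [3, 3], [2, 2]]],
    ],
}

for geojson in (single, multi):
    geom = shape(geojson)  # build a shapely geometry from a GeoJSON-like dict
    assert geom.geom_type in ("Polygon", "MultiPolygon")
    print(geom.geom_type, geom.area)  # Polygon 0.5 / MultiPolygon 1.0
```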