Skip to content

Commit

Permalink
Merge pull request #1212 from PCMDI/ao_xcdat_open
Browse files Browse the repository at this point in the history
Update xcdat_open()
  • Loading branch information
lee1043 authored Dec 23, 2024
2 parents 4a9b79d + 7e482b8 commit 9205c7f
Showing 1 changed file with 59 additions and 4 deletions.
63 changes: 59 additions & 4 deletions pcmdi_metrics/io/xcdat_openxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@
import xcdat as xc
import xmltodict

from pcmdi_metrics.io.xcdat_dataset_io import get_calendar


def xcdat_open(
infile: Union[str, list], data_var: str = None, decode_times: bool = True
infile: Union[str, list], data_var: str = None, decode_times: bool = True, chunks={}
) -> xr.Dataset:
"""
Open input file (netCDF, or xml generated by cdscan)
Expand All @@ -24,6 +26,8 @@ def xcdat_open(
decode_times : bool, optional
If True, attempt to decode times encoded in the standard NetCDF datetime format into cftime.datetime objects.
Otherwise, leave them encoded as numbers. This keyword may not be supported by all the backends, by default True.
chunks : int, "auto", dict, or None, optional
The chunk size used to load data into dask arrays.
Returns
-------
Expand All @@ -45,16 +49,67 @@ def xcdat_open(
>>> ds = xcdat_open('mydata.xml')
"""
if isinstance(infile, list) or "*" in infile:
ds = xc.open_mfdataset(infile, data_var=data_var, decode_times=decode_times)
try:
ds = xc.open_mfdataset(
infile, data_var=data_var, decode_times=decode_times, chunks=chunks
)
except (
ValueError
): # Could be due to non-cf-compliant calendar or other attribute
ds = xc.open_mfdataset(
infile, data_var=data_var, decode_times=False, chunks=chunks
)
ds = fix_noncompliant_attr(ds)
else:
if infile.split(".")[-1].lower() == "xml":
ds = _xcdat_openxml(infile, data_var=data_var, decode_times=decode_times)
try:
ds = _xcdat_openxml(
infile, data_var=data_var, decode_times=decode_times, chunks=chunks
)
except (
ValueError
): # Could be due to non-cf-compliant calendar or other attribute
ds = _xcdat_openxml(
infile, data_var=data_var, decode_times=False, chunks=chunks
)
ds = fix_noncompliant_attr(ds)
else:
ds = xc.open_dataset(infile, data_var=data_var, decode_times=decode_times)
try:
ds = xc.open_dataset(
infile, data_var=data_var, decode_times=decode_times, chunks=chunks
)
except (
ValueError
): # Could be due to non-cf-compliant calendar or other attribute
ds = xc.open_dataset(
infile, data_var=data_var, decode_times=False, chunks=chunks
)
ds = fix_noncompliant_attr(ds)

return ds.bounds.add_missing_bounds()


def fix_noncompliant_attr(ds: xr.Dataset) -> xr.Dataset:
    """Normalize the calendar attribute of a dataset and decode its time axis.

    The calendar name obtained from ``get_calendar`` has every hyphen
    replaced by an underscore and is stored in
    ``ds.time.attrs["calendar"]``. The dataset is then handed to
    ``xc.decode_time``.

    Parameters
    ----------
    ds : xr.Dataset
        xarray dataset to fix. The ``calendar`` attribute of its ``time``
        coordinate is overwritten on the object that is passed in.
        NOTE(review): presumably the time axis is still encoded (opened
        with ``decode_times=False``) -- confirm against callers.

    Returns
    -------
    xr.Dataset
        The dataset returned by ``xc.decode_time`` after the calendar
        attribute has been updated.
    """
    # Add any calendar fixes here
    normalized_calendar = get_calendar(ds).replace("-", "_")
    ds.time.attrs["calendar"] = normalized_calendar

    # Decode only after the attribute has been patched, so the decoder
    # sees the normalized calendar name.
    return xc.decode_time(ds)


def _xcdat_openxml(
xmlfile: str, data_var: str = None, decode_times: bool = True
) -> xr.Dataset:
Expand Down

0 comments on commit 9205c7f

Please sign in to comment.