From 181b5fe48f5d5bcf22eaafdda4a948ef715e9f8e Mon Sep 17 00:00:00 2001 From: Dan Nowacki Date: Thu, 15 Feb 2024 11:56:29 -0800 Subject: [PATCH] Clean up unused code and improve error messages (#185) * Remove trim_by_salinity as it's not being used and has a replacement * Commenting unused code * Remove unused code * Old code and update error messages * Actually remove it; don't just comment it out --- doc/config.rst | 3 +- stglib/aqd/aqdutils.py | 4 - stglib/aqd/wvscdf2nc.py | 2 - stglib/core/qaqc.py | 29 +--- stglib/core/runcmd.py | 15 ++- stglib/core/utils.py | 287 ++++++++++++++++++---------------------- stglib/rsk/cdf2nc.py | 64 ++++----- stglib/rsk/csv2cdf.py | 28 ++-- stglib/troll.py | 9 -- stglib/wxt.py | 8 -- 10 files changed, 189 insertions(+), 260 deletions(-) diff --git a/doc/config.rst b/doc/config.rst index 0d3187ef..869c4815 100644 --- a/doc/config.rst +++ b/doc/config.rst @@ -134,7 +134,6 @@ EXO EXO-specific options include: - ``skiprows``: number of lines to skip in the CSV before the real data begins -- ``trim_by_salinity``: if ``'true'``, use salinity (``S_41``) as a master variable. Wherever salinity is ``_FillValue``, all other variables will be filled as well. Useful for when the instrument comes out of the water. Note that negative numeric values in the YAML config file must be treated with care so as not to be interpreted as strings. If you want the minimum value to be, say, -0.2 units for a particular parameter, you must write this as ``-0.2`` and not ``-.2`` in the config file. The latter format will be interpreted as a string and will cause an error. @@ -198,4 +197,4 @@ Lowell TCM Hobo - All the _min, _max, _bad_ens, etc. options available to the EXO. - ``skipfooter``: number of lines to skip in the CSV file at the end of the file - ``ncols``: number of columns of data to read, starting at first -- ``names``: option for user specified column names (only recommended when code will not read names using automated/default method) \ No newline at end of file +- ``names``: option for user specified column names (only recommended when code will not read names using automated/default method) diff --git a/stglib/aqd/aqdutils.py b/stglib/aqd/aqdutils.py index fa4e751c..4bceb700 100644 --- a/stglib/aqd/aqdutils.py +++ b/stglib/aqd/aqdutils.py @@ -218,10 +218,6 @@ def coord_transform(vel1, vel2, vel3, heading, pitch, roll, T, T_orig, cs, out=" return u, v, w -def swap_bindist_to_depth(ds): - return ds.swap_dims({"bindist": "depth"}) - - def set_orientation(VEL, T): """ Create Z variable depending on instrument orientation diff --git a/stglib/aqd/wvscdf2nc.py b/stglib/aqd/wvscdf2nc.py index 267bb990..c7b83034 100755 --- a/stglib/aqd/wvscdf2nc.py +++ b/stglib/aqd/wvscdf2nc.py @@ -25,8 +25,6 @@ def cdf_to_nc(cdf_filename, atmpres=False, writefile=True): # Make bin_depth variable ds = aqdutils.make_bin_depth(ds, waves=True) - # Swap dimensions from bindist to depth - # ds = aqdutils.swap_bindist_to_depth(ds) ds = cdf2nc.ds_swap_dims(ds) # Rename DataArrays within Dataset for EPIC compliance # and append depth coord to velocities and amplitudes diff --git a/stglib/core/qaqc.py b/stglib/core/qaqc.py index 9a6a1c3c..32e2202b 100644 --- a/stglib/core/qaqc.py +++ b/stglib/core/qaqc.py @@ -186,29 +186,6 @@ def trim_bad_ens_indiv(ds, var): return ds -def trim_by_salinity(ds, var): - if ( - "trim_by_salinity" in ds.attrs - and ds.attrs["trim_by_salinity"].lower() == "true" - and var in ds - ): # xarray doesn't support writing attributes as booleans - if ( - "trim_by_salinity_exclude" in ds.attrs - and var in ds.attrs["trim_by_salinity_exclude"] - ): - pass - else: - print("%s: Trimming using valid salinity threshold" % var) - ds[var][ds["S_41"].isnull()] = np.nan - - if var != "S_41": - notetxt = "Values filled using valid salinity threshold. " - - ds = utils.insert_note(ds, var, notetxt) - - return ds - - def trim_by_any(ds, var): attrlist = [] for a in ds.attrs: @@ -406,8 +383,8 @@ def trim_std_ratio(ds, var): ds = utils.insert_note(ds, var, notetxt) else: - print( - f"{var}_std does not exist was NOT able to trim using standard deviation ratio method" + raise ValueError( + f"User specified {ds.attrs[var + '_std_ratio']=} but {var}_std does not exist. Was not able to trim using standard deviation ratio method" ) return ds @@ -416,9 +393,7 @@ def trim_std_ratio(ds, var): def trim_warmup(ds, var): if var + "_warmup_samples" in ds.attrs: if "sample" in ds[var].coords: - print(ds[var]) ds[var] = ds[var].where(ds["sample"] > ds.attrs[var + "_warmup_samples"]) - print(ds[var]) notetxt = f"Removed {ds.attrs[var + '_warmup_samples']} samples at the beginning of each burst. " ds = utils.insert_note(ds, var, notetxt) diff --git a/stglib/core/runcmd.py b/stglib/core/runcmd.py index 52baaed3..2be4e710 100644 --- a/stglib/core/runcmd.py +++ b/stglib/core/runcmd.py @@ -13,12 +13,15 @@ def get_metadata(args): with open(args.config) as f: config = yaml.safe_load(f) - for k in config: - if k in metadata: - warnings.warn( - f"attrs collision. Replacing '{k}={metadata[k]}' from global attributes file with '{k}={config[k]}' from YAML config file." - ) - metadata[k] = config[k] + try: + for k in config: + if k in metadata: + warnings.warn( + f"attrs collision. Replacing '{k}={metadata[k]}' from global attributes file with '{k}={config[k]}' from YAML config file." + ) + metadata[k] = config[k] + except TypeError: + raise TypeError(f"Could not load metadata from {args.config}") return metadata diff --git a/stglib/core/utils.py b/stglib/core/utils.py index f45197b5..31462e6e 100644 --- a/stglib/core/utils.py +++ b/stglib/core/utils.py @@ -673,31 +673,6 @@ def create_epic_times(ds, waves=False): return ds -def create_2d_time(ds): - print("Creating 2D time variable") - # time increment in milliseconds - td = ds.attrs["sample_interval"] * np.arange(ds.attrs["samples_per_burst"]) * 1000 - - # time_2d is a CF representation of a 2d time - ds["time_2d"] = xr.DataArray( - np.expand_dims(ds["time"], 1) + [np.timedelta64(int(x), "ms") for x in td], - dims=("time", "sample"), - ) - - raveljd = make_jd(pd.DatetimeIndex(np.ravel(ds["time_2d"]))) - jd_2d = np.reshape(raveljd, ds["time_2d"].shape) - - ds["epic_time_2d"] = xr.DataArray(make_epic_time(jd_2d), dims=("time", "sample")) - ds["epic_time_2d"].encoding["_FillValue"] = None - - ds["epic_time2_2d"] = xr.DataArray(make_epic_time2(jd_2d), dims=("time", "sample")) - ds["epic_time2_2d"].encoding["_FillValue"] = None - - ds = ds.drop("time_2d") # don't need it anymore - - return ds - - def check_update_attrs(ds, key, value): """Update attr and raise warning if attr already exists and is different from replacement value""" if key in ds.attrs and ds.attrs[key] != value: @@ -717,20 +692,20 @@ def add_start_stop_time(ds): return ds -def add_lat_lon(ds, var): - """Add lat and lon dimensions""" - - ds[var] = xr.concat([ds[var]], dim=ds["lon"]) - ds[var] = xr.concat([ds[var]], dim=ds["lat"]) - - # Reorder so lat, lon are at the end. - dims = [d for d in ds[var].dims if (d != "lon") and (d != "lat")] - dims.extend(["lat", "lon"]) - dims = tuple(dims) - - ds[var] = ds[var].transpose(*dims) - - return ds +# def add_lat_lon(ds, var): +# """Add lat and lon dimensions""" +# +# ds[var] = xr.concat([ds[var]], dim=ds["lon"]) +# ds[var] = xr.concat([ds[var]], dim=ds["lat"]) +# +# # Reorder so lat, lon are at the end. +# dims = [d for d in ds[var].dims if (d != "lon") and (d != "lat")] +# dims.extend(["lat", "lon"]) +# dims = tuple(dims) +# +# ds[var] = ds[var].transpose(*dims) +# +# return ds def ds_add_lat_lon(ds): @@ -850,66 +825,66 @@ def create_water_depth_var(ds): return ds -def create_water_depth(ds): - """Create WATER_DEPTH attribute""" - - press = None - - if "Pressure_ac" in ds: - press = "Pressure_ac" - elif "P_1ac" in ds: - press = "P_1ac" - elif "Pressure" in ds: - press = "Pressure" - elif "P_1" in ds: - press = "P_1" - - if "sample" in ds.dims: - dims = ("time", "sample") - else: - dims = "time" - - if "initial_instrument_height" in ds.attrs: - if press: - ds.attrs["nominal_instrument_depth"] = ( - ds[press].squeeze().mean(dim=dims).values - ) - # ds['water_depth'] = ds.attrs['nominal_instrument_depth'] - wdepth = ( - ds.attrs["nominal_instrument_depth"] - + ds.attrs["initial_instrument_height"] - ) - if "ac" in press: - ds.attrs["WATER_DEPTH_source"] = ( - "water depth = MSL from " - "pressure sensor, " - "atmospherically corrected" - ) - else: - ds.attrs["WATER_DEPTH_source"] = ( - "water depth = MSL from " "pressure sensor" - ) - ds.attrs["WATER_DEPTH_datum"] = "MSL" - else: - wdepth = ds.attrs["WATER_DEPTH"] - ds.attrs["nominal_instrument_depth"] = ( - ds.attrs["WATER_DEPTH"] - ds.attrs["initial_instrument_height"] - ) - # ds['Depth'] = ds.attrs['nominal_instrument_depth'] - # TODO: why is this being redefined here? Seems redundant - ds.attrs["WATER_DEPTH"] = wdepth - - elif "nominal_instrument_depth" in ds.attrs: - ds.attrs["initial_instrument_height"] = ( - ds.attrs["WATER_DEPTH"] - ds.attrs["nominal_instrument_depth"] - ) - # ds['water_depth'] = ds.attrs['nominal_instrument_depth'] - - if "initial_instrument_height" not in ds.attrs: - # TODO: do we really want to set to zero? - ds.attrs["initial_instrument_height"] = 0 - - return ds +# def create_water_depth(ds): +# """Create WATER_DEPTH attribute""" +# +# press = None +# +# if "Pressure_ac" in ds: +# press = "Pressure_ac" +# elif "P_1ac" in ds: +# press = "P_1ac" +# elif "Pressure" in ds: +# press = "Pressure" +# elif "P_1" in ds: +# press = "P_1" +# +# if "sample" in ds.dims: +# dims = ("time", "sample") +# else: +# dims = "time" +# +# if "initial_instrument_height" in ds.attrs: +# if press: +# ds.attrs["nominal_instrument_depth"] = ( +# ds[press].squeeze().mean(dim=dims).values +# ) +# # ds['water_depth'] = ds.attrs['nominal_instrument_depth'] +# wdepth = ( +# ds.attrs["nominal_instrument_depth"] +# + ds.attrs["initial_instrument_height"] +# ) +# if "ac" in press: +# ds.attrs["WATER_DEPTH_source"] = ( +# "water depth = MSL from " +# "pressure sensor, " +# "atmospherically corrected" +# ) +# else: +# ds.attrs["WATER_DEPTH_source"] = ( +# "water depth = MSL from " "pressure sensor" +# ) +# ds.attrs["WATER_DEPTH_datum"] = "MSL" +# else: +# wdepth = ds.attrs["WATER_DEPTH"] +# ds.attrs["nominal_instrument_depth"] = ( +# ds.attrs["WATER_DEPTH"] - ds.attrs["initial_instrument_height"] +# ) +# # ds['Depth'] = ds.attrs['nominal_instrument_depth'] +# # TODO: why is this being redefined here? Seems redundant +# ds.attrs["WATER_DEPTH"] = wdepth +# +# elif "nominal_instrument_depth" in ds.attrs: +# ds.attrs["initial_instrument_height"] = ( +# ds.attrs["WATER_DEPTH"] - ds.attrs["nominal_instrument_depth"] +# ) +# # ds['water_depth'] = ds.attrs['nominal_instrument_depth'] +# +# if "initial_instrument_height" not in ds.attrs: +# # TODO: do we really want to set to zero? +# ds.attrs["initial_instrument_height"] = 0 +# +# return ds def create_nominal_instrument_depth(ds): @@ -1086,63 +1061,63 @@ def create_z(ds): return ds -def add_z_if_no_pressure(ds, var): - # no_p = no pressure sensor. also use for exo - attrsbak = ds["z"].attrs - ds[var] = ds[var].expand_dims("z") - # reorder so z at end - dims = [d for d in ds[var].dims if (d != "z")] - dims.extend(["z"]) - dims = tuple(dims) - ds[var] = ds[var].transpose(*dims) - ds["z"].attrs = attrsbak - - return ds - - -def no_p_create_depth(ds): - # no_p = no pressure sensor. also use for exo - if "NAVD88_ref" in ds.attrs: - ds["depth"] = xr.DataArray( - [-ds.attrs["NAVD88_ref"] - ds.attrs["initial_instrument_height"]], - dims="depth", - ) - ds["depth"].attrs["VERT_DATUM"] = "NAVD88" - ds["depth"].attrs["NOTE"] = ( - "Computed as platform depth " - "[m NAVD88] minus " - "initial_instrument_height" - ) - else: - ds["depth"] = xr.DataArray( - [ds.attrs["WATER_DEPTH"] - ds.attrs["initial_instrument_height"]], - dims="depth", - ) - ds["depth"].attrs["NOTE"] = ( - "Computed as WATER_DEPTH minus " "initial_instrument_height" - ) - - ds["depth"].attrs["positive"] = "down" - ds["depth"].attrs["axis"] = "Z" - ds["depth"].attrs["units"] = "m" - ds["depth"].attrs["epic_code"] = 3 - ds["depth"].encoding["_FillValue"] = None - - return ds - - -def no_p_add_depth(ds, var): - # no_p = no pressure sensor. also use for exo - ds[var] = xr.concat([ds[var]], dim=ds["depth"]) - - # Reorder so lat, lon are at the end. - dims = [d for d in ds[var].dims if (d != "depth")] - dims.extend(["depth"]) - dims = tuple(dims) - - ds[var] = ds[var].transpose(*dims) - - return ds +# def add_z_if_no_pressure(ds, var): +# # no_p = no pressure sensor. also use for exo +# attrsbak = ds["z"].attrs +# ds[var] = ds[var].expand_dims("z") +# # reorder so z at end +# dims = [d for d in ds[var].dims if (d != "z")] +# dims.extend(["z"]) +# dims = tuple(dims) +# ds[var] = ds[var].transpose(*dims) +# ds["z"].attrs = attrsbak +# +# return ds + + +# def no_p_create_depth(ds): +# # no_p = no pressure sensor. also use for exo +# if "NAVD88_ref" in ds.attrs: +# ds["depth"] = xr.DataArray( +# [-ds.attrs["NAVD88_ref"] - ds.attrs["initial_instrument_height"]], +# dims="depth", +# ) +# ds["depth"].attrs["VERT_DATUM"] = "NAVD88" +# ds["depth"].attrs["NOTE"] = ( +# "Computed as platform depth " +# "[m NAVD88] minus " +# "initial_instrument_height" +# ) +# else: +# ds["depth"] = xr.DataArray( +# [ds.attrs["WATER_DEPTH"] - ds.attrs["initial_instrument_height"]], +# dims="depth", +# ) +# ds["depth"].attrs["NOTE"] = ( +# "Computed as WATER_DEPTH minus " "initial_instrument_height" +# ) +# +# ds["depth"].attrs["positive"] = "down" +# ds["depth"].attrs["axis"] = "Z" +# ds["depth"].attrs["units"] = "m" +# ds["depth"].attrs["epic_code"] = 3 +# ds["depth"].encoding["_FillValue"] = None +# +# return ds + + +# def no_p_add_depth(ds, var): +# # no_p = no pressure sensor. also use for exo +# ds[var] = xr.concat([ds[var]], dim=ds["depth"]) +# +# # Reorder so lat, lon are at the end. +# dims = [d for d in ds[var].dims if (d != "depth")] +# dims.extend(["depth"]) +# dims = tuple(dims) +# +# ds[var] = ds[var].transpose(*dims) +# +# return ds def insert_note(ds, var, notetxt): diff --git a/stglib/rsk/cdf2nc.py b/stglib/rsk/cdf2nc.py index 67b9c4be..bf54e7c2 100755 --- a/stglib/rsk/cdf2nc.py +++ b/stglib/rsk/cdf2nc.py @@ -217,37 +217,37 @@ def trim_min(ds, var): return ds -def ds_add_depth_dim(ds): - print("Creating depth dimension") - if "P_1ac" in ds: - p = "P_1ac" - else: - p = "P_1" - - if "NAVD88_ref" in ds.attrs: - ds["depth"] = xr.DataArray( - [-ds.attrs["NAVD88_ref"] - ds.attrs["initial_instrument_height"]], - dims="depth", - ) - ds["depth"].attrs["VERT_DATUM"] = "NAVD88" - ds["depth"].attrs["NOTE"] = ( - "Computed as platform depth " - "[m NAVD88] minus " - "initial_instrument_height" - ) - else: - dim = ["time"] - if "sample" in ds: - dim.append("sample") - ds["depth"] = xr.DataArray(np.atleast_1d(ds[p].mean(dim=dim)), dims="depth") - ds["depth"].attrs["NOTE"] = "Computed as mean of the pressure sensor" - ds["depth"].attrs["positive"] = "down" - ds["depth"].attrs["axis"] = "Z" - ds["depth"].attrs["units"] = "m" - ds["depth"].attrs["epic_code"] = 3 - ds["depth"].attrs["standard_name"] = "depth" - - return ds +# def ds_add_depth_dim(ds): +# print("Creating depth dimension") +# if "P_1ac" in ds: +# p = "P_1ac" +# else: +# p = "P_1" +# +# if "NAVD88_ref" in ds.attrs: +# ds["depth"] = xr.DataArray( +# [-ds.attrs["NAVD88_ref"] - ds.attrs["initial_instrument_height"]], +# dims="depth", +# ) +# ds["depth"].attrs["VERT_DATUM"] = "NAVD88" +# ds["depth"].attrs["NOTE"] = ( +# "Computed as platform depth " +# "[m NAVD88] minus " +# "initial_instrument_height" +# ) +# else: +# dim = ["time"] +# if "sample" in ds: +# dim.append("sample") +# ds["depth"] = xr.DataArray(np.atleast_1d(ds[p].mean(dim=dim)), dims="depth") +# ds["depth"].attrs["NOTE"] = "Computed as mean of the pressure sensor" +# ds["depth"].attrs["positive"] = "down" +# ds["depth"].attrs["axis"] = "Z" +# ds["depth"].attrs["units"] = "m" +# ds["depth"].attrs["epic_code"] = 3 +# ds["depth"].attrs["standard_name"] = "depth" +# +# return ds def ds_add_attrs(ds): @@ -262,7 +262,7 @@ def ds_add_attrs(ds): if utils.check_time_fits_in_int32(ds, "time"): ds["time"].encoding["dtype"] = "i4" else: - print("Casting time to double") + print("time variable will not fit in int32; casting to double") ds["time"].encoding["dtype"] = "double" else: if utils.check_time_fits_in_int32(ds, "time"): diff --git a/stglib/rsk/csv2cdf.py b/stglib/rsk/csv2cdf.py index c509aab7..cd0ba8f4 100644 --- a/stglib/rsk/csv2cdf.py +++ b/stglib/rsk/csv2cdf.py @@ -238,20 +238,20 @@ def csv_to_cdf(metadata): return ds -def create_lat_lon_vars_from_attrs(ds): - ds["latitude"] = xr.DataArray( - [ds.attrs["latitude"]], - dims="latitude", - attrs={"units": "degree_north", "standard_name": "latitude", "axis": "Y"}, - ) - - ds["longitude"] = xr.DataArray( - [ds.attrs["longitude"]], - dims="longitude", - attrs={"units": "degree_east", "standard_name": "longitude", "axis": "X"}, - ) - - return ds +# def create_lat_lon_vars_from_attrs(ds): +# ds["latitude"] = xr.DataArray( +# [ds.attrs["latitude"]], +# dims="latitude", +# attrs={"units": "degree_north", "standard_name": "latitude", "axis": "Y"}, +# ) +# +# ds["longitude"] = xr.DataArray( +# [ds.attrs["longitude"]], +# dims="longitude", +# attrs={"units": "degree_east", "standard_name": "longitude", "axis": "X"}, +# ) +# +# return ds def replace_spaces_in_var_names(ds): diff --git a/stglib/troll.py b/stglib/troll.py index 252d3e8b..b2bef286 100644 --- a/stglib/troll.py +++ b/stglib/troll.py @@ -152,15 +152,6 @@ def read_aquatroll(filnam, skiprows=69, encoding="utf-8", skipfooter=0): return df -def read_aquatroll_header(filnam, encoding="utf-8"): - with open(filnam, encoding=encoding) as f: - for line in f.readlines(): - if "Time Zone:" in line: - # remove commas and only return the value, - # not the 'Time Zone: ' part - return line.replace(",", "").strip()[11:] - - def ds_rename_vars(ds): varnames = { "pressure": "P_1ac", diff --git a/stglib/wxt.py b/stglib/wxt.py index 6aa5ac82..6e31bde7 100644 --- a/stglib/wxt.py +++ b/stglib/wxt.py @@ -178,14 +178,6 @@ def ds_rename_vars(ds): return ds.rename(newvars) -# Convert data from float 64 to float32 -def ds_convertfloat(ds): - for var in ds.variables: - if ds[var].name != "time": - ds[var] = ds[var].astype("float32") - return ds - - # Add attributes: units, standard name from CF website, epic code def ds_add_attrs(ds): ds = utils.ds_coord_no_fillvalue(ds)