diff --git a/doc/config.rst b/doc/config.rst index 4c7c851b..ae205852 100644 --- a/doc/config.rst +++ b/doc/config.rst @@ -1,6 +1,6 @@ Configuration files ******************* - + There are two required configuration files for processing data: the global attributes file, which describes attributes that apply to the mooring, and the instrument configuration file, which describes attributes that apply to an instrument on a mooring. Contents of both files will be included as attributes in both the xarray Dataset and the netCDF files. A note on time and time zones @@ -210,3 +210,8 @@ Vector - ``puv``: set to ``true`` to compute PUV wave statistics. **(EXPERIMENTAL)** - ``orientation``: ``UP`` means probe head is pointing up (sample volume above probe head). ``DOWN`` means probe head is pointing down (sample volume below probe head). - Many of the Aquadopp options apply to the Vector. + +SBE 37 MicroCAT +--------------- +- All the ``_min``, ``_max``, ``_bad_ens``, etc. options available to the EXO also apply +- ``skiprows``: number of lines to skip in the .asc file before the real data begins diff --git a/doc/mc.rst b/doc/mc.rst new file mode 100644 index 00000000..9894c059 --- /dev/null +++ b/doc/mc.rst @@ -0,0 +1,29 @@ +Seabird SBE 37 MicroCAT +*********************** + +Data will generally be processed using a series of run scripts. The first script for each instrument type +depends on two :doc:`configuration files <config>`. + +Instrument data to raw .cdf +=========================== + +Convert the exported .asc file to a raw netCDF file with .cdf extension using ``runmcasc2cdf.py``. + +runmcasc2cdf.py +---------------- + +.. argparse:: + :ref: stglib.core.cmd.mcasc2cdf_parser + :prog: runmcasc2cdf.py + +Raw .cdf to CF-compliant .nc +============================ + +Convert the raw .cdf data into a CF-compliant netCDF file with .nc extension using ``runmccdf2nc.py``. + +runmccdf2nc.py +--------------- + +..
argparse:: + :ref: stglib.core.cmd.mccdf2nc_parser + :prog: runmccdf2nc.py diff --git a/setup.py b/setup.py index 738e70cf..6ce622a9 100644 --- a/setup.py +++ b/setup.py @@ -78,6 +78,8 @@ "runlisstcdf2nc.py=stglib.core.runcmd:runlisstcdf2nc", "runtcmcsv2cdf.py=stglib.core.runcmd:runtcmcsv2cdf", "runtcmcdf2nc.py=stglib.core.runcmd:runtcmcdf2nc", + "runmcasc2cdf.py=stglib.core.runcmd:runmcasc2cdf", + "runmccdf2nc.py=stglib.core.runcmd:runmccdf2nc", ], }, include_package_data=True, diff --git a/stglib/__init__.py b/stglib/__init__.py index 15fcd11c..ec0575df 100644 --- a/stglib/__init__.py +++ b/stglib/__init__.py @@ -10,6 +10,7 @@ indexvel, iq, lisst, + mc, rdi, rsk, sig, diff --git a/stglib/core/cmd.py b/stglib/core/cmd.py index f71227d0..484fdf58 100644 --- a/stglib/core/cmd.py +++ b/stglib/core/cmd.py @@ -384,3 +384,20 @@ def tcmcdf2nc_parser(): cdfarg(parser) return parser + + +def mcasc2cdf_parser(): + description = "Convert SBE 37 MicroCAT .asc file to raw .cdf format. Run this script from the directory containing MicroCAT .asc file." 
+ parser = argparse.ArgumentParser(description=description) + gattsarg(parser) + yamlarg(parser) + + return parser + + +def mccdf2nc_parser(): + description = "Convert raw SBE 37 MicroCAT .cdf format to processed .nc files" + parser = argparse.ArgumentParser(description=description) + cdfarg(parser) + + return parser diff --git a/stglib/core/runcmd.py b/stglib/core/runcmd.py index 590b8238..50eab2c1 100644 --- a/stglib/core/runcmd.py +++ b/stglib/core/runcmd.py @@ -294,3 +294,17 @@ def runtcmcsv2cdf(): metadata = get_metadata(args) stglib.tcm.csv_to_cdf(metadata) + + +def runmccdf2nc(): + args = stglib.cmd.mccdf2nc_parser().parse_args() + + run_cdf_to_nc(stglib.mc.cdf_to_nc, args) + + +def runmcasc2cdf(): + args = stglib.cmd.mcasc2cdf_parser().parse_args() + + metadata = get_metadata(args) + + stglib.mc.asc_to_cdf(metadata) diff --git a/stglib/exo.py b/stglib/exo.py index 8188ca8f..c4cf55d3 100644 --- a/stglib/exo.py +++ b/stglib/exo.py @@ -421,7 +421,7 @@ def ds_add_attrs(ds): "long_name": "Specific Conductivity", "comment": "Temperature compensated to 25 °C", "epic_code": 48, - # "standard_name": "sea_water_electrical_conductivity", + "standard_name": "sea_water_electrical_conductivity_at_reference_temperature", } ) diff --git a/stglib/hobo.py b/stglib/hobo.py index d3852f74..f01f13ad 100644 --- a/stglib/hobo.py +++ b/stglib/hobo.py @@ -172,7 +172,7 @@ def ds_add_attrs(ds): "long_name": "Conductivity", "comment": "Temperature compensated to 25 °C; low range", "epic_code": 48, - "standard_name": "sea_water_electrical_conductivity", + "standard_name": "sea_water_electrical_conductivity_at_reference_temperature", } ) @@ -196,7 +196,7 @@ def ds_add_attrs(ds): "long_name": "Conductivity", "comment": "Temperature compensated to 25 °C; high range", "epic_code": 48, - "standard_name": "sea_water_electrical_conductivity", + "standard_name": "sea_water_electrical_conductivity_at_reference_temperature", } ) diff --git a/stglib/mc.py b/stglib/mc.py new file mode 100644 index 
00000000..c080688c --- /dev/null +++ b/stglib/mc.py @@ -0,0 +1,194 @@ +import pandas as pd +import xarray as xr + +from .core import qaqc, utils + + +def read_asc(filnam, skiprows=50, encoding="utf-8"): + """Read data from an SBE 37 MicroCAT .asc file into an xarray + Dataset. + + Parameters + ---------- + filnam : string + The filename + skiprows : int, optional + How many header rows to skip. Default 50 + encoding : string, optional + File encoding. Default 'utf-8' + Returns + ------- + xarray.Dataset + An xarray Dataset of the MicroCAT data + """ + + df = pd.read_csv( + filnam, + skiprows=skiprows, + header=None, + names=["Temp", "Cond", "Sal", "Date", "Time"], + parse_dates={"time": ["Date", "Time"]}, + encoding=encoding, + index_col=False, + ) + print(df) + df.set_index("time", inplace=True) + mc = df.to_xarray() + return mc + + +def asc_to_cdf(metadata): + """ + Load a raw .asc file and generate a .cdf file + """ + basefile = metadata["basefile"] + + ds = read_asc(basefile + ".asc", skiprows=metadata["skiprows"]) + + metadata.pop("skiprows") + + ds = utils.write_metadata(ds, metadata) + + ds = utils.ensure_cf(ds) + + # configure file + cdf_filename = ds.attrs["filename"] + "-raw.cdf" + + ds.to_netcdf(cdf_filename, unlimited_dims=["time"]) + + print(f"Finished writing data to {cdf_filename}") + + return ds + + +def cdf_to_nc(cdf_filename): + """ + Load a raw .cdf file and generate a processed .nc file + """ + + # Load raw .cdf data + ds = xr.open_dataset(cdf_filename) + + # remove units in case we change and we can use larger time steps + ds.time.encoding.pop("units") + + # Rename variables to CF compliant names + ds = ds_rename_vars(ds) + + # Add attributes + ds = ds_add_attrs(ds) + + # Call QAQC + ds = mc_qaqc(ds) + + # Run utilities + ds = utils.create_z(ds) + ds = utils.clip_ds(ds) + ds = utils.ds_add_lat_lon(ds) + ds = utils.create_nominal_instrument_depth(ds) + ds = utils.add_start_stop_time(ds) + ds = utils.add_min_max(ds) + ds = utils.add_delta_t(ds) + 
+ # Write to .nc file + print("Writing cleaned/trimmed data to .nc file") + nc_filename = ds.attrs["filename"] + "-a.nc" + + ds.to_netcdf( + nc_filename, unlimited_dims=["time"], encoding={"time": {"dtype": "i4"}} + ) + utils.check_compliance(nc_filename, conventions=ds.attrs["Conventions"]) + + print(f"Done writing netCDF file {nc_filename}") + + +def ds_rename_vars(ds): + """ + Rename variables to be CF compliant + """ + varnames = {"Temp": "T_28", "Cond": "C_51", "Sal": "S_41"} + + # Check to make sure they exist before trying to rename + newvars = {} + for k in varnames: + if k in ds: + newvars[k] = varnames[k] + return ds.rename(newvars) + + +def ds_add_attrs(ds): + """ + Add attributes: units, standard name from CF website, long names + """ + ds = utils.ds_coord_no_fillvalue(ds) + + ds["time"].attrs.update( + {"standard_name": "time", "axis": "T", "long_name": "time (UTC)"} + ) + + if "T_28" in ds: + ds["T_28"].attrs.update( + { + "units": "degree_C", + "standard_name": "sea_water_temperature", + "long_name": "Temperature", + } + ) + + if "C_51" in ds: + ds["C_51"].attrs.update( + { + "units": "S/m", + "long_name": "Conductivity", + "standard_name": "sea_water_electrical_conductivity", + } + ) + + if "S_41" in ds: + ds["S_41"].attrs.update( + { + "units": "1", + "long_name": "Salinity, PSU", + "comments": "Practical salinity units (PSU)", + "standard_name": "sea_water_practical_salinity", + } + ) + + return ds + + +def mc_qaqc(ds): + """ + QA/QC + Trim MicroCAT data based on metadata + """ + + varlist = ["T_28", "C_51", "S_41"] + + [varlist.append(k) for k in ds.data_vars if k not in varlist] + + for var in varlist: + ds = qaqc.trim_min(ds, var) + + ds = qaqc.trim_max(ds, var) + + ds = qaqc.trim_min_diff(ds, var) + + ds = qaqc.trim_min_diff_pct(ds, var) + + ds = qaqc.trim_max_diff(ds, var) + + ds = qaqc.trim_max_diff_pct(ds, var) + + ds = qaqc.trim_med_diff(ds, var) + + ds = qaqc.trim_med_diff_pct(ds, var) + + ds = qaqc.trim_bad_ens(ds, var) + + for var in 
varlist: + ds = qaqc.trim_by_any( + ds, var + ) # re-run and trim by other variables as necessary + + return ds diff --git a/stglib/rsk/cdf2nc.py b/stglib/rsk/cdf2nc.py index a478e1f3..e0672512 100755 --- a/stglib/rsk/cdf2nc.py +++ b/stglib/rsk/cdf2nc.py @@ -35,7 +35,7 @@ def cdf_to_nc(cdf_filename, atmpres=None, writefile=True, format="NETCDF4"): # ds.attrs['burst_interval'] * # ds.attrs['sample_interval'] / 2) - ds = ds_add_attrs(ds) + ds = ds_add_attrs(ds, is_profile) # if "P_1" in ds: # ds = ds_add_depth_dim(ds) @@ -80,6 +80,27 @@ def cdf_to_nc(cdf_filename, atmpres=None, writefile=True, format="NETCDF4"): ds = dw_add_delta_t(ds) + if is_profile: + # reset obs and row_start after we are done trimming + # this is because row_start is supposed to begin at zero according to CF + # the original obs and row_start are based on the indexes from the raw file as downloaded from the instrument + # this makes it so there are no skips in obs from removed casts + + attrsbak = ds["obs"].attrs + obs = np.arange(len(ds["obs"])) + ds = ds.assign_coords(obs=obs) + ds["obs"].attrs = attrsbak + # reset dtype since we changed the values and it got reset to int64 + if utils.check_fits_in_int32(ds, "obs"): + ds["obs"].encoding["dtype"] = "i4" + + # TODO: this code is mostly redundant with the row_start code in csv2cdf.py. 
They should be calling the same function + row_start = np.zeros(ds.row_size.shape, dtype=int) + for p in range(len(row_start)): + if p > 0: + row_start[p] = row_start[p - 1] + ds.row_size[p - 1] + ds["row_start"].values = row_start + # if we are dealing with continuous instruments, drop sample since it is a singleton dimension if "sample" in ds: if len(ds["sample"]) == 1: @@ -89,6 +110,7 @@ def cdf_to_nc(cdf_filename, atmpres=None, writefile=True, format="NETCDF4"): if utils.check_time_fits_in_int32(ds, "obstime"): ds["obstime"].encoding["dtype"] = "i4" else: + print("Could not set obstime to i4; setting to float64 instead") ds["obstime"].encoding["dtype"] = "float64" if writefile: @@ -261,13 +283,15 @@ def trim_min(ds, var): # return ds -def ds_add_attrs(ds): +def ds_add_attrs(ds, is_profile): # Update attributes for EPIC and STG compliance ds = utils.ds_coord_no_fillvalue(ds) ds["time"].attrs.update( {"standard_name": "time", "axis": "T", "long_name": "time (UTC)"} ) + if is_profile: + ds["time"].attrs["long_name"] = "observation time (UTC)" if (ds.attrs["sample_mode"] == "CONTINUOUS") and ("sample" not in ds): if utils.check_time_fits_in_int32(ds, "time"): @@ -325,7 +349,7 @@ def ds_add_attrs(ds): if "SpC_48" in ds: ds["SpC_48"].attrs.update( { - "standard_name": "sea_water_electrical_conductivity", + "standard_name": "sea_water_electrical_conductivity_at_reference_temperature", "comment": "Temperature compensated to 25 °C", } ) diff --git a/stglib/rsk/csv2cdf.py b/stglib/rsk/csv2cdf.py index 3cc730f9..742cb012 100644 --- a/stglib/rsk/csv2cdf.py +++ b/stglib/rsk/csv2cdf.py @@ -191,10 +191,10 @@ def csv_to_cdf(metadata): ds = ds.drop("time") ds = ds.rename({"obs": "time"}).set_coords("time").rename({"time": "obs"}) - ds["obs"].attrs["long_name"] = "sample number" + ds["obs"].attrs["long_name"] = "observation number" ds["obstime"] = xr.DataArray(obstime, dims="obs") - ds["obstime"].attrs["long_name"] = "time (UTC)" + ds["obstime"].attrs["long_name"] = "observation 
start time (UTC)" ds["obstime"].attrs["standard_name"] = "time" ds = xr.merge([ds, pr]) diff --git a/stglib/tests/data/.gitattributes b/stglib/tests/data/.gitattributes index 355e0ab7..e4e63449 100644 --- a/stglib/tests/data/.gitattributes +++ b/stglib/tests/data/.gitattributes @@ -110,3 +110,4 @@ V1482304.ssl filter=lfs diff=lfs merge=lfs -text V1482304.vhd filter=lfs diff=lfs merge=lfs -text config_1126vec14823.yaml filter=lfs diff=lfs merge=lfs -text glob_att1126_msl.txt filter=lfs diff=lfs merge=lfs -text +mc3575.asc filter=lfs diff=lfs merge=lfs -text diff --git a/stglib/tests/data/11263mc_config.yaml b/stglib/tests/data/11263mc_config.yaml new file mode 100644 index 00000000..c6ad4977 --- /dev/null +++ b/stglib/tests/data/11263mc_config.yaml @@ -0,0 +1,6 @@ +basefile: mc3575 +filename: 11263mc +LatLonDatum: NAD83 +skiprows: 51 +initial_instrument_height: 0.15 +initial_instrument_height_note: height above seabed \ No newline at end of file diff --git a/stglib/tests/data/glob_att1126_mc.txt b/stglib/tests/data/glob_att1126_mc.txt new file mode 100644 index 00000000..b4855e39 --- /dev/null +++ b/stglib/tests/data/glob_att1126_mc.txt @@ -0,0 +1,29 @@ +SciPi; "John Warner" +PROJECT; "USGS Coastal and Marine Hazards and Resources Program" +EXPERIMENT; CCB21 +DESCRIPTION; "Cape Cod Bay, Sandy Neck Beach, Barnstable, MA" +title; "Time-series measurements of oceanographic and water quality data collected in Cape Cod Bay, Barnstable, MA, March 10 to April 7, 2021" +DATA_SUBTYPE; MOORED +DATA_ORIGIN; "USGS WHCMSC Coastal and Estuarine Dynamics" +COORD_SYSTEM; GEOGRAPHIC +Conventions; CF-1.8 +MOORING; 1126 +WATER_DEPTH; 8.7 +WATER_DEPTH_NOTE; "(meters), Mean water level from pressure data" +height_above_geopotential_datum; -0.5 +geopotential_datum_name; NAVD88 +latitude; 41.742668 +longitude; -70.330457 +magnetic_variation; -14.47 +Deployment_date; "10-Mar-2021 15:58" +Recovery_date; "07-Apr-2021 14:09" +DATA_CMNT; "No magnetic variation recorded, used data from IGRF 
model via ngdc.noaa.gov." +platform_type; Quadpod +DRIFTER; 0 +POS_CONST; 0 +DEPTH_CONST; 0 +WATER_MASS; "Cape Cod Bay" +VAR_FILL; NaN +institution; "United States Geological Survey, Woods Hole Coastal and Marine Science Center" +institution_url; https://woodshole.er.usgs.gov +Field_Activity_Number; 2021-018-FA diff --git a/stglib/tests/data/mc3575.asc b/stglib/tests/data/mc3575.asc new file mode 100644 index 00000000..056287d1 --- /dev/null +++ b/stglib/tests/data/mc3575.asc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75c8951c4f39d2a0bd3856a0796e1669e1be8c828ea22c237566c35a72dbc6f1 +size 512694 diff --git a/stglib/tests/test_scripts.py b/stglib/tests/test_scripts.py index 7abca3f4..de27046b 100644 --- a/stglib/tests/test_scripts.py +++ b/stglib/tests/test_scripts.py @@ -466,3 +466,26 @@ def test_ensure_cf(): "glob_att1123A_msl_EPIC.txt", "1123Aea_example_config.yaml", ) + + +def mc_raw(glob_att, config_yaml): + result = subprocess.run( + [scripts / "runmcasc2cdf.py", glob_att, config_yaml], + capture_output=True, + cwd=cwd, + ) + assert "Finished writing data" in result.stdout.decode("utf8") + + +def mc_nc(nc_file): + result = subprocess.run( + [scripts / "runmccdf2nc.py", nc_file], + capture_output=True, + cwd=cwd, + ) + assert "Done writing netCDF file" in result.stdout.decode("utf8") + + +def test_mc(): + mc_raw("glob_att1126_mc.txt", "11263mc_config.yaml") + mc_nc("11263mc-raw.cdf")