Add support for MicroCAT (#242)
* Added SBE 37 MicroCAT

* Added .gitattributes

* Add LFS tracking to .asc file

* run autoformatting

* Added SBE 37 MicroCAT

* Added SBE37 MicroCat to setup and init

* Fixed bugs in mc.py

* Fix mc test script

* Improve profiling CTD obs and row_start values (#243)

* Reset row_start and obs indexes after trimming, and add warning message

* Improve long_names for profiles

* Update standard_name for specific conductance (#244)

* Made pull request edits to mc.py

* Added .gitignore

* Added .gitignore

* Fixed .gitattributes

---------

Co-authored-by: De Meo <[email protected]>
Co-authored-by: Dan Nowacki <[email protected]>
3 people authored Jul 25, 2024
1 parent f7c6850 commit c7593d0
Showing 12 changed files with 325 additions and 1 deletion.
7 changes: 6 additions & 1 deletion doc/config.rst
@@ -1,6 +1,6 @@
Configuration files
*******************

There are two required configuration files for processing data: the global attributes file, which describes attributes that apply to the mooring, and the instrument configuration file, which describes attributes that apply to an instrument on a mooring. Contents of both files will be included as attributes in both the xarray Dataset and the netCDF files.

A note on time and time zones
@@ -210,3 +210,8 @@ Vector
- ``puv``: set to ``true`` to compute PUV wave statistics. **(EXPERIMENTAL)**
- ``orientation``: ``UP`` means probe head is pointing up (sample volume above probe head). ``DOWN`` means probe head is pointing down (sample volume below probe head).
- Many of the Aquadopp options apply to the Vector.

SBE 37 MicroCAT
---------------
- All of the ``_min``, ``_max``, ``_bad_ens``, etc. options available to the EXO are supported.
- ``skiprows``: number of lines to skip in the .asc file before the real data begins
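A hedged example of an instrument configuration file for a MicroCAT, modeled on the ``11263mc_config.yaml`` test file added in this commit; the QA/QC keys and limit values below are illustrative assumptions, not values from the source:

```yaml
basefile: mc3575                 # raw file name without the .asc extension
filename: 11263mc                # output file prefix (→ 11263mc-raw.cdf, 11263mc-a.nc)
skiprows: 51                     # header lines to skip before the data begins
initial_instrument_height: 0.15
initial_instrument_height_note: height above seabed
# Generic trimming options, as with the EXO (keys and values are illustrative):
T_28_min: -5                     # discard temperatures below -5 degC
T_28_max: 35                     # discard temperatures above 35 degC
```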
29 changes: 29 additions & 0 deletions doc/mc.rst
@@ -0,0 +1,29 @@
Seabird SBE 37 MicroCAT
************************

Data will generally be processed using a series of run scripts. The first script for each instrument type
depends on two :doc:`configuration files </config>`.

Instrument data to raw .cdf
===========================

Convert from the exported .asc file to a raw netCDF file with .cdf extension using ``runmcasc2cdf.py``.

runmcasc2cdf.py
----------------

.. argparse::
:ref: stglib.core.cmd.mcasc2cdf_parser
:prog: runmcasc2cdf.py

Raw .cdf to CF-compliant .nc
============================

Convert the raw .cdf data into a CF-compliant netCDF file with .nc extension using ``runmccdf2nc.py``.

runmccdf2nc.py
---------------

.. argparse::
:ref: stglib.core.cmd.mccdf2nc_parser
:prog: runmccdf2nc.py
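The two-step workflow above can be sketched as a shell session. This is a hypothetical run, assuming stglib is installed and the working directory holds the .asc file plus the two configuration files (file names taken from the test data added in this commit):

```shell
# Output names follow the `filename` attribute in the config file.
base="11263mc"
raw="${base}-raw.cdf"            # written by runmcasc2cdf.py
final="${base}-a.nc"             # written by runmccdf2nc.py

if command -v runmcasc2cdf.py >/dev/null 2>&1 && [ -f mc3575.asc ]; then
    runmcasc2cdf.py glob_att1126_mc.txt 11263mc_config.yaml
    runmccdf2nc.py "$raw"
else
    echo "stglib scripts not on PATH; expected outputs: $raw then $final"
fi
```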
2 changes: 2 additions & 0 deletions setup.py
@@ -78,6 +78,8 @@
"runlisstcdf2nc.py=stglib.core.runcmd:runlisstcdf2nc",
"runtcmcsv2cdf.py=stglib.core.runcmd:runtcmcsv2cdf",
"runtcmcdf2nc.py=stglib.core.runcmd:runtcmcdf2nc",
"runmcasc2cdf.py=stglib.core.runcmd:runmcasc2cdf",
"runmccdf2nc.py=stglib.core.runcmd:runmccdf2nc",
],
},
include_package_data=True,
1 change: 1 addition & 0 deletions stglib/__init__.py
@@ -10,6 +10,7 @@
indexvel,
iq,
lisst,
mc,
rdi,
rsk,
sig,
17 changes: 17 additions & 0 deletions stglib/core/cmd.py
@@ -384,3 +384,20 @@ def tcmcdf2nc_parser():
cdfarg(parser)

return parser


def mcasc2cdf_parser():
description = "Convert SBE 37 MicroCAT .asc file to raw .cdf format. Run this script from the directory containing the MicroCAT .asc file."
parser = argparse.ArgumentParser(description=description)
gattsarg(parser)
yamlarg(parser)

return parser


def mccdf2nc_parser():
description = "Convert raw SBE 37 MicroCAT .cdf format to processed .nc files"
parser = argparse.ArgumentParser(description=description)
cdfarg(parser)

return parser
14 changes: 14 additions & 0 deletions stglib/core/runcmd.py
@@ -294,3 +294,17 @@ def runtcmcsv2cdf():
metadata = get_metadata(args)

stglib.tcm.csv_to_cdf(metadata)


def runmccdf2nc():
args = stglib.cmd.mccdf2nc_parser().parse_args()

run_cdf_to_nc(stglib.mc.cdf_to_nc, args)


def runmcasc2cdf():
args = stglib.cmd.mcasc2cdf_parser().parse_args()

metadata = get_metadata(args)

stglib.mc.asc_to_cdf(metadata)
194 changes: 194 additions & 0 deletions stglib/mc.py
@@ -0,0 +1,194 @@
import pandas as pd
import xarray as xr

from .core import qaqc, utils


def read_asc(filnam, skiprows=50, encoding="utf-8"):
"""Read data from an SBE 37 MicroCAT .asc file into an xarray
Dataset.

Parameters
----------
filnam : string
    The filename
skiprows : int, optional
    How many header rows to skip. Default 50
encoding : string, optional
    File encoding. Default 'utf-8'

Returns
-------
xarray.Dataset
    An xarray Dataset of the MicroCAT data
"""

df = pd.read_csv(
filnam,
skiprows=skiprows,
header=None,
names=["Temp", "Cond", "Sal", "Date", "Time"],
parse_dates={"time": ["Date", "Time"]},
encoding=encoding,
index_col=False,
)
df.set_index("time", inplace=True)
mc = df.to_xarray()
return mc


def asc_to_cdf(metadata):
"""
Load a raw .asc file and generate a .cdf file
"""
basefile = metadata["basefile"]

ds = read_asc(basefile + ".asc", skiprows=metadata["skiprows"])

metadata.pop("skiprows")

ds = utils.write_metadata(ds, metadata)

ds = utils.ensure_cf(ds)

# configure file
cdf_filename = ds.attrs["filename"] + "-raw.cdf"

ds.to_netcdf(cdf_filename, unlimited_dims=["time"])

print(f"Finished writing data to {cdf_filename}")

return ds


def cdf_to_nc(cdf_filename):
"""
Load a raw .cdf file and generate a processed .nc file
"""

# Load raw .cdf data
ds = xr.open_dataset(cdf_filename)

# remove the raw file's time units encoding so xarray can re-derive
# appropriate units (allowing larger time steps) on write
ds.time.encoding.pop("units")

# Rename variables to CF compliant names
ds = ds_rename_vars(ds)

# Add attributes
ds = ds_add_attrs(ds)

# Call QAQC
ds = mc_qaqc(ds)

# Run utilities
ds = utils.create_z(ds)
ds = utils.clip_ds(ds)
ds = utils.ds_add_lat_lon(ds)
ds = utils.create_nominal_instrument_depth(ds)
ds = utils.add_start_stop_time(ds)
ds = utils.add_min_max(ds)
ds = utils.add_delta_t(ds)

# Write to .nc file
print("Writing cleaned/trimmed data to .nc file")
nc_filename = ds.attrs["filename"] + "-a.nc"

ds.to_netcdf(
nc_filename, unlimited_dims=["time"], encoding={"time": {"dtype": "i4"}}
)
utils.check_compliance(nc_filename, conventions=ds.attrs["Conventions"])

print(f"Done writing netCDF file {nc_filename}")


def ds_rename_vars(ds):
"""
Rename variables to be CF compliant
"""
varnames = {"Temp": "T_28", "Cond": "C_51", "Sal": "S_41"}

# Check to make sure they exist before trying to rename
newvars = {}
for k in varnames:
if k in ds:
newvars[k] = varnames[k]
return ds.rename(newvars)


def ds_add_attrs(ds):
"""
Add attributes: units, standard name from CF website, long names
"""
ds = utils.ds_coord_no_fillvalue(ds)

ds["time"].attrs.update(
{"standard_name": "time", "axis": "T", "long_name": "time (UTC)"}
)

if "T_28" in ds:
ds["T_28"].attrs.update(
{
"units": "degree_C",
"standard_name": "sea_water_temperature",
"long_name": "Temperature",
}
)

if "C_51" in ds:
ds["C_51"].attrs.update(
{
"units": "S/m",
"long_name": "Conductivity",
"standard_name": "sea_water_electrical_conductivity",
}
)

if "S_41" in ds:
ds["S_41"].attrs.update(
{
"units": "1",
"long_name": "Salinity, PSU",
"comments": "Practical salinity units (PSU)",
"standard_name": "sea_water_practical_salinity",
}
)

return ds


def mc_qaqc(ds):
"""
QA/QC
Trim MicroCAT data based on metadata
"""

varlist = ["T_28", "C_51", "S_41"]

varlist.extend(k for k in ds.data_vars if k not in varlist)

for var in varlist:
    ds = qaqc.trim_min(ds, var)
    ds = qaqc.trim_max(ds, var)
    ds = qaqc.trim_min_diff(ds, var)
    ds = qaqc.trim_min_diff_pct(ds, var)
    ds = qaqc.trim_max_diff(ds, var)
    ds = qaqc.trim_max_diff_pct(ds, var)
    ds = qaqc.trim_med_diff(ds, var)
    ds = qaqc.trim_med_diff_pct(ds, var)
    ds = qaqc.trim_bad_ens(ds, var)

for var in varlist:
    # re-run and trim by other variables as necessary
    ds = qaqc.trim_by_any(ds, var)

return ds
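One note on the parsing approach: pandas has deprecated the dict form of ``parse_dates`` for combining separate date and time columns, so an explicit combine is the forward-compatible pattern. Below is a minimal, self-contained sketch of that pattern on synthetic rows; the sample data is an assumption (the real ``mc3575.asc`` is stored in Git LFS and not shown here), so the actual column order and date format may differ:

```python
import io

import pandas as pd

# Hypothetical .asc-style rows: temperature, conductivity, salinity, date, time.
sample = io.StringIO(
    " 12.3456, 3.45678, 31.2345, 10 Mar 2021, 15:58:31\n"
    " 12.3460, 3.45680, 31.2350, 10 Mar 2021, 16:58:31\n"
)

df = pd.read_csv(
    sample,
    header=None,
    names=["Temp", "Cond", "Sal", "Date", "Time"],
    index_col=False,
)
# Combine Date and Time explicitly rather than via the deprecated
# parse_dates={"time": ["Date", "Time"]} dict form.
df["time"] = pd.to_datetime(df["Date"].str.strip() + " " + df["Time"].str.strip())
df = df.drop(columns=["Date", "Time"]).set_index("time")
# stglib then converts to xarray with df.to_xarray()
```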
1 change: 1 addition & 0 deletions stglib/tests/data/.gitattributes
@@ -110,3 +110,4 @@ V1482304.ssl filter=lfs diff=lfs merge=lfs -text
V1482304.vhd filter=lfs diff=lfs merge=lfs -text
config_1126vec14823.yaml filter=lfs diff=lfs merge=lfs -text
glob_att1126_msl.txt filter=lfs diff=lfs merge=lfs -text
mc3575.asc filter=lfs diff=lfs merge=lfs -text
6 changes: 6 additions & 0 deletions stglib/tests/data/11263mc_config.yaml
@@ -0,0 +1,6 @@
basefile: mc3575
filename: 11263mc
LatLonDatum: NAD83
skiprows: 51
initial_instrument_height: 0.15
initial_instrument_height_note: height above seabed
29 changes: 29 additions & 0 deletions stglib/tests/data/glob_att1126_mc.txt
@@ -0,0 +1,29 @@
SciPi; "John Warner"
PROJECT; "USGS Coastal and Marine Hazards and Resources Program"
EXPERIMENT; CCB21
DESCRIPTION; "Cape Cod Bay, Sandy Neck Beach, Barnstable, MA"
title; "Time-series measurements of oceanographic and water quality data collected in Cape Cod Bay, Barnstable, MA, March 10 to April 7, 2021"
DATA_SUBTYPE; MOORED
DATA_ORIGIN; "USGS WHCMSC Coastal and Estuarine Dynamics"
COORD_SYSTEM; GEOGRAPHIC
Conventions; CF-1.8
MOORING; 1126
WATER_DEPTH; 8.7
WATER_DEPTH_NOTE; "(meters), Mean water level from pressure data"
height_above_geopotential_datum; -0.5
geopotential_datum_name; NAVD88
latitude; 41.742668
longitude; -70.330457
magnetic_variation; -14.47
Deployment_date; "10-Mar-2021 15:58"
Recovery_date; "07-Apr-2021 14:09"
DATA_CMNT; "No magnetic variation recorded, used data from IGRF model via ngdc.noaa.gov."
platform_type; Quadpod
DRIFTER; 0
POS_CONST; 0
DEPTH_CONST; 0
WATER_MASS; "Cape Cod Bay"
VAR_FILL; NaN
institution; "United States Geological Survey, Woods Hole Coastal and Marine Science Center"
institution_url; https://woodshole.er.usgs.gov
Field_Activity_Number; 2021-018-FA
3 changes: 3 additions & 0 deletions stglib/tests/data/mc3575.asc
Git LFS file not shown
23 changes: 23 additions & 0 deletions stglib/tests/test_scripts.py
@@ -466,3 +466,26 @@ def test_ensure_cf():
"glob_att1123A_msl_EPIC.txt",
"1123Aea_example_config.yaml",
)


def mc_raw(glob_att, config_yaml):
result = subprocess.run(
[scripts / "runmcasc2cdf.py", glob_att, config_yaml],
capture_output=True,
cwd=cwd,
)
assert "Finished writing data" in result.stdout.decode("utf8")


def mc_nc(nc_file):
result = subprocess.run(
[scripts / "runmccdf2nc.py", nc_file],
capture_output=True,
cwd=cwd,
)
assert "Done writing netCDF file" in result.stdout.decode("utf8")


def test_mc():
mc_raw("glob_att1126_mc.txt", "11263mc_config.yaml")
mc_nc("11263mc-raw.cdf")
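The new test can also be run on its own; a hedged sketch, assuming an editable install of stglib from the repo root with the LFS-tracked test data pulled (otherwise ``mc3575.asc`` is only a pointer file):

```shell
# Run just the MicroCAT test via pytest's -k expression filter.
target="stglib/tests/test_scripts.py"
expr="test_mc"

if command -v pytest >/dev/null 2>&1 && [ -f "$target" ]; then
    pytest "$target" -k "$expr"
else
    echo "would run: pytest $target -k $expr"
fi
```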
