Agilent

dgbowl · Mar 28, 2024 · 1ce994b · 1ce994b
1 parent 57c773e
commit 1ce994b
Show file tree

Hide file tree

Showing 6 changed files with 441 additions and 50 deletions.
diff --git a/docs/apidoc_t/package.rst_t b/docs/apidoc_t/package.rst_t
@@ -23,40 +23,24 @@
 {{- "**example**: An example extractor" | heading }}
 {% elif 'yadg.extractors.custom.fhimcpt' == pkgname %}
 {{- "**fhimcpt**: Extractors for MCPT at FHI" | heading }}
-{% elif 'yadg.parsers.basiccsv' == pkgname %}
-{{- "**basiccsv**: Common tabular file parser" | heading }}
-{% elif 'yadg.parsers.chromdata' == pkgname %}
-{{- "**chromdata**: Post-processed chromatography data parser" | heading }}
-{% elif 'yadg.parsers.chromtrace' == pkgname %}
-{{- "**chromtrace**: Raw chromatogram trace file parser" | heading }}
-{% elif 'yadg.parsers.dummy' == pkgname %}
-{{- "**dummy**: A dummy parser" | heading }}
-{% elif 'yadg.parsers.electrochem' == pkgname %}
-{{- "**electrochem**: Electrochemistry data parser" | heading }}
-{% elif 'yadg.parsers.flowdata' == pkgname %}
-{{- "**flowdata**: Flow data parser" | heading }}
-{% elif 'yadg.parsers.masstrace' == pkgname %}
-{{- "**masstrace**: Mass spectroscopy trace file parser" | heading }}
-{% elif 'yadg.parsers.meascsv' == pkgname %}
-{{- "**meascsv**: Legacy MCPT log file parser" | heading }}
-{% elif 'yadg.parsers.qftrace' == pkgname %}
-{{- "**qftrace**: Network analyser trace file parser" | heading }}
-{% elif 'yadg.parsers.xpstrace' == pkgname %}
-{{- "**xpstrace**: XPS trace file parser" | heading }}
-{% elif 'yadg.parsers.xrdtrace' == pkgname %}
-{{- "**xrdtrace**: X-ray diffractogram trace file parser" | heading }}
-{% elif 'yadg.extractors.agilentch' == pkgname %}
-{{- "**agilent-ch**: Agilent ChemStation export ``.CH``" | heading }}
-{% elif 'yadg.extractors.agilentdx' == pkgname %}
-{{- "**agilent-dx**: Agilent OpenLab raw data ``.dx``" | heading }}
-{% elif 'yadg.extractors.eclabmpr' == pkgname %}
-{{- "**eclab-mpr**: BioLogic ECLab binary ``.mpr``" | heading }}
-{% elif 'yadg.extractors.eclabmpt' == pkgname %}
-{{- "**eclab-mpr**: BioLogic ECLab export ``.mpt``" | heading }}
-{% elif 'yadg.extractors.panalyticalxrdml' == pkgname %}
-{{- "**panalytical-xrdml**: PANalytical XRDML ``.xrdml``" | heading }}
-{% elif 'yadg.extractors.phispe' == pkgname %}
-{{- "**phi-spe**: ULVAC-PHI Multipak ``.spe``" | heading }}
+{% elif 'yadg.extractors.public.agilent' == pkgname %}
+{{- "**agilent**: Extract Agilent chromatograms" | heading }}
+{% elif 'yadg.extractors.public.drycal' == pkgname %}
+{{- "**drycal**: Extract Drycal flow meter data" | heading }}
+{% elif 'yadg.extractors.public.eclab' == pkgname %}
+{{- "**eclab**: Extract BioLogic potentiostat data" | heading }}
+{% elif 'yadg.extractors.public.ezchrom' == pkgname %}
+{{- "**ezchrom**: Extract EZChrom chromatograms" | heading }}
+{% elif 'yadg.extractors.public.fusion' == pkgname %}
+{{- "**fusion**: Extract Fusion chromatograms" | heading }}
+{% elif 'yadg.extractors.public.panalytical' == pkgname %}
+{{- "**panalytical**: Extract Panalytical X-ray diffraction data" | heading }}
+{% elif 'yadg.extractors.public.phi' == pkgname %}
+{{- "**phi**: Extract Phi XPS data" | heading }}
+{% elif 'yadg.extractors.public.quadstar' == pkgname %}
+{{- "**quadstar**: Extract Quadstar mass spectra" | heading }}
+{% elif 'yadg.extractors.public.tomato' == pkgname %}
+{{- "**tomato**: Extract data from tomato outputs" | heading }}
 {% else %}
 {{- [pkgname, "package"] | join(" ") | e | heading }}
 {% endif %}

diff --git a/src/yadg/extractors/custom/fhimcpt/vna.py b/src/yadg/extractors/custom/fhimcpt/vna.py
@@ -22,7 +22,7 @@
 ``````
 .. code-block:: yaml
 
-    DataTree:
+    datatree.DataTree:
       S11:              !!xarray.Dataset
         coords:
             freq:       !!float     # An array of measurement frequencies

diff --git a/src/yadg/extractors/public/agilent/__init__.py b/src/yadg/extractors/public/agilent/__init__.py
@@ -0,0 +1,4 @@
+"""
+Extractors for data files generated by various proprietary Agilent software.
+
+"""
diff --git a/src/yadg/extractors/public/agilent/ch.py b/src/yadg/extractors/public/agilent/ch.py
@@ -1,7 +1,173 @@
-from yadg.parsers.chromtrace.agilentch import process as extract
+"""
+**agilent.ch**
+--------------
 
-supports = {
-    "agilent.ch",
+Extractor of Agilent OpenLab binary signal trace files (``.ch`` and ``.it``).
+Currently supports version "179" of the files. Version information is defined in
+the ``magic_values`` (parameters & metadata) and ``data_dtypes`` (data) dictionaries.
+
+Adapted from `ImportAgilent.m <https://bit.ly/3HSelIR>`_ and
+`aston <https://github.com/bovee/Aston>`_.
+
+Usage
+`````
+Available since ``yadg-4.0``.
+
+.. autopydantic_model:: dgbowl_schemas.yadg.dataschema_5_1.filetype.Agilent_ch
+
+Schema
+``````
+.. code-block:: yaml
+
+    datatree.DataTree:
+      {{ detector_name }}:
+        coords:
+          uts:            !!float               # Unix timestamp
+          elution_time:   !!float               # Elution time
+        data_vars:
+          signal:         (uts, elution_time)   # Signal data
+
+Metadata
+````````
+The following metadata is extracted:
+
+    - ``sampleid``: Sample name.
+    - ``username``: User name used to generate the file.
+    - ``method``: Name of the chromatographic method.
+    - ``version``: Version of the CH file (only "179" is currently supported).
+
+
+Notes on file structure
+```````````````````````
+The following magic values are used:
+
+.. code ::
+
+    0x0000 "version magic"
+    0x0108 "data offset"
+    0x011a "x-axis minimum (ms)"
+    0x011e "x-axis maximum (ms)"
+    0x035a "sample ID"
+    0x0559 "description"
+    0x0758 "username"
+    0x0957 "timestamp"
+    0x09e5 "instrument name"
+    0x09bc "inlet"
+    0x0a0e "method"
+    0x104c "y-axis unit"
+    0x1075 "detector name"
+    0x1274 "y-axis intercept"
+    0x127c "y-axis slope"
+
+Data is stored in a consecutive set of ``<f8``, starting at the offset (calculated
+as ``offset = ("data offset" - 1) * 512``) until the end of the file.
+
+.. codeauthor::
+    Peter Kraus
+
+"""
+
+import numpy as np
+from yadg import dgutils
+from yadg.dgutils.dateutils import str_to_uts
+import xarray as xr
+from datatree import DataTree
+
+# Per-version table of header fields: byte offset -> (parameter name, dtype).
+# The version key is the string read at offset 0 of the file; each dtype is
+# passed unchanged to dgutils.read_value() by extract().
+magic_values = {}
+magic_values["179"] = {
+    0x035A: ("sampleid", "utf-16"),
+    0x0559: ("description", "utf-16"),
+    0x0A0E: ("method", "utf-16"),
+    0x0758: ("username", "utf-16"),
+    0x0957: ("timestamp", "utf-16"),
+    0x09E5: ("instrument", "utf-16"),
+    0x09BC: ("inlet", "utf-16"),
+    0x104C: ("yunit", "utf-16"),
+    0x1075: ("tracetitle", "utf-16"),
+    0x0108: ("offset", ">i4"),  # data section starts at byte (offset - 1) * 512
+    0x011A: ("xmin", ">f4"),  # extract() divides by 1000 and labels the axis "s"
+    0x011E: ("xmax", ">f4"),  # extract() divides by 1000 and labels the axis "s"
+    0x1274: ("intercept", ">f8"),  # NOTE(review): read but not used by extract() - confirm
+    0x127C: ("slope", ">f8"),  # multiplies the raw signal in extract()
 }
 
-__all__ = ["supports", "extract"]
+# Per-version layout of the data section: (bytes per data point, numpy dtype).
+data_dtypes = {}
+data_dtypes["179"] = (8, "<f8")
+
+
+def extract(
+    *,
+    fn: str,
+    timezone: str,
+    **kwargs: dict,
+) -> DataTree:
+    with open(fn, "rb") as inf:
+        ch = inf.read()
+
+    magic = dgutils.read_value(ch, 0, "utf-8")
+    pars = {}
+    if magic in magic_values.keys():
+        for offset, (tag, dtype) in magic_values[magic].items():
+            v = dgutils.read_value(ch, offset, dtype)
+            pars[tag] = v
+    pars["end"] = len(ch)
+    dsize, ddtype = data_dtypes[magic]
+    pars["start"] = (pars["offset"] - 1) * 512
+    nbytes = pars["end"] - pars["start"]
+    assert nbytes % dsize == 0
+    npoints = nbytes // dsize
+
+    metadata = dict()
+    for k in ["sampleid", "username", "method"]:
+        metadata[k] = pars[k]
+    metadata["version"] = str(magic)
+
+    xsn = np.linspace(pars["xmin"] / 1000, pars["xmax"] / 1000, num=npoints)
+    xss = np.ones(npoints) * xsn[0]
+    ysn = (
+        np.frombuffer(
+            ch,
+            offset=pars["start"],
+            dtype=ddtype,
+            count=npoints,
+        )
+        * pars["slope"]
+    )
+    yss = np.ones(npoints) * pars["slope"]
+
+    detector, title = pars["tracetitle"].split(",")
+
+    uts = str_to_uts(
+        timestamp=pars["timestamp"], format="%d-%b-%y, %H:%M:%S", timezone=timezone
+    )
+
+    ds = xr.Dataset(
+        data_vars={
+            "signal": (
+                ["uts", "elution_time"],
+                [ysn],
+                {"units": pars["yunit"], "ancillary_variables": "signal_std_err"},
+            ),
+            "signal_std_err": (
+                ["uts", "elution_time"],
+                [yss],
+                {"units": pars["yunit"], "standard_name": "signal standard_error"},
+            ),
+            "elution_time_std_err": (
+                ["elution_time"],
+                xss,
+                {"units": "s", "standard_name": "elution_time standard_error"},
+            ),
+        },
+        coords={
+            "elution_time": (
+                ["elution_time"],
+                xsn,
+                {"units": "s", "ancillary_variables": "elution_time_std_err"},
+            ),
+            "uts": (["uts"], [uts]),
+        },
+        attrs={"title": title},
+    )
+    dt = DataTree.from_dict({detector: ds})
+    dt.attrs = metadata
+    return dt