diff --git a/docs/apidoc_t/package.rst_t b/docs/apidoc_t/package.rst_t
index d70e7b13..e8d10fd0 100644
--- a/docs/apidoc_t/package.rst_t
+++ b/docs/apidoc_t/package.rst_t
@@ -23,40 +23,24 @@
 {{- "**example**: An example extractor" | heading }}
 {% elif 'yadg.extractors.custom.fhimcpt' == pkgname %}
 {{- "**fhimcpt**: Extractors for MCPT at FHI" | heading }}
-{% elif 'yadg.parsers.basiccsv' == pkgname %}
-{{- "**basiccsv**: Common tabular file parser" | heading }}
-{% elif 'yadg.parsers.chromdata' == pkgname %}
-{{- "**chromdata**: Post-processed chromatography data parser" | heading }}
-{% elif 'yadg.parsers.chromtrace' == pkgname %}
-{{- "**chromtrace**: Raw chromatogram trace file parser" | heading }}
-{% elif 'yadg.parsers.dummy' == pkgname %}
-{{- "**dummy**: A dummy parser" | heading }}
-{% elif 'yadg.parsers.electrochem' == pkgname %}
-{{- "**electrochem**: Electrochemistry data parser" | heading }}
-{% elif 'yadg.parsers.flowdata' == pkgname %}
-{{- "**flowdata**: Flow data parser" | heading }}
-{% elif 'yadg.parsers.masstrace' == pkgname %}
-{{- "**masstrace**: Mass spectroscopy trace file parser" | heading }}
-{% elif 'yadg.parsers.meascsv' == pkgname %}
-{{- "**meascsv**: Legacy MCPT log file parser" | heading }}
-{% elif 'yadg.parsers.qftrace' == pkgname %}
-{{- "**qftrace**: Network analyser trace file parser" | heading }}
-{% elif 'yadg.parsers.xpstrace' == pkgname %}
-{{- "**xpstrace**: XPS trace file parser" | heading }}
-{% elif 'yadg.parsers.xrdtrace' == pkgname %}
-{{- "**xrdtrace**: X-ray diffractogram trace file parser" | heading }}
-{% elif 'yadg.extractors.agilentch' == pkgname %}
-{{- "**agilent-ch**: Agilent ChemStation export ``.CH``" | heading }}
-{% elif 'yadg.extractors.agilentdx' == pkgname %}
-{{- "**agilent-dx**: Agilent OpenLab raw data ``.dx``" | heading }}
-{% elif 'yadg.extractors.eclabmpr' == pkgname %}
-{{- "**eclab-mpr**: BioLogic ECLab binary ``.mpr``" | heading }}
-{% elif 'yadg.extractors.eclabmpt' == pkgname %}
-{{- "**eclab-mpr**: BioLogic ECLab export ``.mpt``" | heading }}
-{% elif 'yadg.extractors.panalyticalxrdml' == pkgname %}
-{{- "**panalytical-xrdml**: PANalytical XRDML ``.xrdml``" | heading }}
-{% elif 'yadg.extractors.phispe' == pkgname %}
-{{- "**phi-spe**: ULVAC-PHI Multipak ``.spe``" | heading }}
+{% elif 'yadg.extractors.public.agilent' == pkgname %}
+{{- "**agilent**: Extract Agilent chromatograms" | heading }}
+{% elif 'yadg.extractors.public.drycal' == pkgname %}
+{{- "**drycal**: Extract Drycal flow meter data" | heading }}
+{% elif 'yadg.extractors.public.eclab' == pkgname %}
+{{- "**eclab**: Extract BioLogic potentiostat data" | heading }}
+{% elif 'yadg.extractors.public.ezchrom' == pkgname %}
+{{- "**ezchrom**: Extract EZChrom chromatograms" | heading }}
+{% elif 'yadg.extractors.public.fusion' == pkgname %}
+{{- "**fusion**: Extract Fusion chromatograms" | heading }}
+{% elif 'yadg.extractors.public.panalytical' == pkgname %}
+{{- "**panalytical**: Extract Panalytical X-ray diffraction data" | heading }}
+{% elif 'yadg.extractors.public.phi' == pkgname %}
+{{- "**phi**: Extract Phi XPS data" | heading }}
+{% elif 'yadg.extractors.public.quadstar' == pkgname %}
+{{- "**quadstar**: Extract Quadstar mass spectra" | heading }}
+{% elif 'yadg.extractors.public.tomato' == pkgname %}
+{{- "**tomato**: Extract data from tomato outputs" | heading }}
 {% else %}
 {{- [pkgname, "package"] | join(" ") | e | heading }}
 {% endif %}
diff --git a/src/yadg/extractors/custom/fhimcpt/vna.py b/src/yadg/extractors/custom/fhimcpt/vna.py
index 5f17982f..dd30cc2d 100644
--- a/src/yadg/extractors/custom/fhimcpt/vna.py
+++ b/src/yadg/extractors/custom/fhimcpt/vna.py
@@ -22,7 +22,7 @@
 ``````
 .. code-block:: yaml
 
-    DataTree:
+    datatree.DataTree:
       S11:           !!xarray.Dataset
         coords:
           freq:      !!float      # An array of measurement frequencies
diff --git a/src/yadg/extractors/public/agilent/__init__.py b/src/yadg/extractors/public/agilent/__init__.py
index e69de29b..1ce8b7e0 100644
--- a/src/yadg/extractors/public/agilent/__init__.py
+++ b/src/yadg/extractors/public/agilent/__init__.py
@@ -0,0 +1,4 @@
+"""
+Extractors for data files generated by various proprietary Agilent software.
+
+"""
diff --git a/src/yadg/extractors/public/agilent/ch.py b/src/yadg/extractors/public/agilent/ch.py
index 31805290..a965306e 100644
--- a/src/yadg/extractors/public/agilent/ch.py
+++ b/src/yadg/extractors/public/agilent/ch.py
@@ -1,7 +1,173 @@
-from yadg.parsers.chromtrace.agilentch import process as extract
+"""
+**agilent.ch**
+--------------
 
-supports = {
-    "agilent.ch",
+Extractor of Agilent OpenLab binary signal trace files (``.ch`` and ``.it``).
+Currently supports version "179" of the files. Version information is defined in
+the ``magic_values`` (parameters & metadata) and `data_dtypes` (data) dictionaries.
+
+Adapted from `ImportAgilent.m `_ and
+`aston `_.
+
+Usage
+`````
+Available since ``yadg-4.0``.
+
+.. autopydantic_model:: dgbowl_schemas.yadg.dataschema_5_1.filetype.Agilent_ch
+
+Schema
+``````
+.. code-block:: yaml
+
+    datatree.DataTree:
+      {{ detector_name }}:
+        coords:
+          uts:            !!float               # Unix timestamp
+          elution_time:   !!float               # Elution time
+        data_vars:
+          signal:         (uts, elution_time)   # Signal data
+
+Metadata
+````````
+The following metadata is extracted:
+
+  - ``sampleid``: Sample name.
+  - ``username``: User name used to generate the file.
+  - ``method``: Name of the chromatographic method.
+  - ``version``: Version of the CH file (only "179" is currently supported.)
+
+
+Notes on file structure
+```````````````````````
+The following magic values are used:
+
+.. code ::
+
+    0x0000 "version magic"
+    0x0108 "data offset"
+    0x011a "x-axis minimum (ms)"
+    0x011e "x-axis maximum (ms)"
+    0x035a "sample ID"
+    0x0559 "description"
+    0x0758 "username"
+    0x0957 "timestamp"
+    0x09e5 "instrument name"
+    0x09bc "inlet"
+    0x0a0e "method"
+    0x104c "y-axis unit"
+    0x1075 "detector name"
+    0x1274 "y-axis intercept"
+    0x127c "y-axis slope"
+
+Data is stored in a consecutive set of ``<f8`` doubles, starting at the "data
+offset" (calculated as ``(x - 1) * 512``) until the end of the file.
+
+.. codeauthor::
+    Peter Kraus
+
+"""
+
+import numpy as np
+import xarray as xr
+from datatree import DataTree
+from yadg import dgutils
+from yadg.dgutils.dateutils import str_to_uts
+
+magic_values = {}
+magic_values["179"] = {
+    0x035A: ("sampleid", "utf-16"),
+    0x0559: ("description", "utf-16"),
+    0x0A0E: ("method", "utf-16"),
+    0x0758: ("username", "utf-16"),
+    0x0957: ("timestamp", "utf-16"),
+    0x09E5: ("instrument", "utf-16"),
+    0x09BC: ("inlet", "utf-16"),
+    0x104C: ("yunit", "utf-16"),
+    0x1075: ("tracetitle", "utf-16"),
+    0x0108: ("offset", ">i4"),  # (x-1) * 512
+    0x011A: ("xmin", ">f4"),  # / 60000
+    0x011E: ("xmax", ">f4"),  # / 60000
+    0x1274: ("intercept", ">f8"),
+    0x127C: ("slope", ">f8"),
 }
 
-__all__ = ["supports", "extract"]
+data_dtypes = {}
+data_dtypes["179"] = (8, "<f8")
+
+
+def extract(
+    *,
+    fn: str,
+    timezone: str,
+    **kwargs: dict,
+) -> DataTree:
+    with open(fn, "rb") as inf:
+        ch = inf.read()
+
+    magic = dgutils.read_value(ch, 0, "utf-8")
+    pars = {}
+    if magic in magic_values.keys():
+        for offset, (tag, dtype) in magic_values[magic].items():
+            v = dgutils.read_value(ch, offset, dtype)
+            pars[tag] = v
+    pars["end"] = len(ch)
+    dsize, ddtype = data_dtypes[magic]
+    pars["start"] = (pars["offset"] - 1) * 512
+    nbytes = pars["end"] - pars["start"]
+    assert nbytes % dsize == 0
+    npoints = nbytes // dsize
+
+    metadata = dict()
+    for k in ["sampleid", "username", "method"]:
+        metadata[k] = pars[k]
+    metadata["version"] = str(magic)
+
+    xsn = np.linspace(pars["xmin"] / 1000, pars["xmax"] / 1000, num=npoints)
+    xss = np.ones(npoints) * xsn[0]
+    ysn = (
+        np.frombuffer(
+            ch,
+            offset=pars["start"],
+            dtype=ddtype,
+            count=npoints,
+        )
+        * pars["slope"]
+    )
+    yss = np.ones(npoints) * pars["slope"]
+
+    detector, title = pars["tracetitle"].split(",")
+
+    uts = str_to_uts(
+        timestamp=pars["timestamp"], format="%d-%b-%y, %H:%M:%S", timezone=timezone
+    )
+
+    ds = xr.Dataset(
+        data_vars={
+            "signal": (
+                ["uts", "elution_time"],
+                [ysn],
+                {"units": pars["yunit"], "ancillary_variables": "signal_std_err"},
+            ),
+            "signal_std_err": (
+                ["uts", "elution_time"],
+                [yss],
+                {"units": pars["yunit"], "standard_name": "signal standard_error"},
+            ),
+            "elution_time_std_err": (
+                ["elution_time"],
+                xss,
+                {"units": "s", "standard_name": "elution_time standard_error"},
+            ),
+        },
+        coords={
+            "elution_time": (
+                ["elution_time"],
+                xsn,
+                {"units": "s", "ancillary_variables": "elution_time_std_err"},
+            ),
+            "uts": (["uts"], [uts]),
+        },
+        attrs={"title": title},
+    )
+    dt = DataTree.from_dict({detector: ds})
+    dt.attrs = metadata
+    return dt
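The binary layout documented in ``ch.py`` above can be probed without yadg. The sketch
below is illustrative only and is not part of the patch: the file name is hypothetical,
while the offsets, the ``(offset - 1) * 512`` start position, the 8-byte datapoints and
the ``slope`` scaling follow the notes and the ``extract()`` function above::

    # Illustrative sketch, not part of the patch. Assumes a version "179"
    # Agilent OpenLab .ch file at a hypothetical path.
    import numpy as np

    with open("example.ch", "rb") as f:  # hypothetical file name
        raw = f.read()

    # "data offset" is a big-endian int32 at 0x0108; the signal starts at
    # (offset - 1) * 512 bytes and runs to the end of the file.
    offset = int(np.frombuffer(raw, dtype=">i4", count=1, offset=0x0108)[0])
    start = (offset - 1) * 512

    # "y-axis slope" is a big-endian float64 at 0x127c; extract() above
    # multiplies the raw datapoints by this value.
    slope = float(np.frombuffer(raw, dtype=">f8", count=1, offset=0x127C)[0])

    # 8 bytes per point, little-endian float64, matching data_dtypes["179"] above
    npoints = (len(raw) - start) // 8
    signal = np.frombuffer(raw, dtype="<f8", count=npoints, offset=start) * slope
    print(npoints, signal[:5])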
diff --git a/src/yadg/extractors/public/agilent/csv.py b/src/yadg/extractors/public/agilent/csv.py
index 0671ba41..aa55a144 100644
--- a/src/yadg/extractors/public/agilent/csv.py
+++ b/src/yadg/extractors/public/agilent/csv.py
@@ -1,7 +1,183 @@
-from yadg.parsers.chromtrace.agilentcsv import process as extract
+"""
+**agilent.csv**
+---------------
 
-supports = {
-    "agilent.csv",
-}
+Extractor of Agilent Chemstation Chromtab tabulated data files. This file format may
+include multiple timesteps consisting of several traces each in a single CSV file. It
+contains a header section for each timestep, followed by a detector name, and a sequence
+of "X, Y" datapoints, which are stored as ``elution_time`` and ``signal``.
 
-__all__ = ["supports", "extract"]
+.. warning ::
+
+    It is not guaranteed that the X-axis of the chromatogram (i.e. ``elution_time``) is
+    consistent between the timesteps of the same trace. The traces are expanded to the
+    length of the longest trace, and the shorter traces are padded with ``NaNs``.
+
+Usage
+`````
+Available since ``yadg-4.0``.
+
+.. autopydantic_model:: dgbowl_schemas.yadg.dataschema_5_1.filetype.Agilent_csv
+
+Schema
+``````
+.. code-block:: yaml
+
+    datatree.DataTree:
+      {{ detector_name }}:
+        coords:
+          uts:            !!float               # Unix timestamp
+          elution_time:   !!float               # Elution time
+        data_vars:
+          signal:         (uts, elution_time)   # Signal data
+
+Metadata
+````````
+The following metadata is extracted:
+
+  - ``sampleid``: Sample name.
+  - ``datafile``: Original path of the data file.
+
+.. codeauthor::
+    Peter Kraus
+
+"""
+
+import numpy as np
+from uncertainties.core import str_to_number_with_uncert as tuple_fromstr
+from yadg.dgutils.dateutils import str_to_uts
+import xarray as xr
+from datatree import DataTree
+
+
+def _process_headers(headers: list, columns: list, timezone: str) -> dict:
+    res = {}
+    assert len(headers) == len(
+        columns
+    ), "chromtab: The number of headers and columns do not match."
+    assert "Date Acquired" in headers, "chromtab: Cannot infer date."
+    res["uts"] = str_to_uts(
+        timestamp=columns[headers.index("Date Acquired")].strip(),
+        format="%d %b %Y %H:%M",
+        timezone=timezone,
+    )
+    fn = ""
+    if "Path" in headers:
+        fn += columns[headers.index("Path")]
+    if "File" in headers:
+        fn += columns[headers.index("File")]
+    res["datafile"] = fn
+    if "Sample" in headers:
+        res["sampleid"] = columns[headers.index("Sample")]
+    return res
+
+
+def _to_trace(tx, ty):
+    tvals, tders = [x for x in zip(*tx)]
+    yvals, yders = [x for x in zip(*ty)]
+    trace = {
+        "tvals": np.array(tvals) * 60,
+        "tdevs": np.array(tders) * 60,
+        "yvals": list(yvals),
+        "ydevs": list(yders),
+    }
+    return trace
+
+
+def extract(
+    *,
+    fn: str,
+    encoding: str,
+    timezone: str,
+    **kwargs: dict,
+) -> DataTree:
+    with open(fn, "r", encoding=encoding, errors="ignore") as infile:
+        lines = infile.readlines()
+    metadata = {}
+    uts = []
+    tx = []
+    ty = []
+    detname = None
+    tstep = dict()
+    data = []
+    traces = set()
+    maxlen = dict()
+    for line in lines:
+        parts = line.strip().split(",")
+        if len(parts) > 2:
+            if '"Date Acquired"' in parts:
+                if tx != [] and ty != [] and detname is not None:
+                    trace = _to_trace(tx, ty)
+                    tstep[detname] = trace
+                    maxlen[detname] = max(maxlen.get(detname, 0), len(trace["tvals"]))
+                    tx = []
+                    ty = []
+                if len(tstep) > 0:
+                    data.append(tstep)
+                    tstep = dict()
+                headers = [p.replace('"', "") for p in parts]
+            else:
+                columns = [p.replace('"', "") for p in parts]
+                ret = _process_headers(headers, columns, timezone)
+                uts.append(ret.pop("uts"))
+                metadata.update(ret)
+        elif len(parts) == 1:
+            if tx != [] and ty != [] and detname is not None:
+                trace = _to_trace(tx, ty)
+                tstep[detname] = trace
+                maxlen[detname] = max(maxlen.get(detname, 0), len(trace["tvals"]))
+                tx = []
+                ty = []
+            detname = parts[0].replace('"', "").split("\\")[-1]
+            traces.add(detname)
+        elif len(parts) == 2:
+            x, y = [tuple_fromstr(i) for i in parts]
+            tx.append(x)
+            ty.append(y)
+    trace = _to_trace(tx, ty)
+    tstep[detname] = trace
+    maxlen[detname] = max(maxlen.get(detname, 0), len(trace["tvals"]))
+    data.append(tstep)
+
+    traces = sorted(traces)
+    vals = {}
+    for tr in traces:
+        dsets = []
+        for ti, ts in enumerate(data):
+            thislen = len(ts[tr]["tvals"])
+            fvals = {}
+            for k in {"yvals", "ydevs", "tvals", "tdevs"}:
+                fvals[k] = np.ones(maxlen[tr]) * np.nan
+                fvals[k][:thislen] = ts[tr][k]
+            ds = xr.Dataset(
+                data_vars={
+                    "signal": (
+                        ["elution_time"],
+                        fvals["yvals"],
+                        {"ancillary_variables": "signal_std_err"},
+                    ),
+                    "signal_std_err": (
+                        ["elution_time"],
+                        fvals["ydevs"],
+                        {"standard_name": "signal standard_error"},
+                    ),
+                    "elution_time": (
+                        ["_"],
+                        fvals["tvals"],
+                        {"units": "s", "ancillary_variables": "elution_time_std_err"},
+                    ),
+                    "elution_time_std_err": (
+                        ["elution_time"],
+                        fvals["tdevs"],
+                        {"units": "s", "standard_name": "elution_time standard_error"},
+                    ),
+                },
+                coords={},
+                attrs={},
+            )
+            ds["uts"] = [uts[ti]]
+            dsets.append(ds)
+        vals[tr] = xr.concat(dsets, dim="uts")
+    dt = DataTree.from_dict(vals)
+    dt.attrs = metadata
+    return dt
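The value/uncertainty tuples built by ``_to_trace()`` above come from the
``uncertainties`` package. The sketch below is illustrative only and not part of the
patch; the datapoint is made up, and the minutes-to-seconds conversion mirrors
``_to_trace()``::

    # Illustrative sketch, not part of the patch; the datapoint is made up.
    from uncertainties.core import str_to_number_with_uncert as tuple_fromstr

    # A single Chromtab "X, Y" datapoint, as parsed in extract() above.
    x, y = [tuple_fromstr(i) for i in "0.0035,-2.122309".split(",")]

    # tuple_fromstr returns (value, deviation) pairs; elution times and their
    # deviations are converted from minutes to seconds, as in _to_trace().
    elution_time, elution_time_std_err = x[0] * 60, x[1] * 60
    signal, signal_std_err = y
    print(elution_time, elution_time_std_err, signal, signal_std_err)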
diff --git a/src/yadg/extractors/public/agilent/dx.py b/src/yadg/extractors/public/agilent/dx.py
index 2dd473a8..278a46bb 100644
--- a/src/yadg/extractors/public/agilent/dx.py
+++ b/src/yadg/extractors/public/agilent/dx.py
@@ -1,8 +1,69 @@
-from yadg.parsers.chromtrace.agilentdx import process as extract
+"""
+**agilent.dx**
+--------------
 
-supports = {
-    "agilent.dx",
-    "marda:agilent-dx",
-}
+Extractor of Agilent OpenLab DX archives. This is a wrapper parser which unzips the
+provided DX file, and then uses the :mod:`yadg.extractors.public.agilent.ch` extractor
+to parse every CH file present in the archive. The IT files in the archive are currently
+ignored.
 
-__all__ = ["supports", "extract"]
+.. note::
+
+    Currently the timesteps from multiple CH files (if present) are appended in the
+    timesteps array without any further sorting.
+
+Usage
+`````
+Available since ``yadg-4.0``.
+
+.. autopydantic_model:: dgbowl_schemas.yadg.dataschema_5_1.filetype.Agilent_ch
+
+Schema
+``````
+.. code-block:: yaml
+
+    datatree.DataTree:
+      {{ detector_name }}:
+        coords:
+          uts:            !!float               # Unix timestamp
+          elution_time:   !!float               # Elution time
+        data_vars:
+          signal:         (uts, elution_time)   # Signal data
+
+Metadata
+````````
+The following metadata is extracted:
+
+  - ``sampleid``: Sample name.
+  - ``username``: User name used to generate the file.
+  - ``method``: Name of the chromatographic method.
+  - ``version``: Version of the CH file (only "179" is currently supported.)
+
+.. codeauthor::
+    Peter Kraus
+
+"""
+
+import zipfile
+import tempfile
+import os
+from yadg.extractors.public.agilent.ch import extract as extract_ch
+from yadg.core import merge_dicttrees
+from datatree import DataTree
+
+
+def extract(
+    *,
+    fn: str,
+    **kwargs: dict,
+) -> DataTree:
+    zf = zipfile.ZipFile(fn)
+    with tempfile.TemporaryDirectory() as tempdir:
+        zf.extractall(tempdir)
+        dt = None
+        for ffn in os.listdir(tempdir):
+            if ffn.endswith("CH"):
+                path = os.path.join(tempdir, ffn)
+                fdt = extract_ch(fn=path, **kwargs).to_dict()
+                dt = merge_dicttrees(dt, fdt, "identical")
+    return DataTree.from_dict(dt)
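A short usage sketch for the ``dx.py`` wrapper above, illustrative only and not part of
the patch: the archive name and timezone are hypothetical, while the keyword-only
``extract()`` signature and the per-detector tree layout follow the docstrings in this
patch::

    # Illustrative sketch, not part of the patch; the file name and timezone
    # are hypothetical.
    from yadg.extractors.public.agilent.dx import extract

    # timezone is forwarded to the ch.py extractor via **kwargs
    dt = extract(fn="sequence.dx", timezone="UTC")

    # one child node per detector, as documented in the Schema sections above
    for name, node in dt.children.items():
        print(name, node["signal"].sizes)

    # metadata documented above (sampleid, username, method, version) is
    # expected on the root attributes of the merged tree
    print(dt.attrs)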