Skip to content

Commit

Permalink
Implement ezchrom.dat extractor. (#146)
Browse files Browse the repository at this point in the history
* Need olefile.

* Add test files.

* Working extractor.

* Fix up tests.

* Fix up `ezchrom.asc`

* Add extract.extract tests

* Add extract tests

* Comparison test.

* Update reference files for standard_error fix.

* Docs.
  • Loading branch information
PeterKraus authored Apr 6, 2024
1 parent 2ef10c8 commit f976fa5
Show file tree
Hide file tree
Showing 42 changed files with 99,420 additions and 10 deletions.
7 changes: 7 additions & 0 deletions docs/source/version.5_1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,19 @@ New features since ``yadg-5.0`` are:
- ``Fig8_*cm.s1p`` from https://zenodo.org/doi/10.5281/zenodo.10222705
- ``VNA_radial_middle.s*p`` from https://zenodo.org/doi/10.5281/zenodo.7339709

- Support for EZChrom ``.dat`` files using the :mod:`yadg.extractors.ezchrom.dat` extractor. Test files were provided by Z. Asahi from FU Berlin, and J. Schumann from HU Berlin. The data extracted from the ``.dat`` files is cross-checked against the data obtained from ``.asc`` files using the :mod:`yadg.extractors.ezchrom.asc` extractor.

Other changes in ``yadg-5.1`` are:

- The dataschema has been simplified, eliminating parsers in favour of extractors.
- The code has been reorganised to highlight the extractor functionality in favour of parsers.

Bug fixes in ``yadg-5.1`` include:

- Fixed incorrect unit assignment when ``/`` was substituted with ``_`` in column names.
- Fixed incorrect annotation of ancillary variables: ``standard error`` should be ``standard_error``.
- Fixed incorrect parsing of units in the :mod:`yadg.extractors.ezchrom.asc` extractor. Now, the ``25 μV`` unit will be correctly replaced by just ``μV`` (without modifying data), which can be understood by :mod:`pint`.


.. _concat_lab: https://tu.berlin/en/concat

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ dependencies = [
"packaging",
"python-dateutil",
"openpyxl >= 3.0.0",
"olefile >= 0.47",
"h5netcdf ~= 1.0",
"pandas >= 2.0",
"xarray-datatree ~= 0.0.12",
Expand Down
2 changes: 1 addition & 1 deletion src/yadg/dgutils/dsutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def dicts_to_dataset(
if key in meta and darrs[k].dtype.kind in {"i", "u", "f", "c", "m", "M"}:
err = f"{k}_std_err"
darrs[k].attrs["ancillary_variables"] = err
attrs["standard_name"] = f"{k} standard error"
attrs["standard_name"] = f"{k} standard_error"
darrs[err] = xr.DataArray(data=meta[key], dims=["uts"], attrs=attrs)
if "uts" in data:
coords = dict(uts=data.pop("uts"))
Expand Down
17 changes: 10 additions & 7 deletions src/yadg/extractors/ezchrom/asc.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,14 @@ def extract(
for key in ["Version", "Method", "User Name"]:
if line.startswith(key):
k = key.lower().replace(" ", "")
metadata[k] = line.split(f"{key}:")[1].strip()
metadata[k] = line.split(f"{key}:")[1].strip().strip(",")
for key in ["Sample ID"]: # , "Data File"]:
if line.startswith(key):
k = key.lower().replace(" ", "")
metadata[k] = line.split(f"{key}:")[1].strip()
metadata[k] = line.split(f"{key}:")[1].strip().strip(",")
if line.startswith("Acquisition Date and Time:"):
uts = dgutils.str_to_uts(
timestamp=line.split("Time:")[1].strip(),
timestamp=line.split("Time:")[1].strip().strip(","),
format="%m/%d/%Y %I:%M:%S %p",
timezone=timezone,
)
Expand All @@ -99,10 +99,13 @@ def extract(
xunits = [each.strip() for each in parts[1:]]
if line.startswith("Y Axis Title:"):
parts = line.split("\t")
yunits = [each.strip() for each in parts[1:]]
if "25 V" in yunits:
logger.warning("Implicit conversion of y-axis unit from '25 V' to 'V'.")
yunits = [i.replace("25 V", "V") for i in yunits]
_yunits = [each.strip() for each in parts[1:]]
yunits = [i.replace("25", "").strip() for i in _yunits]
if yunits != _yunits:
logger.warning(
"Implicit conversion of y-axis unit from '25 µV' to 'µV'."
)
yunits = [i.replace("25", "") for i in yunits]
if line.startswith("X Axis Multiplier:"):
parts = line.split("\t")
xmuls = [float(each.strip()) for each in parts[1:]]
Expand Down
133 changes: 133 additions & 0 deletions src/yadg/extractors/ezchrom/dat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
"""
Handles binary files created by EZChrom software.
Usage
`````
Available since ``yadg-5.1``.
.. autopydantic_model:: dgbowl_schemas.yadg.dataschema_5_1.filetype.EZChrom_dat
Schema
``````
.. code-block:: yaml
datatree.DataTree:
{{ detector_trace }}:
coords:
uts: !!float # Unix timestamp
elution_time: !!float # Elution time
data_vars:
signal: (uts, elution_time) # Signal data
Metadata
````````
No metadata is currently extracted. If you need some particular metadata, please open
an issue.
Notes on file structure
```````````````````````
Data in these files is stored as an OLE file, which is first processed using the
:mod:`olefile` library.
The timestamp is stored as an OLE timestamp in the ``Chrom Header`` stream.
The metadata for each trace are stored within the ``Detector Trace Handler`` stream,
and contain the X- and Y-axis multiplier, Y-axis units, and some other metadata.
The data for each trace are stored within the ``Detector Data`` "directory" within the
OLE file, with one stream per trace.
Uncertainties
`````````````
The uncertainties in ``signal`` as well as ``elution_time`` are set to the axis
multiplier.
.. codeauthor::
Peter Kraus
"""

import olefile
import xarray as xr
from datatree import DataTree
import numpy as np

from yadg import dgutils


# Layout of one per-trace record inside the ``Detector Trace Handler`` stream,
# as walked by ``extract`` below. Each entry is ``(name, delta, dtype)``:
#
# - ``name``  is the key under which the decoded value is stored,
# - ``delta`` is the number of bytes skipped *before* reading the value,
#   counted from the end of the previously read field,
# - ``dtype`` is the type tag handed to ``dgutils.read_value``; after a
#   ``"pascal"`` field the reader advances by the one-byte length prefix plus
#   the string length, after an ``"f4"`` field it advances by 4 bytes.
#
# The order of the entries is significant, since offsets accumulate.
detector_trace_struct = [
    ("trace_name", 22, "pascal"),  # used to build the "Detector <name> Trace" key
    ("position", 4, "pascal"),
    ("x_mul", 0, "f4"),  # X-axis multiplier, applied to the point index
    ("y_unit", 0, "pascal"),  # Y-axis unit string
    ("y_mul", 0, "f4"),  # Y-axis multiplier, applied to the raw integer signal
    ("tst", 33, "pascal"),  # NOTE(review): meaning not evident from this file
    ("y_unit2", 0, "pascal"),  # presumably a second copy of the Y unit - TODO confirm
    ("time", 21, "f4"),
]


def extract(
    *,
    fn: str,
    timezone: str,
    **kwargs: dict,
) -> DataTree:
    """
    Extract all detector traces from an EZChrom ``.dat`` (OLE) file.

    Parameters
    ----------
    fn
        Path of the OLE file to be opened with :mod:`olefile`.

    timezone
        Timezone passed to ``dgutils.ole_to_uts`` when converting the OLE
        timestamp found in the ``Chrom Header`` stream.

    kwargs
        Accepted for interface compatibility; not used here.

    Returns
    -------
    DataTree
        One node per stream found under ``Detector Data``, keyed by
        ``/<stream name>``. Each node holds ``signal`` and ``signal_std_err``
        along ``(uts, elution_time)`` as well as ``elution_time_std_err``.

    """
    # Pull every stream we need out of the OLE container.
    with olefile.OleFileIO(fn) as ole:
        header = ole.openstream(["Chrom Header"]).read()
        handler = ole.openstream(["Detector Trace Handler"]).read()
        streams = {
            path[1]: ole.openstream(path).read()
            for path in ole.listdir()
            if len(path) == 2 and path[0] == "Detector Data"
        }

    # The acquisition timestamp sits at byte 8 of the header, as an 8-byte float.
    uts = dgutils.ole_to_uts(
        dgutils.read_value(data=header, offset=8, dtype="f8"),
        timezone,
    )

    # Walk the trace handler stream: one record per data stream, laid out
    # back-to-back according to detector_trace_struct, starting at byte 31.
    cursor = 31
    trace_params = {}
    for _ in range(len(streams)):
        record = {}
        for field, skip, kind in detector_trace_struct:
            cursor += skip
            record[field] = dgutils.read_value(data=handler, offset=cursor, dtype=kind)
            if kind == "pascal":
                # Step over the length byte and the string it announces.
                length = dgutils.read_value(data=handler, offset=cursor, dtype="u1")
                cursor += length + 1
            elif kind == "f4":
                cursor += 4
        trace_params[f"Detector {record['trace_name']} Trace"] = record

    # Convert each raw data stream into a Dataset.
    datasets = {}
    for stream_name, raw in streams.items():
        meta = trace_params[stream_name]
        # Number of points is a u4 at byte 4; the i4 samples start at byte 20.
        npoints = dgutils.read_value(data=raw, offset=4, dtype="u4")
        counts = np.frombuffer(raw, offset=20, count=npoints, dtype="i4")
        signal = counts * meta["y_mul"]
        signal_err = np.ones(npoints) * meta["y_mul"]
        signal_attrs = {"units": meta["y_unit"].replace("25", "").strip()}
        elution = np.arange(0, npoints) * meta["x_mul"]
        elution_err = np.ones(npoints) * meta["x_mul"]
        elution_attrs = {"units": "s"}

        trace_dims = ["uts", "elution_time"]
        ds = xr.Dataset(
            data_vars={
                "signal": (trace_dims, [signal], signal_attrs),
                "signal_std_err": (trace_dims, [signal_err], signal_attrs),
                "elution_time_std_err": (["elution_time"], elution_err, elution_attrs),
            },
            coords={
                "elution_time": (["elution_time"], elution, elution_attrs),
                "uts": (["uts"], [uts]),
            },
        )

        # Cross-link every quantity with its "_std_err" companion.
        for name in ds.variables:
            companion = f"{name}_std_err"
            if companion in ds.variables:
                ds[name].attrs["ancillary_variables"] = companion
                continue
            if not name.endswith("_std_err"):
                continue
            parent = name[: name.index("_std_err")]
            if parent in ds.variables:
                ds[name].attrs["standard_name"] = f"{parent} standard_error"

        datasets[f"/{stream_name}"] = ds

    return DataTree.from_dict(datasets)
2 changes: 1 addition & 1 deletion src/yadg/extractors/touchstone/snp.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ def extract(
elif var.endswith("_std_err"):
end = var.index("_std_err")
if var[:end] in ds.variables:
ds[var].attrs["standard_name"] = f"{var[:end]} standard error"
ds[var].attrs["standard_name"] = f"{var[:end]} standard_error"
if "angle" in var:
ds[var].attrs["units"] = "degree"
elif "frequency" in var:
Expand Down
2 changes: 2 additions & 0 deletions tests/test_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ def test_extract_marda(filetype, infile, outfile, datadir):
"filetype, infile, outfile",
[
("touchstone.snp", "picovna.s1p", "ref.picovna.s1p.nc"),
("ezchrom.asc", "230324.dat.asc", "ref.230324.dat.asc.nc"),
("ezchrom.dat", "230324.dat", "ref.230324.dat.nc"),
],
)
def test_extract_yadg(filetype, infile, outfile, datadir):
Expand Down
Binary file added tests/test_extract/230324.dat
Binary file not shown.
Loading

0 comments on commit f976fa5

Please sign in to comment.