Skip to content

Commit

Permalink
Implement picolog.tc08 parser (#148)
Browse files Browse the repository at this point in the history
* Implement picolog parser

* Docs etc.

* Missing reference file.

* More docs.

* Hook up extract test.

* Add another test
  • Loading branch information
PeterKraus authored May 24, 2024
1 parent 96b3e9a commit 14ffa12
Show file tree
Hide file tree
Showing 12 changed files with 186 additions and 6 deletions.
2 changes: 2 additions & 0 deletions docs/apidoc_t/package.rst_t
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
{{- "**fusion**: For Fusion chromatograms" | heading }}
{% elif 'yadg.extractors.panalytical' == pkgname %}
{{- "**panalytical**: For Panalytical XRD data" | heading }}
{% elif 'yadg.extractors.picolog' == pkgname %}
{{- "**picolog**: For PicoTech PicoLog data" | heading }}
{% elif 'yadg.extractors.phi' == pkgname %}
{{- "**phi**: For Phi XPS data" | heading }}
{% elif 'yadg.extractors.quadstar' == pkgname %}
Expand Down
2 changes: 2 additions & 0 deletions docs/source/version.5_1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ New features since ``yadg-5.0`` are:

- Support for EZChrom ``.dat`` files using the :mod:`yadg.extractors.ezchrom.dat` extractor. Test files were provided by Z. Asahi from FU Berlin, and J. Schumann from HU Berlin. The data extracted from the ``.dat`` files is cross-checked against the data obtained from ``.asc`` files using the :mod:`yadg.extractors.ezchrom.asc` extractor.

- Support for PicoTech PicoLog ``.picolog`` files (such as those created by the TC-08 thermocouple monitor) using the :mod:`yadg.extractors.picolog.tc08` extractor. Test files were provided by F. Bernasconi from Empa.

Other changes in ``yadg-5.1`` are:

- The dataschema has been simplified, eliminating parsers in favour of extractors.
Expand Down
Empty file.
150 changes: 150 additions & 0 deletions src/yadg/extractors/picolog/tc08.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
"""
Module for parsing PicoTech PicoLog files, such as those generated by the TC-08
thermocouple reader.

Usage
`````
Available since ``yadg-5.1``.

.. autopydantic_model:: dgbowl_schemas.yadg.dataschema_5_1.filetype.PicoLog_tc08

Schema
``````
.. code-block:: yaml

xarray.Dataset:
coords:
uts: !!float # Unix timestamp, optional
data_vars:
{{ name }} (uts) # Temperature of the named thermocouple

Metadata
````````
Metadata about the TC-08 device is read from the overall metadata file. The following
parameters are currently parsed:

- ``Model``: model number of the device
- ``Serial``: serial number of the device
- ``Software Version``: software version with which the file was created


Uncertainties
`````````````
Uncertainties in temperatures are set to 2.2°C, which is the usual value for a Type-K
thermocouple.

Notes on file structure
```````````````````````
The ``.picolog`` files are actually ``.tar`` archives, which include the following
files:

- a ``metadata.json`` file, containing information about the device and the
thermocouples plugged in,
- a ``data-map.json.gz`` file, containing information about chunks of data corresponding
to each thermocouple,
- a set of gzipped binary files with data averaged at different time resolutions

Currently, the data is extracted from the ``1.0.gz`` file only.

.. codeauthor::
Peter Kraus

"""

import os
import json
import numpy as np
import tarfile
import gzip
import tempfile
import xarray as xr
from xarray import Dataset

from yadg import dgutils


def extract(
    *,
    fn: str,
    **kwargs: dict,
) -> Dataset:
    """
    Extract thermocouple data from a PicoTech PicoLog ``.picolog`` archive.

    The archive is unpacked into a temporary directory, from which the
    ``metadata.json``, ``data-map.json.gz`` and ``1.0.gz`` files are read.

    Parameters
    ----------
    fn
        Path to the ``.picolog`` file, which is opened as an uncompressed tar
        archive.

    kwargs
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    Dataset
        A dataset with a ``uts`` coordinate and, for every enabled channel that
        has data, one variable named after the channel description plus a
        matching ``{name}_std_err`` variable. The ``Software Version``, ``Model``
        and ``Serial`` entries are stored in the dataset attributes.

    Raises
    ------
    RuntimeError
        If a chunk at resolution ``"1"`` is stored outside of ``1.0.gz``, or if
        a channel with data is not of the ``"thermocouple"`` type.

    """
    with tempfile.TemporaryDirectory() as tempdir:
        # Use the archive as a context manager so the handle is always closed.
        with tarfile.TarFile(fn, mode="r") as tf:
            # The "data" extraction filter rejects members that would escape
            # tempdir; it only exists on Python versions with PEP 706 support.
            if hasattr(tarfile, "data_filter"):
                tf.extractall(tempdir, filter="data")
            else:
                tf.extractall(tempdir)

        # Get file metadata; JSON is read as UTF-8 regardless of the locale.
        with open(os.path.join(tempdir, "metadata.json"), encoding="utf-8") as inf:
            metadata = json.load(inf)

        # Get data from 1.0.gz
        with gzip.open(os.path.join(tempdir, "data-map.json.gz"), "rb") as inp:
            dmap = json.loads(inp.read())
        with gzip.open(os.path.join(tempdir, "1.0.gz"), "rb") as inp:
            raw = inp.read()

    uts = dgutils.str_to_uts(timestamp=metadata["startDate"], timezone=None)

    # Here we assume that "items" only has one element
    devdata = metadata["devices"]["items"][0]
    attrs = {
        "Software Version": metadata["appVersion"],
        "Model": devdata["__type"],
        "Serial": devdata["serialInternal"],
    }

    # Get necessary device metadata from the file metadata. Only dict-valued
    # entries flagged as enabled are treated as channels; dict entries without
    # an "enabled" key are skipped instead of raising a KeyError.
    devices = {}
    for key, val in devdata.items():
        if isinstance(val, dict) and val.get("enabled"):
            devices[key] = {
                "id": val["channelIdentifier"],
                "type": val["channelType"],
                "name": val["description"],
                # NOTE(review): presumably converts an interval in ms to
                # seconds, to match the uts axis -- confirm.
                "xmul": val["captureConfig"]["interval"] // 1000,
                "npts": val["capturedSamples"],
            }

    # Convert bytes to floats: each data-map entry is a (tag, params) pair
    # pointing at a byte range of big-endian 32-bit floats within an archive.
    points = {}
    for tag, params in dmap:
        _, __, namestr = tag.split(".")
        chan_id, res, time = namestr.split("/")
        archive, start, length = params
        if chan_id not in points:
            points[chan_id] = np.empty(0, dtype=">f4")
        if res == "1" and archive == "1.0.gz":
            new = np.frombuffer(raw, offset=start, dtype=">f4", count=length // 4)
            points[chan_id] = np.concatenate((points[chan_id], new))
        elif res == "1":
            raise RuntimeError(f"Resolution of 1 but archive is {archive!r}.")

    # Push the data into the Dataset
    ds = xr.Dataset(coords={"uts": (["uts"], [])})
    for chan_id, data in points.items():
        for meta in devices.values():
            if meta["id"] != chan_id:
                continue
            # The type of the device should be thermocouple
            if meta["type"] == "thermocouple":
                unit = {"units": "degC"}
            else:
                raise RuntimeError(f"Unknown type {meta['type']!r}.")
            # NaN samples are dropped before the time axis is built, so the
            # remaining points are assigned consecutive timestamps.
            yvals = data[~np.isnan(data)]
            # Fixed uncertainty of 2.2 degC for every temperature reading.
            ydevs = np.ones(len(yvals)) * 2.2
            xvals = np.arange(len(yvals)) * meta["xmul"] + uts
            newds = xr.Dataset(
                data_vars={
                    meta["name"]: (["uts"], yvals, unit),
                    f"{meta['name']}_std_err": (["uts"], ydevs, unit),
                },
                coords={"uts": (["uts"], xvals)},
            )
            ds = xr.merge((ds, newds))

    # Cross-link each variable with its "_std_err" companion via attributes.
    for var in ds.variables:
        if f"{var}_std_err" in ds.variables:
            ds[var].attrs["ancillary_variables"] = f"{var}_std_err"
        elif var.endswith("_std_err"):
            end = var.index("_std_err")
            if var[:end] in ds.variables:
                ds[var].attrs["standard_name"] = f"{var[:end]} standard_error"

    ds.attrs = attrs
    return ds
14 changes: 8 additions & 6 deletions tests/test_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,19 @@ def test_extract_marda(filetype, infile, outfile, datadir):


@pytest.mark.parametrize(
"filetype, infile, outfile",
"filetype, infile",
[
("touchstone.snp", "picovna.s1p", "ref.picovna.s1p.nc"),
("ezchrom.asc", "230324.dat.asc", "ref.230324.dat.asc.nc"),
("ezchrom.dat", "230324.dat", "ref.230324.dat.nc"),
("touchstone.snp", "picovna.s1p"),
("ezchrom.asc", "230324.dat.asc"),
("ezchrom.dat", "230324.dat"),
("picolog.tc08", "20220723-porosity-study-15p-Cu-200mA-longrun-07.picolog"),
],
)
def test_extract_yadg(filetype, infile, outfile, datadir):
def test_extract_yadg(filetype, infile, datadir):
os.chdir(datadir)
outfile = f"ref.{infile}.nc"
ret = extract(filetype=filetype, path=infile)
# ret.to_netcdf(outfile, engine="h5netcdf")
ref = datatree.open_datatree(outfile)
print(f"{ret=}")
compare_datatrees(ret, ref)
assert ret.equals(ref)
Binary file not shown.
Binary file not shown.
24 changes: 24 additions & 0 deletions tests/test_picolog_tc08.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import pytest
import os
import pickle
import xarray as xr
from yadg.extractors.picolog.tc08 import extract


@pytest.mark.parametrize(
    "infile",
    [
        "20220723-porosity-study-15p-Cu-200mA-longrun-07.picolog",
        "20230917-16-S07-temperature.picolog",
    ],
)
def test_picolog_tc08(infile, datadir):
    """Check that extracting ``infile`` reproduces the pickled reference Dataset."""
    os.chdir(datadir)
    ret = extract(fn=infile)
    outfile = f"ref.{infile}.pkl"
    # The reference is only read here. It must not be rewritten from ``ret``
    # during a test run, as that would replace the baseline with whatever the
    # current code produces.
    with open(outfile, "rb") as inp:
        ref = pickle.load(inp)
    print(f"{ret=}")
    xr.testing.assert_equal(ret, ref)
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 comments on commit 14ffa12

Please sign in to comment.