diff --git a/docs/apidoc_t/package.rst_t b/docs/apidoc_t/package.rst_t index f4fbe205..3f1f737f 100644 --- a/docs/apidoc_t/package.rst_t +++ b/docs/apidoc_t/package.rst_t @@ -35,6 +35,8 @@ {{- "**fusion**: For Fusion chromatograms" | heading }} {% elif 'yadg.extractors.panalytical' == pkgname %} {{- "**panalytical**: For Panalytical XRD data" | heading }} +{% elif 'yadg.extractors.picolog' == pkgname %} +{{- "**picolog**: For PicoTech PicoLog data" | heading }} {% elif 'yadg.extractors.phi' == pkgname %} {{- "**phi**: For Phi XPS data" | heading }} {% elif 'yadg.extractors.quadstar' == pkgname %} diff --git a/docs/source/version.5_1.rst b/docs/source/version.5_1.rst index 199b8d90..01b591a8 100644 --- a/docs/source/version.5_1.rst +++ b/docs/source/version.5_1.rst @@ -21,6 +21,8 @@ New features since ``yadg-5.0`` are: - Support for EZChrom ``.dat`` files using the :mod:`yadg.extractors.ezchrom.dat` extractor. Test files were provided by Z. Asahi from FU Berlin, and J. Schumann from HU Berlin. The data extracted from the ``.dat`` files is cross-checked against the data obtained from ``.asc`` files using the :mod:`yadg.extractors.ezchrom.asc` extractor. + - Support for PicoTech PicoLog ``.picolog`` files (such as those created by the TC-08 thermocouple monitor) using the :mod:`yadg.extractors.picolog.tc08` extractor. Test files were provided by F. Bernasconi from Empa. + Other changes in ``yadg-5.1`` are: - The dataschema has been simplified, eliminating parsers in favour of extractors. diff --git a/src/yadg/extractors/picolog/__init__.py b/src/yadg/extractors/picolog/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/yadg/extractors/picolog/tc08.py b/src/yadg/extractors/picolog/tc08.py new file mode 100644 index 00000000..ec6f97c8 --- /dev/null +++ b/src/yadg/extractors/picolog/tc08.py @@ -0,0 +1,150 @@ +""" +Module for parsing PicoTech PicoLog files, such as those generated by the TC-08 +thermocouple reader. 
+ +Usage +````` +Available since ``yadg-5.1``. + +.. autopydantic_model:: dgbowl_schemas.yadg.dataschema_5_1.filetype.PicoLog_tc08 + +Schema +`````` +.. code-block:: yaml + + xarray.Dataset: + coords: + uts: !!float # Unix timestamp, optional + data_vars: + {{ name }} (uts) # Temperature of the named thermocouple + +Metadata +```````` +Metadata about the TC-08 device is read from the overall metadata file. The following +parameters are currently parsed: + +- ``Model``: model number of the device +- ``Serial``: serial number of the device +- ``Software Version``: software version with which the file was created + + +Uncertainties +````````````` +Uncertainties in temperatures are set to 2.2°C, which is the usual value for a Type-K +thermocouple. + +Notes on file structure +``````````````````````` +The ``.picolog`` files are actually ``.tar`` archives, which include the following +files: + +- a ``metadata.json`` file, containing information about the device and the + thermocouples plugged in, +- a ``data-map.json.gz`` file, containing information about chunks of data corresponding + to each thermocouple, +- a set of gzipped binary files with data averaged at different time resolutions + +Currently, the data is extracted from the ``1.0.gz`` file only. + +.. 
def extract(
    *,
    fn: str,
    **kwargs: dict,
) -> Dataset:
    """
    Extract thermocouple data from a PicoLog ``.picolog`` archive.

    The archive is unpacked into a temporary directory. Device metadata is read
    from ``metadata.json``, the chunk index from ``data-map.json.gz``, and the
    samples (big-endian 32-bit floats) from ``1.0.gz``.

    Parameters
    ----------
    fn
        Path to the ``.picolog`` file; it is opened as an uncompressed tar archive.
    **kwargs
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    Dataset
        A dataset with a ``uts`` coordinate. For every enabled channel that has
        data, it holds one variable named after the channel description and a
        matching ``{name}_std_err`` variable (constant 2.2), both with units of
        ``degC``. The dataset attributes are ``Software Version``, ``Model`` and
        ``Serial``.

    Raises
    ------
    RuntimeError
        If a chunk with resolution ``"1"`` points to an archive other than
        ``1.0.gz``, or if a channel with data is not of type ``"thermocouple"``.
    """
    suffix = "_std_err"

    with tempfile.TemporaryDirectory() as tempdir:
        # Open the archive in a ``with`` block so the file handle is always released.
        with tarfile.TarFile(fn, mode="r") as tf:
            # Use the "data" extraction filter where the running Python provides
            # it, so that a crafted archive cannot write outside of ``tempdir``.
            if hasattr(tarfile, "data_filter"):
                tf.extractall(tempdir, filter="data")
            else:
                tf.extractall(tempdir)

        # Get file metadata; JSON is UTF-8, so do not rely on the locale encoding.
        with open(os.path.join(tempdir, "metadata.json"), encoding="utf-8") as inf:
            metadata = json.load(inf)
        uts = dgutils.str_to_uts(timestamp=metadata["startDate"], timezone=None)

        # Here we assume that "items" only has one element
        devdata = metadata["devices"]["items"][0]
        attrs = {
            "Software Version": metadata["appVersion"],
            "Model": devdata["__type"],
            "Serial": devdata["serialInternal"],
        }

        # Collect the enabled channels. Dict-valued entries without an "enabled"
        # key are skipped instead of raising a KeyError.
        devices = []
        for v in devdata.values():
            if isinstance(v, dict) and v.get("enabled"):
                devices.append(
                    {
                        "id": v["channelIdentifier"],
                        "type": v["channelType"],
                        "name": v["description"],
                        # presumably the interval is in ms; true division keeps
                        # sub-second intervals from collapsing to zero
                        "xmul": v["captureConfig"]["interval"] / 1000,
                        "npts": v["capturedSamples"],
                    }
                )

        # Get data from 1.0.gz
        with gzip.open(os.path.join(tempdir, "data-map.json.gz"), "rb") as inp:
            dmap = json.loads(inp.read())
        with gzip.open(os.path.join(tempdir, "1.0.gz"), "rb") as inp:
            raw = inp.read()

        # Convert bytes to floats, one array per channel identifier
        points = {}
        for tag, params in dmap:
            _, __, namestr = tag.split(".")
            channel_id, res, _time = namestr.split("/")
            archive, start, length = params
            if channel_id not in points:
                points[channel_id] = np.empty(0, dtype=">f4")
            if res == "1" and archive == "1.0.gz":
                # ``length`` is in bytes; each sample occupies 4 bytes
                new = np.frombuffer(raw, offset=start, dtype=">f4", count=length // 4)
                points[channel_id] = np.concatenate((points[channel_id], new))
            elif res == "1":
                raise RuntimeError(f"Resolution of 1 but archive is {archive!r}.")

        # Push the data into the Dataset
        ds = xr.Dataset(coords={"uts": (["uts"], [])})
        for channel_id, data in points.items():
            for meta in devices:
                if meta["id"] != channel_id:
                    continue
                # The type of the device should be thermocouple
                if meta["type"] != "thermocouple":
                    raise RuntimeError(f"Unknown type {meta['type']!r}.")
                unit = {"units": "degC"}
                # NOTE(review): NaN samples are dropped before the time axis is
                # generated, which assumes the NaNs are padding and not gaps in
                # the record - confirm against the PicoLog format.
                yvals = data[~np.isnan(data)]
                ydevs = np.ones(len(yvals)) * 2.2
                xvals = np.arange(len(yvals)) * meta["xmul"] + uts
                newds = xr.Dataset(
                    data_vars={
                        meta["name"]: (["uts"], yvals, unit),
                        f"{meta['name']}{suffix}": (["uts"], ydevs, unit),
                    },
                    coords={"uts": (["uts"], xvals)},
                )
                ds = xr.merge((ds, newds))

        # Cross-link each variable with its uncertainty
        for var in ds.variables:
            if f"{var}{suffix}" in ds.variables:
                ds[var].attrs["ancillary_variables"] = f"{var}{suffix}"
            elif var.endswith(suffix):
                # Strip the trailing suffix only, so names that contain the
                # suffix text elsewhere are still resolved correctly.
                base = var[: -len(suffix)]
                if base in ds.variables:
                    ds[var].attrs["standard_name"] = f"{base} standard_error"

    ds.attrs = attrs
    return ds
b/tests/test_extract/20220723-porosity-study-15p-Cu-200mA-longrun-07.picolog new file mode 100644 index 00000000..d4343ba2 Binary files /dev/null and b/tests/test_extract/20220723-porosity-study-15p-Cu-200mA-longrun-07.picolog differ diff --git a/tests/test_extract/ref.20220723-porosity-study-15p-Cu-200mA-longrun-07.picolog.nc b/tests/test_extract/ref.20220723-porosity-study-15p-Cu-200mA-longrun-07.picolog.nc new file mode 100644 index 00000000..1bf304e5 Binary files /dev/null and b/tests/test_extract/ref.20220723-porosity-study-15p-Cu-200mA-longrun-07.picolog.nc differ diff --git a/tests/test_picolog_tc08.py b/tests/test_picolog_tc08.py new file mode 100644 index 00000000..f589438c --- /dev/null +++ b/tests/test_picolog_tc08.py @@ -0,0 +1,24 @@ +import pytest +import os +import pickle +import xarray as xr +from yadg.extractors.picolog.tc08 import extract + + +@pytest.mark.parametrize( + "infile", + [ + "20220723-porosity-study-15p-Cu-200mA-longrun-07.picolog", + "20230917-16-S07-temperature.picolog", + ], +) +def test_picolog_tc08(infile, datadir): + os.chdir(datadir) + ret = extract(fn=infile) + outfile = f"ref.{infile}.pkl" + with open(outfile, "rb") as inp: + ref = pickle.load(inp) + print(f"{ret=}") + with open(outfile, "wb") as out: + pickle.dump(ret, out, 5) + xr.testing.assert_equal(ret, ref) diff --git a/tests/test_picolog_tc08/20220723-porosity-study-15p-Cu-200mA-longrun-07.picolog b/tests/test_picolog_tc08/20220723-porosity-study-15p-Cu-200mA-longrun-07.picolog new file mode 100644 index 00000000..d4343ba2 Binary files /dev/null and b/tests/test_picolog_tc08/20220723-porosity-study-15p-Cu-200mA-longrun-07.picolog differ diff --git a/tests/test_picolog_tc08/20230917-16-S07-temperature.picolog b/tests/test_picolog_tc08/20230917-16-S07-temperature.picolog new file mode 100644 index 00000000..a482860c Binary files /dev/null and b/tests/test_picolog_tc08/20230917-16-S07-temperature.picolog differ diff --git 
a/tests/test_picolog_tc08/ref.20220723-porosity-study-15p-Cu-200mA-longrun-07.picolog.pkl b/tests/test_picolog_tc08/ref.20220723-porosity-study-15p-Cu-200mA-longrun-07.picolog.pkl new file mode 100644 index 00000000..833fe80c Binary files /dev/null and b/tests/test_picolog_tc08/ref.20220723-porosity-study-15p-Cu-200mA-longrun-07.picolog.pkl differ diff --git a/tests/test_picolog_tc08/ref.20230917-16-S07-temperature.picolog.pkl b/tests/test_picolog_tc08/ref.20230917-16-S07-temperature.picolog.pkl new file mode 100644 index 00000000..ee208ed1 Binary files /dev/null and b/tests/test_picolog_tc08/ref.20230917-16-S07-temperature.picolog.pkl differ