Technical Debt: Round 2 (#138)
* custom extractors docs etc.

* Agilent

* Make the linter happy

* More docs changes, and drycal.

* Ezchrom & Tomato

* panalytica.

* phi

* Phi.spe and Quadstar.sac

* fusion

* Nuke most of parsers

* Linter

* The big rename

* Move stuff around

* Purge parsers

* Fix docs

* Update eclab docs
PeterKraus authored Mar 31, 2024
1 parent fe71ea3 commit 6ec691e
Showing 104 changed files with 1,651 additions and 2,675 deletions.
62 changes: 26 additions & 36 deletions docs/apidoc_t/package.rst_t
@@ -15,40 +15,32 @@
 
 {%- if is_namespace %}
 {{- [pkgname, "namespace"] | join(" ") | e | heading }}
-{% elif 'yadg.parsers.basiccsv' == pkgname %}
-{{- "**basiccsv**: Common tabular file parser" | heading }}
-{% elif 'yadg.parsers.chromdata' == pkgname %}
-{{- "**chromdata**: Post-processed chromatography data parser" | heading }}
-{% elif 'yadg.parsers.chromtrace' == pkgname %}
-{{- "**chromtrace**: Raw chromatogram trace file parser" | heading }}
-{% elif 'yadg.parsers.dummy' == pkgname %}
-{{- "**dummy**: A dummy parser" | heading }}
-{% elif 'yadg.parsers.electrochem' == pkgname %}
-{{- "**electrochem**: Electrochemistry data parser" | heading }}
-{% elif 'yadg.parsers.flowdata' == pkgname %}
-{{- "**flowdata**: Flow data parser" | heading }}
-{% elif 'yadg.parsers.masstrace' == pkgname %}
-{{- "**masstrace**: Mass spectroscopy trace file parser" | heading }}
-{% elif 'yadg.parsers.meascsv' == pkgname %}
-{{- "**meascsv**: Legacy MCPT log file parser" | heading }}
-{% elif 'yadg.parsers.qftrace' == pkgname %}
-{{- "**qftrace**: Network analyser trace file parser" | heading }}
-{% elif 'yadg.parsers.xpstrace' == pkgname %}
-{{- "**xpstrace**: XPS trace file parser" | heading }}
-{% elif 'yadg.parsers.xrdtrace' == pkgname %}
-{{- "**xrdtrace**: X-ray diffractogram trace file parser" | heading }}
-{% elif 'yadg.extractors.agilentch' == pkgname %}
-{{- "**agilent-ch**: Agilent ChemStation export ``.CH``" | heading }}
-{% elif 'yadg.extractors.agilentdx' == pkgname %}
-{{- "**agilent-dx**: Agilent OpenLab raw data ``.dx``" | heading }}
-{% elif 'yadg.extractors.eclabmpr' == pkgname %}
-{{- "**eclab-mpr**: BioLogic ECLab binary ``.mpr``" | heading }}
-{% elif 'yadg.extractors.eclabmpt' == pkgname %}
-{{- "**eclab-mpr**: BioLogic ECLab export ``.mpt``" | heading }}
-{% elif 'yadg.extractors.panalyticalxrdml' == pkgname %}
-{{- "**panalytical-xrdml**: PANalytical XRDML ``.xrdml``" | heading }}
-{% elif 'yadg.extractors.phispe' == pkgname %}
-{{- "**phi-spe**: ULVAC-PHI Multipak ``.spe``" | heading }}
+{% elif 'yadg.extractors.basic' == pkgname %}
+{{- "**basic**: For tabulated data" | heading }}
+{% elif 'yadg.extractors.empalc' == pkgname %}
+{{- "**empalc**: For Empa's LC data" | heading }}
+{% elif 'yadg.extractors.example' == pkgname %}
+{{- "**example**: For yadg testing" | heading }}
+{% elif 'yadg.extractors.fhimcpt' == pkgname %}
+{{- "**fhimcpt**: For MCPT set-up at FHI" | heading }}
+{% elif 'yadg.extractors.agilent' == pkgname %}
+{{- "**agilent**: For Agilent chromatograms" | heading }}
+{% elif 'yadg.extractors.drycal' == pkgname %}
+{{- "**drycal**: For MesaLabs Drycal Pro data" | heading }}
+{% elif 'yadg.extractors.eclab' == pkgname %}
+{{- "**eclab**: For BioLogic data files" | heading }}
+{% elif 'yadg.extractors.ezchrom' == pkgname %}
+{{- "**ezchrom**: For EZChrom chromatograms" | heading }}
+{% elif 'yadg.extractors.fusion' == pkgname %}
+{{- "**fusion**: For Fusion chromatograms" | heading }}
+{% elif 'yadg.extractors.panalytical' == pkgname %}
+{{- "**panalytical**: For Panalytical XRD data" | heading }}
+{% elif 'yadg.extractors.phi' == pkgname %}
+{{- "**phi**: For Phi XPS data" | heading }}
+{% elif 'yadg.extractors.quadstar' == pkgname %}
+{{- "**quadstar**: For Quadstar MS data" | heading }}
+{% elif 'yadg.extractors.tomato' == pkgname %}
+{{- "**tomato**: For tomato outputs" | heading }}
 {% else %}
 {{- [pkgname, "package"] | join(" ") | e | heading }}
 {% endif %}
@@ -69,8 +61,6 @@ Subpackages
 {% endif %}
 
 {%- if submodules %}
-Submodules
-----------
 {% if separatemodules %}
 {{ toctree(submodules) }}
 {% else %}
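The template's `elif` chain above is effectively a lookup table from package name to docs heading; after this commit it only special-cases the renamed `yadg.extractors.*` packages, and the old `yadg.parsers.*` branches are gone. A minimal Python sketch of the same logic (a subset of the branches above; `heading_for` is a hypothetical helper, not part of the template):

```python
# Subset of the heading overrides encoded by the Jinja template above.
HEADINGS = {
    "yadg.extractors.agilent": "**agilent**: For Agilent chromatograms",
    "yadg.extractors.eclab": "**eclab**: For BioLogic data files",
    "yadg.extractors.tomato": "**tomato**: For tomato outputs",
}

def heading_for(pkgname: str) -> str:
    # Fall back to the generic "<pkgname> package" heading, as the
    # template's else branch does.
    return HEADINGS.get(pkgname, f"{pkgname} package")
```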
3 changes: 1 addition & 2 deletions docs/source/conf.py
@@ -35,7 +35,7 @@
     # "sphinx.ext.coverage",
     "sphinx.ext.napoleon",
     "sphinx.ext.intersphinx",
-    # "sphinx.ext.autosummary",
+    "sphinx.ext.autosummary",
     "sphinx_autodoc_typehints",
     "sphinx_rtd_theme",
     "sphinxcontrib.autodoc_pydantic",
@@ -63,7 +63,6 @@
 html_theme_options = {
     "body_max_width": "none",
     "sticky_navigation": True,
-    "navigation_depth": 6,
 }
 html_logo = "./images/yadg.png"
 html_favicon = "./images/yadg_ico.png"
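Taken together, the two hunks re-enable the `autosummary` extension and drop the explicit `navigation_depth` override, so `sphinx_rtd_theme` falls back to its default depth of 4. A sketch of the affected `conf.py` blocks after this commit (surrounding entries as shown above):

```python
# docs/source/conf.py (sketch; only the blocks touched by this commit)
extensions = [
    # "sphinx.ext.coverage",
    "sphinx.ext.napoleon",
    "sphinx.ext.intersphinx",
    "sphinx.ext.autosummary",  # previously commented out
    "sphinx_autodoc_typehints",
    "sphinx_rtd_theme",
    "sphinxcontrib.autodoc_pydantic",
]

html_theme_options = {
    "body_max_width": "none",
    "sticky_navigation": True,
    # "navigation_depth": 6 is removed; the theme default (4) applies
}
```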
11 changes: 2 additions & 9 deletions docs/source/extractors.rst
@@ -4,12 +4,5 @@
    :hidden:
    :glob:
 
-   apidoc/yadg.extractors.public.*
-
-.. toctree::
-   :maxdepth: 1
-   :caption: yadg custom extractors
-   :hidden:
-   :glob:
-
-   apidoc/yadg.extractors.custom.*
+   apidoc/yadg.extractors.example
+   apidoc/yadg.extractors.*
82 changes: 9 additions & 73 deletions src/yadg/core.py
@@ -1,12 +1,9 @@
 from importlib import metadata
 import logging
 import importlib
-import xarray as xr
-import numpy as np
 from typing import Callable
 from datatree import DataTree
 from xarray import Dataset
-from pydantic import BaseModel
 
 from dgbowl_schemas.yadg.dataschema import DataSchema
 from yadg import dgutils
@@ -20,18 +17,13 @@ def infer_extractor(extractor: str) -> Callable:
     A function that finds an :func:`extract` function of the supplied ``extractor``.
     """
-    modnames = [
-        f"yadg.extractors.public.{extractor}",
-        f"yadg.extractors.custom.{extractor}",
-        f"yadg.extractors.{extractor.replace('.','')}",
-    ]
-    for modname in modnames:
-        try:
-            m = importlib.import_module(modname)
-            if hasattr(m, "extract"):
-                return getattr(m, "extract")
-        except ImportError:
-            logger.critical(f"could not import module '{modname}'")
+    modname = f"yadg.extractors.{extractor}"
+    try:
+        m = importlib.import_module(modname)
+        if hasattr(m, "extract"):
+            return getattr(m, "extract")
+    except ImportError:
+        logger.critical(f"could not import module '{modname}'")
     raise RuntimeError
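With the `public`/`custom` split gone, extractor lookup in `infer_extractor` reduces to a single dynamic import, as the hunk above shows. A self-contained sketch of the same pattern (`find_extract` is an illustrative stand-in, not the yadg API):

```python
import importlib
import logging

logger = logging.getLogger(__name__)

def find_extract(extractor: str):
    # One candidate module path after the rename: "eclab" maps to
    # "yadg.extractors.eclab"; no more public./custom. prefixes.
    modname = f"yadg.extractors.{extractor}"
    try:
        m = importlib.import_module(modname)
        if hasattr(m, "extract"):
            return getattr(m, "extract")
    except ImportError:
        logger.critical(f"could not import module '{modname}'")
    raise RuntimeError(f"no extract() found for extractor '{extractor}'")
```

For example, `find_extract("example")` imports `yadg.extractors.example` and returns its `extract` function.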


@@ -99,69 +91,13 @@ def process_schema(dataschema: DataSchema, strict_merge: bool = False) -> DataTree:
                     # there are no variables - we don't add 'uts' to those.
                     fvals[name] = dset
                 else:
-                    fvals[name] = complete_uts(
+                    fvals[name] = dgutils.complete_uts(
                         dset, tf, step.externaldate, step.extractor.timezone
                     )
-        vals = merge_dicttrees(vals, fvals, concatmode)
+        vals = dgutils.merge_dicttrees(vals, fvals, concatmode)
 
     stepdt = DataTree.from_dict({} if vals is None else vals)
     stepdt.name = step.tag
     stepdt.attrs = sattrs
     stepdt.parent = root
     return root
-
-
-def complete_uts(
-    ds: Dataset,
-    filename: str,
-    externaldate: BaseModel,
-    timezone: str,
-) -> Dataset:
-    """
-    A helper function ensuring that the Dataset ``ds`` contains a dimension ``"uts"``,
-    and that the timestamps in ``"uts"`` are completed as instructed in the
-    ``externaldate`` specification.
-    """
-    if not hasattr(ds, "uts"):
-        ds = ds.expand_dims("uts")
-    if len(ds.uts.coords) == 0:
-        ds["uts"] = np.zeros(ds.uts.size)
-        ds.attrs["fulldate"] = False
-    if not ds.attrs.get("fulldate", True) or externaldate is not None:
-        ts, fulldate = dgutils.complete_timestamps(
-            timesteps=ds.uts.values,
-            fn=filename,
-            spec=externaldate,
-            timezone=timezone,
-        )
-        ds["uts"] = ts
-        if fulldate:
-            ds.attrs.pop("fulldate", None)
-        else:
-            # cannot store booleans in NetCDF files
-            ds.attrs["fulldate"] = int(fulldate)
-
-    return ds
-
-
-def merge_dicttrees(vals: dict, fvals: dict, mode: str) -> dict:
-    """
-    A helper function that merges two ``DataTree.to_dict()`` objects by concatenating
-    the new values in ``fvals`` to the existing ones in ``vals``.
-    """
-    if vals is None:
-        return fvals
-    for k in fvals.keys():
-        try:
-            vals[k] = xr.concat([vals[k], fvals[k]], dim="uts", combine_attrs=mode)
-        except xr.MergeError:
-            raise RuntimeError(
-                "Merging metadata from multiple files has failed, as some of the "
-                "values differ between files. This might be caused by trying to "
-                "parse data obtained using different techniques/protocols in a "
-                "single step. If you are certain this is what you want, try using "
-                "yadg with the '--ignore-merge-errors' option."
-            )
-    return vals
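The two helpers deleted here live on in `yadg.dgutils` (see the hunks below). The mechanism behind `merge_dicttrees` is `xarray.concat` along `uts` with a `combine_attrs` mode, which is what the `--ignore-merge-errors` option in the deleted error message toggles. A sketch of that behaviour (the exact mode strings yadg passes as `concatmode` are an assumption here):

```python
import xarray as xr

ds_a = xr.Dataset({"T": ("uts", [300.0])}, coords={"uts": [0.0]})
ds_b = xr.Dataset({"T": ("uts", [301.0])}, coords={"uts": [1.0]})
ds_a.attrs["method"] = "OCV"
ds_b.attrs["method"] = "CP"  # conflicting metadata between two files

# A strict mode such as "identical" raises xr.MergeError on the conflicting
# attrs, which yadg re-raises as the RuntimeError above; a lenient mode
# drops the conflicting attribute instead:
merged = xr.concat([ds_a, ds_b], dim="uts", combine_attrs="drop_conflicts")
print(merged.attrs)  # {} - the conflicting "method" attr was dropped
```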
17 changes: 15 additions & 2 deletions src/yadg/dgutils/__init__.py
@@ -1,18 +1,31 @@
 from .helpers import get_yadg_metadata
-from .dateutils import now, infer_timestamp_from, ole_to_uts, complete_timestamps
-from .utils import update_schema, schema_from_preset
+from .dateutils import (
+    now,
+    infer_timestamp_from,
+    str_to_uts,
+    ole_to_uts,
+    complete_timestamps,
+    complete_uts,
+)
+from .schemautils import update_schema, schema_from_preset
 from .btools import read_value
 from .pintutils import sanitize_units, ureg
+from .dsutils import dicts_to_dataset, append_dicts, merge_dicttrees
 
 __all__ = [
     "get_yadg_metadata",
     "now",
     "infer_timestamp_from",
+    "str_to_uts",
     "ole_to_uts",
     "complete_timestamps",
+    "complete_uts",
     "update_schema",
     "schema_from_preset",
     "read_value",
     "sanitize_units",
     "ureg",
+    "dicts_to_dataset",
+    "append_dicts",
+    "merge_dicttrees",
 ]
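After this change the date helpers and the Dataset/DataTree utilities are importable from a single namespace. A short sketch (the timestamp value is illustrative; the `str_to_uts` keywords match the call shown in the `dateutils.py` hunks below):

```python
from yadg.dgutils import str_to_uts

# Convert an ISO 8601 string to a Unix timestamp in a given timezone.
uts = str_to_uts(timestamp="2024-03-31T12:00:00", timezone="Europe/Berlin")
```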
52 changes: 42 additions & 10 deletions src/yadg/dgutils/dateutils.py
@@ -6,7 +6,9 @@
 import logging
 from zoneinfo import ZoneInfo
 import numpy as np
+from pydantic import BaseModel
 from typing import Callable, Union, Mapping, Iterable
+from xarray import Dataset
 from dgbowl_schemas.yadg.dataschema_5_0.externaldate import ExternalDate
 from dgbowl_schemas.yadg.dataschema_5_0.timestamp import TimestampSpec
 
@@ -20,7 +22,8 @@ def now(
     """
     Wrapper around datetime.now()
-    A convenience function for returning the current time as a ISO 8601 or as a unix timestamp.
+    A convenience function for returning the current time as a ISO 8601 or as a Unix
+    timestamp.
     """
     dt = datetime.datetime.now(tz=tz)
     if asstr:
@@ -138,8 +141,8 @@ def infer_timestamp_from(
     spec
         A specification of timestamp elements with associated column indices and
-        optional formats. Currently accepted combinations of keys are: "uts"; "timestamp";
-        "date" and / or "time".
+        optional formats. Currently accepted combinations of keys are: "uts";
+        "timestamp"; "date" and / or "time".
     tz
         Timezone to use for conversion. By default, UTC is used.
@@ -194,7 +197,7 @@ def timefn(value):
 
     else:
         logger.debug(
-            "Assuming specified column containing the time is in ISO 8601 format"
+            "Assuming specified column is time in ISO 8601 format."
         )
 
     def timefn(value):
@@ -220,14 +223,10 @@ def retfn(date, time):
 
         return cols, retfn, True
     elif "uts" in headers:
-        logger.debug(
-            "No timestamp spec provided, assuming column 'uts' is a valid unix timestamp"
-        )
+        logger.debug("Assuming column 'uts' is a valid unix timestamp.")
         return [headers.index("uts")], float, True
     elif "timestamp" in headers:
-        logger.debug(
-            "No timestamp spec provided, assuming column 'timestamp' is a valid ISO 8601 timestamp"
-        )
+        logger.debug("Assuming column 'timestamp' is a valid ISO 8601 timestamp")
 
         def retfunc(value):
             return str_to_uts(timestamp=value, timezone=timezone)
@@ -420,3 +419,36 @@ def timestamps_from_file(
         )
     else:
         return float(data)
+
+
+def complete_uts(
+    ds: Dataset,
+    filename: str,
+    externaldate: BaseModel,
+    timezone: str,
+) -> Dataset:
+    """
+    A helper function ensuring that the Dataset ``ds`` contains a dimension ``"uts"``,
+    and that the timestamps in ``"uts"`` are completed as instructed in the
+    ``externaldate`` specification.
+    """
+    if not hasattr(ds, "uts"):
+        ds = ds.expand_dims("uts")
+    if len(ds.uts.coords) == 0:
+        ds["uts"] = np.zeros(ds.uts.size)
+        ds.attrs["fulldate"] = False
+    if not ds.attrs.get("fulldate", True) or externaldate is not None:
+        ts, fulldate = complete_timestamps(
+            timesteps=ds.uts.values,
+            fn=filename,
+            spec=externaldate,
+            timezone=timezone,
+        )
+        ds["uts"] = ts
+        if fulldate:
+            ds.attrs.pop("fulldate", None)
+        else:
+            # cannot store booleans in NetCDF files
+            ds.attrs["fulldate"] = int(fulldate)
+    return ds
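A short usage sketch of the relocated `complete_uts`: when the `Dataset` already carries complete Unix timestamps (no `fulldate` attribute, which defaults to true) and no `externaldate` spec is supplied, the data passes through unchanged. The filename below is hypothetical:

```python
import xarray as xr
from yadg.dgutils import complete_uts

ds = xr.Dataset(
    {"flow": ("uts", [1.2, 1.3])},
    coords={"uts": [1711886400.0, 1711886460.0]},
)
# Timestamps are already complete and there is no externaldate spec,
# so the Dataset is returned as-is.
ds = complete_uts(ds, "2024-03-31-run.csv", None, "Europe/Berlin")
```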
[The remaining 98 changed files in this commit are not shown here.]
