Skip to content

Commit

Permalink
Fix processing of data in the "Lock, stock & barrel" repository. (#129)
Browse files Browse the repository at this point in the history
* Fix crash when peak properties are not present in xlsx files.

* Fix the same crash (missing peak properties) for csv files.

* Fix the uninitialised "trim" variable in drycal.

* Add the forgotten test files for drycal.

* black

* Avoid incompatibility with new xarray-datatree.
  • Loading branch information
PeterKraus authored Dec 4, 2023
1 parent 8ca8380 commit f783eb9
Show file tree
Hide file tree
Showing 10 changed files with 262 additions and 7 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
"python-dateutil",
"openpyxl>=3.0.0",
"h5netcdf~=1.0",
"xarray-datatree>=0.0.12",
"xarray-datatree==0.0.12",
"dgbowl-schemas>=116",
"requests",
],
Expand Down
5 changes: 2 additions & 3 deletions src/yadg/parsers/chromdata/empalccsv.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,9 +190,8 @@ def process(*, fn: str, encoding: str, **kwargs: dict) -> xr.Dataset:
vals = {}
devs = {}
for kk in {"height", "area", "concentration", "retention time"}:
vals[kk], devs[kk] = zip(
*[v[kk].get(cn, (np.nan, np.nan)) for cn in species]
)
val = v.get(kk, {})
vals[kk], devs[kk] = zip(*[val.get(cn, (np.nan, np.nan)) for cn in species])
point["vals"] = vals
point["devs"] = devs
data.append(point)
Expand Down
5 changes: 2 additions & 3 deletions src/yadg/parsers/chromdata/empalcxlsx.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,8 @@ def process(*, fn: str, **kwargs: dict) -> xr.Dataset:
vals = {}
devs = {}
for kk in {"height", "area", "concentration", "retention time"}:
vals[kk], devs[kk] = zip(
*[v[kk].get(cn, (np.nan, np.nan)) for cn in species]
)
val = v.get(kk, {})
vals[kk], devs[kk] = zip(*[val.get(cn, (np.nan, np.nan)) for cn in species])
point["vals"] = vals
point["devs"] = devs
data.append(point)
Expand Down
1 change: 1 addition & 0 deletions src/yadg/parsers/flowdata/drycal.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ def drycal_table(lines: list, sep: str = ",") -> tuple[list, dict, list]:
headers = []
units = {}
data = []
trim = False
for item in items:
for rs in [". ", " "]:
parts = item.split(rs)
Expand Down
13 changes: 13 additions & 0 deletions tests/test_chromdata.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import pytest
import os
import yaml
from tests.utils import (
datagram_from_input,
standard_datagram_test,
compare_result_dicts,
dg_get_quantity,
)
from yadg.core import process_schema
from dgbowl_schemas.yadg import to_dataschema


def special_datagram_test(datagram, testspec):
Expand Down Expand Up @@ -249,3 +252,13 @@ def test_datagram_from_chromdata(input, ts, datadir):
ret = datagram_from_input(input, "chromdata", datadir, version="4.2")
standard_datagram_test(ret, ts)
special_datagram_test(ret, ts)


def test_lock_stock_chromdata(datadir):
    """Process the "lock, stock" LC dataschema end-to-end and check shapes.

    Loads ``lock_stock_dataschema.yml`` from ``datadir``, converts it with
    ``to_dataschema``, runs ``process_schema`` on it, and verifies that every
    peak property under the ``LC`` node has the expected shape.

    Args:
        datadir: Directory holding the schema and its input data
            (presumably the pytest-datadir fixture -- confirm).
    """
    # The schema refers to its input folder as ".", so the working directory
    # has to be the data directory before the schema is processed.
    os.chdir(datadir)
    with open("lock_stock_dataschema.yml", "r") as schema_file:
        raw_schema = yaml.safe_load(schema_file)
    dataschema = to_dataschema(**raw_schema)
    ret = process_schema(dataschema)
    print(f"{ret=}")
    # NOTE(review): (7, 2) is presumably (timesteps, species) -- confirm.
    expected_shape = (7, 2)
    for quantity in ("height", "concentration", "retention time", "area"):
        assert ret["LC"][quantity].shape == expected_shape
Binary file not shown.
18 changes: 18 additions & 0 deletions tests/test_chromdata/lock_stock_dataschema.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
metadata:
provenance:
type: manual
version: "4.2"
timezone: Europe/Berlin
steps:
- parser: chromdata
input:
folders: ["."]
suffix: "LC-data.xlsx"
externaldate:
using:
filename:
format: "%Y-%m-%d-%H-%M-%S%z"
len: 24
parameters:
filetype: "empalc.xlsx"
tag: LC
13 changes: 13 additions & 0 deletions tests/test_drycal.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
import pytest
import os
import yaml
from tests.utils import (
datagram_from_input,
standard_datagram_test,
pars_datagram_test,
)
from yadg.core import process_schema
from dgbowl_schemas.yadg import to_dataschema


@pytest.mark.parametrize(
Expand Down Expand Up @@ -104,3 +108,12 @@ def test_datagram_from_drycal(input, ts, datadir):
ret = datagram_from_input(input, "flowdata", datadir)
standard_datagram_test(ret, ts)
pars_datagram_test(ret, ts)


def test_lock_stock_drycal(datadir):
    """Process the "lock, stock" DryCal dataschema end-to-end and check shape.

    Loads ``lock_stock_dataschema.yml`` from ``datadir``, converts it with
    ``to_dataschema``, runs ``process_schema`` on it, and verifies the length
    of the ``DryCal`` variable under the ``outlet`` node.

    Args:
        datadir: Directory holding the schema and its input data
            (presumably the pytest-datadir fixture -- confirm).
    """
    # The schema refers to its input folder as ".", so the working directory
    # has to be the data directory before the schema is processed.
    os.chdir(datadir)
    with open("lock_stock_dataschema.yml", "r") as schema_file:
        raw_schema = yaml.safe_load(schema_file)
    dataschema = to_dataschema(**raw_schema)
    ret = process_schema(dataschema)
    print(f"{ret=}")
    # 187 presumably corresponds to the sample rows of the Defender CSV
    # fixture picked up by the schema's suffix filter -- confirm.
    flow = ret["outlet"]["DryCal"]
    assert flow.shape == (187,)
191 changes: 191 additions & 0 deletions tests/test_drycal/20220912_Defender.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
Product,Defender 530+ ,,,,
Serial Number,180849,,,,
,,,,,
Sample,DryCal smL/min ,DryCal Avg. smL/min ,Temp. Deg C,Pressure mBar ,Time
1,18.068,18.065,26.2,979,8:48:31 AM
2,17.97,18.062,26.2,979,8:49:44 AM
3,17.791,18.053,26.2,979,8:50:54 AM
4,17.991,18.051,26.2,979,8:52:04 AM
5,18.089,18.052,26.2,979,8:53:15 AM
6,18.059,18.052,26.2,979,8:54:25 AM
7,18.235,18.058,26.2,979,8:55:35 AM
8,18.194,18.062,26.2,979,8:56:46 AM
9,18.025,18.061,26.2,979,8:57:56 AM
10,18.156,18.063,26.2,979,8:59:09 AM
11,18.077,18.064,26.2,979,9:00:23 AM
12,18.035,18.063,26.2,979,9:01:35 AM
13,18.127,18.064,26.2,979,9:02:45 AM
14,18.039,18.064,26.2,979,9:03:55 AM
15,17.991,18.062,26.2,979,9:05:05 AM
16,18.113,18.063,26.2,979,9:06:16 AM
17,18.042,18.063,26.2,979,9:07:26 AM
18,17.976,18.061,26.2,979,9:08:37 AM
19,18.09,18.062,26.2,979,9:09:47 AM
20,18.055,18.061,26.2,979,9:10:57 AM
21,18.006,18.06,26.2,979,9:12:07 AM
22,18.084,18.061,26.2,979,9:13:18 AM
23,18.025,18.06,26.3,979,9:14:28 AM
24,18.008,18.008,26.2,979,9:15:38 AM
25,17.964,17.986,26.3,979,9:16:48 AM
26,17.849,17.941,26.2,979,9:17:57 AM
27,18.074,17.974,26.2,979,9:19:05 AM
28,18.116,18.002,26.2,979,9:20:14 AM
29,18.063,18.012,26.2,979,9:21:24 AM
30,18.101,18.025,26.2,979,9:22:34 AM
31,18.023,18.025,26.2,979,9:23:43 AM
32,18.19,18.043,26.2,979,9:24:51 AM
33,18.095,18.048,26.2,979,9:25:59 AM
34,18.119,18.055,26.2,979,9:27:07 AM
35,18.14,18.062,26.2,979,9:28:16 AM
36,18.049,18.061,26.2,979,9:29:25 AM
37,18.154,18.067,26.2,979,9:30:33 AM
38,18.106,18.07,26.2,979,9:31:41 AM
39,18.043,18.068,26.2,979,9:32:49 AM
40,18.147,18.073,26.2,979,9:33:58 AM
41,18.098,18.074,26.2,979,9:35:07 AM
42,18.028,18.072,26.2,979,9:36:14 AM
43,18.144,18.075,26.2,979,9:37:23 AM
44,18.154,18.079,26.2,979,9:38:31 AM
45,18.076,18.079,26.2,979,9:39:40 AM
46,18.107,18.08,26.2,979,9:40:48 AM
47,18.155,18.083,26.2,979,9:41:56 AM
48,18.134,18.085,26.2,979,9:43:05 AM
49,18.101,18.086,26.2,979,9:44:13 AM
50,18.153,18.088,26.2,979,9:45:21 AM
51,18.176,18.092,26.2,979,9:46:30 AM
52,18.138,18.093,26.2,979,9:47:38 AM
53,18.053,18.092,26.1,979,9:48:46 AM
54,18.08,18.091,26.1,979,9:49:55 AM
55,18.118,18.092,26.1,979,9:51:03 AM
56,18.192,18.095,26.1,979,9:52:12 AM
57,18.128,18.096,26.1,979,9:53:20 AM
58,18.105,18.097,26.1,979,9:54:28 AM
59,18.043,18.095,26,979,9:55:37 AM
60,18.136,18.096,26,979,9:56:45 AM
61,18.214,18.099,26,979,9:57:54 AM
62,18.119,18.1,26,979,9:59:02 AM
63,18.08,18.099,25.9,979,10:00:11 AM
64,18.03,18.098,25.9,979,10:01:19 AM
65,18.126,18.098,25.9,979,10:02:26 AM
66,18.22,18.101,25.8,979,10:03:35 AM
67,18.131,18.102,25.8,979,10:04:43 AM
68,18.067,18.101,25.8,979,10:05:52 AM
69,18.073,18.1,25.8,979,10:07:00 AM
70,18.125,18.101,25.8,979,10:08:09 AM
71,18.191,18.103,25.8,979,10:09:17 AM
72,18.125,18.103,25.7,979,10:10:25 AM
73,18.028,18.102,25.7,979,10:11:34 AM
74,18.073,18.073,25.7,979,10:12:42 AM
75,18.174,18.123,25.7,979,10:13:51 AM
76,18.148,18.131,25.7,979,10:14:59 AM
77,18.088,18.12,25.7,979,10:16:07 AM
78,18.112,18.119,25.6,979,10:17:16 AM
79,18.174,18.128,25.6,979,10:18:25 AM
80,18.13,18.128,25.6,979,10:19:32 AM
81,18.082,18.122,25.6,979,10:20:40 AM
82,18.083,18.118,25.6,979,10:21:49 AM
83,18.163,18.122,25.6,979,10:22:57 AM
84,18.122,18.122,25.6,979,10:24:06 AM
85,18.107,18.121,25.5,979,10:25:14 AM
86,18.047,18.115,25.5,979,10:26:23 AM
87,18.192,18.121,25.5,979,10:27:31 AM
88,18.141,18.122,25.5,979,10:28:39 AM
89,18.027,18.116,25.5,979,10:29:48 AM
90,18.113,18.116,25.5,979,10:30:56 AM
91,18.189,18.12,25.5,979,10:32:05 AM
92,18.119,18.12,25.5,979,10:33:13 AM
93,18.048,18.116,25.5,979,10:34:21 AM
94,18.094,18.115,25.5,979,10:35:30 AM
95,18.174,18.118,25.5,979,10:36:38 AM
96,18.158,18.12,25.5,979,10:37:47 AM
97,18.12,18.12,25.4,979,10:38:55 AM
98,18.023,18.116,25.4,979,10:40:03 AM
99,18.161,18.118,25.4,979,10:41:12 AM
100,18.168,18.12,25.4,979,10:42:21 AM
101,18.105,18.119,25.4,979,10:43:29 AM
102,18.057,18.117,25.4,979,10:44:37 AM
103,18.12,18.117,25.4,979,10:45:45 AM
104,18.156,18.118,25.4,979,10:46:54 AM
105,18.108,18.118,25.4,979,10:48:02 AM
106,18.056,18.116,25.4,979,10:49:11 AM
107,18.16,18.117,25.4,979,10:50:19 AM
108,18.145,18.118,25.4,979,10:51:27 AM
109,18.073,18.117,25.4,979,10:52:36 AM
110,18.04,18.115,25.4,979,10:53:44 AM
111,18.13,18.115,25.4,979,10:54:53 AM
112,18.134,18.116,25.4,979,10:56:01 AM
113,18.059,18.114,25.3,979,10:57:09 AM
114,18.095,18.114,25.4,979,10:58:17 AM
115,18.133,18.114,25.4,979,10:59:26 AM
116,18.051,18.113,25.4,979,11:00:34 AM
117,18.08,18.112,25.3,979,11:01:43 AM
118,18.153,18.113,25.3,979,11:02:51 AM
119,18.067,18.112,25.3,979,11:03:59 AM
120,18.019,18.11,25.3,979,11:05:07 AM
121,18.125,18.11,25.3,979,11:06:16 AM
122,18.168,18.111,25.3,979,11:07:25 AM
123,18.085,18.111,25.3,979,11:18:19 AM
124,18.028,18.028,25.3,979,11:29:27 AM
125,18.1,18.064,25.3,979,11:40:40 AM
126,18.144,18.091,25.3,979,11:51:48 AM
127,18.062,18.084,25.3,979,12:02:57 PM
128,18.107,18.088,25.3,979,12:14:06 PM
129,18.161,18.101,25.3,979,12:25:16 PM
130,18.087,18.099,25.3,979,12:36:23 PM
131,18.056,18.093,25.3,979,12:47:33 PM
132,18.137,18.098,25.3,979,12:58:39 PM
133,18.044,18.093,25.3,979,1:09:48 PM
134,18.129,18.096,25.3,979,1:20:56 PM
135,18.085,18.095,25.3,979,1:32:04 PM
136,18.029,18.09,25.3,979,1:43:13 PM
137,18.161,18.095,25.3,979,1:54:20 PM
138,18.068,18.093,25.3,979,2:05:27 PM
139,18.025,18.089,25.3,979,2:16:37 PM
140,18.181,18.094,25.3,979,2:27:46 PM
141,18.097,18.095,25.3,979,2:38:56 PM
142,18.034,18.091,25.3,979,2:50:04 PM
143,18.125,18.093,25.3,979,3:01:11 PM
144,18.186,18.097,25.3,979,3:12:19 PM
145,18.018,18.094,25.3,979,3:23:27 PM
146,18.141,18.096,25.3,979,3:34:38 PM
147,18.075,18.095,25.3,979,3:45:47 PM
148,18.231,18.1,25.3,979,3:56:57 PM
149,18.141,18.102,25.3,979,4:08:05 PM
150,18.074,18.101,25.2,979,4:19:13 PM
151,18.144,18.103,25.3,979,4:30:23 PM
152,18.141,18.104,25.3,979,4:41:31 PM
153,18.052,18.102,25.2,979,4:52:41 PM
154,18.095,18.102,25.3,979,5:03:49 PM
155,18.177,18.104,25.3,979,5:14:59 PM
156,18.188,18.107,25.2,979,5:26:08 PM
157,17.949,18.102,25.3,979,5:37:17 PM
158,18.113,18.102,25.3,979,5:48:29 PM
159,18.054,18.101,25.2,979,5:59:37 PM
160,18.009,18.099,25.2,979,6:10:45 PM
161,18.022,18.097,25.2,979,6:21:54 PM
162,18.075,18.096,25.2,979,6:33:02 PM
163,18.025,18.094,25.2,979,6:44:11 PM
164,17.978,18.091,25.2,981,6:55:20 PM
165,17.926,18.087,25.2,979,7:06:30 PM
166,18.059,18.087,25.2,981,7:17:37 PM
167,18.032,18.086,25.2,981,7:28:47 PM
168,17.97,18.083,25.2,981,7:39:54 PM
169,17.902,18.079,25.2,981,7:51:03 PM
170,17.947,18.076,25.2,981,8:02:11 PM
171,18.009,18.075,25.2,981,8:13:19 PM
172,17.944,18.072,25.2,981,8:24:29 PM
173,17.923,18.069,25.2,979,8:35:37 PM
174,17.897,17.897,25.2,981,8:46:46 PM
175,17.866,17.881,25.2,979,9:13:29 PM
176,17.892,17.885,25.2,981,9:24:37 PM
177,17.937,17.898,25.2,981,9:35:45 PM
178,17.982,17.915,25.2,981,9:46:53 PM
179,18.038,17.935,25.2,981,9:58:00 PM
180,18.024,17.948,25.2,981,10:09:08 PM
181,18.017,17.957,25.2,981,10:20:17 PM
182,17.981,17.959,25.3,981,10:31:26 PM
183,17.955,17.959,25.3,981,10:42:34 PM
184,17.993,17.962,25.3,981,10:53:42 PM
185,17.957,17.962,25.2,981,11:04:50 PM
186,17.939,17.96,25.3,981,11:15:59 PM
187,17.934,17.958,25.3,981,11:27:06 PM
21 changes: 21 additions & 0 deletions tests/test_drycal/lock_stock_dataschema.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Dataschema used by the DryCal "lock, stock" test: a single flowdata step
# reading files ending in "Defender.csv" from the current folder.
metadata:
provenance:
type: manual
version: "4.2"
timezone: Europe/Berlin
steps:
- parser: flowdata
input:
folders: ["."]
suffix: "Defender.csv"
parameters:
filetype: "drycal.csv"
externaldate:
using:
filename:
format: "%Y%m%d"
len: 8
mode: "add"
# NOTE(review): a `parameters` key with the same `filetype` already appears
# earlier in this step; if both sit at the same mapping level this is a
# duplicate key (the later one wins under PyYAML) -- TODO confirm, drop one.
parameters:
filetype: "drycal.csv"
tag: outlet

0 comments on commit f783eb9

Please sign in to comment.