-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathuser_config.yaml
191 lines (151 loc) · 7.43 KB
/
user_config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# PyChamberFlux user settings
# This file complies with the YAML(TM) 1.1 standard <http://yaml.org/spec/1.1/>
# and is parsed into Python with PyYAML <http://pyyaml.org/>.
# Notice on the YAML formatting:
# - This config file allows only one nested level.
# - Boolean values are in lowercase: `true` and `false`
# - The `null` value is parsed into Python as `None`
# - Use double quotation marks for any regex expression
# - For scientific notation of floating numbers, always specify the sign of
# the exponent, i.e., `1.0e+3` is correct while `1.0e3` is wrong.
# - The NaN value for floating number is `.nan`
run_options:  # Running options
  chamber_config_filepath: "chamber.yaml"
  # Configuration file that describes chamber settings.

  load_data_by_day: false
  # If `true`, load and process the raw data in daily chunks. The default,
  # `false`, loads all data at once and then processes them in daily chunks.
  # Note: enable this if the total size of the raw data files is larger than
  # the computer's memory; otherwise reading the data may take forever.

  save_fitting_plots: false
  # If `true`, save the curve fitting plots for every chamber sampling period.

  save_daily_plots: false
  # If `true`, save daily plots of chamber fluxes.
data_dir:  # Input and output directories, and other related settings
  biomet_data: "./tests/input/biomet/*.dat"
  # Absolute or relative path to search for biomet data files. The value
  # given here uses a `*` wildcard to match the file names.

  biomet_data.date_format: "%Y_%m_%d"
  # Date format string used in the file names (not the one used inside the
  # data table).

  conc_data: "./tests/input/conc/*.str"
  # Absolute or relative path to search for concentration data files. The
  # value given here uses a `*` wildcard to match the file names.

  conc_data.date_format: "%y%m%d"
  # Date format string used in the file names (not the one used inside the
  # data table).

  output_dir: "./tests/output/"
  # Output directory for the processed flux data.

  output_filename_prefix: "sr"
  # A prefix string placed before the timestamp in output data file names.

  plot_dir: "./tests/plots/"
  # Directory for saved plots.

  separate_conc_data: true
  # If `true`, concentration measurements are stored on their own, not in
  # the biomet data files.
  # If `false`, search for concentration measurements in the biomet data.
biomet_data_settings:  # Settings for reading the biomet data
  delimiter: ','
  # Supported table delimiters (regex patterns must be in double quotes):
  #   - single space: ' '
  #   - indefinite number of spaces: "\\s+" (works also for single space)
  #   - comma: ','
  #   - tab: "\\t"

  header: 3
  # Row number of the last line of the header (starting from 0).
  # Default behavior is to infer it with `pandas.read_csv()`.

  names: ['timestamp', 'T_log', 'ch_no', 'T_ch_1', 'T_ch_2', 'T_ch_3',
          'T_ch_4', 'T_ch_5', 'T_ch_6', 'T_atm', 'RH_atm',
          'PAR_ch_1', 'PAR_ch_2', 'PAR_ch_3', 'PAR_ch_4',
          'flow_ch_1', 'flow_ch_2', 'flow_ch_3', 'flow_ch_4',
          'flow_ch_5', 'flow_ch_6', 'flow_7',
          'T_soil_ch_4', 'w_soil_ch_4', 'T_soil_ch_5', 'w_soil_ch_5',
          'T_soil_ch_6', 'w_soil_ch_6']
  # Define the data table column names.
  # Default is `null` (Python `None`), i.e., to infer them with
  # `pandas.read_csv()`.
  # Tip: copy the column names from the data file, and then change the names
  # of the variables of interest to the standardized names.

  usecols: [0, 2, 4, 5, 7, 9, 11, 13, 15, 18, 23, 28, 33, 38, 43, 48, 53,
            58, 63, 68, 73, 78, 82, 84, 93, 95, 104, 106]
  # Specify a sequence of indices for the columns to read into the data
  # structure. Column index starts from 0 in Python.
  # Default behavior (`null`, Python `None`) is to read all columns.

  na_values: "\"NAN\""
  # Modify this if you need to specify the missing values.
  # Default is `null` (Python `None`), which uses the default options of
  # `pandas.read_csv()`.

  parse_dates: [0]
  # - If `false`, do not attempt to parse dates with `pandas.read_csv()`.
  # - If given a list of column indices or names, parse those columns as
  #   dates.
  # - When parsing multiple columns to form a datetime variable, a column
  #   name must be specified for the parsed result.
conc_data_settings:  # Settings for reading the concentration data
  delimiter: "\\s+"
  # Supported table delimiters (regex patterns must be in double quotes):
  #   - single space: ' '
  #   - indefinite number of spaces: "\\s+" (works also for single space)
  #   - comma: ','
  #   - tab: "\\t"

  header: 0
  # Row number of the last line of the header (starting from 0).
  # Default behavior is to infer it with `pandas.read_csv()`.

  names: ['time_sec', 'cos', 'co2', 'h2o', 'co2_2']
  # Define the data table column names.
  # Default is `null` (Python `None`), i.e., to infer them with
  # `pandas.read_csv()`.
  # Note that for the concentration data table, gas species that are not
  # defined in the species settings will be ignored.

  usecols: [0, 1, 2, 3, 4]
  # Specify a sequence of indices for the columns to read into the data
  # structure. Column index starts from 0 in Python.
  # Default behavior (`null`, Python `None`) is to read all columns.

  parse_dates: false
  # - If `false`, do not attempt to parse dates with `pandas.read_csv()`.
  # - If given a list of column indices or names, parse those columns as
  #   dates.
  # - When parsing multiple columns to form a datetime variable, a column
  #   name must be specified for the parsed result.
site_parameters:  # Site-specific physical parameters
  site_pressure: 96.8e+3
  # In Pascal. Default behavior (`null`, Python `None`) is to use the
  # standard pressure.

  time_zone: -7
  # Time zone with respect to UTC. For example, -8 means UTC-8.
  # Warning: daylight saving transitions are not supported. Use standard
  # (non daylight saving) time, or process the data before and after the
  # daylight saving transition separately.
species_settings:  # Gas species definitions and unit conversions
  species_list: ['cos', 'co2', 'h2o']
  # Measured gas species in the concentration data.
  # Note: the order of gas species in the output file will follow the order
  # defined in this sequence.

  species_names: ['COS', 'CO$_2$', 'H$_2$O']
  # Names of gas species shown in the plot axis labels.
  # LaTeX format is supported by matplotlib.

  # Per-species settings:
  #   `unit`: the unit of mixing ratio in the concentration data file
  #   `output_unit`: the unit of mixing ratio in the output file
  #   `multiplier`: the factor by which the input values are multiplied to
  #       convert them to the output unit; must be equal to
  #       `unit / output_unit`.
  # For example, if H2O in the input data file was recorded in percentage
  # (1.0e-2), and the output unit of H2O concentration needs to be parts
  # per thousand (1.0e-3), then the multiplier would be 10.
  # Some commonly used units:
  #   1.0     = mole fraction [0 to 1]
  #   1.0e-2  = percent (%)
  #   1.0e-3  = ppthv (parts per thousand) or mmol mol^-1
  #   1.0e-6  = ppmv or mumol mol^-1
  #   1.0e-9  = ppbv or nmol mol^-1
  #   1.0e-12 = pptv (parts per trillion) or pmol mol^-1
  h2o:
    unit: 1.0e-9
    output_unit: 1.0e-3
    multiplier: 1.0e-6
    baseline_correction: 'none'
  co2:
    unit: 1.0e-9
    output_unit: 1.0e-6
    multiplier: 1.0e-3
    baseline_correction: 'median'
  cos:
    unit: 1.0e-9
    output_unit: 1.0e-12
    multiplier: 1.0e+3
    baseline_correction: 'median'
  # You may add your own gas species following the same format.
  # The name that represents the added gas species is not so important
  # as long as it is used *consistently*. For example, if you define the
  # species name for CO2 to be `CO_2`, you must use the same name `CO_2`
  # both in the `species_list` key and as the key of its unit definition.