# user_config.yaml

# PyChamberFlux user settings
# Complies with the YAML(TM) 1.1 standard <http://yaml.org/spec/1.1/>.
# Parsed into Python with PyYAML <http://pyyaml.org/>.


# Notice on the YAML formatting:
# - This config file allows only one nested level.
# - Boolean values are in lowercase: `true` and `false`
# - The `null` value is parsed into Python as `None`
# - Use double quotation marks for any regular expression and double each
#   backslash, e.g., "\\s+"
# - For scientific notation of floating numbers, always specify the sign of
#   the exponent, i.e., `1.0e+3` is correct while `1.0e3` is wrong.
# - The NaN value for floating number is `.nan`


run_options:  # General switches for a processing run
    chamber_config_filepath: 'chamber.yaml'
    # Path to the configuration file describing the chamber settings.

    load_data_by_day: false
    # When `true`, raw data are loaded and processed one daily chunk at a
    # time. The default, `false`, loads all data at once and then processes
    # them in daily chunks.
    # Note: enable this when the total size of the raw data files is larger
    # than the computer memory; otherwise reading the data may take forever.

    save_fitting_plots: false
    # When `true`, the curve fitting plots are saved for every chamber
    # sampling period.

    save_daily_plots: false
    # When `true`, daily plots of chamber fluxes are saved.


data_dir:  # Input/output locations and related settings
    biomet_data: './tests/input/biomet/*.dat'
    # Absolute or relative path to search for biomet data files (the value
    # above uses a `*` wildcard in the file name part).

    biomet_data.date_format: '%Y_%m_%d'
    # Date format string used in the biomet file names (not the one used in
    # the data table).

    conc_data: './tests/input/conc/*.str'
    # Absolute or relative path to search for concentration data files (the
    # value above uses a `*` wildcard in the file name part).

    conc_data.date_format: '%y%m%d'
    # Date format string used in the concentration file names (not the one
    # used in the data table).

    output_dir: './tests/output/'
    # Directory that receives the processed flux data.

    output_filename_prefix: 'sr'
    # Prefix string placed before the timestamp in output data file names.

    plot_dir: './tests/plots/'
    # Directory in which plots are saved.

    separate_conc_data: true
    # When `true`, concentration measurements are stored in their own files,
    # not in the biomet data files.
    # When `false`, concentration measurements are searched for in the
    # biomet data.


biomet_data_settings:  # Settings for reading the biomet data
    delimiter: ','
    # Supported table delimiters, written as they must appear in this file:
    #   - single space: ' '
    #   - indefinite number of spaces: "\\s+" (works also for single space)
    #   - comma: ','
    #   - tab: "\\t"
    # Delimiters containing a backslash must be double-quoted: in a
    # single-quoted YAML string, `\\` is kept as two literal backslashes.

    header: 3
    # Row number of the last line of the header (starting from 0).
    # Default behavior is to infer it with `pandas.read_csv()`.

    names: ['timestamp', 'T_log', 'ch_no', 'T_ch_1', 'T_ch_2', 'T_ch_3',
            'T_ch_4', 'T_ch_5', 'T_ch_6', 'T_atm', 'RH_atm',
            'PAR_ch_1', 'PAR_ch_2', 'PAR_ch_3', 'PAR_ch_4',
            'flow_ch_1', 'flow_ch_2', 'flow_ch_3', 'flow_ch_4',
            'flow_ch_5', 'flow_ch_6', 'flow_7',
            'T_soil_ch_4', 'w_soil_ch_4', 'T_soil_ch_5', 'w_soil_ch_5',
            'T_soil_ch_6', 'w_soil_ch_6']
    # Define the data table column names (28 names, one for each index in
    # `usecols` below).
    # Default is `None`, i.e., to infer with `pandas.read_csv()`.
    # Tip: copy the column names from the data file, and then change the
    # names of the variables of interest to the standardized names.

    usecols: [0, 2, 4, 5, 7, 9, 11, 13, 15, 18, 23, 28, 33, 38, 43, 48, 53,
              58, 63, 68, 73, 78, 82, 84, 93, 95, 104, 106]
    # Specify a sequence of indices for columns to read into the data
    # structure. Column index starts from 0 in Python.
    # Default behavior (`None`) is to read all columns.

    na_values: '"NAN"'
    # Modify this if you need to specify the missing values. The value above
    # is the five-character text "NAN", double quotation marks included.
    # Default is `None`, which uses the default options of
    # `pandas.read_csv()`.

    parse_dates: [0]
    # If `false`, do not attempt to parse dates with `pandas.read_csv()`.
    # If given a list of column indices or names, parse those columns as
    # dates. When parsing multiple columns to form a datetime variable, a
    # column name must be specified for the parsed result.


conc_data_settings:  # How the concentration data files are read
    delimiter: "\\s+"
    # Supported table delimiters, written as they must appear in this file:
    #   - single space: ' '
    #   - indefinite number of spaces: "\\s+" (works also for single space)
    #   - comma: ','
    #   - tab: "\\t"

    header: 0
    # Row number (counted from 0) of the last header line.
    # By default it is inferred by `pandas.read_csv()`.

    names:
        - 'time_sec'
        - 'cos'
        - 'co2'
        - 'h2o'
        - 'co2_2'
    # Column names of the data table.
    # The default, `None`, lets `pandas.read_csv()` infer them.
    # Note that in the concentration data table, gas species that are not
    # defined in the species settings will be ignored.

    usecols:
        - 0
        - 1
        - 2
        - 3
        - 4
    # Indices of the columns to read into the data structure. Column index
    # starts from 0 in Python.
    # The default (`None`) reads all columns.

    parse_dates: false
    # If `false`, do not attempt to parse dates with `pandas.read_csv()`.
    # If given a list of column indices or names, those columns are parsed
    # as dates. When multiple columns are parsed to form a datetime
    # variable, a column name must be specified for the parsed result.


site_parameters:
    site_pressure: 96800.0
    # Site pressure in Pascal. The default behavior (`None`) is to use the
    # standard pressure.

    time_zone: -7
    # Time zone relative to UTC; for example, -8 means UTC-8.
    # Warning: daylight saving transitions are not supported. Use standard
    # (non daylight saving) time, or process the data before and after the
    # daylight saving transition separately.


species_settings:
    species_list:
        - 'cos'
        - 'co2'
        - 'h2o'
    # Gas species measured in the concentration data.
    # Note: gas species appear in the output file in the order given in
    # this sequence.

    species_names:
        - 'COS'
        - 'CO$_2$'
        - 'H$_2$O'
    # Names of the gas species shown in the plot axis labels.
    # LaTeX format is supported by matplotlib.

    # Per-species keys used below:
    # `unit`: unit of the mixing ratio in the concentration data file
    # `output_unit`: unit of the mixing ratio in the output file
    # `multiplier`: factor applied to the input values to convert them to
    # the output unit; it must be equal to `unit / output_unit`.
    # For example, if H2O was recorded in percent (1.0e-2) in the input data
    # file and the output H2O concentration needs to be in parts per
    # thousand (1.0e-3), the multiplier is 10.
    # Some commonly used units:
    #     1.0 = mole fraction [0 to 1]
    #     1.0e-2 = percent (%)
    #     1.0e-3 = ppthv (parts per thousand) or mmol mol^-1
    #     1.0e-6 = ppmv or mumol mol^-1
    #     1.0e-9 = ppbv or nmol mol^-1
    #     1.0e-12 = pptv (parts per trillion) or pmol mol^-1

    h2o:
        unit: 1.0e-9  # ppbv
        output_unit: 1.0e-3  # ppthv
        multiplier: 1.0e-6  # = unit / output_unit
        baseline_correction: 'none'

    co2:
        unit: 1.0e-9  # ppbv
        output_unit: 1.0e-6  # ppmv
        multiplier: 1.0e-3  # = unit / output_unit
        baseline_correction: 'median'

    cos:
        unit: 1.0e-9  # ppbv
        output_unit: 1.0e-12  # pptv
        multiplier: 1.0e+3  # = unit / output_unit
        baseline_correction: 'median'

    # Further gas species may be added in the same format.
    # The name chosen for an added gas species matters little, as long as
    # it is used *consistently*. For example, if the species name for CO2 is
    # defined as `CO_2`, the same name `CO_2` must be used in the
    # `species_list` key and in the unit definition that follows.