From 82d183f62a0145cd8f7dd4cb13b429637d949cf0 Mon Sep 17 00:00:00 2001 From: Alper Altuntas Date: Tue, 12 Nov 2024 16:47:37 -0700 Subject: [PATCH] CI test for input_data_list (#196) * introduce a CI test that checks whether any input files in MOM_input is missing in input_data_list * minor changes in check_input_data_list * fix param_templates paths * minor improvement in check_input_data_list filter function * more improvements in check_input_data_list filter function * major check_input_data_list.py refactor and introduction of check_input_data_repo.py * fix general-ci-tests.yaml * take 2: fix general-ci-tests.yaml * add all missing input_data_list entries --- .github/workflows/general-ci-tests.yml | 30 +++- param_templates/MOM_input.yaml | 4 +- param_templates/input_data_list.yaml | 59 +++++--- param_templates/json/MOM_input.json | 5 +- param_templates/json/input_data_list.json | 60 +++++--- tests/check_input_data_list.py | 171 ++++++++++++++++++++++ tests/check_input_data_repo.py | 48 ++++++ 7 files changed, 321 insertions(+), 56 deletions(-) create mode 100755 tests/check_input_data_list.py create mode 100755 tests/check_input_data_repo.py diff --git a/.github/workflows/general-ci-tests.yml b/.github/workflows/general-ci-tests.yml index ff6b28af..d51a8274 100644 --- a/.github/workflows/general-ci-tests.yml +++ b/.github/workflows/general-ci-tests.yml @@ -89,7 +89,35 @@ jobs: # Run the test - name: Run the check_default_params script run: python tests/check_default_params.py - + + # Job to run check_input_data_list script + check_input_data_list: + + runs-on: ubuntu-latest + + steps: + # Checkout the repo + - uses: actions/checkout@v4 + + # Run the test + - name: Run the check_input_data_list script + run: python tests/check_input_data_list.py + + # Job to run check_input_data_repo script + check_input_data_repo: + + runs-on: ubuntu-latest + + steps: + # Checkout the repo + - uses: actions/checkout@v4 + + # Run the test + - name: Run the check_input_data_repo script + run: | + pip install 'svn>=1,<1.1' + python tests/check_input_data_repo.py + # Job to run the black formatter for cime_config, see black documentation for more info check_black_format_for_cime_config: diff --git a/param_templates/MOM_input.yaml b/param_templates/MOM_input.yaml index b8509803..4eb1503f 100644 --- a/param_templates/MOM_input.yaml +++ b/param_templates/MOM_input.yaml @@ -2782,9 +2782,7 @@ Global: "default = 'MOM_IC' The file into which to write the initial conditions." datatype: string - value: - $OCN_GRID == "MISOMIP": "MISOMIP_IC" - else: = "${CASE}.mom6.ic.${RUN_STARTDATE}.nc" + value: = "${CASE}.mom6.ic.${RUN_STARTDATE}.nc" TRIMMING_USES_REMAPPING: description: | "[Boolean] default = False diff --git a/param_templates/input_data_list.yaml b/param_templates/input_data_list.yaml index b4d23893..ea1c7e3f 100644 --- a/param_templates/input_data_list.yaml +++ b/param_templates/input_data_list.yaml @@ -1,56 +1,67 @@ # The list of input files to be checked out from inputdata --- mom.input_data_list: - ocean_hgrid: + GRID_FILE: $OCN_GRID == "tx2_3v2": "${INPUTDIR}/ocean_hgrid_221123.nc" $OCN_GRID == "tx0.25v1": "${INPUTDIR}/ocean_hgrid.nc" - ocean_vgrid1: - $OCN_GRID == "tx2_3v2": "${INPUTDIR}/vgrid_65L_20200626.nc" + ALE_COORDINATE_CONFIG: + $MOM6_VERTICAL_GRID == "zstar_75L": "${INPUTDIR}/zstar_75layer_2.5m_248.4m-2024-03-29.nc" + $MOM6_VERTICAL_GRID == "zstar_65L": "${INPUTDIR}/vgrid_65L_20200626.nc" + $MOM6_VERTICAL_GRID == "hycom1" and $OCN_GRID != "tx0.25v1": "${INPUTDIR}/hybrid_75layer_zstar2.50m-2020-11-23.nc" + $MOM6_VERTICAL_GRID == "hycom1" and $OCN_GRID == "tx0.25v1": "${INPUTDIR}/hycom1_75_800m.nc" $OCN_GRID == "tx0.25v1": "${INPUTDIR}/hycom1_75_800m.nc" - ocean_vgrid2: + DIAG_COORD_DEF_Z: $OCN_GRID == "tx0.25v1": "${INPUTDIR}/interpolate_zgrid_40L.nc" - ocean_vgrid3: + COORD_FILE: $OCN_GRID == "tx0.25v1": "${INPUTDIR}/layer_coord.nc" - ocean_topog: - $OCN_GRID == "tx2_3v2": "${INPUTDIR}/ocean_topog_230413.nc" + TOPO_FILE: + $OCN_GRID == "tx2_3v2": "${INPUTDIR}/ocean_topo_tx2_3v2_240501.nc" $OCN_GRID == "tx0.25v1": "${INPUTDIR}/ocean_topog.nc" - ocean_topo_edit: + TOPO_EDITS_FILE: $OCN_GRID == "tx0.25v1": "${INPUTDIR}/All_edits.nc" - tempsalt: + TEMP_SALT_Z_INIT_FILE: $OCN_GRID in ["tx2_3v2", "tx0.25v1"]: $INIT_LAYERS_FROM_Z_FILE == "True": "${INPUTDIR}/${TEMP_SALT_Z_INIT_FILE}" - saltrestore: + MAX_LAYER_THICKNESS_CONFIG: + $MOM6_VERTICAL_GRID == "hycom1" and $OCN_GRID in ["tx2_3v2"]: + "${DIN_LOC_ROOT}/ocn/mom/grid_indpt/dz_max_90th_quantile.nc" + SURFACE_PRESSURE_FILE: + $OCN_GRID == "MISOMIP": "${INPUTDIR}/MISOMIP_181108.nc" + SALT_RESTORE_FILE: $OCN_GRID == "tx2_3v2": "${INPUTDIR}/state_restore_tx2_3_20230416.nc" - tidal: + TIDAL_ENERGY_FILE: + $OCN_GRID == "tx2_3v2": "${INPUTDIR}/energy_new_tx2_3_conserve_230415_cdf5.nc" $OCN_GRID == "tx0.25v1": "${INPUTDIR}/tidal_amplitude.v20140616.nc" - ocean_channel: + CHANNEL_LIST_FILE: + $OCN_GRID == "tx2_3v2": "${INPUTDIR}/MOM_channels_global_tx2_3v2_240501" $OCN_GRID == "tx0.25v1": "${INPUTDIR}/MOM_channels_global_025" - ocean_geothermal: + GEOTHERMAL_FILE: + $OCN_GRID == "tx2_3v2": "${INPUTDIR}/geothermal_davies2013_tx2_3_20240318_cdf5.nc" $OCN_GRID == "tx0.25v1": "${INPUTDIR}/geothermal_davies2013_v1.nc" - ocean_seaw: + CHL_FILE: $OCN_GRID == "tx0.25v1": "${INPUTDIR}/seawifs-clim-1997-2010.1440x1080.v20180328.nc" $OCN_GRID == "tx2_3v2": "${INPUTDIR}/seawifs-clim-1997-2010-tx2_3v2.230416.nc" - cfcs_forcing: "${DIN_LOC_ROOT}/ocn/mom/grid_indpt/cfc_atm_20230310.nc" - diag_coord_def_rho2: + CFC_BC_FILE: "${DIN_LOC_ROOT}/ocn/mom/grid_indpt/cfc_atm_20230310.nc" + DIAG_COORD_DEF_RHO2: $OCN_GRID == "tx2_3v2": "${INPUTDIR}/ocean_rho2_190917.nc" - marbl_tracers_ic_file: - $MARBL_CONFIG == "latest": "${INPUTDIR}/ecosys_jan_IC_omip_latlon_1x1_180W_c231221.nc" + MARBL_TRACERS_IC_FILE: + $MARBL_CONFIG == "latest": "${INPUTDIR}/ecosys_jan_IC_omip_latlon_1x1_180W_c230331.nc" $MARBL_CONFIG == "latest+4p2z": "${INPUTDIR}/ecosys_jan_IC_omip_latlon_1x1_180W_c231221.nc" - marbl_fesedflux_file: + MARBL_FESEDFLUX_FILE: '"BASE_BIO_ON=TRUE" in $MARBL_TRACER_OPTS': $OCN_GRID == "tx2_3v2": "${INPUTDIR}/fesedflux_total_reduce_oxic_tx2_3v2.c231205.nc" - marbl_feventflux_file: + MARBL_FEVENTFLUX_FILE: '"BASE_BIO_ON=TRUE" in $MARBL_TRACER_OPTS': $OCN_GRID == "tx2_3v2": "${INPUTDIR}/feventflux_5gmol_tx2_3v2.c231205.nc" - riv_flux_file: + RIV_FLUX_FILE: '"BASE_BIO_ON=TRUE" in $MARBL_TRACER_OPTS': '$ROF_GRID == "JRA025" and $OCN_GRID == "tx2_3v2"': "${INPUTDIR}/riv_nut.gnews_gnm.rJRA025_to_tx2_3v2_nnsm_e333r100_230415.20240202.nc" '$ROF_GRID == "r05" and $OCN_GRID == "tx2_3v2"': "${INPUTDIR}/riv_nut.gnews_gnm.r05_to_tx2_3v2_nnsm_e250r250_230914.20240202.nc" - d14c_file_1: + MARBL_D14C_FILE_1: '"ABIO_DIC_ON=TRUE" in $MARBL_TRACER_OPTS': "${DIN_LOC_ROOT}/ocn/mom/grid_indpt/atm_delta_C14_CMIP6_sector1_global_1850-2015_yearly_v2.0_c240202.nc" - d14c_file_2: + MARBL_D14C_FILE_2: '"ABIO_DIC_ON=TRUE" in $MARBL_TRACER_OPTS': "${DIN_LOC_ROOT}/ocn/mom/grid_indpt/atm_delta_C14_CMIP6_sector2_global_1850-2015_yearly_v2.0_c240202.nc" - d14c_file_3: + MARBL_D14C_FILE_3: '"ABIO_DIC_ON=TRUE" in $MARBL_TRACER_OPTS': "${DIN_LOC_ROOT}/ocn/mom/grid_indpt/atm_delta_C14_CMIP6_sector3_global_1850-2015_yearly_v2.0_c240202.nc" ... diff --git a/param_templates/json/MOM_input.json b/param_templates/json/MOM_input.json index 773930c0..37ca4fb4 100644 --- a/param_templates/json/MOM_input.json +++ b/param_templates/json/MOM_input.json @@ -2211,10 +2211,7 @@ "IC_OUTPUT_FILE": { "description": "\"default = 'MOM_IC'\nThe file into which to write the initial conditions.\"\n", "datatype": "string", - "value": { - "$OCN_GRID == \"MISOMIP\"": "MISOMIP_IC", - "else": "= \"${CASE}.mom6.ic.${RUN_STARTDATE}.nc\"" - } + "value": "= \"${CASE}.mom6.ic.${RUN_STARTDATE}.nc\"" }, "TRIMMING_USES_REMAPPING": { "description": "\"[Boolean] default = False\nWhen trimming the column, also remap T and S.\"\n", diff --git a/param_templates/json/input_data_list.json b/param_templates/json/input_data_list.json index d0581739..5a116b2a 100644 --- a/param_templates/json/input_data_list.json +++ b/param_templates/json/input_data_list.json @@ -1,78 +1,90 @@ { "mom.input_data_list": { - "ocean_hgrid": { + "GRID_FILE": { "$OCN_GRID == \"tx2_3v2\"": "${INPUTDIR}/ocean_hgrid_221123.nc", "$OCN_GRID == \"tx0.25v1\"": "${INPUTDIR}/ocean_hgrid.nc" }, - "ocean_vgrid1": { - "$OCN_GRID == \"tx2_3v2\"": "${INPUTDIR}/vgrid_65L_20200626.nc", + "ALE_COORDINATE_CONFIG": { + "$MOM6_VERTICAL_GRID == \"zstar_75L\"": "${INPUTDIR}/zstar_75layer_2.5m_248.4m-2024-03-29.nc", + "$MOM6_VERTICAL_GRID == \"zstar_65L\"": "${INPUTDIR}/vgrid_65L_20200626.nc", + "$MOM6_VERTICAL_GRID == \"hycom1\" and $OCN_GRID != \"tx0.25v1\"": "${INPUTDIR}/hybrid_75layer_zstar2.50m-2020-11-23.nc", + "$MOM6_VERTICAL_GRID == \"hycom1\" and $OCN_GRID == \"tx0.25v1\"": "${INPUTDIR}/hycom1_75_800m.nc", "$OCN_GRID == \"tx0.25v1\"": "${INPUTDIR}/hycom1_75_800m.nc" }, - "ocean_vgrid2": { + "DIAG_COORD_DEF_Z": { "$OCN_GRID == \"tx0.25v1\"": "${INPUTDIR}/interpolate_zgrid_40L.nc" }, - "ocean_vgrid3": { + "COORD_FILE": { "$OCN_GRID == \"tx0.25v1\"": "${INPUTDIR}/layer_coord.nc" }, - "ocean_topog": { - "$OCN_GRID == \"tx2_3v2\"": "${INPUTDIR}/ocean_topog_230413.nc", + "TOPO_FILE": { + "$OCN_GRID == \"tx2_3v2\"": "${INPUTDIR}/ocean_topo_tx2_3v2_240501.nc", "$OCN_GRID == \"tx0.25v1\"": "${INPUTDIR}/ocean_topog.nc" }, - "ocean_topo_edit": { + "TOPO_EDITS_FILE": { "$OCN_GRID == \"tx0.25v1\"": "${INPUTDIR}/All_edits.nc" }, - "tempsalt": { + "TEMP_SALT_Z_INIT_FILE": { "$OCN_GRID in [\"tx2_3v2\", \"tx0.25v1\"]": { "$INIT_LAYERS_FROM_Z_FILE == \"True\"": "${INPUTDIR}/${TEMP_SALT_Z_INIT_FILE}" } }, - "saltrestore": { + "MAX_LAYER_THICKNESS_CONFIG": { + "$MOM6_VERTICAL_GRID == \"hycom1\" and $OCN_GRID in [\"tx2_3v2\"]": "${DIN_LOC_ROOT}/ocn/mom/grid_indpt/dz_max_90th_quantile.nc" + }, + "SURFACE_PRESSURE_FILE": { + "$OCN_GRID == \"MISOMIP\"": "${INPUTDIR}/MISOMIP_181108.nc" + }, + "SALT_RESTORE_FILE": { "$OCN_GRID == \"tx2_3v2\"": "${INPUTDIR}/state_restore_tx2_3_20230416.nc" }, - "tidal": { + "TIDAL_ENERGY_FILE": { + "$OCN_GRID == \"tx2_3v2\"": "${INPUTDIR}/energy_new_tx2_3_conserve_230415_cdf5.nc", "$OCN_GRID == \"tx0.25v1\"": "${INPUTDIR}/tidal_amplitude.v20140616.nc" }, - "ocean_channel": { + "CHANNEL_LIST_FILE": { + "$OCN_GRID == \"tx2_3v2\"": "${INPUTDIR}/MOM_channels_global_tx2_3v2_240501", "$OCN_GRID == \"tx0.25v1\"": "${INPUTDIR}/MOM_channels_global_025" }, - "ocean_geothermal": { + "GEOTHERMAL_FILE": { + "$OCN_GRID == \"tx2_3v2\"": "${INPUTDIR}/geothermal_davies2013_tx2_3_20240318_cdf5.nc", "$OCN_GRID == \"tx0.25v1\"": "${INPUTDIR}/geothermal_davies2013_v1.nc" }, - "ocean_seaw": { + "CHL_FILE": { "$OCN_GRID == \"tx0.25v1\"": "${INPUTDIR}/seawifs-clim-1997-2010.1440x1080.v20180328.nc", "$OCN_GRID == \"tx2_3v2\"": "${INPUTDIR}/seawifs-clim-1997-2010-tx2_3v2.230416.nc" }, - "cfcs_forcing": "${DIN_LOC_ROOT}/ocn/mom/grid_indpt/cfc_atm_20230310.nc", - "diag_coord_def_rho2": { + "CFC_BC_FILE": "${DIN_LOC_ROOT}/ocn/mom/grid_indpt/cfc_atm_20230310.nc", + "DIAG_COORD_DEF_RHO2": { "$OCN_GRID == \"tx2_3v2\"": "${INPUTDIR}/ocean_rho2_190917.nc" }, - "marbl_tracers_ic_file": { - "$MARBL_CONFIG == \"latest\"": "${INPUTDIR}/ecosys_jan_IC_omip_latlon_1x1_180W_c231221.nc", + "MARBL_TRACERS_IC_FILE": { + "$MARBL_CONFIG == \"latest\"": "${INPUTDIR}/ecosys_jan_IC_omip_latlon_1x1_180W_c230331.nc", "$MARBL_CONFIG == \"latest+4p2z\"": "${INPUTDIR}/ecosys_jan_IC_omip_latlon_1x1_180W_c231221.nc" }, - "marbl_fesedflux_file": { + "MARBL_FESEDFLUX_FILE": { "\"BASE_BIO_ON=TRUE\" in $MARBL_TRACER_OPTS": { "$OCN_GRID == \"tx2_3v2\"": "${INPUTDIR}/fesedflux_total_reduce_oxic_tx2_3v2.c231205.nc" } }, - "marbl_feventflux_file": { + "MARBL_FEVENTFLUX_FILE": { "\"BASE_BIO_ON=TRUE\" in $MARBL_TRACER_OPTS": { "$OCN_GRID == \"tx2_3v2\"": "${INPUTDIR}/feventflux_5gmol_tx2_3v2.c231205.nc" } }, - "riv_flux_file": { + "RIV_FLUX_FILE": { "\"BASE_BIO_ON=TRUE\" in $MARBL_TRACER_OPTS": { "$ROF_GRID == \"JRA025\" and $OCN_GRID == \"tx2_3v2\"": "${INPUTDIR}/riv_nut.gnews_gnm.rJRA025_to_tx2_3v2_nnsm_e333r100_230415.20240202.nc", "$ROF_GRID == \"r05\" and $OCN_GRID == \"tx2_3v2\"": "${INPUTDIR}/riv_nut.gnews_gnm.r05_to_tx2_3v2_nnsm_e250r250_230914.20240202.nc" } }, - "d14c_file_1": { + "MARBL_D14C_FILE_1": { "\"ABIO_DIC_ON=TRUE\" in $MARBL_TRACER_OPTS": "${DIN_LOC_ROOT}/ocn/mom/grid_indpt/atm_delta_C14_CMIP6_sector1_global_1850-2015_yearly_v2.0_c240202.nc" }, - "d14c_file_2": { + "MARBL_D14C_FILE_2": { "\"ABIO_DIC_ON=TRUE\" in $MARBL_TRACER_OPTS": "${DIN_LOC_ROOT}/ocn/mom/grid_indpt/atm_delta_C14_CMIP6_sector2_global_1850-2015_yearly_v2.0_c240202.nc" }, - "d14c_file_3": { + "MARBL_D14C_FILE_3": { "\"ABIO_DIC_ON=TRUE\" in $MARBL_TRACER_OPTS": "${DIN_LOC_ROOT}/ocn/mom/grid_indpt/atm_delta_C14_CMIP6_sector3_global_1850-2015_yearly_v2.0_c240202.nc" } } diff --git a/tests/check_input_data_list.py b/tests/check_input_data_list.py new file mode 100755 index 00000000..98ddc9f9 --- /dev/null +++ b/tests/check_input_data_list.py @@ -0,0 +1,171 @@ +#!/usr/bin/env python + +import yaml +import re + + +# List of filename regex patterns to exclude from the check between MOM_input.yaml and input_data_list.yaml +EXCEPTIONS = [ + "._velocity_truncations", +] + + +def extract_values(d): + """Given a dictionary or value, this function recursively extracts all values from the dictionary.""" + if isinstance(d, dict): + for key in d: + yield from extract_values(d[key]) + else: + yield d + + +def _retrieve_input_filenames(varname, values): + """Given a variable name and values pair from a param template dict, this function extracts all + input file names from the values. It attempts to filter out non-file names, output file names, + and known exceptions. It also retains only the file names from relative/absolute paths. + + Parameters + ---------- + varname : str + The variable name from the param template dict. + values : list + A list of values for the variable name from the param template dict. + + Returns + ------- + values : list + A list of input file names extracted from the values. + """ + # Remove all entries that are not strings + values = [v for v in values if isinstance(v, str)] + + # Retain only values that are (most likely) file names: ending with '.nc' or '.txt' or containing 'FILE:' + # or containing ':.nc' or ':.txt' + values = [ + v + for v in values + if "FILE:" in v + or re.search(":[\w\-\.]+\.(nc|txt)", v) + or v.endswith(".nc") + or v.endswith(".txt") + or varname.endswith("_FILE") + or v.strip("${}").endswith("_FILE") + ] + + # Extract the file names from values of the form 'WORD:FILENAME[,]' + values = [v.split(":")[1].split(",")[0].strip() if ":" in v else v for v in values] + + # If relative/absolute paths, retain only the file names + values = [v.split("/")[-1] for v in values] + + # Exclude entries containing CASE-specific XML variables, as these indicate output files rather than inputs. + values = [v for v in values if not re.search(r"\$\{.*CASE.*\}|\{\$.*CASE.*\}", v)] + + # Filter out known exceptions: + for pattern in EXCEPTIONS: + values = [v for v in values if not re.search(pattern, v)] + + return values + + +def get_input_files_in_MOM_input(MOM_input_yaml): + """ + This function reads the MOM_input.yaml file and extracts all input file names that it can detect. + + Parameters + ---------- + MOM_input_yaml : dict + The dictionary object containing the parsed MOM_input.yaml file. + + Returns + ------- + file_params: dict + A dictionary of varname: file names pairs, where varname is the parameter name and file names are the input file names. + """ + + files = {} + + for module in MOM_input_yaml: + for varname in MOM_input_yaml[module]: + value_block = MOM_input_yaml[module][varname]["value"] + values = _retrieve_input_filenames(varname, extract_values(value_block)) + if values: + files[varname] = values + + return files + + +def get_input_data_list_files(input_data_list_yaml, MOM_input_files): + """ + This function reads the input_data_list.yaml file and extracts all input file names that it can detect. + To do so, it looks for all values in the input_data_list.yaml file. + + Parameters + ---------- + input_data_list_yaml : dict + The dictionary object containing the parsed input_data_list.yaml file. + MOM_input_files : dict + The dictionary object containing the varname: file names pairs extracted from the MOM_input.yaml file. + To be used to expand expandable variables in the input_data_list.yaml file. + + Returns + ------- + files: dict + A dictionary of varname: file names pairs, where varname is the parameter name and file names are the input file names. + """ + + files = {} + + input_data_list = input_data_list_yaml["mom.input_data_list"] + + for varname in input_data_list: + _files = _retrieve_input_filenames( + varname, extract_values(input_data_list[varname]) + ) + if _files: + # Expand expandable variables in the input_data_list.yaml file + for i, _file in enumerate(_files): + # Find all expandable variables in the file name: + expandable_vars = re.findall(r"\$\{.*\}|\b\$.*\b", _file) + for expandable_var in expandable_vars: + if expandable_var.strip("${}") in MOM_input_files: + # Replace the expandable variable with the corresponding file name from MOM_input.yaml + _files.pop(i) + _files.extend(MOM_input_files[expandable_var.strip("${}")]) + + files[varname] = _files + + return files + + +if __name__ == "__main__": + + # Read in the MOM_input.yaml file and extract all input file names + MOM_input_yaml = yaml.safe_load(open("./param_templates/MOM_input.yaml", "r")) + MOM_input_files = get_input_files_in_MOM_input(MOM_input_yaml) + + # Read in the input_data_list.yaml file and extract all input file names + input_data_list_yaml = yaml.safe_load( + open("./param_templates/input_data_list.yaml", "r") + ) + input_data_list_files = get_input_data_list_files( + input_data_list_yaml, MOM_input_files + ) + + # Check if all files in MOM_input.yaml are present in input_data_list.yaml + # If not, print the missing files and raise an error + missing_files = set( + filename for filelist in MOM_input_files.values() for filename in filelist + ) - set( + filename for filelist in input_data_list_files.values() for filename in filelist + ) + if missing_files: + raise ValueError( + "Below parameter value(s) in MOM_input.yaml are suspected to be input file name(s), " + "but are not present in input_data_list.yaml. If these are indeed input files, " + "please add them to input_data_list.yaml. If not, please update this CI test module " + "to exclude them from the check e.g., by adding them to the EXCEPTIONS list.\n\n " + + "\n ".join(missing_files) + ) + + print("All input files in MOM_input.yaml are present in input_data_list.yaml.") diff --git a/tests/check_input_data_repo.py b/tests/check_input_data_repo.py new file mode 100755 index 00000000..2b2529ce --- /dev/null +++ b/tests/check_input_data_repo.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python + +import yaml +import svn.remote as sr +from check_input_data_list import ( + get_input_files_in_MOM_input, + get_input_data_list_files, +) + +if __name__ == "__main__": + + # Read in the MOM_input.yaml file and extract all input file names + MOM_input_yaml = yaml.safe_load(open("./param_templates/MOM_input.yaml", "r")) + MOM_input_files = get_input_files_in_MOM_input(MOM_input_yaml) + + # Read in the input_data_list.yaml file and extract all input file names + input_data_list_yaml = yaml.safe_load( + open("./param_templates/input_data_list.yaml", "r") + ) + input_data_list_files = get_input_data_list_files( + input_data_list_yaml, MOM_input_files + ) + + # all mom input file names in svn inputdata repository + r = sr.RemoteClient( + "https://svn-ccsm-inputdata.cgd.ucar.edu/trunk/inputdata/ocn/mom/" + ) + repo_files = {f["name"] for relpath, f in r.list_recursive() if f["kind"] == "file"} + + # File names missing in the svn repository + missing_files = ( + set( + filename + for filelist in input_data_list_files.values() + for filename in filelist + ) + - repo_files + ) + if missing_files: + raise ValueError( + "Below file names are listed in input_data_list.yaml but are missing " + "in the svn inputdata repository. If these files are not needed, " + "please remove them from input_data_list.yaml. If they are needed, " + "please import them to the svn repository.\n\n " + + "\n ".join(missing_files) + ) + else: + print("All files in input_data_list.yaml are present in the svn repository.")