From 290f1d2d0b8a461e6ada787c673c713573ab2be1 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 24 Dec 2024 01:05:34 -0700 Subject: [PATCH 1/4] Support global-workflow using Rocky 8 on CSPs (#2998) # Description With ParallelWorks now defaulting to Rocky 8 on CSPs, and moving to Rocky 8 only after 1/1/2025, we need to modify global-workflow module files to use Rocky 8 supported spack-stack, and test compile and run to make sure all works under Rocky 8. i) Rocky 8 update new features: a. Wave worked in C48_S2SWA_gefs case, so turn SUPPORT_WAVES to "YES" in awspw.yaml. Actually, if we did not set SUPPORT_WAVES to "YES", setup_expt.py would raise an exception. b. Using two types of nodes (chips/queues) on AWS, compute/process, where forecasts run in the "compute" queue, which has big nodes (more cores), and others run in the "process" queue, which has small nodes (fewer cores). ii) Rocky 8 update needs the following submodules PRs below - NOAA-EMC/gfs_utils#81 - NOAA-EMC/ufs_utils#989 - NOAA-EMC/upp#1034 - ufs-community/ufs-weather-model#2461 Resolves #2997 --------- Co-authored-by: David Huber <69919478+DavidHuber-NOAA@users.noreply.github.com> --- env/AWSPW.env | 68 ++++++++++-------------- env/AZUREPW.env | 17 +++--- env/GOOGLEPW.env | 4 +- modulefiles/module_base.noaacloud.lua | 3 ++ modulefiles/module_gwci.noaacloud.lua | 6 +-- modulefiles/module_gwsetup.noaacloud.lua | 13 ++--- parm/config/gefs/config.resources | 6 +-- parm/config/gefs/config.resources.AWSPW | 58 ++++++++++++++++++++ parm/config/gfs/config.resources | 8 +-- parm/config/gfs/config.resources.AWSPW | 24 +++++++++ sorc/build_ufs.sh | 2 +- sorc/gfs_utils.fd | 2 +- sorc/ufs_utils.fd | 2 +- versions/build.noaacloud.ver | 6 +-- versions/run.noaacloud.ver | 6 +-- workflow/hosts/awspw.yaml | 4 +- workflow/hosts/azurepw.yaml | 6 ++- workflow/hosts/googlepw.yaml | 6 ++- workflow/setup_expt.py | 4 -- 19 files changed, 161 insertions(+), 84 deletions(-) diff --git a/env/AWSPW.env b/env/AWSPW.env index e366128a1d..f365695f85 100755 --- 
a/env/AWSPW.env +++ b/env/AWSPW.env @@ -33,7 +33,29 @@ else exit 2 fi -if [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then +if [[ "${step}" = "prep" ]] || [[ "${step}" = "prepbufr" ]]; then + + export POE="NO" + export BACK="NO" + export sys_tp="AWSPW" + export launcher_PREP="srun" + +elif [[ "${step}" = "prepsnowobs" ]]; then + + export APRUN_CALCFIMS="${APRUN_default}" + +elif [[ "${step}" = "prep_emissions" ]]; then + + export APRUN="${APRUN_default}" + +elif [[ "${step}" = "waveinit" ]] || [[ "${step}" = "waveprep" ]] || [[ "${step}" = "wavepostsbs" ]] || [[ "${step}" = "wavepostbndpnt" ]] || [[ "${step}" = "wavepostbndpntbll" ]] || [[ "${step}" = "wavepostpnt" ]]; then + + export CFP_MP="YES" + if [[ "${step}" = "waveprep" ]]; then export MP_PULSE=0 ; fi + export wavempexec=${launcher} + export wave_mpmd=${mpmd_opt} + +elif [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then export launcher="srun --mpi=pmi2 -l" @@ -52,52 +74,16 @@ elif [[ "${step}" = "waveinit" ]] || [[ "${step}" = "waveprep" ]] || [[ "${step} elif [[ "${step}" = "post" ]]; then - export NTHREADS_NP=${NTHREADS1} - export APRUN_NP="${APRUN_default}" - - export NTHREADS_DWN=${threads_per_task_dwn:-1} - [[ ${NTHREADS_DWN} -gt ${max_threads_per_task} ]] && export NTHREADS_DWN=${max_threads_per_task} - export APRUN_DWN="${launcher} -n ${ntasks_dwn}" - -elif [[ "${step}" = "atmos_products" ]]; then - - export USE_CFP="YES" # Use MPMD for downstream product generation on Hera + export NTHREADS_UPP=${NTHREADS1} + export APRUN_UPP="${APRUN_default} --cpus-per-task=${NTHREADS_UPP}" elif [[ "${step}" = "oceanice_products" ]]; then export NTHREADS_OCNICEPOST=${NTHREADS1} export APRUN_OCNICEPOST="${launcher} -n 1 --cpus-per-task=${NTHREADS_OCNICEPOST}" -elif [[ "${step}" = "ecen" ]]; then - - export NTHREADS_ECEN=${NTHREADSmax} - export APRUN_ECEN="${APRUN_default}" - - export NTHREADS_CHGRES=${threads_per_task_chgres:-12} - [[ ${NTHREADS_CHGRES} -gt ${max_tasks_per_node} ]] && export 
NTHREADS_CHGRES=${max_tasks_per_node} - export APRUN_CHGRES="time" - - export NTHREADS_CALCINC=${threads_per_task_calcinc:-1} - [[ ${NTHREADS_CALCINC} -gt ${max_threads_per_task} ]] && export NTHREADS_CALCINC=${max_threads_per_task} - export APRUN_CALCINC="${APRUN_default}" - -elif [[ "${step}" = "esfc" ]]; then - - export NTHREADS_ESFC=${NTHREADSmax} - export APRUN_ESFC="${APRUN_default}" - - export NTHREADS_CYCLE=${threads_per_task_cycle:-14} - [[ ${NTHREADS_CYCLE} -gt ${max_tasks_per_node} ]] && export NTHREADS_CYCLE=${max_tasks_per_node} - export APRUN_CYCLE="${APRUN_default}" - -elif [[ "${step}" = "epos" ]]; then - - export NTHREADS_EPOS=${NTHREADSmax} - export APRUN_EPOS="${APRUN_default}" - -elif [[ "${step}" = "fit2obs" ]]; then +elif [[ "${step}" = "atmos_products" ]]; then - export NTHREADS_FIT2OBS=${NTHREADS1} - export MPIRUN="${APRUN_default}" + export USE_CFP="YES" # Use MPMD for downstream product generation on AWS fi diff --git a/env/AZUREPW.env b/env/AZUREPW.env index c2faeb2bf6..b2b4063ff3 100755 --- a/env/AZUREPW.env +++ b/env/AZUREPW.env @@ -15,6 +15,7 @@ export mpmd_opt="--multi-prog --output=mpmd.%j.%t.out" # Configure MPI environment export OMP_STACKSIZE=2048000 export NTHSTACK=1024000000 +export UCX_TLS=ud,sm,self ulimit -s unlimited ulimit -a @@ -50,6 +51,10 @@ elif [[ "${step}" = "waveinit" ]] || [[ "${step}" = "waveprep" ]] || [[ "${step} export wavempexec=${launcher} export wave_mpmd=${mpmd_opt} +elif [[ "${step}" = "prep_emissions" ]]; then + + export APRUN="${APRUN_default}" + elif [[ "${step}" = "post" ]]; then export NTHREADS_NP=${NTHREADS1} @@ -71,7 +76,7 @@ elif [[ "${step}" = "oceanice_products" ]]; then elif [[ "${step}" = "ecen" ]]; then export NTHREADS_ECEN=${NTHREADSmax} - export APRUN_ECEN="${APRUN}" + export APRUN_ECEN="${APRUN_default}" export NTHREADS_CHGRES=${threads_per_task_chgres:-12} [[ ${NTHREADS_CHGRES} -gt ${max_tasks_per_node} ]] && export NTHREADS_CHGRES=${max_tasks_per_node} @@ -79,25 +84,25 @@ elif [[ "${step}" 
= "ecen" ]]; then export NTHREADS_CALCINC=${threads_per_task_calcinc:-1} [[ ${NTHREADS_CALCINC} -gt ${max_threads_per_task} ]] && export NTHREADS_CALCINC=${max_threads_per_task} - export APRUN_CALCINC="${APRUN}" + export APRUN_CALCINC="${APRUN_default}" elif [[ "${step}" = "esfc" ]]; then export NTHREADS_ESFC=${NTHREADSmax} - export APRUN_ESFC="${APRUN}" + export APRUN_ESFC="${APRUN_default}" export NTHREADS_CYCLE=${threads_per_task_cycle:-14} [[ ${NTHREADS_CYCLE} -gt ${max_tasks_per_node} ]] && export NTHREADS_CYCLE=${max_tasks_per_node} - export APRUN_CYCLE="${APRUN}" + export APRUN_CYCLE="${APRUN_default}" elif [[ "${step}" = "epos" ]]; then export NTHREADS_EPOS=${NTHREADSmax} - export APRUN_EPOS="${APRUN}" + export APRUN_EPOS="${APRUN_default}" elif [[ "${step}" = "fit2obs" ]]; then export NTHREADS_FIT2OBS=${NTHREADS1} - export MPIRUN="${APRUN}" + export MPIRUN="${APRUN_default}" fi diff --git a/env/GOOGLEPW.env b/env/GOOGLEPW.env index c3b5ec806a..d84008d648 100755 --- a/env/GOOGLEPW.env +++ b/env/GOOGLEPW.env @@ -45,7 +45,7 @@ if [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then elif [[ "${step}" = "prep_emissions" ]]; then - export APRUN + export APRUN="${APRUN_default}" elif [[ "${step}" = "waveinit" ]] || [[ "${step}" = "waveprep" ]] || [[ "${step}" = "wavepostsbs" ]] || [[ "${step}" = "wavepostbndpnt" ]] || [[ "${step}" = "wavepostbndpntbll" ]] || [[ "${step}" = "wavepostpnt" ]]; then @@ -102,6 +102,6 @@ elif [[ "${step}" = "epos" ]]; then elif [[ "${step}" = "fit2obs" ]]; then export NTHREADS_FIT2OBS=${NTHREADS1} - export MPIRUN="${APRUN}" + export MPIRUN="${APRUN_default}" fi diff --git a/modulefiles/module_base.noaacloud.lua b/modulefiles/module_base.noaacloud.lua index 7997b618e4..3a7cc75d7a 100644 --- a/modulefiles/module_base.noaacloud.lua +++ b/modulefiles/module_base.noaacloud.lua @@ -5,8 +5,11 @@ Load environment to run GFS on noaacloud local spack_mod_path=(os.getenv("spack_mod_path") or "None") prepend_path("MODULEPATH", 
spack_mod_path) +load("gnu") load(pathJoin("stack-intel", (os.getenv("stack_intel_ver") or "None"))) load(pathJoin("stack-intel-oneapi-mpi", (os.getenv("stack_impi_ver") or "None"))) +unload("gnu") + load(pathJoin("python", (os.getenv("python_ver") or "None"))) load(pathJoin("jasper", (os.getenv("jasper_ver") or "None"))) diff --git a/modulefiles/module_gwci.noaacloud.lua b/modulefiles/module_gwci.noaacloud.lua index c3142cd60d..2ac284ef85 100644 --- a/modulefiles/module_gwci.noaacloud.lua +++ b/modulefiles/module_gwci.noaacloud.lua @@ -2,10 +2,10 @@ help([[ Load environment to run GFS workflow setup scripts on noaacloud ]]) -prepend_path("MODULEPATH", "/contrib/spack-stack/spack-stack-1.6.0/envs/unified-env/install/modulefiles/Core") +prepend_path("MODULEPATH", "/contrib/spack-stack-rocky8/spack-stack-1.6.0/envs/ue-env/install/modulefiles/Core") -load(pathJoin("stack-intel", os.getenv("2021.3.0"))) -load(pathJoin("stack-intel-oneapi-mpi", os.getenv("2021.3.0"))) +load(pathJoin("stack-intel", os.getenv("2021.10.0"))) +load(pathJoin("stack-intel-oneapi-mpi", os.getenv("2021.10.0"))) load(pathJoin("netcdf-c", os.getenv("4.9.2"))) load(pathJoin("netcdf-fortran", os.getenv("4.6.1"))) diff --git a/modulefiles/module_gwsetup.noaacloud.lua b/modulefiles/module_gwsetup.noaacloud.lua index f3845e8d72..e2aa4050a3 100644 --- a/modulefiles/module_gwsetup.noaacloud.lua +++ b/modulefiles/module_gwsetup.noaacloud.lua @@ -4,17 +4,18 @@ Load environment to run GFS workflow setup scripts on noaacloud load(pathJoin("rocoto")) -prepend_path("MODULEPATH", "/contrib/spack-stack/spack-stack-1.6.0/envs/unified-env/install/modulefiles/Core") +prepend_path("MODULEPATH", "/contrib/spack-stack-rocky8/spack-stack-1.6.0/envs/ue-intel/install/modulefiles/Core") -local stack_intel_ver=os.getenv("stack_intel_ver") or "2021.3.0" -local python_ver=os.getenv("python_ver") or "3.10.3" +load("gnu") +local stack_intel_ver=os.getenv("stack_intel_ver") or "2021.10.0" +local 
stack_mpi_ver=os.getenv("stack_mpi_ver") or "2021.10.0" load(pathJoin("stack-intel", stack_intel_ver)) -load(pathJoin("python", python_ver)) +load(pathJoin("stack-intel-oneapi-mpi", stack_mpi_ver)) +unload("gnu") + load("py-jinja2") load("py-pyyaml") load("py-numpy") -local git_ver=os.getenv("git_ver") or "1.8.3.1" -load(pathJoin("git", git_ver)) whatis("Description: GFS run setup environment") diff --git a/parm/config/gefs/config.resources b/parm/config/gefs/config.resources index e1b9a036de..68f81c1039 100644 --- a/parm/config/gefs/config.resources +++ b/parm/config/gefs/config.resources @@ -41,15 +41,15 @@ case ${machine} in ;; "AWSPW") export PARTITION_BATCH="compute" - max_tasks_per_node=36 + max_tasks_per_node=48 ;; "AZUREPW") export PARTITION_BATCH="compute" - max_tasks_per_node=24 + max_tasks_per_node=36 ;; "GOOGLEPW") export PARTITION_BATCH="compute" - max_tasks_per_node=32 + max_tasks_per_node=30 ;; *) echo "FATAL ERROR: Unknown machine encountered by ${BASH_SOURCE[0]}" diff --git a/parm/config/gefs/config.resources.AWSPW b/parm/config/gefs/config.resources.AWSPW index a735c7622d..f91460b6aa 100644 --- a/parm/config/gefs/config.resources.AWSPW +++ b/parm/config/gefs/config.resources.AWSPW @@ -9,3 +9,61 @@ unset memory for mem_var in $(env | grep '^memory_' | cut -d= -f1); do unset "${mem_var}" done + +step=$1 + +case ${step} in + "fcst" | "efcs") + export PARTITION_BATCH="compute" + max_tasks_per_node=48 + ;; + + "arch") + export PARTITION_BATCH="process" + max_tasks_per_node=24 + ;; + + "prep_emissions") + export PARTITION_BATCH="process" + max_tasks_per_node=24 + export ntasks=1 + export threads_per_task=1 + export tasks_per_node=$(( max_tasks_per_node / threads_per_task )) + ;; + + "waveinit") + export PARTITION_BATCH="process" + max_tasks_per_node=24 + export ntasks=12 + export threads_per_task=1 + export tasks_per_node=$(( max_tasks_per_node / threads_per_task )) + export NTASKS=${ntasks} + ;; + + "wavepostpnt") + export PARTITION_BATCH="compute" + 
max_tasks_per_node=48 + export ntasks=240 + export threads_per_task=1 + export tasks_per_node=$(( max_tasks_per_node / threads_per_task )) + export NTASKS=${ntasks} + ;; + + "wavepostsbs" | "wavepostbndpnt" | "wavepostbndpntbll") + export PARTITION_BATCH="process" + max_tasks_per_node=24 + export ntasks=24 + export threads_per_task=1 + export tasks_per_node=$(( max_tasks_per_node / threads_per_task )) + export NTASKS=${ntasks} + ;; + + *) + export PARTITION_BATCH="process" + max_tasks_per_node=24 + ;; + +esac + +export max_tasks_per_node + diff --git a/parm/config/gfs/config.resources b/parm/config/gfs/config.resources index 230872b8f3..eeb33716c0 100644 --- a/parm/config/gfs/config.resources +++ b/parm/config/gfs/config.resources @@ -107,16 +107,16 @@ case ${machine} in ;; "AWSPW") export PARTITION_BATCH="compute" - npe_node_max=36 - max_tasks_per_node=36 + npe_node_max=48 + max_tasks_per_node=48 # TODO Supply a max mem/node value for AWS # shellcheck disable=SC2034 mem_node_max="" ;; "AZUREPW") export PARTITION_BATCH="compute" - npe_node_max=24 - max_tasks_per_node=24 + npe_node_max=36 + max_tasks_per_node=36 # TODO Supply a max mem/node value for AZURE # shellcheck disable=SC2034 mem_node_max="" diff --git a/parm/config/gfs/config.resources.AWSPW b/parm/config/gfs/config.resources.AWSPW index a735c7622d..22fe110670 100644 --- a/parm/config/gfs/config.resources.AWSPW +++ b/parm/config/gfs/config.resources.AWSPW @@ -9,3 +9,27 @@ unset memory for mem_var in $(env | grep '^memory_' | cut -d= -f1); do unset "${mem_var}" done + +step=$1 + +case ${step} in + "fcst" | "efcs") + export PARTITION_BATCH="compute" + max_tasks_per_node=48 + ;; + + "arch") + export PARTITION_BATCH="process" + max_tasks_per_node=24 + ;; + + + *) + export PARTITION_BATCH="process" + max_tasks_per_node=24 + ;; + +esac + +export max_tasks_per_node + diff --git a/sorc/build_ufs.sh b/sorc/build_ufs.sh index 773c104be3..3b0b3ed638 100755 --- a/sorc/build_ufs.sh +++ b/sorc/build_ufs.sh @@ -12,7 +12,7 
@@ EXEC_NAME="gfs_model.x" while getopts ":da:fj:e:vwy" option; do case "${option}" in - d) BUILD_TYPE="Debug";; + d) BUILD_TYPE="DEBUG";; a) APP="${OPTARG}";; f) FASTER="ON";; j) BUILD_JOBS="${OPTARG}";; diff --git a/sorc/gfs_utils.fd b/sorc/gfs_utils.fd index 856a42076a..4848ecbb5e 160000 --- a/sorc/gfs_utils.fd +++ b/sorc/gfs_utils.fd @@ -1 +1 @@ -Subproject commit 856a42076a65256aaae9b29f4891532cb4a3fbca +Subproject commit 4848ecbb5e713b16127433e11f7d3edc6ac784c4 diff --git a/sorc/ufs_utils.fd b/sorc/ufs_utils.fd index 06eec5b6f6..2323761084 160000 --- a/sorc/ufs_utils.fd +++ b/sorc/ufs_utils.fd @@ -1 +1 @@ -Subproject commit 06eec5b6f636123835e2dfd9fc5229980c006735 +Subproject commit 23237610845c3a4438b21b25e9b3dc25c4c15b73 diff --git a/versions/build.noaacloud.ver b/versions/build.noaacloud.ver index fc288b76b5..b5fd272b4b 100644 --- a/versions/build.noaacloud.ver +++ b/versions/build.noaacloud.ver @@ -1,5 +1,5 @@ -export stack_intel_ver=2021.3.0 -export stack_impi_ver=2021.3.0 +export stack_intel_ver=2021.10.0 +export stack_impi_ver=2021.10.0 export spack_env=gsi-addon-env source "${HOMEgfs:-}/versions/spack.ver" -export spack_mod_path="/contrib/spack-stack/spack-stack-${spack_stack_ver}/envs/gsi-addon-env/install/modulefiles/Core" +export spack_mod_path="/contrib/spack-stack-rocky8/spack-stack-${spack_stack_ver}/envs/gsi-addon-env/install/modulefiles/Core" diff --git a/versions/run.noaacloud.ver b/versions/run.noaacloud.ver index 1fc3779b2e..98ec2b36f9 100644 --- a/versions/run.noaacloud.ver +++ b/versions/run.noaacloud.ver @@ -1,8 +1,8 @@ -export stack_intel_ver=2021.3.0 -export stack_impi_ver=2021.3.0 +export stack_intel_ver=2021.10.0 +export stack_impi_ver=2021.10.0 export spack_env=gsi-addon-env source "${HOMEgfs:-}/versions/spack.ver" -export spack_mod_path="/contrib/spack-stack/spack-stack-${spack_stack_ver}/envs/gsi-addon-env/install/modulefiles/Core" +export 
spack_mod_path="/contrib/spack-stack-rocky8/spack-stack-${spack_stack_ver}/envs/gsi-addon-env/install/modulefiles/Core" export cdo_ver=2.2.0 diff --git a/workflow/hosts/awspw.yaml b/workflow/hosts/awspw.yaml index b98c838faa..c80800725a 100644 --- a/workflow/hosts/awspw.yaml +++ b/workflow/hosts/awspw.yaml @@ -27,5 +27,5 @@ MAKE_ACFTBUFR: 'NO' DO_TRACKER: 'NO' DO_GENESIS: 'NO' DO_METP: 'NO' -SUPPORT_WAVES: 'NO' -SUPPORTED_RESOLUTIONS: ['C48', 'C96'] # TODO: Test and support all cubed-sphere resolutions. +SUPPORTED_RESOLUTIONS: ['C48', 'C96', 'C192', 'C384', 'C768'] # TODO: Test and support all cubed-sphere resolutions. +AERO_INPUTS_DIR: /contrib/global-workflow-shared-data/data/gocart_emissions diff --git a/workflow/hosts/azurepw.yaml b/workflow/hosts/azurepw.yaml index 4725e28962..d7c064dc60 100644 --- a/workflow/hosts/azurepw.yaml +++ b/workflow/hosts/azurepw.yaml @@ -24,5 +24,7 @@ LOCALARCH: 'NO' ATARDIR: '' # TODO: This will not yet work from AZURE. MAKE_NSSTBUFR: 'NO' MAKE_ACFTBUFR: 'NO' -SUPPORT_WAVES: 'NO' -SUPPORTED_RESOLUTIONS: ['C48', 'C96'] # TODO: Test and support all cubed-sphere resolutions. +DO_TRACKER: 'NO' +DO_GENESIS: 'NO' +DO_METP: 'NO' +SUPPORTED_RESOLUTIONS: ['C48', 'C96', 'C384', 'C768'] # TODO: Test and support all cubed-sphere resolutions. diff --git a/workflow/hosts/googlepw.yaml b/workflow/hosts/googlepw.yaml index 1b979b6bc9..8ba8e18e74 100644 --- a/workflow/hosts/googlepw.yaml +++ b/workflow/hosts/googlepw.yaml @@ -24,5 +24,7 @@ LOCALARCH: 'NO' ATARDIR: '' # TODO: This will not yet work from GOOGLE. MAKE_NSSTBUFR: 'NO' MAKE_ACFTBUFR: 'NO' -SUPPORT_WAVES: 'NO' -SUPPORTED_RESOLUTIONS: ['C48', 'C96'] # TODO: Test and support all cubed-sphere resolutions. +DO_TRACKER: 'NO' +DO_GENESIS: 'NO' +DO_METP: 'NO' +SUPPORTED_RESOLUTIONS: ['C48', 'C96', 'C384'] # TODO: Test and support all cubed-sphere resolutions. 
diff --git a/workflow/setup_expt.py b/workflow/setup_expt.py index 574dc0d91a..09bc1c90ac 100755 --- a/workflow/setup_expt.py +++ b/workflow/setup_expt.py @@ -372,7 +372,6 @@ def query_and_clean(dirname, force_clean=False): def validate_user_request(host, inputs): supp_res = host.info['SUPPORTED_RESOLUTIONS'] - supp_waves = host.info.get('SUPPORT_WAVES', 'YES') machine = host.machine for attr in ['resdetatmos', 'resensatmos']: try: @@ -382,9 +381,6 @@ def validate_user_request(host, inputs): if expt_res not in supp_res: raise NotImplementedError(f"Supported resolutions on {machine} are:\n{', '.join(supp_res)}") - if "W" in inputs.app and supp_waves == "NO": - raise NotImplementedError(f"Waves are not supported on {machine}") - def get_ocean_resolution(resdetatmos): """ From d85214db1683848019d67815c63dc663c6049ec5 Mon Sep 17 00:00:00 2001 From: David Huber <69919478+DavidHuber-NOAA@users.noreply.github.com> Date: Tue, 24 Dec 2024 17:48:19 -0500 Subject: [PATCH 2/4] Create compute build option (#3186) This creates scripts to run compute-node builds and also refactors the build_all.sh script to make it easier to build all executables. 
In place of the various options that controlled which components are built, `build_all.sh` now takes a list of one or more systems to build: - `gfs` builds everything needed for forecast-only gfs (UFS model with unstructured wave grid, gfs_utils, ufs_utils, upp, ww3 pre/post for unstructured wave grid) - `gefs` builds everything needed for GEFS (UFS model with structured wave grid, gfs_utils, ufs_utils, upp, ww3 pre/post for structured wave grid) - `sfs` builds everything needed for SFS (UFS model in hydrostatic mode with unstructured wave grid, gfs_utils, ufs_utils, upp, ww3 pre/post for structured wave grid) - `gsi` builds GSI-based DA components (gsi_enkf, gsi_monitor, gsi_utils) - `gdas` builds JEDI-based DA components (gdas app, gsi_monitor, gsi_utils) `all` will build all of the above (mostly for testing) Examples: Build for forecast-only GFS: ```./build_all.sh gfs``` Build cycled GFS including coupled DA: ``` ./build_all.sh gfs gsi gdas``` Build GEFS: ```./build_all.sh gefs``` Build everything (for testing purposes): ```./build_all.sh all``` Other options, such as `-d` to build in debug mode, remain unchanged. The full script signature is now: ``` ./build_all.sh [-a UFS_app][-c build_config][-d][-f][-h][-v] [gfs] [gefs] [sfs] [gsi] [gdas] [all] ``` Additionally, there is a new script to build components on the compute nodes using the job scheduler instead of the login node. This method takes the load off of the login nodes and may be faster in some cases. Compute build is invoked using the build_compute.sh script, which behaves similarly to the new `build_all.sh`: ``` ./build_compute.sh [-h][-v][-A ] [gfs] [gefs] [sfs] [gsi] [gdas] [all] ``` Compute build will generate a rocoto workflow and then call `rocotorun` itself repeatedly until either a build fails or all builds succeed, at which point the script will exit. 
Since the script is calling `rocotorun` itself, you don't need to set up your own cron to do it, but advanced users can also use all the regular rocoto tools on `build.xml` and `build.db` if you wish. Some things to note with the compute build: - When a build fails, other build jobs are not cancelled and will continue to run. - Since the script stops running `rocotorun` once one build fails, the rocoto database will no longer update with the status of the remaining jobs after that point. - Similarly, if the terminal running `build_compute.sh` gets disconnected, the rocoto database will no longer update. - In either of the above cases, you could run `rocotorun` yourself manually to update the database as long as the job information hasn't aged off the scheduler yet. Resolves #3131 --------- Co-authored-by: Rahul Mahajan --- .github/CODEOWNERS | 1 + .gitignore | 3 + ci/Jenkinsfile | 4 +- docs/source/clone.rst | 56 +++------ sorc/build_all.sh | 211 ++++++++++++++++----------------- sorc/build_compute.sh | 115 ++++++++++++++++++ sorc/build_upp.sh | 25 ++++ workflow/build_compute.py | 178 +++++++++++++++++++++++++++ workflow/build_opts.yaml | 94 +++++++++++++++ workflow/generate_workflows.sh | 47 ++------ 10 files changed, 548 insertions(+), 186 deletions(-) create mode 100755 sorc/build_compute.sh create mode 100755 workflow/build_compute.py create mode 100644 workflow/build_opts.yaml diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 5068b961f7..b0b51922c5 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -211,3 +211,4 @@ ush/python/pygfs/utils/marine_da_utils.py @guillaumevernieres @AndrewEichmann-NO # Specific workflow scripts workflow/generate_workflows.sh @DavidHuber-NOAA +workflow/build_compute.py @DavidHuber-NOAA @aerorahul diff --git a/.gitignore b/.gitignore index 49fb3f438a..f3cb1e1b3e 100644 --- a/.gitignore +++ b/.gitignore @@ -85,6 +85,9 @@ parm/wafs # Ignore sorc and logs folders from externals 
#-------------------------------------------- +sorc/build.xml +sorc/build.db +sorc/build_lock.db sorc/*log sorc/logs sorc/calc_analysis.fd diff --git a/ci/Jenkinsfile b/ci/Jenkinsfile index 9e2381268d..b7a29e15b0 100644 --- a/ci/Jenkinsfile +++ b/ci/Jenkinsfile @@ -120,9 +120,7 @@ pipeline { def error_logs_message = "" dir("${HOMEgfs}/sorc") { try { - sh(script: './build_all.sh -kgu') // build the global-workflow executables for GFS variant (UFS-wx-model, WW3 pre/post executables) - sh(script: './build_ww3prepost.sh -w > ./logs/build_ww3prepost_gefs.log 2>&1') // build the WW3 pre/post processing executables for GEFS variant - sh(script: './build_ufs.sh -w -e gefs_model.x > ./logs/build_ufs_gefs.log 2>&1') // build the UFS-wx-model executable for GEFS variant + sh(script: './build_compute.sh all') // build the global-workflow executables } catch (Exception error_build) { echo "Failed to build global-workflow: ${error_build.getMessage()}" if ( fileExists("logs/error.logs") ) { diff --git a/docs/source/clone.rst b/docs/source/clone.rst index d3f81f2e47..ec0018157a 100644 --- a/docs/source/clone.rst +++ b/docs/source/clone.rst @@ -18,35 +18,39 @@ Clone the `global-workflow` and `cd` into the `sorc` directory: git clone --recursive https://github.com/NOAA-EMC/global-workflow cd global-workflow/sorc -For forecast-only (coupled or uncoupled) build of the components: +.. _build_examples: + +The build_all.sh script can be used to build all required components of the global workflow. The accepted arguments is a list of systems to be built. This includes builds for GFS and GEFS forecast-only experiments, GSI and GDASApp-based DA for cycled GFS experiments. See `feature availability `__ to see which system(s) are available on each supported system. 
:: - ./build_all.sh + ./build_all.sh [gfs] [gefs] [gsi] [gdas] [all] -For cycled (w/ data assimilation) use the `-g` option during build: +For example, to run GFS experiments with GSI DA, execute: :: - ./build_all.sh -g + ./build_all.sh gfs gsi -For coupled cycling (include new UFSDA) use the `-gu` options during build: +This builds the GFS, UFS-utils, GFS-utils, WW3 with PDLIB (unstructured wave grids), UPP, GSI, GSI-monitor, and GSI-utils executables. -[Currently only available on Hera, Orion, and Hercules] +For coupled cycling (include new UFSDA) execute: :: - ./build_all.sh -gu + ./build_all.sh gfs gdas +This builds all of the same executables, except it builds the GDASApp instead of the GSI. -For building without PDLIB (unstructured grid) for the wave model, use the `-w` options during build: +To run GEFS (forecast-only) execute: :: - ./build_all.sh -w + ./build_all.sh gefs +This builds the GEFS, UFS-utils, GFS-utils, WW3 *without* PDLIB (structured wave grids), and UPP executables. -Build workflow components and link workflow artifacts such as executables, etc. 
+Once the building is complete, link workflow artifacts such as executables, configuration files, and scripts via :: @@ -107,40 +111,19 @@ Under the ``/sorc`` folder is a script to build all components called ``build_al :: - ./build_all.sh [-a UFS_app][-g][-h][-u][-v] + ./build_all.sh [-a UFS_app][-k][-h][-v] [list of system(s) to build] -a UFS_app: Build a specific UFS app instead of the default - -g: - Build GSI + -k: + Kill all builds immediately if one fails -h: Print this help message and exit - -j: - Specify maximum number of build jobs (n) - -u: - Build UFS-DA -v: Execute all build scripts with -v option to turn on verbose where supported -For forecast-only (coupled or uncoupled) build of the components: - -:: - - ./build_all.sh - -For cycled (w/ data assimilation) use the `-g` option during build: - -:: - - ./build_all.sh -g - -For coupled cycling (include new UFSDA) use the `-gu` options during build: - -[Currently only available on Hera, Orion, and Hercules] - -:: - - ./build_all.sh -gu + Lastly, pass to build_all.sh a list of systems to build. This includes `gfs`, `gefs`, `sfs` (not fully supported), `gsi`, `gdas`, and `all`. +For examples of how to use this script, see :ref:`build examples <build_examples>`. ^^^^^^^^^^^^^^^ Link components @@ -156,4 +139,3 @@ After running the checkout and build scripts run the link script: Where: ``-o``: Run in operations (NCO) mode. This creates copies instead of using symlinks and is generally only used by NCO during installation into production. - diff --git a/sorc/build_all.sh b/sorc/build_all.sh index 9414846f2a..f4618b948c 100755 --- a/sorc/build_all.sh +++ b/sorc/build_all.sh @@ -13,32 +13,23 @@ set +x #------------------------------------ function _usage() { cat << EOF -Builds all of the global-workflow components by calling the individual build - scripts in sequence. +Builds all of the global-workflow components by calling the individual build scripts in parallel. 
-Usage: ${BASH_SOURCE[0]} [-a UFS_app][-c build_config][-d][-f][-h][-j n][-v][-w][-y] +Usage: ${BASH_SOURCE[0]} [-a UFS_app][-c build_config][-d][-f][-h][-v] [gfs] [gefs] [sfs] [gsi] [gdas] [all] -a UFS_app: - Build a specific UFS app instead of the default + Build a specific UFS app instead of the default. This will be applied to all UFS (GFS, GEFS, SFS) builds. -d: Build in debug mode -f: - Build the UFS model using the -DFASTER=ON option - -g: - Build GSI + Build the UFS model(s) using the -DFASTER=ON option. -h: Print this help message and exit - -j: - Specify maximum number of build jobs (n) -k: Kill all builds if any build fails - -u: - Build UFS-DA -v: Execute all build scripts with -v option to turn on verbose where supported - -w: - Use structured wave grid - -y: - Use hydrostatic version of FV3 + + Specified systems (gfs, gefs, sfs, gsi, gdas) are non-exclusive, so they can be built together. EOF exit 1 } @@ -48,30 +39,21 @@ readonly HOMEgfs=$(cd "$(dirname "$(readlink -f -n "${BASH_SOURCE[0]}" )" )/.." 
cd "${HOMEgfs}/sorc" || exit 1 _build_ufs_opt="" -_build_ufsda="NO" -_build_gsi="NO" _build_debug="" _verbose_opt="" -_wave_opt="" -_hydro_opt="" _build_job_max=20 _quick_kill="NO" _ufs_exec="-e gfs_model.x" # Reset option counter in case this script is sourced OPTIND=1 -while getopts ":a:dfghj:kuvwy" option; do +while getopts ":a:dfhkv" option; do case "${option}" in a) _build_ufs_opt+="-a ${OPTARG} ";; f) _build_ufs_opt+="-f ";; d) _build_debug="-d" ;; - g) _build_gsi="YES" ;; h) _usage;; - j) _build_job_max="${OPTARG} ";; k) _quick_kill="YES" ;; - u) _build_ufsda="YES" ;; - v) _verbose_opt="-v";; - w) _wave_opt="-w"; _ufs_exec="-e gefs_model.x";; - y) _hydro_opt="-y"; _ufs_exec="-e sfs_model.x";; + v) _verbose_opt="-v" ;; :) echo "[${BASH_SOURCE[0]}]: ${option} requires an argument" _usage @@ -82,20 +64,91 @@ while getopts ":a:dfghj:kuvwy" option; do ;; esac done - shift $((OPTIND-1)) +# If no build system was specified, build for gfs forecast-only +if [[ $# -eq 0 ]]; then + selected_systems="gfs" +else + selected_systems="$*" +fi + +supported_systems=("gfs" "gefs" "sfs" "gsi" "gdas" "all") + +declare -A system_builds +system_builds=( + ["gfs"]="ufs_gfs gfs_utils ufs_utils upp ww3_gfs" + ["gefs"]="ufs_gefs gfs_utils ufs_utils upp ww3_gefs" + ["sfs"]="ufs_sfs gfs_utils ufs_utils upp ww3_gefs" + ["gsi"]="gsi_enkf gsi_monitor gsi_utils" + ["gdas"]="gdas gsi_monitor gsi_utils" + ["all"]="ufs_gfs gfs_utils ufs_utils upp ww3_gfs ufs_gefs ufs_sfs ww3_gefs gdas gsi_enkf gsi_monitor gsi_utils" +) + logs_dir="${HOMEgfs}/sorc/logs" if [[ ! -d "${logs_dir}" ]]; then echo "Creating logs folder" mkdir -p "${logs_dir}" || exit 1 fi -# Check final exec folder exists -if [[ ! 
-d "${HOMEgfs}/exec" ]]; then - echo "Creating ${HOMEgfs}/exec folder" - mkdir -p "${HOMEgfs}/exec" -fi +# Jobs per build ("min max") +declare -A build_jobs build_opts build_scripts +build_jobs=( + ["ufs_gfs"]=8 ["ufs_gefs"]=8 ["ufs_sfs"]=8 ["gdas"]=8 ["gsi_enkf"]=2 ["gfs_utils"]=1 ["ufs_utils"]=1 + ["ww3_gfs"]=1 ["ww3_gefs"]=1 ["gsi_utils"]=1 ["gsi_monitor"]=1 ["gfs_utils"]=1 ["upp"]=1 +) + +# Establish build options for each job +_gfs_exec="gfs_model.x" +_gefs_exec="gefs_model.x" +_sfs_exec="sfs_model.x" +build_opts=( + ["ufs_gfs"]="${wave_opt} ${_build_ufs_opt} ${_verbose_opt} ${_build_debug} -e ${_gfs_exec}" + ["ufs_gefs"]="${wave_opt} ${_build_ufs_opt} ${_verbose_opt} ${_build_debug} -e ${_gefs_exec}" + ["ufs_sfs"]="${wave_opt} ${_build_ufs_opt} ${_verbose_opt} ${_build_debug} -e ${_sfs_exec}" + ["upp"]="${_build_debug}" + ["ww3_gfs"]="${_verbose_opt} ${_build_debug}" + ["ww3_gefs"]="-w ${_verbose_opt} ${_build_debug}" + ["gdas"]="${_verbose_opt} ${_build_debug}" + ["ufs_utils"]="${_verbose_opt} ${_build_debug}" + ["gfs_utils"]="${_verbose_opt} ${_build_debug}" + ["gsi_utils"]="${_verbose_opt} ${_build_debug}" + ["gsi_enkf"]="${_verbose_opt} ${_build_debug}" + ["gsi_monitor"]="${_verbose_opt} ${_build_debug}" +) + +# Set the build script name for each build +build_scripts=( + ["ufs_gfs"]="build_ufs.sh" + ["ufs_gefs"]="build_ufs.sh" + ["ufs_sfs"]="build_ufs.sh" + ["gdas"]="build_gdas.sh" + ["gsi_enkf"]="build_gsi_enkf.sh" + ["gfs_utils"]="build_gfs_utils.sh" + ["ufs_utils"]="build_ufs_utils.sh" + ["ww3_gfs"]="build_ww3prepost.sh" + ["ww3_gefs"]="build_ww3prepost.sh" + ["gsi_utils"]="build_gsi_utils.sh" + ["gsi_monitor"]="build_gsi_monitor.sh" + ["gfs_utils"]="build_gfs_utils.sh" + ["upp"]="build_upp.sh" +) + +# Check the requested systems to make sure we can build them +declare -A builds +system_count=0 +for system in ${selected_systems}; do + # shellcheck disable=SC2076 + if [[ " ${supported_systems[*]} " =~ " ${system} " ]]; then + (( system_count += 1 )) + 
for build in ${system_builds["${system}"]}; do + builds["${build}"]="yes" + done + else + echo "Unsupported build system: ${system}" + _usage + fi +done #------------------------------------ # GET MACHINE @@ -108,6 +161,9 @@ if [[ -z "${MACHINE_ID}" ]]; then exit 1 fi +# Create the log directory +mkdir -p "${HOMEgfs}/sorc/logs" + #------------------------------------ # SOURCE BUILD VERSION FILES #------------------------------------ @@ -123,87 +179,18 @@ ERRSCRIPT=${ERRSCRIPT:-'eval [[ $errs = 0 ]]'} # shellcheck disable= errs=0 -declare -A build_jobs -declare -A build_opts - #------------------------------------ # Check which builds to do and assign # of build jobs #------------------------------------ -# Mandatory builds, unless otherwise specified, for the UFS -big_jobs=0 -build_jobs["ufs"]=8 -big_jobs=$((big_jobs+1)) -build_opts["ufs"]="${_wave_opt} ${_hydro_opt} ${_verbose_opt} ${_build_ufs_opt} ${_build_debug} ${_ufs_exec}" - -build_jobs["upp"]=1 -build_opts["upp"]="${_build_debug}" - -build_jobs["ufs_utils"]=1 -build_opts["ufs_utils"]="${_verbose_opt} ${_build_debug}" - -build_jobs["gfs_utils"]=1 -build_opts["gfs_utils"]="${_verbose_opt} ${_build_debug}" - -build_jobs["ww3prepost"]=1 -build_opts["ww3prepost"]="${_wave_opt} ${_verbose_opt} ${_build_ufs_opt} ${_build_debug}" - -# Optional DA builds -if [[ "${_build_ufsda}" == "YES" ]]; then - if [[ "${MACHINE_ID}" != "orion" && "${MACHINE_ID}" != "hera" && "${MACHINE_ID}" != "hercules" && "${MACHINE_ID}" != "wcoss2" && "${MACHINE_ID}" != "noaacloud" && "${MACHINE_ID}" != "gaea" ]]; then - echo "NOTE: The GDAS App is not supported on ${MACHINE_ID}. Disabling build." 
- else - build_jobs["gdas"]=8 - big_jobs=$((big_jobs+1)) - build_opts["gdas"]="${_verbose_opt} ${_build_debug}" - fi -fi -if [[ "${_build_gsi}" == "YES" ]]; then - build_jobs["gsi_enkf"]=2 - build_opts["gsi_enkf"]="${_verbose_opt} ${_build_debug}" -fi -if [[ "${_build_gsi}" == "YES" || "${_build_ufsda}" == "YES" ]] ; then - build_jobs["gsi_utils"]=1 - build_opts["gsi_utils"]="${_verbose_opt} ${_build_debug}" - build_jobs["gsi_monitor"]=1 - build_opts["gsi_monitor"]="${_verbose_opt} ${_build_debug}" -fi - -# Go through all builds and adjust CPU counts down if necessary -requested_cpus=0 -build_list="" -for build in "${!build_jobs[@]}"; do - if [[ -z "${build_list}" ]]; then - build_list="${build}" - else - build_list="${build_list}, ${build}" - fi - if [[ ${build_jobs[${build}]} -gt ${_build_job_max} ]]; then - build_jobs[${build}]=${_build_job_max} - fi - requested_cpus=$(( requested_cpus + build_jobs[${build}] )) -done - echo "Building ${build_list}" -# Go through all builds and adjust CPU counts up if possible -if [[ ${requested_cpus} -lt ${_build_job_max} && ${big_jobs} -gt 0 ]]; then - # Add cores to the gdas and ufs build jobs - extra_cores=$(( _build_job_max - requested_cpus )) - extra_cores=$(( extra_cores / big_jobs )) - for build in "${!build_jobs[@]}"; do - if [[ "${build}" == "gdas" || "${build}" == "ufs" ]]; then - build_jobs[${build}]=$(( build_jobs[${build}] + extra_cores )) - fi - done -fi - procs_in_use=0 declare -A build_ids check_builds() { - for chk_build in "${!build_jobs[@]}"; do + for chk_build in "${!builds[@]}"; do # Check if the build is complete and if so what the status was if [[ -n "${build_ids[${chk_build}]+0}" ]]; then if ! ps -p "${build_ids[${chk_build}]}" > /dev/null; then @@ -213,7 +200,7 @@ check_builds() echo "build_${chk_build}.sh failed! Exiting!" echo "Check logs/build_${chk_build}.log for details." 
echo "logs/build_${chk_build}.log" > "${HOMEgfs}/sorc/logs/error.logs" - for kill_build in "${!build_jobs[@]}"; do + for kill_build in "${!builds[@]}"; do if [[ -n "${build_ids[${kill_build}]+0}" ]]; then pkill -P "${build_ids[${kill_build}]}" fi @@ -228,15 +215,15 @@ check_builds() builds_started=0 # Now start looping through all of the jobs until everything is done -while [[ ${builds_started} -lt ${#build_jobs[@]} ]]; do - for build in "${!build_jobs[@]}"; do +while [[ ${builds_started} -lt ${#builds[@]} ]]; do + for build in "${!builds[@]}"; do # Has the job started? if [[ -n "${build_jobs[${build}]+0}" && -z "${build_ids[${build}]+0}" ]]; then # Do we have enough processors to run it? if [[ ${_build_job_max} -ge $(( build_jobs[build] + procs_in_use )) ]]; then # double-quoting build_opts here will not work since it is a string of options #shellcheck disable=SC2086 - "./build_${build}.sh" ${build_opts[${build}]:-} -j "${build_jobs[${build}]}" > \ + "./${build_scripts[${build}]}" ${build_opts[${build}]:-} -j "${build_jobs[${build}]}" > \ "${logs_dir}/build_${build}.log" 2>&1 & build_ids["${build}"]=$! echo "Starting build_${build}.sh" @@ -249,7 +236,7 @@ while [[ ${builds_started} -lt ${#build_jobs[@]} ]]; do # Also recalculate how many processors are in use to account for completed builds builds_started=0 procs_in_use=0 - for build in "${!build_jobs[@]}"; do + for build in "${!builds[@]}"; do # Has the build started? 
if [[ -n "${build_ids[${build}]+0}" ]]; then builds_started=$(( builds_started + 1)) @@ -275,7 +262,7 @@ done # Wait for all jobs to complete and check return statuses -while [[ "${#build_jobs[@]}" -gt 0 ]]; do +while [[ "${#builds[@]}" -gt 0 ]]; do # If requested, check if any build has failed and exit if so if [[ "${_quick_kill}" == "YES" ]]; then @@ -286,7 +273,7 @@ while [[ "${#build_jobs[@]}" -gt 0 ]]; do fi fi - for build in "${!build_jobs[@]}"; do + for build in "${!builds[@]}"; do # Test if each job is complete and if so, notify and remove from the array if [[ -n "${build_ids[${build}]+0}" ]]; then if ! ps -p "${build_ids[${build}]}" > /dev/null; then @@ -294,14 +281,14 @@ while [[ "${#build_jobs[@]}" -gt 0 ]]; do build_stat=$? errs=$((errs+build_stat)) if [[ ${build_stat} == 0 ]]; then - echo "build_${build}.sh completed successfully!" + echo "${build_scripts[${build}]} completed successfully!" else - echo "build_${build}.sh failed with status ${build_stat}!" + echo "${build_scripts[${build}]} failed with status ${build_stat}!" fi # Remove the completed build from the list of PIDs unset 'build_ids[${build}]' - unset 'build_jobs[${build}]' + unset 'builds[${build}]' fi fi done diff --git a/sorc/build_compute.sh b/sorc/build_compute.sh new file mode 100755 index 0000000000..794b4fa350 --- /dev/null +++ b/sorc/build_compute.sh @@ -0,0 +1,115 @@ +#!/usr/bin/env bash + +function _usage() { + cat << EOF +Builds all of the global-workflow components on compute nodes. + +Usage: ${BASH_SOURCE[0]} [-h][-v][-A ] [ gfs gefs sfs gsi gdas all] + -h: + Print this help message and exit + -v: + Verbose mode + -A: + HPC account to use for the compute-node builds + (default is \$HOMEgfs/ci/platforms/config.\$machine:\$HPC_ACCOUNT) + + Input arguments are the system(s) to build. + Valid options are + "gfs", "gefs", "sfs", "gsi", "gdas", or "all". 
+ (default is "gfs") +EOF + exit 1 +} +# This script launches compute-node builds of selected submodules +# Two positional arguments are accepted: + +set -eu + +rocoto_verbose_opt="" +verbose="NO" +build_xml="build.xml" +build_db="build.db" +build_lock_db="build_lock.db" + +OPTIND=1 +while getopts ":hA:v" option; do + case "${option}" in + h) _usage;; + A) export HPC_ACCOUNT="${OPTARG}" ;; + v) verbose="YES" && rocoto_verbose_opt="-v10";; + :) + echo "[${BASH_SOURCE[0]}]: ${option} requires an argument" + _usage + ;; + *) + echo "[${BASH_SOURCE[0]}]: Unrecognized option: ${option}" + _usage + ;; + esac +done +shift $((OPTIND-1)) + +# Set build system to gfs if not specified +if [[ $# -eq 0 ]]; then + systems="gfs" +else + systems=$* +fi + +if [[ "${verbose}" == "YES" ]]; then + set -x +fi + +# shellcheck disable=SC2155,SC2312 +HOMEgfs=$(cd "$(dirname "$(readlink -f -n "${BASH_SOURCE[0]}" )" )/.." && pwd -P) +cd "${HOMEgfs}/sorc" || exit 1 + +# Delete the rocoto XML and database if they exist +rm -f "${build_xml}" "${build_db}" "${build_lock_db}" + +echo "Sourcing global-workflow modules ..." +source "${HOMEgfs}/workflow/gw_setup.sh" + +echo "Generating build.xml for building global-workflow programs on compute nodes ..." +# Catch errors manually from here out +set +e +"${HOMEgfs}/workflow/build_compute.py" --yaml "${HOMEgfs}/workflow/build_opts.yaml" --systems "${systems}" +rc=$? +if (( rc != 0 )); then + echo "FATAL ERROR: ${BASH_SOURCE[0]} failed to create 'build.xml' with error code ${rc}" + exit 1 +fi + +echo "Launching builds in parallel on compute nodes ..." +runcmd="rocotorun -w ${build_xml} -d ${build_db} ${rocoto_verbose_opt}" + +finished=false +${runcmd} +echo "Running builds on compute nodes" +while [[ "${finished}" == "false" ]]; do + sleep 3m + ${runcmd} + state="$("${HOMEgfs}/ci/scripts/utils/rocotostat.py" -w "${build_xml}" -d "${build_db}")" + if [[ "${verbose_opt}" == "true" ]]; then + echo "Rocoto is in state ${state}" + else + echo -n "." 
+ fi + + if [[ "${state}" == "DONE" ]]; then + finished=true + elif [[ "${state}" == "RUNNING" ]]; then + finished=false + elif [[ "${state}" == "DEAD" ]]; then + echo "FATAL ERROR: ${BASH_SOURCE[0]} one or more builds failed!" + # TODO add capability to determine which build(s) failed + exit 2 + else + echo "FATAL ERROR: ${BASH_SOURCE[0]} rocoto failed with state '${state}'" + exit 3 + fi +done + +echo "All builds completed successfully!" + +exit 0 diff --git a/sorc/build_upp.sh b/sorc/build_upp.sh index e217e171db..15e2dfb146 100755 --- a/sorc/build_upp.sh +++ b/sorc/build_upp.sh @@ -26,6 +26,31 @@ if [[ ! -d "../exec" ]]; then mkdir -p ../exec fi +# The UPP does not load a cmake module and the WCOSS2 compute nodes do not have cmake in PATH by default +# Add cmake to the default modules if the command isn't found +# TODO remove this workaround when issue NOAA-EMC/UPP#1106 is addressed. +if ! command -v cmake >& /dev/null; then + export COMPILER="intel" + if [[ -z ${HOMEgfs+x} ]]; then + # shellcheck disable=SC2155 + readonly HOMEgfs=$(cd "$(dirname "$(readlink -f -n "${BASH_SOURCE[0]}" )" )/.." && pwd -P) + fi + source "${HOMEgfs}/ush/detect_machine.sh" + if [[ "${MACHINE_ID}" == "wcoss2" ]]; then + set +x + module try-load cmake + + if module is-loaded cmake; then + LMOD_SYSTEM_DEFAULT_MODULES="${LMOD_SYSTEM_DEFAULT_MODULES} cmake" + echo "Added cmake to the default modules" + else + echo "FATAL ERROR Could not find cmake or a cmake module!" 
+ exit 2 + fi + set -x + fi +fi + cd ufs_model.fd/FV3/upp/tests # shellcheck disable=SC2086 BUILD_JOBS=${BUILD_JOBS:-8} ./compile_upp.sh ${_opts} diff --git a/workflow/build_compute.py b/workflow/build_compute.py new file mode 100755 index 0000000000..7787e9ad40 --- /dev/null +++ b/workflow/build_compute.py @@ -0,0 +1,178 @@ +#!/usr/bin/env python3 + +""" +Entry point for setting up a compute-node build +""" + +import os +from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter +from typing import Dict + +from wxflow import parse_yaml, AttrDict + +from hosts import Host +import rocoto.rocoto as rocoto + + +_here = os.path.dirname(__file__) +HOMEgfs = os.path.abspath(os.path.join(os.path.abspath(_here), '..')) + + +def input_args(*argv): + """ + Method to collect user arguments for `compute_build.py` + """ + + description = """ + Setup files and directories to start a compute build. + """ + + parser = ArgumentParser(description=description, + formatter_class=ArgumentDefaultsHelpFormatter) + + parser.add_argument('--yaml', help='Input YAML file', + type=str, required=False, default='build_opts.yaml') + parser.add_argument('--account', help='HPC account to use; default is host-dependent', required=False, default=os.getenv('HPC_ACCOUNT')) + parser.add_argument('--systems', help='System(s) to build (options: gfs, gefs, sfs, gsi, gdas, or all)', required=False, default='gfs') + + inputs = parser.parse_args(list(*argv) if len(argv) else None) + + return inputs + + +def get_task_spec(task_name: str, task_spec: Dict, host_spec: Dict) -> Dict: + """ + Generate a task specification dictionary for a given task. + + Parameters + ---------- + task_name: str + The name of the task. + task_spec: Dict + The specification of the task, containing command, walltime, and cores. + host_spec: Dict + The specification of the host, containing account, queue, partition, and native. 
+ + Returns: + -------- + task_dict: Dict + A dictionary containing the task specification, including resources and other task-related information. + """ + + task_dict = AttrDict() + task_dict.task_name = task_name + task_dict.cycledef = "build" + task_dict.maxtries = 1 + task_dict.command = f"cd {HOMEgfs}/sorc/; {task_spec.command}" + task_dict.job_name = task_name + task_dict.log = f"{HOMEgfs}/sorc/logs/{task_name}.log" + + task_dict.resources = AttrDict() + task_dict.resources.account = host_spec.account + task_dict.resources.queue = host_spec.queue + task_dict.resources.partition = host_spec.partition + task_dict.resources.walltime = task_spec.walltime + task_dict.resources.native = host_spec.native + task_dict.resources.memory = None + task_dict.resources.nodes = 1 + task_dict.resources.ntasks = task_spec.cores + task_dict.resources.ppn = task_spec.cores + task_dict.resources.threads = 1 + + return task_dict + + +def get_host_specs(host: Dict) -> Dict: + """Generate host specs for the build.xml file based on Host() info + + Parameters + ---------- + host : Dict + Host information returned by Host() + + Returns + ------- + specs: Dict + Consolidated compute specifics needed for the XML + """ + + native = None + partition = None + + if host.info.SCHEDULER in ['pbspro']: + native = '-l place=vscatter' + elif host.info.SCHEDULER in ['slurm']: + native = '--export=NONE' + if host.info.PARTITION_BATCH not in [""]: + partition = host.info.PARTITION_BATCH + + if host.info.RESERVATION not in [""]: + native += f' --reservation={host.info.RESERVATION}' + + if host.info.CLUSTERS not in [""]: + native += f' --clusters={host.info.CLUSTERS}' + + specs = AttrDict() + specs.scheduler = host.info.SCHEDULER + specs.account = host.info.ACCOUNT + specs.queue = host.info.QUEUE + specs.partition = partition + specs.native = native + + return specs + + +def main(*argv): + + user_inputs = input_args(*argv) + host_specs = get_host_specs(Host()) + + # Update the default host account if 
the user supplied one + if user_inputs.account is not None: + host_specs.account = user_inputs.account + + build_specs = AttrDict(parse_yaml(user_inputs.yaml)) + + systems = user_inputs.systems.split() if "all" not in user_inputs.systems else ["all"] + + # Determine systems to build + builds = set() + if systems[0] == "all": + builds = build_specs.build + else: + builds.update(build_specs.systems["common"]) + try: + for system in systems: + builds.update(build_specs.systems[system]) + except KeyError as e: + raise KeyError(f"{system} is not a valid global-workflow system!") from e + + # Build the task specs from the build specs and host specs + task_specs = AttrDict() + for task_name, task_spec in build_specs.build.items(): + if task_name in builds: + task_specs[task_name] = get_task_spec(task_name, task_spec, host_specs) + + # Start building the XML + strings = ['', + '', + f'', + f'\t{HOMEgfs}/sorc/logs/build.log', + '\t190001010000 190001010000 24:00:00', + '\n'] + xml_header = '\n'.join(strings) + xml_footer = '\n\n' + + task_list = [] + for _, task_spec in task_specs.items(): + task_list.append(rocoto.create_task(task_spec)) + xml_tasks = '\n'.join(task_list) + + xml = ''.join([xml_header, xml_tasks, xml_footer]) + xml_file = f"{HOMEgfs}/sorc/build.xml" + with open(xml_file, 'w') as fh: + fh.write(xml) + + +if __name__ == '__main__': + main() diff --git a/workflow/build_opts.yaml b/workflow/build_opts.yaml new file mode 100644 index 0000000000..464701c2f3 --- /dev/null +++ b/workflow/build_opts.yaml @@ -0,0 +1,94 @@ +systems: + common: + - "ufs_utils" + - "gfs_utils" + - "upp" + gfs: + - "gfs_model" + - "gfs_ww3prepost" + gsi: + - "gsi_enkf" + - "gsi_utils" + - "gsi_monitor" + gdas: + - "gdas" + - "gsi_utils" + - "gsi_monitor" + gefs: + - "gefs_model" + - "gefs_ww3_prepost" + sfs: + - "sfs_model" + - "gefs_ww3_prepost" +build: + gfs_model: + command: "./build_ufs.sh -e gfs_model.x -j 12" + log: "build_ufs_gfs.log" + cores: 12 + walltime: "00:30:00" + + 
gfs_ww3prepost: + command: "./build_ww3prepost.sh -j 4" + log: "build_ww3prepost_gfs.log" + cores: 4 + walltime: "00:10:00" + + gefs_model: + command: "./build_ufs.sh -w -e gefs_model.x -j 12" + log: "build_ufs_gefs.log" + cores: 12 + walltime: "00:30:00" + + gefs_ww3_prepost: + command: "./build_ww3prepost.sh -w -j 4" + log: "build_ww3prepost_gefs.log" + cores: 4 + walltime: "00:10:00" + + sfs_model: + command: "./build_ufs.sh -y -e sfs_model.x -j 12" + log: "build_ufs_sfs.log" + cores: 12 + walltime: "00:30:00" + + upp: + command: "./build_upp.sh -j 8" + log: "build_upp.log" + cores: 8 + walltime: "00:10:00" + + gsi_enkf: + command: "./build_gsi_enkf.sh -j 8" + log: "build_gsi_enkf.log" + cores: 8 + walltime: "00:15:00" + + gsi_monitor: + command: "./build_gsi_monitor.sh -j 4" + log: "build_gsi_monitor.log" + cores: 4 + walltime: "00:10:00" + + gsi_utils: + command: "./build_gsi_utils.sh -j 6" + log: "build_gsi_utils.log" + cores: 6 + walltime: "00:10:00" + + ufs_utils: + command: "./build_ufs_utils.sh -j 8" + log: "build_ufs_utils.log" + cores: 8 + walltime: "00:10:00" + + gfs_utils: + command: "./build_gfs_utils.sh -j 6" + log: "build_gfs_utils.log" + cores: 6 + walltime: "00:10:00" + + gdas: + command: "./build_gdas.sh -j 12" + log: "build_gdas.log" + cores: 12 + walltime: "01:00:00" diff --git a/workflow/generate_workflows.sh b/workflow/generate_workflows.sh index c98fa3028a..a5615a8b0d 100755 --- a/workflow/generate_workflows.sh +++ b/workflow/generate_workflows.sh @@ -19,11 +19,6 @@ function _usage() { -b Run build_all.sh with default flags (build the UFS, UPP, UFS_Utils, and GFS-utils only - -B "build flags" - Run build_all.sh with the build specified flags. Refer to - build_all.sh -h for a list of valid flags. - NOTE: the list of build flags MUST be in quotes. - -u Update submodules before building and/or generating experiments. -y "list of YAMLs to run" @@ -37,13 +32,12 @@ function _usage() { -G Run all valid GFS cases in the specified YAML directory. 
If -b is specified, then "-g -u" (build the GSI and GDASApp) - will be passed to build_all.sh unless -B is also specified. + will be passed to build_all.sh. Note that these builds are disabled on some systems, which will result in a warning from build_all.sh. -E Run all valid GEFS cases in the specified YAML directory. - If -b is specified, then "-w" will be passed to build_all.sh - unless -B is also specified. + If -b is specified, then "-w" will be passed to build_all.sh. -S (Not yet supported!) Run all valid SFS cases in the specified YAML directory. @@ -91,7 +85,6 @@ HOMEgfs="" _specified_home=false _build=false _build_flags="" -_explicit_build_flags=false _update_submods=false declare -a _yaml_list=("C48_ATM") _specified_yaml_list=false @@ -126,7 +119,6 @@ while [[ $# -gt 0 && "$1" != "--" ]]; do fi ;; b) _build=true ;; - B) _build_flags="${OPTARG}" && _explicit_build_flags=true ;; u) _update_submods=true ;; y) # Start over with an empty _yaml_list declare -a _yaml_list=() @@ -231,18 +223,6 @@ else done fi -# Test if multiple "run_all" options were set -_count_run_alls=0 -[[ "${_run_all_gfs}" == "true" ]] && ((_count_run_alls+=1)) -[[ "${_run_all_gefs}" == "true" ]] && ((_count_run_alls+=1)) -[[ "${_run_all_sfs}" == "true" ]] && ((_count_run_alls+=1)) - -if (( _count_run_alls > 1 )) ; then - echo "Only one run all option (-G -E -S) may be specified" - echo "Rerun with just one option and/or with -h for usage examples" - exit 5 -fi - # If -S is specified, exit (for now). # TODO when SFS tests come online, enable this option. if [[ "${_run_all_sfs}" == "true" ]]; then @@ -277,7 +257,7 @@ function select_all_yamls() # Bash cannot return an array from a function and any edits are descoped at # the end of the function, so use a nameref instead. 
- local -n _nameref_yaml_list='_yaml_list' + local -n _nameref_yaml_list="${2}" if [[ "${_specified_yaml_list}" == false ]]; then # Start over with an empty _yaml_list @@ -328,21 +308,20 @@ EOM # Check if running all GEFS cases if [[ "${_run_all_gefs}" == "true" ]]; then # Append -w to build_all.sh flags if -E was specified - if [[ "${_explicit_build_flags}" == "false" && "${_build}" == "true" ]]; then - _build_flags="-w" - fi + _build_flags="${_build_flags} gefs " - select_all_yamls "gefs" + declare -a _gefs_yaml_list + select_all_yamls "gefs" "_gefs_yaml_list" + _yaml_list=("${_yaml_list[@]}" "${_gefs_yaml_list[@]}") fi -# Check if running all SFS cases +# Check if running all GFS cases if [[ "${_run_all_gfs}" == "true" ]]; then - # Append -g -u to build_all.sh flags if -G was specified - if [[ "${_explicit_build_flags}" == "false" && "${_build}" == "true" ]]; then - _build_flags="-g -u" - fi + _build_flags="${_build_flags} gfs " - select_all_yamls "gfs" + declare -a _gfs_yaml_list + select_all_yamls "gfs" "_gfs_yaml_list" + _yaml_list=("${_yaml_list[@]}" "${_gfs_yaml_list[@]}") fi # Loading modules sometimes raises unassigned errors, so disable checks @@ -397,7 +376,7 @@ if [[ "${_build}" == "true" ]]; then printf "Building via build_all.sh %s\n\n" "${_build_flags}" # Let the output of build_all.sh go to stdout regardless of verbose options #shellcheck disable=SC2086,SC2248 - ${HOMEgfs}/sorc/build_all.sh ${_build_flags} ${_verbose_flag} + ${HOMEgfs}/sorc/build_all.sh ${_verbose_flag} ${_build_flags} fi # Link the workflow silently unless there's an error From bdc0e290797f0966656b80b255fc7225b46bcd1a Mon Sep 17 00:00:00 2001 From: Eric Sinsky - NOAA <48259628+EricSinsky-NOAA@users.noreply.github.com> Date: Wed, 25 Dec 2024 23:12:33 -0500 Subject: [PATCH 3/4] Fix mod_icec bug in atmos_prod (#3167) The purpose of this PR is to fix a bug that causes the ensstat task to occasionally produce a segmentation fault error. 
This segmentation fault error is due to undefined values not being skipped in the ensemble spread and mean calculations in the ensstat program. The reason undefined values were not being skipped is because variables with undefined values were not using bitmap in the pgrb files. Ensstat expects undefined variables to use bitmap, otherwise ensstat will not skip those undefined values. The undefined variables were not using bitmap because of a bug in the atmos_prod task, where the mod_icec function was not being skipped for grib2 files that did not contain the LAND and ICEC variables. In the [offline UPP](https://github.com/NOAA-EMC/UPP/blob/develop/ush/fv3gfs_dwn_nems.sh), the mod_icec functionality was being executed when LAND and ICEC variables existed in the grib2 file. This same condition has been applied in this PR for mod_icec. Resolves #3150 --- ush/interp_atmos_master.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ush/interp_atmos_master.sh b/ush/interp_atmos_master.sh index 4c4ee4b03c..3a3edc470b 100755 --- a/ush/interp_atmos_master.sh +++ b/ush/interp_atmos_master.sh @@ -53,7 +53,11 @@ export err=$?; err_chk # trim and mask for all grids for grid in "${grids[@]}"; do trim_rh "${output_file_prefix}_${grid}"; export err=$?; err_chk - mod_icec "${output_file_prefix}_${grid}"; export err=$?; err_chk + # shellcheck disable=SC2312 + var_count=$(${WGRIB2} "${output_file_prefix}_${grid}" -match "LAND|ICEC" |wc -l) + if [[ "${var_count}" -eq 2 ]]; then + mod_icec "${output_file_prefix}_${grid}"; export err=$?; err_chk + fi done exit 0 From 1c37f907ecaadd835580d5bb73a0eaf2a146f2fc Mon Sep 17 00:00:00 2001 From: Travis Elless <113720457+TravisElless-NOAA@users.noreply.github.com> Date: Wed, 25 Dec 2024 23:13:30 -0500 Subject: [PATCH 4/4] Remove early-cycle EnKF forecast (#3185) Currently GFS experiments with the early-cycle EnKF call identical jobs to the late-cycle. 
However, since the forecast portion of the early-cycle is handled through the GEFS workflow, the forecast and post jobs are not needed in the GFS early-cycle EnKF. This PR removes calling the early-cycle EnKF forecast and post jobs in GFS experiments, and adds statements to the archive yamls to only search for forecast files during the late-cycle EnKF. --- parm/archive/enkf.yaml.j2 | 4 ++++ parm/archive/enkf_grp.yaml.j2 | 2 ++ parm/archive/enkf_restartb_grp.yaml.j2 | 2 ++ scripts/exgdas_enkf_earc.py | 2 +- workflow/applications/gfs_cycled.py | 4 +++- workflow/rocoto/gfs_tasks.py | 5 ++++- 6 files changed, 16 insertions(+), 3 deletions(-) diff --git a/parm/archive/enkf.yaml.j2 b/parm/archive/enkf.yaml.j2 index 9f9ad296f8..12167198cb 100644 --- a/parm/archive/enkf.yaml.j2 +++ b/parm/archive/enkf.yaml.j2 @@ -3,6 +3,7 @@ enkf: target: "{{ ATARDIR }}/{{ cycle_YMDH }}/{{ RUN }}.tar" required: # Logs + {% if RUN == 'enkfgdas' %} {% for mem in range(1, nmem_ens + 1) %} - "logs/{{ cycle_YMDH }}/{{ RUN }}_fcst_mem{{ '%03d' % mem }}.log" {% endfor %} @@ -10,6 +11,7 @@ enkf: - "logs/{{ cycle_YMDH }}/{{ RUN }}_epos{{ '%03d' % (fhr - fhmin) }}.log" {% endfor %} - "logs/{{ cycle_YMDH }}/{{ RUN }}_echgres.log" + {% endif %} - "logs/{{ cycle_YMDH }}/{{ RUN }}_esfc.log" {% for grp in range(IAUFHRS | length) %} - "logs/{{ cycle_YMDH }}/{{ RUN }}_ecen{{ '%03d' % grp }}.log" @@ -37,6 +39,7 @@ enkf: {% endfor %} # Ensemble mean and spread + {% if RUN == 'enkfgdas' %} {% for fhr in range(3, fhmax + 1, 3) %} - "{{ COMIN_ATMOS_HISTORY_ENSSTAT | relpath(ROTDIR) }}/{{ head }}atmf{{ '%03d' % fhr }}.ensmean.nc" - "{{ COMIN_ATMOS_HISTORY_ENSSTAT | relpath(ROTDIR) }}/{{ head }}sfcf{{ '%03d' % fhr }}.ensmean.nc" @@ -44,6 +47,7 @@ enkf: - "{{ COMIN_ATMOS_HISTORY_ENSSTAT | relpath(ROTDIR) }}/{{ head }}atmf{{ '%03d' % fhr }}.ensspread.nc" {% endif %} {% endfor %} + {% endif %} # Ensemble mean state {% if not DO_JEDIATMENS %} diff --git a/parm/archive/enkf_grp.yaml.j2 b/parm/archive/enkf_grp.yaml.j2
index 933ca45caf..3b58bbb27d 100644 --- a/parm/archive/enkf_grp.yaml.j2 +++ b/parm/archive/enkf_grp.yaml.j2 @@ -10,12 +10,14 @@ enkf_grp: {% set COMIN_ATMOS_RESTART_MEM = COMIN_ATMOS_RESTART_MEM_list[imem] %} # Forecast data + {% if RUN == 'enkfgdas' %} {% for fhr in range(3, 10, 3) %} - "{{ COMIN_ATMOS_HISTORY_MEM | relpath(ROTDIR) }}/{{ head }}atmf{{ "%03d" % fhr }}.nc" {% endfor %} # Only store the 6-hour surface forecast - "{{ COMIN_ATMOS_HISTORY_MEM | relpath(ROTDIR) }}/{{ head }}sfcf006.nc" + {% endif %} # Store the individual member analysis data {% if not lobsdiag_forenkf %} diff --git a/parm/archive/enkf_restartb_grp.yaml.j2 b/parm/archive/enkf_restartb_grp.yaml.j2 index c7aaf6682e..50595a6bbf 100644 --- a/parm/archive/enkf_restartb_grp.yaml.j2 +++ b/parm/archive/enkf_restartb_grp.yaml.j2 @@ -22,6 +22,7 @@ enkf_restartb_grp: {% endfor %} # Now get the restart files. + {% if RUN == 'enkfgdas' %} {% for r_time in range(restart_interval, fhmax + 1, restart_interval) %} {% set r_timedelta = (r_time | string + "H") | to_timedelta %} {% set r_dt = current_cycle | add_to_datetime(r_timedelta) %} @@ -38,3 +39,4 @@ enkf_restartb_grp: - "{{ COMIN_ATMOS_RESTART_MEM | relpath(ROTDIR) }}/{{ r_prefix }}.fv_core.res.nc" {% endfor %} {% endfor %} + {% endif %} diff --git a/scripts/exgdas_enkf_earc.py b/scripts/exgdas_enkf_earc.py index 535dd2ea37..107d541a41 100755 --- a/scripts/exgdas_enkf_earc.py +++ b/scripts/exgdas_enkf_earc.py @@ -28,7 +28,7 @@ def main(): 'DOHYBVAR', 'DOIAU_ENKF', 'IAU_OFFSET', 'DOIAU', 'DO_CA', 'DO_CALC_INCREMENT', 'assim_freq', 'ARCH_CYC', 'DO_JEDISNOWDA', 'ARCH_WARMICFREQ', 'ARCH_FCSTICFREQ', - 'IAUFHRS_ENKF', 'NET'] + 'IAUFHRS_ENKF', 'NET', 'NMEM_ENS_GFS'] archive_dict = AttrDict() for key in keys: diff --git a/workflow/applications/gfs_cycled.py b/workflow/applications/gfs_cycled.py index 543d7a9d8c..5ecfddf276 100644 --- a/workflow/applications/gfs_cycled.py +++ b/workflow/applications/gfs_cycled.py @@ -317,7 +317,9 @@ def 
get_task_names(self): task_names[run].append('echgres') if 'gdas' in run else 0 task_names[run] += ['ediag'] if options['lobsdiag_forenkf'] else ['eomg'] task_names[run].append('esnowanl') if options['do_jedisnowda'] and 'gdas' in run else 0 + task_names[run].append('efcs') if 'gdas' in run else 0 + task_names[run].append('epos') if 'gdas' in run else 0 - task_names[run] += ['stage_ic', 'ecen', 'esfc', 'efcs', 'epos', 'earc', 'cleanup'] + task_names[run] += ['stage_ic', 'ecen', 'esfc', 'earc', 'cleanup'] return task_names diff --git a/workflow/rocoto/gfs_tasks.py b/workflow/rocoto/gfs_tasks.py index 54870b79cc..9b6f712380 100644 --- a/workflow/rocoto/gfs_tasks.py +++ b/workflow/rocoto/gfs_tasks.py @@ -2896,7 +2896,10 @@ def _get_eposgroups(epos): def earc(self): deps = [] - dep_dict = {'type': 'metatask', 'name': f'{self.run}_epmn'} + if 'enkfgdas' in self.run: + dep_dict = {'type': 'metatask', 'name': f'{self.run}_epmn'} + else: + dep_dict = {'type': 'task', 'name': f'{self.run}_esfc'} deps.append(rocoto.add_dependency(dep_dict)) dependencies = rocoto.create_dependency(dep=deps)