Skip to content

Commit

Permalink
Merge pull request #4689 from jedwards4b/add_timestamp_to_rpointers
Browse files Browse the repository at this point in the history
Add timestamp to rpointers
  • Loading branch information
jedwards4b authored Dec 4, 2024
2 parents 29aa60f + f42dc2e commit f285060
Show file tree
Hide file tree
Showing 14 changed files with 259 additions and 93 deletions.
34 changes: 30 additions & 4 deletions CIME/SystemTests/eri.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def run_phase(self):
start_1 = run_startdate

stop_n2 = stop_n - stop_n1
rest_n2 = int(stop_n2 / 2 + 1)

hist_n = stop_n2

start_1_year, start_1_month, start_1_day = [
Expand All @@ -93,10 +93,28 @@ def run_phase(self):
start_2 = "{:04d}-{:02d}-{:02d}".format(
start_2_year, start_1_month, start_1_day
)
rest_n2 = self._set_restart_interval(
stop_n=stop_n2,
stop_option=stop_option,
startdate=start_2,
starttime=start_tod,
)

stop_n3 = stop_n2 - rest_n2
rest_n3 = int(stop_n3 / 2 + 1)

ninst = self._case.get_value("NINST")
drvrest = "rpointer.cpl"
if ninst > 1:
drvrest += "_0001"
drvrest += self._rest_time
self._set_drv_restart_pointer(drvrest)

rest_n3 = self._set_restart_interval(
stop_n=stop_n3,
stop_option=stop_option,
startdate=start_2,
starttime=start_tod,
)
stop_n4 = stop_n3 - rest_n3

expect(stop_n4 >= 1 and stop_n1 >= 1, "Run length too short")
Expand Down Expand Up @@ -223,8 +241,10 @@ def run_phase(self):
self._case.set_value("GET_REFCASE", False)
self._case.set_value("CONTINUE_RUN", False)
self._case.set_value("STOP_N", stop_n3)
self._case.set_value("REST_OPTION", stop_option)
self._case.set_value("REST_N", rest_n3)
self._set_restart_interval(
stop_n=stop_n3, startdate=refdate_3, starttime=refsec_3
)

self._case.set_value("HIST_OPTION", stop_option)
self._case.set_value("HIST_N", stop_n2)
self._case.set_value("DOUT_S", False)
Expand Down Expand Up @@ -266,6 +286,12 @@ def run_phase(self):
self._case.set_value("DOUT_S", False)
self._case.set_value("HIST_OPTION", stop_option)
self._case.set_value("HIST_N", hist_n)
drvrest = "rpointer.cpl"
if ninst > 1:
drvrest += "_0001"
drvrest += self._rest_time

self._set_drv_restart_pointer(drvrest)
self._case.flush()

# do the restart run (short term archiving is off)
Expand Down
13 changes: 6 additions & 7 deletions CIME/SystemTests/err.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import glob, os
from CIME.XML.standard_module_setup import *
from CIME.SystemTests.restart_tests import RestartTest
from CIME.utils import ls_sorted_by_mtime, safe_copy
from CIME.utils import safe_copy

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -35,12 +35,11 @@ def _case_two_setup(self):

def _case_two_custom_prerun_action(self):
dout_s_root = self._case1.get_value("DOUT_S_ROOT")
rest_root = os.path.abspath(os.path.join(dout_s_root, "rest"))
restart_list = ls_sorted_by_mtime(rest_root)
expect(len(restart_list) >= 1, "No restart files found in {}".format(rest_root))
self._case.restore_from_archive(
rest_dir=os.path.join(rest_root, restart_list[0])
)
self._drv_restart_pointer = self._case2.get_value("DRV_RESTART_POINTER")
resttime = self._drv_restart_pointer[-16:]
rest_root = os.path.abspath(os.path.join(dout_s_root, "rest", resttime))
expect(os.path.isdir(rest_root), "None such directory {}".format(rest_root))
self._case.restore_from_archive(rest_dir=rest_root)

def _case_two_custom_postrun_action(self):
# Link back to original case1 name
Expand Down
6 changes: 6 additions & 0 deletions CIME/SystemTests/ers.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,13 @@ def _ers_second_phase(self):
pfile,
os.path.join(os.path.dirname(pfile), "run1." + os.path.basename(pfile)),
)
ninst = self._case.get_value("NINST")
drvrest = "rpointer.cpl"
if ninst > 1:
drvrest += "_0001"
drvrest += self._rest_time

self._set_drv_restart_pointer(drvrest)
self._case.set_value("HIST_N", stop_n)
self._case.set_value("STOP_N", stop_new)
self._case.set_value("CONTINUE_RUN", True)
Expand Down
6 changes: 6 additions & 0 deletions CIME/SystemTests/restart_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,9 @@ def _case_two_setup(self):
self._case.set_value("STOP_N", stop_new)
self._case.set_value("CONTINUE_RUN", True)
self._case.set_value("REST_OPTION", "never")
ninst = self._case.get_value("NINST")
drvrest = "rpointer.cpl"
if ninst > 1:
drvrest += "_0001"
drvrest += self._rest_time
self._set_drv_restart_pointer(drvrest)
89 changes: 80 additions & 9 deletions CIME/SystemTests/system_tests_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@
load_coupler_customization,
)
import CIME.build as build
from datetime import datetime, timedelta
import glob, gzip, time, traceback, os, math, calendar

import glob, gzip, time, traceback, os, math
from contextlib import ExitStack

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -111,6 +112,7 @@ def __init__(
self._init_environment(caseroot)
self._init_locked_files(caseroot, expected)
self._skip_pnl = False
self._rest_time = None
self._cpllog = (
"med" if self._case.get_value("COMP_INTERFACE") == "nuopc" else "cpl"
)
Expand All @@ -119,10 +121,22 @@ def __init__(
self._user_separate_builds = False
self._expected_num_cmp = None
self._rest_n = None
# Does the model support this variable?
self._drv_restart_pointer = self._case.get_value("DRV_RESTART_POINTER")

def _set_drv_restart_pointer(self, value):
    """Write *value* to the case's DRV_RESTART_POINTER variable.

    Does nothing when ``self._drv_restart_pointer`` is falsy, i.e. when
    the value cached from ``DRV_RESTART_POINTER`` was empty or missing.
    """
    # Guard clause: skip the log line and the case update alike.
    if not self._drv_restart_pointer:
        return
    logger.info("setting DRV_RESTART_POINTER={}".format(value))
    self._case.set_value("DRV_RESTART_POINTER", value)

def _set_restart_interval(
self, stop_n=None, stop_option=None, startdate=None, starttime=None
):
if not stop_n:
stop_n = self._case.get_value("STOP_N")
if not stop_option:
stop_option = self._case.get_value("STOP_OPTION")

def _set_restart_interval(self):
stop_n = self._case.get_value("STOP_N")
stop_option = self._case.get_value("STOP_OPTION")
self._case.set_value("REST_OPTION", stop_option)
# We need to make sure the run is long enough and to set REST_N to a
# value that makes sense for all components
Expand Down Expand Up @@ -172,19 +186,68 @@ def _set_restart_interval(self):
factor = 315360000
else:
expect(False, f"stop_option {stop_option} not available for this test")

stop_n = int(stop_n * factor // coupling_secs)
rest_n = math.ceil((stop_n // 2 + 1) * coupling_secs / factor)

expect(stop_n > 0, "Bad STOP_N: {:d}".format(stop_n))

expect(stop_n > 2, "ERROR: stop_n value {:d} too short".format(stop_n))
if not starttime:
starttime = self._case.get_value("START_TOD")
if not startdate:
startdate = self._case.get_value("RUN_STARTDATE")
if "-" in startdate:
startdatetime = datetime.fromisoformat(startdate) + timedelta(
seconds=int(starttime)
)
else:
startdatetime = datetime.strptime(startdate, "%Y%m%d") + timedelta(
seconds=int(starttime)
)

cal = self._case.get_value("CALENDAR")

if stop_option == "nsteps":
rtd = timedelta(seconds=rest_n * factor)
elif stop_option == "nminutes":
rtd = timedelta(minutes=rest_n)
elif stop_option == "nhours":
rtd = timedelta(hours=rest_n)
elif stop_option == "ndays":
rtd = timedelta(days=rest_n)
elif stop_option == "nyears":
rtd = timedelta(days=rest_n * 365)
else:
expect(False, f"stop_option {stop_option} not available for this test")

restdatetime = startdatetime + rtd
if cal == "NO_LEAP":
dayscorrected = 0
syr = startdatetime.year
smon = startdatetime.month
ryr = restdatetime.year
rmon = restdatetime.month
while ryr > syr:
if rmon > 2 and calendar.isleap(ryr):
dayscorrected += 1
ryr = ryr - 1
if rmon > 2 and smon <= 2:
if calendar.isleap(syr):
dayscorrected += 1
restdatetime = restdatetime + timedelta(days=dayscorrected)
self._rest_time = (
f".{restdatetime.year:04d}-{restdatetime.month:02d}-{restdatetime.day:02d}-"
)
h = restdatetime.hour
m = restdatetime.minute
s = restdatetime.second
self._rest_time += f"{(h*3600+m*60+s):05d}"

logger.info(
"doing an {0} {1} initial test with restart file at {2} {1}".format(
str(stop_n), stop_option, str(rest_n)
)
)
self._case.set_value("REST_N", rest_n)

return rest_n

def _init_environment(self, caseroot):
Expand Down Expand Up @@ -261,7 +324,9 @@ def build(
sharedlib_only=(phase_name == SHAREDLIB_BUILD_PHASE),
model_only=(phase_name == MODEL_BUILD_PHASE),
)
except BaseException as e: # We want KeyboardInterrupts to generate FAIL status
except (
BaseException
) as e: # We want KeyboardInterrupts to generate FAIL status
success = False
if isinstance(e, CIMEError):
# Don't want to print stacktrace for a build failure since that
Expand Down Expand Up @@ -334,7 +399,13 @@ def run(self, skip_pnl=False):
"""
success = True
start_time = time.time()
self._skip_pnl = skip_pnl
wav_comp = self._case.get_value("COMP_WAV")
# WW3 requires pnl to be run again after the build phase.
if wav_comp and wav_comp == "ww3":
self._skip_pnl = False
else:
self._skip_pnl = skip_pnl

try:
self._resetup_case(RUN_PHASE)
do_baseline_ops = True
Expand Down
29 changes: 16 additions & 13 deletions CIME/case/case.py
Original file line number Diff line number Diff line change
Expand Up @@ -1846,13 +1846,15 @@ def create_caseroot(self, clone=False):
component_class in self._component_description
and len(self._component_description[component_class]) > 0
):
append_status(
"Component {} is {}".format(
component_class, self._component_description[component_class]
),
"README.case",
caseroot=self._caseroot,
)
if "Stub" not in self._component_description[component_class]:
append_status(
"Component {} is {}".format(
component_class,
self._component_description[component_class],
),
"README.case",
caseroot=self._caseroot,
)
if component_class == "CPL":
append_status(
"Using %s coupler instances" % (self.get_value("NINST_CPL")),
Expand All @@ -1861,12 +1863,13 @@ def create_caseroot(self, clone=False):
)
continue
comp_grid = "{}_GRID".format(component_class)

append_status(
"{} is {}".format(comp_grid, self.get_value(comp_grid)),
"README.case",
caseroot=self._caseroot,
)
grid_val = self.get_value(comp_grid)
if grid_val != "null":
append_status(
"{} is {}".format(comp_grid, self.get_value(comp_grid)),
"README.case",
caseroot=self._caseroot,
)
comp = str(self.get_value("COMP_{}".format(component_class)))
user_mods = self._get_comp_user_mods(comp)
if user_mods is not None:
Expand Down
10 changes: 9 additions & 1 deletion CIME/case/case_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,7 @@ def case_run(self, skip_pnl=False, set_continue_run=False, submit_resubmits=Fals
and len(data_assimilation_script) > 0
and os.path.isfile(data_assimilation_script)
)

drv_restart_pointer = self.get_value("DRV_RESTART_POINTER")
for cycle in range(data_assimilation_cycles):
# After the first DA cycle, runs are restart runs
if cycle > 0:
Expand All @@ -496,6 +496,14 @@ def case_run(self, skip_pnl=False, set_continue_run=False, submit_resubmits=Fals
"{} RUN_MODEL BEGINS HERE".format(time.strftime("%Y-%m-%d %H:%M:%S")),
)
lid = _run_model(self, lid, skip_pnl, da_cycle=cycle)

# get the most recent cpl restart pointer file
rundir = self.get_value("RUNDIR")
if drv_restart_pointer:
pattern = os.path.join(rundir, "rpointer.cpl*")
files = sorted(glob.glob(pattern), key=os.path.getmtime)
drv_ptr = os.path.basename(files[-1])
self.set_value("DRV_RESTART_POINTER", drv_ptr)
model_log(
"e3sm",
logger,
Expand Down
Loading

0 comments on commit f285060

Please sign in to comment.