From 6bb6037f07e37ef47baa92a63cfef353259ba8af Mon Sep 17 00:00:00 2001 From: Shahzeb Siddiqui Date: Fri, 20 Oct 2023 11:38:11 -0400 Subject: [PATCH 1/7] add trap message in build script to catch signals and print message --- buildtest/builders/base.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/buildtest/builders/base.py b/buildtest/builders/base.py index c90a988fb..26f179509 100644 --- a/buildtest/builders/base.py +++ b/buildtest/builders/base.py @@ -566,6 +566,19 @@ def _write_build_script(self, modules=None, modulepurge=None, unload_modules=Non lines = ["#!/bin/bash"] + trap_msg = """ +# Function to handle all signals and perform cleanup +function cleanup() { + echo "Signal trapped. Performing cleanup before exiting." + exitcode=$? + echo "buildtest: command \`$BASH_COMMAND' failed (exit code: $exitcode)" + exit $exitcode +} + +# Trap all signals and call the cleanup function +trap cleanup SIGINT SIGTERM SIGHUP SIGQUIT SIGABRT SIGKILL SIGALRM SIGPIPE SIGTERM SIGTSTP SIGTTIN SIGTTOU +""" + lines.append(trap_msg) lines += self._default_test_variables() lines.append("# source executor startup script") From aa3fd9212cd60b16b08db7e18afe19e152980ac2 Mon Sep 17 00:00:00 2001 From: Shahzeb Siddiqui Date: Fri, 20 Oct 2023 12:09:47 -0400 Subject: [PATCH 2/7] catch exception when running 'BuildTest' class for KeyBoardInterrupt and SystemExit exception and print message --- buildtest/cli/build.py | 3 +- buildtest/main.py | 80 ++++++++++++++++++++++++------------------ 2 files changed, 47 insertions(+), 36 deletions(-) diff --git a/buildtest/cli/build.py b/buildtest/cli/build.py index c1bbd4421..04f15e4ed 100644 --- a/buildtest/cli/build.py +++ b/buildtest/cli/build.py @@ -195,8 +195,7 @@ def discover_buildspecs( # if no files discovered let's stop now if not buildspec_dict["included"]: - msg = "There are no config files to process." - sys.exit(msg) + sys.exit("There are no config files to process.") logger.debug( f"buildtest discovered the following Buildspecs: {buildspec_dict['included']}" diff --git a/buildtest/main.py b/buildtest/main.py index ea59630e8..061ad2e2c 100644 --- a/buildtest/main.py +++ b/buildtest/main.py @@ -2,6 +2,7 @@ import os import shutil +import sys import tempfile import webbrowser @@ -145,40 +146,51 @@ def main(): if args.subcommands in ["build", "bd"]: stdout_file = tempfile.NamedTemporaryFile(delete=True, suffix=".txt") with Tee(stdout_file.name): - cmd = BuildTest( - configuration=configuration, - buildspecs=args.buildspec, - exclude_buildspecs=args.exclude, - executors=args.executor, - tags=args.tags, - name=args.name, - exclude_tags=args.exclude_tags, - filter_buildspecs=args.filter, - rebuild=args.rebuild, - stage=args.stage, - testdir=args.testdir, - buildtest_system=system, - report_file=report_file, - maxpendtime=args.maxpendtime, - poll_interval=args.pollinterval, - remove_stagedir=args.remove_stagedir, - retry=args.retry, - account=args.account, - helpfilter=args.helpfilter, - numprocs=args.procs, - numnodes=args.nodes, - modules=args.modules, - modulepurge=args.module_purge, - unload_modules=args.unload_modules, - rerun=args.rerun, - executor_type=args.executor_type, - timeout=args.timeout, - limit=args.limit, - save_profile=args.save_profile, - profile=args.profile, - max_jobs=args.max_jobs, - ) - cmd.build() + try: + cmd = BuildTest( + configuration=configuration, + buildspecs=args.buildspec, + exclude_buildspecs=args.exclude, + executors=args.executor, + tags=args.tags, + name=args.name, + exclude_tags=args.exclude_tags, + filter_buildspecs=args.filter, + rebuild=args.rebuild, + stage=args.stage, + testdir=args.testdir, + buildtest_system=system, + report_file=report_file, + maxpendtime=args.maxpendtime, + poll_interval=args.pollinterval, + remove_stagedir=args.remove_stagedir, + retry=args.retry, + account=args.account, + helpfilter=args.helpfilter, + numprocs=args.procs, + numnodes=args.nodes, + modules=args.modules, + modulepurge=args.module_purge, + unload_modules=args.unload_modules, + rerun=args.rerun, + executor_type=args.executor_type, + timeout=args.timeout, + limit=args.limit, + save_profile=args.save_profile, + profile=args.profile, + max_jobs=args.max_jobs, + ) + cmd.build() + except KeyboardInterrupt as err: + console.print( + "[red]Unable to complete buildtest build command, signal: KeyboardInterrupt detected" + ) + console.print(err) + sys.exit(1) + except SystemExit as err: + console.print("[red]buildtest build command failed") + console.print(err) + sys.exit(1) if cmd.build_success(): build_history_dir = cmd.get_build_history_dir() From 2e8fbf581134c7af4ac76bc4345ddeb58a7a5167 Mon Sep 17 00:00:00 2001 From: Shahzeb Siddiqui Date: Fri, 20 Oct 2023 15:59:52 -0400 Subject: [PATCH 3/7] refactor some code into list comprehension --- buildtest/executors/setup.py | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/buildtest/executors/setup.py b/buildtest/executors/setup.py index 18371becd..38e86090f 100644 --- a/buildtest/executors/setup.py +++ b/buildtest/executors/setup.py @@ -307,13 +307,11 @@ def run(self, builders): console.print(f"Spawning {num_workers} processes for processing builders") count = 0 while True: - active_builders = [] count += 1 console.rule(f"Iteration {count}") - - for builder in self.builders: - if builder.is_pending(): - active_builders.append(builder) + active_builders = [ + builder for builder in self.builders if builder.is_pending() + ] run_builders = self.select_builders_to_run(active_builders) @@ -322,7 +320,7 @@ def run(self, builders): run_builders = run_builders[: self.max_jobs] if not run_builders: - raise BuildTestError("Unable to run tests ") + raise BuildTestError("Unable to run tests") run_table = Table( Column("Builder", overflow="fold", style="red"), @@ -345,11 +343,11 @@ def run(self, builders): if isinstance(task, BuilderBase): self.builders.add(task) - pending_jobs = set() - for builder in self.builders: - # returns True if attribute builder.job is an instance of class Job. Only add jobs that are active running for pending - if builder.is_batch_job() and builder.is_running(): - pending_jobs.add(builder) + pending_jobs = { + builder + for builder in self.builders + if builder.is_batch_job() and builder.is_running() + } self.poll(pending_jobs) @@ -358,13 +356,12 @@ def run(self, builders): # if builder.is_failed(): # self.builders.remove(builder) - terminate = True + # set Terminate to True if no builders are pending or running - # condition below checks if all tests are complete, if any are pending or running we need to stay in loop until jobs are finished - # until finished - for builder in self.builders: - if builder.is_pending() or builder.is_running(): - terminate = False + terminate = not any( + builder.is_pending() or builder.is_running() + for builder in self.builders + ) if terminate: break From 6c67f567fc9e283918be0ee0397b80cbbf35324f Mon Sep 17 00:00:00 2001 From: Shahzeb Siddiqui Date: Sun, 22 Oct 2023 18:07:42 -0400 Subject: [PATCH 4/7] use rich.track for time display for poll interval --- buildtest/executors/setup.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/buildtest/executors/setup.py b/buildtest/executors/setup.py index 38e86090f..99a3651e4 100644 --- a/buildtest/executors/setup.py +++ b/buildtest/executors/setup.py @@ -10,6 +10,7 @@ import os import time +from rich.progress import track from rich.table import Column, Table from buildtest.builders.base import BuilderBase @@ -381,7 +382,10 @@ def poll(self, pending_jobs): # poll until all pending jobs are complete while pending_jobs: print(f"Polling Jobs in {self.pollinterval} seconds") - time.sleep(self.pollinterval) + for i in track(range(self.pollinterval), description="Poll Interval"): + time.sleep(1) + + # time.sleep(self.pollinterval) jobs = pending_jobs.copy() # for every pending job poll job and mark if job is finished or cancelled From 4093751f9e2ef2f3f967c9f2f94aeac9256e77ee Mon Sep 17 00:00:00 2001 From: Shahzeb Siddiqui Date: Tue, 24 Oct 2023 07:41:37 -0700 Subject: [PATCH 5/7] remove track for displaying poll interval --- buildtest/executors/setup.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/buildtest/executors/setup.py b/buildtest/executors/setup.py index 99a3651e4..6b6cdd397 100644 --- a/buildtest/executors/setup.py +++ b/buildtest/executors/setup.py @@ -10,7 +10,6 @@ import os import time -from rich.progress import track from rich.table import Column, Table from buildtest.builders.base import BuilderBase @@ -379,11 +378,11 @@ def poll(self, pending_jobs): and poll jobs until there is no pending jobs.""" # only add builders that are batch jobs + # poll until all pending jobs are complete while pending_jobs: print(f"Polling Jobs in {self.pollinterval} seconds") - for i in track(range(self.pollinterval), description="Poll Interval"): - time.sleep(1) + time.sleep(self.pollinterval) # time.sleep(self.pollinterval) jobs = pending_jobs.copy() From ec919727e36a41160691f40adf6a3500c962b2c6 Mon Sep 17 00:00:00 2001 From: Shahzeb Siddiqui Date: Tue, 24 Oct 2023 11:17:23 -0400 Subject: [PATCH 6/7] removing test directory and cancelling jobs if Crtl+C is pressed during execution --- buildtest/cli/build.py | 5 +- buildtest/executors/setup.py | 125 +++++++++++++++++++++-------------- 2 files changed, 78 insertions(+), 52 deletions(-) diff --git a/buildtest/cli/build.py b/buildtest/cli/build.py index 04f15e4ed..95c06d479 100644 --- a/buildtest/cli/build.py +++ b/buildtest/cli/build.py @@ -1222,7 +1222,10 @@ def run_phase(self): """ console.rule("[bold red]Running Tests") - self.buildexecutor.run(self.builders) + try: + self.buildexecutor.run(self.builders) + except KeyboardInterrupt: + raise KeyboardInterrupt builders = self.buildexecutor.get_validbuilders() ########## TEST SUMMARY #################### diff --git a/buildtest/executors/setup.py b/buildtest/executors/setup.py index 6b6cdd397..ab3c8e16c 100644 --- a/buildtest/executors/setup.py +++ b/buildtest/executors/setup.py @@ -8,6 +8,7 @@ import logging import multiprocessing as mp import os +import shutil import time from rich.table import Column, Table @@ -306,66 +307,89 @@ def run(self, builders): pool = mp.Pool(num_workers) console.print(f"Spawning {num_workers} processes for processing builders") count = 0 - while True: - count += 1 - console.rule(f"Iteration {count}") - active_builders = [ - builder for builder in self.builders if builder.is_pending() - ] - - run_builders = self.select_builders_to_run(active_builders) - - # if max_jobs property is set then reduce the number of jobs to run to max_jobs - if self.max_jobs: - run_builders = run_builders[: self.max_jobs] - - if not run_builders: - raise BuildTestError("Unable to run tests") - - run_table = Table( - Column("Builder", overflow="fold", style="red"), - title="Builders Eligible to Run", - header_style="blue", - ) - for builder in run_builders: - run_table.add_row(f"{str(builder)}") - console.print(run_table) + try: + while True: + count += 1 + console.rule(f"Iteration {count}") + active_builders = [ + builder for builder in self.builders if builder.is_pending() + ] + + run_builders = self.select_builders_to_run(active_builders) + + # if max_jobs property is set then reduce the number of jobs to run to max_jobs + if self.max_jobs: + run_builders = run_builders[: self.max_jobs] + + if not run_builders: + raise BuildTestError("Unable to run tests") + + run_table = Table( + Column("Builder", overflow="fold", style="red"), + title="Builders Eligible to Run", + header_style="blue", + ) + for builder in run_builders: + run_table.add_row(f"{str(builder)}") + console.print(run_table) - results = [] + results = [] - for builder in run_builders: - executor = self._choose_executor(builder) - results.append(pool.apply_async(executor.run, args=(builder,))) - self.builders.remove(builder) + for builder in run_builders: + executor = self._choose_executor(builder) + results.append(pool.apply_async(executor.run, args=(builder,))) + self.builders.remove(builder) - for result in results: - task = result.get() - if isinstance(task, BuilderBase): - self.builders.add(task) + for result in results: + task = result.get() + if isinstance(task, BuilderBase): + self.builders.add(task) - pending_jobs = { - builder - for builder in self.builders - if builder.is_batch_job() and builder.is_running() - } + pending_jobs = { + builder + for builder in self.builders + if builder.is_batch_job() and builder.is_running() + } - self.poll(pending_jobs) + self.poll(pending_jobs) - # remove any failed jobs from list - # for builder in self.builders: - # if builder.is_failed(): - # self.builders.remove(builder) + # remove any failed jobs from list + # for builder in self.builders: + # if builder.is_failed(): + # self.builders.remove(builder) - # set Terminate to True if no builders are pending or running + # set Terminate to True if no builders are pending or running - terminate = not any( - builder.is_pending() or builder.is_running() - for builder in self.builders - ) + terminate = not any( + builder.is_pending() or builder.is_running() + for builder in self.builders + ) - if terminate: - break + if terminate: + break + except KeyboardInterrupt: + console.print("Caught KeyboardInterrupt, terminating workers") + for builder in self.builders: + console.print( + f"{builder}: Removing test directory: {builder.test_root}" + ) + try: + shutil.rmtree(builder.test_root) + except OSError: + continue + + if builder.is_batch_job(): + console.print(f"{builder}: Cancelling Job {builder.job.get()}") + builder.job.cancel() + + # close the worker pool by preventing any more tasks from being submitted + pool.close() + + # terminate all worker processes + pool.join() + + raise KeyboardInterrupt # close the worker pool by preventing any more tasks from being submitted pool.close() @@ -378,7 +402,6 @@ def poll(self, pending_jobs): and poll jobs until there is no pending jobs.""" # only add builders that are batch jobs - # poll until all pending jobs are complete while pending_jobs: print(f"Polling Jobs in {self.pollinterval} seconds") From f36aa9ccc66493fa8e89f2a20ebf88be128b374a Mon Sep 17 00:00:00 2001 From: Shahzeb Siddiqui Date: Tue, 24 Oct 2023 11:23:36 -0400 Subject: [PATCH 7/7] add exception handling when removing directory --- buildtest/executors/setup.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/buildtest/executors/setup.py b/buildtest/executors/setup.py index ab3c8e16c..5e5de785d 100644 --- a/buildtest/executors/setup.py +++ b/buildtest/executors/setup.py @@ -368,19 +368,24 @@ def run(self, builders): if terminate: break except KeyboardInterrupt: - console.print("Caught KeyboardInterrupt, terminating workers") + console.print("[red]Caught KeyboardInterrupt, terminating workers") for builder in self.builders: console.print( - f"{builder}: Removing test directory: {builder.test_root}" + f"[blue]{builder}[/blue]: [red]Removing test directory: {builder.test_root}" ) try: shutil.rmtree(builder.test_root) - except OSError: + except OSError as err: + console.print( + f"[blue]{builder}[/blue]: [red]Unable to delete test directory {builder.test_root} with error: {err.strerror}" + ) continue if builder.is_batch_job(): - console.print(f"{builder}: Cancelling Job {builder.job.get()}") + console.print( + f"[blue]{builder}[/blue]: [red]Cancelling Job {builder.job.get()}" + ) builder.job.cancel() # close the worker pool by preventing any more tasks from being submitted