From 15707dada7b0a3015f0ce35c354496a6befaaaa1 Mon Sep 17 00:00:00 2001 From: sverhoeven Date: Tue, 22 Oct 2024 14:26:20 +0200 Subject: [PATCH 1/4] Add --account and --job-name to slurm submission Refs #105 --- docs/configuration.md | 2 ++ src/bartender/schedulers/abstract.py | 4 ++++ src/bartender/schedulers/slurm.py | 18 ++++++++++++++---- src/bartender/web/api/applications/submit.py | 19 +++++++++++++++++-- tests/schedulers/helpers.py | 2 ++ 5 files changed, 39 insertions(+), 6 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 6cd07ee8..977e1ec6 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -191,6 +191,8 @@ A destination has its own authentication mechanism. When a job is submitted by any user of the web service, it will be executed by the username/proxy that is configured in the destination. +For allowed configuration options see the [API reference](https://i-vresse-bartender.readthedocs.io/en/latest/autoapi/bartender). + ### Example of running jobs on the local system ```yaml diff --git a/src/bartender/schedulers/abstract.py b/src/bartender/schedulers/abstract.py index 36e265a0..765a46f1 100644 --- a/src/bartender/schedulers/abstract.py +++ b/src/bartender/schedulers/abstract.py @@ -16,6 +16,10 @@ class JobDescription(BaseModel): job_dir: Path # Command to run command: str + # Application name + application: str = "" + # User that submitted the job + submitter: str = "" class JobSubmissionError(Exception): diff --git a/src/bartender/schedulers/slurm.py b/src/bartender/schedulers/slurm.py index 7c41a823..6b99a105 100644 --- a/src/bartender/schedulers/slurm.py +++ b/src/bartender/schedulers/slurm.py @@ -52,6 +52,7 @@ class SlurmSchedulerConfig(BaseModel): extra_options: Escape hatch to add extra options to job script. The string `#SBATCH {extra_options[i]}` will be appended to job script. + submitter_as_account: Use the submitter as the account to run the job. """ type: Literal["slurm"] = "slurm" @@ -59,6 +60,7 @@ class SlurmSchedulerConfig(BaseModel): partition: Optional[str] = None time: Optional[str] = None extra_options: Optional[list[str]] = None + submitter_as_account: Optional[bool] = False class SlurmScheduler(AbstractScheduler): @@ -80,9 +82,10 @@ def __init__(self, config: SlurmSchedulerConfig): self.extra_options = [] else: self.extra_options = config.extra_options + self.submitter_as_account = config.submitter_as_account async def submit(self, description: JobDescription) -> str: # noqa: D102): - # TODO if runner is a SSHCommandRunner then description.jobdir + # if runner is a SSHCommandRunner then description.jobdir # must be on a shared filesystem or remote filesystem script = self._submit_script(description) command = "sbatch" @@ -167,14 +170,19 @@ async def _state_from_accounting(self, job_id: str) -> str: ) return stdout - def _submit_script(self, description: JobDescription) -> str: + def _submit_script(self, description: JobDescription) -> str: # noqa: WPS210 partition_line = "" if self.partition: partition_line = f"#SBATCH --partition={self.partition}" time_line = "" if self.time: time_line = f"#SBATCH --time={self.time}" - + account_line = "" + if description.submitter != "" and self.submitter_as_account: + account_line = f"#SBATCH --account={description.submitter}" + job_name_line = "" + if description.application != "": + job_name_line = f"#SBATCH --job-name={description.application}" # TODO filter out options already set extra_option_lines = "\n".join( [f"#SBATCH {extra}" for extra in self.extra_options], @@ -184,9 +192,11 @@ def _submit_script(self, description: JobDescription) -> str: {extra_option_lines} {partition_line} {time_line} + {account_line} + {job_name_line} #SBATCH --output=stdout.txt #SBATCH --error=stderr.txt {description.command} echo -n $? > returncode - """ + """ # noqa: WPS221 return dedent(script) diff --git a/src/bartender/web/api/applications/submit.py b/src/bartender/web/api/applications/submit.py index 0b76dc9c..fe6e56a0 100644 --- a/src/bartender/web/api/applications/submit.py +++ b/src/bartender/web/api/applications/submit.py @@ -13,6 +13,8 @@ def build_description( job_dir: Path, payload: dict[str, str], config: ApplicatonConfiguration, + application: str = "", + submitter: str = "", ) -> JobDescription: """ Builds a job description. @@ -21,13 +23,20 @@ def build_description( job_dir: The directory where the job will be executed. payload: The payload containing the non-file input data for the job. config: The configuration for the application. + application: The name of the application. + submitter: The user who submitted the job. Returns: Job description containing the job directory and command. """ template = template_environment.from_string(config.command_template) command = template.render(**payload) - return JobDescription(job_dir=job_dir, command=command) + return JobDescription( + job_dir=job_dir, + command=command, + application=application, + submitter=submitter, + ) async def submit( # noqa: WPS211 @@ -50,7 +59,13 @@ async def submit( # noqa: WPS211 job_dao: JobDAO object. context: Context with applications and destinations. """ - description = build_description(job_dir, payload, context.applications[application]) + description = build_description( + job_dir, + payload, + context.applications[application], + application, + submitter.username, + ) destination_name = context.destination_picker( job_dir, diff --git a/tests/schedulers/helpers.py b/tests/schedulers/helpers.py index 998ee49f..e4951319 100644 --- a/tests/schedulers/helpers.py +++ b/tests/schedulers/helpers.py @@ -10,6 +10,8 @@ def prepare_input(job_dir: Path) -> JobDescription: return JobDescription( command="echo -n hello && wc input > output", job_dir=job_dir, + submitter="test", + application="hellowc", ) From 9b4214ffa12e5c2f3533f97f96cf66d3d4b65e26 Mon Sep 17 00:00:00 2001 From: sverhoeven Date: Tue, 22 Oct 2024 15:06:20 +0200 Subject: [PATCH 2/4] Assert sbatch script --- src/bartender/schedulers/slurm.py | 1 + src/bartender/web/api/applications/submit.py | 4 ++-- tests/schedulers/helpers.py | 2 +- tests/schedulers/test_slurm.py | 10 +++++++++- 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/bartender/schedulers/slurm.py b/src/bartender/schedulers/slurm.py index 6b99a105..b47e997a 100644 --- a/src/bartender/schedulers/slurm.py +++ b/src/bartender/schedulers/slurm.py @@ -89,6 +89,7 @@ async def submit(self, description: JobDescription) -> str: # noqa: D102): # must be on a shared filesystem or remote filesystem script = self._submit_script(description) command = "sbatch" + logger.debug(f"Submitting job with stdin: \n{script}") (returncode, stdout, stderr) = await self.runner.run( command, args=[], diff --git a/src/bartender/web/api/applications/submit.py b/src/bartender/web/api/applications/submit.py index fe6e56a0..6ef3aac6 100644 --- a/src/bartender/web/api/applications/submit.py +++ b/src/bartender/web/api/applications/submit.py @@ -63,8 +63,8 @@ async def submit( # noqa: WPS211 job_dir, payload, context.applications[application], - application, - submitter.username, + application=application, + submitter=submitter.username, ) destination_name = context.destination_picker( diff --git a/tests/schedulers/helpers.py b/tests/schedulers/helpers.py index e4951319..500880b6 100644 --- a/tests/schedulers/helpers.py +++ b/tests/schedulers/helpers.py @@ -10,7 +10,7 @@ def prepare_input(job_dir: Path) -> JobDescription: return JobDescription( command="echo -n hello && wc input > output", job_dir=job_dir, - submitter="test", + submitter="testsubmitter", application="hellowc", ) diff --git a/tests/schedulers/test_slurm.py b/tests/schedulers/test_slurm.py index b142026d..01c2669c 100644 --- a/tests/schedulers/test_slurm.py +++ b/tests/schedulers/test_slurm.py @@ -1,3 +1,4 @@ +import logging from asyncio import new_event_loop from pathlib import Path from typing import Generator @@ -69,11 +70,15 @@ def slurm_server() -> Generator[SlurmContainer, None, None]: async def test_ok_running_job_with_input_and_output_file( tmp_path: Path, slurm_server: SlurmContainer, + caplog: pytest.LogCaptureFixture, ) -> None: + caplog.set_level(logging.DEBUG, "bartender.schedulers.slurm") job_dir = tmp_path try: ssh_config = slurm_server.get_config() - scheduler = SlurmScheduler(SlurmSchedulerConfig(ssh_config=ssh_config)) + scheduler = SlurmScheduler( + SlurmSchedulerConfig(ssh_config=ssh_config, submitter_as_account=True), + ) description = prepare_input(job_dir) fs = slurm_server.get_filesystem() localized_description = fs.localize_description(description, job_dir.parent) @@ -87,6 +92,8 @@ async def test_ok_running_job_with_input_and_output_file( await fs.download(localized_description, description) assert_output(job_dir) + assert "--job-name=hellowc" in caplog.text + assert "--account=testsubmitter" in caplog.text finally: await scheduler.close() @@ -113,6 +120,7 @@ async def test_ok_running_job_without_iofiles( await fs.download(localized_description, description) assert_output_without_iofiles(job_dir) + finally: await scheduler.close() From a926a404d9f2a148d69daf3ea93eb74faba617c6 Mon Sep 17 00:00:00 2001 From: sverhoeven Date: Tue, 22 Oct 2024 15:35:51 +0200 Subject: [PATCH 3/4] Bump version + add release docs --- docs/conf.py | 2 +- docs/develop.md | 11 +++++++++++ pyproject.toml | 2 +- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index b0ffdcfd..cd184e33 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -7,7 +7,7 @@ # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information project = "bartender" -release = "0.5.1" # TODO have version only in one place pyproject.toml +release = "0.5.2" # TODO have version only in one place pyproject.toml # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/docs/develop.md b/docs/develop.md index b8ae6b24..6e952f70 100644 --- a/docs/develop.md +++ b/docs/develop.md @@ -232,3 +232,14 @@ dirac-dms-filecatalog-cli # pilot logs cat ~diracpilot/localsite/output/* ``` + +## Creating a new release + +To create a new release, you should follow these steps: + +1. In `pyproject.toml` and `docs/conf.py` update the version number. +2. Create a new GitHub release + +- set tag and title to new version number with `v` prefix. +- with same first line of description as previous release. +- append generated release notes to the description. diff --git a/pyproject.toml b/pyproject.toml index 36aff724..f81ca71f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "bartender" -version = "0.5.1" +version = "0.5.2" description = "Job middleware for i-VRESSE" authors = [ From fc9c1de59559786880bc395aa50d9dfea041dd67 Mon Sep 17 00:00:00 2001 From: sverhoeven Date: Tue, 22 Oct 2024 16:11:06 +0200 Subject: [PATCH 4/4] Move submitter_as_account=true to own test --- tests/schedulers/helpers.py | 2 -- tests/schedulers/test_slurm.py | 37 +++++++++++++++++++++++++++++++--- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/tests/schedulers/helpers.py b/tests/schedulers/helpers.py index 500880b6..998ee49f 100644 --- a/tests/schedulers/helpers.py +++ b/tests/schedulers/helpers.py @@ -10,8 +10,6 @@ def prepare_input(job_dir: Path) -> JobDescription: return JobDescription( command="echo -n hello && wc input > output", job_dir=job_dir, - submitter="testsubmitter", - application="hellowc", ) diff --git a/tests/schedulers/test_slurm.py b/tests/schedulers/test_slurm.py index 01c2669c..f8200118 100644 --- a/tests/schedulers/test_slurm.py +++ b/tests/schedulers/test_slurm.py @@ -70,14 +70,12 @@ def slurm_server() -> Generator[SlurmContainer, None, None]: async def test_ok_running_job_with_input_and_output_file( tmp_path: Path, slurm_server: SlurmContainer, - caplog: pytest.LogCaptureFixture, ) -> None: - caplog.set_level(logging.DEBUG, "bartender.schedulers.slurm") job_dir = tmp_path try: ssh_config = slurm_server.get_config() scheduler = SlurmScheduler( - SlurmSchedulerConfig(ssh_config=ssh_config, submitter_as_account=True), + SlurmSchedulerConfig(ssh_config=ssh_config), ) description = prepare_input(job_dir) fs = slurm_server.get_filesystem() @@ -92,9 +90,42 @@ async def test_ok_running_job_with_input_and_output_file( await fs.download(localized_description, description) assert_output(job_dir) + finally: + await scheduler.close() + + +@pytest.mark.anyio +async def test_submit_cancel_job_with_submitter_as_account( # noqa: WPS231 + tmp_path: Path, + slurm_server: SlurmContainer, + caplog: pytest.LogCaptureFixture, +) -> None: + caplog.set_level(logging.DEBUG, "bartender.schedulers.slurm") + job_dir = tmp_path + try: + ssh_config = slurm_server.get_config() + scheduler = SlurmScheduler( + SlurmSchedulerConfig( + ssh_config=ssh_config, + submitter_as_account=True, + ), + ) + description = prepare_input(job_dir) + description.submitter = "testsubmitter" + description.application = "hellowc" + + fs = slurm_server.get_filesystem() + localized_description = fs.localize_description(description, job_dir.parent) + + await fs.upload(description, localized_description) + + jid = await scheduler.submit(localized_description) + await scheduler.cancel(jid) + assert "--job-name=hellowc" in caplog.text assert "--account=testsubmitter" in caplog.text finally: + await fs.delete(localized_description) await scheduler.close()