From 9f3580887ac0359dab6a36b309cd7a9bbc78f81d Mon Sep 17 00:00:00 2001 From: Xing Wang Date: Thu, 28 Nov 2024 22:59:09 +0100 Subject: [PATCH] Add `create_conda_env` (#3) --- docs/gallery/autogen/how_to.py | 67 +++++++++++++--- src/aiida_pythonjob/utils.py | 138 ++++++++++++++++++++++++++++++++- 2 files changed, 193 insertions(+), 12 deletions(-) diff --git a/docs/gallery/autogen/how_to.py b/docs/gallery/autogen/how_to.py index 4329dc5..8ce77aa 100644 --- a/docs/gallery/autogen/how_to.py +++ b/docs/gallery/autogen/how_to.py @@ -6,16 +6,60 @@ ###################################################################### -# Introduction -# ------------ +# Preparing inputs for `PythonJob` +# -------------------------------- +# The `prepare_pythonjob_inputs` function is available for setting up the +# inputs for a `PythonJob` calculation. This function simplifies the process +# of preparing and serializing data, and configuring the execution environment. +# +# - **Code**: You can specify the `computer` where the job will run, which will +# create a `python3@computer` code if it doesn't already exist. Alternatively, +# if the code has already been created, you can set the `code` directly. +# +# - **Data**: Use standard Python data types for input. The `prepare_pythonjob_inputs` +# function handles the conversion to AiiDA data. For serialization: +# - The function first searches for an AiiDA data entry point corresponding to the module +# and class names (e.g., `ase.atoms.Atoms`). +# - If a matching entry point exists, it is used for serialization. +# - If no match is found, the data is serialized into binary format using `PickledData`. +# +# - **Python Version**: Ensure the Python version on the remote computer matches the local environment. +# This is important since pickle is used for data storage and retrieval. Use **conda** to +# create and activate a virtual environment with the same Python version. 
Pass metadata +# to the scheduler to activate the environment during the job execution: +# +# .. code-block:: python +# +# metadata = { +# "options": { +# 'custom_scheduler_commands': 'module load anaconda\nconda activate py3.11\n', +# } +# } +# +# -------------------------------------------------- +# Create a conda environment on the remote computer +# -------------------------------------------------- +# One can use the `create_conda_env` function to create a conda environment +# on the remote computer. The function will create a conda environment with +# the specified packages and modules. The function will update the packages +# if the environment already exists. +# +# .. code-block:: python +# +# from aiida_pythonjob.utils import create_conda_env +# # create a conda environment on remote computer +# create_conda_env( +# "merlin6", # Remote computer +# "test_pythonjob", # Name of the conda environment +# modules=["anaconda"], # Modules to load (e.g., Anaconda) +# pip=["numpy", "matplotlib"], # Python packages to install via pip +# conda={ # Conda-specific settings +# "channels": ["conda-forge"], # Channels to use +# "dependencies": ["qe"] # Conda packages to install +# } +# ) # -# To run this tutorial, you need to load the AiiDA profile. # - -from aiida import load_profile - -load_profile() - ###################################################################### # Default outputs @@ -24,8 +68,11 @@ # The default output of the function is `result`. The `PythonJob` task # will store the result as one node in the database with the key `result`. 
def generate_bash_to_create_python_env(
    name: str,
    pip: Optional[List[str]] = None,
    conda: Optional[Dict[str, list]] = None,
    modules: Optional[List[str]] = None,
    python_version: Optional[str] = None,
    variables: Optional[Dict[str, str]] = None,
    shell: str = "posix",
) -> str:
    """Generate a bash script that creates or updates a conda environment.

    The script loads the requested modules, initializes conda for the shell,
    configures the requested channels, creates the environment if it does not
    exist yet (or installs the conda dependencies into it if it does),
    activates it, installs the pip packages and exports the variables.
    Its last command echoes ``Environment setup is complete.``.

    :param name: Name of the conda environment.
    :param pip: Requirement specifiers passed to ``pip install``.
    :param conda: Dictionary that can include ``channels`` and ``dependencies``.
    :param modules: Names passed to ``module load`` before conda is used.
    :param python_version: Python version of the environment. If ``None``, the
        ``major.minor`` version of the local interpreter is used.
    :param variables: Environment variables exported at the end of the script.
    :param shell: Shell name used in ``conda shell.<shell> hook``.
    :return: The content of the bash script.
    """
    import shlex
    import sys

    pip = pip or []
    conda = conda or {}
    conda_channels = conda.get("channels") or []
    conda_dependencies = conda.get("dependencies") or []
    # Determine the Python version from the local environment if not provided
    if python_version is None:
        python_version = f"{sys.version_info.major}.{sys.version_info.minor}"

    lines = ["#!/bin/bash", ""]

    if modules:
        lines.append("# Load specified system modules")
        lines.extend(f"module load {module}" for module in modules)

    # Conda shell hook initialization for proper conda activation
    lines.append("# Initialize Conda for this shell")
    lines.append(f'eval "$(conda shell.{shell} hook)"')

    # Channels are configured before the environment is created, otherwise the
    # dependencies that live in those channels cannot be resolved.
    if conda_channels:
        channels = " ".join(shlex.quote(channel) for channel in conda_channels)
        lines += [
            "# Make sure the requested channels are configured",
            "EXISTING_CHANNELS=$(conda config --show channels)",
            f"for CHANNEL in {channels};",
            "do",
            '    if ! echo "$EXISTING_CHANNELS" | grep -qF -- "$CHANNEL"; then',
            '        conda config --prepend channels "$CHANNEL"',
            "    fi",
            "done",
        ]

    # Quote the specifiers: characters such as ``>`` or ``<`` in version
    # constraints would otherwise be interpreted by the shell.
    dependencies = " ".join(shlex.quote(dependency) for dependency in conda_dependencies)
    create_command = f"conda create -y -n {name} python={python_version} {dependencies}".rstrip()
    lines += [
        "# Setup the Python environment",
        # Each row of ``conda info --envs`` is "<name> <path>", so match the
        # name at the start of the line followed by whitespace.
        f'if ! conda info --envs | grep -q "^{name}[[:space:]]"; then',
        "    # Environment does not exist, create it",
        f"    {create_command}",
    ]
    if dependencies:
        lines += [
            "else",
            "    # Environment already exists, update its conda packages",
            f"    conda install -y -n {name} {dependencies}",
        ]
    lines.append("fi")
    lines.append(f"conda activate {name}")

    # Install pip packages
    if pip:
        requirements = " ".join(shlex.quote(requirement) for requirement in pip)
        lines.append(f"pip install {requirements}")

    # Set environment variables; embedded single quotes are escaped so the
    # value always stays one single-quoted shell word.
    if variables:
        for var, value in variables.items():
            escaped = str(value).replace("'", "'\"'\"'")
            lines.append(f"export {var}='{escaped}'")

    # End of the script; ``create_conda_env`` looks for this marker in stdout
    lines.append("echo 'Environment setup is complete.'")

    return "\n".join(lines) + "\n"


def create_conda_env(
    computer: Union[str, "Computer"],
    name: str,
    pip: Optional[List[str]] = None,
    conda: Optional[Dict[str, list]] = None,
    modules: Optional[List[str]] = None,
    python_version: Optional[str] = None,
    variables: Optional[Dict[str, str]] = None,
    shell: str = "posix",
) -> Tuple[bool, Optional[str]]:
    """Create or update a conda environment on a remote computer.

    The script produced by ``generate_bash_to_create_python_env`` is executed
    through the transport of the computer, using the authentication info of
    the default user.

    :param computer: Label of the computer, or the ``Computer`` itself.
    :param name: Name of the conda environment.
    :param pip: Requirement specifiers passed to ``pip install``.
    :param conda: Dictionary that can include ``channels`` and ``dependencies``.
    :param modules: Names passed to ``module load`` before conda is used.
    :param python_version: Python version of the environment; defaults to the
        version of the local interpreter.
    :param variables: Environment variables exported at the end of the script.
    :param shell: Shell name used in ``conda shell.<shell> hook``.
    :return: Tuple ``(success, message)``. ``message`` is ``None`` only when
        the command succeeded without writing anything to stdout or stderr.
    :raises NotExistent: If the computer is not configured for the default user.
    """
    from aiida.common.exceptions import NotExistent

    user = User.collection.get_default()
    if isinstance(computer, str):
        computer = load_computer(computer)
    try:
        authinfo = computer.get_authinfo(user)
    except NotExistent as exception:
        raise NotExistent(
            f"Computer<{computer.label}> is not yet configured for user<{user.email}>"
        ) from exception

    scheduler = authinfo.computer.get_scheduler()
    transport = authinfo.get_transport()

    script = generate_bash_to_create_python_env(name, pip, conda, modules, python_version, variables, shell)
    with transport:
        scheduler.set_transport(transport)
        try:
            retval, stdout, stderr = transport.exec_command_wait(script)
        except NotImplementedError:
            # Not all transport plugins implement remote command execution
            return (
                True,
                f"Skipped, remote command execution is not implemented for the "
                f"`{computer.transport_type}` transport plugin",
            )

    template = """
We detected an error while creating the environment on the remote computer, as shown between the bars
=============================================================================================
{}
=============================================================================================
Please check!
    """

    if retval != 0:
        message = f"The environment setup script returned a non-zero return code ({retval})"
        if stderr:
            message += template.format(stderr)
        return False, message

    # The script does not abort on the first failing command, so any output on
    # stderr is reported as a failure.
    if stderr:
        return False, template.format(stderr)

    if stdout:
        # the last line is the echo 'Environment setup is complete.'
        if not stdout.strip().endswith("Environment setup is complete."):
            return False, template.format(stdout)
        return True, "Environment setup is complete."

    return True, None