Skip to content

Commit

Permalink
Merge pull request #11 from cedadev/cci_cases
Browse files Browse the repository at this point in the history
Changes from CCI branch to main
  • Loading branch information
dwest77a authored Mar 6, 2024
2 parents 91f2ccb + 32f7c3f commit 7b71b88
Show file tree
Hide file tree
Showing 22 changed files with 5,487 additions and 5,989 deletions.
707 changes: 399 additions & 308 deletions assess.py

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion extensions/templates/setup-cci.sh → config/setup-cci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ export WORKDIR=/gws/nopw/j04/esacci_portal/kerchunk_conversion/
export SRCDIR=/home/users/dwest77/Documents/kerchunk_dev/kerchunk-builder
export KVENV=/home/users/dwest77/Documents/kerchunk_dev/kerchunk-builder/build_venv

source $KVENV/bin/activate
source $KVENV/bin/activate
module load jaspy
File renamed without changes.
File renamed without changes.
8 changes: 2 additions & 6 deletions extensions/templates/phase.sbatch.template
Original file line number Diff line number Diff line change
@@ -1,14 +1,10 @@
#!/bin/bash
#SBATCH --partition=short-serial-4hr
#SBATCH --account=short4hr
#SBATCH --job-name={}
#SBATCH --partition=short-serial
#SBATCH --job-name={}_kerchunk

#SBATCH --time={}
#SBATCH --mem={}

#SBATCH -o {}
#SBATCH -e {}

module add jaspy
source {}/bin/activate

Expand Down
24 changes: 21 additions & 3 deletions group_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import json
import os
import argparse
import subprocess

from pipeline.logs import init_logger, BypassSwitch

Expand All @@ -28,8 +29,15 @@ def get_attribute(env, args, var):
else:
print(f'Error: Missing attribute {var}')
return None


def main(args):
    """Run a group deployment from parsed command-line arguments.

    Creates the 'main-group' logger from ``args.verbose`` and passes it,
    together with ``args``, to ``deploy``.
    """
    group_logger = init_logger(args.verbose, 0, 'main-group')
    deploy(args, group_logger)

def deploy(args, logger, get_id=False, dependent_id=False):
"""Assemble sbatch script for parallel running jobs"""

logger = init_logger(args.verbose, 0, 'main-group')
Expand Down Expand Up @@ -98,8 +106,6 @@ def main(args):
f'{group}_{phase}_array', # Job name
time, # Time
mem, # Memory
f'{GROUPDIR}/outs/%A_{label}/%a.out', # Outs
f'{GROUPDIR}/errs/%A_{label}/%a.err', # Errs
VENV,
WORKDIR,
GROUPDIR,
Expand All @@ -115,6 +121,8 @@ def main(args):
sb += ' -Q'
if args.backtrack:
sb += ' -B'
if args.dryrun:
sb += ' -d'

if 'X' in args.bypass:
logger.warning('Running with XK Shape Bypass flag "X" is experimental and should only be used with approval.')
Expand All @@ -130,7 +138,17 @@ def main(args):
logger.info('DRYRUN: sbatch command: ')
print(f'sbatch --array=0-{group_len-1} {group_phase_sbatch}')
else:
os.system(f'sbatch --array=0-{group_len-1} {group_phase_sbatch}')
if get_id: # Unused section to save the ID of the process
result = subprocess.run(['sbatch', f'--array=0-{group_len-1}', group_phase_sbatch], stdout=subprocess.PIPE)
try:
id = result.stdout.decode('utf-8').split(' ')[3].strip() # Check!
assert len(id) == 8
return id
except:
logger.error('Slurm submission failed')
return None
else:
os.system(f'sbatch --array=0-{group_len-1} {group_phase_sbatch}')

if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Run a pipeline step for a group of datasets')
Expand Down
23 changes: 23 additions & 0 deletions pipeline/allocator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
__author__ = "Daniel Westwood"
__contact__ = "[email protected]"
__copyright__ = "Copyright 2023 United Kingdom Research and Innovation"

# Job Subset Allocation Script
# - Calculate Expected Utilisation for each Job (500 + Kerchunk Size*5 MB)

# First-Fit Bin Packing Algorithm
# - Sort utilisations from largest to smallest.
# - Bin capacity is the largest utilisation, rounded up to the next memory cap (1, 2, 3 or 4 GB).
# - Allocate each item to the first bin with enough space remaining.
# - End with N bins (subsets); write the list of project codes for each subset to a separate file, proj_code_subsets/set_N.txt.
# - Run the job array with the number of subsets already set.

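# Utilisation estimate is (total_chunks * 835) + 500 (MB)
# NOTE(review): this differs from the estimate given earlier in this header (500 + Kerchunk Size*5 MB) - confirm which formula is current.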
"""
for proj_code in (repeat_id set):
open detail-cfg (for this code)
calculate utilisation
add to dict [utilisation, proj_code]
keep track of max/min
get bins using binpacking (pypi)
"""
Loading

0 comments on commit 7b71b88

Please sign in to comment.