diff --git a/conf/vsc_kul_uhasselt.config b/conf/vsc_kul_uhasselt.config
index ac5419c4..a3e92c4b 100644
--- a/conf/vsc_kul_uhasselt.config
+++ b/conf/vsc_kul_uhasselt.config
@@ -2,6 +2,8 @@
 // see: https://github.com/nf-core/configs?tab=readme-ov-file#adding-a-new-config
 scratch_dir = System.getenv("VSC_SCRATCH") ?: "/tmp"
 tier1_project = System.getenv("SLURM_ACCOUNT") ?: null
+avail_queues = System.getenv("VSC_DEDICATED_QUEUES") ?: null
+def availQueues = avail_queues?.toString()?.split(',') ?: []  // empty list when VSC_DEDICATED_QUEUES is unset, so the .contains() checks below are null-safe
 
 // Perform work directory cleanup when the run has succesfully completed
 // cleanup = true
@@ -47,6 +49,11 @@ aws {
     maxErrorRetry = 3
 }
 
+// Function to limit task time when dedicated queues are not available
+def limitTaskTime(time, maxTime) {
+    return time > maxTime ? maxTime : time
+}
+
 // Define profiles for each cluster
 profiles {
     genius {
@@ -68,6 +75,8 @@ profiles {
                 resourceLimits = [ memory: 703.GB, cpus: 36 , time: 168.h ]
                 apptainer.runOptions = '--containall --cleanenv --nv'
                 singularity.runOptions = '--containall --cleanenv --nv'
+
+                // Set clusterOptions
                 clusterOptions = {
                     // suggested to use 9 cpus per gpu
                     def gpus = task.accelerator?.request ?: Math.max(1, Math.floor((task.cpus ?:1)/9) as int)
@@ -104,7 +113,7 @@ profiles {
             }
         }
     }
-
+
     wice {
 
         params.config_profile_description = 'wice profile for use on the Wice cluster of the VSC HPC.'
@@ -112,19 +121,26 @@ profiles {
             // max is 2016000
             resourceLimits = [ memory: 1968.GB, cpus: 72, time: 168.h ]
             beforeScript = 'module load cluster/wice'
-
+            // Set queue
+            // The task time is limited to 72 hours if the memory is 239 GB or more
+            // and the dedicated queues are not available
             queue = {
-                task.memory >= 239.GB ?
-                    (task.time >= 72.h ? 'dedicated_big_bigmem' : 'bigmem,hugemem') :
-                    (task.time >= 72.h ? 'batch_long,batch_icelake_long,batch_sapphirerapids_long' : 'batch,batch_sapphirerapids,batch_icelake')
+                def maxTime = 72.h
+                if (task.memory >= 239.GB) {
+                    task.time = task.time >= maxTime && !availQueues.contains('dedicated_big_bigmem') ?
+                        limitTaskTime(task.time, maxTime) : task.time
+                    return availQueues.contains('dedicated_big_bigmem') ? 'dedicated_big_bigmem' : 'bigmem,hugemem'
+                } else {
+                    return task.time >= maxTime ? 'batch_long,batch_icelake_long,batch_sapphirerapids_long' : 'batch,batch_sapphirerapids,batch_icelake'
+                }
             }
-
+            // Set clusterOptions, changing account based on queue
             clusterOptions = {
                 def queueValue = {
-                    task.memory >= 239.GB ?
-                        (task.time >= 72.h ? 'dedicated_big_bigmem' : 'bigmem,hugemem') :
+                    task.memory >= 239.GB ?
+                        (task.time >= 72.h && availQueues.contains('dedicated_big_bigmem') ? 'dedicated_big_bigmem' : 'bigmem,hugemem') :
                         (task.time >= 72.h ? 'batch_long,batch_icelake_long,batch_sapphirerapids_long' : 'batch,batch_sapphirerapids,batch_icelake')
                 }
                 queueValue() =~ /dedicated/ ? "--clusters=wice --account=lp_big_wice_cpu" : "--clusters=wice --account=$tier1_project"
@@ -134,19 +150,31 @@ profiles {
                 resourceLimits = [ memory: 703.GB, cpus: 64, time: 168.h ]
                 apptainer.runOptions = '--containall --cleanenv --nv'
                 singularity.runOptions = '--containall --cleanenv --nv'
+
+                // Set queue
+                // The task time is limited to 72 hours if the memory is 239 GB or more
+                // and the dedicated queues are not available
                 queue = {
-                    task.memory >= 239.GB ?
-                        (task.time >= 72.h ? 'dedicated_big_gpu_h100' : 'gpu_h100') :
-                        (task.time >= 72.h ? 'dedicated_big_gpu' : 'gpu_a100,gpu')
+                    def maxTime = 72.h
+                    if (task.memory >= 239.GB) {
+                        task.time = task.time >= maxTime && !availQueues.contains('dedicated_big_gpu_h100') ?
+                            limitTaskTime(task.time, maxTime) : task.time
+                        return availQueues.contains('dedicated_big_gpu_h100') ? 'dedicated_big_gpu_h100' : 'gpu_h100'
+                    } else {
+                        task.time = task.time >= maxTime && !availQueues.contains('dedicated_big_gpu') ?
+                            limitTaskTime(task.time, maxTime) : task.time
+                        return availQueues.contains('dedicated_big_gpu') ? 'dedicated_big_gpu' : 'gpu_a100,gpu'
+                    }
                 }
+
                 clusterOptions = {
                     // suggested to use 16 cpus per gpu
                     def gpus = task.accelerator?.request ?: Math.max(1, Math.floor((task.cpus ?:1)/16) as int)
                     // Do same queue evaluation as above
                     def queueValue = {
-                        task.memory >= 239.GB ?
-                            (task.time >= 72.h ? 'dedicated_big_gpu_h100' : 'gpu_h100') :
-                            (task.time >= 72.h ? 'dedicated_big_gpu' : 'gpu_a100,gpu')
+                        task.memory >= 239.GB ?
+                            (task.time >= 72.h && availQueues.contains('dedicated_big_gpu_h100') ? 'dedicated_big_gpu_h100' : 'gpu_h100') :
+                            (task.time >= 72.h && availQueues.contains('dedicated_big_gpu') ? 'dedicated_big_gpu' : 'gpu_a100,gpu')
                     }
                     // Set clusterOptions, changing account based on queue
@@ -167,19 +195,31 @@ profiles {
             // 768 - 65 so 65GB for overhead, max is 720000MB
             resourceLimits = [ memory: 703.GB, cpus: 64, time: 168.h ]
             beforeScript = 'module load cluster/wice'
+            // Set queue
+            // The task time is limited to 72 hours if the memory is 239 GB or more
+            // and the dedicated queues are not available
             queue = {
-                task.memory >= 239.GB ?
-                    (task.time >= 72.h ? 'dedicated_big_gpu_h100' : 'gpu_h100') :
-                    (task.time >= 72.h ? 'dedicated_big_gpu' : 'gpu_a100,gpu')
+                def maxTime = 72.h
+                if (task.memory >= 239.GB) {
+                    task.time = task.time >= maxTime && !availQueues.contains('dedicated_big_gpu_h100') ?
+                        limitTaskTime(task.time, maxTime) : task.time
+                    return availQueues.contains('dedicated_big_gpu_h100') ? 'dedicated_big_gpu_h100' : 'gpu_h100'
+                } else {
+                    task.time = task.time >= maxTime && !availQueues.contains('dedicated_big_gpu') ?
+                        limitTaskTime(task.time, maxTime) : task.time
+                    return availQueues.contains('dedicated_big_gpu') ? 'dedicated_big_gpu' : 'gpu_a100,gpu'
+                }
             }
+
+            // Set clusterOptions
             clusterOptions = {
                 // suggested to use 16 cpus per gpu
                 def gpus = task.accelerator?.request ?: Math.max(1, Math.floor((task.cpus ?:1)/16) as int)
-                // Do same queue evaluation as above
+                // Do same queue evaluation as above, without adjusting task.time
                 def queueValue = {
-                    task.memory >= 239.GB ?
-                        (task.time >= 72.h ? 'dedicated_big_gpu_h100' : 'gpu_h100') :
-                        (task.time >= 72.h ? 'dedicated_big_gpu' : 'gpu_a100,gpu')
+                    task.memory >= 239.GB ?
+                        (task.time >= 72.h && availQueues.contains('dedicated_big_gpu_h100') ? 'dedicated_big_gpu_h100' : 'gpu_h100') :
+                        (task.time >= 72.h && availQueues.contains('dedicated_big_gpu') ? 'dedicated_big_gpu' : 'gpu_a100,gpu')
                 }
                 // Set clusterOptions, changing account based on queue
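
For reference, the queue-selection behaviour introduced above can be exercised outside Nextflow. The following is a minimal standalone Groovy sketch, not part of the config: plain integers stand in for Nextflow's task.memory (GB) and task.time (hours) objects, and the task sizes are hypothetical. Only the VSC_DEDICATED_QUEUES variable, the queue names and the limitTaskTime() helper come from the diff; the sketch mirrors the wice CPU branch.

// Illustrative sketch only -- not part of conf/vsc_kul_uhasselt.config
def availQueues = (System.getenv("VSC_DEDICATED_QUEUES") ?: "").split(',')

// Same helper as limitTaskTime() in the diff
def limitTaskTime(time, maxTime) {
    return time > maxTime ? maxTime : time
}

def taskMemoryGb = 480   // hypothetical request; >= 239 GB, so the bigmem branch applies
def taskTimeH    = 100   // hypothetical walltime request, in hours
def maxTimeH     = 72

def queue
if (taskMemoryGb >= 239) {
    // Cap the walltime only when the dedicated bigmem queue is not available
    taskTimeH = (taskTimeH >= maxTimeH && !availQueues.contains('dedicated_big_bigmem')) ?
        limitTaskTime(taskTimeH, maxTimeH) : taskTimeH
    queue = availQueues.contains('dedicated_big_bigmem') ? 'dedicated_big_bigmem' : 'bigmem,hugemem'
} else {
    queue = taskTimeH >= maxTimeH ?
        'batch_long,batch_icelake_long,batch_sapphirerapids_long' :
        'batch,batch_sapphirerapids,batch_icelake'
}
println "queue=${queue}, time=${taskTimeH}h"

With VSC_DEDICATED_QUEUES unset the sketch prints queue=bigmem,hugemem, time=72h (walltime capped); with VSC_DEDICATED_QUEUES=dedicated_big_bigmem it prints queue=dedicated_big_bigmem, time=100h (no cap), which is the intent of the change.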