Commit
refactored code for dedicated queues and added max task limit if dedicated queue is not available
ljwharbers committed Jan 7, 2025
1 parent c1b3583 commit 2c391ba
Showing 1 changed file with 61 additions and 21 deletions.
82 changes: 61 additions & 21 deletions conf/vsc_kul_uhasselt.config
@@ -2,6 +2,8 @@
// see: https://github.com/nf-core/configs?tab=readme-ov-file#adding-a-new-config
scratch_dir = System.getenv("VSC_SCRATCH") ?: "/tmp"
tier1_project = System.getenv("SLURM_ACCOUNT") ?: null
avail_queues = System.getenv("VSC_DEDICATED_QUEUES") ?: null
// default to an empty list so the availQueues.contains() checks below are null-safe
def availQueues = avail_queues?.toString()?.split(',') ?: []

// Perform work directory cleanup when the run has successfully completed
// cleanup = true
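For illustration, the two added lines above turn the comma-separated VSC_DEDICATED_QUEUES variable into a list of queue names that the profiles below can test with contains(). A minimal Groovy sketch, with a hypothetical value standing in for the real environment variable:

def avail_queues = "dedicated_big_bigmem,dedicated_big_gpu"  // hypothetical value of VSC_DEDICATED_QUEUES
def availQueues = avail_queues?.toString()?.split(',') ?: []
assert availQueues.contains('dedicated_big_bigmem')     // this dedicated queue may be used
assert !availQueues.contains('dedicated_big_gpu_h100')  // not listed, so profiles fall back to shared queues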
@@ -47,6 +49,11 @@ aws {
maxErrorRetry = 3
}

// Function to limit task time when dedicated queues are not available
def limitTaskTime(time, maxTime) {
return time > maxTime ? maxTime : time
}

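A quick illustration of the helper, assuming Nextflow's Duration literals (100.h, 72.h) as used throughout this config:

def capped = limitTaskTime(100.h, 72.h)  // request exceeds the cap -> returns 72.h
def kept   = limitTaskTime(48.h, 72.h)   // request within the cap  -> returns 48.h unchanged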
// Define profiles for each cluster
profiles {
genius {
@@ -68,6 +75,8 @@
resourceLimits = [ memory: 703.GB, cpus: 36, time: 168.h ]
apptainer.runOptions = '--containall --cleanenv --nv'
singularity.runOptions = '--containall --cleanenv --nv'

// Set clusterOptions
clusterOptions = {
// suggested to use 9 cpus per gpu
def gpus = task.accelerator?.request ?: Math.max(1, Math.floor((task.cpus ?:1)/9) as int)
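When no explicit accelerator count is requested, the closure above infers GPUs from the CPU request at the suggested 9 CPUs per GPU, never dropping below one. The arithmetic in plain Groovy, with hypothetical CPU counts:

assert Math.max(1, Math.floor(36 / 9) as int) == 4  // 36 CPUs -> 4 GPUs
assert Math.max(1, Math.floor(4 / 9) as int)  == 1  // a tiny request still gets 1 GPU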
@@ -104,27 +113,34 @@ profiles {
}
}
}

wice {
params.config_profile_description = 'wice profile for use on the Wice cluster of the VSC HPC.'

process {
// max is 2016000MB
resourceLimits = [ memory: 1968.GB, cpus: 72, time: 168.h ]
beforeScript = 'module load cluster/wice'

// Set queue
// Task time is limited to 72 hours if memory is 239 GB or more
// and the dedicated queue is not available
queue = {
task.memory >= 239.GB ?
(task.time >= 72.h ? 'dedicated_big_bigmem' : 'bigmem,hugemem') :
(task.time >= 72.h ? 'batch_long,batch_icelake_long,batch_sapphirerapids_long' : 'batch,batch_sapphirerapids,batch_icelake')
def maxTime = 72.h
if (task.memory >= 239.GB) {
task.time = task.time >= maxTime && !availQueues.contains('dedicated_big_bigmem') ?
limitTaskTime(task.time, maxTime) : task.time
return availQueues.contains('dedicated_big_bigmem') ? 'dedicated_big_bigmem' : 'bigmem,hugemem'
} else {
return task.time >= maxTime ? 'batch_long,batch_icelake_long,batch_sapphirerapids_long' : 'batch,batch_sapphirerapids,batch_icelake'
}
}

// Set clusterOptions, changing account based on queue
clusterOptions = {
def queueValue = {
task.memory >= 239.GB ?
(task.time >= 72.h ? 'dedicated_big_bigmem' : 'bigmem,hugemem') :
task.memory >= 239.GB ?
(task.time >= 72.h && availQueues.contains('dedicated_big_bigmem') ? 'dedicated_big_bigmem' : 'bigmem,hugemem') :
(task.time >= 72.h ? 'batch_long,batch_icelake_long,batch_sapphirerapids_long' : 'batch,batch_sapphirerapids,batch_icelake')
}
queueValue() =~ /dedicated/ ? "--clusters=wice --account=lp_big_wice_cpu" : "--clusters=wice --account=$tier1_project"
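Restated as plain Groovy (pickQueue is an illustrative helper, not part of the config, and bare numbers stand in for Nextflow's memory and duration units), the new selector prefers the dedicated queue whenever it is available and only clamps the time when it is not:

def pickQueue = { memGB, hours, queues ->
    if (memGB >= 239)
        return queues.contains('dedicated_big_bigmem') ? 'dedicated_big_bigmem' : 'bigmem,hugemem'
    return hours >= 72 ? 'batch_long,batch_icelake_long,batch_sapphirerapids_long' : 'batch,batch_sapphirerapids,batch_icelake'
}
assert pickQueue(300, 100, ['dedicated_big_bigmem']) == 'dedicated_big_bigmem'  // time left untouched
assert pickQueue(300, 100, []) == 'bigmem,hugemem'                              // task.time also clamped to 72.h
assert pickQueue(100, 100, []) == 'batch_long,batch_icelake_long,batch_sapphirerapids_long'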
@@ -134,19 +150,31 @@ profiles {
resourceLimits = [ memory: 703.GB, cpus: 64, time: 168.h ]
apptainer.runOptions = '--containall --cleanenv --nv'
singularity.runOptions = '--containall --cleanenv --nv'

// Set queue
// Task time is limited to 72 hours when the matching dedicated queue
// is not available
queue = {
task.memory >= 239.GB ?
(task.time >= 72.h ? 'dedicated_big_gpu_h100' : 'gpu_h100') :
(task.time >= 72.h ? 'dedicated_big_gpu' : 'gpu_a100,gpu')
def maxTime = 72.h
if (task.memory >= 239.GB) {
task.time = task.time >= maxTime && !availQueues.contains('dedicated_big_gpu_h100') ?
limitTaskTime(task.time, maxTime) : task.time
return availQueues.contains('dedicated_big_gpu_h100') ? 'dedicated_big_gpu_h100' : 'gpu_h100'
} else {
task.time = task.time >= maxTime && !availQueues.contains('dedicated_big_gpu') ?
limitTaskTime(task.time, maxTime) : task.time
return availQueues.contains('dedicated_big_gpu') ? 'dedicated_big_gpu' : 'gpu_a100,gpu'
}
}

clusterOptions = {
// suggested to use 16 cpus per gpu
def gpus = task.accelerator?.request ?: Math.max(1, Math.floor((task.cpus ?:1)/16) as int)
// Do same queue evaluation as above
def queueValue = {
task.memory >= 239.GB ?
(task.time >= 72.h ? 'dedicated_big_gpu_h100' : 'gpu_h100') :
(task.time >= 72.h ? 'dedicated_big_gpu' : 'gpu_a100,gpu')
task.memory >= 239.GB ?
(task.time >= 72.h && availQueues.contains('dedicated_big_gpu_h100') ? 'dedicated_big_gpu_h100' : 'gpu_h100') :
(task.time >= 72.h && availQueues.contains('dedicated_big_gpu') ? 'dedicated_big_gpu' : 'gpu_a100,gpu')
}

// Set clusterOptions, changing account based on queue
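The account choice hinges on a regex test against the selected queue name; only dedicated queues switch away from the $tier1_project account (illustrative values only; the GPU account line itself is collapsed in this view):

assert 'dedicated_big_gpu_h100' =~ /dedicated/  // dedicated queue -> dedicated account
assert !('gpu_a100,gpu' =~ /dedicated/)         // shared queues -> $tier1_project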
@@ -167,19 +195,31 @@ profiles {
// 768 - 65 so 65GB for overhead, max is 720000MB
resourceLimits = [ memory: 703.GB, cpus: 64, time: 168.h ]
beforeScript = 'module load cluster/wice'
// Set queue
// Task time is limited to 72 hours when the matching dedicated queue
// is not available
queue = {
task.memory >= 239.GB ?
(task.time >= 72.h ? 'dedicated_big_gpu_h100' : 'gpu_h100') :
(task.time >= 72.h ? 'dedicated_big_gpu' : 'gpu_a100,gpu')
def maxTime = 72.h
if (task.memory >= 239.GB) {
task.time = task.time >= maxTime && !availQueues.contains('dedicated_big_gpu_h100') ?
limitTaskTime(task.time, maxTime) : task.time
return availQueues.contains('dedicated_big_gpu_h100') ? 'dedicated_big_gpu_h100' : 'gpu_h100'
} else {
task.time = task.time >= maxTime && !availQueues.contains('dedicated_big_gpu') ?
limitTaskTime(task.time, maxTime) : task.time
return availQueues.contains('dedicated_big_gpu') ? 'dedicated_big_gpu' : 'gpu_a100,gpu'
}
}

// Set clusterOptions
clusterOptions = {
// suggested to use 16 cpus per gpu
def gpus = task.accelerator?.request ?: Math.max(1, Math.floor((task.cpus ?:1)/16) as int)
// Do same queue evaluation as above
// Do same queue evaluation as above, without adjusting task.time
def queueValue = {
task.memory >= 239.GB ?
(task.time >= 72.h ? 'dedicated_big_gpu_h100' : 'gpu_h100') :
(task.time >= 72.h ? 'dedicated_big_gpu' : 'gpu_a100,gpu')
task.memory >= 239.GB ?
(task.time >= 72.h && availQueues.contains('dedicated_big_gpu_h100') ? 'dedicated_big_gpu_h100' : 'gpu_h100') :
(task.time >= 72.h && availQueues.contains('dedicated_big_gpu') ? 'dedicated_big_gpu' : 'gpu_a100,gpu')
}

// Set clusterOptions, changing account based on queue
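Taken together, the queue closures in this commit clamp task.time only when the request is at or above the 72-hour cap and no matching dedicated queue is listed. A compact restatement with an illustrative helper, using bare hours in place of Durations:

def effectiveHours = { requested, dedicatedAvailable ->
    def maxHours = 72
    (requested >= maxHours && !dedicatedAvailable) ? Math.min(requested, maxHours) : requested
}
assert effectiveHours(100, false) == 72   // no dedicated queue: clamped to the cap
assert effectiveHours(100, true)  == 100  // dedicated queue available: untouched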