From d3b037a08497ecf5db9f608b7f5d5ed9889ee8f3 Mon Sep 17 00:00:00 2001
From: Andres <andres.posada@dsme-rwth-aachen.de>
Date: Fri, 17 May 2024 14:00:24 +0200
Subject: [PATCH] adding slurm executors

---
 data/config/launcher/slurm.yaml    | 10 ++++++++--
 data/config/launcher/slurmgpu.yaml | 27 +++++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 2 deletions(-)
 create mode 100644 data/config/launcher/slurmgpu.yaml

diff --git a/data/config/launcher/slurm.yaml b/data/config/launcher/slurm.yaml
index 0316220..6593456 100644
--- a/data/config/launcher/slurm.yaml
+++ b/data/config/launcher/slurm.yaml
@@ -6,7 +6,13 @@ hydra:
   callbacks:
     log_job_return:
       _target_: hydra.experimental.callbacks.LogJobReturnCallback
-  launcher:
-    setup: [which python, echo 1]
+  launcher: # https://hydra.cc/docs/plugins/submitit_launcher/
+    setup: [  # shell commands in (echo label, command) pairs; see the launcher docs linked above
+      "echo '# Loading python module!'", "module load Python/3.10.4 2>&1",  # 2>&1 sends stderr to stdout
+      "echo '# List of modules:'", "module list 2>&1",
+      "echo '# Current working directory:'", "pwd", 
+      "echo '# List of folders in pwd:'", "ls",
+      "echo '# Activate venv!'", ". .venv/bin/activate",  # relative path: resolved against the cwd printed above
+      "echo '# Which python:'", "which python"]
     submitit_folder: ${hydra.sweep.dir}/.submitit/%j
       
diff --git a/data/config/launcher/slurmgpu.yaml b/data/config/launcher/slurmgpu.yaml
new file mode 100644
index 0000000..3f91ba5
--- /dev/null
+++ b/data/config/launcher/slurmgpu.yaml
@@ -0,0 +1,27 @@
+# @package _global_
+defaults:
+  - override /hydra/launcher: submitit_slurm
+
+hydra:
+  callbacks:
+    log_job_return:
+      _target_: hydra.experimental.callbacks.LogJobReturnCallback
+  launcher: # https://hydra.cc/docs/plugins/submitit_launcher/
+    setup: [  # shell commands in (echo label, command) pairs; see the launcher docs linked above
+      "echo '# Loading python module!'", "module load Python/3.10.4 2>&1",
+      "echo '# Loading CUDA module!'", "module load CUDA/12.3.0 2>&1",
+      "echo '# List of modules:'", "module list 2>&1",
+      "echo '# Current working directory:'", "pwd",
+      "echo '# List of folders in pwd:'", "ls",
+      "echo '# Activate venv!'", ". .venv/bin/activate",
+      "echo '# Which python:'", "which python",
+      "echo '# nvidia-smi:'", "nvidia-smi",
+      "echo '# Torch collect:'", "python -m torch.utils.collect_env"]
+    submitit_folder: ${hydra.sweep.dir}/.submitit/%j
+    cpus_per_task: 4
+    gpus_per_node: 1
+    mem_per_cpu: 8000
+    array_parallelism: 5
+    gres: "gpu:1"  # NOTE(review): a GPU is also requested via gpus_per_node above; confirm both are needed
+    timeout_min: 5
+
-- 
GitLab