From d3b037a08497ecf5db9f608b7f5d5ed9889ee8f3 Mon Sep 17 00:00:00 2001
From: Andres <andres.posada@dsme-rwth-aachen.de>
Date: Fri, 17 May 2024 14:00:24 +0200
Subject: [PATCH] adding slurm executors

---
Notes (ignored by `git am`; not part of the commit message):
  * slurm.yaml: the launcher `setup` list now loads the Python module, lists
    the loaded modules, prints the working directory and its contents,
    activates `.venv`, and prints which python is in use.
  * slurmgpu.yaml (new): same setup plus a CUDA module load, `nvidia-smi`
    and `python -m torch.utils.collect_env`; it also sets the resource keys
    cpus_per_task, gpus_per_node, mem_per_cpu, array_parallelism, gres and
    timeout_min.
  * The CUDA step is `module load CUDA/12.3.0`; a bare `CUDA/12.3.0` would
    be executed as a command by the job shell and the module would never be
    loaded.

 data/config/launcher/slurm.yaml    | 10 ++++++++--
 data/config/launcher/slurmgpu.yaml | 27 +++++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 2 deletions(-)
 create mode 100644 data/config/launcher/slurmgpu.yaml

diff --git a/data/config/launcher/slurm.yaml b/data/config/launcher/slurm.yaml
index 0316220..6593456 100644
--- a/data/config/launcher/slurm.yaml
+++ b/data/config/launcher/slurm.yaml
@@ -6,7 +6,13 @@ hydra:
   callbacks:
     log_job_return:
       _target_: hydra.experimental.callbacks.LogJobReturnCallback
-  launcher:
-    setup: [which python, echo 1]
+  launcher:  # https://hydra.cc/docs/plugins/submitit_launcher/
+    setup: [
+      "echo '# Loading python module!'", "module load Python/3.10.4 2>&1",
+      "echo '# List of modules:'", "module list 2>&1",
+      "echo '# Current working directory:'", "pwd",
+      "echo '# List of folders in pwd:'", "ls",
+      "echo '# Activate venv!'", ". .venv/bin/activate",
+      "echo '# Which python:'", "which python"]
     submitit_folder: ${hydra.sweep.dir}/.submitit/%j
 
diff --git a/data/config/launcher/slurmgpu.yaml b/data/config/launcher/slurmgpu.yaml
new file mode 100644
index 0000000..3f91ba5
--- /dev/null
+++ b/data/config/launcher/slurmgpu.yaml
@@ -0,0 +1,27 @@
+# @package _global_
+defaults:
+  - override /hydra/launcher: submitit_slurm
+
+hydra:
+  callbacks:
+    log_job_return:
+      _target_: hydra.experimental.callbacks.LogJobReturnCallback
+  launcher:  # https://hydra.cc/docs/plugins/submitit_launcher/
+    setup: [
+      "echo '# Loading python module!'", "module load Python/3.10.4 2>&1",
+      "echo '# Loading CUDA module!'", "module load CUDA/12.3.0 2>&1",
+      "echo '# List of modules:'", "module list 2>&1",
+      "echo '# Current working directory:'", "pwd",
+      "echo '# List of folders in pwd:'", "ls",
+      "echo '# Activate venv!'", ". .venv/bin/activate",
+      "echo '# Which python:'", "which python",
+      "echo '# nvidia-smi:'", "nvidia-smi",
+      "echo '# Torch collect:'", "python -m torch.utils.collect_env"]
+    submitit_folder: ${hydra.sweep.dir}/.submitit/%j
+    cpus_per_task: 4
+    gpus_per_node: 1
+    mem_per_cpu: 8000
+    array_parallelism: 5
+    gres: "gpu:1"
+    timeout_min: 5
+
-- 
GitLab