diff --git a/data/config/launcher/slurm.yaml b/data/config/launcher/slurm.yaml
index 03162202439a2231b63f1bcd98e03c10dc4b5707..6593456948346bbf93289fb2eb173bcc56c56527 100644
--- a/data/config/launcher/slurm.yaml
+++ b/data/config/launcher/slurm.yaml
@@ -6,7 +6,13 @@ hydra:
   callbacks:
     log_job_return:
       _target_: hydra.experimental.callbacks.LogJobReturnCallback
-  launcher:
-    setup: [which python, echo 1]
+  launcher: # https://hydra.cc/docs/plugins/submitit_launcher/
+    setup: [
+      "echo '# Loading python module!'", "module load Python/3.10.4 2>&1",
+      "echo '# List of modules:'", "module list 2>&1",
+      "echo '# Current working directory:'", "pwd",
+      "echo '# List of folders in pwd:'", "ls",
+      "echo '# Activate venv!'", ". .venv/bin/activate",
+      "echo '# Which python:'", "which python"]
     submitit_folder: ${hydra.sweep.dir}/.submitit/%j

diff --git a/data/config/launcher/slurmgpu.yaml b/data/config/launcher/slurmgpu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3f91ba5aca6db7fa2b8f412d06c7e44f78565fcb
--- /dev/null
+++ b/data/config/launcher/slurmgpu.yaml
@@ -0,0 +1,27 @@
+# @package _global_
+defaults:
+  - override /hydra/launcher: submitit_slurm
+
+hydra:
+  callbacks:
+    log_job_return:
+      _target_: hydra.experimental.callbacks.LogJobReturnCallback
+  launcher: # https://hydra.cc/docs/plugins/submitit_launcher/
+    setup: [  # shell commands executed, in order, before the job itself starts
+      "echo '# Loading python module!'", "module load Python/3.10.4 2>&1",
+      "echo '# Loading CUDA module!'", "module load CUDA/12.3.0 2>&1",
+      "echo '# List of modules:'", "module list 2>&1",
+      "echo '# Current working directory:'", "pwd",
+      "echo '# List of folders in pwd:'", "ls",
+      "echo '# Activate venv!'", ". .venv/bin/activate",
+      "echo '# Which python:'", "which python",
+      "echo '# nvidia-smi:'", "nvidia-smi",
+      "echo '# Torch collect:'", "python -m torch.utils.collect_env"]
+    submitit_folder: ${hydra.sweep.dir}/.submitit/%j
+    cpus_per_task: 4
+    gpus_per_node: 1
+    mem_per_cpu: 8000
+    array_parallelism: 5  # cap on array jobs running at the same time
+    gres: "gpu:1"
+    timeout_min: 5  # job time limit, in minutes
+