From e5758cb03afdabf328f5cc8fe0731d89b9c6ff53 Mon Sep 17 00:00:00 2001
From: Jannis Klinkenberg <j.klinkenberg@itc.rwth-aachen.de>
Date: Mon, 17 Feb 2025 10:13:22 +0100
Subject: [PATCH] added Slurm examples for CLAIX-2023

---
 slurm/basic_mpi.sh                  | 15 -----------
 slurm/{beeond.sh => beeond_job.sh}  |  0
 slurm/gpu_job_1gpu.sh               | 36 ++++++++++++++++++++++++++
 slurm/gpu_job_2gpus-1proc.sh        | 36 ++++++++++++++++++++++++++
 slurm/gpu_job_2gpus-2procs.sh       | 36 ++++++++++++++++++++++++++
 slurm/gpu_job_4gpus-4procs.sh       | 36 ++++++++++++++++++++++++++
 slurm/gpu_job_8gpus-8procs.sh       | 37 +++++++++++++++++++++++++++
 slurm/hybrid_mpi_openmp_job.sh      | 39 +++++++++++++++++++++++++++++
 slurm/mpi_job_1node.sh              | 24 ++++++++++++++++++
 slurm/mpi_job_2nodes.sh             | 24 ++++++++++++++++++
 slurm/mpi_job_basic.sh              | 20 +++++++++++++++
 slurm/openmp_multi-threading_job.sh | 38 ++++++++++++++++++++++++++++
 12 files changed, 326 insertions(+), 15 deletions(-)
 delete mode 100644 slurm/basic_mpi.sh
 rename slurm/{beeond.sh => beeond_job.sh} (100%)
 create mode 100644 slurm/gpu_job_1gpu.sh
 create mode 100644 slurm/gpu_job_2gpus-1proc.sh
 create mode 100644 slurm/gpu_job_2gpus-2procs.sh
 create mode 100644 slurm/gpu_job_4gpus-4procs.sh
 create mode 100644 slurm/gpu_job_8gpus-8procs.sh
 create mode 100644 slurm/hybrid_mpi_openmp_job.sh
 create mode 100644 slurm/mpi_job_1node.sh
 create mode 100644 slurm/mpi_job_2nodes.sh
 create mode 100644 slurm/mpi_job_basic.sh
 create mode 100644 slurm/openmp_multi-threading_job.sh

diff --git a/slurm/basic_mpi.sh b/slurm/basic_mpi.sh
deleted file mode 100644
index b1bc9cb..0000000
--- a/slurm/basic_mpi.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/usr/bin/zsh
-############################################################
-### Slurm flags
-############################################################
-
-#SBATCH --ntasks=8              # Ask for 8 MPI tasks
-#SBATCH --time=00:15:00         # Run time of 15 minutes
-#SBATCH --job-name=example_job  # Sets the job name
-#SBATCH --output=stdout_%j.txt  # Redirects stdout and stderr to stdout.txt
-#SBATCH --account=<project-id>  # Insertg your project-id or delete this line
-
-############################################################
-### Execution / Commands
-############################################################
-srun hostname
\ No newline at end of file
diff --git a/slurm/beeond.sh b/slurm/beeond_job.sh
similarity index 100%
rename from slurm/beeond.sh
rename to slurm/beeond_job.sh
diff --git a/slurm/gpu_job_1gpu.sh b/slurm/gpu_job_1gpu.sh
new file mode 100644
index 0000000..74a97a3
--- /dev/null
+++ b/slurm/gpu_job_1gpu.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/zsh
+############################################################
+### Slurm flags
+############################################################
+
+#SBATCH --partition=c23g             # request partition with GPU nodes
+#SBATCH --nodes=1                    # request desired number of nodes
+#SBATCH --ntasks-per-node=1          # request desired number of processes (or MPI tasks)
+
+#SBATCH --cpus-per-task=24           # request desired number of CPU cores or threads per process (default: 1)
+                                     # Note: available main memory also scales with the
+                                     #       number of cores if not specified otherwise
+                                     # Note: On CLAIX-2023, each GPU can be used with 24 cores
+
+#SBATCH --gres=gpu:1                 # specify desired number of GPUs per node
+#SBATCH --time=00:15:00              # max. run time of the job
+#SBATCH --job-name=example_1gpus     # set the job name
+#SBATCH --output=stdout_%j.txt       # redirect stdout and stderr to stdout_<jobid>.txt
+#SBATCH --account=<project-id>       # insert your project-id or delete this line
+
+############################################################
+### Parameters and Settings
+############################################################
+
+# print some information about the current system
+echo "Job nodes: ${SLURM_JOB_NODELIST}"
+echo "Current machine: $(hostname)"
+nvidia-smi
+
+############################################################
+### Execution / Commands
+############################################################
+
+# Example: Only a single GPU is used. However, due to billing
+# settings, 24 CPU cores can be requested and used
+# for free.
\ No newline at end of file
diff --git a/slurm/gpu_job_2gpus-1proc.sh b/slurm/gpu_job_2gpus-1proc.sh
new file mode 100644
index 0000000..578136a
--- /dev/null
+++ b/slurm/gpu_job_2gpus-1proc.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/zsh
+############################################################
+### Slurm flags
+############################################################
+
+#SBATCH --partition=c23g             # request partition with GPU nodes
+#SBATCH --nodes=1                    # request desired number of nodes
+#SBATCH --ntasks-per-node=1          # request desired number of processes (or MPI tasks)
+
+#SBATCH --cpus-per-task=48           # request desired number of CPU cores or threads per process (default: 1)
+                                     # Note: available main memory also scales with the
+                                     #       number of cores if not specified otherwise
+                                     # Note: On CLAIX-2023, each GPU can be used with 24 cores
+
+#SBATCH --gres=gpu:2                 # specify desired number of GPUs per node
+#SBATCH --time=00:15:00              # max. run time of the job
+#SBATCH --job-name=example_job_gpus  # set the job name
+#SBATCH --output=stdout_%j.txt       # redirect stdout and stderr to stdout_<jobid>.txt
+#SBATCH --account=<project-id>       # insert your project-id or delete this line
+
+############################################################
+### Parameters and Settings
+############################################################
+
+# print some information about the current system
+echo "Job nodes: ${SLURM_JOB_NODELIST}"
+echo "Current machine: $(hostname)"
+nvidia-smi
+
+############################################################
+### Execution / Commands
+############################################################
+
+# Example: 1:2 mapping between MPI processes and GPUs
+# The process is intended to use both GPUs
+
diff --git a/slurm/gpu_job_2gpus-2procs.sh b/slurm/gpu_job_2gpus-2procs.sh
new file mode 100644
index 0000000..ae9553d
--- /dev/null
+++ b/slurm/gpu_job_2gpus-2procs.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/zsh
+############################################################
+### Slurm flags
+############################################################
+
+#SBATCH --partition=c23g             # request partition with GPU nodes
+#SBATCH --nodes=1                    # request desired number of nodes
+#SBATCH --ntasks-per-node=2          # request desired number of processes (or MPI tasks)
+
+#SBATCH --cpus-per-task=24           # request desired number of CPU cores or threads per process (default: 1)
+                                     # Note: available main memory also scales with the
+                                     #       number of cores if not specified otherwise
+                                     # Note: On CLAIX-2023, each GPU can be used with 24 cores
+
+#SBATCH --gres=gpu:2                 # specify desired number of GPUs per node
+#SBATCH --time=00:15:00              # max. run time of the job
+#SBATCH --job-name=example_job_gpus  # set the job name
+#SBATCH --output=stdout_%j.txt       # redirect stdout and stderr to stdout_<jobid>.txt
+#SBATCH --account=<project-id>       # insert your project-id or delete this line
+
+############################################################
+### Parameters and Settings
+############################################################
+
+# print some information about the current system
+echo "Job nodes: ${SLURM_JOB_NODELIST}"
+echo "Current machine: $(hostname)"
+nvidia-smi
+
+############################################################
+### Execution / Commands
+############################################################
+
+# Example: 1:1 mapping between MPI processes and GPUs
+# Each process is intended to use 1 GPU
+
diff --git a/slurm/gpu_job_4gpus-4procs.sh b/slurm/gpu_job_4gpus-4procs.sh
new file mode 100644
index 0000000..08efaac
--- /dev/null
+++ b/slurm/gpu_job_4gpus-4procs.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/zsh
+############################################################
+### Slurm flags
+############################################################
+
+#SBATCH --partition=c23g             # request partition with GPU nodes
+#SBATCH --nodes=1                    # request desired number of nodes
+#SBATCH --ntasks-per-node=4          # request desired number of processes (or MPI tasks)
+
+#SBATCH --cpus-per-task=24           # request desired number of CPU cores or threads per process (default: 1)
+                                     # Note: available main memory also scales with the
+                                     #       number of cores if not specified otherwise
+                                     # Note: On CLAIX-2023, each GPU can be used with 24 cores
+
+#SBATCH --gres=gpu:4                 # specify desired number of GPUs per node
+#SBATCH --time=00:15:00              # max. run time of the job
+#SBATCH --job-name=example_job_gpus  # set the job name
+#SBATCH --output=stdout_%j.txt       # redirect stdout and stderr to stdout_<jobid>.txt
+#SBATCH --account=<project-id>       # insert your project-id or delete this line
+
+############################################################
+### Parameters and Settings
+############################################################
+
+# print some information about the current system
+echo "Job nodes: ${SLURM_JOB_NODELIST}"
+echo "Current machine: $(hostname)"
+nvidia-smi
+
+############################################################
+### Execution / Commands
+############################################################
+
+# Example: 1:1 mapping between MPI processes and GPUs
+# Each process is intended to use 1 GPU
+
diff --git a/slurm/gpu_job_8gpus-8procs.sh b/slurm/gpu_job_8gpus-8procs.sh
new file mode 100644
index 0000000..da90d9a
--- /dev/null
+++ b/slurm/gpu_job_8gpus-8procs.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/zsh
+############################################################
+### Slurm flags
+############################################################
+
+#SBATCH --partition=c23g             # request partition with GPU nodes
+#SBATCH --nodes=2                    # request desired number of nodes
+#SBATCH --ntasks-per-node=4          # request desired number of processes (or MPI tasks)
+
+#SBATCH --cpus-per-task=24           # request desired number of CPU cores or threads per process (default: 1)
+                                     # Note: available main memory also scales with the
+                                     #       number of cores if not specified otherwise
+                                     # Note: On CLAIX-2023, each GPU can be used with 24 cores
+
+#SBATCH --gres=gpu:4                 # specify desired number of GPUs per node
+#SBATCH --time=00:15:00              # max. run time of the job
+#SBATCH --job-name=example_job_gpus  # set the job name
+#SBATCH --output=stdout_%j.txt       # redirect stdout and stderr to stdout_<jobid>.txt
+#SBATCH --account=<project-id>       # insert your project-id or delete this line
+
+############################################################
+### Parameters and Settings
+############################################################
+
+# print some information about the current system
+echo "Job nodes: ${SLURM_JOB_NODELIST}"
+echo "Current machine: $(hostname)"
+nvidia-smi
+
+############################################################
+### Execution / Commands
+############################################################
+
+# Example: 1:1 mapping between MPI processes and GPUs
+# Each process is intended to use 1 GPU.
+# 2 full compute nodes are used.
+
diff --git a/slurm/hybrid_mpi_openmp_job.sh b/slurm/hybrid_mpi_openmp_job.sh
new file mode 100644
index 0000000..fed587e
--- /dev/null
+++ b/slurm/hybrid_mpi_openmp_job.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/zsh
+############################################################
+### Slurm flags
+############################################################
+
+#SBATCH --nodes=2                    # request desired number of nodes
+#SBATCH --ntasks-per-node=4          # request desired number of processes (or MPI tasks)
+#SBATCH --cpus-per-task=24           # request desired number of CPU cores or threads per process (default: 1)
+                                     # Note: available main memory also scales with the
+                                     #       number of cores if not specified otherwise
+#SBATCH --time=00:15:00              # max. run time of the job
+#SBATCH --job-name=example_job_hyb   # set the job name
+#SBATCH --output=stdout_%j.txt       # redirect stdout and stderr to stdout_<jobid>.txt
+#SBATCH --account=<project-id>       # insert your project-id or delete this line
+
+############################################################
+### Parameters and Settings
+############################################################
+
+# print some information about the current system
+echo "Job nodes: ${SLURM_JOB_NODELIST}"
+echo "Current machine: $(hostname)"
+
+############################################################
+### Execution / Commands
+############################################################
+
+# Example: Hybrid MPI + OpenMP execution
+
+# set number of threads
+export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK}  # usually set automatically by SLURM
+
+# enable thread binding to physical CPU cores
+export OMP_PLACES=cores
+export OMP_PROC_BIND=spread  # aiming to maximize memory bandwidth utilization
+# export OMP_PROC_BIND=close # typically used in scenarios where neighboring threads need to communicate/synchronize a lot
+
+# execute your program
+srun <prog> <params>
\ No newline at end of file
diff --git a/slurm/mpi_job_1node.sh b/slurm/mpi_job_1node.sh
new file mode 100644
index 0000000..95f8155
--- /dev/null
+++ b/slurm/mpi_job_1node.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/zsh
+############################################################
+### Slurm flags
+############################################################
+
+#SBATCH --nodes=1                    # request desired number of nodes
+#SBATCH --ntasks-per-node=96         # request desired number of processes (or MPI tasks)
+#SBATCH --time=00:15:00              # max. run time of the job
+#SBATCH --job-name=example_job_mpi   # set the job name
+#SBATCH --output=stdout_%j.txt       # redirect stdout and stderr to stdout_<jobid>.txt
+#SBATCH --account=<project-id>       # insert your project-id or delete this line
+
+############################################################
+### Parameters and Settings
+############################################################
+
+# print some information about the current system
+echo "Job nodes: ${SLURM_JOB_NODELIST}"
+echo "Current machine: $(hostname)"
+
+############################################################
+### Execution / Commands
+############################################################
+srun hostname
\ No newline at end of file
diff --git a/slurm/mpi_job_2nodes.sh b/slurm/mpi_job_2nodes.sh
new file mode 100644
index 0000000..46b566b
--- /dev/null
+++ b/slurm/mpi_job_2nodes.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/zsh
+############################################################
+### Slurm flags
+############################################################
+
+#SBATCH --nodes=2                    # request desired number of nodes
+#SBATCH --ntasks-per-node=96         # request desired number of processes (or MPI tasks)
+#SBATCH --time=00:15:00              # max. run time of the job
+#SBATCH --job-name=example_job_mpi   # set the job name
+#SBATCH --output=stdout_%j.txt       # redirect stdout and stderr to stdout_<jobid>.txt
+#SBATCH --account=<project-id>       # insert your project-id or delete this line
+
+############################################################
+### Parameters and Settings
+############################################################
+
+# print some information about the current system
+echo "Job nodes: ${SLURM_JOB_NODELIST}"
+echo "Current machine: $(hostname)"
+
+############################################################
+### Execution / Commands
+############################################################
+srun hostname
\ No newline at end of file
diff --git a/slurm/mpi_job_basic.sh b/slurm/mpi_job_basic.sh
new file mode 100644
index 0000000..ceac829
--- /dev/null
+++ b/slurm/mpi_job_basic.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/zsh
+############################################################
+### Slurm flags
+############################################################
+
+#SBATCH --ntasks=8                   # request desired number of processes (or MPI tasks)
+#SBATCH --time=00:15:00              # max. run time of the job
+#SBATCH --job-name=example_job_mpi   # set the job name
+#SBATCH --output=stdout_%j.txt       # redirect stdout and stderr to stdout_<jobid>.txt
+#SBATCH --account=<project-id>       # insert your project-id or delete this line
+
+############################################################
+### Execution / Commands
+############################################################
+
+# Note: It is not specified where the MPI tasks will be allocated.
+# They will most likely end up on the same node, but could in
+# principle also be placed on different nodes.
+
+srun hostname
\ No newline at end of file
diff --git a/slurm/openmp_multi-threading_job.sh b/slurm/openmp_multi-threading_job.sh
new file mode 100644
index 0000000..0eda131
--- /dev/null
+++ b/slurm/openmp_multi-threading_job.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/zsh
+############################################################
+### Slurm flags
+############################################################
+
+#SBATCH --nodes=1                    # pure multi-threading is restricted to a single node
+#SBATCH --ntasks-per-node=1          # pure multi-threading is restricted to a single process
+#SBATCH --cpus-per-task=48           # request desired number of CPU cores or threads per process (default: 1)
+                                     # Note: available main memory also scales with the
+                                     #       number of cores if not specified otherwise
+#SBATCH --time=00:15:00              # max. run time of the job
+#SBATCH --job-name=example_job_omp   # set the job name
+#SBATCH --output=stdout_%j.txt       # redirect stdout and stderr to stdout_<jobid>.txt
+#SBATCH --account=<project-id>       # insert your project-id or delete this line
+
+############################################################
+### Parameters and Settings
+############################################################
+
+# print some information about the current system
+echo "Current machine: $(hostname)"
+
+############################################################
+### Execution / Commands
+############################################################
+
+# Example: OpenMP
+
+# set number of threads
+export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK}  # usually set automatically by SLURM
+
+# enable thread binding to physical CPU cores
+export OMP_PLACES=cores
+export OMP_PROC_BIND=spread  # aiming to maximize memory bandwidth utilization
+# export OMP_PROC_BIND=close # typically used in scenarios where neighboring threads need to communicate/synchronize a lot
+
+# execute your program
+<prog> <params>
\ No newline at end of file
--
GitLab
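Usage sketch (not taken from the patch itself; the script path and the placeholder <jobid> are
illustrative, and the output file name follows the --output=stdout_%j.txt pattern used above):
once the scripts are available on the cluster, they can be submitted and monitored with the
standard Slurm commands:

    # submit one of the example job scripts; Slurm prints the assigned job id
    sbatch slurm/mpi_job_2nodes.sh

    # check the state of your own pending/running jobs
    squeue -u $USER

    # follow the combined stdout/stderr once the job is running
    tail -f stdout_<jobid>.txt

Note that the GPU examples above only demonstrate the resource requests; an actual application
launch (e.g. "srun <prog> <params>" as in hybrid_mpi_openmp_job.sh) still has to be added to
their "Execution / Commands" section.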