From e5758cb03afdabf328f5cc8fe0731d89b9c6ff53 Mon Sep 17 00:00:00 2001
From: Jannis Klinkenberg <j.klinkenberg@itc.rwth-aachen.de>
Date: Mon, 17 Feb 2025 10:13:22 +0100
Subject: [PATCH] added Slurm examples for CLAIX-2023

---
 slurm/basic_mpi.sh                  | 15 -----------
 slurm/{beeond.sh => beeond_job.sh}  |  0
 slurm/gpu_job_1gpu.sh               | 36 ++++++++++++++++++++++++++
 slurm/gpu_job_2gpus-1proc.sh        | 36 ++++++++++++++++++++++++++
 slurm/gpu_job_2gpus-2procs.sh       | 36 ++++++++++++++++++++++++++
 slurm/gpu_job_4gpus-4procs.sh       | 36 ++++++++++++++++++++++++++
 slurm/gpu_job_8gpus-8procs.sh       | 37 +++++++++++++++++++++++++++
 slurm/hybrid_mpi_openmp_job.sh      | 39 +++++++++++++++++++++++++++++
 slurm/mpi_job_1node.sh              | 24 ++++++++++++++++++
 slurm/mpi_job_2nodes.sh             | 24 ++++++++++++++++++
 slurm/mpi_job_basic.sh              | 20 +++++++++++++++
 slurm/openmp_multi-threading_job.sh | 38 ++++++++++++++++++++++++++++
 12 files changed, 326 insertions(+), 15 deletions(-)
 delete mode 100644 slurm/basic_mpi.sh
 rename slurm/{beeond.sh => beeond_job.sh} (100%)
 create mode 100644 slurm/gpu_job_1gpu.sh
 create mode 100644 slurm/gpu_job_2gpus-1proc.sh
 create mode 100644 slurm/gpu_job_2gpus-2procs.sh
 create mode 100644 slurm/gpu_job_4gpus-4procs.sh
 create mode 100644 slurm/gpu_job_8gpus-8procs.sh
 create mode 100644 slurm/hybrid_mpi_openmp_job.sh
 create mode 100644 slurm/mpi_job_1node.sh
 create mode 100644 slurm/mpi_job_2nodes.sh
 create mode 100644 slurm/mpi_job_basic.sh
 create mode 100644 slurm/openmp_multi-threading_job.sh

diff --git a/slurm/basic_mpi.sh b/slurm/basic_mpi.sh
deleted file mode 100644
index b1bc9cb..0000000
--- a/slurm/basic_mpi.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/usr/bin/zsh
-############################################################
-### Slurm flags
-############################################################
-
-#SBATCH --ntasks=8              # Ask for 8 MPI tasks
-#SBATCH --time=00:15:00         # Run time of 15 minutes
-#SBATCH --job-name=example_job  # Sets the job name
-#SBATCH --output=stdout_%j.txt  # Redirects stdout and stderr to stdout.txt
-#SBATCH --account=<project-id>  # Insertg your project-id or delete this line
-
-############################################################
-### Execution / Commands
-############################################################
-srun hostname
\ No newline at end of file
diff --git a/slurm/beeond.sh b/slurm/beeond_job.sh
similarity index 100%
rename from slurm/beeond.sh
rename to slurm/beeond_job.sh
diff --git a/slurm/gpu_job_1gpu.sh b/slurm/gpu_job_1gpu.sh
new file mode 100644
index 0000000..74a97a3
--- /dev/null
+++ b/slurm/gpu_job_1gpu.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/zsh
+############################################################
+### Slurm flags
+############################################################
+
+#SBATCH --partition=c23g             # request partition with GPU nodes
+#SBATCH --nodes=1                    # request desired number of nodes
+#SBATCH --ntasks-per-node=1          # request desired number of processes (or MPI tasks)
+
+#SBATCH --cpus-per-task=24           # request desired number of CPU cores or threads per process (default: 1)
+                                     # Note: available main memory also scales with the
+                                     #       number of cores if not specified otherwise
+                                     # Note: On CLAIX-2023, each GPU can be used with 24 cores
+
+#SBATCH --gres=gpu:1                 # specify desired number of GPUs per node
+#SBATCH --time=00:15:00              # max. run time of the job
+#SBATCH --job-name=example_1gpus     # set the job name
+#SBATCH --output=stdout_%j.txt       # redirect stdout and stderr to stdout_<jobid>.txt
+#SBATCH --account=<project-id>       # insert your project-id or delete this line
+
+############################################################
+### Parameters and Settings
+############################################################
+
+# print some information about the current system
+echo "Job nodes: ${SLURM_JOB_NODELIST}"
+echo "Current machine: $(hostname)"
+nvidia-smi
+
+############################################################
+### Execution / Commands
+############################################################
+
+# Example: Only a single GPU is used. However, due to billing
+# settings, 24 CPU cores can be requested and used
+# for free.
\ No newline at end of file
diff --git a/slurm/gpu_job_2gpus-1proc.sh b/slurm/gpu_job_2gpus-1proc.sh
new file mode 100644
index 0000000..578136a
--- /dev/null
+++ b/slurm/gpu_job_2gpus-1proc.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/zsh
+############################################################
+### Slurm flags
+############################################################
+
+#SBATCH --partition=c23g             # request partition with GPU nodes
+#SBATCH --nodes=1                    # request desired number of nodes
+#SBATCH --ntasks-per-node=1          # request desired number of processes (or MPI tasks)
+
+#SBATCH --cpus-per-task=48           # request desired number of CPU cores or threads per process (default: 1)
+                                     # Note: available main memory also scales with the
+                                     #       number of cores if not specified otherwise
+                                     # Note: On CLAIX-2023, each GPU can be used with 24 cores
+
+#SBATCH --gres=gpu:2                 # specify desired number of GPUs per node
+#SBATCH --time=00:15:00              # max. run time of the job
+#SBATCH --job-name=example_job_gpus  # set the job name
+#SBATCH --output=stdout_%j.txt       # redirect stdout and stderr to stdout_<jobid>.txt
+#SBATCH --account=<project-id>       # insert your project-id or delete this line
+
+############################################################
+### Parameters and Settings
+############################################################
+
+# print some information about the current system
+echo "Job nodes: ${SLURM_JOB_NODELIST}"
+echo "Current machine: $(hostname)"
+nvidia-smi
+
+############################################################
+### Execution / Commands
+############################################################
+
+# Example: 1:2 mapping between MPI processes and GPUs
+# The process is intended to use both GPUs
+
diff --git a/slurm/gpu_job_2gpus-2procs.sh b/slurm/gpu_job_2gpus-2procs.sh
new file mode 100644
index 0000000..ae9553d
--- /dev/null
+++ b/slurm/gpu_job_2gpus-2procs.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/zsh
+############################################################
+### Slurm flags
+############################################################
+
+#SBATCH --partition=c23g             # request partition with GPU nodes
+#SBATCH --nodes=1                    # request desired number of nodes
+#SBATCH --ntasks-per-node=2          # request desired number of processes (or MPI tasks)
+
+#SBATCH --cpus-per-task=24           # request desired number of CPU cores or threads per process (default: 1)
+                                     # Note: available main memory also scales with the
+                                     #       number of cores if not specified otherwise
+                                     # Note: On CLAIX-2023, each GPU can be used with 24 cores
+
+#SBATCH --gres=gpu:2                 # specify desired number of GPUs per node
+#SBATCH --time=00:15:00              # max. run time of the job
+#SBATCH --job-name=example_job_gpus  # set the job name
+#SBATCH --output=stdout_%j.txt       # redirect stdout and stderr to stdout_<jobid>.txt
+#SBATCH --account=<project-id>       # insert your project-id or delete this line
+
+############################################################
+### Parameters and Settings
+############################################################
+
+# print some information about the current system
+echo "Job nodes: ${SLURM_JOB_NODELIST}"
+echo "Current machine: $(hostname)"
+nvidia-smi
+
+############################################################
+### Execution / Commands
+############################################################
+
+# Example: 1:1 mapping between MPI processes and GPUs
+# Each process is intended to use 1 GPU
+
diff --git a/slurm/gpu_job_4gpus-4procs.sh b/slurm/gpu_job_4gpus-4procs.sh
new file mode 100644
index 0000000..08efaac
--- /dev/null
+++ b/slurm/gpu_job_4gpus-4procs.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/zsh
+############################################################
+### Slurm flags
+############################################################
+
+#SBATCH --partition=c23g             # request partition with GPU nodes
+#SBATCH --nodes=1                    # request desired number of nodes
+#SBATCH --ntasks-per-node=4          # request desired number of processes (or MPI tasks)
+
+#SBATCH --cpus-per-task=24           # request desired number of CPU cores or threads per process (default: 1)
+                                     # Note: available main memory also scales with the
+                                     #       number of cores if not specified otherwise
+                                     # Note: On CLAIX-2023, each GPU can be used with 24 cores
+
+#SBATCH --gres=gpu:4                 # specify desired number of GPUs per node
+#SBATCH --time=00:15:00              # max. run time of the job
+#SBATCH --job-name=example_job_gpus  # set the job name
+#SBATCH --output=stdout_%j.txt       # redirect stdout and stderr to stdout_<jobid>.txt
+#SBATCH --account=<project-id>       # insert your project-id or delete this line
+
+############################################################
+### Parameters and Settings
+############################################################
+
+# print some information about the current system
+echo "Job nodes: ${SLURM_JOB_NODELIST}"
+echo "Current machine: $(hostname)"
+nvidia-smi
+
+############################################################
+### Execution / Commands
+############################################################
+
+# Example: 1:1 mapping between MPI processes and GPUs
+# Each process is intended to use 1 GPU
+
diff --git a/slurm/gpu_job_8gpus-8procs.sh b/slurm/gpu_job_8gpus-8procs.sh
new file mode 100644
index 0000000..da90d9a
--- /dev/null
+++ b/slurm/gpu_job_8gpus-8procs.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/zsh
+############################################################
+### Slurm flags
+############################################################
+
+#SBATCH --partition=c23g             # request partition with GPU nodes
+#SBATCH --nodes=2                    # request desired number of nodes
+#SBATCH --ntasks-per-node=4          # request desired number of processes (or MPI tasks)
+
+#SBATCH --cpus-per-task=24           # request desired number of CPU cores or threads per process (default: 1)
+                                     # Note: available main memory also scales with the
+                                     #       number of cores if not specified otherwise
+                                     # Note: On CLAIX-2023, each GPU can be used with 24 cores
+
+#SBATCH --gres=gpu:4                 # specify desired number of GPUs per node
+#SBATCH --time=00:15:00              # max. run time of the job
+#SBATCH --job-name=example_job_gpus  # set the job name
+#SBATCH --output=stdout_%j.txt       # redirect stdout and stderr to stdout_<jobid>.txt
+#SBATCH --account=<project-id>       # insert your project-id or delete this line
+
+############################################################
+### Parameters and Settings
+############################################################
+
+# print some information about the current system
+echo "Job nodes: ${SLURM_JOB_NODELIST}"
+echo "Current machine: $(hostname)"
+nvidia-smi
+
+############################################################
+### Execution / Commands
+############################################################
+
+# Example: 1:1 mapping between MPI processes and GPUs
+# Each process is intended to use 1 GPU.
+# 2 full compute nodes are used.
+
diff --git a/slurm/hybrid_mpi_openmp_job.sh b/slurm/hybrid_mpi_openmp_job.sh
new file mode 100644
index 0000000..fed587e
--- /dev/null
+++ b/slurm/hybrid_mpi_openmp_job.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/zsh
+############################################################
+### Slurm flags
+############################################################
+
+#SBATCH --nodes=2                    # request desired number of nodes
+#SBATCH --ntasks-per-node=4          # request desired number of processes (or MPI tasks)
+#SBATCH --cpus-per-task=24           # request desired number of CPU cores or threads per process (default: 1)
+                                     # Note: available main memory also scales with the
+                                     #       number of cores if not specified otherwise
+#SBATCH --time=00:15:00              # max. run time of the job
+#SBATCH --job-name=example_job_hyb   # set the job name
+#SBATCH --output=stdout_%j.txt       # redirect stdout and stderr to stdout_<jobid>.txt
+#SBATCH --account=<project-id>       # insert your project-id or delete this line
+
+############################################################
+### Parameters and Settings
+############################################################
+
+# print some information about the current system
+echo "Job nodes: ${SLURM_JOB_NODELIST}"
+echo "Current machine: $(hostname)"
+
+############################################################
+### Execution / Commands
+############################################################
+
+# Example: Hybrid MPI + OpenMP execution
+
+# set number of threads
+export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK}  # usually set automatically by SLURM
+
+# enable thread binding to physical CPU cores
+export OMP_PLACES=cores
+export OMP_PROC_BIND=spread  # aiming to maximize memory bandwidth utilization
+# export OMP_PROC_BIND=close # typically used in scenarios where neighboring threads need to communicate/synchronize a lot
+
+# execute your program
+srun <prog> <params>
\ No newline at end of file
diff --git a/slurm/mpi_job_1node.sh b/slurm/mpi_job_1node.sh
new file mode 100644
index 0000000..95f8155
--- /dev/null
+++ b/slurm/mpi_job_1node.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/zsh
+############################################################
+### Slurm flags
+############################################################
+
+#SBATCH --nodes=1                    # request desired number of nodes
+#SBATCH --ntasks-per-node=96         # request desired number of processes (or MPI tasks)
+#SBATCH --time=00:15:00              # max. run time of the job
+#SBATCH --job-name=example_job_mpi   # set the job name
+#SBATCH --output=stdout_%j.txt       # redirect stdout and stderr to stdout_<jobid>.txt
+#SBATCH --account=<project-id>       # insert your project-id or delete this line
+
+############################################################
+### Parameters and Settings
+############################################################
+
+# print some information about the current system
+echo "Job nodes: ${SLURM_JOB_NODELIST}"
+echo "Current machine: $(hostname)"
+
+############################################################
+### Execution / Commands
+############################################################
+srun hostname
\ No newline at end of file
diff --git a/slurm/mpi_job_2nodes.sh b/slurm/mpi_job_2nodes.sh
new file mode 100644
index 0000000..46b566b
--- /dev/null
+++ b/slurm/mpi_job_2nodes.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/zsh
+############################################################
+### Slurm flags
+############################################################
+
+#SBATCH --nodes=2                    # request desired number of nodes
+#SBATCH --ntasks-per-node=96         # request desired number of processes (or MPI tasks)
+#SBATCH --time=00:15:00              # max. run time of the job
+#SBATCH --job-name=example_job_mpi   # set the job name
+#SBATCH --output=stdout_%j.txt       # redirect stdout and stderr to stdout_<jobid>.txt
+#SBATCH --account=<project-id>       # insert your project-id or delete this line
+
+############################################################
+### Parameters and Settings
+############################################################
+
+# print some information about the current system
+echo "Job nodes: ${SLURM_JOB_NODELIST}"
+echo "Current machine: $(hostname)"
+
+############################################################
+### Execution / Commands
+############################################################
+srun hostname
\ No newline at end of file
diff --git a/slurm/mpi_job_basic.sh b/slurm/mpi_job_basic.sh
new file mode 100644
index 0000000..ceac829
--- /dev/null
+++ b/slurm/mpi_job_basic.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/zsh
+############################################################
+### Slurm flags
+############################################################
+
+#SBATCH --ntasks=8                   # request desired number of processes (or MPI tasks)
+#SBATCH --time=00:15:00              # max. run time of the job
+#SBATCH --job-name=example_job_mpi   # set the job name
+#SBATCH --output=stdout_%j.txt       # redirect stdout and stderr to stdout_<jobid>.txt
+#SBATCH --account=<project-id>       # insert your project-id or delete this line
+
+############################################################
+### Execution / Commands
+############################################################
+
+# Note: It is not specified where the MPI tasks will be allocated.
+# They will most likely end up on the same node, but could in
+# principle also be placed on different nodes.
+
+srun hostname
\ No newline at end of file
diff --git a/slurm/openmp_multi-threading_job.sh b/slurm/openmp_multi-threading_job.sh
new file mode 100644
index 0000000..0eda131
--- /dev/null
+++ b/slurm/openmp_multi-threading_job.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/zsh
+############################################################
+### Slurm flags
+############################################################
+
+#SBATCH --nodes=1                    # pure multi-threading is restricted to a single node
+#SBATCH --ntasks-per-node=1          # pure multi-threading is restricted to a single process
+#SBATCH --cpus-per-task=48           # request desired number of CPU cores or threads per process (default: 1)
+                                     # Note: available main memory also scales with the
+                                     #       number of cores if not specified otherwise
+#SBATCH --time=00:15:00              # max. run time of the job
+#SBATCH --job-name=example_job_omp   # set the job name
+#SBATCH --output=stdout_%j.txt       # redirect stdout and stderr to stdout_<jobid>.txt
+#SBATCH --account=<project-id>       # insert your project-id or delete this line
+
+############################################################
+### Parameters and Settings
+############################################################
+
+# print some information about the current system
+echo "Current machine: $(hostname)"
+
+############################################################
+### Execution / Commands
+############################################################
+
+# Example: OpenMP
+
+# set number of threads
+export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK}  # usually set automatically by SLURM
+
+# enable thread binding to physical CPU cores
+export OMP_PLACES=cores
+export OMP_PROC_BIND=spread  # aiming to maximize memory bandwidth utilization
+# export OMP_PROC_BIND=close # typically used in scenarios where neighboring threads need to communicate/synchronize a lot
+
+# execute your program
+<prog> <params>
\ No newline at end of file
--
GitLab
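Usage sketch (not taken from the patch itself; the script path and the placeholder <jobid> are
illustrative, and the output file name follows the --output=stdout_%j.txt pattern used above):
once the scripts are available on the cluster, they can be submitted and monitored with the
standard Slurm commands:

    # submit one of the example job scripts; Slurm prints the assigned job id
    sbatch slurm/mpi_job_2nodes.sh

    # check the state of your own pending/running jobs
    squeue -u $USER

    # follow the combined stdout/stderr once the job is running
    tail -f stdout_<jobid>.txt

Note that the GPU examples above only demonstrate the resource requests; an actual application
launch (e.g. "srun <prog> <params>" as in hybrid_mpi_openmp_job.sh) still has to be added to
their "Execution / Commands" section.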