From 34bfb0326ae1d937b8d18796c1c0c9211ff56fe1 Mon Sep 17 00:00:00 2001
From: Jannis Klinkenberg <j.klinkenberg@itc.rwth-aachen.de>
Date: Wed, 19 Mar 2025 15:07:06 +0100
Subject: [PATCH] filled generic job README.md

---
 generic-job-scripts/README.md                 | 19 +++++++++++++++++--
 .../openmp_multi-threading_job.sh             |  5 +++--
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/generic-job-scripts/README.md b/generic-job-scripts/README.md
index f7da021..1763d6a 100644
--- a/generic-job-scripts/README.md
+++ b/generic-job-scripts/README.md
@@ -1,3 +1,18 @@
-# Generic SLURM Job Scripts
+# Generic Slurm Job Script Examples
 
-... coming soon ...
+This folder contains common job script examples and best practices. You can submit jobs to the Slurm batch system via `sbatch <script-name>.sh`.
+
+## What can you find here?
+
+| File/Folder | Description |
+|--------|-------------|
+| [beeond_job.sh](beeond_job.sh) | Job script for setting up and using BeeOND (BeeGFS On Demand) in an HPC environment. |
+| [gpu_job_1gpu.sh](gpu_job_1gpu.sh) | Runs a job with 1 GPU and a single process. |
+| [gpu_job_2gpus-1proc.sh](gpu_job_2gpus-1proc.sh) | Runs a job with 2 GPUs and a single process. Useful for tasks that require multi-GPU acceleration but not multi-processing. |
+| [gpu_job_2gpus-2procs.sh](gpu_job_2gpus-2procs.sh) | Runs a job with 2 GPUs and 2 separate processes. Commonly used for parallel deep learning training. |
+| [gpu_job_4gpus-4procs.sh](gpu_job_4gpus-4procs.sh) | Runs a job with 4 GPUs and 4 separate processes (full node with 4x H100). Commonly used for parallel deep learning training. |
+| [gpu_job_8gpus-8procs.sh](gpu_job_8gpus-8procs.sh) | Runs a job with 8 GPUs and 8 separate processes (2 full nodes with 4x H100 each). Commonly used for parallel deep learning training. |
+| [hybrid_mpi_openmp_job.sh](hybrid_mpi_openmp_job.sh) | Hybrid job script combining MPI (distributed computing) with OpenMP (shared-memory parallelism). Ideal for hybrid HPC workloads. |
+| [mpi_job_basic.sh](mpi_job_basic.sh) | A basic MPI job script, useful for testing and learning MPI-based job submission. |
+| [mpi_job_1node.sh](mpi_job_1node.sh) | Runs an MPI job on a single node, demonstrating intra-node parallel processing with multiple processes per node. |
+| [mpi_job_2nodes.sh](mpi_job_2nodes.sh) | Runs an MPI job spanning 2 full compute nodes, demonstrating inter-node parallelism and distributed computing across multiple machines. |
diff --git a/generic-job-scripts/openmp_multi-threading_job.sh b/generic-job-scripts/openmp_multi-threading_job.sh
index 0eda131..d5bed48 100644
--- a/generic-job-scripts/openmp_multi-threading_job.sh
+++ b/generic-job-scripts/openmp_multi-threading_job.sh
@@ -5,7 +5,7 @@
 
 #SBATCH --nodes=1              # pure multi-threading restricted to single node
 #SBATCH --ntasks-per-node=1    # pure multi-threading restricted to single process
-#SBATCH --cpus-per-task=48     # request desired number of CPU cores or threads per process (default: 1)
+#SBATCH --cpus-per-task=96     # request desired number of CPU cores or threads per process (default: 1). Here, we want to request all CPU cores
                                # Note: available main memory also scales with the
                                # number of cores if not specified otherwise
 #SBATCH --time=00:15:00        # max. run time of the job
@@ -26,8 +26,9 @@
 echo "Current machine: $(hostname)"
 
 # Example: OpenMP
-# set number of threads
+# set number of OpenMP threads to be used
 export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK}   # usually automatically set by SLURM
+# Note: you can also use fewer cores/threads or experiment with different numbers of cores/threads in the same job
 
 # enable thread binding to physical CPU cores
 export OMP_PLACES=cores
--
GitLab
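
Usage note (not part of the commit): the comment added in the second hunk suggests experimenting with different thread counts within a single job. Below is a minimal sketch of how that could look, assuming a hypothetical OpenMP binary `./your_openmp_app`; `OMP_PLACES`, `OMP_PROC_BIND`, `OMP_NUM_THREADS`, and `OMP_DISPLAY_ENV` are standard OpenMP environment variables, and `SLURM_CPUS_PER_TASK` is set by Slurm inside the job when `--cpus-per-task` is requested.

```bash
#!/usr/bin/env bash
# Sketch only: run the same OpenMP binary with several thread counts in one job.
# ./your_openmp_app is a placeholder for an actual OpenMP application.

export OMP_PLACES=cores        # bind threads to physical CPU cores
export OMP_PROC_BIND=close     # pack threads onto nearby cores (alternative: spread)
export OMP_DISPLAY_ENV=true    # print the effective OpenMP settings at startup

# scale from 2 threads up to everything requested via --cpus-per-task
for nthreads in 2 4 8 "${SLURM_CPUS_PER_TASK}"; do
    OMP_NUM_THREADS="${nthreads}" ./your_openmp_app
done
```

Submission stays the same as described in the README: `sbatch openmp_multi-threading_job.sh`.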