From 3f416d537d6ca6969a37ac4ad24fd1ccaa53ea80 Mon Sep 17 00:00:00 2001
From: Jannis Klinkenberg <j.klinkenberg@itc.rwth-aachen.de>
Date: Fri, 6 Dec 2024 10:27:52 +0100
Subject: [PATCH] added venv version for single GPU

---
Review notes (this area is ignored by `git am`; not part of the commit):
- Line structure of this patch was restored from a whitespace-collapsed
  copy; the 40 added lines below are otherwise unchanged.
- The subject says "single GPU", but the script requests two GPUs on one
  node (gres=gpu:2) and passes --strategy 'mirrored'. The file name says
  "single-node"; consider aligning the subject with it.
- The script starts python directly (no srun is visible), so the comment
  "each process sets required environment variables" presumably carries
  over from a multi-process variant. TODO confirm.
- set_vars.sh is sourced by relative path and its exit status is not
  checked; that file is not part of this patch.
- The new file ends without a trailing newline.

 .../submit_job_venv_single-node.sh            | 40 +++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 tensorflow/cifar10_distributed/submit_job_venv_single-node.sh

diff --git a/tensorflow/cifar10_distributed/submit_job_venv_single-node.sh b/tensorflow/cifar10_distributed/submit_job_venv_single-node.sh
new file mode 100644
index 0000000..b0d9940
--- /dev/null
+++ b/tensorflow/cifar10_distributed/submit_job_venv_single-node.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/zsh
+############################################################
+### Slurm flags
+############################################################
+
+#SBATCH --time=00:15:00
+#SBATCH --partition=c23g
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=2
+#SBATCH --cpus-per-task=24
+#SBATCH --gres=gpu:2
+
+############################################################
+### Load modules or software
+############################################################
+
+# TODO: load/activate your desired modules and virtual environment
+
+############################################################
+### Parameters and Settings
+############################################################
+
+# print some information about current system
+echo "Job nodes: ${SLURM_JOB_NODELIST}"
+echo "Current machine: $(hostname)"
+nvidia-smi
+
+export NCCL_DEBUG=INFO
+export TF_CPP_MIN_LOG_LEVEL=1 # disable info messages
+export TF_GPU_THREAD_MODE='gpu_private'
+export NCCL_SOCKET_NTHREADS=8 # multi-threading for NCCL communication
+
+############################################################
+### Execution (Model Training)
+############################################################
+
+# each process sets required environment variables and
+# runs the python script
+source set_vars.sh
+python -W ignore train_model.py --strategy 'mirrored'
\ No newline at end of file
-- 
GitLab