From ef4ef5823a6613a33d329cb1f2998bfa4cf51c9d Mon Sep 17 00:00:00 2001 From: Jannis Klinkenberg <j.klinkenberg@itc.rwth-aachen.de> Date: Fri, 8 Nov 2024 18:19:28 +0100 Subject: [PATCH] small fixes --- pytorch/cifar10/submit_job_venv.sh | 2 +- pytorch/cifar10_distributed/submit_job_venv.sh | 2 +- pytorch/mnist/submit_job_venv.sh | 2 +- pytorch/mnist_distributed/submit_job_venv.sh | 2 +- tensorflow/cifar10/submit_job_container.sh | 6 +++--- tensorflow/cifar10/submit_job_venv.sh | 12 +----------- .../cifar10_distributed/submit_job_container.sh | 4 ++-- tensorflow/cifar10_distributed/submit_job_venv.sh | 12 +----------- 8 files changed, 11 insertions(+), 31 deletions(-) diff --git a/pytorch/cifar10/submit_job_venv.sh b/pytorch/cifar10/submit_job_venv.sh index 982188f..ec783c5 100644 --- a/pytorch/cifar10/submit_job_venv.sh +++ b/pytorch/cifar10/submit_job_venv.sh @@ -14,7 +14,7 @@ ### Load modules or software ############################################################ -# TODO: activate your desired virtual environment +# TODO: load/activate your desired modules and virtual environment ############################################################ ### Parameters and Settings diff --git a/pytorch/cifar10_distributed/submit_job_venv.sh b/pytorch/cifar10_distributed/submit_job_venv.sh index ff2fe92..38fde6e 100644 --- a/pytorch/cifar10_distributed/submit_job_venv.sh +++ b/pytorch/cifar10_distributed/submit_job_venv.sh @@ -14,7 +14,7 @@ ### Load modules or software ############################################################ -# TODO: activate your desired virtual environment +# TODO: load/activate your desired modules and virtual environment ############################################################ ### Parameters and Settings diff --git a/pytorch/mnist/submit_job_venv.sh b/pytorch/mnist/submit_job_venv.sh index 982188f..ec783c5 100644 --- a/pytorch/mnist/submit_job_venv.sh +++ b/pytorch/mnist/submit_job_venv.sh @@ -14,7 +14,7 @@ ### Load modules or software ############################################################ -# TODO: activate your desired virtual environment +# TODO: load/activate your desired modules and virtual environment ############################################################ ### Parameters and Settings diff --git a/pytorch/mnist_distributed/submit_job_venv.sh b/pytorch/mnist_distributed/submit_job_venv.sh index ff2fe92..38fde6e 100644 --- a/pytorch/mnist_distributed/submit_job_venv.sh +++ b/pytorch/mnist_distributed/submit_job_venv.sh @@ -14,7 +14,7 @@ ### Load modules or software ############################################################ -# TODO: activate your desired virtual environment +# TODO: load/activate your desired modules and virtual environment ############################################################ ### Parameters and Settings diff --git a/tensorflow/cifar10/submit_job_container.sh b/tensorflow/cifar10/submit_job_container.sh index fa9b7b5..0e6f440 100644 --- a/tensorflow/cifar10/submit_job_container.sh +++ b/tensorflow/cifar10/submit_job_container.sh @@ -14,7 +14,7 @@ ### Load modules or software ############################################################ -# load module for PyTorch container +# load module for TensorFlow container module load TensorFlow/nvcr-24.01-tf2-py3 module list @@ -36,11 +36,11 @@ export NCCL_SOCKET_NTHREADS=8 # multi-threading for NCCL communication ### Execution (Model Training) ############################################################ -# tensorflow in container often needs a tmp directory +# TensorFlow in container often needs a tmp directory NEWTMP=$(pwd)/tmp mkdir -p ${NEWTMP} # run the python script inside the container source set_vars.sh apptainer exec -e --nv -B ${NEWTMP}:/tmp ${TENSORFLOW_IMAGE} \ - bash -c "python -W ignore train_model.py"' + bash -c "python -W ignore train_model.py" diff --git a/tensorflow/cifar10/submit_job_venv.sh b/tensorflow/cifar10/submit_job_venv.sh index cc27dd6..6a03c40 100644 --- a/tensorflow/cifar10/submit_job_venv.sh +++ b/tensorflow/cifar10/submit_job_venv.sh @@ -9,22 +9,12 @@ #SBATCH --ntasks-per-node=2 #SBATCH --cpus-per-task=24 #SBATCH --gres=gpu:2 -#SBATCH --account=supp0001 ############################################################ ### Load modules or software ############################################################ -# TODO: activate your desired virtual environment -module purge -module load GCC/11.3.0 -module load OpenMPI/4.1.4 -module load CMake/3.21.1 -module load Python/3.9.6 -module load NCCL/2.20.5-CUDA-12.4.0 -module load cuDNN/8.9.7.29-CUDA-12.3.0 - -source /work/jk869269/venvs/tensorflow-2.17_CUDA-12.3/bin/activate +# TODO: load/activate your desired modules and virtual environment ############################################################ ### Parameters and Settings diff --git a/tensorflow/cifar10_distributed/submit_job_container.sh b/tensorflow/cifar10_distributed/submit_job_container.sh index 8fc8291..3e1a1e7 100644 --- a/tensorflow/cifar10_distributed/submit_job_container.sh +++ b/tensorflow/cifar10_distributed/submit_job_container.sh @@ -14,7 +14,7 @@ ### Load modules or software ############################################################ -# load module for PyTorch container +# load module for TensorFlow container module load TensorFlow/nvcr-24.01-tf2-py3 module list @@ -36,7 +36,7 @@ export NCCL_SOCKET_NTHREADS=8 # multi-threading for NCCL communication ### Execution (Model Training) ############################################################ -# tensorflow in container often needs a tmp directory +# TensorFlow in container often needs a tmp directory NEWTMP=$(pwd)/tmp mkdir -p ${NEWTMP} diff --git a/tensorflow/cifar10_distributed/submit_job_venv.sh b/tensorflow/cifar10_distributed/submit_job_venv.sh index be3ff76..14e2209 100644 --- a/tensorflow/cifar10_distributed/submit_job_venv.sh +++ b/tensorflow/cifar10_distributed/submit_job_venv.sh @@ -9,22 +9,12 @@ #SBATCH --ntasks-per-node=2 #SBATCH --cpus-per-task=24 #SBATCH --gres=gpu:2 -#SBATCH --account=supp0001 ############################################################ ### Load modules or software ############################################################ -# TODO: activate your desired virtual environment -module purge -module load GCC/11.3.0 -module load OpenMPI/4.1.4 -module load CMake/3.21.1 -module load Python/3.9.6 -module load NCCL/2.20.5-CUDA-12.4.0 -module load cuDNN/8.9.7.29-CUDA-12.3.0 - -source /work/jk869269/venvs/tensorflow-2.17_CUDA-12.3/bin/activate +# TODO: load/activate your desired modules and virtual environment ############################################################ ### Parameters and Settings -- GitLab