diff --git a/tensorflow/cifar10_distributed/limit_gpu_visibility.sh b/tensorflow/cifar10_distributed/limit_gpu_visibility.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7a7704f3d33025f2f21cb2a18a92c0c0e6d366ba
--- /dev/null
+++ b/tensorflow/cifar10_distributed/limit_gpu_visibility.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/zsh
+
+# limit visible devices to ensure correct device selection and number of replicas in TensorFlow MultiWorkerMirroredStrategy and Horovod
+export CUDA_VISIBLE_DEVICES=${SLURM_LOCALID}
\ No newline at end of file
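
For reference, a minimal sketch (in Python, assuming one Slurm task is launched per GPU and this script has been sourced first) of what the restriction gives each task: every process should then see exactly one logical GPU, so MultiWorkerMirroredStrategy and Horovod count one replica per task rather than one per physical GPU on the node.

import os
import tensorflow as tf

# Sketch: run inside each srun task after sourcing limit_gpu_visibility.sh.
print("SLURM_LOCALID:", os.environ.get("SLURM_LOCALID"))
print("CUDA_VISIBLE_DEVICES:", os.environ.get("CUDA_VISIBLE_DEVICES"))
# Expected: exactly one GPU visible to this process.
print("visible GPUs:", tf.config.list_physical_devices("GPU"))
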
diff --git a/tensorflow/cifar10_distributed/set_vars.sh b/tensorflow/cifar10_distributed/set_vars.sh
index 1decdb2024fd34543ab5dda4b2f66d4dbf9a0097..e3c81d15e9ed37729d231e3abe3b0d69879b0a05 100644
--- a/tensorflow/cifar10_distributed/set_vars.sh
+++ b/tensorflow/cifar10_distributed/set_vars.sh
@@ -4,9 +4,6 @@ export RANK=${SLURM_PROCID}
 export LOCAL_RANK=${SLURM_LOCALID}
 export WORLD_SIZE=${SLURM_NTASKS}
 
-# limit visible devices to ensure correct device selection and number of replicas in TensorFlow MultiWorkerMirroredStrategy
-export CUDA_VISIBLE_DEVICES=${SLURM_LOCALID}
-
 # make variables also available inside container
 export APPTAINERENV_RANK=${RANK}
 export APPTAINERENV_LOCAL_RANK=${LOCAL_RANK}
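
The APPTAINERENV_ prefix is how these variables reach the container: Apptainer strips the prefix and exports the plain names inside. A minimal sketch of the consumer side (assuming set_vars.sh was sourced before apptainer exec runs):

import os

# Inside the container, APPTAINERENV_RANK appears as RANK, and so on.
rank = int(os.environ["RANK"])
local_rank = int(os.environ["LOCAL_RANK"])
world_size = int(os.environ["WORLD_SIZE"])
print(f"rank {rank} (local rank {local_rank}) of {world_size} tasks")
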
diff --git a/tensorflow/cifar10_distributed/submit_job_container.sh b/tensorflow/cifar10_distributed/submit_job_container.sh
index 3e1a1e7204f9c43f0dd7320548eb8c0366f47e88..9eea23940286ca210a0048f9989283ae773489e0 100644
--- a/tensorflow/cifar10_distributed/submit_job_container.sh
+++ b/tensorflow/cifar10_distributed/submit_job_container.sh
@@ -43,6 +43,7 @@ mkdir -p ${NEWTMP}
 # each process sets required environment variables and
 # runs the python script inside the container
 srun zsh -c '\
+    source limit_gpu_visibility.sh && \
     source set_vars.sh && \
     apptainer exec -e --nv -B ${NEWTMP}:/tmp ${TENSORFLOW_IMAGE} \
         bash -c "bash ./execution_wrapper.sh"'
diff --git a/tensorflow/cifar10_distributed/submit_job_container_horovod.sh b/tensorflow/cifar10_distributed/submit_job_container_horovod.sh
index 0b62a97120d9ae1997c5824653c1c7d4de0e9af2..99f24f11355d30e0ca30472c40dd511aa5a8c9de 100644
--- a/tensorflow/cifar10_distributed/submit_job_container_horovod.sh
+++ b/tensorflow/cifar10_distributed/submit_job_container_horovod.sh
@@ -43,6 +43,7 @@ mkdir -p ${NEWTMP}
 # each process sets required environment variables and
 # runs the python script inside the container
 srun zsh -c '\
+    source limit_gpu_visibility.sh && \
     source set_vars.sh && \
-    apptainer exec -e --nv -B ${NEWTMP}:/tmp ${TENSORFLOW_IMAGE} \
+    apptainer exec --nv -B ${NEWTMP}:/tmp ${TENSORFLOW_IMAGE} \
         bash -c "python -W ignore train_model_horovod.py"'
diff --git a/tensorflow/cifar10_distributed/submit_job_container_single-node.sh b/tensorflow/cifar10_distributed/submit_job_container_single-node.sh
new file mode 100644
index 0000000000000000000000000000000000000000..61184cc214852649be4bf2a5bf24ae32e9a69595
--- /dev/null
+++ b/tensorflow/cifar10_distributed/submit_job_container_single-node.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/zsh
+############################################################
+### Slurm flags
+############################################################
+
+#SBATCH --time=00:15:00
+#SBATCH --partition=c23g
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=2
+#SBATCH --cpus-per-task=24
+#SBATCH --gres=gpu:2
+#SBATCH --account=supp0001
+
+############################################################
+### Load modules or software
+############################################################
+
+# load module for TensorFlow container
+module load TensorFlow/nvcr-24.01-tf2-py3
+module list
+
+############################################################
+### Parameters and Settings
+############################################################
+
+# print some information about the current system
+echo "Job nodes: ${SLURM_JOB_NODELIST}"
+echo "Current machine: $(hostname)"
+nvidia-smi
+
+export NCCL_DEBUG=INFO
+export TF_CPP_MIN_LOG_LEVEL=1 # disable info messages
+export TF_GPU_THREAD_MODE='gpu_private'
+export NCCL_SOCKET_NTHREADS=8 # multi-threading for NCCL communication
+
+############################################################
+### Execution (Model Training)
+############################################################
+
+# TensorFlow in the container often needs a writable tmp directory
+NEWTMP=$(pwd)/tmp
+mkdir -p ${NEWTMP}
+
+# set required environment variables and
+# run the python script inside the container
+source set_vars.sh
+apptainer exec -e --nv -B ${NEWTMP}:/tmp ${TENSORFLOW_IMAGE} \
+    bash -c "python -W ignore train_model.py --strategy 'mirrored'"
diff --git a/tensorflow/cifar10_distributed/submit_job_venv.sh b/tensorflow/cifar10_distributed/submit_job_venv.sh
index c42ac3f394efa35b1c9e2fde633f20ca2cfc627b..4e413382c1902056713c8586677f609a42dd4183 100644
--- a/tensorflow/cifar10_distributed/submit_job_venv.sh
+++ b/tensorflow/cifar10_distributed/submit_job_venv.sh
@@ -37,5 +37,6 @@ export NCCL_SOCKET_NTHREADS=8 # multi-threading for NCCL communication
 # each process sets required environment variables and
 # runs the python script
 srun zsh -c "\
+    source limit_gpu_visibility.sh && \
     source set_vars.sh && \
     zsh ./execution_wrapper.sh"
\ No newline at end of file
diff --git a/tensorflow/cifar10_distributed/submit_job_venv_horovod.sh b/tensorflow/cifar10_distributed/submit_job_venv_horovod.sh
index 90b41fc3b321f5e1d6fa6a024b97789da119e8f9..4284dcdd505a9b0e1c29797bd1e0e0a74095d0bd 100644
--- a/tensorflow/cifar10_distributed/submit_job_venv_horovod.sh
+++ b/tensorflow/cifar10_distributed/submit_job_venv_horovod.sh
@@ -37,5 +37,6 @@ export NCCL_SOCKET_NTHREADS=8 # multi-threading for NCCL communication
 # each process sets required environment variables and
 # runs the python script
 srun zsh -c "\
+    source limit_gpu_visibility.sh && \
     source set_vars.sh && \
     python -W ignore train_model_horovod.py"
diff --git a/tensorflow/cifar10_distributed/train_model.py b/tensorflow/cifar10_distributed/train_model.py
index 812eea8d77ca031235497c7b7aa357101df06b2d..4fc24e1ffee20523cdac908feda4b531a3cf158f 100644
--- a/tensorflow/cifar10_distributed/train_model.py
+++ b/tensorflow/cifar10_distributed/train_model.py
@@ -12,6 +12,7 @@ import tensorflow.keras.applications as applications
 def parse_command_line():
     parser = argparse.ArgumentParser()
     parser.add_argument("--device", required=False, type=str, choices=["cpu", "cuda"], default="cuda")
+    parser.add_argument("--strategy", required=False, type=str, choices=["mirrored", "multi-worker"], default="multi-worker")
     parser.add_argument("--num_epochs", required=False, type=int, default=5)
     parser.add_argument("--batch_size", required=False, type=int, default=128)
     parser.add_argument("--tensorboard", required=False, help="Whether to use tensorboard callback", action="store_true", default=False)
@@ -79,14 +80,16 @@ def setup(args):
     tf.config.optimizer.set_jit(True)
 
     # define data parallel strategy for distributed training
-    strategy = tf.distribute.MultiWorkerMirroredStrategy(
-        communication_options=tf.distribute.experimental.CommunicationOptions(
-            implementation=tf.distribute.experimental.CollectiveCommunication.NCCL
+    if args.strategy == "mirrored":
+        strategy = tf.distribute.MirroredStrategy()
+    else:
+        strategy = tf.distribute.MultiWorkerMirroredStrategy(
+            communication_options=tf.distribute.experimental.CommunicationOptions(
+                implementation=tf.distribute.experimental.CollectiveCommunication.NCCL
+            )
         )
-    )
-
-    print("MultiWorkerMirroredStrategy.num_replicas_in_sync:", strategy.num_replicas_in_sync)
-    print("MultiWorkerMirroredStrategy.worker_index:", strategy.cluster_resolver.task_id)
+        print("MultiWorkerMirroredStrategy.num_replicas_in_sync:", strategy.num_replicas_in_sync)
+        print("MultiWorkerMirroredStrategy.worker_index:", strategy.cluster_resolver.task_id)
 
     return strategy
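
For context, the strategy returned by setup() is typically consumed like this (a sketch only; the hypothetical placeholder model and compile arguments below stand in for whatever train_model.py actually builds elsewhere):

strategy = setup(args)
with strategy.scope():
    # placeholder model; the real script defines its own architecture and optimizer
    model = applications.ResNet50(weights=None, classes=10, input_shape=(32, 32, 3))
    model.compile(optimizer="sgd",
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])
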