diff --git a/.vscode/extensions.json b/.vscode/extensions.json
new file mode 100644
index 0000000000000000000000000000000000000000..f7579995a9c7fe1241b5360ef9785a3b3a268801
--- /dev/null
+++ b/.vscode/extensions.json
@@ -0,0 +1,5 @@
+{
+    "recommendations": [
+        "genieai.chatgpt-vscode"
+    ]
+}
\ No newline at end of file
diff --git a/01_train_model.py b/01_train_model.py
index 53541b26f8ff398a523d680d2dcdeb7d65d835f2..aecab4018ce28f692f7d25999d157cee2b291840 100644
--- a/01_train_model.py
+++ b/01_train_model.py
@@ -1,82 +1,129 @@
+import hydra
+from omegaconf import DictConfig
 import torch
-import torch.nn as nn
-import torch.nn.functional as F
 import torch.optim as optim
 from torchvision import datasets, transforms
 from torch.optim.lr_scheduler import StepLR
 from pathlib import Path
-import fire
 from modules.models.simple_net import Net
 from modules.training.training import train, test
-from modules.utils.parallelize import pex
-
-def main(batch_size: int = 64, test_batch_size: int = 1000, epochs: int = 14, lr: float = 1.0,
-         gamma: float = 0.7, no_cuda: bool = False, no_mps: bool = False,
-         dry_run: bool = False, seed: int = 1, log_interval: int = 10, save_model: bool = False) -> None:
+# Registering the config path with Hydra
+@hydra.main(config_path="./data/config", config_name="train_model", version_base="1.3")
+def main(cfg: DictConfig) -> None:
     """
     Main function for training and evaluating a neural network on the MNIST dataset.
+    Utilizes Hydra for configuration management, separating model and training configurations.
 
     Args:
-        batch_size (int): Input batch size for training. Default: 64.
-        test_batch_size (int): Input batch size for testing. Default: 1000.
-        epochs (int): Number of epochs to train. Default: 14.
-        lr (float): Learning rate. Default: 1.0.
-        gamma (float): Learning rate step gamma. Default: 0.7.
-        no_cuda (bool): Flag to disable CUDA training. Default: False.
-        no_mps (bool): Flag to disable macOS GPU training. Default: False.
-        dry_run (bool): Flag for a quick single pass. Default: False.
-        seed (int): Random seed. Default: 1.
-        log_interval (int): Interval for logging training status. Default: 10.
-        save_model (bool): Flag to save the current model. Default: False.
+        cfg (DictConfig): Configuration object containing all parameters and sub-configurations.
+            Structure and default values of cfg are as follows:
+            ```
+            model:
+              num_layers: 2          # Default: 2, number of layers in the neural network model.
+            training:
+              batch_size: 64         # Default: 64, input batch size for training.
+              test_batch_size: 1000  # Default: 1000, input batch size for testing.
+              epochs: 14             # Default: 14, number of epochs to train.
+              lr: 1.0                # Default: 1.0, learning rate.
+              gamma: 0.7             # Default: 0.7, learning rate step gamma.
+              no_cuda: False         # Default: False, flag to disable CUDA training.
+              no_mps: False          # Default: False, flag to disable macOS GPU training.
+              dry_run: False         # Default: False, flag for a quick single pass.
+              seed: 1                # Default: 1, random seed for reproducibility.
+              log_interval: 10       # Default: 10, interval for logging training status.
+              save_model: True       # Default: True, flag to save the trained model.
+              data_dir: "./data"     # Default: "./data", directory for storing dataset files.
+              model_dir: "./models"  # Default: "./models", directory for saving trained model files.
+            ```
 
     Returns:
         None: This function does not return any value.
+
+    Examples:
+        To run training with the default configuration specified in `./data/config/train_model.yaml`:
+        ```bash
+        $ python 01_train_model.py
+        ```
+
+        To change the number of epochs to 20:
+        ```bash
+        $ python 01_train_model.py training.epochs=20
+        ```
+
+        To override the configuration with another file `alternative.yaml`:
+        ```bash
+        $ python 01_train_model.py +config=alternative.yaml
+        ```
+
+        To perform multiple runs with different model sizes using Hydra's multirun feature:
+        ```bash
+        $ python 01_train_model.py --multirun model.num_layers=1,2,3
+        ```
+
+        Using Hydra with the submitit Slurm launcher for cluster job submissions:
+        ```bash
+        $ python 01_train_model.py --multirun model.num_layers=1,2,3 hydra/launcher=submitit_slurm \
+            hydra.launcher.partition=my_partition \
+            hydra.launcher.comment='MNIST training runs' \
+            hydra.launcher.nodes=1 \
+            hydra.launcher.tasks_per_node=1 \
+            hydra.launcher.mem_per_cpu=4G
+        ```
+
+        Note: Integrating Hydra with Slurm may require additional configuration; check the Hydra submitit launcher documentation and your Slurm setup.
     """
-    use_cuda = not no_cuda and torch.cuda.is_available()
-    use_mps = not no_mps and torch.backends.mps.is_available()
-    torch.manual_seed(seed)
+    # Determine whether CUDA or MPS should be used, based on configuration and availability
+    use_cuda: bool = not cfg.training.no_cuda and torch.cuda.is_available()
+    use_mps: bool = not cfg.training.no_mps and torch.backends.mps.is_available()
+    torch.manual_seed(cfg.training.seed)
+
+    device: torch.device = torch.device("cuda") if use_cuda else torch.device("mps") if use_mps else torch.device("cpu")
+
+    # Set up DataLoader arguments based on device availability
+    train_kwargs: dict = {'batch_size': cfg.training.batch_size}
+    test_kwargs: dict = {'batch_size': cfg.training.test_batch_size}
     if use_cuda:
-        device = torch.device("cuda")
-    elif use_mps:
-        device = torch.device("mps")
-    else:
-        device = torch.device("cpu")
-
-    train_kwargs = {'batch_size': batch_size}
-    test_kwargs = {'batch_size': test_batch_size}
-    if use_cuda:
-        cuda_kwargs = {'num_workers': 1,
-                       'pin_memory': True,
-                       'shuffle': True}
+        cuda_kwargs: dict = {'num_workers': 1, 'pin_memory': True, 'shuffle': True}
         train_kwargs.update(cuda_kwargs)
         test_kwargs.update(cuda_kwargs)
 
+    # Image transformation pipeline
     transform = transforms.Compose([
         transforms.ToTensor(),
         transforms.Normalize((0.1307,), (0.3081,))
     ])
-    dataset1 = datasets.MNIST('./data', train=True, download=True, transform=transform)
-    dataset2 = datasets.MNIST('./data', train=False, transform=transform)
-    train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
-    test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)
-    model = Net().to(device)
-    optimizer = optim.Adadelta(model.parameters(), lr=lr)
+    # Dataset preparation
+    dataset1: datasets.MNIST = datasets.MNIST(cfg.training.data_dir, train=True, download=True, transform=transform)
+    dataset2: datasets.MNIST = datasets.MNIST(cfg.training.data_dir, train=False, transform=transform)
+
+    # DataLoaders for training and testing
+    train_loader: torch.utils.data.DataLoader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
+    test_loader: torch.utils.data.DataLoader = torch.utils.data.DataLoader(dataset2, **test_kwargs)
 
-    scheduler = StepLR(optimizer, step_size=1, gamma=gamma)
-    for epoch in range(1, epochs + 1):
-        train(model, device, train_loader, optimizer, epoch, log_interval, dry_run)
+    # Model initialization
+    model: Net = Net(num_layers=cfg.model.num_layers).to(device)
+
+    # Optimizer setup
+    optimizer: optim.Optimizer = optim.Adadelta(model.parameters(), lr=cfg.training.lr)
+
+    # Learning rate scheduler
+    scheduler: StepLR = StepLR(optimizer, step_size=1, gamma=cfg.training.gamma)
+
+    # Training loop
+    for epoch in range(1, cfg.training.epochs + 1):
+        train(model, device, train_loader, optimizer, epoch, cfg.training.log_interval, cfg.training.dry_run)
         test(model, device, test_loader)
         scheduler.step()
 
-    if save_model:
-        Path("./data/models").mkdir(parents=True, exist_ok=True)
-        torch.save(model.state_dict(), f"./data/models/mnist_cnn_{seed}.pt")
-
+    # Save the model checkpoint if configured to do so
+    if cfg.training.save_model:
+        Path(cfg.training.model_dir).mkdir(parents=True, exist_ok=True)
+        torch.save(model.state_dict(), f"{cfg.training.model_dir}/mnist_cnn_{cfg.training.seed}.pt")
 
 if __name__ == '__main__':
-    fire.Fire()
\ No newline at end of file
+    main()
\ No newline at end of file
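The Hydra configuration file `./data/config/train_model.yaml` referenced by the decorator above is not included in this diff. Purely as a hypothetical sketch, based only on the defaults documented in the docstring, the same tree can be written as OmegaConf structured configs and dumped to YAML; the actual file in the repository may differ:

```python
# Sketch only: mirrors the config tree documented in the 01_train_model.py docstring.
# All field names and defaults are taken from that docstring, not from the real YAML file.
from dataclasses import dataclass, field
from omegaconf import OmegaConf


@dataclass
class ModelConfig:
    num_layers: int = 2


@dataclass
class TrainingConfig:
    batch_size: int = 64
    test_batch_size: int = 1000
    epochs: int = 14
    lr: float = 1.0
    gamma: float = 0.7
    no_cuda: bool = False
    no_mps: bool = False
    dry_run: bool = False
    seed: int = 1
    log_interval: int = 10
    save_model: bool = True
    data_dir: str = "./data"
    model_dir: str = "./models"


@dataclass
class TrainModelConfig:
    model: ModelConfig = field(default_factory=ModelConfig)
    training: TrainingConfig = field(default_factory=TrainingConfig)


if __name__ == "__main__":
    cfg = OmegaConf.structured(TrainModelConfig)
    print(OmegaConf.to_yaml(cfg))  # prints the nested YAML tree described in the docstring
```

Keeping the defaults in one tree like this makes it easy to see exactly which value a CLI override such as `training.epochs=20` replaces.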
diff --git a/README.md b/README.md
index 78e5cbf66b072d78cf624800d98945c2c8768b8c..ec778ab5e1a8c0c762482c84a6769dc2953d3534 100644
--- a/README.md
+++ b/README.md
@@ -7,20 +7,20 @@ The docker image that we are going to use is the one on 'env_setup/Dockerfile'.
 
 ```bash
 # build image
-docker build -t andresfp14/xaicu122 ./env_setup
+docker build -t andresfp14/xaicu118 ./env_setup
 
 # push image to docker repo (if you want to make it available in general)
-docker push andresfp14/xaicu122
+docker push andresfp14/xaicu118
 
 # Examples of how to launch it in windows
-docker run -it --rm --name xaicu122 --gpus all -p 8888:8888 -p 6007:6007 -v %cd%:/home/example andresfp14/xaicu122
-docker run -d --rm --name xaicu122 --gpus all -p 8888:8888 -p 6007:6007 -v %cd%:/home/example andresfp14/xaicu122 bash
+docker run -it --rm --name xaicu118 --gpus all -p 8888:8888 -p 6007:6007 -v %cd%:/home/example andresfp14/xaicu118
+docker run -d --rm --name xaicu118 --gpus all -p 8888:8888 -p 6007:6007 -v %cd%:/home/example andresfp14/xaicu118 bash
 
 # Examples of how to launch it in linux
-docker run -it --rm --name xaicu122 --shm-size 100G --gpus all -p 8888:8888 -p 6007:6007 -v $(pwd):/home/example andresfp14/xaicu122 bash
-docker run -d --rm --name xaicu122 --shm-size 50G --gpus all -p 8888:8888 -p 6007:6007 -v $(pwd):/home/example andresfp14/xaicu122 bash
-docker run -idt --rm --name xai_1 --shm-size 50G --gpus '"device=0:0"' -v ~/data/datasets:/home/example/data/datasets -v $(pwd):/home/example andresfp14/xaicu122
-docker run -idt --rm --name xai_2 --shm-size 50G --gpus '"device=0:0"' -v $(pwd):/home/example andresfp14/xaicu122
+docker run -it --rm --name xaicu118 --shm-size 100G --gpus all -p 8888:8888 -p 6007:6007 -v $(pwd):/home/example andresfp14/xaicu118 bash
+docker run -d --rm --name xaicu118 --shm-size 50G --gpus all -p 8888:8888 -p 6007:6007 -v $(pwd):/home/example andresfp14/xaicu118 bash
+docker run -idt --rm --name xai_1 --shm-size 50G --gpus '"device=0:0"' -v ~/data/datasets:/home/example/data/datasets -v $(pwd):/home/example andresfp14/xaicu118
+docker run -idt --rm --name xai_2 --shm-size 50G --gpus '"device=0:0"' -v $(pwd):/home/example andresfp14/xaicu118
 ```
 
 
@@ -34,9 +34,9 @@ In general, this is defined in the file 'env/requirements.txt'.
 # with conda
 ###############################
 # create environment
-conda create --prefix ./venv python=3.11
+conda create --prefix ./.venv python=3.11
 # activate environment
-conda activate ./venv
+conda activate ./.venv
 # install requirements
 pip install -r ./env_setup/requirements.txt
 # export environment (if you want to update it)
@@ -66,23 +66,37 @@ Now, with the environment setup, we can run the needed code from the base direct
 
 ```bash
 ###############################
-# Getting help with fire
+# Getting help
 ###############################
-python 01_train_model.py main --help
+python 01_train_model.py --help
 
 ###############################
 # Executing with default arguments
 ###############################
-python 01_train_model.py main
+python 01_train_model.py
 
 ###############################
 # Executing and changing an argument
 ###############################
-python 01_train_model.py main --seed=7
+python 01_train_model.py training.seed=7
 
 ###############################
-# Executing the function main for multiple arguments
-# See helper function pex (parallel execution).
+# Executing with an alternative configuration file
 ###############################
-python 01_train_model.py pex main --seed=[0,1,2,3,4,5,6,7,8,9] --num_processes=4
-```
+python 01_train_model.py +config=alternative.yaml
+
+###############################
+# Executing multiple runs with different model sizes using Hydra's multirun feature
+###############################
+python 01_train_model.py --multirun model.num_layers=1,2,3
+
+###############################
+# Using Hydra with the submitit Slurm launcher for cluster job submissions
+###############################
+python 01_train_model.py --multirun model.num_layers=1,2,3 hydra/launcher=submitit_slurm \
+    hydra.launcher.partition=my_partition \
+    hydra.launcher.comment='MNIST training runs' \
+    hydra.launcher.nodes=1 \
+    hydra.launcher.tasks_per_node=1 \
+    hydra.launcher.mem_per_cpu=4G
+```
\ No newline at end of file
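The README examples above drive everything through the command line. For notebooks or quick tests, the same overrides can be composed programmatically; this is only a sketch using Hydra's compose API and assumes the `data/config/train_model.yaml` layout described in `01_train_model.py`:

```python
# Sketch only: programmatic equivalent of
#   python 01_train_model.py training.seed=7 model.num_layers=3
# Assumes the config directory layout described above; config_path is relative to this file.
from hydra import compose, initialize

with initialize(config_path="data/config", version_base="1.3"):
    cfg = compose(
        config_name="train_model",
        overrides=["training.seed=7", "model.num_layers=3"],
    )
    print(cfg.training.seed, cfg.model.num_layers)  # -> 7 3
```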
diff --git a/env_setup/requirements.txt b/env_setup/requirements.txt
index e9fe0701c2fcd01dbbddba62f733566c561536fc..885ea99020bbf34f98e0a64403469e3f5feaddcd 100644
--- a/env_setup/requirements.txt
+++ b/env_setup/requirements.txt
@@ -8,6 +8,8 @@ torchaudio==2.2.1
 numpy==1.26.3
 matplotlib==3.8.3
-fire==0.6.0
 pyyaml==6.0.1
 tqdm==4.66.2
+hydra-core==1.3.2
+hydra-submitit-launcher==1.2.0
+hydra-joblib-launcher==1.2.0
\ No newline at end of file
diff --git a/modules/models/simple_net.py b/modules/models/simple_net.py
index 2476c1f2ce796aa4895bb633819a067c58ada966..c7187cfb7eba18d24326d261155c2431acd6b995 100644
--- a/modules/models/simple_net.py
+++ b/modules/models/simple_net.py
@@ -3,13 +3,18 @@ import torch.nn as nn
 import torch.nn.functional as F
 
 class Net(nn.Module):
-    def __init__(self):
+    def __init__(self, num_layers=1):
         super(Net, self).__init__()
+        self.num_layers = num_layers
         self.conv1 = nn.Conv2d(1, 32, 3, 1)
         self.conv2 = nn.Conv2d(32, 64, 3, 1)
         self.dropout1 = nn.Dropout(0.25)
         self.dropout2 = nn.Dropout(0.5)
         self.fc1 = nn.Linear(9216, 128)
+
+        # Optional intermediate fully connected layer, used when num_layers > 1
+        self.fc_intermediate = nn.Linear(128, 128) if self.num_layers > 1 else None
+
         self.fc2 = nn.Linear(128, 10)
 
     def forward(self, x):
@@ -22,7 +27,13 @@ class Net(nn.Module):
         x = torch.flatten(x, 1)
         x = self.fc1(x)
         x = F.relu(x)
+
+        # Apply the intermediate fully connected layer if it is present
+        if self.fc_intermediate is not None:
+            x = self.fc_intermediate(x)
+            x = F.relu(x)
+
         x = self.dropout2(x)
         x = self.fc2(x)
         output = F.log_softmax(x, dim=1)
-        return output
\ No newline at end of file
+        return output
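A quick way to sanity-check the new `num_layers` option is a forward pass on a dummy MNIST-shaped batch. This smoke test is not part of the diff; it only assumes 1x28x28 inputs, as used by the training script:

```python
# Hypothetical smoke test (not included in the repository): check the modified Net's output shape.
import torch
from modules.models.simple_net import Net

model = Net(num_layers=2)      # num_layers > 1 enables the extra 128->128 layer
model.eval()                   # disable dropout for a deterministic check
x = torch.randn(8, 1, 28, 28)  # batch of 8 fake MNIST images
with torch.no_grad():
    out = model(x)
print(out.shape)               # expected: torch.Size([8, 10]) of log-probabilities
```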
diff --git a/modules/training/training.py b/modules/training/training.py
index 50d4c7806004083ac329cc0a9fd203c4a4b544ab..6d5c2ca31cf57c34ca53e75eaf2320df39f565bc 100644
--- a/modules/training/training.py
+++ b/modules/training/training.py
@@ -1,8 +1,10 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
+from modules.utils.loggers import create_logger
 
 def train(model, device, train_loader, optimizer, epoch, log_interval, dry_run):
+    logger = create_logger(name="training")
     model.train()
     for batch_idx, (data, target) in enumerate(train_loader):
         data, target = data.to(device), target.to(device)
@@ -12,13 +14,14 @@ def train(model, device, train_loader, optimizer, epoch, log_interval, dry_run)
         loss.backward()
         optimizer.step()
         if batch_idx % log_interval == 0:
-            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
+            logger.info('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                 epoch, batch_idx * len(data), len(train_loader.dataset),
                 100. * batch_idx / len(train_loader), loss.item()))
             if dry_run:
                 break
 
 def test(model, device, test_loader):
+    logger = create_logger(name="test")
     model.eval()
     test_loss = 0
     correct = 0
@@ -32,6 +35,6 @@ def test(model, device, test_loader):
 
     test_loss /= len(test_loader.dataset)
 
-    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
+    logger.info('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
         test_loss, correct, len(test_loader.dataset),
         100. * correct / len(test_loader.dataset)))
\ No newline at end of file
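The `create_logger` helper from `modules/utils/loggers` is imported above but not part of this diff, so its exact behavior is unknown. Purely as an assumption, a minimal implementation consistent with these call sites could look like the sketch below; the real helper may differ, for example by adding file handlers or writing into Hydra's run directory:

```python
# Hypothetical sketch of modules/utils/loggers.py -- NOT taken from the repository.
import logging


def create_logger(name: str = "default", level: int = logging.INFO) -> logging.Logger:
    """Return a named logger with a single stream handler attached."""
    logger = logging.getLogger(name)
    logger.setLevel(level)
    # Guard against attaching duplicate handlers when train()/test() call this repeatedly
    if not logger.handlers:
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s"))
        logger.addHandler(handler)
    return logger
```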