diff --git a/src/gui_version/RandomForest_gui.py b/src/gui_version/RandomForest_gui.py
index 2057521e6e538a13eaab42fa09002521bcdcd419..79bcca73490e03a15cd0e0c70ca4608caf9f4882 100644
--- a/src/gui_version/RandomForest_gui.py
+++ b/src/gui_version/RandomForest_gui.py
@@ -17,6 +17,7 @@ from tkinter import Label
 from utilities.ncfile_generation import generate_ncfile
 from utilities.strings_for_ncfile import char_to_string
 
+
 class prepare_data:
 
     """
@@ -24,17 +25,14 @@ class prepare_data:
         used in the Random Forest classifier.
     """
 
-    def __init__(self, master, aim, log=None, retrain=False):
+    def __init__(self, master, aim, log=None):
 
         self.master = master
         self.logger = log
         self.row = 0
-        self.retrain = retrain
+  
         self.import_parameters()
-        if self.retrain:
-            self.logger.info("Model is retrained")
-        else:
-            self.logger.info("Susceptibility/hazard map generation started")
+        self.logger.info("Susceptibility/hazard map generation started")
 
         self.master.geometry()
         self.master.winfo_toplevel().title("Map generation")
@@ -70,32 +68,18 @@ class prepare_data:
                 self.split_training_testing()
             elif aim == 'prediction':
                 self.import_features()
-
+                
     def import_parameters(self):
+        
+        """
+            User-defined parameters are imported.
+        """
 
         with open('tmp_map.pkl', 'rb') as handle:
             self.properties_map = pkl.load(handle)
 
         with open('tmp_settings.pkl', 'rb') as handle:
             self.properties_settings = pkl.load(handle)
-
-        if self.properties_map['drop_pred'] == '':
-            self.not_included_pred_data = []
-        else:
-            self.not_included_pred_data = self.properties_map[
-                'drop_pred'].split(',')
-
-        if self.properties_map['drop_train'] == '':
-            self.not_included_train_data = []
-        else:
-            self.not_included_train_data = self.properties_map[
-                'drop_train'].split(',')
-            
-        if self.retrain:
-            self.features_to_remove = pd.read_csv(self.properties_map['model_path'] + '/' + self.properties_map['model_to_save'] + '/feature_mismatch_training.csv')['to_drop'].to_list()
-            self.not_included_train_data = self.not_included_train_data + self.features_to_remove
-            self.properties_map['model_to_save'] = self.properties_map['model_to_save'] + '_retrain'
-            self.properties_map['model_to_load'] = self.properties_map['model_to_load'] + '_retrain'
             
     def import_features(self):
 
@@ -103,53 +87,23 @@ class prepare_data:
             Imports the features for prediction.
         """
 
-        # Import prediction dataset either as csv file or nc file
-        if self.properties_map['pred_path'].split('.')[-1] == 'csv':
-            self.features = pd.read_csv(self.properties_map['pred_path'])
-
-        elif self.properties_map['pred_path'].split('.')[-1] == 'nc':
-            ds = nc.Dataset(self.properties_map['pred_path'])
-            pred = ds['Result'][:, :].data
-            pred_features = ds['features'][:].data
-            self.feature_list = char_to_string(pred_features)
-            if 'xcoord' in self.feature_list and 'ycoord' in self.feature_list:
-                self.features = pd.DataFrame(pred, columns=self.feature_list)
-            else:
-                self.features = pd.DataFrame(pred, columns=['xcoord', 'ycoord']+self.feature_list)
-            
-            self.dropped = ds['Dropped'][:].data
-            self.dropped = [int(x) for x in self.dropped]
+        ds = nc.Dataset(self.properties_map['pred_path'])
+        pred = ds['Result'][:, :].data
+        pred_features = ds['features'][:].data
+        self.feature_list = char_to_string(pred_features)
+        self.features = pd.DataFrame(pred, columns=self.feature_list)
+        
+        self.dropped = ds['Dropped'][:].data
+        self.dropped = [int(x) for x in self.dropped]
 
         # Save the prediction coordinates in the prediction dataset
         self.xy['ycoord'] = self.features['ycoord']
         self.xy['xcoord'] = self.features['xcoord']
-
-        # Remove all features that shall not be included
-        # in prediction from DataFrame (see settings!)
-        if len(self.not_included_pred_data) > 0:
-            for dataset in self.not_included_pred_data:
-                self.features = self.features.drop(dataset, axis=1)
-                
-        # Determine which classes are contained in the categorical features
-        # It is distinguished between one-hot and ordinal encoded features
-        self.categorical_classes = {}
-        cat_subset = [feat for feat in self.features.columns.tolist() if '_encode' in feat]
-        df_sub = self.features[cat_subset]
-        cat_feat = ['_'.join(col.split('_')[:(len(col.split('_'))-2)]) for col in df_sub.columns.tolist()]
-        self.distibuish_encoding = {}
-        for feat in list(set(cat_feat)):
-            classes = []
-            if cat_feat.count(feat)>1:
-                classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f])
-                self.distibuish_encoding[feat] = 'ohe'
-            else:
-                classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f])
-                self.distibuish_encoding[feat] = 'ordinal'
-            self.categorical_classes[feat] = {}
-            self.categorical_classes[feat]['classes'] = [item for sublist in classes for item in sublist]
-            self.categorical_classes[feat]['num_cols'] = cat_feat.count(feat)
+        
+        self.features = self.features.drop(['xcoord', 'ycoord'], axis=1)
         self.feature_list = list(self.features.columns)
-        self.features_org = self.features.copy()
+        self.features = np.array(self.features)
+        
         self.logger.info('Features imported')
         self.logger.info('The following ' + str(len(self.feature_list))
                          + ' features are included in the prediction dataset: '
@@ -159,7 +113,7 @@ class prepare_data:
             row=self.row, column=1)
         self.row = self.row + 1
         self.master.update()
-
+        
     def import_features_labels(self):
 
         """
@@ -178,35 +132,8 @@ class prepare_data:
         self.xy['ycoord'] = self.features['ycoord']
         self.xy['xcoord'] = self.features['xcoord']
         
-        self.features = self.features.drop(['xcoord', 'ycoord'], axis=1)
-
-        # Drop ID from training data
-        self.features = self.features.drop('ID', axis=1)
-
-        # Remove all features that shall not be included in
-        # training from DataFrame (see settings!)
-        if len(self.not_included_train_data) > 0:
-            for dataset in self.not_included_train_data:
-                self.features = self.features.drop(dataset, axis=1)
-                
-        # Determine which classes are contained in the categorical features
-        # It is distinguished between one-hot and ordinal encoded features
-        self.categorical_classes = {}
-        cat_subset = [feat for feat in self.features.columns.tolist() if '_encode' in feat]
-        df_sub = self.features[cat_subset]
-        cat_feat = ['_'.join(col.split('_')[:(len(col.split('_'))-2)]) for col in df_sub.columns.tolist()]
-        for feat in list(set(cat_feat)):
-            classes = []
-            if cat_feat.count(feat)>1:
-                classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f])
-            else:
-                classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f])
-            self.categorical_classes[feat] = {}
-            self.categorical_classes[feat]['classes'] = [item for sublist in classes for item in sublist]
-            self.categorical_classes[feat]['num_cols'] = cat_feat.count(feat)
-
+        self.features = self.features.drop(['xcoord', 'ycoord', 'ID'], axis=1)
         self.feature_list = list(self.features.columns)
-        self.features_backup = self.features.copy()
         self.features = np.array(self.features)
 
         self.logger.info('Features imported')
@@ -219,7 +146,7 @@ class prepare_data:
                   row=self.row, column=1)
         self.row = self.row + 1
         self.master.update()
-
+        
     def split_training_testing(self):
 
         """
@@ -240,8 +167,7 @@ class prepare_data:
             row=self.row, column=1)
         self.row = self.row + 1
         self.master.update()
-
-
+        
 class RandomForest(prepare_data):
     
     """
@@ -249,14 +175,14 @@ class RandomForest(prepare_data):
         generation of the landslide susceptibility and hazard map.
     """
 
-    def __init__(self, master, aim, parallel=False, log=None, retrain=None):
+    def __init__(self, master, aim, parallel=False, log=None):
         
-        super().__init__(master, aim, log=log, retrain=retrain)
+        super().__init__(master, aim, log=log)
         self.aim = aim
         self.logger = log
         self.parallel = parallel
         self.num_chunks = 10
-        self.retrain = retrain
+
         # Random Forest settings
         self.criterion = self.properties_map['criterion']
         self.n_estimators = self.properties_map['num_trees']
@@ -297,12 +223,11 @@ class RandomForest(prepare_data):
 
             self.create_output_dir()
             self.load_model()
-            if not self.error:
-                self.predict()
-                self.extract_pos_neg_predictions()
-                self.reshape_prediction()
-                self.save_prediction()
-
+            self.predict()
+            self.extract_pos_neg_predictions()
+            self.reshape_prediction()
+            self.save_prediction()
+                
     def define(self):
 
         """
@@ -375,7 +300,7 @@ class RandomForest(prepare_data):
             self.master.update()
 
             self.logger.info('Validation data predicted')
-
+            
     def split_array_into_chunks(self, pred):
 
         """
@@ -491,8 +416,7 @@ class RandomForest(prepare_data):
                   'roc_tpr': self.tpr,
                   'roc_auc': self.roc_auc,
                   'accuracy': self.acc,
-                  'fbeta': self.fbeta,
-                  'categories': self.categorical_classes
+                  'fbeta': self.fbeta
                   }
 
         with open(self.model_dir
@@ -510,136 +434,6 @@ class RandomForest(prepare_data):
         self.row = self.row + 1
         self.master.update()
         
-    def adapt_categorical_features(self, train_classes, training_features):
-        
-        """
-            Assure that identical categorical features are used in training
-            and prediction dataset
-        
-            The encoded features in the training and prediction dataset are
-            compared regarding the contained classes. Depending on the user
-            input, instances in the prediction dataset with classes that are
-            not included in the training dataset are either set to no_value or
-            nevertheless considered in the prediction. The surplus additional
-            features are removed either way to achieve the same set of features
-            as in the training dataset. 
-            
-            The prediction dataset is furthermore assessed if all features
-            that are included in the training dataset also appear in the prediction
-            dataset. If that is not the case, the training process is relaunched
-            with an adapted training dataset where the feature(s) that is/are
-            not contrained in the training dataset are removed. The second 
-            trained model will be stored in a seperate folder which is named
-            <old_folder_name>_retrain.
-            
-            Input:
-                train_classes:      dictionary containing for each categorical feature
-                                    all classes and the number of total classes
-                                    contained in the training dataset
-                training_features:  Complete feature names of the features
-                                    contained in the training dataset
-                
-            Output:
-                None
-        """
-        
-        Label(self.master, text="Categorical features are compared between training and prediction dataset").grid(
-            row=self.row, column=1)
-        self.row = self.row + 1
-        self.master.update()
-        
-        self.instances_to_drop = []
-        self.features_not_in_training = []
-        
-        for feat in [val for val in training_features if '_encode' in val]:
-            if feat not in self.feature_list:
-                
-                Label(self.master, text='Categorical feature ' + feat + ' not in prediction dataset').grid(
-                row=self.row, column=1)
-                self.row = self.row + 1
-                self.master.update()
-                
-                Label(self.master, text='Error: cannot proceed with mapping').grid(
-                row=self.row, column=1)
-                self.row = self.row + 1
-                self.master.update()
-
-                self.logger.error('Error: Categorical feature ' + feat + ' not in prediction dataset')
-                self.logger.error('Error: cannot proceed with mapping')
-                self.error = True
-                self.retrain = True
-                self.features_not_in_training.append(feat)
-                
-        if len(self.features_not_in_training) > 0:
-                pd.DataFrame(self.features_not_in_training, columns=['to_drop']).to_csv(self.model_dir + self.model_to_load + 'feature_mismatch_training.csv', index=False)
-
-        if not self.retrain:
-            if list(set([val for val in training_features if '_encode' in val])) != list(set(self.feature_list)):    
-                for feat in list(set(['_'.join(val.split('_')[:-2]) for val in self.feature_list if '_encode' in val])):
-                    if feat in list(self.distibuish_encoding.keys()):
-                        if self.distibuish_encoding[feat] == 'ohe':
-                            if (train_classes[feat]['num_cols'] < self.categorical_classes[feat]['num_cols']) or (set(train_classes[feat]['classes']) != set(self.categorical_classes[feat]['classes'])):
-                                Label(self.master, text=feat + ': Prediction dataset contains more or other classes than training dataset').grid(
-                                    row=self.row, column=1)
-                                self.row = self.row + 1
-                                self.master.update()
-            
-                                Label(self.master, text='Apply user defined handling approach').grid(
-                                    row=self.row, column=1)
-                                self.row = self.row + 1
-                                self.master.update()
-                                
-                                self.logger.warning(feat + ': Prediction dataset contains more classes than training dataset')
-                                self.logger.info('Apply user defined handling approach')
-                                
-                                common_elements = set(train_classes[feat]['classes']).intersection(set(self.categorical_classes[feat]['classes']))
-                            
-                                if self.properties_map['keep']:
-                                    if len(common_elements) == 0:
-                                        
-                                        Label(self.master, text='Error: no common classes for ' + feat + ' in training and prediction dataset').grid(
-                                            row=self.row, column=1)
-                                        self.row = self.row + 1
-                                        self.master.update()
-                                        
-                                        self.logger.error('Error: no common classes for ' + feat + ' in training and prediction dataset')
-                                        self.error = True
-                                    else:
-                                        to_drop = [feat + '_' + str(f) + '_encode' for f in self.categorical_classes[feat]['classes'] if f not in common_elements]
-                                        self.features = self.features.drop(to_drop, axis=1)
-                                        self.feature_list = self.features.columns.tolist()
-                                elif self.properties_map['remove_instances']:
-                                    to_drop_col = [feat + '_' + str(f) + '_encode' for f in self.categorical_classes[feat]['classes'] if f not in common_elements]
-                                    to_drop_row = []
-                                    for col in to_drop_col:
-                                        to_drop_row = to_drop_row + self.features.index[self.features[col] == 1].tolist()
-                                    self.features = self.features.drop(to_drop_col, axis=1)
-                                    
-                                    Label(self.master, text='Not matching features have been removed').grid(
-                                            row=self.row, column=1)
-                                    self.row = self.row + 1
-                                    self.master.update()
-                                    
-                                    self.logger.info('Not matching features have been removed')
-                                    
-                                    self.feature_list = self.features.columns.tolist()
-                                    self.instances_to_drop = self.instances_to_drop + to_drop_row
-                                    
-                                    Label(self.master, text='Instances to consider during mapping have been adapted').grid(
-                                            row=self.row, column=1)
-                                    self.row = self.row + 1
-                                    self.master.update()
-                                    
-                                    self.logger.info('Instances to consider during mapping have been adapted')
-
-            Label(self.master, text='Categorical features have been handled and hamonised').grid(
-                    row=self.row, column=1)
-            self.row = self.row + 1
-            self.master.update()
-    
-            self.logger.info('Categorical features have been handled and hamonised')
-            self.logger.info('Remaining features: ' + str(self.feature_list))
-
     def load_model(self):
 
         """
@@ -659,97 +453,8 @@ class RandomForest(prepare_data):
                 + self.properties_map['model_to_load']
                 + '/model_params.pkl', 'rb') as f:
             params = pkl.load(f)
-        features = params['features']
-        self.error = False
-        self.adapt_categorical_features(params['categories'], features)
-
-        if not self.error:
-            if len(self.feature_list) == len(features):
-                if set(self.feature_list) != set(features):
-                    
-                    Label(self.master, text='Error: Not all features of the model are contained in the prediction dataset').grid(
-                                row=self.row, column=1)
-                    self.row = self.row + 1
-                    self.master.update()
-                    
-                    self.logger.error('Error: Not all features of the model are contained in the prediction dataset')
-                    
-                    self.error = True
-                elif self.feature_list != features:
-                    
-                    Label(self.master, text='The order or features differs. Prediction features are reordered').grid(
-                                row=self.row, column=1)
-                    self.row = self.row + 1
-                    self.master.update()
-                    
-                    self.logger.info('The order or features differs. Prediction features are reordered')
-                    
-                    self.features = self.features[features]
-                    if self.features.columns.tolist() != features:
-                        
-                        Label(self.master, text='There is still something wrong with the order of the features!').grid(
-                                row=self.row, column=1)
-                        self.row = self.row + 1
-                        self.master.update()
-                        self.error = True
-                elif self.feature_list == features:
-                    
-                    Label(self.master, text='Prediction and training dataset have the same order').grid(
-                                row=self.row, column=1)
-                    self.row = self.row + 1
-                    self.master.update()
-
-                    self.logger.info('Prediction and training dataset have the same order')
-            elif len(self.feature_list) < len(features):
-                
-                Label(self.master, text='Error: Not all features of the model are contained in the prediction dataset').grid(
-                                row=self.row, column=1)
-                self.row = self.row + 1
-                self.master.update()
-
-                self.logger.error('Error: Not all features of the model are contained in the prediction dataset')
-                
-                self.error = True
-            elif len(self.feature_list) > len(features):
-                if set(features).issubset(self.feature_list):
-                    to_drop = list(set(self.feature_list)-set(features))
-                    self.features_org = self.features_org.drop(to_drop, axis=1)
-                    self.features_org = self.features_org[features]
-                    if self.features_org.columns.tolist() != features:
-                        Label(self.master, text='There is still something wrong with the order of the features!').grid(
-                                row=self.row, column=1)
-                        self.row = self.row + 1
-                        self.master.update()
-                        self.error = True
-                    else:                      
-                        Label(self.master, text='Features in the prediction dataset which were not used for training were removed').grid(
-                                row=self.row, column=1)
-                        self.row = self.row + 1
-                        self.master.update()
-                        
-                        Label(self.master, text='Features in the prediction dataset were sorted to match the training features').grid(
-                                row=self.row, column=1)
-                        self.row = self.row + 1
-                        self.master.update()
-                        
-                        self.logger.warning('Features in the prediction dataset which were not used for training were removed')
-                        self.logger.info('Features left: ' + str(self.feature_list))
-                        self.logger.info('Features in the prediction dataset were sorted to match the training features')
-                else:
-                    Label(self.master, text='Error: Not all features of the model are contained in the prediction dataset').grid(
-                                row=self.row, column=1)
-                    self.row = self.row + 1
-                    self.master.update()
-                    
-                    self.logger.error('Error: Not all features of the model are contained in the prediction dataset')
-                    
-                    self.error = True
-            if not self.error:
-                self.feature_list = self.features.columns.tolist()
-                self.features = self.features.to_numpy()  
         
-
-        self.logger.info('Model loaded from '
+        self.logger.info('Model succesfully loaded from '
                          + self.model_dir
                          + self.model_to_load)
         
@@ -758,12 +463,7 @@ class RandomForest(prepare_data):
                          + self.model_to_load)).grid(row=self.row, column=1)
         self.row = self.row + 1
         self.master.update()
-
-        Label(self.master, text="Model successfully loaded").grid(
-            row=self.row, column=1)
-        self.row = self.row + 1
-        self.master.update()
-
+        
     def save_prediction(self):
 
         """
@@ -792,15 +492,15 @@ class RandomForest(prepare_data):
             row=self.row, column=1)
         self.row = self.row + 1
         self.master.update()
-
+        
     def reshape_prediction(self):
 
         """
             Reshape the individual predictions into a map.
         """
-        dropped = list(set(self.dropped + self.instances_to_drop))
+        
         arr_xy = np.array(self.xy)
-        arr_xy[dropped, :] = [self.properties_settings['no_value']]
+        arr_xy[self.dropped, :] = [self.properties_settings['no_value']]
 
         result = np.reshape(list(arr_xy[:, 2]),
                             (len(list(set(self.xy['ycoord']))),
diff --git a/src/gui_version/compatibility_of_input_datasets.py b/src/gui_version/compatibility_of_input_datasets.py
new file mode 100644
index 0000000000000000000000000000000000000000..61e0405611c069575d3abe3552c62f0d51758c2c
--- /dev/null
+++ b/src/gui_version/compatibility_of_input_datasets.py
@@ -0,0 +1,279 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Jan 29 13:20:59 2025
+
+@author: aedrich
+"""
+
+import numpy as np
+import pandas as pd
+import netCDF4 as nc
+import pickle as pkl
+import os
+import logging
+import re
+
+from sklearn.model_selection import train_test_split
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import mean_squared_error, f1_score, roc_curve, auc, fbeta_score
+from joblib import delayed, Parallel
+from tkinter import Label
+
+from utilities.ncfile_generation import generate_basic_ncfile
+from utilities.strings_for_ncfile import char_to_string, features_to_char
+
+
+class comparison_training_prediction_dataset:
+    
+    def __init__(self, logger):
+        
+        self.logger = logger
+        self.error = False
+        
+        self.import_parameters()
+        self.import_prediction_dataset()
+        self.import_training_dataset()
+        self.compare_features()
+        if not self.error:
+            self.additional_instances_to_drop()
+            self.save_prediction_dataset()
+            self.save_training_dataset()
+        
+    def import_parameters(self):
+
+        with open('tmp_map.pkl', 'rb') as handle:
+            self.properties_map = pkl.load(handle)
+
+        with open('tmp_settings.pkl', 'rb') as handle:
+            self.properties_settings = pkl.load(handle)
+            
+        if self.properties_map['drop_pred'] == '':
+            self.not_included_pred_data = []
+        else:
+            self.not_included_pred_data = self.properties_map[
+                'drop_pred'].split(',')
+
+        if self.properties_map['drop_train'] == '':
+            self.not_included_train_data = []
+        else:
+            self.not_included_train_data = self.properties_map[
+                'drop_train'].split(',')
+            
+    def import_prediction_dataset(self):
+
+        ds = nc.Dataset(self.properties_map['pred_path'])
+        pred = ds['Result'][:, :].data
+        pred_features = ds['features'][:].data
+        self.feature_list = char_to_string(pred_features)
+        
+        if 'xcoord' in self.feature_list and 'ycoord' in self.feature_list:
+            self.pred = pd.DataFrame(pred, columns=self.feature_list)
+        else:
+            self.pred = pd.DataFrame(pred, columns=['xcoord', 'ycoord']+self.feature_list)
+            
+        self.xy = pd.DataFrame()
+        self.xy['ycoord'] = self.pred['ycoord']
+        self.xy['xcoord'] = self.pred['xcoord']
+        
+        self.idx = ds['Dropped'][:].data
+        self.idx = [int(x) for x in self.idx]
+            
+        if len(self.not_included_pred_data) > 0:
+            for dataset in self.not_included_pred_data:
+                if dataset in self.pred.columns.tolist():
+                    self.pred = self.pred.drop(dataset, axis=1)
+                
+        self.logger.info('Prediction dataset imported')
+        self.logger.info('The following ' + str(len(self.pred.columns.tolist()))
+                         + ' features are included in the prediction dataset: '
+                         + str(self.pred.columns.tolist()))
+                
+    def import_training_dataset(self):
+        
+        # Import training dataset as csv file
+        self.train = pd.read_csv(self.properties_map['train_path'])
+        # Extract and remove labels from training dataset
+        self.labels = np.array(
+            self.train[self.properties_map['name_label']]).reshape(
+                [np.shape(self.train[self.properties_map['name_label']])[0], 1])
+        
+        self.xy_train = pd.DataFrame()
+        self.xy_train['ID'] = self.train['ID']
+        self.xy_train[self.properties_map['name_label']] = self.train[self.properties_map['name_label']]
+        self.xy_train['ycoord'] = self.train['ycoord']
+        self.xy_train['xcoord'] = self.train['xcoord']
+        
+        self.train = self.train.drop(['xcoord', 'ycoord', 'ID', self.properties_map['name_label']], axis=1)
+
+        if len(self.not_included_train_data) > 0:
+            for dataset in self.not_included_train_data:
+                if dataset in self.train.columns.tolist():
+                    self.train = self.train.drop(dataset, axis=1)
+
+        self.logger.info('Training dataset imported')
+        self.logger.info('The following ' + str(len(self.train.columns.tolist()))
+                         + ' features are included in the training dataset: '
+                         + str(self.train.columns.tolist()))
+
+    def compare_features(self):
+        
+        """
+            It is assessed whether all features in the training dataset also
+            appear in the prediction dataset. If that is not the case, the
+            training process will be relaunched with an adapted training dataset
+            from which the feature(s) that is/are not contained in the prediction
+            dataset are removed. The second trained model will be stored in a
+            separate folder which is named <old_folder_name>_retrain.
+            
+            If more features appear in the prediction dataset, the additional 
+            features are removed.
+            
+        """
+    
+        self.logger.info('Features are compared between training and prediction dataset')
+        
+        if set(self.train.columns) == set(self.pred.columns):  
+            self.logger.info('Features are identical in both training and prediction dataset')
+            self.pred = self.pred[self.train.columns]
+            
+            self.logger.info('Potentially varying order of features has been fixed')
+            self.error = False
+            
+        else:
+            self.logger.warning('Features are not identical in the training and prediction dataset')
+
+            extra_in_pred = set(self.pred.columns) - set(self.train.columns)
+            extra_in_train = set(self.train.columns) - set(self.pred.columns)
+            
+            if len(extra_in_pred) > 0 and len(extra_in_train) == 0:
+                self.logger.warning('More features in prediction dataset, additional features are removed')
+                
+                self.pred = self.pred[self.train.columns]
+                self.error = False
+                
+            elif len(extra_in_train) > 0  and len(extra_in_pred) == 0 :
+                self.logger.warning('More features in training dataset, additional features are removed')
+                
+                self.train = self.train[self.pred.columns]
+                self.error = False
+                
+            elif len(extra_in_train) > 0  and len(extra_in_pred) > 0:
+                self.logger.warning('There are mismatching features in both datasets')
+
+                self.common_columns = self.train.columns.intersection(self.pred.columns)
+                
+                if len(self.common_columns.tolist()) == 0:
+                    self.logger.error('Error: No common columns in training and prediction dataset')
+                    self.error = True
+
+                elif len(self.common_columns.tolist()) < 6:
+                    self.logger.warning('Warning: only ' + str(len(self.common_columns.tolist())) + ' common columns in training and prediction dataset')
+                    self.error = False
+                    
+                    self.train = self.train[self.common_columns]
+                    self.pred = self.pred[self.common_columns]
+                    
+                else:
+                    self.logger.info(str(len(self.common_columns.tolist())) + ' common columns in training and prediction dataset')         
+                    self.error = False
+                    
+                    self.train = self.train[self.common_columns]
+                    self.pred = self.pred[self.common_columns]
+            else:
+                self.logger.error('Error: Unknown issue detected. Check features manually!')
+                self.error = True
+                
+            self.logger.info('Feature comparison completed')
+            
+    def additional_instances_to_drop(self):      
+    
+        """
+            All instances that have a value of zero in all columns of a categorical
+            feature are identified and appended to the list of instances for which
+            a reliable prediction is not possible.
+
+            Input:
+                logger: related to generation of a process log
+                idx: Previously defined instances for which prediction is not
+                     possible, list
+                pred: prediction dataset, pandas DataFrame
+
+            Output:
+                idx: Updated list of instances for which prediction is not
+                     possible, list (deduplicated, order not preserved)
+
+            NOTE(review): columns are assigned to a feature by name prefix, so
+            a feature whose name is a prefix of another one also picks up that
+            feature's columns. The logged count is summed per feature, i.e. an
+            instance can be counted more than once.
+        """
+    
+        self.logger.info('Start identification of instances that are not represented by at least one categorical feature')
+    
+        columns = self.pred.columns
+        # Regular expression to match "<feature>_<value>_encoded"
+        pattern = re.compile(r"^(.*?)(_?\d+)?_encoded$")
+        encoded_features = {pattern.match(col).group(1) for col in columns if pattern.match(col)}
+        
+        self.logger.info('Identified encoded features: ' + str(encoded_features))
+        count = 0
+        for feature in encoded_features:
+            # Index labels of the rows in which every column of this feature equals 0
+            feature_cols = [col for col in self.pred.columns if col.startswith(feature) and col.endswith("_encoded")]
+            all_zero_rows = (self.pred[feature_cols] == 0).all(axis=1)
+            all_zero_rows = self.pred.index[all_zero_rows].tolist()
+            self.idx = list(set(self.idx + all_zero_rows))
+            count = count + len(all_zero_rows)
+
+        self.logger.info(str(count) + ' instances have been identified that are not represented by at least one categorical feature')
+        
+    def save_prediction_dataset(self):
+        
+        """
+            xy is prepended column-wise to the prediction dataset, which is saved
+            with the dropped instances (idx) and the feature names as nc-file
+        """
+        self.pred = pd.concat([self.xy, self.pred], axis=1)
+        pred = self.pred.to_numpy()
+        char_features = features_to_char(self.pred.columns)
+
+        outfile = self.properties_map['pred_path']
+        self.logger.info('Prediction dataset is saved to ' + outfile)
+        # An existing file is deleted before the new one is generated
+        if os.path.exists(outfile):
+            os.remove(outfile)
+        # 'lat' and 'lon' hold the number of rows and columns of the saved table
+        ds = generate_basic_ncfile(outfile, crs=None)
+        ds.createDimension('lat', (np.shape(pred)[0]))
+        ds.createDimension('lon', (np.shape(pred)[1]))
+        ds.createDimension('ix', (len(self.idx)))
+        ds.createDimension('feat', len(char_features))
+        result = ds.createVariable('Result', 'f4', ('lat', 'lon'))
+        dropped = ds.createVariable('Dropped', 'u8', 'ix')
+        Features = ds.createVariable('features', 'S1', 'feat')
+        result[:, :] = pred
+        dropped[:] = np.array(self.idx)
+        Features[:] = char_features
+        ds.close()
+    
+    def save_training_dataset(self):
+
+        """
+            xy_train is prepended column-wise to the training dataset, which
+            is then saved as csv to 'train_path'. An existing file is replaced.
+        """
+        self.logger.info('Saving of training data in progress')
+
+        outfile = self.properties_map['train_path']
+
+        # If outfile exists already, delete
+        if os.path.exists(outfile):
+            os.remove(outfile)
+            
+        self.train = pd.concat([self.xy_train, self.train], axis=1)
+
+        # Save dataframe as csv
+        self.train.to_csv(outfile, sep=',', index=False)
+        self.logger.info('Training dataset saved')
+
diff --git a/src/gui_version/shire.py b/src/gui_version/shire.py
index cd1c9a6e927e4ff79264414ececc3a1fff4771fe..59f5fdab1a9a500ecd465ad804e99b5c667bfdc1 100644
--- a/src/gui_version/shire.py
+++ b/src/gui_version/shire.py
@@ -9,6 +9,7 @@ import tkinter as tk
 from create_training_data_gui import *
 from create_prediction_data_gui import *
 from RandomForest_gui import *
+from compatibility_of_input_datasets import *
 
 from check_user_input import check_general_settings
 from utilities.initialise_log import save_log
@@ -84,34 +85,26 @@ else:
         logger.info('Map generation started')
         with open('tmp_map.pkl', 'rb') as handle:
            properties_map = pickle.load(handle)
+           
+        s = comparison_training_prediction_dataset(logger)
+        if not s.error:
                 
-        if properties_map['training'] == 1 and properties_map['prediction'] == 1:
-            for mode in ['train_test', 'prediction']:
-                if mode == 'train_test':
-                    s = RandomForest(master, mode, log=logger)
-                else:
-                    if properties_map['parallel'] == 1:
-                        s = RandomForest(master, mode, parallel=True, log=logger)
-                    else:
+            if properties_map['training'] == 1 and properties_map['prediction'] == 1:
+                for mode in ['train_test', 'prediction']:
+                    if mode == 'train_test':
                         s = RandomForest(master, mode, log=logger)
-        elif properties_map['training'] == 1 and properties_map['prediction'] == 0:
-            s = RandomForest(master, 'train_test', log=logger)
-        elif properties_map['prediction'] == 1 and properties_map['training'] == 0:
-            if properties_map['parallel'] == 1:
-               s = RandomForest(master, 'prediction', parallel=True, log=logger)
-            else:
-               s = RandomForest(master, 'prediction', log=logger)
-               
-        if s.retrain:
-            print('Retrain necessary')
-            for mode in ['train_test', 'prediction']:
-                if mode == 'train_test':
-                    s = RandomForest(master, mode, log=logger, retrain=True)
-                else:
-                    if properties_map['parallel'] == 1:
-                        s = RandomForest(master, mode, parallel=True, log=logger, retrain=True)
                     else:
-                        s = RandomForest(master, mode, log=logger, retrain=True)
+                        if properties_map['parallel'] == 1:
+                            s = RandomForest(master, mode, parallel=True, log=logger)
+                        else:
+                            s = RandomForest(master, mode, log=logger)
+            elif properties_map['training'] == 1 and properties_map['prediction'] == 0:
+                s = RandomForest(master, 'train_test', log=logger)
+            elif properties_map['prediction'] == 1 and properties_map['training'] == 0:
+                if properties_map['parallel'] == 1:
+                   s = RandomForest(master, 'prediction', parallel=True, log=logger)
+                else:
+                   s = RandomForest(master, 'prediction', log=logger)
         
         os.remove('tmp_map.pkl')
         logger = s.logger
diff --git a/src/gui_version/utilities/gui.py b/src/gui_version/utilities/gui.py
index 6e66dd949504f3fe09452261598896802de11b1d..5fa64eca3a99a6dd7aab8498e0c3e91614779875 100644
--- a/src/gui_version/utilities/gui.py
+++ b/src/gui_version/utilities/gui.py
@@ -672,40 +672,40 @@ class settings_map:
         global all_buttons
         all_buttons = []
         
-        Label(self.master, text="How to treat mismatching categories?", anchor='w', justify='left').grid(
-            row=self.row, column=0, sticky='w')
-        
-        self.button_pressed = tk.StringVar()
-        self.keep = tk.IntVar()
-        self.keep.set(0)
-        i = 'keep'
-        self.keep.trace_add("write", lambda name, index, mode,
-             var=self.keep, i=i: self.callback(var, i))
-        self.b1 = tk.Radiobutton(self.master,
-                         text="Keep instances of\n matching classes",
-                         variable=self.keep,
-                         value=1,
-                         command=lambda: self.combined_command(self.b1, 'keep'),
-                         anchor='w', justify='left')
-        self.b1.grid(row=self.row, column=1, columnspan=1, sticky='w')
-
-        
-        self.remove_instances = tk.IntVar()
-        self.remove_instances.set(0)
-        i = 'remove_instances'
-        self.remove_instances.trace_add("write", lambda name, index, mode,
-             var=self.remove_instances, i=i: self.callback(var, i))
-        self.b2 = tk.Radiobutton(self.master,
-                         text="Remove instances of\n mismatching classes",
-                         variable=self.remove_instances,
-                         value=1,
-                         command=lambda: self.combined_command(self.b2, 'remove'), anchor='w', justify='left')
-        self.b2.grid(row=self.row, column=2, columnspan=1, sticky='w')
-        
-        all_buttons.append(self.b1)
-        all_buttons.append(self.b2)
-        
-        self.row = self.row + 1
+        # Label(self.master, text="How to treat mismatching categories?", anchor='w', justify='left').grid(
+        #     row=self.row, column=0, sticky='w')
+        
+        # self.button_pressed = tk.StringVar()
+        # self.keep = tk.IntVar()
+        # self.keep.set(0)
+        # i = 'keep'
+        # self.keep.trace_add("write", lambda name, index, mode,
+        #      var=self.keep, i=i: self.callback(var, i))
+        # self.b1 = tk.Radiobutton(self.master,
+        #                  text="Keep instances of\n matching classes",
+        #                  variable=self.keep,
+        #                  value=1,
+        #                  command=lambda: self.combined_command(self.b1, 'keep'),
+        #                  anchor='w', justify='left')
+        # self.b1.grid(row=self.row, column=1, columnspan=1, sticky='w')
+
+        
+        # self.remove_instances = tk.IntVar()
+        # self.remove_instances.set(0)
+        # i = 'remove_instances'
+        # self.remove_instances.trace_add("write", lambda name, index, mode,
+        #      var=self.remove_instances, i=i: self.callback(var, i))
+        # self.b2 = tk.Radiobutton(self.master,
+        #                  text="Remove instances of\n mismatching classes",
+        #                  variable=self.remove_instances,
+        #                  value=1,
+        #                  command=lambda: self.combined_command(self.b2, 'remove'), anchor='w', justify='left')
+        # self.b2.grid(row=self.row, column=2, columnspan=1, sticky='w')
+        
+        # all_buttons.append(self.b1)
+        # all_buttons.append(self.b2)
+        
+        # self.row = self.row + 1
         
         Label(self.master).grid(row=self.row, column=0)
         self.row = self.row + 1
@@ -876,8 +876,8 @@ class settings_map:
             dic['depth_trees'] = self.depth_trees.get()
             dic['name_label'] = self.name_label.get()
             dic['criterion'] = self.criterion.get()
-            dic['keep'] = self.keep.get()
-            dic['remove_instances'] = self.remove_instances.get()
+            # dic['keep'] = self.keep.get()
+            # dic['remove_instances'] = self.remove_instances.get()
 
             sourceDir = filedialog.askdirectory(
                 parent=self.master, initialdir="/", title='Choose path')
@@ -899,8 +899,8 @@ class settings_map:
             dic['depth_trees'] = self.properties['depth_trees']
             dic['name_label'] = self.properties['name_label']
             dic['criterion'] = self.properties['criterion']
-            dic['keep'] = self.properties['keep']
-            dic['remove_instances'] = self.properties['remove_instances']
+            # dic['keep'] = self.properties['keep']
+            # dic['remove_instances'] = self.properties['remove_instances']
 
             for key in self.dic_change:
                 if self.dic_change[key] not in placeholders: