diff --git a/src/gui_version/RandomForest_gui.py b/src/gui_version/RandomForest_gui.py
index 1113c5775be328e006d3b6305a2ebcadef13e2d3..debdd6ef4893ff654ec63bb6513baf3d9ebb82a6 100644
--- a/src/gui_version/RandomForest_gui.py
+++ b/src/gui_version/RandomForest_gui.py
@@ -140,7 +140,7 @@ class prepare_data:
classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f])
self.distibuish_encoding[feat] = 'ohe'
else:
- classes.append(list(set(df_sub[feat + '_encode'].tolist())))
+ classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f])
self.distibuish_encoding[feat] = 'ordinal'
self.categorical_classes[feat] = {}
self.categorical_classes[feat]['classes'] = [item for sublist in classes for item in sublist]
@@ -197,7 +197,7 @@ class prepare_data:
if cat_feat.count(feat)>1:
classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f])
else:
- classes.append(list(set(df_sub[feat + '_encode'].tolist())))
+ classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f])
self.categorical_classes[feat] = {}
self.categorical_classes[feat]['classes'] = [item for sublist in classes for item in sublist]
self.categorical_classes[feat]['num_cols'] = cat_feat.count(feat)
@@ -240,8 +240,14 @@ class prepare_data:
class RandomForest(prepare_data):
+
+ """
+ This class conducts the training of the Random Forest model and the
+ generation of the landslide susceptibility and hazard map.
+ """
def __init__(self, master, aim, parallel=False, log=None, retrain=None):
+
super().__init__(master, aim, log=log, retrain=retrain)
self.aim = aim
self.logger = log
@@ -371,6 +377,13 @@ class RandomForest(prepare_data):
"""
Split a NumPy array into chunks without changing the number of columns.
+
+ Input:
+ pred: prediction dataset, varies depending on if the current run
+ is for model training or map generation
+
+ Output:
+ None
"""
@@ -506,12 +519,22 @@ class RandomForest(prepare_data):
not included in the training dataset are either set to no_value or
nevertheless considered in the prediction. The surplus additional
features are removed either way to achieve the same set of features
- as in the training dataset
+ as in the training dataset.
+
+ The prediction dataset is furthermore checked to ensure that all features
+ that are included in the training dataset also appear in the prediction
+ dataset. If that is not the case, the training process is relaunched
+ with an adapted training dataset from which the feature(s) that is/are
+ not contained in the prediction dataset are removed. The second
+ trained model will be stored in a separate folder which is named
+ <old_folder_name>_retrain.
Input:
- train_classes: dictionary containing for each categorical feature
- all classes and the number of total classes
- contained in the training dataset
+ train_classes: dictionary containing for each categorical feature
+ all classes and the number of total classes
+ contained in the training dataset
+ training_features: Complete feature names of the features
+ contained in the training dataset
Output:
None
@@ -539,6 +562,7 @@ class RandomForest(prepare_data):
self.master.update()
self.logger.error('Error: Categorical feature ' + feat + ' not in prediction dataset')
+ self.logger.error('Error: cannot proceed with mapping')
self.error = True
self.retrain = True
self.features_not_in_training.append(feat)
diff --git a/src/plain_scripts/RandomForest.py b/src/plain_scripts/RandomForest.py
index 8f48ed54d4480cd21bf499d7417ae9ca336b3522..02d800230a9fba9b742f0aa04b7d3119fdcf2c0b 100644
--- a/src/plain_scripts/RandomForest.py
+++ b/src/plain_scripts/RandomForest.py
@@ -107,7 +107,7 @@ class prepare_data:
classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f])
self.distibuish_encoding[feat] = 'ohe'
else:
- classes.append(list(set(df_sub[feat + '_encoded'].tolist())))
+ classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f])
self.distibuish_encoding[feat] = 'ordinal'
self.categorical_classes[feat] = {}
self.categorical_classes[feat]['classes'] = [item for sublist in classes for item in sublist]
@@ -171,7 +171,7 @@ class prepare_data:
if cat_feat.count(feat)>1:
classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f])
else:
- classes.append(list(set(df_sub[feat + '_encoded'].tolist())))
+ classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f])
self.categorical_classes[feat] = {}
self.categorical_classes[feat]['classes'] = [item for sublist in classes for item in sublist]
self.categorical_classes[feat]['num_cols'] = cat_feat.count(feat)