Skip to content
Snippets Groups Projects
Commit 966d8258 authored by Ann-Kathrin Margarete Edrich's avatar Ann-Kathrin Margarete Edrich
Browse files

Fix categorical_classes for ordinal encoding

parent b5441d0b
Branches
No related tags found
No related merge requests found
......@@ -140,7 +140,7 @@ class prepare_data:
classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f])
self.distibuish_encoding[feat] = 'ohe'
else:
classes.append(list(set(df_sub[feat + '_encode'].tolist())))
classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f])
self.distibuish_encoding[feat] = 'ordinal'
self.categorical_classes[feat] = {}
self.categorical_classes[feat]['classes'] = [item for sublist in classes for item in sublist]
......@@ -197,7 +197,7 @@ class prepare_data:
if cat_feat.count(feat)>1:
classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f])
else:
classes.append(list(set(df_sub[feat + '_encode'].tolist())))
classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f])
self.categorical_classes[feat] = {}
self.categorical_classes[feat]['classes'] = [item for sublist in classes for item in sublist]
self.categorical_classes[feat]['num_cols'] = cat_feat.count(feat)
......@@ -241,7 +241,13 @@ class prepare_data:
class RandomForest(prepare_data):
"""
This class conducts the training of the Random Forest model and the
generation of the landslide susceptibility and hazard map.
"""
def __init__(self, master, aim, parallel=False, log=None, retrain=None):
super().__init__(master, aim, log=log, retrain=retrain)
self.aim = aim
self.logger = log
......@@ -372,6 +378,13 @@ class RandomForest(prepare_data):
"""
Split a NumPy array into chunks without changing the number of columns.
Input:
pred: prediction dataset, varies depending on if the current run
is for model training or map generation
Output:
None
"""
# Calculate the number of rows in each chunk
......@@ -506,12 +519,22 @@ class RandomForest(prepare_data):
not included in the training dataset are either set to no_value or
nevertheless considered in the prediction. The surplus additional
features are removed either way to achieve the same set of features
as in the training dataset
as in the training dataset.
The prediction dataset is furthermore assessed if all features
that are included in the training dataset also appear in the prediction
dataset. If that is not the case, the training process is relaunched
with an adapted training dataset where the feature(s) that is/are
not contained in the prediction dataset are removed. The second
trained model will be stored in a separate folder which is named
<old_folder_name>_retrain.
Input:
train_classes: dictionary containing for each categorical feature
all classes and the number of total classes
contained in the training dataset
training_features: Complete feature names of the features
contained in the training dataset
Output:
None
......@@ -539,6 +562,7 @@ class RandomForest(prepare_data):
self.master.update()
self.logger.error('Error: Categorical feature ' + feat + ' not in prediction dataset')
self.logger.error('Error: cannot proceed with mapping')
self.error = True
self.retrain = True
self.features_not_in_training.append(feat)
......
......@@ -107,7 +107,7 @@ class prepare_data:
classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f])
self.distibuish_encoding[feat] = 'ohe'
else:
classes.append(list(set(df_sub[feat + '_encoded'].tolist())))
classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f])
self.distibuish_encoding[feat] = 'ordinal'
self.categorical_classes[feat] = {}
self.categorical_classes[feat]['classes'] = [item for sublist in classes for item in sublist]
......@@ -171,7 +171,7 @@ class prepare_data:
if cat_feat.count(feat)>1:
classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f])
else:
classes.append(list(set(df_sub[feat + '_encoded'].tolist())))
classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f])
self.categorical_classes[feat] = {}
self.categorical_classes[feat]['classes'] = [item for sublist in classes for item in sublist]
self.categorical_classes[feat]['num_cols'] = cat_feat.count(feat)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment