diff --git a/src/gui_version/compatibility_of_input_datasets.py b/src/gui_version/compatibility_of_input_datasets.py index 61e0405611c069575d3abe3552c62f0d51758c2c..a348986861b87da1123334e1157033c81fd0e667 100644 --- a/src/gui_version/compatibility_of_input_datasets.py +++ b/src/gui_version/compatibility_of_input_datasets.py @@ -213,14 +213,14 @@ class comparison_training_prediction_dataset: columns = self.pred.columns # Regular expression to match "<feature>_<value>_encoded" - pattern = re.compile(r"^(.*?)(_?\d+)?_encoded$") + pattern = re.compile(r"^(.*?)(_?\d+)?_encode$") encoded_features = {pattern.match(col).group(1) for col in columns if pattern.match(col)} self.logger.info('Identified encoded features: ' + str(encoded_features)) count = 0 for feature in encoded_features: - feature_cols = [col for col in self.pred.columns if col.startswith(feature) and col.endswith("_encoded")] + feature_cols = [col for col in self.pred.columns if col.startswith(feature) and col.endswith("_encode")] all_zero_rows = (self.pred[feature_cols] == 0).all(axis=1) all_zero_rows = self.pred.index[all_zero_rows].tolist() self.idx = list(set(self.idx + all_zero_rows)) @@ -235,6 +235,9 @@ class comparison_training_prediction_dataset: """ self.pred = pd.concat([self.xy, self.pred], axis=1) + + self.logger.info('Features in the prediction dataset: ' + str(self.pred.columns.tolist())) + pred = self.pred.to_numpy() char_features = features_to_char(self.pred.columns) @@ -272,7 +275,7 @@ class comparison_training_prediction_dataset: os.remove(outfile) self.train = pd.concat([self.xy_train, self.train], axis=1) - + self.logger.info('Features in the training dataset: ' + str(self.train.columns.tolist())) # Save dataframe as csv self.train.to_csv(outfile, sep=',', index=False) self.logger.info('Training dataset saved') diff --git a/src/plain_scripts/compatibility_of_input_datasets.py b/src/plain_scripts/compatibility_of_input_datasets.py index 255869e669b384d04b44030b4381f7f7b65795ef..e12e7c1e8faa1902c81f61b6f152905a9ee3a3e7 100644 --- a/src/plain_scripts/compatibility_of_input_datasets.py +++ b/src/plain_scripts/compatibility_of_input_datasets.py @@ -193,14 +193,14 @@ class comparison_training_prediction_dataset: columns = self.pred.columns # Regular expression to match "<feature>_<value>_encoded" - pattern = re.compile(r"^(.*?)(_?\d+)?_encoded$") + pattern = re.compile(r"^(.*?)(_?\d+)?_encode$") encoded_features = {pattern.match(col).group(1) for col in columns if pattern.match(col)} - + print(encoded_features) self.logger.info('Identified encoded features: ' + str(encoded_features)) count = 0 for feature in encoded_features: - feature_cols = [col for col in self.pred.columns if col.startswith(feature) and col.endswith("_encoded")] + feature_cols = [col for col in self.pred.columns if col.startswith(feature) and col.endswith("_encode")] all_zero_rows = (self.pred[feature_cols] == 0).all(axis=1) all_zero_rows = self.pred.index[all_zero_rows].tolist() self.idx = list(set(self.idx + all_zero_rows)) @@ -215,6 +215,7 @@ class comparison_training_prediction_dataset: """ self.pred = pd.concat([self.xy, self.pred], axis=1) + self.logger.info('Features in the prediction dataset: ' + str(self.pred.columns.tolist())) pred = self.pred.to_numpy() char_features = features_to_char(self.pred.columns) @@ -252,6 +253,7 @@ class comparison_training_prediction_dataset: os.remove(outfile) self.train = pd.concat([self.xy_train, self.train], axis=1) + self.logger.info('Features in the training dataset: ' + str(self.train.columns.tolist())) # Save dataframe as csv self.train.to_csv(outfile, sep=',', index=False)