diff --git a/.gitignore b/.gitignore
index 0ae0cc05639d5bbe5a32266e2983308cc1ec3e6d..8c561d5c470572f0fae6aa351f35287613450a43 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@ utilities/__pycache__/
 # Ignore archive directory
 archive/
 examples/
+test/
 
 # Ignore all pickle files
 *.pkl
diff --git a/src/plain_scripts/settings copy.py b/src/plain_scripts/settings copy.py
deleted file mode 100644
index 30ee799a15006c6db9168f649d2eb7b75b79ca15..0000000000000000000000000000000000000000
--- a/src/plain_scripts/settings copy.py
+++ /dev/null
@@ -1,101 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-"""
-    This is a template file for settings.py
-    Either duplicate and rename or fill out and rename.
-    More information on the individual meaning and what to consider can be
-    found in the user manual
-"""
-
-import logging
-import json
-import types
-
-def export_variables(logger):
-
-    variables = globals()
-    # Filter out non-serializable objects
-    defined_vars = {}
-    for k, v in variables.items():
-        if not k.startswith('__') and not callable(v) and not isinstance(v, types.ModuleType):
-            try:
-                # Test if the value is JSON serializable
-                json.dumps(v)
-                defined_vars[k] = v
-            except (TypeError, OverflowError):
-                # Skip non-serializable values
-                pass
-    # Convert the dictionary to a JSON string
-    vars_json = json.dumps(defined_vars, indent=4)
-    logger.info("Exported variables: %s", vars_json)
-
-# Mandatory parameters
-days = 2
-approach = 'statistical'
-
-# Steps
-training_dataset = False # Boolean, if training dataset shall be created
-preprocessing = 'no_interpolation' # Defines preprocessing approach: 'cluster', 'interpolation', 'no_interpolation'
-train_from_scratch = True
-train_delete = None
-
-prediction_dataset = False # Boolean, if prediction dataset shall be created
-pred_from_scratch = True
-pred_delete = None
-
-map_generation = True # Boolean, if mapping shall be performed
-
-# General
-
-crs = 'wgs84' # Coordinate reference system, string
-no_value = -999 # No data value, integer, suggestion -999
-random_seed = 42 # Random seed, integer
-resolution = 25 # Resolution in m of the final map, integer, all datasets will be interpolated to this resolution
-path_ml = '/Volumes/LaCie/2nd_Paper/entire_swiss_for_paper/maps/' # Path to where shire framework related parameters/files will be stored
-data_summary_path = None # Path to the data summary file, string, relevant only for training/prediction dataset generation
-key_to_include_path = None # Path to kets_to_include file, string, relevant only for training/prediction dataset generation
-
-# Training dataset generation
-
-size = None # Size of the validation dataset, float number between 0 and 1
-path_train = '/Volumes/LaCie/2nd_Paper/entire_swiss_for_paper/training_datasets/{days}/training_statistical_{days}d.csv' # Path to directory where the training dataset is/shall be stored
-ohe = None # One-hot encoding, bool
-
-path_landslide_database = None # Path to where the landslide database is stored, string
-ID = 'ID' # Name of the column containing landslide ID, string
-landslide_database_x = 'xcoord' # Name of the column containing longitude values, string
-landslide_database_y = 'ycoord' # Name of the column containing latitude values, string
-
-path_nonls_locations = None # Path to where the non-landslide database is stored, string
-num_nonls = None # Number of non-landslide locations to include in the training dataset, integer
-nonls_database_x = None # Name of the column containing longitude values, string
-nonls_database_y = None # Name of the column containing longitude values, string
-
-#cluster = False # Use clustering for training dataset generation, bool
-#interpolation = False # Use interpolation for training dataset generation, bool
-
-# Prediction dataset generation
-
-bounding_box = None # Coordinates of the edges of the bounding box of the area of interest, list, [<ymax>, <ymin>, <xmin>, <xmax>]
-path_pred = None # Path to directory where the prediction dataset is/shall be stored
-
-# Map generation
-
-RF_training = True # Train the RF, bool
-RF_prediction = True # Make a prediction using the RF, bool
-
-not_included_pred_data = ['xcoord', 'ycoord']# List of features in the training dataset not to be considered in prediction
-not_included_train_data = [] # List of features in the training dataset not to be considered in model training
-
-num_trees = 100 # Number of trees in the Random Forest, integer
-criterion = 'gini' # Criterion for the Random Forest, string
-depth = 20 # Number of nodes of the RF, integer
-
-model_to_save = '/Volumes/LaCie/2nd_Paper/entire_swiss_for_paper/maps/{approach}/RF_{days}' # Folder name for storage of the RF results, string
-model_to_load = '/Volumes/LaCie/2nd_Paper/entire_swiss_for_paper/maps/{approach}/RF_{days}' # Folder where RF model is stored, string, identical to model_to_save if training and prediction is done at the same time
-model_database_dir = path_ml # Directory where models should be stored
-parallel = True # Boolean, true if prediction data shall be split to predict in parallel
-
-keep_cat_features = False #bool, true if categorical features shall be kept even if some instances in prediction dataset have classes not covered by the prediction dataset
-remove_instances = True # bool, true of instances in prediction dataset shall be removed if they have different classes than the instances in the training dataset
\ No newline at end of file