%% Cell type:markdown id:dc6fe0c6 tags:
### Import of libraries
%% Cell type:markdown id:9f940e4e tags:
General libraries
%% Cell type:code id:ebbcf530 tags:
``` python
import typing
import datetime
import shutil
import subprocess
from multiprocessing.connection import Client
from time import sleep
from warnings import warn
from pathlib import Path
import functools
import threading
from multiprocessing.pool import ThreadPool
import traceback
import numpy as np
import pandas as pd
import nevergrad as ng
from IPython.utils import io as ipyio
import os
from scipy.ndimage import gaussian_filter1d
```
%% Cell type:markdown id:d260d7da tags:
Libraries for the interface with MIKE+
%% Cell type:code id:ebb5c530 tags:
``` python
from mikeplus import DataTableAccess
from mikeplus.engines import Engine1D
import matplotlib as mpl
import matplotlib.pyplot as plt
from mikeio1d import Res1D
from mikeio1d.query import QueryDataStructure
from mikeio1d.query import QueryDataNode
```
%% Cell type:markdown id:7de86ed5 tags:
### Setting variables
%% Cell type:code id:34c9b436 tags:
``` python
# a folder where the files used are saved
# note: this should be at the same folder depth as the original folder, so that relative paths resolve to the same files
model_folder = Path("verteilte Steuerung/")
# path of .mupp file, relative to model_folder
db_path_rel = "240502_Modell Rodenkirchen.mupp"
# Number of simulations allowed for optimization
optimization_budget = 60
```
%% Cell type:markdown id:003d8aff tags:
Names of the input Excel files
%% Cell type:code id:afb44b21 tags:
``` python
parameter_xlsx = "config_parameter.xlsx"
event_xlsx = "config_events.xlsx"
structure_xlsx = "structures.xlsx"
```
%% Cell type:markdown id:32eb5a4f tags:
Further Variables
%% Cell type:code id:5ffa64b6 tags:
``` python
# delete result files (as specified by the event config) before a simulation; ensures that no old file is re-read in case a simulation does not run
DELETE_RESFILE_BEFORE_SIM = True
# output heaps of additional data, may be useful for debugging, Default = False
VERBOSE_OUTPUT = False
# attempt a simulation at most this many times; used when a simulation fails
MAX_SIMULATION_TRY = 1
# consider a simulation failed (and retry) if this fraction of values is missing from the result compared to the reference
MAX_MISSING_RATIO = 0.6
# delete the temp folder at end, Default = True
DELETE_TEMP = True
# time string for output folder
time_path_part = datetime.datetime.now().strftime("%Y-%m-%dT%H.%M.%S")
# set path of temp and output folder
temp_folder = Path("tmp_" + time_path_part)
# output folder
out_folder = Path("out_" + time_path_part)
# whether output from the MIKE simulation gets shown on error
show_sim_out_on_error = True
# set the timezone used for the sensor data
timezone = "UTC+01:00"
# number of seats for the Engine; limits the number of simulations we can run at once
engine_seats = 4
optimizers = ["BayesOptimBO"]
```
%% Cell type:markdown id:b9dceb58 tags:
### Preparation of the database
%% Cell type:code id:b04a0f9f tags:
``` python
def file_to_df(path: Path) -> pd.DataFrame:
"""
Tries to read the file at path as a pandas DataFrame.
Supports Excel, CSV, and .res1d files.
"""
if path.suffix == ".res1d":
res1d = Res1D(str(path)) # Create a Res1D instance with the file
df = res1d.read_all() # Read all data from the .res1d file
return df
elif path.suffix == ".xlsx":
return pd.read_excel(path)
elif path.suffix == ".csv":
return pd.read_csv(path)
else:
raise NotImplementedError(f"No method for {path.suffix} implemented")
```
%% Cell type:markdown id:14138d91 tags:
Creating an output folder and a temporary folder
%% Cell type:code id:fa253ce4 tags:
``` python
out_folder.mkdir(exist_ok=True,parents=True)
assert not temp_folder.exists(), "Make sure not to mess with existing folders"
shutil.copytree(model_folder,temp_folder)
db_path = temp_folder / db_path_rel
# print the paths of the temp and out folders
print(temp_folder.absolute())
print(out_folder.absolute())
```
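%% Cell type:markdown id:a1f0c2d3 tags:
A minimal usage sketch for the `file_to_df` helper defined above; the demo file name is hypothetical:
%% Cell type:code id:b2e1d3c4 tags:
``` python
# hypothetical self-contained example: write a small CSV into the temp folder, then read it back
_demo_path = temp_folder / "file_to_df_demo.csv"
pd.DataFrame({"a": [1, 2], "b": [3, 4]}).to_csv(_demo_path, index=False)
file_to_df(_demo_path)
```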
%% Cell type:markdown id:8283a2e5 tags:
Read the Excel file config_events
%% Cell type:code id:0efc3c47 tags:
``` python
events = file_to_df(temp_folder / event_xlsx).set_index('Event', drop=False)
# check if start time of events is before the end time.
assert (events["Start"] <= events["End"]).all(
axis=None
), "Event end needs to be after start."
# add timezone; use the same as the sensor data
events["Start"]=events["Start"].dt.tz_localize(timezone)
events["End"]=events["End"].dt.tz_localize(timezone)
# check if there are duplicated events
assert events.index.is_unique, "Need unique event"
assert events.drop_duplicates(subset="Event")["SimulationName"].is_unique, "Need exactly one simulation name per event"
# convert the SimulationName column to the string data type, if it is not already
events["SimulationName"] = events["SimulationName"].astype(str)
events
```
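%% Cell type:markdown id:c3d2e4f5 tags:
For reference, a hypothetical in-memory sketch of the columns `config_events.xlsx` is assumed to provide (`Event`, `SimulationName`, `ResultFile`, `Start`, `End`):
%% Cell type:code id:d4e3f5a6 tags:
``` python
# hypothetical example rows illustrating the expected config_events layout
example_events = pd.DataFrame(
    {
        "Event": ["E1"],
        "SimulationName": ["Sim_E1"],
        "ResultFile": ["Sim_E1.res1d"],
        "Start": [pd.Timestamp("2024-01-01 00:00")],
        "End": [pd.Timestamp("2024-01-02 00:00")],
    }
).set_index("Event", drop=False)
# the same consistency check as above holds for this frame
assert (example_events["Start"] <= example_events["End"]).all()
example_events
```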
%% Cell type:markdown id:3db96632 tags:
Read the Excel file config_parameter
%% Cell type:code id:5d1e12ad tags:
``` python
param_specs = file_to_df(temp_folder / parameter_xlsx)
# give param_specs a sensible string index; this is later used to pass optimization parameters as a dictionary
param_specs.index = pd.Index(
param_specs[["Table", "Column", "Muid"]].agg(";".join, axis="columns"),
name="ParamKey",
)
assert (
    param_specs.index.is_unique
), "Index needs to be unique; otherwise it indicates duplicates in the parameters."
param_specs
```
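%% Cell type:markdown id:e5f4a6b7 tags:
The resulting keys have the form `Table;Column;Muid`; a hypothetical illustration, using names that appear in the constraint defined further below:
%% Cell type:code id:f6a5b7c8 tags:
``` python
# hypothetical example: how a ParamKey is assembled from the three columns
example_row = pd.DataFrame(
    {"Table": ["msm_RTCAction"], "Column": ["PIDSetPoint"], "Muid": ["HUB0301_CSO_red"]}
)
example_row[["Table", "Column", "Muid"]].agg(";".join, axis="columns").tolist()
# -> ['msm_RTCAction;PIDSetPoint;HUB0301_CSO_red']
```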
%% Cell type:markdown id:0c455656 tags:
Read the Excel file structures
%% Cell type:code id:b60324a6 tags:
``` python
structures = file_to_df(temp_folder / structure_xlsx)
# extract the list of structure names from the 'structures' column
structures_list = structures['structures'].tolist()
structures_list
```
%% Cell type:markdown id:21eab72c tags:
Open Mike+ database and start engine
%% Cell type:code id:fe607e54 tags:
``` python
dta = DataTableAccess(db_path)
# open the database from MIKE+
dta.open_database()
# check that the database is open
print(dta.is_database_open())
# Create the Engine1D object that will be used to run MIKE 1D simulations
engine = Engine1D(dta.datatables)
```
%% Cell type:markdown id:a67a5823 tags:
Threading is used to run multiple simulations with different engines
%% Cell type:code id:371fc664 tags:
``` python
engines = {}
def init_engine():
engines[threading.current_thread()] = Engine1D(dta.datatables)
print(f'initialized engine for {threading.current_thread()}')
# use ThreadPool from the multiprocessing package; it has a rather simple interface
sim_executor = ThreadPool(processes=engine_seats,initializer=init_engine)
```
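%% Cell type:markdown id:a7b6c8d9 tags:
The initializer runs once in each worker thread, so every thread gets its own engine keyed by its thread object. A self-contained sketch of the same pattern, without the MIKE+ dependency:
%% Cell type:code id:b8c7d9e0 tags:
``` python
# generic illustration of the per-thread-resource pattern used above
_resources = {}

def _init_resource():
    # each worker thread stores its own resource under its thread object
    _resources[threading.current_thread()] = object()

def _use_resource(i):
    return f"task {i} ran in {threading.current_thread().name}"

with ThreadPool(processes=2, initializer=_init_resource) as _demo_pool:
    print(_demo_pool.map(_use_resource, range(4)))
```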
%% Cell type:markdown id:40f2b6da tags:
Functions for the simulation
%% Cell type:code id:a51fa7f3 tags:
``` python
def calculate_discharge_for_structure(res1d, structure):
"""
Calculates the discharge for a given structure from the loaded Res1D object.
Parameters
----------
res1d : Res1D
The loaded Res1D object containing simulation results.
structure : str
The name of the structure for which discharge is to be calculated.
Returns
-------
float or None
The total discharge in liters, or None if an error occurs.
"""
try:
        values = res1d.structures[structure].Discharge.read()
        # integrate over the time series and convert m³ to litres;
        # the factor 60 assumes results are stored at one-minute intervals
        total_discharge = np.sum(values) * 60 * 1000
return total_discharge
except KeyError:
print(f"Structure {structure} not found in the Res1D data.")
return None
except AttributeError:
print(f"Discharge data not available for structure {structure}.")
return None
except Exception as e:
print(f"An error occurred while calculating discharge for {structure}: {e}")
return None
```
%% Cell type:code id:f3ab9f06 tags:
``` python
def check_flood(res1d) -> bool:
    """
    Checks whether there is flooding in the system by checking all nodes
    of the .res1d file for the WaterVolumeAboveGround value.
    Parameters:
    -----------
    res1d : Res1D
        The loaded Res1D object containing the simulation results.
    Returns:
    --------
    bool
        Returns True if flooding is detected, otherwise False.
    """
Flood = False
node_names = res1d.nodes
flooded_nodes = []
# Iterate over all nodes and retrieve the WaterVolumeAboveGround values
for node in node_names:
query_node = QueryDataNode(quantity="WaterVolumeAboveGround", name=node)
try:
water_level_values = query_node.get_values(res1d)
if water_level_values is None or len(water_level_values) == 0:
raise ValueError(f"WaterVolumeAboveGround for Node '{node}' doesn't exist.")
if (water_level_values > 0).any():
Flood = True
flooded_nodes.append(node)
print(f"Flood detected for Node: {node}")
except Exception as e:
print(f"Fehler beim Abrufen der Werte für Node {node}: {e}")
if Flood:
print("Flood detected in the system!")
print("Flooded nodes:", flooded_nodes)
else:
print("No flood detected in the system.")
return Flood
```
%% Cell type:code id:654c4b86 tags:
``` python
def single_sim_run(ev):
    '''
    Runs a single simulation for event ev. Returns a string message on successful completion.
    Intended for use with ThreadPool.map(); use map to complete all simulations and read the results afterwards.
    '''
sim_name = events.loc[events["Event"] == ev, "SimulationName"].iloc[0]
print(f"Running simulation {sim_name} in {threading.current_thread()}.")
engines[threading.current_thread()].run(sim_name)
return f"Completed sim for {ev} in {threading.current_thread()}"
```
%% Cell type:code id:b821fda2 tags:
``` python
def simulation_run(**kwparams):
    '''
    Takes parameters as keyword arguments, runs the simulations defined by events with them, and returns the total result summed over all events.
    Note that kwparams needs keys that match the index of the DataFrame param_specs.
    '''
assert set(kwparams.keys()) == set(
param_specs.index
), "parameters need to match param_specs"
for k in kwparams.keys():
tab = param_specs.loc[k, "Table"]
col = param_specs.loc[k, "Column"]
muid = param_specs.loc[k, "Muid"]
val = kwparams[k]
if pd.notna(param_specs.loc[k, "Expression"]):
expression = param_specs.loc[k, "Expression"].format(ConditionValue=val)
dta.set_value(tab, muid, col, expression)
print(f"Set value for {tab}.{muid}.{col} to expression: {expression}")
else:
dta.set_value(tab, muid, col, val)
print(f"Set value for {tab}.{muid}.{col} to value: {val}")
if DELETE_RESFILE_BEFORE_SIM:
for l in events.index:
delete_result_file(l)
# Now run each simulation on its own thread
finish_msgs = sim_executor.map(single_sim_run,events["Event"].unique())
print(finish_msgs)
results_table = pd.DataFrame(columns=['Event', 'Result'])
total_result = 0
for l in events.index:
        result_file_path = temp_folder / events.loc[l, "ResultFile"]
# Check if the result file exists
if not Path(result_file_path).exists():
print(f"Result file does not exist: {result_file_path}")
continue
try:
            res1d = Res1D(str(result_file_path))  # create a Res1D instance for each file
            flood = check_flood(res1d)
            if flood:  # apply a very large penalty for flooding
                results_ev = 10e100
            else:
                # calculate the discharge values
                discharge_values = [calculate_discharge_for_structure(res1d, structure) for structure in structures_list]
                # remove None values in case errors occurred during the calculation
                discharge_values = [value for value in discharge_values if value is not None]
                # compute the total of the discharge values
                results_ev = np.sum(discharge_values) if discharge_values else 0
print(f"Result for result file {result_file_path}: {results_ev}")
# Update total result
total_result += results_ev
except Exception as e:
print(f"Error processing result file {result_file_path}: {e}")
traceback.print_exc()
results_ev = 0
results_table = pd.concat([results_table, pd.DataFrame([{'Event': events.loc[l, "Event"], 'Result': results_ev}])], ignore_index=True)
print(f"Total result: {total_result}")
print(results_table)
return total_result
```
%% Cell type:code id:012fb1ff tags:
``` python
def delete_result_file(event):
    '''
    Deletes an existing result file corresponding to event before a simulation.
    '''
    file_path = temp_folder / events.loc[event, "ResultFile"]
    # missing_ok=True already covers the case where the file does not exist
    Path(file_path).unlink(missing_ok=True)
```
%% Cell type:markdown id:19d16489 tags:
### Test simulation run for all events with the default parameters
%% Cell type:code id:dffeeb37 tags:
``` python
# test run with default parameters
# the ** in the call unrolls the series into keyword parameters
display(param_specs.dtypes)
_test_res = simulation_run(**param_specs["Default"])
display("_test_res:")
display(_test_res)
```
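%% Cell type:markdown id:c9d8e0f1 tags:
For clarity, `**` unpacking works on a pandas Series just like on a dict, since a Series supports the mapping protocol; a hypothetical illustration:
%% Cell type:code id:d0e9f1a2 tags:
``` python
# hypothetical example: a Series unrolled into keyword arguments
def _echo_kwargs(**kwargs):
    return kwargs

_echo_kwargs(**pd.Series({"Table;Column;Muid": 0.5}))
# -> {'Table;Column;Muid': 0.5}
```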
%% Cell type:markdown id:baf6b68a tags:
### Preparations for optimisation
%% Cell type:markdown id:3d54b910 tags:
Define the optimization parameters as needed by nevergrad
%% Cell type:code id:0dbba878 tags:
``` python
ng_params = {}
for i in param_specs.index:
vmin = param_specs.at[i, "Min"]
vmax = param_specs.at[i, "Max"]
vdef = param_specs.at[i, "Default"]
ngp = ng.p.Scalar(init=vdef, lower=vmin, upper=vmax)
ng_params[i] = ngp
# now turn the dict into an instrumentation
# the instrumentation is used to tell the optimizer how the parameters are passed to our function
instrumentation = ng.p.Instrumentation(**ng_params)
instrumentation
```
%% Cell type:markdown id:6cb4a183 tags:
Define constraints for the parameters
%% Cell type:code id:62e0a803 tags:
``` python
def constraint_pid_setpoints(params_val):
"""Constraint: SetPoints should be less than or equal to the CSO-reduction-301 value"""
cso_reduction = params_val[1]["msm_RTCRule;Condition;CSO-reduction-301"]
setpoint_0301 = params_val[1]["msm_RTCAction;PIDSetPoint;HUB0301_CSO_red"]
# Ensure both setpoints are <= CSO-reduction-301
return cso_reduction - setpoint_0301
# Register the constraint
instrumentation.register_cheap_constraint(constraint_pid_setpoints)
```
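%% Cell type:markdown id:e1f0a2b3 tags:
`register_cheap_constraint` passes the candidate's `.value`, which for an `Instrumentation` is an `(args, kwargs)` tuple; hence the `params_val[1][...]` lookups above. A float return value >= 0 counts as satisfied. A hypothetical self-contained illustration:
%% Cell type:code id:f2a1b3c4 tags:
``` python
# hypothetical example of the (args, kwargs) structure seen by a cheap constraint
_demo_instr = ng.p.Instrumentation(x=ng.p.Scalar(init=1.0, lower=0.0, upper=2.0))
_demo_instr.register_cheap_constraint(lambda val: 2.0 - val[1]["x"])  # satisfied while x <= 2
_demo_args, _demo_kwargs = _demo_instr.value
print(_demo_args, _demo_kwargs)  # -> () {'x': 1.0}
```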
%% Cell type:markdown id:7d5b751f tags:
Define callback functions for the optimizers
%% Cell type:code id:f4a870f3 tags:
``` python
def params_losses_callback():
"""returns two lists and a callback function.
When the callback function is executed it puts the parameters and loss into the lists.
!!Do not register this function itself as a callback, but the third returned parameter!!
"""
# create two lists to store the params and the corresponding losses
params = []
losses = []
def callback(optim, par, loss):
params.append(par)
losses.append(loss)
# returns two lists and the function
return params, losses, callback
```
%% Cell type:code id:aea52f70 tags:
``` python
def optim_run(optim_name):
    '''
    Function to start the optimizer.
    input = name of the optimizer
    output = DataFrame with details from the optimization run
    '''
    # set the optimizer based on optim_name and the instrumentation from param_specs
optimizer = ng.optimizers.registry[optim_name](
parametrization=instrumentation, budget=optimization_budget, num_workers=1
)
    # show a progress bar, updated when the optimizer gets the result of a step
optimizer.register_callback("tell", ng.callbacks.ProgressBar())
# put params and loss in list
param_list, loss_list, plcb = params_losses_callback()
optimizer.register_callback("tell", plcb)
    # get the recommendation of parameters after the run is finished
    recommendation = optimizer.minimize(simulation_run, verbosity=2)
    optim_run_df = pd.concat(
        [
            pd.DataFrame(data=[p.value[1] for p in param_list]),
            pd.DataFrame({"Optimizer": optimizer.name, "Loss": loss_list}),
        ],
        axis="columns",
    )
    # also add a line for the recommendation
    rec_params = recommendation.value[1]
    optim_run_df.loc["recommendation", rec_params.keys()] = rec_params
    optim_run_df.loc["recommendation", "Loss"] = recommendation.loss
    optim_run_df.loc["recommendation", "Optimizer"] = optimizer.name
# save the results of the optimization as a csv and excel file
optim_run_df.to_csv(out_folder / f"optim_run{optimizer.name}.csv")
optim_run_df.to_excel(out_folder / f"optim_run{optimizer.name}.xlsx")
return optim_run_df
```
%% Cell type:markdown id:312ebf1f tags:
### Run optimisation
%% Cell type:code id:0517d4fe tags:
``` python
# choose optimizers from nevergrad (see the optimizers list defined above)
# https://en.wikipedia.org/wiki/CMA-ES
# it is also possible to use multiple optimizers and compare the results
# create a dictionary to store the run data for each optimizer
optim_run_dfs = {}
for o in optimizers:
# start the optimization
optim_run_dfs[o] = optim_run(o)
# display results
display(optim_run_dfs[o])
optim_run_dfs[o].plot(subplots=True, figsize=(5, 2 * len(param_specs)))
# optim_run_df already has optimizer included, use .values instead of passing the dict, so that the optimizer is not added again.
optim_run_df = pd.concat(optim_run_dfs.values())
optim_run_df.to_csv(out_folder / "optim_run.csv")
# set the index of the optim_run_df
optim_run_df.index.name = "Step"
optim_run_df = optim_run_df.reset_index().set_index(["Optimizer", "Step"])
```
%% Cell type:markdown id:64de11d1 tags:
Print the results of the Optimization
%% Cell type:code id:7162ffac tags:
``` python
# print the recommendations one last time
for key, df in optim_run_df.groupby(level="Optimizer"):
    display(key, df.loc[(key, "recommendation"), param_specs.index])
```
%% Cell type:markdown id:06dd3f0e tags:
Close database
%% Cell type:code id:93579781 tags:
``` python
dta.close_database()
```