%% Cell type:markdown id:dc6fe0c6 tags:
### Import of libraries
%% Cell type:markdown id:9f940e4e tags:
General libraries
%% Cell type:code id:ebbcf530 tags:
``` python
import typing
import datetime
import shutil
import subprocess
from multiprocessing.connection import Client
from time import sleep
from warnings import warn
from pathlib import Path
import functools
import threading
from multiprocessing.pool import ThreadPool
import traceback
import numpy as np
import pandas as pd
import nevergrad as ng
from IPython.utils import io as ipyio
import os
from scipy.ndimage import gaussian_filter1d
```
%% Cell type:markdown id:d260d7da tags:
Libraries for the interface with MIKE+
%% Cell type:code id:ebb5c530 tags:
``` python
from mikeplus import DataTableAccess
from mikeplus.engines import Engine1D
import matplotlib as mpl
import matplotlib.pyplot as plt
from mikeio1d import Res1D
from mikeio1d.query import QueryDataStructure
from mikeio1d.query import QueryDataNode
```
%% Cell type:markdown id:7de86ed5 tags:
### Setting variables
%% Cell type:code id:34c9b436 tags:
``` python
# a folder where the files used are saved
# note: this should be at the same folder depth as the original folder, so that relative paths resolve to the same files
model_folder = Path("verteilte Steuerung/")
# path of .mupp file, relative to model_folder
db_path_rel = "240502_Modell Rodenkirchen.mupp"
# Number of simulations allowed for optimization
optimization_budget = 60
```
%% Cell type:markdown id:003d8aff tags:
Names of the input Excel files
%% Cell type:code id:afb44b21 tags:
``` python
parameter_xlsx = "config_parameter.xlsx"
event_xlsx = "config_events.xlsx"
structure_xlsx = "structures.xlsx"
```
%% Cell type:markdown id:32eb5a4f tags:
Further Variables
%% Cell type:code id:5ffa64b6 tags:
``` python
# delete result files (as specified by the event config) before a simulation; ensures that no old file is re-read in case a simulation does not run
DELETE_RESFILE_BEFORE_SIM = True
# output heaps of additional data, may be useful for debugging, Default = False
VERBOSE_OUTPUT = False
# attempt a simulation at most this many times; used when a simulation fails
MAX_SIMULATION_TRY = 1
# consider a simulation failed (and retry) if this fraction of values is missing from the result compared to the reference
MAX_MISSING_RATIO = 0.6
# delete the temp folder at end, Default = True
DELETE_TEMP = True
# time string for output folder
time_path_part = datetime.datetime.now().strftime("%Y-%m-%dT%H.%M.%S")
# set path of temp and output folder
temp_folder = Path("tmp_" + time_path_part)
# output folder
out_folder = Path("out_" + time_path_part)
# whether output from the MIKE simulation gets shown on error
show_sim_out_on_error = True
# set the timezone used for the sensor data
timezone = "UTC+01:00"
# number of seats for the Engine; limits the number of simulations we can run at once
engine_seats = 4
optimizers = ["BayesOptimBO"]
```
%% Cell type:markdown id:b9dceb58 tags:
### Preparation of the database
%% Cell type:code id:b04a0f9f tags:
``` python
def file_to_df(path: Path) -> pd.DataFrame:
"""
Tries to read the file at path as a pandas DataFrame.
Supports Excel, CSV, and .res1d files.
"""
if path.suffix == ".res1d":
res1d = Res1D(str(path)) # Create a Res1D instance with the file
df = res1d.read_all() # Read all data from the .res1d file
return df
elif path.suffix == ".xlsx":
return pd.read_excel(path)
elif path.suffix == ".csv":
return pd.read_csv(path)
else:
raise NotImplementedError(f"No method for {path.suffix} implemented")
```
%% Cell type:markdown id:14138d91 tags:
Creating an output folder and a temporary folder
%% Cell type:code id:fa253ce4 tags:
``` python
out_folder.mkdir(exist_ok=True,parents=True)
assert not temp_folder.exists(), "Make sure not to mess with existing folders"
shutil.copytree(model_folder,temp_folder)
db_path = temp_folder / db_path_rel
# print the paths of the temp and out folders
print(temp_folder.absolute())
print(out_folder.absolute())
```
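%% Cell type:markdown id:a1f0c2d3 tags:
A minimal usage sketch for the `file_to_df` helper defined above; the demo file name is hypothetical:
%% Cell type:code id:b2e1d3c4 tags:
``` python
# hypothetical self-contained example: write a small CSV into the temp folder, then read it back
_demo_path = temp_folder / "file_to_df_demo.csv"
pd.DataFrame({"a": [1, 2], "b": [3, 4]}).to_csv(_demo_path, index=False)
file_to_df(_demo_path)
```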
%% Cell type:markdown id:8283a2e5 tags:
Read the Excel file config_events
%% Cell type:code id:0efc3c47 tags:
``` python
events = file_to_df(temp_folder / event_xlsx).set_index('Event', drop=False)
# check if start time of events is before the end time.
assert (events["Start"] <= events["End"]).all(
axis=None
), "Event end needs to be after start."
# add timezone; use the same as the sensor data
events["Start"]=events["Start"].dt.tz_localize(timezone)
events["End"]=events["End"].dt.tz_localize(timezone)
# check if there are duplicated events
assert events.index.is_unique, "Need unique event"
assert events.drop_duplicates(subset="Event")["SimulationName"].is_unique, "Need exactly one simulation name per event"
# convert the SimulationName column to the string data type, if it is not already
events["SimulationName"] = events["SimulationName"].astype(str)
events
```
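%% Cell type:markdown id:c3d2e4f5 tags:
For reference, a hypothetical in-memory sketch of the columns `config_events.xlsx` is assumed to provide (`Event`, `SimulationName`, `ResultFile`, `Start`, `End`):
%% Cell type:code id:d4e3f5a6 tags:
``` python
# hypothetical example rows illustrating the expected config_events layout
example_events = pd.DataFrame(
    {
        "Event": ["E1"],
        "SimulationName": ["Sim_E1"],
        "ResultFile": ["Sim_E1.res1d"],
        "Start": [pd.Timestamp("2024-01-01 00:00")],
        "End": [pd.Timestamp("2024-01-02 00:00")],
    }
).set_index("Event", drop=False)
# the same consistency check as above holds for this frame
assert (example_events["Start"] <= example_events["End"]).all()
example_events
```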
%% Cell type:markdown id:3db96632 tags:
Read the Excel file config_parameter
%% Cell type:code id:5d1e12ad tags:
``` python
param_specs = file_to_df(temp_folder / parameter_xlsx)
# give param_specs a sensible string index; this is later used to pass optimization parameters as a dictionary
param_specs.index = pd.Index(
param_specs[["Table", "Column", "Muid"]].agg(";".join, axis="columns"),
name="ParamKey",
)
assert (
    param_specs.index.is_unique
), "Index needs to be unique; otherwise it indicates duplicates in the parameters."
param_specs
```
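%% Cell type:markdown id:e5f4a6b7 tags:
The resulting keys have the form `Table;Column;Muid`; a hypothetical illustration, using names that appear in the constraint defined further below:
%% Cell type:code id:f6a5b7c8 tags:
``` python
# hypothetical example: how a ParamKey is assembled from the three columns
example_row = pd.DataFrame(
    {"Table": ["msm_RTCAction"], "Column": ["PIDSetPoint"], "Muid": ["HUB0301_CSO_red"]}
)
example_row[["Table", "Column", "Muid"]].agg(";".join, axis="columns").tolist()
# -> ['msm_RTCAction;PIDSetPoint;HUB0301_CSO_red']
```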
%% Cell type:markdown id:0c455656 tags:
Read the Excel file structures
%% Cell type:code id:b60324a6 tags:
``` python
structures = file_to_df(temp_folder / structure_xlsx)
# extract the list of structure names from the 'structures' column
structures_list = structures['structures'].tolist()
structures_list
```
%% Cell type:markdown id:21eab72c tags:
Open Mike+ database and start engine
%% Cell type:code id:fe607e54 tags:
``` python
dta = DataTableAccess(db_path)
# open the database from MIKE+
dta.open_database()
# check that the database is open
print(dta.is_database_open())
# Create the Engine1D object that will be used to run MIKE 1D simulations
engine = Engine1D(dta.datatables)
```
%% Cell type:markdown id:a67a5823 tags:
Threading is used to run multiple simulations with different engines
%% Cell type:code id:371fc664 tags:
``` python
engines = {}
def init_engine():
engines[threading.current_thread()] = Engine1D(dta.datatables)
print(f'initialized engine for {threading.current_thread()}')
# use ThreadPool from the multiprocessing package; it has a rather simple interface
sim_executor = ThreadPool(processes=engine_seats,initializer=init_engine)
```
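%% Cell type:markdown id:a7b6c8d9 tags:
The initializer runs once in each worker thread, so every thread gets its own engine keyed by its thread object. A self-contained sketch of the same pattern, without the MIKE+ dependency:
%% Cell type:code id:b8c7d9e0 tags:
``` python
# generic illustration of the per-thread-resource pattern used above
_resources = {}

def _init_resource():
    # each worker thread stores its own resource under its thread object
    _resources[threading.current_thread()] = object()

def _use_resource(i):
    return f"task {i} ran in {threading.current_thread().name}"

with ThreadPool(processes=2, initializer=_init_resource) as _demo_pool:
    print(_demo_pool.map(_use_resource, range(4)))
```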
%% Cell type:markdown id:40f2b6da tags:
Functions for the simulation
%% Cell type:code id:a51fa7f3 tags:
``` python
def calculate_discharge_for_structure(res1d, structure):
"""
Calculates the discharge for a given structure from the loaded Res1D object.
Parameters
----------
res1d : Res1D
The loaded Res1D object containing simulation results.
structure : str
The name of the structure for which discharge is to be calculated.
Returns
-------
float or None
The total discharge in liters, or None if an error occurs.
"""
try:
        values = res1d.structures[structure].Discharge.read()
        # integrate over the time series and convert m³ to litres;
        # the factor 60 assumes results are stored at one-minute intervals
        total_discharge = np.sum(values) * 60 * 1000
return total_discharge
except KeyError:
print(f"Structure {structure} not found in the Res1D data.")
return None
except AttributeError:
print(f"Discharge data not available for structure {structure}.")
return None
except Exception as e:
print(f"An error occurred while calculating discharge for {structure}: {e}")
return None
```
%% Cell type:code id:f3ab9f06 tags:
``` python
def check_flood(res1d) -> bool:
    """
    Checks whether there is flooding in the system by checking all nodes
    of the .res1d file for the WaterVolumeAboveGround value.
    Parameters:
    -----------
    res1d : Res1D
        The loaded Res1D object containing the simulation results.
    Returns:
    --------
    bool
        Returns True if flooding is detected, otherwise False.
    """
Flood = False
node_names = res1d.nodes
flooded_nodes = []
# Iterate over all nodes and retrieve the WaterVolumeAboveGround values
for node in node_names:
query_node = QueryDataNode(quantity="WaterVolumeAboveGround", name=node)
try:
water_level_values = query_node.get_values(res1d)
if water_level_values is None or len(water_level_values) == 0:
raise ValueError(f"WaterVolumeAboveGround for Node '{node}' doesn't exist.")
if (water_level_values > 0).any():
Flood = True
flooded_nodes.append(node)
print(f"Flood detected for Node: {node}")
except Exception as e:
print(f"Fehler beim Abrufen der Werte für Node {node}: {e}")
if Flood:
print("Flood detected in the system!")
print("Flooded nodes:", flooded_nodes)
else:
print("No flood detected in the system.")
return Flood
```
%% Cell type:code id:654c4b86 tags:
``` python
def single_sim_run(ev):
    '''
    Runs a single simulation for event ev. Returns a string message on successful completion.
    Intended for use with ThreadPool.map(); use map to complete all simulations and read the results afterwards.
    '''
sim_name = events.loc[events["Event"] == ev, "SimulationName"].iloc[0]
print(f"Running simulation {sim_name} in {threading.current_thread()}.")
engines[threading.current_thread()].run(sim_name)
return f"Completed sim for {ev} in {threading.current_thread()}"
```
%% Cell type:code id:b821fda2 tags:
``` python
def simulation_run(**kwparams):
    '''
    Takes parameters as keyword arguments, runs the simulations defined by events with them, and returns the total result summed over all events.
    Note that kwparams needs keys that match the index of the DataFrame param_specs.
    '''
assert set(kwparams.keys()) == set(
param_specs.index
), "parameters need to match param_specs"
for k in kwparams.keys():
tab = param_specs.loc[k, "Table"]
col = param_specs.loc[k, "Column"]
muid = param_specs.loc[k, "Muid"]
val = kwparams[k]
if pd.notna(param_specs.loc[k, "Expression"]):
expression = param_specs.loc[k, "Expression"].format(ConditionValue=val)
dta.set_value(tab, muid, col, expression)
print(f"Set value for {tab}.{muid}.{col} to expression: {expression}")
else:
dta.set_value(tab, muid, col, val)
print(f"Set value for {tab}.{muid}.{col} to value: {val}")
if DELETE_RESFILE_BEFORE_SIM:
for l in events.index:
delete_result_file(l)
# Now run each simulation on its own thread
finish_msgs = sim_executor.map(single_sim_run,events["Event"].unique())
print(finish_msgs)
results_table = pd.DataFrame(columns=['Event', 'Result'])
total_result = 0
for l in events.index:
        result_file_path = temp_folder / events.loc[l, "ResultFile"]
# Check if the result file exists
if not Path(result_file_path).exists():
print(f"Result file does not exist: {result_file_path}")
continue
try:
            res1d = Res1D(str(result_file_path))  # create a Res1D instance for each file
            flood = check_flood(res1d)
            if flood:  # apply a very large penalty for flooding
                results_ev = 10e100
            else:
                # calculate the discharge values
                discharge_values = [calculate_discharge_for_structure(res1d, structure) for structure in structures_list]
                # remove None values in case errors occurred during the calculation
                discharge_values = [value for value in discharge_values if value is not None]
                # compute the total of the discharge values
                results_ev = np.sum(discharge_values) if discharge_values else 0
print(f"Result for result file {result_file_path}: {results_ev}")
# Update total result
total_result += results_ev
except Exception as e:
print(f"Error processing result file {result_file_path}: {e}")
traceback.print_exc()
results_ev = 0
results_table = pd.concat([results_table, pd.DataFrame([{'Event': events.loc[l, "Event"], 'Result': results_ev}])], ignore_index=True)
print(f"Total result: {total_result}")
print(results_table)
return total_result
```
%% Cell type:code id:012fb1ff tags:
``` python
def delete_result_file(event):
    '''
    Deletes an existing result file corresponding to event before a simulation.
    '''
    file_path = temp_folder / events.loc[event, "ResultFile"]
    # missing_ok=True already covers the case where the file does not exist
    Path(file_path).unlink(missing_ok=True)
```
%% Cell type:markdown id:19d16489 tags:
### Test simulation run for all events with the default parameters
%% Cell type:code id:dffeeb37 tags:
``` python
# test run with default parameters
# the ** in the call unrolls the series into keyword parameters
display(param_specs.dtypes)
_test_res = simulation_run(**param_specs["Default"])
display("_test_res:")
display(_test_res)
```
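%% Cell type:markdown id:c9d8e0f1 tags:
For clarity, `**` unpacking works on a pandas Series just like on a dict, since a Series supports the mapping protocol; a hypothetical illustration:
%% Cell type:code id:d0e9f1a2 tags:
``` python
# hypothetical example: a Series unrolled into keyword arguments
def _echo_kwargs(**kwargs):
    return kwargs

_echo_kwargs(**pd.Series({"Table;Column;Muid": 0.5}))
# -> {'Table;Column;Muid': 0.5}
```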
%% Cell type:markdown id:baf6b68a tags:
### Preparations for optimisation
%% Cell type:markdown id:3d54b910 tags:
Define the optimization parameters as needed by nevergrad
%% Cell type:code id:0dbba878 tags:
``` python
ng_params = {}
for i in param_specs.index:
vmin = param_specs.at[i, "Min"]
vmax = param_specs.at[i, "Max"]
vdef = param_specs.at[i, "Default"]
ngp = ng.p.Scalar(init=vdef, lower=vmin, upper=vmax)
ng_params[i] = ngp
# now turn the dict into an instrumentation
# the instrumentation is used to tell the optimizer how the parameters are passed to our function
instrumentation = ng.p.Instrumentation(**ng_params)
instrumentation
```
%% Cell type:markdown id:6cb4a183 tags:
Define constraints for the parameters
%% Cell type:code id:62e0a803 tags:
``` python
def constraint_pid_setpoints(params_val):
"""Constraint: SetPoints should be less than or equal to the CSO-reduction-301 value"""
cso_reduction = params_val[1]["msm_RTCRule;Condition;CSO-reduction-301"]
setpoint_0301 = params_val[1]["msm_RTCAction;PIDSetPoint;HUB0301_CSO_red"]
# Ensure both setpoints are <= CSO-reduction-301
return cso_reduction - setpoint_0301
# Register the constraint
instrumentation.register_cheap_constraint(constraint_pid_setpoints)
```
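%% Cell type:markdown id:e1f0a2b3 tags:
`register_cheap_constraint` passes the candidate's `.value`, which for an `Instrumentation` is an `(args, kwargs)` tuple; hence the `params_val[1][...]` lookups above. A float return value >= 0 counts as satisfied. A hypothetical self-contained illustration:
%% Cell type:code id:f2a1b3c4 tags:
``` python
# hypothetical example of the (args, kwargs) structure seen by a cheap constraint
_demo_instr = ng.p.Instrumentation(x=ng.p.Scalar(init=1.0, lower=0.0, upper=2.0))
_demo_instr.register_cheap_constraint(lambda val: 2.0 - val[1]["x"])  # satisfied while x <= 2
_demo_args, _demo_kwargs = _demo_instr.value
print(_demo_args, _demo_kwargs)  # -> () {'x': 1.0}
```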
%% Cell type:markdown id:7d5b751f tags:
Define callback functions for the optimizers
%% Cell type:code id:f4a870f3 tags:
``` python
def params_losses_callback():
"""returns two lists and a callback function.
When the callback function is executed it puts the parameters and loss into the lists.
!!Do not register this function itself as a callback, but the third returned parameter!!
"""
# create two lists to store the params and the corresponding losses
params = []
losses = []
def callback(optim, par, loss):
params.append(par)
losses.append(loss)
# returns two lists and the function
return params, losses, callback
```
%% Cell type:code id:aea52f70 tags:
``` python
def optim_run(optim_name):
    '''
    Function to start the optimizer.
    input = name of the optimizer
    output = DataFrame with details from the optimization run
    '''
    # set the optimizer based on optim_name and the instrumentation from param_specs
optimizer = ng.optimizers.registry[optim_name](
parametrization=instrumentation, budget=optimization_budget, num_workers=1
)
    # show a progress bar, updated when the optimizer gets the result of a step
optimizer.register_callback("tell", ng.callbacks.ProgressBar())
# put params and loss in list
param_list, loss_list, plcb = params_losses_callback()
optimizer.register_callback("tell", plcb)
    # get the recommendation of parameters after the run is finished
    recommendation = optimizer.minimize(simulation_run, verbosity=2)
    optim_run_df = pd.concat(
        [
            pd.DataFrame(data=[p.value[1] for p in param_list]),
            pd.DataFrame({"Optimizer": optimizer.name, "Loss": loss_list}),
        ],
        axis="columns",
    )
    # also add a line for the recommendation
    rec_params = recommendation.value[1]
    optim_run_df.loc["recommendation", rec_params.keys()] = rec_params
    optim_run_df.loc["recommendation", "Loss"] = recommendation.loss
    optim_run_df.loc["recommendation", "Optimizer"] = optimizer.name
# save the results of the optimization as a csv and excel file
optim_run_df.to_csv(out_folder / f"optim_run{optimizer.name}.csv")
optim_run_df.to_excel(out_folder / f"optim_run{optimizer.name}.xlsx")
return optim_run_df
```
%% Cell type:markdown id:312ebf1f tags:
### Run optimisation
%% Cell type:code id:0517d4fe tags:
``` python
# choose optimizers from nevergrad (see the optimizers list defined above)
# https://en.wikipedia.org/wiki/CMA-ES
# it is also possible to use multiple optimizers and compare the results
# create a dictionary to store the run data for each optimizer
optim_run_dfs = {}
for o in optimizers:
# start the optimization
optim_run_dfs[o] = optim_run(o)
# display results
display(optim_run_dfs[o])
optim_run_dfs[o].plot(subplots=True, figsize=(5, 2 * len(param_specs)))
# optim_run_df already has optimizer included, use .values instead of passing the dict, so that the optimizer is not added again.
optim_run_df = pd.concat(optim_run_dfs.values())
optim_run_df.to_csv(out_folder / "optim_run.csv")
# set the index of the optim_run_df
optim_run_df.index.name = "Step"
optim_run_df = optim_run_df.reset_index().set_index(["Optimizer", "Step"])
```
%% Cell type:markdown id:64de11d1 tags:
Print the results of the Optimization
%% Cell type:code id:7162ffac tags:
``` python
# print the recommendations one last time
for key, df in optim_run_df.groupby(level="Optimizer"):
    display(key, df.loc[(key, "recommendation"), param_specs.index])
```
%% Cell type:markdown id:06dd3f0e tags:
Close database
%% Cell type:code id:93579781 tags:
``` python
dta.close_database()
```