From 7d91c93b7ee2f70152d62e8fb1ecdd9248bc3c9a Mon Sep 17 00:00:00 2001
From: Sebastian Kerger <sebastian.kerger@rwth-aachen.de>
Date: Mon, 13 May 2024 17:45:20 +0200
Subject: [PATCH] Upload New File

---
 .../optimization_hydraulic.ipynb              | 1405 +++++++++++++++++
 1 file changed, 1405 insertions(+)
 create mode 100644 hydraulic_calibration/optimization_hydraulic.ipynb

diff --git a/hydraulic_calibration/optimization_hydraulic.ipynb b/hydraulic_calibration/optimization_hydraulic.ipynb
new file mode 100644
index 0000000..9894e00
--- /dev/null
+++ b/hydraulic_calibration/optimization_hydraulic.ipynb
@@ -0,0 +1,1405 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\trojan\\AppData\\Local\\Temp\\ipykernel_14844\\805292381.py:21: DeprecationWarning: \n",
+      "Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),\n",
+      "(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)\n",
+      "but was not found to be installed on your system.\n",
+      "If this would cause problems for you,\n",
+      "please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466\n",
+      "        \n",
+      "  import pandas as pd\n"
+     ]
+    }
+   ],
+   "source": [
+    "\"\"\"\n",
+    "Optimization with mikepluspy\n",
+    "The nevergrad package is used for optimization.\n",
+    "Note: mikepluspy requires a full licence for mike+ to run.\n",
+    "In many cases the seat of the license is not properly released; closing python and waiting some time should fix this.\n",
+    "\"\"\"\n",
+    "\n",
+    "import typing\n",
+    "import datetime\n",
+    "import shutil\n",
+    "import subprocess\n",
+    "from multiprocessing.connection import Client\n",
+    "from time import sleep\n",
+    "from warnings import warn\n",
+    "from pathlib import Path\n",
+    "import functools\n",
+    "\n",
+    "\n",
+    "from mikeplus import DataTableAccess\n",
+    "from mikeplus.engines import Engine1D\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import nevergrad as ng\n",
+    "import matplotlib as mpl\n",
+    "import matplotlib.pyplot as plt\n",
+    "import hydroeval as hde\n",
+    "import sqlalchemy as sa\n",
+    "from scipy.ndimage import gaussian_filter1d\n",
+    "from IPython.utils import io as ipyio"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def file_to_df(path: Path) -> pd.DataFrame:\n",
+    "    \"\"\"Tries to read a file at path as a pandas DataFrame.\"\"\"\n",
+    "    if path.suffix == \".xlsx\":\n",
+    "        return pd.read_excel(path)\n",
+    "    elif path.suffix == \".dfs0\":\n",
+    "        reader_c.send({\"filename\": path})\n",
+    "        return reader_c.recv()\n",
+    "    elif path.suffix == \".csv\":\n",
+    "        return pd.read_csv(path)\n",
+    "    else:\n",
+    "        raise NotImplementedError(f\"no method for {path.suffix} implemented\")"
+   ]
+  },
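+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Quick illustration of the suffix dispatch in file_to_df above.\n",
+    "# This is only a sketch with a throwaway file; the file name is hypothetical\n",
+    "# and not part of the model data.\n",
+    "_demo_path = Path(\"_file_to_df_demo.csv\")\n",
+    "_demo_path.write_text(\"a,b\\n1,2\\n\")\n",
+    "display(file_to_df(_demo_path))\n",
+    "_demo_path.unlink()"
+   ]
+  },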
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Set variables for later use\n",
+    "# delete  result files(as specified by event config) before a sim, assures that no old file is reread in the case that a simulation does not run\n",
+    "DELETE_RESFILE_BEFORE_SIM = True\n",
+    "# output heaps of additional data, may be useful for debugging\n",
+    "VERBOSE_OUTPUT = False\n",
+    "# attempt simulation at most so many times, used when a simulation fails\n",
+    "MAX_SUMILATION_TRY = 3\n",
+    "# Consider a simulation as failed and retry if so many values in the result compared to the reference are missing\n",
+    "MAX_MISSING_RATIO = 0.6\n",
+    "#delete the temp folder at end\n",
+    "DELETE_TEMP = True\n",
+    "# time string for output folder\n",
+    "time_path_part = datetime.datetime.now().strftime(\"%Y-%m-%dT%H.%M.%S\")\n",
+    "# set path of temp and output folder\n",
+    "temp_folder = Path(\"tmp_\" + time_path_part)\n",
+    "# output folder\n",
+    "out_folder = Path(\"out_\" + time_path_part)\n",
+    "# wheter output from mike simulation get shown on error\n",
+    "show_sim_out_on_error = True"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# variables which need to be set before each optimization\n",
+    "# Number of simulations allowed for optimization\n",
+    "optimization_budget = 60\n",
+    "# a folder where used files are saved\n",
+    "# note this should be at the same folder depth as the original folder, so that relative paths lead to the same files\n",
+    "model_folder = Path(\"Modell_Kalibrierung_AD/\")\n",
+    "# path of .mupp file, relative to model_folder\n",
+    "db_path_rel = \"240304_Godorf_AD.mupp\"\n",
+    "# set the id of the sensors and the timezone\n",
+    "timezone = \"UTC+01:00\"\n",
+    "sensor_conc = \"2209NP80036_RUEB0901\"\n",
+    "sensor_q = \"2216NF73722_RUEB0901\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create output folder\n",
+    "out_folder.mkdir(exist_ok=True,parents=True)\n",
+    "# make a copy of the model folder into the temp folder, since the programm might mess with the model\n",
+    "assert not temp_folder.exists(), \"Make sure not to mess with existing folders\"\n",
+    "shutil.copytree(model_folder,temp_folder)\n",
+    "db_path = temp_folder / db_path_rel"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "WindowsPath('d:/entfrachten/tmp_2024-05-13T10.41.48')"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#print the path of the temp folder\n",
+    "temp_folder.absolute()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# connection to database used to fetch the sensordata\n",
+    "db_engine = sa.create_engine(\n",
+    "    \"postgresql+psycopg2://postgres:P0stgr3SQL!@137.226.85.103:5432/entfrachten\"\n",
+    ")\n",
+    "'''\n",
+    "Alternativly read the data from excel files:\n",
+    "'''"
+   ]
+  },
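+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hedged sketch of the Excel alternative mentioned above, kept commented out.\n",
+    "# File name and column layout are assumptions; adjust them to the actual export.\n",
+    "# readings = file_to_df(temp_folder / \"readings_smoothed.xlsx\")  # hypothetical file\n",
+    "# readings = readings.set_index(\"datetime\")  # hypothetical column name"
+   ]
+  },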
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "starting mike reader\n"
+     ]
+    }
+   ],
+   "source": [
+    "# MIKE+Py import it's own version of mikeio1d, which does not work for our files and also has odd problems with mikeio.\n",
+    "# Therefore use a multiprocessing.Listener, which run in its own process with its own imports, and reads the file.\n",
+    "# the script \"mikeio_listener.py\" needs to be in the same folder, or has to be startd beforehand.\n",
+    "reader_address = R\"\\\\.\\pipe\\mikeio_pipe\"\n",
+    "auth_key = b\"res1dreader\"\n",
+    "try:\n",
+    "    # try to connect to existing server\n",
+    "    reader_c = Client(reader_address, authkey=auth_key)\n",
+    "except (ConnectionRefusedError, FileNotFoundError) as e:\n",
+    "    print(\"starting mike reader\")\n",
+    "    # start a server in a new console window\n",
+    "    subprocess.Popen(\n",
+    "        [\"python\", \"mikeio_listener.py\"], creationflags=subprocess.CREATE_NEW_CONSOLE\n",
+    "    )\n",
+    "    # give server some time to start\n",
+    "    sleep(10)\n",
+    "    reader_c = Client(reader_address, authkey=auth_key)"
+   ]
+  },
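+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hedged sketch of the counterpart loop in \"mikeio_listener.py\", kept commented\n",
+    "# out because the real script ships separately. It only documents the\n",
+    "# send/recv protocol used here; read_request_as_df is a hypothetical helper.\n",
+    "# from multiprocessing.connection import Listener\n",
+    "# with Listener(reader_address, authkey=auth_key) as listener:\n",
+    "#     with listener.accept() as conn:\n",
+    "#         while (msg := conn.recv()) != \"close\":\n",
+    "#             conn.send(read_request_as_df(msg))"
+   ]
+  },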
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# open the MIKE+ database using MIKE+Py.\n",
+    "dta = DataTableAccess(db_path)\n",
+    "# open the database from MIKE+\n",
+    "dta.open_database()\n",
+    "# check if the database is open.\n",
+    "dta.is_database_open()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create the Engine1D object that will be used to run MIKE 1D simulations.\n",
+    "engine = Engine1D(dta.datatables)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def activate_sim(sim_muid):\n",
+    "    '''\n",
+    "    Activates a simulation defined by sim_muid from MIKE+.\n",
+    "    sim_muid is the name of a simulation in the simulation setup in MIKE+.\n",
+    "    '''\n",
+    "    # first deactivate all\n",
+    "    for id in dta.get_muid_where(\"msm_Project\"):\n",
+    "        dta.set_value(\"msm_Project\", id, \"ActiveProject\", False)\n",
+    "    # activate correct simulation\n",
+    "    dta.set_value(\"msm_Project\", sim_muid, \"ActiveProject\", True)\n",
+    "    # check that the sim is active\n",
+    "    if VERBOSE_OUTPUT:\n",
+    "        display(\"active sim\")\n",
+    "        display(dta.get_muid_where(\"msm_Project\", \"ActiveProject == 1\"))\n",
+    "    assert dta.get_muid_where(\"msm_Project\", \"ActiveProject == 1\") == [\n",
+    "        sim_muid\n",
+    "    ], \"only selected sim should be active\""
+   ]
+  },
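+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read-only sanity check (illustrative): list all simulation MUIDs that\n",
+    "# activate_sim can toggle. Uses only the get_muid_where call already used above.\n",
+    "display(dta.get_muid_where(\"msm_Project\"))"
+   ]
+  },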
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# specify which parameters are optimized and in which range they should be\n",
+    "# could make the table in python like this\n",
+    "# param_specs = pd.DataFrame([\n",
+    "#     {\"Table\":\"msm_HParA\",\"Muid\":DEF_STRING,\"Column\":\"RedFactor\",\"Min\":.4,\"Max\":1,\"Steps\":4},\n",
+    "#     {\"Table\":\"msm_HParA\",\"Muid\":DEF_STRING,\"Column\":\"InitLoss\",\"Min\":0,\"Max\":5*M_TO_MM,\"Steps\":4}\n",
+    "# ])\n",
+    "# or load from a file\n",
+    "param_specs = file_to_df(temp_folder / \"parameter_config.xlsx\")\n",
+    "# give params_specs a sensible string index, this is later used to pass optimization parameters as a dictionary\n",
+    "param_specs.index = pd.Index(\n",
+    "    param_specs[[\"Table\", \"Column\", \"Muid\"]].agg(\";\".join, axis=\"columns\"),\n",
+    "    name=\"ParamKey\",\n",
+    ")\n",
+    "assert (\n",
+    "    param_specs.index.is_unique\n",
+    "), \"Index needs to be unique. Otherwise indicates dupplicates in the parameters.\"\n",
+    "param_specs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Event</th>\n",
+       "      <th>EventWeight</th>\n",
+       "      <th>Start</th>\n",
+       "      <th>End</th>\n",
+       "      <th>Timestep</th>\n",
+       "      <th>SimulationName</th>\n",
+       "      <th>ReferenceFile</th>\n",
+       "      <th>ReferenceColumn</th>\n",
+       "      <th>ReferenceUnitFactor</th>\n",
+       "      <th>ResultFile</th>\n",
+       "      <th>ResultReach</th>\n",
+       "      <th>ResultNode</th>\n",
+       "      <th>ResultColumn</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Label</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>397_AFS</th>\n",
+       "      <td>397</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2022-09-13 21:36:00+01:00</td>\n",
+       "      <td>2022-09-15 00:52:00+01:00</td>\n",
+       "      <td>1min</td>\n",
+       "      <td>RW_Kal_397</td>\n",
+       "      <td>***readings_smoothed***</td>\n",
+       "      <td>conc_mgpl</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Results Kalibrierung/RW_Kal_397BaseDefault_Net...</td>\n",
+       "      <td>H0069212</td>\n",
+       "      <td></td>\n",
+       "      <td>AFS:H0069212:17.0052</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>405_AFS</th>\n",
+       "      <td>405</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2022-09-27 22:22:00+01:00</td>\n",
+       "      <td>2022-09-28 03:05:00+01:00</td>\n",
+       "      <td>1min</td>\n",
+       "      <td>RW_Kal_405</td>\n",
+       "      <td>***readings_smoothed***</td>\n",
+       "      <td>conc_mgpl</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Results Kalibrierung/RW_Kal_405BaseDefault_Net...</td>\n",
+       "      <td>H0069212</td>\n",
+       "      <td></td>\n",
+       "      <td>AFS:H0069212:17.0052</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>406_AFS</th>\n",
+       "      <td>406</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2022-10-01 02:36:00+01:00</td>\n",
+       "      <td>2022-10-01 20:11:00+01:00</td>\n",
+       "      <td>1min</td>\n",
+       "      <td>RW_Kal_406</td>\n",
+       "      <td>***readings_smoothed***</td>\n",
+       "      <td>conc_mgpl</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Results Kalibrierung/RW_Kal_406BaseDefault_Net...</td>\n",
+       "      <td>H0069212</td>\n",
+       "      <td></td>\n",
+       "      <td>AFS:H0069212:17.0052</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>407_AFS</th>\n",
+       "      <td>407</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2022-10-02 04:43:00+01:00</td>\n",
+       "      <td>2022-10-02 07:31:00+01:00</td>\n",
+       "      <td>1min</td>\n",
+       "      <td>RW_Kal_407</td>\n",
+       "      <td>***readings_smoothed***</td>\n",
+       "      <td>conc_mgpl</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Results Kalibrierung/RW_Kal_407BaseDefault_Net...</td>\n",
+       "      <td>H0069212</td>\n",
+       "      <td></td>\n",
+       "      <td>AFS:H0069212:17.0052</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>411_AFS</th>\n",
+       "      <td>411</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2022-10-17 21:54:00+01:00</td>\n",
+       "      <td>2022-10-18 12:38:00+01:00</td>\n",
+       "      <td>1min</td>\n",
+       "      <td>RW_Kal_411</td>\n",
+       "      <td>***readings_smoothed***</td>\n",
+       "      <td>conc_mgpl</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Results Kalibrierung/RW_Kal_411BaseDefault_Net...</td>\n",
+       "      <td>H0069212</td>\n",
+       "      <td></td>\n",
+       "      <td>AFS:H0069212:17.0052</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>431_AFS</th>\n",
+       "      <td>431</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2022-11-23 17:18:00+01:00</td>\n",
+       "      <td>2022-11-24 00:29:00+01:00</td>\n",
+       "      <td>1min</td>\n",
+       "      <td>RW_Kal_431</td>\n",
+       "      <td>***readings_smoothed***</td>\n",
+       "      <td>conc_mgpl</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Results Kalibrierung/RW_Kal_431BaseDefault_Net...</td>\n",
+       "      <td>H0069212</td>\n",
+       "      <td></td>\n",
+       "      <td>AFS:H0069212:17.0052</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>439_AFS</th>\n",
+       "      <td>439</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2022-12-05 03:28:00+01:00</td>\n",
+       "      <td>2022-12-05 09:12:00+01:00</td>\n",
+       "      <td>1min</td>\n",
+       "      <td>RW_Kal_439</td>\n",
+       "      <td>***readings_smoothed***</td>\n",
+       "      <td>conc_mgpl</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Results Kalibrierung/RW_Kal_439BaseDefault_Net...</td>\n",
+       "      <td>H0069212</td>\n",
+       "      <td></td>\n",
+       "      <td>AFS:H0069212:17.0052</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>490_AFS</th>\n",
+       "      <td>490</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2023-03-07 15:11:00+01:00</td>\n",
+       "      <td>2023-03-10 19:48:00+01:00</td>\n",
+       "      <td>1min</td>\n",
+       "      <td>RW_Kal_490</td>\n",
+       "      <td>***readings_smoothed***</td>\n",
+       "      <td>conc_mgpl</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Results Kalibrierung/RW_Kal_490BaseDefault_Net...</td>\n",
+       "      <td>H0069212</td>\n",
+       "      <td></td>\n",
+       "      <td>AFS:H0069212:17.0052</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>494_AFS</th>\n",
+       "      <td>494</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2023-03-15 03:15:00+01:00</td>\n",
+       "      <td>2023-03-15 11:05:00+01:00</td>\n",
+       "      <td>1min</td>\n",
+       "      <td>RW_Kal_494</td>\n",
+       "      <td>***readings_smoothed***</td>\n",
+       "      <td>conc_mgpl</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Results Kalibrierung/RW_Kal_494BaseDefault_Net...</td>\n",
+       "      <td>H0069212</td>\n",
+       "      <td></td>\n",
+       "      <td>AFS:H0069212:17.0052</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>501_AFS</th>\n",
+       "      <td>501</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2023-03-24 06:48:00+01:00</td>\n",
+       "      <td>2023-03-25 01:55:00+01:00</td>\n",
+       "      <td>1min</td>\n",
+       "      <td>RW_Kal_501</td>\n",
+       "      <td>***readings_smoothed***</td>\n",
+       "      <td>conc_mgpl</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Results Kalibrierung/RW_Kal_501BaseDefault_Net...</td>\n",
+       "      <td>H0069212</td>\n",
+       "      <td></td>\n",
+       "      <td>AFS:H0069212:17.0052</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>528_AFS</th>\n",
+       "      <td>528</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2023-05-11 23:21:00+01:00</td>\n",
+       "      <td>2023-05-12 09:56:00+01:00</td>\n",
+       "      <td>1min</td>\n",
+       "      <td>RW_Kal_528</td>\n",
+       "      <td>***readings_smoothed***</td>\n",
+       "      <td>conc_mgpl</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Results Kalibrierung/RW_Kal_528BaseDefault_Net...</td>\n",
+       "      <td>H0069212</td>\n",
+       "      <td></td>\n",
+       "      <td>AFS:H0069212:17.0052</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>589_AFS</th>\n",
+       "      <td>589</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2023-10-19 14:52:00+01:00</td>\n",
+       "      <td>2023-10-19 19:00:00+01:00</td>\n",
+       "      <td>1min</td>\n",
+       "      <td>RW_Kal_589</td>\n",
+       "      <td>***readings_smoothed***</td>\n",
+       "      <td>conc_mgpl</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Results Kalibrierung/RW_Kal_589BaseDefault_Net...</td>\n",
+       "      <td>H0069212</td>\n",
+       "      <td></td>\n",
+       "      <td>AFS:H0069212:17.0052</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>597_AFS</th>\n",
+       "      <td>597</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2023-10-28 01:11:00+01:00</td>\n",
+       "      <td>2023-10-28 05:58:00+01:00</td>\n",
+       "      <td>1min</td>\n",
+       "      <td>RW_Kal_597</td>\n",
+       "      <td>***readings_smoothed***</td>\n",
+       "      <td>conc_mgpl</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Results Kalibrierung/RW_Kal_597BaseDefault_Net...</td>\n",
+       "      <td>H0069212</td>\n",
+       "      <td></td>\n",
+       "      <td>AFS:H0069212:17.0052</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>614_AFS</th>\n",
+       "      <td>614</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2023-11-24 00:57:00+01:00</td>\n",
+       "      <td>2023-11-24 23:59:00+01:00</td>\n",
+       "      <td>1min</td>\n",
+       "      <td>RW_Kal_614</td>\n",
+       "      <td>***readings_smoothed***</td>\n",
+       "      <td>conc_mgpl</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Results Kalibrierung/RW_Kal_614BaseDefault_Net...</td>\n",
+       "      <td>H0069212</td>\n",
+       "      <td></td>\n",
+       "      <td>AFS:H0069212:17.0052</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>615_AFS</th>\n",
+       "      <td>615</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2023-11-25 14:13:00+01:00</td>\n",
+       "      <td>2023-11-25 21:38:00+01:00</td>\n",
+       "      <td>1min</td>\n",
+       "      <td>RW_Kal_615</td>\n",
+       "      <td>***readings_smoothed***</td>\n",
+       "      <td>conc_mgpl</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Results Kalibrierung/RW_Kal_615BaseDefault_Net...</td>\n",
+       "      <td>H0069212</td>\n",
+       "      <td></td>\n",
+       "      <td>AFS:H0069212:17.0052</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>623_AFS</th>\n",
+       "      <td>623</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2023-12-08 05:14:00+01:00</td>\n",
+       "      <td>2023-12-08 08:35:00+01:00</td>\n",
+       "      <td>1min</td>\n",
+       "      <td>RW_Kal_623</td>\n",
+       "      <td>***readings_smoothed***</td>\n",
+       "      <td>conc_mgpl</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Results Kalibrierung/RW_Kal_623BaseDefault_Net...</td>\n",
+       "      <td>H0069212</td>\n",
+       "      <td></td>\n",
+       "      <td>AFS:H0069212:17.0052</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "         Event  EventWeight                     Start  \\\n",
+       "Label                                                   \n",
+       "397_AFS    397            1 2022-09-13 21:36:00+01:00   \n",
+       "405_AFS    405            1 2022-09-27 22:22:00+01:00   \n",
+       "406_AFS    406            1 2022-10-01 02:36:00+01:00   \n",
+       "407_AFS    407            1 2022-10-02 04:43:00+01:00   \n",
+       "411_AFS    411            1 2022-10-17 21:54:00+01:00   \n",
+       "431_AFS    431            1 2022-11-23 17:18:00+01:00   \n",
+       "439_AFS    439            1 2022-12-05 03:28:00+01:00   \n",
+       "490_AFS    490            1 2023-03-07 15:11:00+01:00   \n",
+       "494_AFS    494            1 2023-03-15 03:15:00+01:00   \n",
+       "501_AFS    501            1 2023-03-24 06:48:00+01:00   \n",
+       "528_AFS    528            1 2023-05-11 23:21:00+01:00   \n",
+       "589_AFS    589            1 2023-10-19 14:52:00+01:00   \n",
+       "597_AFS    597            1 2023-10-28 01:11:00+01:00   \n",
+       "614_AFS    614            1 2023-11-24 00:57:00+01:00   \n",
+       "615_AFS    615            1 2023-11-25 14:13:00+01:00   \n",
+       "623_AFS    623            1 2023-12-08 05:14:00+01:00   \n",
+       "\n",
+       "                              End Timestep SimulationName  \\\n",
+       "Label                                                       \n",
+       "397_AFS 2022-09-15 00:52:00+01:00     1min     RW_Kal_397   \n",
+       "405_AFS 2022-09-28 03:05:00+01:00     1min     RW_Kal_405   \n",
+       "406_AFS 2022-10-01 20:11:00+01:00     1min     RW_Kal_406   \n",
+       "407_AFS 2022-10-02 07:31:00+01:00     1min     RW_Kal_407   \n",
+       "411_AFS 2022-10-18 12:38:00+01:00     1min     RW_Kal_411   \n",
+       "431_AFS 2022-11-24 00:29:00+01:00     1min     RW_Kal_431   \n",
+       "439_AFS 2022-12-05 09:12:00+01:00     1min     RW_Kal_439   \n",
+       "490_AFS 2023-03-10 19:48:00+01:00     1min     RW_Kal_490   \n",
+       "494_AFS 2023-03-15 11:05:00+01:00     1min     RW_Kal_494   \n",
+       "501_AFS 2023-03-25 01:55:00+01:00     1min     RW_Kal_501   \n",
+       "528_AFS 2023-05-12 09:56:00+01:00     1min     RW_Kal_528   \n",
+       "589_AFS 2023-10-19 19:00:00+01:00     1min     RW_Kal_589   \n",
+       "597_AFS 2023-10-28 05:58:00+01:00     1min     RW_Kal_597   \n",
+       "614_AFS 2023-11-24 23:59:00+01:00     1min     RW_Kal_614   \n",
+       "615_AFS 2023-11-25 21:38:00+01:00     1min     RW_Kal_615   \n",
+       "623_AFS 2023-12-08 08:35:00+01:00     1min     RW_Kal_623   \n",
+       "\n",
+       "                   ReferenceFile ReferenceColumn  ReferenceUnitFactor  \\\n",
+       "Label                                                                   \n",
+       "397_AFS  ***readings_smoothed***       conc_mgpl                    1   \n",
+       "405_AFS  ***readings_smoothed***       conc_mgpl                    1   \n",
+       "406_AFS  ***readings_smoothed***       conc_mgpl                    1   \n",
+       "407_AFS  ***readings_smoothed***       conc_mgpl                    1   \n",
+       "411_AFS  ***readings_smoothed***       conc_mgpl                    1   \n",
+       "431_AFS  ***readings_smoothed***       conc_mgpl                    1   \n",
+       "439_AFS  ***readings_smoothed***       conc_mgpl                    1   \n",
+       "490_AFS  ***readings_smoothed***       conc_mgpl                    1   \n",
+       "494_AFS  ***readings_smoothed***       conc_mgpl                    1   \n",
+       "501_AFS  ***readings_smoothed***       conc_mgpl                    1   \n",
+       "528_AFS  ***readings_smoothed***       conc_mgpl                    1   \n",
+       "589_AFS  ***readings_smoothed***       conc_mgpl                    1   \n",
+       "597_AFS  ***readings_smoothed***       conc_mgpl                    1   \n",
+       "614_AFS  ***readings_smoothed***       conc_mgpl                    1   \n",
+       "615_AFS  ***readings_smoothed***       conc_mgpl                    1   \n",
+       "623_AFS  ***readings_smoothed***       conc_mgpl                    1   \n",
+       "\n",
+       "                                                ResultFile ResultReach  \\\n",
+       "Label                                                                    \n",
+       "397_AFS  Results Kalibrierung/RW_Kal_397BaseDefault_Net...    H0069212   \n",
+       "405_AFS  Results Kalibrierung/RW_Kal_405BaseDefault_Net...    H0069212   \n",
+       "406_AFS  Results Kalibrierung/RW_Kal_406BaseDefault_Net...    H0069212   \n",
+       "407_AFS  Results Kalibrierung/RW_Kal_407BaseDefault_Net...    H0069212   \n",
+       "411_AFS  Results Kalibrierung/RW_Kal_411BaseDefault_Net...    H0069212   \n",
+       "431_AFS  Results Kalibrierung/RW_Kal_431BaseDefault_Net...    H0069212   \n",
+       "439_AFS  Results Kalibrierung/RW_Kal_439BaseDefault_Net...    H0069212   \n",
+       "490_AFS  Results Kalibrierung/RW_Kal_490BaseDefault_Net...    H0069212   \n",
+       "494_AFS  Results Kalibrierung/RW_Kal_494BaseDefault_Net...    H0069212   \n",
+       "501_AFS  Results Kalibrierung/RW_Kal_501BaseDefault_Net...    H0069212   \n",
+       "528_AFS  Results Kalibrierung/RW_Kal_528BaseDefault_Net...    H0069212   \n",
+       "589_AFS  Results Kalibrierung/RW_Kal_589BaseDefault_Net...    H0069212   \n",
+       "597_AFS  Results Kalibrierung/RW_Kal_597BaseDefault_Net...    H0069212   \n",
+       "614_AFS  Results Kalibrierung/RW_Kal_614BaseDefault_Net...    H0069212   \n",
+       "615_AFS  Results Kalibrierung/RW_Kal_615BaseDefault_Net...    H0069212   \n",
+       "623_AFS  Results Kalibrierung/RW_Kal_623BaseDefault_Net...    H0069212   \n",
+       "\n",
+       "        ResultNode          ResultColumn  \n",
+       "Label                                     \n",
+       "397_AFS             AFS:H0069212:17.0052  \n",
+       "405_AFS             AFS:H0069212:17.0052  \n",
+       "406_AFS             AFS:H0069212:17.0052  \n",
+       "407_AFS             AFS:H0069212:17.0052  \n",
+       "411_AFS             AFS:H0069212:17.0052  \n",
+       "431_AFS             AFS:H0069212:17.0052  \n",
+       "439_AFS             AFS:H0069212:17.0052  \n",
+       "490_AFS             AFS:H0069212:17.0052  \n",
+       "494_AFS             AFS:H0069212:17.0052  \n",
+       "501_AFS             AFS:H0069212:17.0052  \n",
+       "528_AFS             AFS:H0069212:17.0052  \n",
+       "589_AFS             AFS:H0069212:17.0052  \n",
+       "597_AFS             AFS:H0069212:17.0052  \n",
+       "614_AFS             AFS:H0069212:17.0052  \n",
+       "615_AFS             AFS:H0069212:17.0052  \n",
+       "623_AFS             AFS:H0069212:17.0052  "
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# read the event_config.xlsx in which for each sim_muid=events variables are set.\n",
+    "events = file_to_df(temp_folder / \"event_config.xlsx\").set_index(\"Label\")\n",
+    "# make sure that empty strings are translated.\n",
+    "events[\"ResultReach\"] = events[\"ResultReach\"].fillna(\"\").astype(str)\n",
+    "events[\"ResultNode\"] = events[\"ResultNode\"].fillna(\"\").astype(str)\n",
+    "# check if start time of events is before the end time.\n",
+    "assert (events[\"Start\"] <= events[\"End\"]).all(\n",
+    "    axis=None\n",
+    "), \"Event end needs to be after start.\"\n",
+    "# add timezone. Use same as sensor_date.\n",
+    "events[\"Start\"]=events[\"Start\"].dt.tz_localize(timezone)\n",
+    "events[\"End\"]=events[\"End\"].dt.tz_localize(timezone)\n",
+    "# check if threre are duplicated events.\n",
+    "assert events.index.is_unique, \"Need unique labels\"\n",
+    "assert events.drop_duplicates(subset=\"Event\")[\"SimulationName\"].is_unique, \"Need exactly one simulation name per event\"\n",
+    "events"
+   ]
+  },
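+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Small illustration of the tz_localize step above: the zone is attached\n",
+    "# without shifting the wall-clock time (the timestamp value is made up).\n",
+    "display(pd.Timestamp(\"2022-09-13 21:36\").tz_localize(timezone))"
+   ]
+  },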
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def _read_result_helper(file, reach, node, column, timestep, start, end) -> pd.Series:\n",
+    "    \"\"\"Read the result with specified parameters.\n",
+    "    returns:\n",
+    "        a pandas Series with Datetime as index.\n",
+    "    \"\"\"\n",
+    "    # file_path is read from the event table\n",
+    "    # the reader should be executed in the same directory as this file, but it is safer to send absolute paths.\n",
+    "    # reaches/nodes is used to only read some parts of the result.\n",
+    "    # a reach can be any connection between two nodes.\n",
+    "    # loc may return a series or dataframe.\n",
+    "    file_path: Path = temp_folder / file\n",
+    "    params = {\n",
+    "        \"file_path\": str(file_path.absolute()),\n",
+    "        \"reaches\": [reach],\n",
+    "        \"nodes\": [node],\n",
+    "    }\n",
+    "    # uses the MIKEio listener script first to send the parameters\n",
+    "    reader_c.send(params)\n",
+    "    # second to receive the data from the result file    \n",
+    "    df = reader_c.recv()\n",
+    "    # reduce result to desired column.\n",
+    "    series = df[column]\n",
+    "    # add timezone to the index.\n",
+    "    series.index = series.index.tz_localize(timezone)\n",
+    "    # print the data before processing.\n",
+    "    if VERBOSE_OUTPUT:\n",
+    "        display(f\"{file_path}[{column}] before resample:\")\n",
+    "        display(series)\n",
+    "    # resample to be comparable to results.\n",
+    "    # Linear interpolation with pandas first throws away all values where the index does not match to the frequency,\n",
+    "    # therefore use nearest instead of interpolate.\n",
+    "    series = series.resample(timestep).nearest()\n",
+    "    # reduce result to desired times\n",
+    "    series = series.loc[(series.index >= start) & (series.index <= end)]\n",
+    "    # set index name\n",
+    "    series.index.name = \"Datetime\"\n",
+    "    return series\n",
+    "\n",
+    "\n",
+    "def read_result(label) -> pd.Series:\n",
+    "    \"\"\"\n",
+    "    Read the result of the part of the collection system with the corresponding label, \n",
+    "    assuming the simulation was run, using values from DataFrame events.\n",
+    "    returns:\n",
+    "        a pandas Series with Datetime as index.\n",
+    "    \"\"\"\n",
+    "    # used above function, gets parameters from the table\n",
+    "    return _read_result_helper(\n",
+    "        events.at[label, \"ResultFile\"],\n",
+    "        events.at[label, \"ResultReach\"],\n",
+    "        events.at[label, \"Timestep\"],\n",
+    "        events.at[label, \"ResultNode\"],\n",
+    "        events.at[label, \"ResultColumn\"],\n",
+    "        events.at[label, \"Start\"],\n",
+    "        events.at[label, \"End\"],\n",
+    "    )\n",
+    "\n",
+    "\n",
+    "def delete_result_file(label):\n",
+    "    '''\n",
+    "    deletes existing result file before a simulation correspoding to label.\n",
+    "    '''\n",
+    "    file_path: Path = temp_folder / events.at[label, \"ResultFile\"]\n",
+    "    file_path.unlink(True)"
+   ]
+  },
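+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Why .resample(...).nearest() instead of .interpolate(): with off-grid\n",
+    "# timestamps, interpolation after resampling would start from all-NaN bins.\n",
+    "# Synthetic two-point series for illustration only.\n",
+    "_s = pd.Series(\n",
+    "    [1.0, 2.0],\n",
+    "    index=pd.to_datetime([\"2022-01-01 00:00:30\", \"2022-01-01 00:01:30\"]),\n",
+    ")\n",
+    "display(_s.resample(\"1min\").nearest())"
+   ]
+  },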
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create a dictionary to save parts of the reference corresponding to label.\n",
+    "ref_parts = {}\n",
+    "\n",
+    "# read the data for each label.\n",
+    "for label in events.index:\n",
+    "    filepath = temp_folder / events.loc[label, \"ReferenceFile\"]\n",
+    "    col = events.loc[label, \"ReferenceColumn\"]\n",
+    "    # only look at time that is of interest:\n",
+    "    start = events.loc[label, \"Start\"]\n",
+    "    end = events.loc[label, \"End\"]\n",
+    "    # read the reference values\n",
+    "    with db_engine.begin() as con:\n",
+    "        ref_part = (\n",
+    "            pd.read_sql_query(\n",
+    "                sa.text(\n",
+    "                    f\"select datetime, {col} from readings_smoothed where sensor_name = :sn and  datetime>=:start and datetime<=:end\"\n",
+    "                ),\n",
+    "                con,\n",
+    "                params={\"start\": start, \"end\": end, \"sn\": sensor_conc},\n",
+    "                index_col=\"datetime\",\n",
+    "            )[col]\n",
+    "        )\n",
+    "    assert isinstance(\n",
+    "        ref_part.index, pd.DatetimeIndex\n",
+    "    ), \"Need DatetimeIndex (expected in first column)\"\n",
+    "    # take column as defined in the event table\n",
+    "    if VERBOSE_OUTPUT:\n",
+    "        display(\"Ref part before resample\")\n",
+    "        display(ref_part)\n",
+    "    # time step of simulation output is one minute, also adjust units\n",
+    "    ref_part = ref_part.resample(events.loc[label, \"Timestep\"]).nearest()\n",
+    "    ref_part = ref_part*events.loc[label, \"ReferenceUnitFactor\"]\n",
+    "    assert ref_part.isna().any(axis=None) == False\n",
+    "    # write in dictionary.\n",
+    "    ref_parts[label] = ref_part\n",
+    "\n",
+    "# write dictionary into a pandas series with index Label and Datetime    \n",
+    "reference = pd.concat(ref_parts)\n",
+    "reference.index.names = [\"Label\", \"Datetime\"]\n",
+    "reference"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# read result which are already in folder\n",
+    "# test run for read_result\n",
+    "_test_rs = pd.concat({ev:read_result(ev) for ev in events.index})\n",
+    "_test_rs.index.names = reference.index.names\n",
+    "# plot results, if this looks very different from plots in mike, unit conversion may need to be added\n",
+    "for label in events.index:\n",
+    "    txs=_test_rs.xs(label,level=\"Label\")\n",
+    "    rxs=reference.xs(label,level=\"Label\")\n",
+    "    missing_ratio = txs.reindex_like(rxs).isna().mean()\n",
+    "    if missing_ratio>0.2:\n",
+    "        warn(f\"Label {label} has many missing values compared to the reference missing ratio is {missing_ratio} \")\n",
+    "    pd.DataFrame({\"ref\":rxs,\"test\":txs}).plot(title=str(label),figsize=(7,2.5))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# define possible objectivefuntcions\n",
+    "# note: if your objective function should be maximized, use the negative objective function as loss function\n",
+    "\n",
+    "def rmse(ref: pd.Series, res: pd.Series) -> float:\n",
+    "    \"\"\"Root Mean Square Error.\"\"\"\n",
+    "    if not ref.index.equals(res.index):\n",
+    "        raise ValueError(\"Indices are not equal, cannot compare\")\n",
+    "    return ((ref - res) ** 2).mean(axis=None) ** 0.5\n",
+    "\n",
+    "\n",
+    "\n",
+    "def vfp(ref: pd.Series, res: pd.Series) -> float:\n",
+    "    '''\n",
+    "    Volume Error\n",
+    "    '''\n",
+    "    if not ref.index.equals(res.index):\n",
+    "        raise ValueError(\"Indices are not equal, cannot compare\")\n",
+    "    ref_volume = ref.sum()  # Gesamtvolumen der Referenzdaten\n",
+    "    res_volume = res.sum()  # Gesamtvolumen der Ergebnisdaten\n",
+    "    dif = abs((res_volume - ref_volume))\n",
+    "    # if reference volumne is 0 use replacement value. More useful for optimization\n",
+    "    # replacement value for the divisior when ref_volume is zero.\n",
+    "    _vfp_divisor_on_zero = reference.groupby(level=\"Label\").sum().mean()\n",
+    "    if ref_volume == 0:\n",
+    "        ref_volume = _vfp_divisor_on_zero\n",
+    "        # ToDo: Herausfinden was man sonst sinnvolles benutzen kann! * pd.to_timedelta(events[\"Timestep\"]).dt.total_seconds().to_numpy()\n",
+    "    return 100 * dif / ref_volume  # Volumenfehler in Prozent\n",
+    "\n",
+    "\n",
+    "def weighted_vfp(ref: pd.Series, res: pd.Series) -> float:\n",
+    "    '''Weighted Volume Error'''\n",
+    "    labels = events.index.get_level_values(\"Label\")\n",
+    "    # read the event weights also put timestep into weights.\n",
+    "    weights = events[\"EventWeight\"].to_numpy()\n",
+    "    # calculate the vfp for each label\n",
+    "    per_label = np.array(\n",
+    "        [vfp(ref.xs(l, level=\"Label\"), res.xs(l, level=\"Label\")) for l in labels]\n",
+    "    )\n",
+    "    # control if there a missing weights\n",
+    "    nan_parts = np.isnan(weights)|np.isnan(per_label)\n",
+    "    if np.any(nan_parts):\n",
+    "        warn(\"weighted_vfp: some parts are nan, skipping those\")\n",
+    "    # return the weightes average vfp over all labels where no value is nan\n",
+    "    return np.average(per_label[~nan_parts], weights=weights[~nan_parts])\n",
+    "\n",
+    "\n",
+    "def mean_negative_bounded_nse(ref: pd.Series, res: pd.Series) -> float:\n",
+    "    \"\"\"\n",
+    "    Mean over Events of negative nse_c2m. range [-1,1]\n",
+    "\n",
+    "    Args:\n",
+    "        actual (pandas.DataFrame): DataFrame containing actual values.\n",
+    "        forecast (pandas.DataFrame): DataFrame containing forecasted values.\n",
+    "    Returns:\n",
+    "        float: Cost of the solution.\n",
+    "    \"\"\"\n",
+    "    if not ref.index.equals(res.index):\n",
+    "        raise ValueError(\"Indices are not equal, cannot compare\")\n",
+    "    loss_parts = []\n",
+    "    for ev in ref.index.unique(\"Event\"):\n",
+    "        ref_part = ref.xs(ev, level=\"Event\")\n",
+    "        res_part = res.xs(ev, level=\"Event\")\n",
+    "        loss_parts.append(-hde.nse_c2m(res_part.to_numpy(), ref_part.to_numpy()))\n",
+    "    return np.mean(loss_parts)\n",
+    "\n",
+    "# define which objectivefunction is used.\n",
+    "loss_fn = weighted_vfp\n",
+    "# use this value to penalize errors\n",
+    "loss_on_error = loss_fn(reference, -reference)\n",
+    "display(loss_on_error)"
+   ]
+  },
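+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Tiny sanity check of the loss functions on synthetic, made-up numbers\n",
+    "# (illustrative only; real losses come from simulation results).\n",
+    "_idx = pd.date_range(\"2022-01-01\", periods=4, freq=\"1min\")\n",
+    "_ref = pd.Series([1.0, 2.0, 3.0, 4.0], index=_idx)\n",
+    "_res = pd.Series([1.5, 2.5, 2.5, 4.0], index=_idx)\n",
+    "display(rmse(_ref, _res))  # root mean square error, here ~0.433\n",
+    "display(vfp(_ref, _res))   # volume error in percent, here 5.0"
+   ]
+  },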
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class ParamSetError(Exception):\n",
+    "    \"\"\"Class that marks an Error where Parameters were not correctly set.\"\"\"\n",
+    "    # class only needed to distinguish Error, no special behavour needed.\n",
+    "    pass\n",
+    "\n",
+    "# the ** means that simulation run may be called like so:\n",
+    "# result = simulation_run(key1=p1,key2=p2, ...)\n",
+    "# but kwparams will contain a dict of form {\"key1\":p1,\"key2\":p2,...}\n",
+    "# if you called the simulation like simulation_run(key1=p1,key2=p2, ...),\n",
+    "# key1, key2, ... would need to be valid python names\n",
+    "# but with simulation_run(**{\"key1\":p1,\"key2\":p2,...}) key1, key2 could be any string.\n",
+    "# Because the optimizer uses the second form, we can use the index from param_specs as a key,\n",
+    "# even though the index may not be a valid python name\n",
+    "def single_sim_run(ev,**kwparams):\n",
+    "    \"\"\"\n",
+    "    Runs a single simulation for event ev.\n",
+    "    \n",
+    "    \"\"\"\n",
+    "    labels = events.index[events[\"Event\"]==ev]\n",
+    "    # delete previous result file\n",
+    "    if DELETE_RESFILE_BEFORE_SIM:\n",
+    "        for l in labels:\n",
+    "            delete_result_file(l)\n",
+    "    # multiple rows per event may exist, use first row, values should be equal\n",
+    "    sim_name = events.loc[events[\"Event\"]==ev, \"SimulationName\"].iloc[0]\n",
+    "    # activate event\n",
+    "    activate_sim(sim_name)\n",
+    "    # set all parameters to the values defined by params.\n",
+    "    # param_specs defines where the parameter is located in the mike database\n",
+    "    for k in kwparams.keys():\n",
+    "        tab = param_specs.loc[k, \"Table\"]\n",
+    "        col = param_specs.loc[k, \"Column\"]\n",
+    "        muid = param_specs.loc[k, \"Muid\"]\n",
+    "        val = kwparams[k]\n",
+    "        dta.set_value(tab, muid, col, val)\n",
+    "        try:\n",
+    "            val_get = dta.get_field_values(tab,muid,[col])[0]\n",
+    "        except Exception as ex:\n",
+    "            val_get = ex\n",
+    "        if not val_get==val:\n",
+    "            raise ParamSetError(f\"\"\"Value not correctly set. \n",
+    "                                set {val}, got {val_get}.\n",
+    "                                tab: {tab}\n",
+    "                                col: {col}\n",
+    "                                muid: {muid}\"\"\")\n",
+    "    print(f\"Running simulation {sim_name}.\")\n",
+    "\n",
+    "    # usually the simulation would print a lot of stuff,\n",
+    "    # supress output; it would be too much text for repeated calls\n",
+    "    with ipyio.capture_output() as captured:\n",
+    "        engine.run(sim_name)\n",
+    "    if (len(captured.stderr) > 0) or (\" ERROR: \" in captured.stdout):\n",
+    "        warn(f\"Error during simulation for Event {ev}. If show_sim_out_on_error is true, output from sululation follows.\")\n",
+    "        if show_sim_out_on_error:\n",
+    "            captured()\n",
+    "    if VERBOSE_OUTPUT:\n",
+    "        captured()\n",
+    "    # concatenate event data for multiple labels.\n",
+    "    results_ev = pd.concat({l:read_result(l) for l in labels},names=[\"Label\"])\n",
+    "    # select part of the reference that corresponds to the events\n",
+    "    references_ev = reference[reference.index.get_level_values('Label').isin(labels)]\n",
+    "    # calculate ratio of missing values in the part of the reference file\n",
+    "    missing_ratio = results_ev.reindex_like(references_ev).isna().mean()\n",
+    "    assert missing_ratio<MAX_MISSING_RATIO, \"Result is missing many values compared to the reference\"\n",
+    "    # returns results as a pandas series.\n",
+    "    return results_ev\n",
+    "\n",
+    "\n",
+    "def simulation_run(**kwparams):\n",
+    "    \"\"\"\n",
+    "    Runs single_sim_run for each event.\n",
+    "    Takes parameters as arguments, runs simulations as defined by events with them\n",
+    "    and returns the results as a series with MultiIndex[event, time].\n",
+    "\n",
+    "    Note that kwparams needs keys that match the index of DataFrame param_specs.\n",
+    "    \"\"\"\n",
+    "    \n",
+    "    assert set(kwparams.keys()) == set(\n",
+    "        param_specs.index\n",
+    "    ), \"parameters need to match param_specs\"\n",
+    "\n",
+    "    # Create a list to store result parts for each event\n",
+    "    res_parts = []\n",
+    "    for ev in events[\"Event\"].unique():\n",
+    "        # Tries the MIKE+ simulation a maximum of times\n",
+    "        for i in range(MAX_SUMILATION_TRY):\n",
+    "            try:\n",
+    "                res_parts.append(single_sim_run(ev,**kwparams))\n",
+    "                # exit the loop if we have a result\n",
+    "                # if an error occurs, the try block is immediately exited and the break is not executed\n",
+    "                break\n",
+    "            except ParamSetError as pex:\n",
+    "                raise pex\n",
+    "            except Exception as ex:\n",
+    "                warn(f\"Simulation try {i} failed for event {ev}:{ex}\")\n",
+    "        else:\n",
+    "            # Else is executed when the for loop completed without break,\n",
+    "            # i.e the simulation did not complete even after multiple runs.\n",
+    "            # in that case return a nan Dataframe\n",
+    "            warn(f\"Simulation failed too many times, returning nan-result\")\n",
+    "            labels = events.index[events[\"Event\"]==ev]\n",
+    "            idx = pd.MultiIndex.from_product([labels,[pd.to_datetime(np.NaN)]])\n",
+    "            idx.names = reference.index.names\n",
+    "            res_parts.append(pd.Series(data=pd.NA,index=idx))\n",
+    "    # concatenate the results, note that single_sim_run has already added the label to the index\n",
+    "    result = pd.concat(res_parts)\n",
+    "    # returns results as pandas series\n",
+    "    return result"
+   ]
+  },
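+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Minimal illustration of the keyword-unpacking call form explained above:\n",
+    "# with **{...}, keys do not have to be valid python names. The key below is\n",
+    "# made up; real keys come from param_specs.index.\n",
+    "def _echo(**kwparams):\n",
+    "    return kwparams\n",
+    "\n",
+    "display(_echo(**{\"msm_HParA;RedFactor;Global\": 0.7}))"
+   ]
+  },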
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# test run with default parameters\n",
+    "# the ** in the call unrolls the series into keyword parameters\n",
+    "_test_res = simulation_run(**param_specs[\"Default\"])\n",
+    "display(\"_test_res\")\n",
+    "display(_test_res)\n",
+    "_test_res = _test_res.reindex_like(reference)\n",
+    "display(\"missing ratio\")\n",
+    "display(_test_res.reindex_like(reference).isna().mean())\n",
+    "display(\"loss_fn\")\n",
+    "display(loss_fn(reference, _test_res))\n",
+    "for label in events.index:\n",
+    "    txs=_test_res.xs(label,level=\"Label\")\n",
+    "    rxs=reference.xs(label,level=\"Label\")\n",
+    "    missing_ratio = txs.reindex_like(rxs).isna().mean()\n",
+    "    if missing_ratio>0.2:\n",
+    "        warn(f\"Label {label} has many missing values compared to the reference missing ratio is {missing_ratio} \")\n",
+    "\n",
+    "    pd.DataFrame({\"ref\":rxs,\"test\":txs}).plot(title=str(label),figsize=(7,2.5))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# define optimization paramaters as needed by nevergrad\n",
+    "# first make a dictionary of scalar values with information from param_specs\n",
+    "ng_params = {}\n",
+    "for i in param_specs.index:\n",
+    "    vmin = param_specs.at[i, \"Min\"]\n",
+    "    vmax = param_specs.at[i, \"Max\"]\n",
+    "    vdef = param_specs.at[i, \"Default\"]\n",
+    "    ngp = ng.p.Scalar(init=vdef, lower=vmin, upper=vmax)\n",
+    "    ng_params[i] = ngp\n",
+    "# now turn the list into an instrumentation\n",
+    "# the instrumentation is used to tell the optimizer how the paremeters are passed to our function\n",
+    "instrumentation = ng.p.Instrumentation(**ng_params)\n",
+    "instrumentation"
+   ]
+  },
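+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch of nevergrad's ask/tell interface with the instrumentation above.\n",
+    "# No simulation is run; a throwaway RandomSearch optimizer just proposes one\n",
+    "# candidate so the parameter dictionary can be inspected.\n",
+    "_probe = ng.optimizers.registry[\"RandomSearch\"](parametrization=instrumentation, budget=1)\n",
+    "_candidate = _probe.ask()\n",
+    "display(_candidate.value[1])  # kwargs dict keyed like param_specs.index"
+   ]
+  },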
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# define the parameter calibration as a minimization problem\n",
+    "def simulation_compare(**kwparams):\n",
+    "    \"\"\"\n",
+    "    Runs simulation with the specified parameters and uses the loss function on the results.\n",
+    "    \"\"\"\n",
+    "    # start simulation for all events with one parameter set defined by kwparams.\n",
+    "    result = simulation_run(**kwparams)\n",
+    "    # reindex the result according to the reference, important if values are missing in the reference.\n",
+    "    # If there are missing values in results they are inserted as nan. Can be avoided by a proper fixed timestep definition in MIKE+.\n",
+    "    result_matched = result.reindex_like(reference)\n",
+    "    # calculate the objextive function for all results\n",
+    "    plain_loss = loss_fn(reference, result_matched)\n",
+    "    # values should not be missing, penalize by increasing the loss\n",
+    "    missing_ratio = result_matched.isna().mean()\n",
+    "    if missing_ratio>0:\n",
+    "        warn(f\"Values are missing from result:{missing_ratio}\")\n",
+    "    return (1-missing_ratio)*plain_loss + missing_ratio*loss_on_error"
+   ]
+  },
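+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Worked numeric example of the penalty blend above (numbers are made up):\n",
+    "# with 25 % missing values, plain_loss = 10 and loss_on_error = 80 the\n",
+    "# reported loss is (1 - 0.25) * 10 + 0.25 * 80 = 27.5.\n",
+    "display((1 - 0.25) * 10 + 0.25 * 80)"
+   ]
+  },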
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# define callback funtions for the optimizers\n",
+    "# we pass these to the optimizer and the optimizer then executes the function at each step of the optimization\n",
+    "def params_losses_callback():\n",
+    "    \"\"\"returns two lists and a callback function.\n",
+    "    When the callback function is executed it puts the parameters and loss into the lists.\n",
+    "    !!Do not register this function itself as a callback, but the third returned parameter!!\n",
+    "    \"\"\"\n",
+    "    # create two lists to store the params and the corresponding losses\n",
+    "    params = []\n",
+    "    losses = []\n",
+    "\n",
+    "    def callback(optim, par, loss):\n",
+    "        params.append(par)\n",
+    "        losses.append(loss)\n",
+    "\n",
+    "    # returns two lists and the function\n",
+    "    return params, losses, callback"
+   ]
+  },
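+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Minimal illustration of the callback factory above, without an optimizer:\n",
+    "# calling the returned callback fills the two lists, which is exactly what\n",
+    "# nevergrad does on every \"tell\". The arguments here are placeholders.\n",
+    "_params, _losses, _cb = params_losses_callback()\n",
+    "_cb(None, \"placeholder-param\", 1.23)\n",
+    "display(_params, _losses)"
+   ]
+  },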
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def optim_run(optim_name):\n",
+    "    '''\n",
+    "    Function to start the optimizer.\n",
+    "    input = name of the optimizer\n",
+    "    output = dataframe with details from optimization model\n",
+    "    '''\n",
+    "    # set the optimizer based on the optim_name and instrumentation from params_spec\n",
+    "    optimizer = ng.optimizers.registry[optim_name](\n",
+    "            parametrization=instrumentation, budget=optimization_budget, num_workers=1\n",
+    "        )\n",
+    "    # show progressbar. updated when the optimizer gets the result of a step\n",
+    "    optimizer.register_callback(\"tell\", ng.callbacks.ProgressBar())\n",
+    "    # put params and loss in list\n",
+    "    param_list, loss_list, plcb = params_losses_callback()\n",
+    "    optimizer.register_callback(\"tell\", plcb)\n",
+    "    # get recommendation of parameters after run is finished\n",
+    "    reccomendation = optimizer.minimize(simulation_compare, verbosity=2)\n",
+    "    optim_run_df = pd.concat(\n",
+    "        [\n",
+    "            pd.DataFrame(data=[p.value[1] for p in param_list]),\n",
+    "            pd.DataFrame({\"Optimizer\": optimizer.name, \"Loss\": loss_list}),\n",
+    "        ],\n",
+    "        axis=\"columns\",\n",
+    "    )\n",
+    "    # also add a line for the reccomendation\n",
+    "    rec_params = reccomendation.value[1]\n",
+    "    optim_run_df.loc[\"reccomendation\", rec_params.keys()] = rec_params\n",
+    "    optim_run_df.loc[\"reccomendation\", \"Loss\"] = reccomendation.loss\n",
+    "    optim_run_df.loc[\"reccomendation\", \"Optimizer\"] = optimizer.name\n",
+    "    # save the results of the optimization as a csv and excel file\n",
+    "    optim_run_df.to_csv(out_folder / f\"optim_run{optimizer.name}.csv\")\n",
+    "    optim_run_df.to_excel(out_folder / f\"optim_run{optimizer.name}.xlsx\")\n",
+    "\n",
+    "    return optim_run_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# choose optimizers from nevergrad\n",
+    "# https://en.wikipedia.org/wiki/CMA-ES\n",
+    "# create a list with optimizers\n",
+    "# it is also possible to use multiple optimizers and compare the results\n",
+    "optimizers = [\"BayesOptimBO\"] #[\"CMA\",\"BayesOptimBO\"]\n",
+    "# create dictionary to store the rundata for each optimizer\n",
+    "optim_run_dfs = {}\n",
+    "for o in optimizers:\n",
+    "    # start the optimization\n",
+    "    optim_run_dfs[o] = optim_run(o)\n",
+    "    # display results\n",
+    "    display(optim_run_dfs[o])\n",
+    "    optim_run_dfs[o].plot(subplots=True, figsize=(5, 2 * len(param_specs)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# only interesting if multiple optimizers are used.\n",
+    "# optim_run_df already has optimizer included, use .values instead of passing the dict, so that the optimizer is not added again.\n",
+    "optim_run_df = pd.concat(optim_run_dfs.values())\n",
+    "optim_run_df.to_csv(out_folder / \"optim_run.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# set the index if the optim_run_df\n",
+    "optim_run_df.index.name = \"Step\"\n",
+    "optim_run_df = optim_run_df.reset_index().set_index([\"Optimizer\", \"Step\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# plot the loss for each step of all optimizers\n",
+    "optim_run_df[\"Loss\"].unstack(level=\"Optimizer\").plot(\n",
+    "    subplots=True, sharey=True, title=\"loss over steps\"\n",
+    ")[0].get_figure().savefig(out_folder / \"loss_over_steps.png\", bbox_inches=\"tight\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# plot recommendation as a bar plot\n",
+    "ax = optim_run_df[\"Loss\"].unstack(level=0).loc[\"reccomendation\"].plot(kind=\"bar\")\n",
+    "ax.bar_label(ax.containers[0])\n",
+    "optim_run_df[\"Loss\"].xs(\"reccomendation\", level=\"Step\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# plot the cumulative loss over each step\n",
+    "optim_run_df[\"Loss\"].unstack(level=0).cummin().plot(\n",
+    "    title=\"cummin loss over steps\"\n",
+    ").get_figure().savefig(out_folder / \"loss_cummin.png\", bbox_inches=\"tight\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# store the simulation results for the recommended results\n",
+    "res_dict = {\"ref\": reference}\n",
+    "for key, df in optim_run_df.groupby(level=\"Optimizer\"):\n",
+    "    ps = df.loc[(key, \"reccomendation\"), param_specs.index]\n",
+    "    display(ps)\n",
+    "    # start single simulation with recommended params\n",
+    "    _test_res = simulation_run(**ps)\n",
+    "    res_dict[key] = _test_res"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# concatenate best results in one dataframe\n",
+    "best_res = pd.concat(res_dict, axis=\"columns\")\n",
+    "# save best results in a csv file\n",
+    "best_res.to_csv(out_folder / \"best_res.csv\")\n",
+    "# plot best result for each label\n",
+    "ev_group = best_res.groupby(level=\"Label\")\n",
+    "fig, axes = plt.subplots(nrows=len(ev_group), figsize=(6, 2.5 * len(ev_group)))\n",
+    "for (label, df), ax in zip(ev_group, axes):\n",
+    "    df.droplevel(\"Label\").plot(ax=ax,title=str(label))\n",
+    "fig.suptitle(\"Optimizer Result and Reference\")\n",
+    "fig.tight_layout()\n",
+    "# save plot as png file\n",
+    "fig.savefig(out_folder / \"best_res.png\", bbox_inches=\"tight\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# measure how much the parameters are adjusted\n",
+    "# first normalize to range than take norm2 of difference to previous step\n",
+    "# geometric interpretation: project parameters into hyper unit cube and calculate the velocity\n",
+    "\n",
+    "# only look at params and exclude reccomendation\n",
+    "run_param_df = optim_run_df.loc[\n",
+    "    optim_run_df.index.get_level_values(\"Step\") != \"reccomendation\", param_specs.index\n",
+    "]\n",
+    "normalized = (run_param_df - param_specs[\"Min\"]) / (\n",
+    "    param_specs[\"Max\"] - param_specs[\"Min\"]\n",
+    ")\n",
+    "velocity = normalized.diff()\n",
+    "# norm2 (root of sum of squares)\n",
+    "velocity = (velocity**2).sum(axis=\"columns\") ** 0.5\n",
+    "# plot the velocity of the parameter adjustments\n",
+    "velocity.unstack(level=0).plot(title=\"velocity of parameter adjustments\")\n",
+    "velocity.unstack(level=0).plot(\n",
+    "    subplots=True, sharey=True, title=\"velocity of parameter adjustments\"\n",
+    ")\n",
+    "velocity.unstack(level=0).plot(\n",
+    "    subplots=True, sharey=False, title=\"velocity of parameter adjustments\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# close database\n",
+    "dta.close_database()\n",
+    "# close connection\n",
+    "reader_c.send(\"close\")\n",
+    "reader_c.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# delete the temporary folder\n",
+    "if DELETE_TEMP:\n",
+    "    shutil.rmtree(temp_folder)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# print the recommendations one last time\n",
+    "for key, df in optim_run_df.groupby(level=\"Optimizer\"):\n",
+    "    display(key,df.loc[(key, \"reccomendation\"), param_specs.index])"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
-- 
GitLab