From b0c6a0ac2d0e033c6b6f792c996832ec75f61efb Mon Sep 17 00:00:00 2001
From: Sebastian Kerger <sebastian.kerger@rwth-aachen.de>
Date: Mon, 17 Mar 2025 13:11:55 +0100
Subject: [PATCH] Upload New File

---
 .../water-quality-optimization.ipynb | 769 ++++++++++++++++++
 1 file changed, 769 insertions(+)
 create mode 100644 Water quality calibration/water-quality-optimization.ipynb

diff --git a/Water quality calibration/water-quality-optimization.ipynb b/Water quality calibration/water-quality-optimization.ipynb
new file mode 100644
index 0000000..2cd219b
--- /dev/null
+++ b/Water quality calibration/water-quality-optimization.ipynb
@@ -0,0 +1,769 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Script to optimize the simulation parameters for a water quality model in MIKE+\n",
+    "\n",
+    "The nevergrad package is used for optimization.\n",
+    "\n",
+    "Notes:\n",
+    "- mikepluspy requires a full MIKE+ license to run. In many cases the seat of the license is not properly released; closing Python and waiting some time should fix this.\n",
+    "- With multithreading, results may take some time to show up in the output, even if the cell appears to be completed.\n",
+    "\n",
+    "Other scripts to run:\n",
+    "- First run optimization_prep if wanted.\n",
+    "- That script creates the reference data and also the optim_parts.xlsx, which contains the timeframes for the optimization.\n",
+    "\n",
+    "Things to do in MIKE+:\n",
+    "\n",
+    "Water quality models:\n",
+    "- activate Transport in \"Model Type\"\n",
+    "- set at least one WQ component\n",
+    "- set the SWQ advanced methods\n",
+    "- double-check the method under \"Build up/wash off\"\n",
+    "- check the WQ boundary conditions and that their WQ component is the WQ component set above\n",
+    "- activate Transport in the simulation setup\n",
+    "\n",
+    "Sediment transport models:\n",
+    "- activate Sediment Transport in \"Model Type\"\n",
+    "- set all relevant parameters under sediment transport:\n",
+    "  General parameters: density and porosity\n",
+    "  Sediment fractions: insert a new fraction and use a cohesive sediment fraction, as non-cohesive fractions are not fully supported yet\n",
+    "- change the WQ boundary conditions to type \"Sediment concentration\" and type in the name of the sediment transport component defined above\n",
+    "- under SWQ advanced methods: change the surface load type to \"Sediment\" and choose the method you want to use\n",
+    "- check that the result files of ST save the concentration\n",
+    "- run all models with the ST results to get the test results for the script below\n",
+    "- check the optim_parts.xlsx and make sure the result files are the ST files and that the column name is changed to concentration\n",
+    "- activate Sediment Transport in the simulation setup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# import packages\n",
+    "import typing\n",
+    "import datetime\n",
+    "import shutil\n",
+    "from time import sleep\n",
+    "from warnings import warn\n",
+    "from pathlib import Path\n",
+    "\n",
+    "import mikeio\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import nevergrad as ng\n",
+    "import matplotlib as mpl\n",
+    "import matplotlib.pyplot as plt\n",
+    "from tqdm.notebook import tqdm\n",
+    "import IPython.display as idsp\n",
+    "\n",
+    "# import simulation scripts from the project ENTfrachtEN\n",
+    "from entfrachten.optim import simulation, loss_functions, optim_dumper"
+   ]
+  },
"source": [ + "## Constants, you should not need to adjust these, unless you are debugging." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# output heaps of additional data, may be useful for debugging\n", + "VERBOSE_OUTPUT = False\n", + "# delete the temp folder at end\n", + "DELETE_TEMP = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Constants, you should adjust" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Number of simulations allowed for optimization\n", + "optimization_budget = 100\n", + "# Number of seats for the Engine, limits amount of simulations we can run at once. Default of MIKE+ Licence is 4\n", + "engine_seats = 4\n", + "# set a correation factor to multiply the data of the measurements if necessary. Default 1. ENTfrachtEN wastewater = 3.3757\n", + "factor_underestimation = 1\n", + "# time string for temp and output folder\n", + " # if temp_folder exists, this script tries to continue the optimization.\n", + " # Note that continuing the optimization does not check whether settings match;\n", + " # do not change anything when continuing an optimization run!\n", + "time_path_part = datetime.datetime.now().strftime(\"%Y-%m-%dT%H.%M.%S\")\n", + "model_folder = Path(\"example_folder/\")\n", + "# path of .mupp file, relative to model_folder\n", + "db_path_rel = \"example.mupp\"\n", + "# path of table which specifies which parts of the simulation result should be read, relative to model_folder.\n", + " # from optimization_prep.ipnyb\n", + "result_spec_path = \"optim_parts.xlsx\"\n", + "# path of reference file, relative to model_folder\n", + " # This is expected to be a table with 3 Columns: \"Label\", \"Datetime\" and a data column, where the name does not matter.\n", + " # Label has to be consistent with result_spec\n", + "reference_path = \"reference.csv\"\n", + "# path of parameter configuration table, relative to model_folder.\n", + " # Specifies which parameters are optimized an their allowed range\n", + "param_spec_path = \"parameter_config.xlsx\"\n", + "# choose optimizers\n", + " # for all options:\n", + " # print(sorted(ng.optimizers.registry.keys()))\n", + "optimizers = [\"BayesOptimBO\"]\n", + "# loss_fn is later used for optimization, use this line to choose a loss function.\n", + "loss_fn = loss_functions.mean_negative_bounded_nse\n", + "# this value is later used to penalize errors. 
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## create temp(orary) and out(put) folders for the simulation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# a folder where used files are saved\n",
+    "# note that this should be in the same folder as the model folder, so that relative paths lead to the same files\n",
+    "temp_folder = Path(\"tmp_\" + time_path_part)\n",
+    "# output folder\n",
+    "out_folder = Path(\"out_\" + time_path_part)\n",
+    "# create output folder\n",
+    "out_folder.mkdir(exist_ok=True, parents=True)\n",
+    "if not temp_folder.exists():\n",
+    "    # make a copy of the model folder, since the program might mess with the model\n",
+    "    shutil.copytree(model_folder, temp_folder)\n",
+    "    should_continue_optimization = False\n",
+    "    print(\"starting new optimization.\")\n",
+    "else:\n",
+    "    should_continue_optimization = True\n",
+    "    print(\"continuing previous optimization.\")\n",
+    "\n",
+    "db_path = temp_folder / db_path_rel"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# display the temp folder. If an error occurs, it may be useful to check it.\n",
+    "# The folder path is also needed to resume an interrupted optimization.\n",
+    "temp_folder.absolute()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## read the input data for the simulation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def file_to_df(path: Path) -> pd.DataFrame:\n",
+    "    \"\"\"Tries to read the file at path as a pandas DataFrame.\"\"\"\n",
+    "    if path.suffix == \".xlsx\":\n",
+    "        return pd.read_excel(path)\n",
+    "    elif path.suffix == \".dfs0\":\n",
+    "        return mikeio.read(filename=path).to_dataframe()\n",
+    "    elif path.suffix == \".csv\":\n",
+    "        return pd.read_csv(path)\n",
+    "    else:\n",
+    "        raise NotImplementedError(f\"No method for {path.suffix} implemented.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# read events to be simulated\n",
+    "result_spec = file_to_df(temp_folder / result_spec_path).set_index(\"Label\")\n",
+    "result_spec[\"ResultReach\"] = result_spec[\"ResultReach\"].fillna(\"\").astype(str)\n",
+    "result_spec[\"ResultNode\"] = result_spec[\"ResultNode\"].fillna(\"\").astype(str)\n",
+    "assert (result_spec[\"Start\"] <= result_spec[\"End\"]).all(\n",
+    "    axis=None\n",
+    "), \"Event end needs to be after start.\"\n",
+    "result_spec[\"SimulationName\"] = result_spec[\"SimulationName\"].astype(str)\n",
+    "assert result_spec.index.is_unique, \"Need unique labels\"\n",
+    "result_spec"
+   ]
+  },
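+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Before reading the reference file, a hedged illustration of the expected layout: three columns \"Label\", \"Datetime\" and one data column whose name does not matter. All values below are made up."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hypothetical example of the expected reference table layout; labels must\n",
+    "# match result_spec, the data column name (\"concentration\" here) is arbitrary.\n",
+    "pd.DataFrame(\n",
+    "    {\n",
+    "        \"Label\": [\"event_1\", \"event_1\", \"event_2\"],\n",
+    "        \"Datetime\": pd.to_datetime(\n",
+    "            [\"2024-05-01 10:00\", \"2024-05-01 10:05\", \"2024-06-12 08:00\"]\n",
+    "        ),\n",
+    "        \"concentration\": [35.2, 40.1, 12.7],\n",
+    "    }\n",
+    ")"
+   ]
+  },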
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# read reference data\n",
+    "reference = file_to_df(temp_folder / reference_path)\n",
+    "# parse dates and remove timezone\n",
+    "reference[\"Datetime\"] = pd.to_datetime(reference[\"Datetime\"]).dt.tz_localize(None)\n",
+    "spec_labels = set(result_spec.index.unique())\n",
+    "ref_labels = set(reference[\"Label\"])\n",
+    "if ref_labels - spec_labels:\n",
+    "    warn(\"reference has labels not in spec, dropping superfluous labels.\")\n",
+    "    reference = reference.query(\"Label in @spec_labels\")\n",
+    "if spec_labels - ref_labels:\n",
+    "    raise Exception(\"Reference is missing labels specified by result_spec.\")\n",
+    "reference = reference.set_index([\"Label\", \"Datetime\"])\n",
+    "# convert to series and change type to 32-bit float\n",
+    "reference = reference.iloc[:, 0].astype(\"float32\")\n",
+    "reference = reference * factor_underestimation\n",
+    "reference"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference contains shifted times, adjust start/end in the event table\n",
+    "for label in result_spec.index:\n",
+    "    ref_part = reference.xs(label, level=\"Label\")\n",
+    "    result_spec.loc[label, \"Start\"] = ref_part.index.min()\n",
+    "    result_spec.loc[label, \"End\"] = ref_part.index.max()\n",
+    "result_spec[[\"Start\", \"End\"]]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# specify which parameters are optimized and in which range they should be\n",
+    "# the table could be made in python like this:\n",
+    "# param_spec = pd.DataFrame([\n",
+    "#     {\"Table\":\"msm_HParA\",\"Muid\":DEF_STRING,\"Column\":\"RedFactor\",\"Min\":.4,\"Max\":1,\"Steps\":4},\n",
+    "#     {\"Table\":\"msm_HParA\",\"Muid\":DEF_STRING,\"Column\":\"InitLoss\",\"Min\":0,\"Max\":5*M_TO_MM,\"Steps\":4}\n",
+    "# ])\n",
+    "# or loaded from a file\n",
+    "param_spec = file_to_df(temp_folder / param_spec_path)\n",
+    "# give param_spec a sensible string index, this is later used to pass optimization parameters as a dictionary\n",
+    "param_spec.index = pd.Index(\n",
+    "    param_spec[[\"Table\", \"Column\", \"Muid\"]].agg(\";\".join, axis=\"columns\"),\n",
+    "    name=\"ParamKey\",\n",
+    ")\n",
+    "assert (\n",
+    "    param_spec.index.is_unique\n",
+    "), \"Index needs to be unique; otherwise it indicates duplicates in the parameters.\"\n",
+    "param_spec"
+   ]
+  },
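+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For orientation, a hedged example of the parameter configuration layout. Besides Table, Muid, Column, Min and Max, the cells below also rely on a Default column (used for the test run and as the optimizer's initial value). All values are invented; only msm_HParA/RedFactor and the msm_ADDispersion entries appear elsewhere in this notebook."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hypothetical parameter_config layout; only the column names are taken from\n",
+    "# how param_spec is used in this notebook, the rows and values are made up.\n",
+    "pd.DataFrame(\n",
+    "    [\n",
+    "        {\"Table\": \"msm_HParA\", \"Muid\": \"example_muid\", \"Column\": \"RedFactor\",\n",
+    "         \"Min\": 0.4, \"Max\": 1.0, \"Default\": 0.7},\n",
+    "        {\"Table\": \"msm_ADDispersion\", \"Muid\": \"Global AD dispersion\", \"Column\": \"mindispcoef\",\n",
+    "         \"Min\": 0.1, \"Max\": 5.0, \"Default\": 1.0},\n",
+    "    ]\n",
+    ")"
+   ]
+  },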
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "db_path"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Create Simulation runs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "assert (\n",
+    "    db_path.exists()\n",
+    "), f\"DB not found, check path. Expected file in {db_path.absolute()}\"\n",
+    "sim_runner = simulation.SimulationRunner(\n",
+    "    db_path=db_path,\n",
+    "    result_folder=temp_folder,\n",
+    "    param_spec=param_spec,\n",
+    "    result_spec=result_spec,\n",
+    "    parallel_limit=engine_seats,\n",
+    ")\n",
+    "# test dummy\n",
+    "# sim_runner = simulation_dummy.SimulationDummy(reference=reference, param_spec=param_spec, result_spec=result_spec)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Read test results (not necessary)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read results which are already in the folder.\n",
+    "# This serves as a test for the result specification.\n",
+    "# This is skipped if the optimization was started before.\n",
+    "if not should_continue_optimization:\n",
+    "    existing_result = sim_runner.read_results()\n",
+    "    existing_result.index.names = reference.index.names\n",
+    "    print(\n",
+    "        \"Plotting results; if this looks very different from plots in MIKE+, unit conversion may need to be added.\"\n",
+    "    )\n",
+    "    for label in result_spec.index:\n",
+    "        try:\n",
+    "            txs = existing_result.xs(label, level=\"Label\")\n",
+    "            rxs = reference.xs(label, level=\"Label\")\n",
+    "            missing_ratio = txs.reindex_like(rxs).isna().mean()\n",
+    "            if missing_ratio > 0.2:\n",
+    "                warn(\n",
+    "                    f\"Label {label} has many missing values compared to the reference; missing ratio is {missing_ratio}\"\n",
+    "                )\n",
+    "            pd.DataFrame({\"ref\": rxs, \"test\": txs}).plot(\n",
+    "                title=str(label), figsize=(7, 2.5)\n",
+    "            )\n",
+    "        except Exception as ex:\n",
+    "            warn(str(ex))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## make a test run to see if the optimization works"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# test run\n",
+    "test_res_df = pd.DataFrame({\"Reference\": reference})\n",
+    "test_res_info = pd.DataFrame()\n",
+    "status_dh = idsp.display(\"starting\", display_id=True)\n",
+    "info_dh = idsp.display(\"\", display_id=True)\n",
+    "fig_dh = idsp.display(\"figure placeholder\", display_id=True)\n",
+    "for key in [\"Default\", \"Min\", \"Max\"]:\n",
+    "    status_dh.update(f\"Getting results for {key}\")\n",
+    "    csv_path = out_folder / f\"test_{key.lower()}.csv\"\n",
+    "    if not should_continue_optimization:\n",
+    "        # the ** in the call unrolls the series into keyword parameters\n",
+    "        test_res = sim_runner.simulation_run(**param_spec[key])\n",
+    "        test_res.to_csv(csv_path, index=True)\n",
+    "    else:\n",
+    "        assert (\n",
+    "            csv_path.exists()\n",
+    "        ), \"Tried to continue optimization run, but csv from test does not exist. This may mean that optimization was not started before.\"\n",
+    "        test_res = pd.read_csv(\n",
+    "            csv_path, index_col=[0, 1], parse_dates=True\n",
+    "        ).iloc[:, 0]\n",
+    "    info = test_res.describe()\n",
+    "    test_res = test_res.reindex_like(reference)\n",
+    "    info[\"missing ratio\"] = test_res.isna().mean()\n",
+    "    info[\"loss\"] = loss_fn(reference, test_res)\n",
+    "    test_res_info[key] = info\n",
+    "    test_res_df[key] = test_res\n",
+    "    info_dh.update(pd.DataFrame(test_res_info))\n",
+    "    status_dh.update(f\"Plotting results for {key}\")\n",
+    "    plt.ioff()\n",
+    "    label_group = test_res_df.groupby(level=\"Label\")\n",
+    "    fig, axes = plt.subplots(\n",
+    "        nrows=len(label_group),\n",
+    "        figsize=(6, 2.5 * len(label_group)),\n",
+    "        layout=\"constrained\",\n",
+    "        squeeze=False,\n",
+    "    )\n",
+    "    for row, (label, df) in enumerate(label_group):\n",
+    "        df.droplevel(\"Label\").plot(ax=axes[row, 0], title=str(label))\n",
+    "        axes[row, 0].xaxis.label.set_visible(False)\n",
+    "    fig.suptitle(\"Simulation with different parameters\")\n",
+    "    fig.savefig(out_folder / \"test_sim.png\", bbox_inches=\"tight\")\n",
+    "    fig_dh.update(fig)\n",
+    "    plt.ion()\n",
+    "    # plt.close(fig)\n",
+    "# check results for equality. This test assumes that the default may be equal to either min or max.\n",
+    "# But if all are the same, something is wrong with the simulation.\n",
+    "assert not (\n",
+    "    test_res_df[\"Default\"].equals(test_res_df[\"Min\"])\n",
+    "    and test_res_df[\"Default\"].equals(test_res_df[\"Max\"])\n",
+    "), \"Test results are equal. This may mean that parameters were not set at all.\"\n",
+    "status_dh.update(\"Finished\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_res_df.isna().mean()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## set all optimization parameters and constraints"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# define the parameter calibration as a minimization problem\n",
+    "def simulation_compare(**kwparams):\n",
+    "    \"\"\"Runs the simulation with the specified parameters and applies the loss function to the results.\n",
+    "\n",
+    "    Note that the kwparams keys need to match the index of param_spec.\"\"\"\n",
+    "    result = sim_runner.simulation_run(**kwparams)\n",
+    "    result_matched = result.reindex_like(reference)\n",
+    "    plain_loss = loss_fn(reference, result_matched)\n",
+    "    # values should not be missing, penalize by increasing the loss\n",
+    "    missing_ratio = result_matched.isna().mean()\n",
+    "    if missing_ratio > 0:\n",
+    "        warn(f\"Values are missing from result: {missing_ratio}\")\n",
+    "    return (1 - missing_ratio) * plain_loss + missing_ratio * loss_on_error\n",
+    "\n",
+    "\n",
+    "# define constraints for the parameters; takes parameters in the same format as simulation_compare.\n",
+    "# The nevergrad developers recommend using float-valued constraints, where >= 0 means ok:\n",
+    "# https://facebookresearch.github.io/nevergrad/optimization.html#optimization-with-constraints\n",
+    "# This function takes the parameter values as they are defined by nevergrad;\n",
+    "# for an instrumentation this is a tuple of args and a dict of kwargs.\n",
+    "def constraint_min_max(params_val):\n",
+    "    \"\"\"Constraint: mindispcoef should be smaller than maxdispcoef.\"\"\"\n",
+    "    mindispcoef = params_val[1][\"msm_ADDispersion;mindispcoef;Global AD dispersion\"]\n",
+    "    maxdispcoef = params_val[1][\"msm_ADDispersion;maxdispcoef;Global AD dispersion\"]\n",
+    "    return maxdispcoef - mindispcoef\n",
+    "\n",
+    "\n",
+    "# define optimization parameters as needed by nevergrad\n",
+    "ng_params = {}\n",
+    "# first collect a scalar parameter for each row of param_spec\n",
+    "for key in param_spec.index:\n",
+    "    vmin = param_spec.at[key, \"Min\"]\n",
+    "    vmax = param_spec.at[key, \"Max\"]\n",
+    "    vdef = param_spec.at[key, \"Default\"]\n",
+    "    ngp = ng.p.Scalar(init=vdef, lower=vmin, upper=vmax)\n",
+    "    ng_params[key] = ngp\n",
+    "# now turn the dict into an instrumentation\n",
+    "# the instrumentation is used to tell the optimizer how the parameters are passed to our function\n",
+    "instrumentation = ng.p.Instrumentation(**ng_params)\n",
+    "instrumentation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# register constraint\n",
+    "instrumentation.register_cheap_constraint(constraint_min_max)"
+   ]
+  },
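+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Before the real optimization below, a small hedged demo of the ask/tell loop on a throwaway optimizer: it shows how nevergrad hands the instrumented parameters back as keyword arguments keyed like param_spec.index, without running MIKE+. The quadratic dummy loss is made up purely for illustration."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hedged demo: a throwaway RandomSearch optimizer on the same instrumentation.\n",
+    "# No simulation is run; the dummy loss simply prefers small parameter values.\n",
+    "demo_optimizer = ng.optimizers.registry[\"RandomSearch\"](\n",
+    "    parametrization=instrumentation, budget=3, num_workers=1\n",
+    ")\n",
+    "for _ in range(demo_optimizer.budget):\n",
+    "    candidate = demo_optimizer.ask()\n",
+    "    # candidate.kwargs is a dict keyed like param_spec.index (\"Table;Column;Muid\")\n",
+    "    dummy_loss = sum(float(v) ** 2 for v in candidate.kwargs.values())\n",
+    "    demo_optimizer.tell(candidate, dummy_loss)\n",
+    "demo_optimizer.recommend().kwargs"
+   ]
+  },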
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def optim_run(optim_name):\n",
+    "    # optim_dumper is used to save/load the optimization state\n",
+    "    state_dumper = optim_dumper.OptimDumper(temp_folder / f\"dmp-{optim_name}\")\n",
+    "    has_dump = state_dumper.get_last_step() >= 0\n",
+    "    if has_dump != should_continue_optimization:\n",
+    "        print(\"Dump state and continuation setting do not match for\", optim_name)\n",
+    "    if not should_continue_optimization or not has_dump:\n",
+    "        # create optimizer\n",
+    "        print(\"starting new optimization for\", optim_name)\n",
+    "        optimizer = ng.optimizers.registry[optim_name](\n",
+    "            parametrization=instrumentation, budget=optimization_budget, num_workers=1\n",
+    "        )\n",
+    "        # tested parameters and losses are saved to lists for later evaluation\n",
+    "        params = []\n",
+    "        losses = []\n",
+    "    else:\n",
+    "        # load the last dumped optimization state\n",
+    "        print(\"loading optimizer\", optim_name)\n",
+    "        optimizer, params, losses = state_dumper.load()\n",
+    "\n",
+    "    # do optimization\n",
+    "    for step in tqdm(range(optimizer.num_tell, optimizer.budget)):\n",
+    "        # next parameter set to simulate\n",
+    "        candidate = optimizer.ask()\n",
+    "        loss = simulation_compare(*candidate.args, **candidate.kwargs)\n",
+    "        # tell the optimizer the loss value\n",
+    "        optimizer.tell(candidate, loss)\n",
+    "        # keep parameters and loss in a list\n",
+    "        params.append(candidate)\n",
+    "        losses.append(loss)\n",
+    "        # save optimization state to disk\n",
+    "        state_dumper.dump(optimizer, params, losses)\n",
+    "\n",
+    "    # optimization has finished, now get the recommendation and write the result into a dataframe\n",
+    "    recommendation = optimizer.recommend()\n",
+    "    # params contains the candidates as instrumentation objects.\n",
+    "    # The list comprehension creates a list of dicts of parameter values,\n",
+    "    # which pandas then transforms into a dataframe where each parameter has its own column.\n",
+    "    optim_run_df = pd.concat(\n",
+    "        [\n",
+    "            pd.DataFrame(data=[p.value[1] for p in params]),\n",
+    "            pd.DataFrame({\"Optimizer\": optimizer.name, \"Loss\": losses}),\n",
+    "        ],\n",
+    "        axis=\"columns\",\n",
+    "    )\n",
+    "    # also add a line for the recommendation\n",
+    "    rec_params = recommendation.value[1]\n",
+    "    optim_run_df.loc[\"recommendation\", rec_params.keys()] = rec_params\n",
+    "    optim_run_df.loc[\"recommendation\", \"Loss\"] = recommendation.loss\n",
+    "    optim_run_df.loc[\"recommendation\", \"Optimizer\"] = optimizer.name\n",
+    "    optim_run_df.to_csv(out_folder / f\"optim_run{optimizer.name}.csv\")\n",
+    "\n",
+    "    return optim_run_df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## start the optimization"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "optim_run_dfs = {}\n",
+    "for o in optimizers:\n",
+    "    optim_run_dfs[o] = optim_run(o)\n",
+    "    display(optim_run_dfs[o])\n",
+    "    optim_run_dfs[o].plot(subplots=True, figsize=(5, 2 * len(param_spec)))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## save all optimization results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# concatenate optim_run_dfs and output into a csv.\n",
+    "# optim_run_df already has the optimizer included; use .values instead of passing the dict, so that the optimizer is not added again.\n",
+    "optim_run_df = pd.concat(optim_run_dfs.values())\n",
+    "optim_run_df.to_csv(out_folder / \"optim_run.csv\")\n",
+    "optim_run_df.to_excel(out_folder / \"optim_run.xlsx\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# adjust index for evaluation\n",
+    "optim_run_df.index.name = \"Step\"\n",
+    "optim_run_df = optim_run_df.reset_index().set_index([\"Optimizer\", \"Step\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# Plot the loss for each step. This indicates how fast (and if at all) the optimizer converged to a solution.\n",
+    "# Note that some optimizers are not designed to converge.\n",
+    "optim_run_df[\"Loss\"].unstack(level=\"Optimizer\").plot(\n",
+    "    subplots=True, sharey=True, title=\"loss over steps\"\n",
+    ")[0].get_figure().savefig(out_folder / \"loss_over_steps.png\", bbox_inches=\"tight\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Shows the cumulative minimum of the loss over the steps.\n",
+    "# Compared to the plain loss plot, the cumulative minimum filters out the effect of exploration.\n",
+    "# This plot can help to judge how fast the optimizer found a good solution.\n",
+    "optim_run_df[\"Loss\"].unstack(level=0).cummin().plot(\n",
+    "    title=\"cummin loss over steps\"\n",
+    ").get_figure().savefig(out_folder / \"loss_cummin.png\", bbox_inches=\"tight\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Bar plot comparing the loss of the recommendation from each optimizer.\n",
+    "# This is not interesting if only one optimizer was used.\n",
+    "ax = optim_run_df[\"Loss\"].unstack(level=0).loc[\"recommendation\"].plot(kind=\"bar\")\n",
+    "ax.bar_label(ax.containers[0])\n",
+    "optim_run_df[\"Loss\"].xs(\"recommendation\", level=\"Step\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Runs the simulation for the recommended parameters from each optimizer.\n",
+    "# The results are saved as a csv together with the reference values.\n",
+    "# Plots are also created, which can be used to judge the quality of the simulation.\n",
+    "res_dict = {\"ref\": reference}\n",
+    "for key, df in optim_run_df.groupby(level=\"Optimizer\"):\n",
+    "    ps = df.loc[(key, \"recommendation\"), param_spec.index]\n",
+    "    display(ps)\n",
+    "    test_res_def = sim_runner.simulation_run(**ps)\n",
+    "    res_dict[key] = test_res_def\n",
+    "best_res = pd.concat(res_dict, axis=\"columns\")\n",
+    "best_res.to_csv(out_folder / \"best_res.csv\")\n",
+    "label_group = best_res.groupby(level=\"Label\")\n",
+    "fig, axes = plt.subplots(\n",
+    "    nrows=len(label_group),\n",
+    "    figsize=(6, 2.5 * len(label_group)),\n",
+    "    layout=\"constrained\",\n",
+    "    squeeze=False,\n",
+    ")\n",
+    "for row, (label, df) in enumerate(label_group):\n",
+    "    df.droplevel(\"Label\").plot(ax=axes[row, 0], title=str(label))\n",
+    "fig.suptitle(\"Optimizer Result and Reference\")\n",
+    "fig.savefig(out_folder / \"best_res.png\", bbox_inches=\"tight\")"
+   ]
+  },
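+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The next cell measures how much the optimizer moves through parameter space. With the parameter vector $p_t$ at step $t$ and the elementwise bounds $p_{\\min}, p_{\\max}$ from param_spec, it computes\n",
+    "\n",
+    "$$v_t = \\left\\lVert \\frac{p_t - p_{t-1}}{p_{\\max} - p_{\\min}} \\right\\rVert_2,$$\n",
+    "\n",
+    "i.e. the step length after projecting the parameters into the unit hypercube (division is elementwise)."
+   ]
+  },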
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# measure how much the parameters are adjusted:\n",
+    "# first normalize to the allowed range, then take the 2-norm of the difference to the previous step.\n",
+    "# Geometric interpretation: project the parameters into the unit hypercube and calculate the velocity.\n",
+    "\n",
+    "# only look at params and exclude the recommendation\n",
+    "run_param_df = optim_run_df.loc[\n",
+    "    optim_run_df.index.get_level_values(\"Step\") != \"recommendation\", param_spec.index\n",
+    "]\n",
+    "normalized = (run_param_df - param_spec[\"Min\"]) / (\n",
+    "    param_spec[\"Max\"] - param_spec[\"Min\"]\n",
+    ")\n",
+    "velocity = normalized.diff()\n",
+    "# 2-norm (root of sum of squares)\n",
+    "velocity = (velocity**2).sum(axis=\"columns\") ** 0.5\n",
+    "velocity.unstack(level=0).plot(title=\"velocity of parameter adjustments\")\n",
+    "velocity.unstack(level=0).plot(\n",
+    "    subplots=True, sharey=True, title=\"velocity of parameter adjustments\"\n",
+    ")\n",
+    "velocity.unstack(level=0).plot(\n",
+    "    subplots=True, sharey=False, title=\"velocity of parameter adjustments\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## close simulation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# clean up used resources\n",
+    "sim_runner.close()\n",
+    "# delete the temp folder\n",
+    "if DELETE_TEMP:\n",
+    "    shutil.rmtree(temp_folder)\n",
+    "# print the recommendations\n",
+    "for key, df in optim_run_df.groupby(level=\"Optimizer\"):\n",
+    "    display(key, df.loc[(key, \"recommendation\"), param_spec.index])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# for testing, compare to ideal parameters\n",
+    "recs = {}\n",
+    "if hasattr(sim_runner, \"ideal_params\"):\n",
+    "    recs[\"ideal\"] = sim_runner.ideal_params\n",
+    "for key, df in optim_run_df.groupby(level=\"Optimizer\"):\n",
+    "    recs[key] = df.loc[(key, \"recommendation\"), param_spec.index]\n",
+    "pd.DataFrame(recs)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (Spyder)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.21"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
-- 
GitLab