%% Cell type:markdown id: tags:
# Script to optimize the simulation parameters for a water quality model in MIKE+
The nevergrad package is used for optimization.
Note:
- mikepluspy requires a full MIKE+ license to run. In many cases the license seat is not released properly; closing Python and waiting some time should fix this.
- With multithreading, results may take some time to show up in the output, even after the cell appears to have completed.
Other scripts to run:
- First run optim_prep if wanted.
- That script creates the reference data and also the optim_parts Excel file, which contains the timeframes for the optimization (a minimal example of the expected layout is sketched in the cell below this list).
Things to do in MIKE+:
Water quality models:
- activate Transport in "Model Type"
- set at least one WQ component
- set the SWQ advanced methods
- double-check the method under Build-up/wash-off
- check the WQ boundary conditions and that the WQ component is the one set above
- activate Transport in the simulation setup
Sediment transport models:
- activate Sediment Transport in "Model Type"
- set all relevant parameters under Sediment Transport:
  - General Parameters: density and porosity
  - Sediment fractions: insert a new fraction and use a cohesive sediment fraction, as non-cohesive fractions are not fully supported yet
- change the WQ boundary conditions to type "Sediment concentration" and enter the name of the sediment transport component defined above
- under SWQ advanced methods: change the surface load type to "Sediment" and choose the method you want to use
- check that the ST result files save the concentration
- run all models with the ST results to get the test results for the script below
- check the optim_parts Excel file and make sure the result files are the ST files and that the column name is changed to concentration
- activate Sediment Transport in the simulation setup
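Below is a purely illustrative sketch of the layout these two inputs are expected to have. All values are made up, and only the columns referenced directly in this notebook are shown; the SimulationRunner may require further columns, e.g. the result column name mentioned above.
%% Cell type:code id: tags:
``` python3
# Hypothetical example input, not project data.
import pandas as pd
# optim_parts.xlsx / result_spec: one row per event to simulate.
result_spec_example = pd.DataFrame(
    {
        "Label": ["event_1"],
        "SimulationName": ["WQ_base"],
        "ResultReach": ["reach_xy"],
        "ResultNode": [""],
        "Start": [pd.Timestamp("2021-06-01 12:00")],
        "End": [pd.Timestamp("2021-06-01 18:00")],
    }
)
# reference.csv: columns "Label", "Datetime" and one data column (its name does not matter).
reference_example = pd.DataFrame(
    {
        "Label": ["event_1", "event_1"],
        "Datetime": ["2021-06-01 12:00", "2021-06-01 12:05"],
        "concentration": [0.0, 12.5],
    }
)
```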
%% Cell type:code id: tags:
``` python3
# import packages
import typing
import datetime
import shutil
from time import sleep
from warnings import warn
from pathlib import Path
import mikeio
import numpy as np
import pandas as pd
import nevergrad as ng
import matplotlib as mpl
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import IPython.display as idsp
# import simulation scripts from the project ENTfrachtEN
from entfrachten.optim import simulation, loss_functions, optim_dumper
```
%% Cell type:markdown id: tags:
## Constants you should not need to adjust unless you are debugging
%% Cell type:code id: tags:
``` python3
# output heaps of additional data, may be useful for debugging
VERBOSE_OUTPUT = False
# delete the temp folder at end
DELETE_TEMP = True
```
%% Cell type:markdown id: tags:
## Constants you should adjust
%% Cell type:code id: tags:
``` python3
# Number of simulations allowed for optimization
optimization_budget = 100
# Number of seats for the engine; limits how many simulations we can run at once. The default for a MIKE+ license is 4.
engine_seats = 4
# Correction factor by which the measurement data are multiplied, if necessary. Default 1. ENTfrachtEN wastewater = 3.3757
factor_underestimation = 1
# time string for temp and output folder
# if temp_folder exists, this script tries to continue the optimization.
# Note that continuing the optimization does not check whether settings match;
# do not change anything when continuing an optimization run!
time_path_part = datetime.datetime.now().strftime("%Y-%m-%dT%H.%M.%S")
model_folder = Path("example_folder/")
# path of .mupp file, relative to model_folder
db_path_rel = "example.mupp"
# path of the table which specifies which parts of the simulation result should be read, relative to model_folder.
# from optimization_prep.ipynb
result_spec_path = "optim_parts.xlsx"
# path of reference file, relative to model_folder
# This is expected to be a table with three columns: "Label", "Datetime" and a data column whose name does not matter.
# Labels have to be consistent with result_spec.
reference_path = "reference.csv"
# path of parameter configuration table, relative to model_folder.
# Specifies which parameters are optimized and their allowed range
param_spec_path = "parameter_config.xlsx"
# choose optimizers
# for all options:
# print(sorted(ng.optimizers.registry.keys()))
optimizers = ["BayesOptimBO"]
# loss_fn is later used for optimization; use this line to choose a loss function.
loss_fn = loss_functions.mean_negative_bounded_nse
# this value is later used to penalize errors. A simulation result consisting only of NaN values would return exactly this value.
loss_on_error = 1 # alternative:loss_fn(reference, -reference)
display(loss_on_error)
```
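%% Cell type:markdown id: tags:
For orientation, the cell below is only a sketch of what mean_negative_bounded_nse plausibly computes; the authoritative implementation is in entfrachten.optim.loss_functions. The sketch assumes the standard Nash-Sutcliffe efficiency (NSE), bounded as NSE/(2-NSE) so that it lies in (-1, 1], then negated and averaged over the event labels so that smaller is better; an all-NaN result then yields exactly 1, which matches loss_on_error above.
%% Cell type:code id: tags:
``` python3
# Sketch only, under the assumptions described above; not the project implementation.
def sketch_mean_negative_bounded_nse(reference: pd.Series, simulated: pd.Series) -> float:
    """Per-label bounded NSE, negated and averaged; assumes a (Label, Datetime) MultiIndex."""
    bounded_nse_values = []
    for label in reference.index.get_level_values("Label").unique():
        obs = reference.xs(label, level="Label")
        sim = simulated.xs(label, level="Label").reindex_like(obs)
        mask = obs.notna() & sim.notna()
        obs, sim = obs[mask], sim[mask]
        if len(obs) == 0:
            bounded_nse_values.append(-1.0)  # worst case for an event with no usable values
            continue
        nse = 1.0 - ((obs - sim) ** 2).sum() / ((obs - obs.mean()) ** 2).sum()
        bounded_nse_values.append(nse / (2.0 - nse))  # bounded NSE lies in (-1, 1]
    return -float(np.mean(bounded_nse_values))
```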
%% Cell type:markdown id: tags:
## create temp(orary) and out(put) folders for the simulation
%% Cell type:code id: tags:
``` python3
# a folder where used files are saved
# note that this should be located in the same parent folder as the model folder, so that relative paths resolve to the same files
temp_folder = Path("tmp_" + time_path_part)
# output folder
out_folder = Path("out_" + time_path_part)
# create output folder
out_folder.mkdir(exist_ok=True, parents=True)
if not temp_folder.exists():
    # make a copy of the model folder, since the program might mess with the model
    shutil.copytree(model_folder, temp_folder)
    should_continue_optimization = False
    print("starting new optimization.")
else:
    should_continue_optimization = True
    print("continuing previous optimization.")
db_path = temp_folder / db_path_rel
```
%% Cell type:code id: tags:
``` python3
# display the temp folder. If an error occurs, it may be useful to check it.
# The folder path is also needed to resume an interrupted optimization.
temp_folder.absolute()
```
%% Cell type:markdown id: tags:
## read the input data for the simulation
%% Cell type:code id: tags:
``` python3
def file_to_df(path: Path) -> pd.DataFrame:
    """Tries to read the file at path as a pandas DataFrame."""
    if path.suffix == ".xlsx":
        return pd.read_excel(path)
    elif path.suffix == ".dfs0":
        return mikeio.read(filename=path).to_dataframe()
    elif path.suffix == ".csv":
        return pd.read_csv(path)
    else:
        raise NotImplementedError(f"No method for {path.suffix} implemented.")
```
%% Cell type:code id: tags:
``` python3
# read events to be simulated
result_spec = file_to_df(temp_folder / result_spec_path).set_index("Label")
result_spec["ResultReach"] = result_spec["ResultReach"].fillna("").astype(str)
result_spec["ResultNode"] = result_spec["ResultNode"].fillna("").astype(str)
assert (result_spec["Start"] <= result_spec["End"]).all(
axis=None
), "Event end needs to be after start."
result_spec["SimulationName"] = result_spec["SimulationName"].astype(str)
assert result_spec.index.is_unique, "Need unique labels"
result_spec
```
%% Cell type:code id: tags:
``` python3
# read reference data
reference = file_to_df(temp_folder / reference_path)
# parse dates and remove timezone
reference["Datetime"] = pd.to_datetime(reference["Datetime"]).dt.tz_localize(None)
spec_labels = set(result_spec.index.unique())
ref_labels = set(reference["Label"])
if spec_labels != ref_labels and ref_labels.issuperset(spec_labels):
    warn("reference has labels not in spec, dropping superfluous labels.")
    reference = reference.query("Label in @spec_labels")
if spec_labels != ref_labels and ref_labels.issubset(spec_labels):
    raise Exception("Reference is missing labels specified by result_spec.")
reference = reference.set_index(["Label", "Datetime"])
# convert to series and change type to 32-bit float
reference = reference.iloc[:,0].astype("float32")
reference = reference * factor_underestimation
reference
```
%% Cell type:code id: tags:
``` python3
# the reference may contain shifted times; adjust Start/End in the event table accordingly
for label in result_spec.index:
    ref_part = reference.xs(label, level="Label")
    result_spec.loc[label, "Start"] = ref_part.index.min()
    result_spec.loc[label, "End"] = ref_part.index.max()
result_spec[["Start", "End"]]
```
%% Cell type:code id: tags:
``` python3
# specify which parameters are optimized and in which range they should be
# could make the table in python like this
# param_specs = pd.DataFrame([
# {"Table":"msm_HParA","Muid":DEF_STRING,"Column":"RedFactor","Min":.4,"Max":1,"Steps":4},
# {"Table":"msm_HParA","Muid":DEF_STRING,"Column":"InitLoss","Min":0,"Max":5*M_TO_MM,"Steps":4}
# ])
# or load from a file
param_spec = file_to_df(temp_folder / param_spec_path)
# give param_spec a sensible string index; this is later used to pass optimization parameters as a dictionary
param_spec.index = pd.Index(
    param_spec[["Table", "Column", "Muid"]].agg(";".join, axis="columns"),
    name="ParamKey",
)
assert param_spec.index.is_unique, "Index needs to be unique; duplicates indicate duplicated parameter definitions."
param_spec
```
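%% Cell type:markdown id: tags:
As a purely hypothetical illustration (the numeric values are made up), a single row of the parameter configuration table and the ParamKey it produces could look like the cell below; this key format is what simulation_run(**params) and the constraint function further down look up.
%% Cell type:code id: tags:
``` python3
# Hypothetical example row, not project data.
example_row = pd.DataFrame(
    [
        {
            "Table": "msm_ADDispersion",
            "Muid": "Global AD dispersion",
            "Column": "mindispcoef",
            "Default": 1.0,
            "Min": 0.1,
            "Max": 5.0,
        }
    ]
)
# same key construction as above: "Table;Column;Muid"
example_key = example_row[["Table", "Column", "Muid"]].agg(";".join, axis="columns")[0]
# -> "msm_ADDispersion;mindispcoef;Global AD dispersion"
example_key
```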
%% Cell type:code id: tags:
``` python3
db_path
```
%% Cell type:markdown id: tags:
## Create Simulation runs
%% Cell type:code id: tags:
``` python3
assert (
db_path.exists()
), f"DB not found, check path. Expected file in {db_path.absolute()}"
sim_runner = simulation.SimulationRunner(
    db_path=db_path,
    result_folder=temp_folder,
    param_spec=param_spec,
    result_spec=result_spec,
    parallel_limit=engine_seats,
)
# Test dummy
#sim_runner = simulation_dummy.SimulationDummy(reference=reference,param_spec=param_spec,result_spec=result_spec)
```
%% Cell type:markdown id: tags:
## Read test results (optional)
%% Cell type:code id: tags:
``` python3
# Read results which are already in folder.
# This serves as a test for the result specification.
# This is skipped if optimization was started before.
if not should_continue_optimization:
    existing_result = sim_runner.read_results()
    existing_result.index.names = reference.index.names
    print(
        "Plotting results. If this looks very different from the plots in MIKE+, unit conversion may need to be added."
    )
    for label in result_spec.index:
        try:
            txs = existing_result.xs(label, level="Label")
            rxs = reference.xs(label, level="Label")
            missing_ratio = txs.reindex_like(rxs).isna().mean()
            if missing_ratio > 0.2:
                warn(
                    f"Label {label} has many missing values compared to the reference; missing ratio is {missing_ratio}."
                )
            pd.DataFrame({"ref": rxs, "test": txs}).plot(
                title=str(label), figsize=(7, 2.5)
            )
        except Exception as ex:
            warn(str(ex))
```
%% Cell type:markdown id: tags:
## make a test run to see if the optimization works
%% Cell type:code id: tags:
``` python3
# test run
test_res_df = pd.DataFrame({"Reference": reference})
test_res_info = pd.DataFrame()
status_dh = idsp.display("starting", display_id=True)
info_dh = idsp.display("", display_id=True)
fig_dh = idsp.display("figure placeholder", display_id=True)
for key in ["Default", "Min", "Max"]:
    status_dh.update(f"Getting results for {key}")
    csv_path = out_folder / f"test_{key.lower()}.csv"
    if not should_continue_optimization:
        # the ** in the call unrolls the series into keyword parameters
        test_res = sim_runner.simulation_run(**param_spec[key])
        test_res.to_csv(csv_path, index=True)
    else:
        assert (
            csv_path.exists()
        ), "Tried to continue an optimization run, but the csv from the test does not exist. This may mean that the optimization was not started before."
        test_res = pd.read_csv(
            csv_path, index_col=[0, 1], parse_dates=True
        ).iloc[:, 0]
    info = test_res.describe()
    test_res = test_res.reindex_like(reference)
    info["missing ratio"] = test_res.isna().mean()
    info["loss"] = loss_fn(reference, test_res)
    test_res_info[key] = info
    test_res_df[key] = test_res
    info_dh.update(pd.DataFrame(test_res_info))
    status_dh.update(f"Plotting results for {key}")
    plt.ioff()
    label_group = test_res_df.groupby(level="Label")
    fig, axes = plt.subplots(
        nrows=len(label_group),
        figsize=(6, 2.5 * len(label_group)),
        layout="constrained",
        squeeze=False,
    )
    for row, (label, df) in enumerate(label_group):
        df.droplevel("Label").plot(ax=axes[row, 0], title=str(label))
        axes[row, 0].xaxis.label.set_visible(False)
    fig.suptitle("Simulation with different parameters")
    fig.savefig(out_folder / "test_sim.png", bbox_inches="tight")
    fig_dh.update(fig)
    plt.ion()
    # plt.close(fig)
# check results for equality. This test assumes that the default may be equal to either min or max.
# But if all are the same, something is wrong with the simulation.
assert not (
    test_res_df["Default"].equals(test_res_df["Min"])
    and test_res_df["Default"].equals(test_res_df["Max"])
), "Test results are equal. This may mean that parameters were not set at all."
status_dh.update("Finished")
```
%% Cell type:code id: tags:
``` python3
test_res_df.isna().mean()
```
%% Cell type:markdown id: tags:
## set all optimization parameters and constraints
%% Cell type:code id: tags:
``` python3
# define the parameter calibration as a minimization problem
def simulation_compare(**kwparams):
    """Runs the simulation with the specified parameters and applies the loss function to the results.
    Note that the kwparams keys need to match the index of param_spec."""
    result = sim_runner.simulation_run(**kwparams)
    result_matched = result.reindex_like(reference)
    plain_loss = loss_fn(reference, result_matched)
    # values should not be missing, penalize by increasing the loss
    missing_ratio = result_matched.isna().mean()
    if missing_ratio > 0:
        warn(f"Values are missing from result: {missing_ratio}")
    return (1 - missing_ratio) * plain_loss + missing_ratio * loss_on_error
# define constraints for the parameters; takes parameters in the same format as simulation_compare
# the nevergrad developers recommend float-valued constraints, where a value >= 0 means the constraint is satisfied
# https://facebookresearch.github.io/nevergrad/optimization.html#optimization-with-constraints
# this function takes the parameter values as they are defined by nevergrad:
# for an instrumentation this is a tuple of args and a dict of kwargs
def constraint_min_max(params_val):
    """Constraint: mindispcoef should be smaller than maxdispcoef"""
    mindispcoef = params_val[1]["msm_ADDispersion;mindispcoef;Global AD dispersion"]
    maxdispcoef = params_val[1]["msm_ADDispersion;maxdispcoef;Global AD dispersion"]
    return maxdispcoef - mindispcoef
# define optimization parameters as needed by nevergrad
ng_params = {}
# first make a dict of scalar parameters with the information from param_spec
for key in param_spec.index:
    vmin = param_spec.at[key, "Min"]
    vmax = param_spec.at[key, "Max"]
    vdef = param_spec.at[key, "Default"]
    ngp = ng.p.Scalar(init=vdef, lower=vmin, upper=vmax)
    ng_params[key] = ngp
# now turn the dict into an instrumentation
# the instrumentation tells the optimizer how the parameters are passed to our function
instrumentation = ng.p.Instrumentation(**ng_params)
instrumentation
```
%% Cell type:code id: tags:
``` python3
# register constraint
instrumentation.register_cheap_constraint(constraint_min_max)
```
%% Cell type:code id: tags:
``` python3
def optim_run(optim_name):
    # optim_dumper is used to save/load the optimization state
    state_dumper = optim_dumper.OptimDumper(temp_folder / f"dmp-{optim_name}")
    has_dump = state_dumper.get_last_step() >= 0
    if should_continue_optimization and not has_dump:
        print("No dump exists for ", optim_name)
    if not should_continue_optimization or not has_dump:
        # create optimizer
        print("starting new optimization for ", optim_name)
        optimizer = ng.optimizers.registry[optim_name](
            parametrization=instrumentation, budget=optimization_budget, num_workers=1
        )
        # tested parameters and losses are saved to lists for later evaluation.
        params = []
        losses = []
    else:
        # load the last dumped optimization state
        print("loading optimizer ", optim_name)
        optimizer, params, losses = state_dumper.load()
    # do optimization
    for step in tqdm(range(optimizer.num_tell, optimizer.budget)):
        # next parameter set to simulate
        candidate = optimizer.ask()
        loss = simulation_compare(*candidate.args, **candidate.kwargs)
        # tell the optimizer the loss value
        optimizer.tell(candidate, loss)
        # keep parameters and loss in a list
        params.append(candidate)
        losses.append(loss)
        # save optimization state to disk
        state_dumper.dump(optimizer, params, losses)
    # optimization has finished. now get the recommendation and write the result into a dataframe
    recommendation = optimizer.recommend()
    # params contains the candidates as instrumentation objects.
    # The list comprehension creates a list of dicts of parameter values,
    # which pandas then transforms into a dataframe where each parameter has its own column.
    optim_run_df = pd.concat(
        [
            pd.DataFrame(data=[p.value[1] for p in params]),
            pd.DataFrame({"Optimizer": optimizer.name, "Loss": losses}),
        ],
        axis="columns",
    )
    # also add a line for the recommendation
    rec_params = recommendation.value[1]
    optim_run_df.loc["recommendation", rec_params.keys()] = rec_params
    optim_run_df.loc["recommendation", "Loss"] = recommendation.loss
    optim_run_df.loc["recommendation", "Optimizer"] = optimizer.name
    optim_run_df.to_csv(out_folder / f"optim_run{optimizer.name}.csv")
    return optim_run_df
```
%% Cell type:markdown id: tags:
## start the optimization
%% Cell type:code id: tags:
``` python3
optim_run_dfs = {}
for o in optimizers:
    optim_run_dfs[o] = optim_run(o)
    display(optim_run_dfs[o])
    optim_run_dfs[o].plot(subplots=True, figsize=(5, 2 * len(param_spec)))
```
%% Cell type:markdown id: tags:
## save all optimization results
%% Cell type:code id: tags:
``` python3
# concatenate optim_run_dfs and output into a csv.
# optim_run_df already has optimizer included, use .values instead of passing the dict, so that the optimizer is not added again.
optim_run_df = pd.concat(optim_run_dfs.values())
optim_run_df.to_csv(out_folder / "optim_run.csv")
optim_run_df.to_excel(out_folder / "optim_run.xlsx")
```
%% Cell type:code id: tags:
``` python3
# adjust index for evaluation
optim_run_df.index.name = "Step"
optim_run_df = optim_run_df.reset_index().set_index(["Optimizer", "Step"])
```
%% Cell type:code id: tags:
``` python3
# Plot the loss for each step. This indicates how fast (and if at all) the optimizer converged to a solution.
# Note that some optimizers are not designed to converge.
optim_run_df["Loss"].unstack(level="Optimizer").plot(
subplots=True, sharey=True, title="loss over steps"
)[0].get_figure().savefig(out_folder / "loss_over_steps.png", bbox_inches="tight")
```
%% Cell type:code id: tags:
``` python3
# Shows the cumulative minimum of the loss over the steps.
# Compared to the plain loss plot, cumulative minimum filters out the effect of exploration.
# This plot can help to judge how fast the optimizer found a good solution.
optim_run_df["Loss"].unstack(level=0).cummin().plot(
title="cummin loss over steps"
).get_figure().savefig(out_folder / "loss_cummin.png", bbox_inches="tight")
```
%% Cell type:code id: tags:
``` python3
# Bar plot comparing the loss of the recommendation from each optimizer.
# This is not interesting if only one optimizer was used.
ax = optim_run_df["Loss"].unstack(level=0).loc["recommendation"].plot(kind="bar")
ax.bar_label(ax.containers[0])
optim_run_df["Loss"].xs("recommendation", level="Step")
```
%% Cell type:code id: tags:
``` python3
# runs the simulation for the recommended parameters from each optimizer.
# The results are saved as a csv together with the reference values.
# Plots are also created, which can be used to judge the quality of the simulation.
res_dict = {"ref": reference}
for key, df in optim_run_df.groupby(level="Optimizer"):
    ps = df.loc[(key, "recommendation"), param_spec.index]
    display(ps)
    test_res_def = sim_runner.simulation_run(**ps)
    res_dict[key] = test_res_def
best_res = pd.concat(res_dict, axis="columns")
best_res.to_csv(out_folder / "best_res.csv")
label_group = best_res.groupby(level="Label")
fig, axes = plt.subplots(
    nrows=len(label_group),
    figsize=(6, 2.5 * len(label_group)),
    layout="constrained",
    squeeze=False,
)
for row, (label, df) in enumerate(label_group):
    df.droplevel("Label").plot(ax=axes[row, 0], title=str(label))
fig.suptitle("Optimizer Result and Reference")
fig.savefig(out_folder / "best_res.png", bbox_inches="tight")
```
%% Cell type:code id: tags:
``` python3
# measure how much the parameters are adjusted
# first normalize to the parameter range, then take the 2-norm of the difference to the previous step
# geometric interpretation: project the parameters into the unit hypercube and calculate the velocity
# only look at params and exclude recommendation
run_param_df = optim_run_df.loc[
optim_run_df.index.get_level_values("Step") != "recommendation", param_spec.index
]
normalized = (run_param_df - param_spec["Min"]) / (
param_spec["Max"] - param_spec["Min"]
)
velocity = normalized.diff()
# norm2 (root of sum of squares)
velocity = (velocity**2).sum(axis="columns") ** 0.5
velocity.unstack(level=0).plot(title="velocity of parameter adjustments")
velocity.unstack(level=0).plot(
subplots=True, sharey=True, title="velocity of parameter adjustments"
)
velocity.unstack(level=0).plot(
subplots=True, sharey=False, title="velocity of parameter adjustments"
)
```
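%% Cell type:markdown id: tags:
In formula form, with parameter vector $p_t$ at step $t$ and the per-parameter bounds Min and Max from param_spec, the plotted velocity is
$$v_t = \left\lVert \frac{p_t - p_{t-1}}{\mathrm{Max} - \mathrm{Min}} \right\rVert_2 = \sqrt{\sum_i \left(\frac{p_{t,i} - p_{t-1,i}}{\mathrm{Max}_i - \mathrm{Min}_i}\right)^2},$$
i.e. the Euclidean step length of the normalized parameters in the unit hypercube.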
%% Cell type:markdown id: tags:
## close simulation
%% Cell type:code id: tags:
``` python3
# clean up used resources
sim_runner.close()
# delete temp folder
if DELETE_TEMP:
    shutil.rmtree(temp_folder)
# print the recommendations
for key, df in optim_run_df.groupby(level="Optimizer"):
    display(key, df.loc[(key, "recommendation"), param_spec.index])
```
%% Cell type:code id: tags:
``` python3
# for testing compare to ideal parameters
recs = {}
if hasattr(sim_runner, "ideal_params"):
    recs["ideal"] = sim_runner.ideal_params
for key, df in optim_run_df.groupby(level="Optimizer"):
    recs[key] = df.loc[(key, "recommendation"), param_spec.index]
pd.DataFrame(recs)
```
%% Cell type:code id: tags:
``` python3
```
%% Cell type:code id: tags:
``` python3
```