From b0c6a0ac2d0e033c6b6f792c996832ec75f61efb Mon Sep 17 00:00:00 2001
From: Sebastian Kerger <sebastian.kerger@rwth-aachen.de>
Date: Mon, 17 Mar 2025 13:11:55 +0100
Subject: [PATCH] Upload New File

---
 .../water-quality-optimization.ipynb | 769 ++++++++++++++++++
 1 file changed, 769 insertions(+)
 create mode 100644 Water quality calibration/water-quality-optimization.ipynb

diff --git a/Water quality calibration/water-quality-optimization.ipynb b/Water quality calibration/water-quality-optimization.ipynb
new file mode 100644
index 0000000..2cd219b
--- /dev/null
+++ b/Water quality calibration/water-quality-optimization.ipynb
@@ -0,0 +1,769 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Script to optimize the simulation parameters for a water quality model in MIKE+\n",
+    "\n",
+    "The nevergrad package is used for optimization.\n",
+    "\n",
+    "Notes:\n",
+    "- mikepluspy requires a full MIKE+ license to run. In many cases the seat of the license is not properly released; closing Python and waiting some time should fix this.\n",
+    "- With multithreading, results may take some time to show up in the output, even if the cell appears to be completed.\n",
+    "\n",
+    "Other scripts to run:\n",
+    "- First run optimization_prep if wanted.\n",
+    "- That script creates the reference data and also the optim_parts.xlsx, which contains the timeframes for the optimization.\n",
+    "\n",
+    "Things to do in MIKE+:\n",
+    "\n",
+    "Water quality models:\n",
+    "- activate Transport in \"Model Type\"\n",
+    "- set at least one WQ component\n",
+    "- set the SWQ advanced methods\n",
+    "- double-check the method under \"Build up/wash off\"\n",
+    "- check the WQ boundary conditions and that their WQ component is the WQ component set above\n",
+    "- activate Transport in the simulation setup\n",
+    "\n",
+    "Sediment transport models:\n",
+    "- activate Sediment Transport in \"Model Type\"\n",
+    "- set all relevant parameters under sediment transport:\n",
+    "  General parameters: density and porosity\n",
+    "  Sediment fractions: insert a new fraction and use a cohesive sediment fraction, as non-cohesive fractions are not fully supported yet\n",
+    "- change the WQ boundary conditions to type \"Sediment concentration\" and type in the name of the sediment transport component defined above\n",
+    "- under SWQ advanced methods: change the surface load type to \"Sediment\" and choose the method you want to use\n",
+    "- check that the result files of ST save the concentration\n",
+    "- run all models with the ST results to get the test results for the script below\n",
+    "- check the optim_parts.xlsx and make sure the result files are the ST files and that the column name is changed to concentration\n",
+    "- activate Sediment Transport in the simulation setup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# import packages\n",
+    "import typing\n",
+    "import datetime\n",
+    "import shutil\n",
+    "from time import sleep\n",
+    "from warnings import warn\n",
+    "from pathlib import Path\n",
+    "\n",
+    "import mikeio\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import nevergrad as ng\n",
+    "import matplotlib as mpl\n",
+    "import matplotlib.pyplot as plt\n",
+    "from tqdm.notebook import tqdm\n",
+    "import IPython.display as idsp\n",
+    "\n",
+    "# import simulation scripts from the project ENTfrachtEN\n",
+    "from entfrachten.optim import simulation, loss_functions, optim_dumper"
+   ]
+  },
"source": [ + "## Constants, you should not need to adjust these, unless you are debugging." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# output heaps of additional data, may be useful for debugging\n", + "VERBOSE_OUTPUT = False\n", + "# delete the temp folder at end\n", + "DELETE_TEMP = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Constants, you should adjust" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Number of simulations allowed for optimization\n", + "optimization_budget = 100\n", + "# Number of seats for the Engine, limits amount of simulations we can run at once. Default of MIKE+ Licence is 4\n", + "engine_seats = 4\n", + "# set a correation factor to multiply the data of the measurements if necessary. Default 1. ENTfrachtEN wastewater = 3.3757\n", + "factor_underestimation = 1\n", + "# time string for temp and output folder\n", + " # if temp_folder exists, this script tries to continue the optimization.\n", + " # Note that continuing the optimization does not check whether settings match;\n", + " # do not change anything when continuing an optimization run!\n", + "time_path_part = datetime.datetime.now().strftime(\"%Y-%m-%dT%H.%M.%S\")\n", + "model_folder = Path(\"example_folder/\")\n", + "# path of .mupp file, relative to model_folder\n", + "db_path_rel = \"example.mupp\"\n", + "# path of table which specifies which parts of the simulation result should be read, relative to model_folder.\n", + " # from optimization_prep.ipnyb\n", + "result_spec_path = \"optim_parts.xlsx\"\n", + "# path of reference file, relative to model_folder\n", + " # This is expected to be a table with 3 Columns: \"Label\", \"Datetime\" and a data column, where the name does not matter.\n", + " # Label has to be consistent with result_spec\n", + "reference_path = \"reference.csv\"\n", + "# path of parameter configuration table, relative to model_folder.\n", + " # Specifies which parameters are optimized an their allowed range\n", + "param_spec_path = \"parameter_config.xlsx\"\n", + "# choose optimizers\n", + " # for all options:\n", + " # print(sorted(ng.optimizers.registry.keys()))\n", + "optimizers = [\"BayesOptimBO\"]\n", + "# loss_fn is later used for optimization, use this line to choose a loss function.\n", + "loss_fn = loss_functions.mean_negative_bounded_nse\n", + "# this value is later used to penalize errors. 
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## create temp(orary) and out(put) folders for the simulation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# a folder where used files are saved\n",
+    "# note that this should be in the same folder as the model folder, so that relative paths lead to the same files\n",
+    "temp_folder = Path(\"tmp_\" + time_path_part)\n",
+    "# output folder\n",
+    "out_folder = Path(\"out_\" + time_path_part)\n",
+    "# create output folder\n",
+    "out_folder.mkdir(exist_ok=True, parents=True)\n",
+    "if not temp_folder.exists():\n",
+    "    # make a copy of the model folder, since the program might mess with the model\n",
+    "    shutil.copytree(model_folder, temp_folder)\n",
+    "    should_continue_optimization = False\n",
+    "    print(\"starting new optimization.\")\n",
+    "else:\n",
+    "    should_continue_optimization = True\n",
+    "    print(\"continuing previous optimization.\")\n",
+    "\n",
+    "db_path = temp_folder / db_path_rel"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# display the temp folder. If an error occurs, it may be useful to check it.\n",
+    "# The folder path is also needed to resume an interrupted optimization.\n",
+    "temp_folder.absolute()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## read the input data for the simulation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def file_to_df(path: Path) -> pd.DataFrame:\n",
+    "    \"\"\"Tries to read the file at path as a pandas DataFrame.\"\"\"\n",
+    "    if path.suffix == \".xlsx\":\n",
+    "        return pd.read_excel(path)\n",
+    "    elif path.suffix == \".dfs0\":\n",
+    "        return mikeio.read(filename=path).to_dataframe()\n",
+    "    elif path.suffix == \".csv\":\n",
+    "        return pd.read_csv(path)\n",
+    "    else:\n",
+    "        raise NotImplementedError(f\"No method for {path.suffix} implemented.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# read events to be simulated\n",
+    "result_spec = file_to_df(temp_folder / result_spec_path).set_index(\"Label\")\n",
+    "result_spec[\"ResultReach\"] = result_spec[\"ResultReach\"].fillna(\"\").astype(str)\n",
+    "result_spec[\"ResultNode\"] = result_spec[\"ResultNode\"].fillna(\"\").astype(str)\n",
+    "assert (result_spec[\"Start\"] <= result_spec[\"End\"]).all(\n",
+    "    axis=None\n",
+    "), \"Event end needs to be after start.\"\n",
+    "result_spec[\"SimulationName\"] = result_spec[\"SimulationName\"].astype(str)\n",
+    "assert result_spec.index.is_unique, \"Need unique labels\"\n",
+    "result_spec"
+   ]
+  },
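+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Before reading the reference file, a hedged illustration of the expected layout: three columns \"Label\", \"Datetime\" and one data column whose name does not matter. All values below are made up."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hypothetical example of the expected reference table layout; labels must\n",
+    "# match result_spec, the data column name (\"concentration\" here) is arbitrary.\n",
+    "pd.DataFrame(\n",
+    "    {\n",
+    "        \"Label\": [\"event_1\", \"event_1\", \"event_2\"],\n",
+    "        \"Datetime\": pd.to_datetime(\n",
+    "            [\"2024-05-01 10:00\", \"2024-05-01 10:05\", \"2024-06-12 08:00\"]\n",
+    "        ),\n",
+    "        \"concentration\": [35.2, 40.1, 12.7],\n",
+    "    }\n",
+    ")"
+   ]
+  },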
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# read reference data\n",
+    "reference = file_to_df(temp_folder / reference_path)\n",
+    "# parse dates and remove timezone\n",
+    "reference[\"Datetime\"] = pd.to_datetime(reference[\"Datetime\"]).dt.tz_localize(None)\n",
+    "spec_labels = set(result_spec.index.unique())\n",
+    "ref_labels = set(reference[\"Label\"])\n",
+    "if ref_labels - spec_labels:\n",
+    "    warn(\"reference has labels not in spec, dropping superfluous labels.\")\n",
+    "    reference = reference.query(\"Label in @spec_labels\")\n",
+    "if spec_labels - ref_labels:\n",
+    "    raise Exception(\"Reference is missing labels specified by result_spec.\")\n",
+    "reference = reference.set_index([\"Label\", \"Datetime\"])\n",
+    "# convert to series and change type to 32-bit float\n",
+    "reference = reference.iloc[:, 0].astype(\"float32\")\n",
+    "reference = reference * factor_underestimation\n",
+    "reference"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference contains shifted times, adjust start/end in the event table\n",
+    "for label in result_spec.index:\n",
+    "    ref_part = reference.xs(label, level=\"Label\")\n",
+    "    result_spec.loc[label, \"Start\"] = ref_part.index.min()\n",
+    "    result_spec.loc[label, \"End\"] = ref_part.index.max()\n",
+    "result_spec[[\"Start\", \"End\"]]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# specify which parameters are optimized and in which range they should be\n",
+    "# the table could be made in python like this:\n",
+    "# param_spec = pd.DataFrame([\n",
+    "#     {\"Table\":\"msm_HParA\",\"Muid\":DEF_STRING,\"Column\":\"RedFactor\",\"Min\":.4,\"Max\":1,\"Steps\":4},\n",
+    "#     {\"Table\":\"msm_HParA\",\"Muid\":DEF_STRING,\"Column\":\"InitLoss\",\"Min\":0,\"Max\":5*M_TO_MM,\"Steps\":4}\n",
+    "# ])\n",
+    "# or loaded from a file\n",
+    "param_spec = file_to_df(temp_folder / param_spec_path)\n",
+    "# give param_spec a sensible string index, this is later used to pass optimization parameters as a dictionary\n",
+    "param_spec.index = pd.Index(\n",
+    "    param_spec[[\"Table\", \"Column\", \"Muid\"]].agg(\";\".join, axis=\"columns\"),\n",
+    "    name=\"ParamKey\",\n",
+    ")\n",
+    "assert (\n",
+    "    param_spec.index.is_unique\n",
+    "), \"Index needs to be unique; otherwise it indicates duplicates in the parameters.\"\n",
+    "param_spec"
+   ]
+  },
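+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For orientation, a hedged example of the parameter configuration layout. Besides Table, Muid, Column, Min and Max, the cells below also rely on a Default column (used for the test run and as the optimizer's initial value). All values are invented; only msm_HParA/RedFactor and the msm_ADDispersion entries appear elsewhere in this notebook."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hypothetical parameter_config layout; only the column names are taken from\n",
+    "# how param_spec is used in this notebook, the rows and values are made up.\n",
+    "pd.DataFrame(\n",
+    "    [\n",
+    "        {\"Table\": \"msm_HParA\", \"Muid\": \"example_muid\", \"Column\": \"RedFactor\",\n",
+    "         \"Min\": 0.4, \"Max\": 1.0, \"Default\": 0.7},\n",
+    "        {\"Table\": \"msm_ADDispersion\", \"Muid\": \"Global AD dispersion\", \"Column\": \"mindispcoef\",\n",
+    "         \"Min\": 0.1, \"Max\": 5.0, \"Default\": 1.0},\n",
+    "    ]\n",
+    ")"
+   ]
+  },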
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "db_path"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Create Simulation runs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "assert (\n",
+    "    db_path.exists()\n",
+    "), f\"DB not found, check path. Expected file in {db_path.absolute()}\"\n",
+    "sim_runner = simulation.SimulationRunner(\n",
+    "    db_path=db_path,\n",
+    "    result_folder=temp_folder,\n",
+    "    param_spec=param_spec,\n",
+    "    result_spec=result_spec,\n",
+    "    parallel_limit=engine_seats,\n",
+    ")\n",
+    "# test dummy\n",
+    "# sim_runner = simulation_dummy.SimulationDummy(reference=reference, param_spec=param_spec, result_spec=result_spec)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Read test results (not necessary)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read results which are already in the folder.\n",
+    "# This serves as a test for the result specification.\n",
+    "# This is skipped if the optimization was started before.\n",
+    "if not should_continue_optimization:\n",
+    "    existing_result = sim_runner.read_results()\n",
+    "    existing_result.index.names = reference.index.names\n",
+    "    print(\n",
+    "        \"Plotting results; if this looks very different from plots in MIKE+, unit conversion may need to be added.\"\n",
+    "    )\n",
+    "    for label in result_spec.index:\n",
+    "        try:\n",
+    "            txs = existing_result.xs(label, level=\"Label\")\n",
+    "            rxs = reference.xs(label, level=\"Label\")\n",
+    "            missing_ratio = txs.reindex_like(rxs).isna().mean()\n",
+    "            if missing_ratio > 0.2:\n",
+    "                warn(\n",
+    "                    f\"Label {label} has many missing values compared to the reference; missing ratio is {missing_ratio}\"\n",
+    "                )\n",
+    "            pd.DataFrame({\"ref\": rxs, \"test\": txs}).plot(\n",
+    "                title=str(label), figsize=(7, 2.5)\n",
+    "            )\n",
+    "        except Exception as ex:\n",
+    "            warn(str(ex))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## make a test run to see if the optimization works"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# test run\n",
+    "test_res_df = pd.DataFrame({\"Reference\": reference})\n",
+    "test_res_info = pd.DataFrame()\n",
+    "status_dh = idsp.display(\"starting\", display_id=True)\n",
+    "info_dh = idsp.display(\"\", display_id=True)\n",
+    "fig_dh = idsp.display(\"figure placeholder\", display_id=True)\n",
+    "for key in [\"Default\", \"Min\", \"Max\"]:\n",
+    "    status_dh.update(f\"Getting results for {key}\")\n",
+    "    csv_path = out_folder / f\"test_{key.lower()}.csv\"\n",
+    "    if not should_continue_optimization:\n",
+    "        # the ** in the call unrolls the series into keyword parameters\n",
+    "        test_res = sim_runner.simulation_run(**param_spec[key])\n",
+    "        test_res.to_csv(csv_path, index=True)\n",
+    "    else:\n",
+    "        assert (\n",
+    "            csv_path.exists()\n",
+    "        ), \"Tried to continue optimization run, but csv from test does not exist. This may mean that optimization was not started before.\"\n",
+    "        test_res = pd.read_csv(\n",
+    "            csv_path, index_col=[0, 1], parse_dates=True\n",
+    "        ).iloc[:, 0]\n",
+    "    info = test_res.describe()\n",
+    "    test_res = test_res.reindex_like(reference)\n",
+    "    info[\"missing ratio\"] = test_res.isna().mean()\n",
+    "    info[\"loss\"] = loss_fn(reference, test_res)\n",
+    "    test_res_info[key] = info\n",
+    "    test_res_df[key] = test_res\n",
+    "    info_dh.update(pd.DataFrame(test_res_info))\n",
+    "    status_dh.update(f\"Plotting results for {key}\")\n",
+    "    plt.ioff()\n",
+    "    label_group = test_res_df.groupby(level=\"Label\")\n",
+    "    fig, axes = plt.subplots(\n",
+    "        nrows=len(label_group),\n",
+    "        figsize=(6, 2.5 * len(label_group)),\n",
+    "        layout=\"constrained\",\n",
+    "        squeeze=False,\n",
+    "    )\n",
+    "    for row, (label, df) in enumerate(label_group):\n",
+    "        df.droplevel(\"Label\").plot(ax=axes[row, 0], title=str(label))\n",
+    "        axes[row, 0].xaxis.label.set_visible(False)\n",
+    "    fig.suptitle(\"Simulation with different parameters\")\n",
+    "    fig.savefig(out_folder / \"test_sim.png\", bbox_inches=\"tight\")\n",
+    "    fig_dh.update(fig)\n",
+    "    plt.ion()\n",
+    "    # plt.close(fig)\n",
+    "# check results for equality. This test assumes that the default may be equal to either min or max.\n",
+    "# But if all are the same, something is wrong with the simulation.\n",
+    "assert not (\n",
+    "    test_res_df[\"Default\"].equals(test_res_df[\"Min\"])\n",
+    "    and test_res_df[\"Default\"].equals(test_res_df[\"Max\"])\n",
+    "), \"Test results are equal. This may mean that parameters were not set at all.\"\n",
+    "status_dh.update(\"Finished\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_res_df.isna().mean()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## set all optimization parameters and constraints"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# define the parameter calibration as a minimization problem\n",
+    "def simulation_compare(**kwparams):\n",
+    "    \"\"\"Runs the simulation with the specified parameters and applies the loss function to the results.\n",
+    "\n",
+    "    Note that the kwparams keys need to match the index of param_spec.\"\"\"\n",
+    "    result = sim_runner.simulation_run(**kwparams)\n",
+    "    result_matched = result.reindex_like(reference)\n",
+    "    plain_loss = loss_fn(reference, result_matched)\n",
+    "    # values should not be missing, penalize by increasing the loss\n",
+    "    missing_ratio = result_matched.isna().mean()\n",
+    "    if missing_ratio > 0:\n",
+    "        warn(f\"Values are missing from result: {missing_ratio}\")\n",
+    "    return (1 - missing_ratio) * plain_loss + missing_ratio * loss_on_error\n",
+    "\n",
+    "\n",
+    "# define constraints for the parameters; takes parameters in the same format as simulation_compare.\n",
+    "# The nevergrad developers recommend using float-valued constraints, where >= 0 means ok:\n",
+    "# https://facebookresearch.github.io/nevergrad/optimization.html#optimization-with-constraints\n",
+    "# This function takes the parameter values as they are defined by nevergrad;\n",
+    "# for an instrumentation this is a tuple of args and a dict of kwargs.\n",
+    "def constraint_min_max(params_val):\n",
+    "    \"\"\"Constraint: mindispcoef should be smaller than maxdispcoef.\"\"\"\n",
+    "    mindispcoef = params_val[1][\"msm_ADDispersion;mindispcoef;Global AD dispersion\"]\n",
+    "    maxdispcoef = params_val[1][\"msm_ADDispersion;maxdispcoef;Global AD dispersion\"]\n",
+    "    return maxdispcoef - mindispcoef\n",
+    "\n",
+    "\n",
+    "# define optimization parameters as needed by nevergrad\n",
+    "ng_params = {}\n",
+    "# first collect a scalar parameter for each row of param_spec\n",
+    "for key in param_spec.index:\n",
+    "    vmin = param_spec.at[key, \"Min\"]\n",
+    "    vmax = param_spec.at[key, \"Max\"]\n",
+    "    vdef = param_spec.at[key, \"Default\"]\n",
+    "    ngp = ng.p.Scalar(init=vdef, lower=vmin, upper=vmax)\n",
+    "    ng_params[key] = ngp\n",
+    "# now turn the dict into an instrumentation\n",
+    "# the instrumentation is used to tell the optimizer how the parameters are passed to our function\n",
+    "instrumentation = ng.p.Instrumentation(**ng_params)\n",
+    "instrumentation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# register constraint\n",
+    "instrumentation.register_cheap_constraint(constraint_min_max)"
+   ]
+  },
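+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Before the real optimization below, a small hedged demo of the ask/tell loop on a throwaway optimizer: it shows how nevergrad hands the instrumented parameters back as keyword arguments keyed like param_spec.index, without running MIKE+. The quadratic dummy loss is made up purely for illustration."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hedged demo: a throwaway RandomSearch optimizer on the same instrumentation.\n",
+    "# No simulation is run; the dummy loss simply prefers small parameter values.\n",
+    "demo_optimizer = ng.optimizers.registry[\"RandomSearch\"](\n",
+    "    parametrization=instrumentation, budget=3, num_workers=1\n",
+    ")\n",
+    "for _ in range(demo_optimizer.budget):\n",
+    "    candidate = demo_optimizer.ask()\n",
+    "    # candidate.kwargs is a dict keyed like param_spec.index (\"Table;Column;Muid\")\n",
+    "    dummy_loss = sum(float(v) ** 2 for v in candidate.kwargs.values())\n",
+    "    demo_optimizer.tell(candidate, dummy_loss)\n",
+    "demo_optimizer.recommend().kwargs"
+   ]
+  },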
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def optim_run(optim_name):\n",
+    "    # optim_dumper is used to save/load the optimization state\n",
+    "    state_dumper = optim_dumper.OptimDumper(temp_folder / f\"dmp-{optim_name}\")\n",
+    "    has_dump = state_dumper.get_last_step() >= 0\n",
+    "    if has_dump != should_continue_optimization:\n",
+    "        print(\"Dump state and continuation setting do not match for\", optim_name)\n",
+    "    if not should_continue_optimization or not has_dump:\n",
+    "        # create optimizer\n",
+    "        print(\"starting new optimization for\", optim_name)\n",
+    "        optimizer = ng.optimizers.registry[optim_name](\n",
+    "            parametrization=instrumentation, budget=optimization_budget, num_workers=1\n",
+    "        )\n",
+    "        # tested parameters and losses are saved to lists for later evaluation\n",
+    "        params = []\n",
+    "        losses = []\n",
+    "    else:\n",
+    "        # load the last dumped optimization state\n",
+    "        print(\"loading optimizer\", optim_name)\n",
+    "        optimizer, params, losses = state_dumper.load()\n",
+    "\n",
+    "    # do optimization\n",
+    "    for step in tqdm(range(optimizer.num_tell, optimizer.budget)):\n",
+    "        # next parameter set to simulate\n",
+    "        candidate = optimizer.ask()\n",
+    "        loss = simulation_compare(*candidate.args, **candidate.kwargs)\n",
+    "        # tell the optimizer the loss value\n",
+    "        optimizer.tell(candidate, loss)\n",
+    "        # keep parameters and loss in a list\n",
+    "        params.append(candidate)\n",
+    "        losses.append(loss)\n",
+    "        # save optimization state to disk\n",
+    "        state_dumper.dump(optimizer, params, losses)\n",
+    "\n",
+    "    # optimization has finished, now get the recommendation and write the result into a dataframe\n",
+    "    recommendation = optimizer.recommend()\n",
+    "    # params contains the candidates as instrumentation objects.\n",
+    "    # The list comprehension creates a list of dicts of parameter values,\n",
+    "    # which pandas then transforms into a dataframe where each parameter has its own column.\n",
+    "    optim_run_df = pd.concat(\n",
+    "        [\n",
+    "            pd.DataFrame(data=[p.value[1] for p in params]),\n",
+    "            pd.DataFrame({\"Optimizer\": optimizer.name, \"Loss\": losses}),\n",
+    "        ],\n",
+    "        axis=\"columns\",\n",
+    "    )\n",
+    "    # also add a line for the recommendation\n",
+    "    rec_params = recommendation.value[1]\n",
+    "    optim_run_df.loc[\"recommendation\", rec_params.keys()] = rec_params\n",
+    "    optim_run_df.loc[\"recommendation\", \"Loss\"] = recommendation.loss\n",
+    "    optim_run_df.loc[\"recommendation\", \"Optimizer\"] = optimizer.name\n",
+    "    optim_run_df.to_csv(out_folder / f\"optim_run{optimizer.name}.csv\")\n",
+    "\n",
+    "    return optim_run_df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## start the optimization"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "optim_run_dfs = {}\n",
+    "for o in optimizers:\n",
+    "    optim_run_dfs[o] = optim_run(o)\n",
+    "    display(optim_run_dfs[o])\n",
+    "    optim_run_dfs[o].plot(subplots=True, figsize=(5, 2 * len(param_spec)))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## save all optimization results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# concatenate optim_run_dfs and output into a csv.\n",
+    "# optim_run_df already has the optimizer included; use .values instead of passing the dict, so that the optimizer is not added again.\n",
+    "optim_run_df = pd.concat(optim_run_dfs.values())\n",
+    "optim_run_df.to_csv(out_folder / \"optim_run.csv\")\n",
+    "optim_run_df.to_excel(out_folder / \"optim_run.xlsx\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# adjust index for evaluation\n",
+    "optim_run_df.index.name = \"Step\"\n",
+    "optim_run_df = optim_run_df.reset_index().set_index([\"Optimizer\", \"Step\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# Plot the loss for each step. This indicates how fast (and if at all) the optimizer converged to a solution.\n",
+    "# Note that some optimizers are not designed to converge.\n",
+    "optim_run_df[\"Loss\"].unstack(level=\"Optimizer\").plot(\n",
+    "    subplots=True, sharey=True, title=\"loss over steps\"\n",
+    ")[0].get_figure().savefig(out_folder / \"loss_over_steps.png\", bbox_inches=\"tight\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Shows the cumulative minimum of the loss over the steps.\n",
+    "# Compared to the plain loss plot, the cumulative minimum filters out the effect of exploration.\n",
+    "# This plot can help to judge how fast the optimizer found a good solution.\n",
+    "optim_run_df[\"Loss\"].unstack(level=0).cummin().plot(\n",
+    "    title=\"cummin loss over steps\"\n",
+    ").get_figure().savefig(out_folder / \"loss_cummin.png\", bbox_inches=\"tight\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Bar plot comparing the loss of the recommendation from each optimizer.\n",
+    "# This is not interesting if only one optimizer was used.\n",
+    "ax = optim_run_df[\"Loss\"].unstack(level=0).loc[\"recommendation\"].plot(kind=\"bar\")\n",
+    "ax.bar_label(ax.containers[0])\n",
+    "optim_run_df[\"Loss\"].xs(\"recommendation\", level=\"Step\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Runs the simulation for the recommended parameters from each optimizer.\n",
+    "# The results are saved as a csv together with the reference values.\n",
+    "# Plots are also created, which can be used to judge the quality of the simulation.\n",
+    "res_dict = {\"ref\": reference}\n",
+    "for key, df in optim_run_df.groupby(level=\"Optimizer\"):\n",
+    "    ps = df.loc[(key, \"recommendation\"), param_spec.index]\n",
+    "    display(ps)\n",
+    "    test_res_def = sim_runner.simulation_run(**ps)\n",
+    "    res_dict[key] = test_res_def\n",
+    "best_res = pd.concat(res_dict, axis=\"columns\")\n",
+    "best_res.to_csv(out_folder / \"best_res.csv\")\n",
+    "label_group = best_res.groupby(level=\"Label\")\n",
+    "fig, axes = plt.subplots(\n",
+    "    nrows=len(label_group),\n",
+    "    figsize=(6, 2.5 * len(label_group)),\n",
+    "    layout=\"constrained\",\n",
+    "    squeeze=False,\n",
+    ")\n",
+    "for row, (label, df) in enumerate(label_group):\n",
+    "    df.droplevel(\"Label\").plot(ax=axes[row, 0], title=str(label))\n",
+    "fig.suptitle(\"Optimizer Result and Reference\")\n",
+    "fig.savefig(out_folder / \"best_res.png\", bbox_inches=\"tight\")"
+   ]
+  },
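+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The next cell measures how much the optimizer moves through parameter space. With the parameter vector $p_t$ at step $t$ and the elementwise bounds $p_{\\min}, p_{\\max}$ from param_spec, it computes\n",
+    "\n",
+    "$$v_t = \\left\\lVert \\frac{p_t - p_{t-1}}{p_{\\max} - p_{\\min}} \\right\\rVert_2,$$\n",
+    "\n",
+    "i.e. the step length after projecting the parameters into the unit hypercube (division is elementwise)."
+   ]
+  },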
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# measure how much the parameters are adjusted:\n",
+    "# first normalize to the allowed range, then take the 2-norm of the difference to the previous step.\n",
+    "# Geometric interpretation: project the parameters into the unit hypercube and calculate the velocity.\n",
+    "\n",
+    "# only look at params and exclude the recommendation\n",
+    "run_param_df = optim_run_df.loc[\n",
+    "    optim_run_df.index.get_level_values(\"Step\") != \"recommendation\", param_spec.index\n",
+    "]\n",
+    "normalized = (run_param_df - param_spec[\"Min\"]) / (\n",
+    "    param_spec[\"Max\"] - param_spec[\"Min\"]\n",
+    ")\n",
+    "velocity = normalized.diff()\n",
+    "# 2-norm (root of sum of squares)\n",
+    "velocity = (velocity**2).sum(axis=\"columns\") ** 0.5\n",
+    "velocity.unstack(level=0).plot(title=\"velocity of parameter adjustments\")\n",
+    "velocity.unstack(level=0).plot(\n",
+    "    subplots=True, sharey=True, title=\"velocity of parameter adjustments\"\n",
+    ")\n",
+    "velocity.unstack(level=0).plot(\n",
+    "    subplots=True, sharey=False, title=\"velocity of parameter adjustments\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## close simulation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# clean up used resources\n",
+    "sim_runner.close()\n",
+    "# delete the temp folder\n",
+    "if DELETE_TEMP:\n",
+    "    shutil.rmtree(temp_folder)\n",
+    "# print the recommendations\n",
+    "for key, df in optim_run_df.groupby(level=\"Optimizer\"):\n",
+    "    display(key, df.loc[(key, \"recommendation\"), param_spec.index])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# for testing, compare to ideal parameters\n",
+    "recs = {}\n",
+    "if hasattr(sim_runner, \"ideal_params\"):\n",
+    "    recs[\"ideal\"] = sim_runner.ideal_params\n",
+    "for key, df in optim_run_df.groupby(level=\"Optimizer\"):\n",
+    "    recs[key] = df.loc[(key, \"recommendation\"), param_spec.index]\n",
+    "pd.DataFrame(recs)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (Spyder)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.21"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
-- 
GitLab