diff --git a/Water quality calibration/optimization_prep.ipynb b/Water quality calibration/optimization_prep.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..22e9339b8670fbfa63461c9f9492b952a7d9a3f9
--- /dev/null
+++ b/Water quality calibration/optimization_prep.ipynb
@@ -0,0 +1,840 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Prep work for optimization with mikepluspy.\n",
+    "Adjusts the time shift and generates a configuration table."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import typing\n",
+    "import datetime\n",
+    "from warnings import warn\n",
+    "from pathlib import Path\n",
+    "from dataclasses import dataclass\n",
+    "\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import matplotlib as mpl\n",
+    "import matplotlib.pyplot as plt\n",
+    "import hydroeval as hde\n",
+    "import sqlalchemy as sa\n",
+    "from scipy.ndimage import gaussian_filter1d\n",
+    "from scipy import signal\n",
+    "from tqdm.notebook import tqdm\n",
+    "from mikeio1d.res1d import Res1D"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# time string for output folder\n",
+    "time_path_part = datetime.datetime.now().strftime(\"%Y-%m-%dT%H.%M.%S\")\n",
+    "# output folder\n",
+    "out_folder = Path(\"out_\" + time_path_part)\n",
+    "model_folder = Path(\"model_folder_path_relative_to_script/\")\n",
+    "# path of .mupp file, relative to model_folder\n",
+    "db_path_rel = \"example.mupp\"\n",
+    "# define timezone of data\n",
+    "timezone = \"UTC+01:00\"\n",
+    "# define name of column in sensor data for flow\n",
+    "hd_col = \"q_lps\"\n",
+    "# define name of column in sensor data for concentration\n",
+    "conc_col = \"conc_mgpl\"\n",
+    "# this timestep is assumed for all reference/result timeseries\n",
+    "timestep_str = \"1min\"\n",
+    "timestep = pd.to_timedelta(timestep_str)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create output folder\n",
+    "out_folder.mkdir(exist_ok=True, parents=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# In this project the data came from an SQL database; this can also be changed to table data further below.\n",
+    "# If you don't use an SQL database you need to change \"def calc_plot\"\n",
+    "# and also adjust the loop: \"for label in optim_parts.index:\"\n",
+    "# connection to the database used to fetch data\n",
+    "db_engine = sa.create_engine(\n",
+    "    \"insert connection to SQL database here\"\n",
+    ")"
+   ]
+  },
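+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The queries further below assume a table `readings_smoothed` with columns `sensor_name`, `datetime`, `q_lps` and `conc_mgpl`, plus a table `anomaly_scores` with `sensor_name`, `datetime`, `parameter` and `score`. The next cell is an optional, minimal sanity check under that assumption; adjust the table/column names and the `LIMIT` syntax to your database."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# optional sanity check: fetch a single row from the assumed readings_smoothed table.\n",
+    "# If this fails, the connection string or the assumed schema does not match your setup.\n",
+    "with db_engine.begin() as con:\n",
+    "    sample = pd.read_sql_query(\n",
+    "        sa.text(f\"select sensor_name, datetime, {hd_col}, {conc_col} from readings_smoothed limit 1\"),\n",
+    "        con,\n",
+    "    )\n",
+    "display(sample)"
+   ]
+  },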
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This excel table tells us which times are to be used for calibration for each sensor and event.\n",
+    "# The table only contains times if the (event,sensor) pair should be used; otherwise the values are empty.\n",
+    "# The original excel file contains the sensor in the first header line, the kind of column in the second header line\n",
+    "# and the event as the first column. Stack puts the sensor in a column and also removes values without times for us.\n",
+    "# This means that event_times only contains the needed (event,sensor) pairs after this line.\n",
+    "event_times = pd.read_excel(model_folder/\"calibration_event_times.xlsx\",header=[0,1],index_col=[0]).stack(level=0)\n",
+    "event_times.index.names = [\"event\",\"sensor\"]\n",
+    "display(event_times.reset_index().dtypes)\n",
+    "for dtcol in [\"start\",\"end\"]:\n",
+    "    event_times[dtcol]=event_times[dtcol].dt.tz_localize(timezone)\n",
+    "for idx in event_times.index:\n",
+    "    start=event_times.loc[idx,\"start\"]\n",
+    "    end=event_times.loc[idx,\"end\"]\n",
+    "    start_round = start.round(timestep)\n",
+    "    end_round = end.round(timestep)\n",
+    "    if start!=start_round:\n",
+    "        warn(f\"for {idx} start ({start}) does not fit timestep ({timestep}), rounding to {start_round}\")\n",
+    "        event_times.loc[idx,\"start\"]=start_round\n",
+    "    if end!=end_round:\n",
+    "        warn(f\"for {idx} end ({end}) does not fit timestep ({timestep}), rounding to {end_round}\")\n",
+    "        event_times.loc[idx,\"end\"]=end_round\n",
+    "event_times"
+   ]
+  },
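+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a minimal illustration of the stacking step above (with made-up sensor and event names): a two-level column header is reshaped so the first header level becomes part of the row index, and rows without times are dropped."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# toy example: columns are (sensor, kind-of-column), index is the event\n",
+    "toy = pd.DataFrame(\n",
+    "    {(\"sensorA\",\"start\"): [pd.Timestamp(\"2024-01-01 10:00\"), pd.NaT],\n",
+    "     (\"sensorA\",\"end\"):   [pd.Timestamp(\"2024-01-01 12:00\"), pd.NaT],\n",
+    "     (\"sensorB\",\"start\"): [pd.NaT, pd.Timestamp(\"2024-02-01 08:00\")],\n",
+    "     (\"sensorB\",\"end\"):   [pd.NaT, pd.Timestamp(\"2024-02-01 09:00\")]},\n",
+    "    index=pd.Index([\"event1\",\"event2\"], name=\"event\"),\n",
+    ")\n",
+    "# stack(level=0) moves the sensor level into the row index and drops all-NaT rows,\n",
+    "# leaving exactly the usable (event, sensor) pairs\n",
+    "toy.stack(level=0)"
+   ]
+  },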
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This table defines for each concentration sensor where it is positioned within the model.\n",
+    "# Make sure that the sensor name in the defined excel files corresponds to the names in the SQL database.\n",
+    "# It also contains the position and name of a discharge sensor, which is used to adjust the time shift later.\n",
+    "# Note that chainages are only approximate.\n",
+    "sensor_positions = pd.read_excel(model_folder/\"sensor_data.xlsx\",sheet_name=\"sensor_position\",index_col=\"sensor\")\n",
+    "sensor_positions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# read the table defining which event is included in which result file\n",
+    "event_to_file = pd.read_excel(model_folder/\"event_dict.xlsx\",index_col=\"event\")[\"result file\"]\n",
+    "event_to_file"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def read_result(file, reach, node, column, timestep, start, end) -> pd.Series:\n",
+    "    \"\"\"Read the result with specified parameters.\n",
+    "    returns:\n",
+    "        a pandas Series with Datetime as index.\n",
+    "    \"\"\"\n",
+    "    # file_path is read from the event table\n",
+    "    # reach/node is used to read only a part of the res1d file\n",
+    "    df = Res1D(file_path=str(model_folder / file), reaches=[reach], nodes=[node]).read_all()\n",
+    "    # reduce result to desired column\n",
+    "    series = df[column]\n",
+    "    series.index = series.index.tz_localize(timezone)\n",
+    "    # resample to be comparable to results\n",
+    "    # TODO: sensible resampling or raise error\n",
+    "    # Note: Linear interpolation with pandas first throws away all values where the index does not match the frequency (?!).\n",
+    "    full_idx = pd.date_range(start, end, freq=timestep)\n",
+    "    series = series.reindex(full_idx, method=\"nearest\")\n",
+    "    # set index name\n",
+    "    series.index.name = \"Datetime\"\n",
+    "    return series\n",
+    "\n",
+    "\n",
+    "def make_conc_result_spec(ev, sensor):\n",
+    "    \"\"\"Defines parameters which define how to read the simulation result for an event from the specified concentration sensor.\"\"\"\n",
+    "    file = f\"Results/{event_to_file[ev]}BaseDefault_Network_AD.res1d\"\n",
+    "    reach = sensor_positions.loc[sensor, \"reach\"]\n",
+    "    # Column names feature very exact chainages, which we do not know beforehand.\n",
+    "    # This tries to find the column with the most similar chainage.\n",
+    "    approx_chainage = sensor_positions.loc[sensor, \"chainage\"]\n",
+    "    col_start = f\"AFS:{reach}:\"\n",
+    "    possible_columns = [\n",
+    "        c\n",
+    "        for c in Res1D(str(model_folder / file), reaches=[reach]).read_all().columns\n",
+    "        if str(c).startswith(col_start)\n",
+    "    ]\n",
+    "\n",
+    "    def chainage_diff(col: str):\n",
+    "        return abs(approx_chainage - float(col.removeprefix(col_start)))\n",
+    "\n",
+    "    best_col = min(possible_columns, key=chainage_diff)\n",
+    "    print(\n",
+    "        f\"\"\"\n",
+    "        approx_chainage: {approx_chainage}\n",
+    "        possible_columns: {possible_columns}\n",
+    "        best_col: {best_col}\n",
+    "        \"\"\"\n",
+    "    )\n",
+    "    return dict(\n",
+    "        file=file,\n",
+    "        reach=reach,\n",
+    "        node=\"\",\n",
+    "        column=best_col,\n",
+    "        timestep=timestep,\n",
+    "        start=event_times.loc[(ev, sensor), \"start\"],\n",
+    "        end=event_times.loc[(ev, sensor), \"end\"],\n",
+    "    )\n",
+    "\n",
+    "\n",
+    "def make_hd_result_spec(ev, sensor):\n",
+    "    \"\"\"Defines parameters which define how to read the simulation result for an event from the specified discharge sensor.\"\"\"\n",
+    "    file = f\"Results/{event_to_file[ev]}BaseDefault_Network_HD.res1d\"\n",
+    "    reach = sensor_positions.loc[sensor, \"hd_reach\"]\n",
+    "    # Column names feature very exact chainages, which we do not know beforehand.\n",
+    "    # This tries to find the column with the most similar chainage.\n",
+    "    approx_chainage = sensor_positions.loc[sensor, \"hd_chainage\"]\n",
+    "    col_start = f\"Discharge:{reach}:\"\n",
+    "    possible_columns = [\n",
+    "        c\n",
+    "        for c in Res1D(str(model_folder / file), reaches=[reach]).read_all().columns\n",
+    "        if str(c).startswith(col_start)\n",
+    "    ]\n",
+    "\n",
+    "    def chainage_diff(col: str):\n",
+    "        return abs(approx_chainage - float(col.removeprefix(col_start)))\n",
+    "\n",
+    "    best_col = min(possible_columns, key=chainage_diff)\n",
+    "    print(\n",
+    "        f\"\"\"\n",
+    "        approx_chainage: {approx_chainage}\n",
+    "        possible_columns: {possible_columns}\n",
+    "        best_col: {best_col}\n",
+    "        \"\"\"\n",
+    "    )\n",
+    "    return dict(\n",
+    "        file=file,\n",
+    "        reach=reach,\n",
+    "        node=\"\",\n",
+    "        column=best_col,\n",
+    "        timestep=timestep,\n",
+    "        start=event_times.loc[(ev, sensor), \"start\"],\n",
+    "        end=event_times.loc[(ev, sensor), \"end\"],\n",
+    "    )\n",
+    "\n",
+    "\n",
+    "def make_conc_reference_spec(ev, sensor):\n",
+    "    \"\"\"Defines parameters which define how to read the reference values for an event and specified concentration sensor from the database.\"\"\"\n",
+    "    return {\n",
+    "        \"sensor\": sensor,\n",
+    "        \"start\": event_times.loc[(ev, sensor), \"start\"],\n",
+    "        \"end\": event_times.loc[(ev, sensor), \"end\"],\n",
+    "    }\n",
+    "\n",
+    "\n",
+    "def make_hd_reference_spec(ev, sensor):\n",
+    "    \"\"\"Defines parameters which define how to read the reference values for an event and specified discharge sensor from the database.\"\"\"\n",
+    "    return {\n",
+    "        \"sensor\": sensor_positions.loc[sensor, \"hd_sensor\"],\n",
+    "        \"start\": event_times.loc[(ev, sensor), \"start\"],\n",
+    "        \"end\": event_times.loc[(ev, sensor), \"end\"],\n",
+    "    }\n",
+    "\n",
+    "\n",
+    "def normalize_max(series):\n",
+    "    \"\"\"Normalize a time series so that the maximum value is 1.\"\"\"\n",
+    "    max_val = np.max(series)\n",
+    "    if max_val == 0:\n",
+    "        max_val = 1\n",
+    "    return series / max_val"
+   ]
+  },
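+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The chainage matching above picks, among result columns like `AFS:reach:123.45`, the one whose chainage is numerically closest to the approximate sensor chainage. A tiny self-contained sketch with made-up column names:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# made-up result columns for one reach; the sensor sits at chainage ~120 m\n",
+    "demo_cols = [\"AFS:reach1:0.000\", \"AFS:reach1:118.732\", \"AFS:reach1:240.511\"]\n",
+    "demo_prefix = \"AFS:reach1:\"\n",
+    "approx = 120.0\n",
+    "best = min(demo_cols, key=lambda c: abs(approx - float(c.removeprefix(demo_prefix))))\n",
+    "print(best)  # -> AFS:reach1:118.732"
+   ]
+  },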
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create the result configuration table for optimization\n",
+    "# This table defines which simulations to run and where to find the result for each label.\n",
+    "optim_part_records = []\n",
+    "# event_times contains only the event-sensor pairs which have usable data\n",
+    "for event,sensor in tqdm(event_times.index):\n",
+    "    print(event,sensor)\n",
+    "    try:\n",
+    "        result_spec = make_conc_result_spec(event,sensor)\n",
+    "        optim_part_records.append({\n",
+    "            \"Label\":f\"{event}-{sensor}\",\n",
+    "            \"Event\":event,\n",
+    "            \"Sensor\":sensor,\n",
+    "            \"EventWeight\":1,\n",
+    "            \"Start\":event_times.loc[(event,sensor),\"start\"],\n",
+    "            \"End\":event_times.loc[(event,sensor),\"end\"],\n",
+    "            \"Timestep\":timestep_str,\n",
+    "            \"SimulationName\":f\"{event_to_file[event]}\",\n",
+    "            \"ResultFile\":result_spec[\"file\"],\n",
+    "            \"ResultReach\":result_spec[\"reach\"],\n",
+    "            \"ResultNode\":result_spec[\"node\"],\n",
+    "            \"ResultColumn\":result_spec[\"column\"],\n",
+    "        })\n",
+    "    except Exception as ex:\n",
+    "        print(ex)\n",
+    "\n",
+    "optim_parts = pd.DataFrame(optim_part_records)\n",
+    "optim_parts = optim_parts.set_index(\"Label\")\n",
+    "optim_parts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# The hydrodynamics simulation may be shifted in time. The water quality simulation depends on the hydrodynamics simulation and has the same lag.\n",
+    "# Since adjusting quality parameters cannot fix this time lag, we calculate the lag for the hydrodynamics\n",
+    "# and then shift the quality reference data by the same lag.\n",
+    "# The lag is calculated with cross correlation. Since the lag may vary over time,\n",
+    "# we use cross correlation over a window around each value to calculate a lag for each time point.\n",
+    "# The lag is then smoothed because it does not make sense for the lag to change suddenly.\n",
+    "\n",
+    "\n",
+    "def adjust_lag(series, lag):\n",
+    "    \"\"\"Shifts time in a series to account for lag.\n",
+    "    series: values we want to adjust for lag\n",
+    "    lag: time the series lags behind the result\n",
+    "    returns: series with time shifted according to lag.\n",
+    "    Note that the nearest value is used if a timestamp does not exist in the original data.\n",
+    "    \"\"\"\n",
+    "    assert isinstance(series.index, pd.DatetimeIndex), \"Need DatetimeIndex in series\"\n",
+    "    assert isinstance(lag.index, pd.DatetimeIndex), \"Need DatetimeIndex in lag\"\n",
+    "    assert (\n",
+    "        len(series.index.difference(lag.index)) == 0\n",
+    "    ), \"need lag value for each element of series\"\n",
+    "    assert lag.notna().all(), \"lag cannot be nan\"\n",
+    "    # Use .array to get the data without index, since we want to adjust time.\n",
+    "    # Otherwise we would shift the order of the data, but it would still be associated with the old timestamp.\n",
+    "    series_shifted = pd.Series(\n",
+    "        series.reindex(series.index - lag[series.index], method=\"nearest\").array,\n",
+    "        index=series.index,\n",
+    "    )\n",
+    "    # Modify the name of the series; this is useful when using plot() for both the original and the shifted data.\n",
+    "    series_shifted.name = series.name + \"_shift\"\n",
+    "    return series_shifted[series.index]\n",
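+    "\n",
+    "\n",
+    "# Note on semantics (illustrative): adjust_lag builds its output by reading, for each\n",
+    "# output timestamp t, the nearest available reference value at time t - lag[t].\n",
+    "# With a positive lag this moves reference features later in time, towards the simulated event.\n",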
+    "\n",
+    "\n",
+    "def adjust_start_end(start, end, lag):\n",
+    "    \"\"\"Adjust start and end according to lag. Also rounds to the timestep, so that all needed time points are included.\n",
+    "    This is used because the reference values contain some kind of notable features. After shifting the reference values in time,\n",
+    "    the reference values should still contain the same features.\n",
+    "    returns: a tuple (start,end) of adjusted times.\n",
+    "    \"\"\"\n",
+    "    start_lag, end_lag = lag.reindex([start, end], method=\"nearest\")\n",
+    "    start_shifted, end_shifted = start + start_lag, end + end_lag\n",
+    "    # round start down and end up\n",
+    "    start_rounded = start_shifted.round(timestep)\n",
+    "    if start_rounded > start_shifted:\n",
+    "        start_rounded -= timestep\n",
+    "    end_rounded = end_shifted.round(timestep)\n",
+    "    if end_rounded < end_shifted:\n",
+    "        end_rounded += timestep\n",
+    "    return start_rounded, end_rounded\n",
+    "\n",
+    "\n",
+    "# Cross correlation for a window around each point, to search where the part of the simulation result is located in the reference.\n",
+    "\n",
+    "# put lags for each event into a dictionary\n",
+    "reference_lags = {}\n",
+    "\n",
+    "# This procedure was written as a class to make it simpler to experiment with various parameters.\n",
+    "# Dataclasses are a Python feature which helps us avoid some boilerplate code, like writing an __init__ function.\n",
+    "@dataclass\n",
+    "class CcCalculator:\n",
+    "    \"\"\"Class to compute and plot cross correlation for an event. Label is the only required parameter, everything else has default values.\"\"\"\n",
+    "\n",
+    "    # event for which cross correlation is computed\n",
+    "    label: str\n",
+    "    # Size of the window for which cross correlation is computed. Is passed to pd.to_timedelta() for computation.\n",
+    "    # Used in plot annotation as is.\n",
+    "    window: str = \"2h\"\n",
+    "    # Tuple of times (as timestep indices) where a window and cross correlation may be plotted.\n",
+    "    # Invalid times cause a warning and are otherwise ignored.\n",
+    "    win_times: typing.Union[tuple[int, ...], typing.Literal[\"peak\"]] = ()\n",
+    "    # whether to plot window at win_times\n",
+    "    plt_win: bool = True\n",
+    "    # whether to plot correlation at win_times\n",
+    "    plt_corr: bool = True\n",
+    "    # whether to plot the lag\n",
+    "    plt_lag: bool = True\n",
+    "    # whether to plot the warped (lag corrected) reference with the result\n",
+    "    plt_warp: bool = True\n",
+    "    # Set a lower limit for flow; below the limit no lag is calculated.\n",
+    "    # If we assume that the time lag mostly comes from location differences of the rain measurement station, it makes no sense to calculate the time shift from dry weather.\n",
+    "    # Additionally the influence of noise may be larger for low flow.\n",
+    "    min_q_res: typing.Union[float, str] = 0.0\n",
+    "\n",
+    "    # Mode to use for cross correlation. \"full\" or \"same\" uses zero padding. Zero padding makes values at the edges smaller, which is desirable for our application.\n",
+    "    # When win_pad (see below) >= max_lag_i, \"same\" is an efficient mode; otherwise \"full\" is needed.\n",
+    "    # \"valid\" does not make sense here: cross correlation is a sum of products, it is largest where the largest values of one window match the largest values of the other window.\n",
+    "    # Therefore maximizing cross correlation only finds the lag if the compared arrays have a similar shape. If one of our windows is wider than the other and the wider window has a large peak\n",
+    "    # that is as wide as the thinner window, cross correlation will be maximized wherever the thinner window is inside the peak, even if the shape does not match at all.\n",
+    "    correlation_mode: str = \"same\"\n",
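+    "    # Illustrative example (made-up numbers) of the scipy calls used in calc_plot below:\n",
+    "    #   a = [0, 1, 2, 1, 0]; b = [1, 2, 1, 0, 0]  # a is b delayed by one step\n",
+    "    #   lags = signal.correlation_lags(len(a), len(b), mode=\"same\")\n",
+    "    #   lags[np.argmax(signal.correlate(a, b, mode=\"same\"))]  # -> 1: a lags b by one timestep\n",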
+    "    # width for the plot, ignored if a fig is passed\n",
+    "    width: float = 12\n",
+    "    # A height is effectively 0 if the corresponding part is not plotted.\n",
+    "    # height for a window plot, in inches.\n",
+    "    height_win: float = 1\n",
+    "    # height for a correlation plot, in inches.\n",
+    "    height_corr: float = 1\n",
+    "    # height for the lag plot, in inches.\n",
+    "    height_lag: float = 1.5\n",
+    "    # height for the warp plot, in inches.\n",
+    "    height_warp: float = 5\n",
+    "\n",
+    "    def estimate_height(self):\n",
+    "        \"\"\"Estimate how high the plot should be, in inches.\"\"\"\n",
+    "        # \"peak\" yields exactly one window example\n",
+    "        win_count = 1 if self.win_times == \"peak\" else len(self.win_times)\n",
+    "        return (\n",
+    "            self.height_win * self.plt_win * win_count\n",
+    "            + self.height_corr * self.plt_corr * win_count\n",
+    "            + self.plt_lag * self.height_lag\n",
+    "            + self.plt_warp * self.height_warp\n",
+    "        )\n",
+    "\n",
+    "    def calc_plot(self, fig=None):\n",
+    "        \"\"\"Calculates the lag and plots a figure. If a figure is passed, plots on new axes inside that figure, otherwise creates a new figure.\n",
+    "        Returns: The lag, a figure and some statistics.\"\"\"\n",
+    "        event = optim_parts.loc[self.label, \"Event\"]\n",
+    "        sensor = optim_parts.loc[self.label, \"Sensor\"]\n",
+    "        hd_result_spec = make_hd_result_spec(event, sensor)\n",
+    "        hd_reference_spec = make_hd_reference_spec(event, sensor)\n",
+    "        start = optim_parts.loc[self.label, \"Start\"]\n",
+    "        end = optim_parts.loc[self.label, \"End\"]\n",
+    "        # add padding, so that later start and end can be shifted\n",
+    "        pad = pd.to_timedelta(self.window)\n",
+    "        start_og = start\n",
+    "        end_og = end\n",
+    "        start -= pad\n",
+    "        end += pad\n",
+    "        hd_result_spec[\"start\"] = start\n",
+    "        hd_reference_spec[\"start\"] = start\n",
+    "        hd_result_spec[\"end\"] = end\n",
+    "        hd_reference_spec[\"end\"] = end\n",
+    "        # Just in case the database or simulation file misses some results, we use the index with all times for both.\n",
+    "        full_idx = pd.date_range(start, end, freq=timestep)\n",
+    "        result_hd = read_result(**hd_result_spec)\n",
+    "        result_hd = result_hd.rename(\"simulation result\")\n",
+    "\n",
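+    "        # Read the reference discharge over the padded window, so that the cross-correlation\n",
+    "        # windows near the event edges also have measured data to compare against.\n",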
+    "        with db_engine.begin() as con:\n",
+    "            reference_hd = pd.read_sql_query(\n",
+    "                sa.text(\n",
+    "                    f\"select datetime, {hd_col} from readings_smoothed where sensor_name = :sensor and datetime>=:start and datetime<=:end\"\n",
+    "                ),\n",
+    "                con,\n",
+    "                params=hd_reference_spec,\n",
+    "                index_col=\"datetime\",\n",
+    "            )[hd_col].reindex(full_idx)\n",
+    "        # TODO: check times in database vs event table\n",
+    "        if not reference_hd.notna().all():\n",
+    "            warn(f\"\"\"readings_smoothed should contain no missing values. Check database.\n",
+    "            Database may not contain enough padding. Needed: {self.window}.\n",
+    "            First to last valid needed: {start} to {end}\n",
+    "            First to last valid found: {reference_hd.first_valid_index()} to {reference_hd.last_valid_index()}\"\"\")\n",
+    "            reference_hd = reference_hd.interpolate().ffill().bfill()\n",
+    "        # adjust unit (q_lps is in l/s; dividing by 1000 gives m3/s to match the simulated discharge)\n",
+    "        reference_hd = reference_hd / 1000\n",
+    "        reference_hd = reference_hd.rename(\"reference\")\n",
+    "        assert reference_hd.index.equals(result_hd.index), \"Index needs to match\"\n",
+    "        result_numpy = result_hd.to_numpy().astype(\"double\")\n",
+    "        reference_numpy = reference_hd.to_numpy().astype(\"double\")\n",
+    "        if isinstance(self.min_q_res, str) and self.min_q_res.endswith(\"%\"):\n",
+    "            q = float(self.min_q_res.removesuffix(\"%\")) / 100\n",
+    "            min_q_res = result_hd.quantile(q)\n",
+    "        else:\n",
+    "            min_q_res = float(self.min_q_res)\n",
+    "        # set the maximum amount of lag in number of timesteps\n",
+    "        max_lag_i = int(np.rint(pd.to_timedelta(self.window) / timestep))\n",
+    "        # set the size of the window as distance from the middle to the edges\n",
+    "        win_pad = max_lag_i\n",
+    "        # save the lag for each point in a series\n",
+    "        lag = pd.Series(\n",
+    "            data=pd.to_timedelta(pd.NA),\n",
+    "            index=full_idx,\n",
+    "            name=\"lag\",\n",
+    "            dtype=\"timedelta64[ns]\",\n",
+    "        )\n",
+    "        # keep some examples of correlation for plotting\n",
+    "        if self.win_times == \"peak\":\n",
+    "            win_times = (np.argmax(result_numpy),)\n",
+    "        else:\n",
+    "            win_times = self.win_times\n",
+    "        cor_examples = []\n",
+    "        # check if examples are valid:\n",
+    "        for t_exp in win_times:\n",
+    "            if t_exp < 0 or t_exp >= len(lag):\n",
+    "                warn(\n",
+    "                    f\"t={t_exp} needs to be in range [0,{len(lag)-1}]. Label {self.label}\"\n",
+    "                )\n",
+    "        # calculate lag for each position\n",
+    "        for t in range(len(lag)):\n",
+    "            if result_numpy[t] >= min_q_res:\n",
+    "                # Start and end of window; make sure indices are inside the valid range.\n",
+    "                win_start = max(0, t - win_pad)\n",
+    "                win_end = min(t + win_pad, len(result_numpy) - 1)\n",
+    "                # extract window data\n",
+    "                res_part = result_numpy[win_start : win_end + 1]\n",
+    "                ref_part = reference_numpy[win_start : win_end + 1]\n",
+    "                # use scipy.signal to calculate the correlation\n",
+    "                correlation = signal.correlate(\n",
+    "                    res_part, ref_part, mode=self.correlation_mode\n",
+    "                )\n",
+    "                # signal.correlation_lags tells us how the index of the correlation result matches a lag in the original data.\n",
+    "                # For our cases the lags should just be an array of the form [-n,...,0,...,n], but this method handles edge cases\n",
+    "                # and different correlation_modes for us.\n",
+    "                all_lags = signal.correlation_lags(\n",
+    "                    res_part.size, ref_part.size, mode=self.correlation_mode\n",
+    "                )\n",
+    "                # use is_lag_valid to only search lags smaller than the limit for the best result\n",
+    "                is_lag_valid = np.abs(all_lags) <= max_lag_i\n",
+    "                idx_lag = all_lags[is_lag_valid][np.argmax(correlation[is_lag_valid])]\n",
+    "                lag.iloc[t] = idx_lag * timestep\n",
+    "                # Put data from some steps into cor_examples for plotting them later; examples are configured by self.win_times.\n",
+    "                if t in win_times:\n",
+    "                    cor_examples.append(\n",
+    "                        dict(\n",
+    "                            t=t,\n",
+    "                            win_start=win_start,\n",
+    "                            win_end=win_end,\n",
+    "                            res_part=res_part,\n",
+    "                            ref_part=ref_part,\n",
+    "                            correlation=correlation,\n",
+    "                            all_lags=all_lags,\n",
+    "                            is_lag_valid=is_lag_valid,\n",
+    "                            idx_lag=idx_lag,\n",
+    "                        )\n",
+    "                    )\n",
+    "        raw_lag = lag.copy().rename(\"raw lag\")\n",
+    "        # interpolate nans, fill start/end with nearest value\n",
+    "        lag = lag.interpolate().bfill().ffill().fillna(pd.to_timedelta(\"0s\"))\n",
+    "        # Smooth the lag; this requires conversion to float, use timestep for that.\n",
+    "        # The smoothing corrects random errors and sudden shifts in the lag.\n",
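+    "        # sigma = max_lag_i/6 ties the smoothing width to the search window:\n",
+    "        # about 99.7% of the Gaussian weight lies within +-max_lag_i/2 timesteps (3 sigma).\n",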
" lag = lag.interpolate().bfill().ffill().fillna(pd.to_timedelta(\"0s\"))\n", + " # smooth lag, requires conversion to float, use timestep for that\n", + " # the smooting corrects random errors and sudden shifts in the lag.\n", + " lag.loc[:] = (\n", + " gaussian_filter1d((lag / timestep).to_numpy(), max_lag_i / 6) * timestep\n", + " )\n", + " lag = lag.rename(\"smoothed lag\")\n", + " reference_hd_shifted = adjust_lag(reference_hd, lag)\n", + " # compute nse values for hd data\n", + " # this can be used to estimate wheter shifting improved the model\n", + " # and - after calibration - as reference for nse on concentration\n", + " # remember that start and end are padded, while _og are the oroginal values\n", + " start_shifted, end_shifted = adjust_start_end(start_og, end_og, lag)\n", + " hd_stats = {\n", + " \"nse_og\": hde.nse(\n", + " result_hd[start_og:end_og].to_numpy(), reference_hd[start_og:end_og].to_numpy()\n", + " ),\n", + " \"nse_shifted\": hde.nse(\n", + " result_hd[start_shifted:end_shifted].to_numpy(),\n", + " reference_hd_shifted[start_shifted:end_shifted].to_numpy(),\n", + " ),\n", + " \"neg_bounded_nse_og\": -hde.nse_c2m(\n", + " result_hd[start_og:end_og].to_numpy(), reference_hd[start_og:end_og].to_numpy()\n", + " ),\n", + " \"neg_bounded_nse_shifted\": -hde.nse_c2m(\n", + " result_hd[start_shifted:end_shifted].to_numpy(),\n", + " reference_hd_shifted[start_shifted:end_shifted].to_numpy(),\n", + " ),\n", + " }\n", + "\n", + " # now plot the result\n", + " if fig == None:\n", + " fig = plt.figure(figsize=(self.width, self.estimate_height()))\n", + " # note that parts that are not plotted still get an ax\n", + " # this ax gets a height_ratio of 0 and is later made invisible.\n", + " axs = fig.subplots(\n", + " 2 + 2 * len(cor_examples),\n", + " height_ratios=[\n", + " self.height_win * self.plt_win,\n", + " self.height_corr * self.plt_corr,\n", + " ]\n", + " * len(cor_examples)\n", + " + [self.plt_lag * self.height_lag]\n", + " + [self.plt_warp * self.height_warp],\n", + " )\n", + " # for each correlation example we plot the window with data and the correlation.\n", + " exp_axs = axs[: 2 * len(cor_examples)]\n", + " # go over axes in pairs\n", + " # ::2 mean we use steps of size 2, 1::2 means we start at idx 1 and uses step 2\n", + " for winax, corax, exp in zip(exp_axs[::2], exp_axs[1::2], cor_examples):\n", + " # plot windows\n", + " if self.plt_win:\n", + " win_x = np.arange(exp[\"win_start\"], exp[\"win_end\"] + 1)\n", + " winax.plot(\n", + " win_x, exp[\"res_part\"], label=\"simulation window\", color=\"blue\"\n", + " )\n", + " winax.plot(\n", + " win_x, exp[\"ref_part\"], label=\"reference window\", color=\"black\"\n", + " )\n", + " # mark the time point to which the window refers\n", + " winax.axvline(exp[\"t\"], label=f\"t={exp['t']}\", color=\"darkgray\")\n", + " # set xlims so that the window is positioned relatively to all data for that event.\n", + " winax.set_xlim(0, len(result_numpy))\n", + " winax.legend(ncol=2)\n", + " else:\n", + " winax.set_visible(False)\n", + " # plot correlation\n", + " if self.plt_corr:\n", + " corax.plot(\n", + " exp[\"all_lags\"],\n", + " exp[\"correlation\"],\n", + " label=\"cor(simulation,reference)\",\n", + " color=(0.2, 0.2, 0.6, 1),\n", + " )\n", + " # mark the lag\n", + " corax.axvline(\n", + " exp[\"idx_lag\"],\n", + " label=f\"lag={exp['idx_lag']} timesteps\",\n", + " color=\"orange\",\n", + " )\n", + " corax.legend()\n", + " else:\n", + " corax.set_visible(False)\n", + " # now plot the lag\n", + " ax_lag, ax_shifted = axs[2 
+    "        ax_lag, ax_shifted = axs[2 * len(cor_examples) :]\n",
+    "        if self.plt_lag:\n",
+    "            raw_lag_ts = raw_lag / timestep\n",
+    "            lag_ts = lag / timestep\n",
+    "            raw_lag_ts.plot(\n",
+    "                ax=ax_lag,\n",
+    "                color=(0.5, 0.1, 0.1, 1),\n",
+    "            )\n",
+    "            # first draw the non-interpolated part\n",
+    "            s_lag = lag_ts.copy().rename(\"smoothed lag\")\n",
+    "            s_lag[raw_lag.isna()] = np.nan\n",
+    "            s_lag.plot(ax=ax_lag, color=(0.8, 0.2, 0.2, 0.7))\n",
+    "            # draw the interpolated part, if any\n",
+    "            if raw_lag.isna().any():\n",
+    "                i_lag = lag_ts.copy().rename(\"interpolated lag\")\n",
+    "                i_lag[raw_lag.notna()] = np.nan\n",
+    "                i_lag.plot(ax=ax_lag, color=(0.8, 0.2, 0.2, 0.7), linestyle=\"--\")\n",
+    "            ax_lag.set_title(f\"lag, timestep={timestep}\")\n",
+    "            ax_lag.legend()\n",
+    "        else:\n",
+    "            ax_lag.set_visible(False)\n",
+    "\n",
+    "        # plot the original reference data and the shifted reference data\n",
+    "        if self.plt_warp:\n",
+    "            reference_hd.rename(\"original reference\").plot(\n",
+    "                ax=ax_shifted, color=\"black\", linestyle=\":\"\n",
+    "            )\n",
+    "            reference_hd_shifted.rename(\"warped reference\").plot(\n",
+    "                ax=ax_shifted, color=\"black\"\n",
+    "            )\n",
+    "            # Also plot the simulation result.\n",
+    "            # If our method for lag correction worked, the shifted reference should match the simulation results better than before.\n",
+    "            result_hd.rename(\"simulated\").plot(ax=ax_shifted, color=\"blue\")\n",
+    "            ax_shifted.legend()\n",
+    "            fig.suptitle(\"label \" + str(self.label) + \", window \" + self.window, y=0.95)\n",
+    "        else:\n",
+    "            ax_shifted.set_visible(False)\n",
+    "        return lag, fig, hd_stats\n",
+    "\n",
+    "\n",
+    "hd_stats_dict = {}\n",
+    "for label in tqdm(optim_parts.index):\n",
+    "    lag, fig, part_stats = CcCalculator(label, \"6h\", min_q_res=0.1).calc_plot()\n",
+    "    hd_stats_dict[label] = part_stats\n",
+    "    fig.suptitle(\"Label \" + str(label) + \" lag from cross correlation over window\")\n",
+    "    fig.savefig(out_folder / f\"lag_hd_cc_{label}_.png\")\n",
+    "    reference_lags[label] = lag"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# adjust the start and end times of the events according to the lag\n",
+    "for label in optim_parts.index:\n",
+    "    start = optim_parts.loc[label, \"Start\"]\n",
+    "    end = optim_parts.loc[label, \"End\"]\n",
+    "    start_shifted, end_shifted = adjust_start_end(start,end,reference_lags[label])\n",
+    "    optim_parts.loc[label, \"Start\"] = start_shifted\n",
+    "    optim_parts.loc[label, \"End\"] = end_shifted\n",
+    "    optim_parts.loc[label, \"StartOriginal\"] = start\n",
+    "    optim_parts.loc[label, \"EndOriginal\"] = end"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "optim_parts[[\"Event\",\"Start\",\"End\",\"StartOriginal\",\"EndOriginal\"]]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# remove timezones for excel\n",
+    "optim_parts_c = optim_parts.copy()\n",
+    "for col in [\"Start\",\"End\",\"StartOriginal\",\"EndOriginal\"]:\n",
+    "    optim_parts_c[col] = optim_parts_c[col].dt.tz_localize(None)\n",
+    "optim_parts_c.to_excel(model_folder/\"optim_parts.xlsx\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "hd_stats=pd.DataFrame(hd_stats_dict)\n",
+    "hd_stats[\"mean\"]=hd_stats.mean(axis=\"columns\")\n",
+    "hd_stats = hd_stats.transpose()\n",
+    "hd_stats.to_excel(model_folder/\"hd_stats.xlsx\")\n",
+    "hd_stats"
+   ]
+  },
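+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For reading the table above: hydroeval's bounded NSE (`nse_c2m`) relates to the plain NSE via C2M = NSE / (2 - NSE). A quick synthetic check of that relation (illustrative only, made-up numbers):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sim = np.array([1.0, 2.0, 3.0, 2.5])\n",
+    "obs = np.array([1.1, 1.9, 3.2, 2.4])\n",
+    "nse = hde.nse(sim, obs)\n",
+    "# nse_c2m should equal nse / (2 - nse)\n",
+    "print(nse, hde.nse_c2m(sim, obs), nse / (2 - nse))"
+   ]
+  },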
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Adjusts the time lag for the reference values and puts result into a csv.\n", + "# Also plots original and shifted concentration reference values.\n", + "# The plot also contains the anomaly score (higher means more trustworthy data) for reference.\n", + "\n", + "# collect reference for each label in a dict.\n", + "ref_parts = {}\n", + "# the defines wheter the plots are created.\n", + "plot_reference_shift = True\n", + "for label in optim_parts.index:\n", + " # only look at time that is of interest:\n", + " sensor = optim_parts.loc[label,\"Sensor\"]\n", + " start = optim_parts.loc[label, \"Start\"]\n", + " end = optim_parts.loc[label, \"End\"]\n", + " start_read = reference_lags[label].index.min()\n", + " end_read = reference_lags[label].index.max()\n", + " timestep = optim_parts.loc[label, \"Timestep\"]\n", + " # use this before warping, index with original time and padding\n", + " read_idx = reference_lags[label].index\n", + " # use adjusted times without padding for final index\n", + " full_shifted_idx = pd.date_range(start, end, freq=timestep, tz=timezone)\n", + " # read the reference values and convert them so that they fit the results from simulation\n", + "\n", + " with db_engine.begin() as con:\n", + " ref_part = pd.read_sql_query(\n", + " sa.text(\n", + " f\"select datetime, {conc_col} from readings_smoothed where sensor_name = :sn and datetime>=:start and datetime<=:end\"\n", + " ),\n", + " con,\n", + " params={\"start\": start_read, \"end\": end_read, \"sn\": sensor},\n", + " index_col=\"datetime\",\n", + " )[conc_col].tz_convert(timezone)\n", + " ref_part_raw = ref_part.copy()\n", + " ref_part=ref_part.sort_index().reindex(read_idx, method=\"nearest\")\n", + " assert isinstance(\n", + " ref_part.index, pd.DatetimeIndex\n", + " ), \"Need DatetimeIndex (expected in first column)\"\n", + " assert ref_part.isna().any(axis=None) == False\n", + " # adjust lag\n", + " ref_part_shifted = adjust_lag(ref_part,reference_lags[label])\n", + " ref_part_shifted = ref_part_shifted.reindex(full_shifted_idx)\n", + " ref_parts[label] = ref_part_shifted\n", + " if plot_reference_shift:\n", + " fig,ax = plt.subplots(figsize=(12,3))\n", + " ref_part_shifted.plot(ax=ax,color=\"cyan\",label=\"shifted\")\n", + " ref_part_raw.plot(ax=ax,color=\"darkblue\",linewidth=0.3, label = \"not shifted\",marker=\"o\", markersize=0.8)\n", + " # also plot where score is low, may explain odd parts\n", + " with db_engine.begin() as con:\n", + " score = pd.read_sql(\n", + " sa.text(\n", + " \"\"\"\n", + " SELECT datetime,score\n", + " FROM anomaly_scores \n", + " WHERE sensor_name = :sn AND datetime>= :start AND datetime<= :end AND parameter= :param;\n", + " \"\"\"\n", + " ),\n", + " con,\n", + " params={\"start\": start, \"end\": end, \"sn\": sensor, \"param\":conc_col},\n", + " index_col=\"datetime\",\n", + " dtype={\n", + " \"score\": \"float16\",\n", + " },\n", + " )[\"score\"]\n", + " assert score.index.is_unique, \"check query if index is not unique\"\n", + " score_low = score<0.45\n", + " if score_low.any():\n", + " ax.fill_between(score.index, 0,1,where=score_low, facecolor=(0,0,0,0.2), transform=ax.get_xaxis_transform(), label=\"anomaly score<0.45\",\n", + " edgecolor=(0,0,0,0), linewidth=0.0)\n", + " data_missing=ref_part_raw.reindex(read_idx).isna()\n", + " if data_missing.any():\n", + " ax.fill_between(read_idx, 0,1,where=data_missing, facecolor=(1,0,0,0.2), transform=ax.get_xaxis_transform(), label=\"missing data\",\n", + " 
+    "        ax.legend()\n",
+    "        ax.set_title(label)\n",
+    "        display(fig)\n",
+    "        if not (ax.lines or ax.collections or ax.get_images()):\n",
+    "            warn(\"empty plot for label \"+label)\n",
+    "        plt.close(fig)\n",
+    "reference = pd.concat(ref_parts)\n",
+    "reference.index.names = [\"Label\", \"Datetime\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "assert reference.notna().all(), \"reference should not have nan, check the script for mistakes.\"\n",
+    "reference.to_csv(model_folder/\"reference.csv\")\n",
+    "reference"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (Spyder)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.21"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}