From ec4d78416113a54a4308d7abdeb75de0254c5636 Mon Sep 17 00:00:00 2001
From: Sebastian Kerger <sebastian.kerger@rwth-aachen.de>
Date: Mon, 17 Mar 2025 15:00:38 +0100
Subject: [PATCH] Upload New File

---
 .../model_simplification.ipynb                | 667 ++++++++++++++++++
 1 file changed, 667 insertions(+)
 create mode 100644 Optimization of control rules/model_simplification.ipynb

diff --git a/Optimization of control rules/model_simplification.ipynb b/Optimization of control rules/model_simplification.ipynb
new file mode 100644
index 0000000..7247718
--- /dev/null
+++ b/Optimization of control rules/model_simplification.ipynb	
@@ -0,0 +1,667 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Script to read data from the MIKE+ model that is later used to build a simpler model\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from warnings import warn\n",
+    "import subprocess\n",
+    "from multiprocessing.connection import Client\n",
+    "from time import sleep\n",
+    "from pathlib import Path\n",
+    "\n",
+    "import pandas as pd\n",
+    "import geopandas as gpd\n",
+    "import numpy as np\n",
+    "import matplotlib as mpl\n",
+    "import matplotlib.pyplot as plt\n",
+    "from tqdm.notebook import tqdm\n",
+    "import sqlalchemy as sa\n",
+    "import seaborn as sns\n",
+    "import scipy.signal\n",
+    "import scipy.optimize\n",
+    "import mikeio1d.res1d as mr\n",
+    "import networkx as nx\n",
+    "import scipy.integrate\n",
+    "import scipy.ndimage\n",
+    "import scipy.interpolate as si\n",
+    "from statsmodels.nonparametric.smoothers_lowess import lowess\n",
+    "\n",
+    "from entfrachten.analysis import drainage_system_graph"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# helper for more concise indexing in pandas\n",
+    "ids = pd.IndexSlice"
+   ]
+  },
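+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The next cell is an illustration only: `pd.IndexSlice` (aliased as `ids`) makes `.loc` slicing on a MultiIndex more readable. The frame and names below are made up, not model data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# illustrative only: a tiny MultiIndex frame to show how `ids` is used\n",
+    "demo = pd.DataFrame(\n",
+    "    {\"volume\": [1.0, 2.0, 3.0, 4.0]},\n",
+    "    index=pd.MultiIndex.from_product(\n",
+    "        [[\"t0\", \"t1\"], [\"storage_a\", \"storage_b\"]], names=[\"time\", \"storage\"]\n",
+    "    ),\n",
+    ")\n",
+    "# select all times for one storage without nested-tuple syntax\n",
+    "demo.loc[ids[:, \"storage_a\"], :]"
+   ]
+  },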
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Settings\n",
+    "Define which model and result files are read."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# define the path to the model\n",
+    "model_path = Path(R\"path\\example_model.sqlite\")\n",
+    "# define the path to the folder that holds the res1d result files\n",
+    "r1d_files = list(Path(\"results_path/\").glob(\"*.res1d\"))\n",
+    "# use the first res1d file for reading the network structure\n",
+    "network_res1d_path = r1d_files[0]\n",
+    "r1d_files"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# define an Excel file in which all storages for the EQF are listed. Each structure should contain the descriptive name,\n",
+    "# the name in MIKE+ (e.g. the name of the node), weir names if present, the max level of the structure if it has no weir, and\n",
+    "# the names of the outflow pumps or reaches if they are further down the system and not directly at the storage structure\n",
+    "storage_df = pd.read_excel(\"storages.xlsx\")\n",
+    "storage_df = storage_df.set_index(\"name\")\n",
+    "storage_df"
+   ]
+  },
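+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For reference, a minimal sketch of the columns the notebook accesses later (`mike_name`, `weir`, `weir_2`, `max_level_if_no_weir`, `pump_reach`, `pump_node`). All values here are hypothetical placeholders, not taken from any model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# hypothetical example of the expected storages.xlsx layout\n",
+    "example_storages = pd.DataFrame(\n",
+    "    {\n",
+    "        \"name\": [\"storage_a\"],\n",
+    "        \"mike_name\": [\"node_123\"],\n",
+    "        \"weir\": [\"weir_1\"],\n",
+    "        \"weir_2\": [None],\n",
+    "        \"max_level_if_no_weir\": [np.nan],\n",
+    "        \"pump_reach\": [\"pump_reach_1\"],\n",
+    "        \"pump_node\": [\"node_124\"],\n",
+    "    }\n",
+    ").set_index(\"name\")\n",
+    "example_storages"
+   ]
+  },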
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Reading Network Data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## open files/connections"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# the model data is stored in an SQLite database, which we can read from\n",
+    "model_db = sa.create_engine(\"sqlite:///\"+str(model_path))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Nodes and reaches are read from the res1d file.\n",
+    "# A reach in this file can be any connection between nodes, including weirs, pumps, etc. This makes it simpler to work with than the model SQLite.\n",
+    "# for reference on reading the data see:\n",
+    "# https://github.com/DHI/mikeio1d/discussions/53#discussion-5231633\n",
+    "nwr = mr.Res1D(str(network_res1d_path))\n",
+    "nwr.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nwr.quantities"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Nodes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# for accessing data see: https://github.com/DHI/mikeio1d/discussions/53\n",
+    "# read all nodes into records; keep the numeric r1d index so reaches can be matched to node names later\n",
+    "node_records = []\n",
+    "for i, node in enumerate(list(nwr.data.Nodes)):\n",
+    "    node_records.append(\n",
+    "        {\n",
+    "            \"name\": node.ID,\n",
+    "            \"r1d_index\": i,\n",
+    "            \"type\": str(node.GetType()).split(\".\")[-1].removeprefix(\"Res1D\"),\n",
+    "            \"x\":node.XCoordinate,\n",
+    "            \"y\": node.YCoordinate,\n",
+    "        }\n",
+    "    )\n",
+    "node_df = pd.DataFrame.from_records(node_records)\n",
+    "# needed later to map the numeric node indices stored in the res1d reaches to node names\n",
+    "r1d_node = node_df[[\"r1d_index\",\"name\"]].set_index(\"r1d_index\")[\"name\"]\n",
+    "node_df = node_df.set_index(\"name\")\n",
+    "# some data is not in the res1d, use sqlite\n",
+    "with model_db.begin() as con:\n",
+    "    node_sqlite = pd.read_sql(\"\"\"\n",
+    "        SELECT muid as name, groundlevel as ground_level, invertlevel as bottom_level\n",
+    "        FROM msm_Node\"\"\",con).set_index(\"name\")\n",
+    "assert set(node_df.index)==set(node_sqlite.index)\n",
+    "node_df = node_df.join(node_sqlite)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Reaches"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "reach_records = []\n",
+    "for reach in list(nwr.data.Reaches):\n",
+    "    reach_records.append(\n",
+    "        {\n",
+    "            \"name\": reach.Name,\n",
+    "            \"start\": r1d_node[reach.StartNodeIndex],\n",
+    "            \"end\": r1d_node[reach.EndNodeIndex],\n",
+    "            \"length\": reach.Length,\n",
+    "        }\n",
+    "    )\n",
+    "reach_df = pd.DataFrame.from_records(reach_records).set_index(\"name\")\n",
+    "# derive the type from the name: everything other than a plain link has a prefix \"{type}:\"\n",
+    "has_colon = reach_df.index.to_series().str.contains(\":\")\n",
+    "reach_df.loc[~has_colon,\"type\"]=\"Link\"\n",
+    "reach_df.loc[has_colon,\"type\"]=reach_df.index.to_series().str.split(\":\",expand=True)[0]\n",
+    "assert reach_df.index.is_unique"
+   ]
+  },
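+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A small illustration (made-up names) of the prefix convention used above: reach names such as `Weir:W1` carry their type before the colon; plain links have no prefix."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# illustrative only: how the \"{type}:\" prefix maps names to types\n",
+    "demo_names = pd.Series([\"Weir:W1\", \"Pump:P2\", \"link_42\"])\n",
+    "demo_has_colon = demo_names.str.contains(\":\")\n",
+    "demo_types = demo_names.str.split(\":\", expand=True)[0].where(demo_has_colon, \"Link\")\n",
+    "list(zip(demo_names, demo_types))"
+   ]
+  },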
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Weirs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with model_db.begin() as con:\n",
+    "    weir_df = pd.read_sql(\"\"\"\n",
+    "        SELECT muid as id, 'Weir:'||muid as reach, fromnodeid as start, tonodeid as end, crestlevel as crest_level\n",
+    "        FROM msm_weir;\"\"\",con).set_index(\"id\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Analysis of the Network Structure"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# some storages are located by a reach, others by a node.\n",
+    "# for simplicity all should be located by a node; for reaches use the end node.\n",
+    "for storage_name, mike_name in storage_df[\"mike_name\"].items():\n",
+    "    if mike_name in node_df.index:\n",
+    "        storage_df.loc[storage_name,\"node\"]=mike_name\n",
+    "    elif mike_name in reach_df.index:\n",
+    "        storage_df.loc[storage_name,\"node\"]=reach_df.loc[mike_name,\"end\"]\n",
+    "    else:\n",
+    "        raise Exception(f\"mike name {mike_name} not found in reaches or nodes.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# put weir height info into storage df\n",
+    "storage_df[\"weir_crest_level\"]=storage_df[\"weir\"].map(weir_df[\"crest_level\"])\n",
+    "storage_df[\"weir_reach\"]=storage_df[\"weir\"].map(weir_df[\"reach\"])\n",
+    "storage_df[\"weir_crest_level_2\"]=storage_df[\"weir_2\"].map(weir_df[\"crest_level\"])\n",
+    "storage_df[\"weir_reach_2\"]=storage_df[\"weir_2\"].map(weir_df[\"reach\"])\n",
+    "# now calculate the max level: it is the lowest of the weir crest levels and the max level given for structures without a weir\n",
+    "storage_df[\"max_level\"]=storage_df[[\"weir_crest_level\",\"weir_crest_level_2\",\"max_level_if_no_weir\"]].min(axis=\"columns\")\n",
+    "# add bottom level from nodes\n",
+    "storage_df[\"bottom_level\"]=storage_df[\"node\"].map(node_df[\"bottom_level\"])\n",
+    "storage_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# quick check: bottom levels of the storage nodes as read from the res1d\n",
+    "node_df.loc[storage_df[\"node\"],\"bottom_level\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# cross-check against the values from the model SQLite\n",
+    "node_sqlite.loc[storage_df[\"node\"],\"bottom_level\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# check the network graph for cycles\n",
+    "# for this we use the networkx package, which has a handy method for making a graph from a pandas dataframe \n",
+    "g = nx.from_pandas_edgelist(reach_df, source='start', target='end',\n",
+    "                            edge_attr='length',create_using=nx.DiGraph)\n",
+    "for c in nx.simple_cycles(g):\n",
+    "    warn(\"Cycle in network graph, some later evaluations may be incorrect\")\n",
+    "    print(c)"
+   ]
+  },
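+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A minimal sketch (made-up graph) of what `nx.simple_cycles` reports: any closed loop of directed edges. A well-formed drainage network should yield no cycles."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# illustrative only: a made-up graph with one loop a -> b -> c -> a\n",
+    "demo_g = nx.DiGraph([(\"a\", \"b\"), (\"b\", \"c\"), (\"c\", \"a\"), (\"c\", \"d\")])\n",
+    "list(nx.simple_cycles(demo_g))  # one cycle; node order may be rotated"
+   ]
+  },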
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# draw a map of the network\n",
+    "color_map = {\n",
+    "    \"Manhole\":\"#0000ff\",\n",
+    "    \"Basin\":\"#00ff00\",\n",
+    "    \"SewerJunction\":\"#888888\",\n",
+    "    \"Outlet\":\"#ff0000\",\n",
+    "    \"Link\":\"#0000ff\",\n",
+    "    \"Weir\":\"#ff0000\",\n",
+    "    \"Valve\":\"#8888ff\",\n",
+    "    \"Pump\":\"#ffff00\",\n",
+    "    \"Orifice\":\"#000000\",\n",
+    "}\n",
+    "# This method translates the network to a geopandas dataframe.\n",
+    "# This is very handy for visualization, but does not contain all information needed for further analysis.\n",
+    "# That's why we used other sources for the network information.\n",
+    "map_df = nwr.network.to_geopandas()\n",
+    "map_df = map_df.merge(node_df[[\"type\"]],left_on=\"name\",right_on=\"name\",how=\"left\")\n",
+    "is_reach = map_df[\"group\"]==\"Reach\"\n",
+    "has_colon = map_df[\"name\"].str.contains(\":\")\n",
+    "map_df.loc[is_reach&~has_colon,\"type\"]=\"Link\"\n",
+    "map_df.loc[is_reach&has_colon,\"type\"]=map_df[\"name\"].str.split(\":\",expand=True)[0]\n",
+    "map_df[\"color\"]=map_df[\"type\"].map(color_map)\n",
+    "map_df.loc[(map_df[\"group\"]==\"Node\")&(map_df[\"name\"].isin(storage_df[\"node\"])),\"color\"]=\"#440000\"\n",
+    "important = ((map_df[\"type\"]!=\"Link\")&(map_df[\"type\"]!=\"Manhole\")&(map_df[\"type\"]!=\"SewerJunction\"))|map_df[\"name\"].isin(storage_df[\"node\"])\n",
+    "# map_df.explore(color=map_df.loc[:,\"color\"],marker_kwds={\"radius\":1.5})\n",
+    "base_map = map_df.loc[~important,:].explore(color=map_df.loc[~important,\"color\"],marker_kwds={\"radius\":1.5})\n",
+    "map_df.loc[important,:].explore(color=map_df.loc[important,\"color\"],marker_kwds={\"radius\":6},m=base_map)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Dynamic Volume"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def read_node_volume(res1d:mr.Res1D,node: str):\n",
+    "    \"\"\"Reads the volume in a node for every time in the res1d.\"\"\"\n",
+    "    try:\n",
+    "        volume = res1d.read(mr.QueryDataNode(\"WaterVolume\",node)).iloc[:,0]\n",
+    "    except Exception:\n",
+    "        warn(f\"Could not read volume for node {node}. Returning 0\")\n",
+    "        volume = pd.Series(0,index=res1d.time_index)\n",
+    "    return volume\n",
+    "\n",
+    "def read_reach_volume(res1d:mr.Res1D, reach:str):\n",
+    "    \"\"\"Reads the volume in a reach for every time in the res1d.\"\"\"\n",
+    "    # See https://github.com/DHI/mikeio1d/blob/main/notebooks/res1d.ipynb:\n",
+    "    # 'Get time series values summed for all gridpoints in reach with given quantity, i.e. useful for getting total volume in reach.\n",
+    "    # values_sum = res1d_network.get_reach_sum_values(\"9l1\", \"Discharge\")'\n",
+    "    try:\n",
+    "        vol_values = res1d.get_reach_sum_values(reach, \"WaterVolume\")\n",
+    "    except Exception:\n",
+    "        warn(f\"Could not read volume for reach {reach}. Returning 0\")\n",
+    "        vol_values = 0\n",
+    "    return pd.Series(vol_values,index=res1d.time_index)\n",
+    "\n",
+    "# we need to read only some nodes/reaches at a time to limit memory usage\n",
+    "# but reading one by one is too slow. Therefore read in chunks.\n",
+    "def chunk_list(l:list,n:int):\n",
+    "    \"\"\"Splits l into chunks that are no larger than n.\"\"\"\n",
+    "    return [l[i:i+n] for i in range(0,len(l),n)]"
+   ]
+  },
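+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Quick sanity check of `chunk_list` on a made-up list: the last chunk may be shorter than `n`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# illustrative only: split 7 items into chunks of at most 3\n",
+    "chunk_list(list(range(7)), 3)  # -> [[0, 1, 2], [3, 4, 5], [6]]"
+   ]
+  },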
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# make a collection of all upstream nodes and reaches\n",
+    "graph = drainage_system_graph.DrainageSystemGraph(reach_df)\n",
+    "upstream = {}\n",
+    "for storage in storage_df.index:\n",
+    "    node = storage_df.loc[storage, \"node\"]\n",
+    "    upstream[storage] = graph.find_upstream_nodes_reaches(\n",
+    "        node,storage_df[\"node\"]\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# set to True if you want to read dynamic_raw_df from a previous run to save time.\n",
+    "read_csv = False\n",
+    "if read_csv:\n",
+    "    dynamic_raw_df = pd.read_csv(\"dynamic_raw.csv\", index_col=0)\n",
+    "else:\n",
+    "    chunk_size = 100\n",
+    "    dynamic_parts = []\n",
+    "    for res_fp in tqdm(r1d_files):\n",
+    "        for storage, (up_nodes, up_reaches) in tqdm(upstream.items()):\n",
+    "            # water_level\n",
+    "            node = storage_df.loc[storage, \"node\"]\n",
+    "            # res1d is too large to read at once, read only one reach/node at a time\n",
+    "            water_level = (\n",
+    "                mr.Res1D(str(res_fp), nodes=[node])\n",
+    "                .read(mr.QueryDataNode(\"WaterLevel\", node))\n",
+    "                .iloc[:, 0]\n",
+    "            )\n",
+    "            # volume and water_level\n",
+    "            volume = pd.Series(index=water_level.index, data=0.0)\n",
+    "            for un_chunk in tqdm(chunk_list(list(up_nodes), chunk_size)):\n",
+    "                chunk_res = mr.Res1D(str(res_fp), nodes=un_chunk)\n",
+    "                for un in un_chunk:\n",
+    "                    volume += read_node_volume(chunk_res, un)\n",
+    "            for ur_chunk in tqdm(chunk_list(list(up_reaches), chunk_size)):\n",
+    "                chunk_res = mr.Res1D(str(res_fp), reaches=ur_chunk)\n",
+    "                for ur in ur_chunk:\n",
+    "                    volume += read_reach_volume(\n",
+    "                        chunk_res,\n",
+    "                        ur,\n",
+    "                    )\n",
+    "            chunk_res = None\n",
+    "            # weir discharge\n",
+    "            # TODO use sum of weir discharges?\n",
+    "            w1 = storage_df.loc[storage, \"weir_reach\"]\n",
+    "            w2 = storage_df.loc[storage, \"weir_reach_2\"]\n",
+    "            if not pd.isnull(w1):\n",
+    "                q_weir_1 = mr.Res1D(str(res_fp), reaches=[w1]).get_reach_start_values(\n",
+    "                    w1, \"Discharge\"\n",
+    "                )\n",
+    "            else:\n",
+    "                q_weir_1 = pd.Series(np.nan, index=water_level.index)\n",
+    "            if not pd.isnull(w2):\n",
+    "                q_weir_2 = mr.Res1D(str(res_fp), reaches=[w2]).get_reach_start_values(\n",
+    "                    w2, \"Discharge\"\n",
+    "                )\n",
+    "            else:\n",
+    "                q_weir_2 = pd.Series(np.nan, index=water_level.index)\n",
+    "\n",
+    "            # pump discharge and height\n",
+    "            pr = storage_df.loc[storage, \"pump_reach\"]\n",
+    "            if not pd.isnull(pr):\n",
+    "                q_pump = mr.Res1D(str(res_fp), reaches=[pr]).get_reach_start_values(\n",
+    "                    pr, \"Discharge\"\n",
+    "                )\n",
+    "            else:\n",
+    "                q_pump = pd.Series(np.nan, index=water_level.index)\n",
+    "            pn = storage_df.loc[storage, \"pump_node\"]\n",
+    "            if not pd.isnull(pn):\n",
+    "                level_pump = (\n",
+    "                    mr.Res1D(str(res_fp), nodes=[pn])\n",
+    "                    .read(mr.QueryDataNode(\"WaterLevel\", pn))\n",
+    "                    .iloc[:, 0]\n",
+    "                )\n",
+    "            else:\n",
+    "                level_pump = pd.Series(np.nan, index=water_level.index)\n",
+    "            dynamic_parts.append(\n",
+    "                pd.DataFrame(\n",
+    "                    {\n",
+    "                        \"storage\": storage,\n",
+    "                        \"water_level\": water_level,\n",
+    "                        \"volume\": volume,\n",
+    "                        \"q_weir_1\": q_weir_1,\n",
+    "                        \"q_weir_2\": q_weir_2,\n",
+    "                        \"q_pump\": q_pump,\n",
+    "                        \"level_pump\": level_pump,\n",
+    "                    }\n",
+    "                )\n",
+    "            )\n",
+    "\n",
+    "    dynamic_raw_df = pd.concat(dynamic_parts, ignore_index=False)\n",
+    "    del dynamic_parts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dynamic_raw_df.to_csv(\"dynamic_raw.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dynamic_raw_df.describe()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# fraction of missing values per storage and column\n",
+    "dynamic_raw_df.set_index(\"storage\",append=True).isna().groupby(\"storage\").mean()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# transparent scatter plot of water level vs volume, to get a feel for the distribution\n",
+    "alpha = 0.03\n",
+    "fig, axs = plt.subplots(nrows=len(storage_df),squeeze=False,figsize=(9,3*len(storage_df)))\n",
+    "for s,ax in zip(storage_df.index,axs[:,0]):\n",
+    "    dynamic_raw_df.query(\"storage==@s\").plot(kind=\"scatter\",x=\"water_level\",y=\"volume\",s=1,label=\"raw\",color=\"black\",ax=ax,alpha=alpha)\n",
+    "    ax.set_title(s)\n",
+    "    ax.legend()\n",
+    "fig"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# bin the water levels to 1 cm steps; per bin, take the 5% quantile of each column (despite the name, dynamic_mean holds quantiles)\n",
+    "dynamic_raw_df[\"h_group\"] = (dynamic_raw_df[\"water_level\"] * 100).round() / 100\n",
+    "dynamic_mean = dynamic_raw_df.groupby([\"storage\", \"h_group\"]).quantile(.05).reset_index()\n",
+    "\n",
+    "def smooth_fkt(df: pd.DataFrame):\n",
+    "    values = df.fillna(0).to_numpy()\n",
+    "    smoothed = scipy.ndimage.gaussian_filter1d(values, sigma=5, axis=0, mode=\"nearest\")\n",
+    "    return pd.DataFrame(smoothed, index=df.index, columns=df.columns)\n",
+    "\n",
+    "\n",
+    "dynamic_df = (\n",
+    "    dynamic_mean.groupby(\"storage\")\n",
+    "    .apply(smooth_fkt, include_groups=False)\n",
+    "    .reset_index()\n",
+    ")"
+   ]
+  },
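+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A synthetic sketch of the 1 cm binning used above: multiplying by 100, rounding and dividing by 100 snaps water levels to the nearest centimetre. The values below are made up."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# illustrative only: made-up levels snapped to 1 cm bins\n",
+    "demo_levels = pd.Series([0.012, 0.014, 0.018, 0.021])\n",
+    "(demo_levels * 100).round() / 100  # -> 0.01, 0.01, 0.02, 0.02"
+   ]
+  },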
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dynamic_mean.to_excel(\"dynamic_mean.xlsx\")\n",
+    "dynamic_df.to_excel(\"dynamic.xlsx\")\n",
+    "# dynamic_lowess.to_excel(\"dynamic_lowess.xlsx\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# line plots of small time spans, uncomment if something looks odd and you want to take a closer look\n",
+    "# plot_chunk_size = 3*60\n",
+    "# print(len(dynamic_raw_df)/plot_chunk_size)\n",
+    "# for i in range(0,min(len(dynamic_raw_df),plot_chunk_size*10),plot_chunk_size):\n",
+    "#     plt.figure(figsize=(12,4))\n",
+    "#     dynamic_raw_df.iloc[i:i+plot_chunk_size][[\"water_level\",\"volume\",\"q_weir_1\"]].plot(subplots=True)\n",
+    "#     plt.suptitle(f\"{i} to {i+plot_chunk_size} of {len(dynamic_raw_df)}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def plot_compare(x,y):\n",
+    "    \"\"\"Plot raw, binned and smoothed data of column y over column x, one subplot per storage.\"\"\"\n",
+    "    fig, axs = plt.subplots(nrows=len(storage_df),squeeze=False,figsize=(9,3*len(storage_df)))\n",
+    "    a_dict = {}\n",
+    "    for s,ax in zip(storage_df.index,axs[:,0]):\n",
+    "        dynamic_raw_df.query(\"storage==@s\").plot(kind=\"scatter\",x=x,y=y,s=1.5,label=\"raw\",color=\"lightgray\",ax=ax)\n",
+    "        dynamic_mean.query(\"storage==@s\").plot(kind=\"scatter\",x=x,y=y,s=2,label=\"avg\",color=\"gray\",ax=ax)\n",
+    "        dynamic_df.query(\"storage==@s\").plot(kind=\"line\",x=x,y=y,label=\"smooth\",color=\"darkblue\",ax=ax)\n",
+    "        # dynamic_lowess.query(\"storage==@s\").plot(kind=\"line\",x=x,y=y,label=\"lowess\",color=\"red\",ax=ax)\n",
+    "        ax.set_title(s)\n",
+    "        ax.legend()\n",
+    "        a_dict[s]=ax\n",
+    "    return fig,a_dict\n",
+    "plot_compare(x=\"water_level\",y=\"volume\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "f,ad = plot_compare(x=\"water_level\",y=\"q_weir_1\")\n",
+    "for s,ax in ad.items():\n",
+    "    crest_level = storage_df.loc[s,\"weir_crest_level\"]\n",
+    "    ax.axvline(crest_level,color=\"orange\",label=\"crest_level\")\n",
+    "    ax.legend()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "f,ad = plot_compare(x=\"water_level\",y=\"q_weir_2\")\n",
+    "for s,ax in ad.items():\n",
+    "    crest_level = storage_df.loc[s,\"weir_crest_level_2\"]\n",
+    "    ax.axvline(crest_level,color=\"orange\",label=\"crest_level\")\n",
+    "    ax.legend()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_compare(\"water_level\",\"q_pump\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_compare(\"water_level\",\"level_pump\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (Spyder)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.21"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
-- 
GitLab