Commit ec4d7841 by Sebastian Kerger: Upload New File
Optimization of control rules/model_simplification.ipynb (+667, -0)
%% Cell type:markdown id: tags:
# Script to read data from the MIKE+ model that is later used to build a simplified model
%% Cell type:code id: tags:
```
python3
from warnings import warn
import subprocess
from multiprocessing.connection import Client
from time import sleep
from pathlib import Path
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import sqlalchemy as sa
import seaborn as sns
import scipy.signal
import scipy.optimize
import mikeio1d.res1d as mr
import networkx as nx
import scipy.integrate
import scipy.ndimage
import scipy.interpolate as si
from statsmodels.nonparametric.smoothers_lowess import lowess
from entfrachten.analysis import drainage_system_graph
```
%% Cell type:code id: tags:
```
python3
# helper for more concise indexing in pandas
ids = pd.IndexSlice
```
%% Cell type:markdown id: tags:
# Settings
Defines which model and result data are read.
%% Cell type:code id: tags:
```
python3
# define the path to the model
model_path = Path(R"path\example_model.sqlite")
# collect all .res1d result files from the results folder; the first file is used to read the network structure
r1d_files = list(Path("results_path/").glob("*.res1d"))
network_res1d_path = r1d_files[0]
r1d_files
```
%% Cell type:code id: tags:
```
python3
# define an Excel file in which all storage structures for the EQF are listed. For each structure it should contain the descriptive name,
# the name in MIKE+ (e.g. the node name), weir names if present, the maximum level of the structure if it has no weir, and also
# the names of the outflow pumps or reaches if they are further down the system and not directly at the storage structure
storage_df = pd.read_excel("storages.xlsx")
storage_df = storage_df.set_index("name")
storage_df
```
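%% Cell type:markdown id: tags:
The exact layout of storages.xlsx is not part of this snapshot; the sketch below reconstructs a minimal template from the columns the notebook accesses further down (mike_name, weir, weir_2, max_level_if_no_weir, pump_reach, pump_node). All values are placeholders.
%% Cell type:code id: tags:
```
python3
# Hypothetical template for storages.xlsx; the column names match what is
# read further down, the example values are placeholders.
template = pd.DataFrame(
    {
        "name": ["storage_a"],
        "mike_name": ["node_or_reach_id"],
        "weir": ["weir_muid"],
        "weir_2": [None],
        "max_level_if_no_weir": [None],
        "pump_reach": [None],
        "pump_node": [None],
    }
)
# template.to_excel("storages.xlsx", index=False)  # writing needs openpyxl
```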
%% Cell type:markdown id: tags:
# Reading Network Data
%% Cell type:markdown id: tags:
## open files/connections
%% Cell type:code id: tags:
```
python3
# the model data is stored as an SQLite database, which we can read directly
model_db = sa.create_engine("sqlite:///"+str(model_path))
```
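%% Cell type:markdown id: tags:
If the table names in the model database are unknown, they can be listed with SQLAlchemy's inspector; a small sketch (the msm_Node and msm_weir tables queried below follow the MIKE+ schema):
%% Cell type:code id: tags:
```
python3
# list the MIKE+ tables available in the model sqlite
inspector = sa.inspect(model_db)
print([t for t in inspector.get_table_names() if t.lower().startswith("msm")])
```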
%% Cell type:code id: tags:
```
python3
# Nodes and reaches are read from the res1d file.
# A reach in this file can be any connection between nodes, including weirs, pumps, etc. Therefore this is simpler than the model sqlite.
# for reference on reading the data see:
# https://github.com/DHI/mikeio1d/discussions/53#discussion-5231633
nwr = mr.Res1D(str(network_res1d_path))
nwr.info()
```
%% Cell type:code id: tags:
```
python3
nwr.quantities
```
%% Cell type:markdown id: tags:
## Nodes
%% Cell type:code id: tags:
```
python3
# for accessing data see: https://github.com/DHI/mikeio1d/discussions/53
# collect the basic node attributes into a dataframe
node_records = []
for i, node in enumerate(list(nwr.data.Nodes)):
node_records.append(
{
"name": node.ID,
"r1d_index": i,
"type": str(node.GetType()).split(".")[-1].removeprefix("Res1D"),
"x":node.XCoordinate,
"y": node.YCoordinate,
}
)
node_df = pd.DataFrame.from_records(node_records)
# this is later needed to get the start and end nodes of reaches, where the r1d only contains an index as a number
r1d_node = node_df[["r1d_index","name"]].set_index("r1d_index")["name"]
node_df = node_df.set_index("name")
# some data is not in the res1d, use sqlite
with model_db.begin() as con:
node_sqlite = pd.read_sql("""
SELECT muid as name, groundlevel as ground_level, invertlevel as bottom_level
FROM msm_Node""",con).set_index("name")
assert set(node_df.index)==set(node_sqlite.index)
node_df = node_df.join(node_sqlite)
```
%% Cell type:markdown id: tags:
## Reaches
%% Cell type:code id: tags:
```
python3
reach_records = []
for reach in list(nwr.data.Reaches):
reach_records.append(
{
"name": reach.Name,
"start": r1d_node[reach.StartNodeIndex],
"end": r1d_node[reach.EndNodeIndex],
"length": reach.Length,
}
)
reach_df = pd.DataFrame.from_records(reach_records).set_index("name")
# derive the type from the name: everything other than a plain link has a prefix "{type}:"
has_colon = reach_df.index.to_series().str.contains(":")
reach_df.loc[~has_colon,"type"]="Link"
reach_df.loc[has_colon,"type"]=reach_df.index.to_series().str.split(":",expand=True)[0]
assert reach_df.index.is_unique
```
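%% Cell type:markdown id: tags:
As a quick sanity check of the prefix convention (a hypothetical reach named "Weir:W1" would get type "Weir", a plain "12l3" stays a Link), the type counts can be inspected:
%% Cell type:code id: tags:
```
python3
# distribution of reach types derived from the name prefixes
reach_df["type"].value_counts()
```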
%% Cell type:markdown id: tags:
## Weirs
%% Cell type:code id: tags:
```
python3
with model_db.begin() as con:
weir_df = pd.read_sql("""
SELECT muid as id, 'Weir:'||muid as reach, fromnodeid as start, tonodeid as end, crestlevel as crest_level
FROM msm_weir;""",con).set_index("id")
```
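%% Cell type:markdown id: tags:
The SQL concatenation 'Weir:'||muid builds reach names in the same form as the res1d reach names parsed above. A hedged consistency check, assuming every weir in the database also appears as a reach in the result file:
%% Cell type:code id: tags:
```
python3
# warn about weirs whose constructed reach name is missing from the res1d reaches
missing_weirs = weir_df.loc[~weir_df["reach"].isin(reach_df.index), "reach"]
if len(missing_weirs) > 0:
    warn(f"weir reaches not found in res1d: {list(missing_weirs)}")
```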
%% Cell type:markdown id: tags:
# Analysis on Network Structure
%% Cell type:code id: tags:
```
python3
# some storages are located by a reach and some by a node.
# for simplicity all should be located by a node; for reaches use the end node.
for storage_name, mike_name in storage_df["mike_name"].items():
if mike_name in node_df.index:
storage_df.loc[storage_name,"node"]=mike_name
elif mike_name in reach_df.index:
storage_df.loc[storage_name,"node"]=reach_df.loc[mike_name,"end"]
else:
raise Exception(f"mike name {mike_name} not found in reaches or nodes.")
```
%% Cell type:code id: tags:
```
python3
# put weir height info into storage df
storage_df["weir_crest_level"]=storage_df["weir"].map(weir_df["crest_level"])
storage_df["weir_reach"]=storage_df["weir"].map(weir_df["reach"])
storage_df["weir_crest_level_2"]=storage_df["weir_2"].map(weir_df["crest_level"])
storage_df["weir_reach_2"]=storage_df["weir_2"].map(weir_df["reach"])
# now calculate max level
storage_df["max_level"]=storage_df[["weir_crest_level","weir_crest_level_2","max_level_if_no_weir"]].min(axis="columns")
# add bottom level from nodes
storage_df["bottom_level"]=storage_df["node"].map(node_df["bottom_level"])
storage_df
```
%% Cell type:code id: tags:
```
python3
node_df.loc[storage_df["node"],"bottom_level"]
```
%% Cell type:code id: tags:
```
python3
node_sqlite.loc[storage_df["node"],"bottom_level"]
```
%% Cell type:code id: tags:
```
python3
# check the network graph for cycles
# for this we use the networkx package, which has a handy method for building a graph from a pandas dataframe
g = nx.from_pandas_edgelist(reach_df, source='start', target='end',
edge_attr='length',create_using=nx.DiGraph)
for c in nx.simple_cycles(g):
warn("Cycle in network graph, some later evaluations may be incorrect")
print(c)
```
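%% Cell type:markdown id: tags:
If a cycle is printed, the reaches that form it can be looked up in reach_df; a small helper sketch, assuming c is a node list as yielded by nx.simple_cycles:
%% Cell type:code id: tags:
```
python3
# map a cycle given as a node list back to the reach names that connect its nodes
def cycle_reaches(cycle_nodes):
    pairs = set(zip(cycle_nodes, cycle_nodes[1:] + cycle_nodes[:1]))
    on_cycle = reach_df.apply(lambda r: (r["start"], r["end"]) in pairs, axis=1)
    return reach_df.index[on_cycle].tolist()
```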
%% Cell type:code id: tags:
```
python3
# draw a map of the network
color_map = {
"Manhole":"#0000ff",
"Basin":"#00ff00",
"SewerJunction":"#888888",
"Outlet":"#ff0000",
"Link":"#0000ff",
"Weir":"#ff0000",
"Valve":"#8888ff",
"Pump":"#ffff00",
"Orifice":"#000000",
}
# This method translates the network to a geopandas dataframe.
# This is very handy for visualization, but does not contain all information needed for further analysis.
# That's why we used other sources for the network information.
map_df = nwr.network.to_geopandas()
map_df = map_df.merge(node_df[["type"]],left_on="name",right_on="name",how="left")
is_reach = map_df["group"]=="Reach"
has_colon = map_df["name"].str.contains(":")
map_df.loc[is_reach&~has_colon,"type"]="Link"
map_df.loc[is_reach&has_colon,"type"]=map_df["name"].str.split(":",expand=True)[0]
map_df["color"]=map_df["type"].map(color_map)
map_df.loc[(map_df["group"]=="Node")&(map_df["name"].isin(storage_df["node"])),"color"]="#440000"
important = ((map_df["type"]!="Link")&(map_df["type"]!="Manhole")&(map_df["type"]!="SewerJunction"))|map_df["name"].isin(storage_df["node"])
# map_df.explore(color=map_df.loc[:,"color"],marker_kwds={"radius":1.5})
# avoid shadowing the built-in map(); keep a handle for overlaying the important features
network_map = map_df.loc[~important,:].explore(color=map_df.loc[~important,"color"],marker_kwds={"radius":1.5})
map_df.loc[important,:].explore(color=map_df.loc[important,"color"],marker_kwds={"radius":6},m=network_map)
```
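%% Cell type:markdown id: tags:
GeoDataFrame.explore returns a folium map, so the result can also be written to a standalone HTML file for sharing:
%% Cell type:code id: tags:
```
python3
# save the interactive network map next to the notebook
network_map.save("network_map.html")
```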
%% Cell type:markdown id: tags:
# Dynamic Volume
%% Cell type:code id: tags:
```
python3
def read_node_volume(res1d: mr.Res1D, node: str):
    """Reads the volume in a node for every time step in the res1d."""
    try:
        volume = res1d.read(mr.QueryDataNode("WaterVolume", node)).iloc[:, 0]
    except Exception:
        warn(f"Could not read volume for node {node}. Returning 0")
        volume = pd.Series(0, index=res1d.time_index)
    return volume

def read_reach_volume(res1d: mr.Res1D, reach: str):
    """Reads the volume in a reach for every time step in the res1d."""
    # See https://github.com/DHI/mikeio1d/blob/main/notebooks/res1d.ipynb:
    # 'Get time series values summed for all gridpoints in reach with given quantity, i.e. useful for getting total volume in reach.
    # values_sum = res1d_network.get_reach_sum_values("9l1", "Discharge")'
    try:
        vol_values = res1d.get_reach_sum_values(reach, "WaterVolume")
    except Exception:
        warn(f"Could not read volume for reach {reach}. Returning 0")
        vol_values = 0
    return pd.Series(vol_values, index=res1d.time_index)

# we need to read only some nodes/reaches at a time to limit memory usage,
# but reading one by one is too slow. Therefore read in chunks.
def chunk_list(l: list, n: int):
    """Splits l into chunks that are no larger than n."""
    return [l[i:i+n] for i in range(0, len(l), n)]
```
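%% Cell type:markdown id: tags:
A quick usage example of chunk_list:
%% Cell type:code id: tags:
```
python3
# splits a list into consecutive chunks of at most n elements
chunk_list(list(range(7)), 3)  # -> [[0, 1, 2], [3, 4, 5], [6]]
```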
%% Cell type:code id: tags:
```
python3
# make a collection of all upstream nodes and reaches
graph = drainage_system_graph.DrainageSystemGraph(reach_df)
upstream = {}
for storage in storage_df.index:
node = storage_df.loc[storage, "node"]
upstream[storage] = graph.find_upstream_nodes_reaches(
node,storage_df["node"]
)
```
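%% Cell type:markdown id: tags:
find_upstream_nodes_reaches comes from the project-internal entfrachten package, so its exact behavior is not shown here. A rough sketch of the idea, assuming it walks against the flow direction and does not expand past other storage nodes (illustration only, not the real implementation):
%% Cell type:code id: tags:
```
python3
# hypothetical upstream search on the directed graph g from the cycle check above
def upstream_nodes_sketch(g, node, stop_nodes):
    stop = set(stop_nodes) - {node}
    seen, queue = {node}, [node]
    while queue:
        cur = queue.pop()
        for pred in g.predecessors(cur):
            if pred not in seen:
                seen.add(pred)
                if pred not in stop:  # do not expand past other storages
                    queue.append(pred)
    return seen
```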
%% Cell type:code id: tags:
```
python3
# set to True if you want to read dynamic_raw from a previous run to save time.
read_csv = False
if read_csv:
    # parse_dates restores the datetime index that to_csv wrote out
    dynamic_raw_df = pd.read_csv("dynamic_raw.csv", index_col=0, parse_dates=True)
else:
chunk_size = 100
dynamic_parts = []
for res_fp in tqdm(r1d_files):
for storage, (up_nodes, up_reaches) in tqdm(upstream.items()):
# water_level
node = storage_df.loc[storage, "node"]
# res1d is too large to read at once, read only one reach/node at a time
water_level = (
mr.Res1D(str(res_fp), nodes=[node])
.read(mr.QueryDataNode("WaterLevel", node))
.iloc[:, 0]
)
# volume: summed over all upstream nodes and reaches
volume = pd.Series(index=water_level.index, data=0.0)
for un_chunk in tqdm(chunk_list(list(up_nodes), chunk_size)):
chunk_res = mr.Res1D(str(res_fp), nodes=un_chunk)
for un in un_chunk:
volume += read_node_volume(chunk_res, un)
for ur_chunk in tqdm(chunk_list(list(up_reaches), chunk_size)):
chunk_res = mr.Res1D(str(res_fp), reaches=ur_chunk)
for ur in ur_chunk:
volume += read_reach_volume(
chunk_res,
ur,
)
chunk_res = None
# weir discharge
# TODO use sum of weir discharges?
w1 = storage_df.loc[storage, "weir_reach"]
w2 = storage_df.loc[storage, "weir_reach_2"]
if not pd.isnull(w1):
q_weir_1 = mr.Res1D(str(res_fp), reaches=[w1]).get_reach_start_values(
w1, "Discharge"
)
else:
q_weir_1 = pd.Series(np.nan, index=water_level.index)
if not pd.isnull(w2):
q_weir_2 = mr.Res1D(str(res_fp), reaches=[w2]).get_reach_start_values(
w2, "Discharge"
)
else:
q_weir_2 = pd.Series(np.nan, index=water_level.index)
# pump discharge and height
pr = storage_df.loc[storage, "pump_reach"]
if not pd.isnull(pr):
q_pump = mr.Res1D(str(res_fp), reaches=[pr]).get_reach_start_values(
pr, "Discharge"
)
else:
q_pump = pd.Series(np.nan, index=water_level.index)
pn = storage_df.loc[storage, "pump_node"]
if not pd.isnull(pn):
level_pump = (
mr.Res1D(str(res_fp), nodes=[pn])
.read(mr.QueryDataNode("WaterLevel", pn))
.iloc[:, 0]
)
else:
level_pump = pd.Series(np.nan, index=water_level.index)
dynamic_parts.append(
pd.DataFrame(
{
"storage": storage,
"water_level": water_level,
"volume": volume,
"q_weir_1": q_weir_1,
"q_weir_2": q_weir_2,
"q_pump": q_pump,
"level_pump": level_pump,
}
)
)
dynamic_raw_df = pd.concat(dynamic_parts, ignore_index=False)
del dynamic_parts
```
%% Cell type:code id: tags:
```
python3
dynamic_raw_df.to_csv("dynamic_raw.csv")
```
%% Cell type:code id: tags:
```
python3
dynamic_raw_df.describe()
```
%% Cell type:code id: tags:
```
python3
dynamic_raw_df.set_index("storage",append=True).isna().groupby("storage").mean()
```
%% Cell type:code id: tags:
```
python3
# transparent scatter plot of water level vs volume, to get a feel for the distribution
alpha = 0.03
fig, axs = plt.subplots(nrows=len(storage_df),squeeze=False,figsize=(9,3*len(storage_df)))
for s,ax in zip(storage_df.index,axs[:,0]):
dynamic_raw_df.query("storage==@s").plot(kind="scatter",x="water_level",y="volume",s=1,label="raw",color="black",ax=ax,alpha=alpha)
ax.set_title(s)
ax.legend()
fig
```
%% Cell type:code id: tags:
```
python3
dynamic_raw_df["h_group"] = (dynamic_raw_df["water_level"] * 100).round() / 100
dynamic_mean = dynamic_raw_df.groupby(["storage", "h_group"]).quantile(.05).reset_index()
def smooth_fkt(df: pd.DataFrame):
values = df.fillna(0).to_numpy()
smoothed = scipy.ndimage.gaussian_filter1d(values, sigma=5, axis=0, mode="nearest")
return pd.DataFrame(smoothed, index=df.index, columns=df.columns)
dynamic_df = (
dynamic_mean.groupby("storage")
.apply(smooth_fkt, include_groups=False)
.reset_index()
)
```
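%% Cell type:markdown id: tags:
Since the level bins are 1 cm wide, sigma=5 corresponds to a Gaussian kernel with a standard deviation of roughly 5 cm. A toy demonstration of what scipy.ndimage.gaussian_filter1d does to a noisy curve:
%% Cell type:code id: tags:
```
python3
# toy example: smooth a noisy ramp; sigma counts samples, i.e. level bins here
rng = np.random.default_rng(0)
noisy = np.linspace(0, 1, 200) + rng.normal(0, 0.05, 200)
plt.plot(noisy, color="lightgray", label="noisy")
plt.plot(scipy.ndimage.gaussian_filter1d(noisy, sigma=5, mode="nearest"), color="darkblue", label="smoothed")
plt.legend()
```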
%% Cell type:code id: tags:
```
python3
dynamic_mean.to_excel("dynamic_mean.xlsx")
dynamic_df.to_excel("dynamic.xlsx")
# dynamic_lowess.to_excel("dynamic_lowess.xlsx")
```
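%% Cell type:markdown id: tags:
scipy.interpolate is imported above as si but not used so far; one likely next step is turning the smoothed table into a lookup function for the simplified model. A sketch for the first storage, assuming linear interpolation between the binned levels is acceptable:
%% Cell type:code id: tags:
```
python3
# hypothetical level-to-volume lookup for one storage, built from the smoothed curve
s = storage_df.index[0]
curve = dynamic_df.query("storage==@s").sort_values("water_level")
level_to_volume = si.interp1d(
    curve["water_level"], curve["volume"],
    bounds_error=False,
    fill_value=(curve["volume"].iloc[0], curve["volume"].iloc[-1]),
)
level_to_volume(curve["water_level"].median())
```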
%% Cell type:code id: tags:
```
python3
# line plots of small time spans, uncomment if something looks odd and you want to take a closer look
# plot_chunk_size = 3*60
# print(len(dynamic_raw_df)/plot_chunk_size)
# for i in range(0,min(len(dynamic_raw_df),plot_chunk_size*10),plot_chunk_size):
# plt.figure(figsize=(12,4))
# dynamic_raw_df.iloc[i:i+plot_chunk_size][["water_level","volume","q_weir_1"]].plot(subplots=True)
# plt.suptitle(f"{i} to {i+plot_chunk_size} of {len(dynamic_raw_df)}")
```
%% Cell type:code id: tags:
```
python3
def plot_compare(x,y):
fig, axs = plt.subplots(nrows=len(storage_df),squeeze=False,figsize=(9,3*len(storage_df)))
a_dict = {}
for s,ax in zip(storage_df.index,axs[:,0]):
dynamic_raw_df.query("storage==@s").plot(kind="scatter",x=x,y=y,s=1.5,label="raw",color="lightgray",ax=ax)
dynamic_mean.query("storage==@s").plot(kind="scatter",x=x,y=y,s=2,label="avg",color="gray",ax=ax)
dynamic_df.query("storage==@s").plot(kind="line",x=x,y=y,label="smooth",color="darkblue",ax=ax)
# dynamic_lowess.query("storage==@s").plot(kind="line",x=x,y=y,label="lowess",color="red",ax=ax)
ax.set_title(s)
ax.legend()
a_dict[s]=ax
return fig,a_dict
plot_compare(x="water_level",y="volume")
```
%% Cell type:code id: tags:
```
python3
f,ad = plot_compare(x="water_level",y="q_weir_1")
for s,ax in ad.items():
crest_level = storage_df.loc[s,"weir_crest_level"]
ax.axvline(crest_level,color="orange",label="crest_level")
ax.legend()
```
%% Cell type:code id: tags:
```
python3
f,ad = plot_compare(x="water_level",y="q_weir_2")
for s,ax in ad.items():
crest_level = storage_df.loc[s,"weir_crest_level_2"]
ax.axvline(crest_level,color="orange",label="crest_level")
ax.legend()
```
%% Cell type:code id: tags:
```
python3
plot_compare("water_level","q_pump")
```
%% Cell type:code id: tags:
```
python3
plot_compare("water_level","level_pump")
```