Commit ec4d7841 by Sebastian Kerger: Upload New File
Optimization of control rules/model_simplification.ipynb (+667, -0)
%% Cell type:markdown id: tags:
# Script to read data from the MIKE+ model that is later used to build a simplified model
%% Cell type:code id: tags:
```
python3
from warnings import warn
import subprocess
from multiprocessing.connection import Client
from time import sleep
from pathlib import Path
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import sqlalchemy as sa
import seaborn as sns
import scipy.signal
import scipy.optimize
import mikeio1d.res1d as mr
import networkx as nx
import scipy.integrate
import scipy.ndimage
import scipy.interpolate as si
from statsmodels.nonparametric.smoothers_lowess import lowess
from entfrachten.analysis import drainage_system_graph
```
%% Cell type:code id: tags:
```
python3
# helper for more concise indexing in pandas
ids = pd.IndexSlice
```
%% Cell type:markdown id: tags:
# Settings
Defines which model and result data are read.
%% Cell type:code id: tags:
```
python3
# define the path to the model
model_path = Path(R"path\example_model.sqlite")
# collect all .res1d result files from the results folder; the first file is used to read the network structure
r1d_files = list(Path("results_path/").glob("*.res1d"))
network_res1d_path = r1d_files[0]
r1d_files
```
%% Cell type:code id: tags:
```
python3
# define an Excel file in which all storage structures for the EQF are listed. For each structure it should contain the descriptive name,
# the name in MIKE+ (e.g. the node name), weir names if present, the maximum level of the structure if it has no weir, and also
# the names of the outflow pumps or reaches if they are further down the system and not directly at the storage structure
storage_df = pd.read_excel("storages.xlsx")
storage_df = storage_df.set_index("name")
storage_df
```
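%% Cell type:markdown id: tags:
The exact layout of storages.xlsx is not part of this snapshot; the sketch below reconstructs a minimal template from the columns the notebook accesses further down (mike_name, weir, weir_2, max_level_if_no_weir, pump_reach, pump_node). All values are placeholders.
%% Cell type:code id: tags:
```
python3
# Hypothetical template for storages.xlsx; the column names match what is
# read further down, the example values are placeholders.
template = pd.DataFrame(
    {
        "name": ["storage_a"],
        "mike_name": ["node_or_reach_id"],
        "weir": ["weir_muid"],
        "weir_2": [None],
        "max_level_if_no_weir": [None],
        "pump_reach": [None],
        "pump_node": [None],
    }
)
# template.to_excel("storages.xlsx", index=False)  # writing needs openpyxl
```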
%% Cell type:markdown id: tags:
# Reading Network Data
%% Cell type:markdown id: tags:
## open files/connections
%% Cell type:code id: tags:
```
python3
# the model data is stored as an SQLite database, which we can read directly
model_db = sa.create_engine("sqlite:///"+str(model_path))
```
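%% Cell type:markdown id: tags:
If the table names in the model database are unknown, they can be listed with SQLAlchemy's inspector; a small sketch (the msm_Node and msm_weir tables queried below follow the MIKE+ schema):
%% Cell type:code id: tags:
```
python3
# list the MIKE+ tables available in the model sqlite
inspector = sa.inspect(model_db)
print([t for t in inspector.get_table_names() if t.lower().startswith("msm")])
```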
%% Cell type:code id: tags:
```
python3
# Nodes and reaches are read from the res1d file.
# A reach in this file can be any connection between nodes, including weirs, pumps, etc. Therefore this is simpler than the model sqlite.
# for reference on reading the data see:
# https://github.com/DHI/mikeio1d/discussions/53#discussion-5231633
nwr = mr.Res1D(str(network_res1d_path))
nwr.info()
```
%% Cell type:code id: tags:
```
python3
nwr.quantities
```
%% Cell type:markdown id: tags:
## Nodes
%% Cell type:code id: tags:
```
python3
# for accessing data see: https://github.com/DHI/mikeio1d/discussions/53
# collect the basic node attributes into a dataframe
node_records = []
for i, node in enumerate(list(nwr.data.Nodes)):
node_records.append(
{
"name": node.ID,
"r1d_index": i,
"type": str(node.GetType()).split(".")[-1].removeprefix("Res1D"),
"x":node.XCoordinate,
"y": node.YCoordinate,
}
)
node_df = pd.DataFrame.from_records(node_records)
# this is later needed to get the start and end nodes of reaches, where the r1d only contains an index as a number
r1d_node = node_df[["r1d_index","name"]].set_index("r1d_index")["name"]
node_df = node_df.set_index("name")
# some data is not in the res1d, use sqlite
with model_db.begin() as con:
node_sqlite = pd.read_sql("""
SELECT muid as name, groundlevel as ground_level, invertlevel as bottom_level
FROM msm_Node""",con).set_index("name")
assert set(node_df.index)==set(node_sqlite.index)
node_df = node_df.join(node_sqlite)
```
%% Cell type:markdown id: tags:
## Reaches
%% Cell type:code id: tags:
```
python3
reach_records = []
for reach in list(nwr.data.Reaches):
reach_records.append(
{
"name": reach.Name,
"start": r1d_node[reach.StartNodeIndex],
"end": r1d_node[reach.EndNodeIndex],
"length": reach.Length,
}
)
reach_df = pd.DataFrame.from_records(reach_records).set_index("name")
# derive the type from the name: everything other than a plain link has a prefix "{type}:"
has_colon = reach_df.index.to_series().str.contains(":")
reach_df.loc[~has_colon,"type"]="Link"
reach_df.loc[has_colon,"type"]=reach_df.index.to_series().str.split(":",expand=True)[0]
assert reach_df.index.is_unique
```
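%% Cell type:markdown id: tags:
As a quick sanity check of the prefix convention (a hypothetical reach named "Weir:W1" would get type "Weir", a plain "12l3" stays a Link), the type counts can be inspected:
%% Cell type:code id: tags:
```
python3
# distribution of reach types derived from the name prefixes
reach_df["type"].value_counts()
```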
%% Cell type:markdown id: tags:
## Weirs
%% Cell type:code id: tags:
```
python3
with model_db.begin() as con:
weir_df = pd.read_sql("""
SELECT muid as id, 'Weir:'||muid as reach, fromnodeid as start, tonodeid as end, crestlevel as crest_level
FROM msm_weir;""",con).set_index("id")
```
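%% Cell type:markdown id: tags:
The SQL concatenation 'Weir:'||muid builds reach names in the same form as the res1d reach names parsed above. A hedged consistency check, assuming every weir in the database also appears as a reach in the result file:
%% Cell type:code id: tags:
```
python3
# warn about weirs whose constructed reach name is missing from the res1d reaches
missing_weirs = weir_df.loc[~weir_df["reach"].isin(reach_df.index), "reach"]
if len(missing_weirs) > 0:
    warn(f"weir reaches not found in res1d: {list(missing_weirs)}")
```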
%% Cell type:markdown id: tags:
# Analysis on Network Structure
%% Cell type:code id: tags:
```
python3
# some storages are located by a reach and some by a node.
# for simplicity all should be located by a node; for reaches use the end node.
for storage_name, mike_name in storage_df["mike_name"].items():
if mike_name in node_df.index:
storage_df.loc[storage_name,"node"]=mike_name
elif mike_name in reach_df.index:
storage_df.loc[storage_name,"node"]=reach_df.loc[mike_name,"end"]
else:
raise Exception(f"mike name {mike_name} not found in reaches or nodes.")
```
%% Cell type:code id: tags:
```
python3
# put weir height info into storage df
storage_df["weir_crest_level"]=storage_df["weir"].map(weir_df["crest_level"])
storage_df["weir_reach"]=storage_df["weir"].map(weir_df["reach"])
storage_df["weir_crest_level_2"]=storage_df["weir_2"].map(weir_df["crest_level"])
storage_df["weir_reach_2"]=storage_df["weir_2"].map(weir_df["reach"])
# now calculate max level
storage_df["max_level"]=storage_df[["weir_crest_level","weir_crest_level_2","max_level_if_no_weir"]].min(axis="columns")
# add bottom level from nodes
storage_df["bottom_level"]=storage_df["node"].map(node_df["bottom_level"])
storage_df
```
%% Cell type:code id: tags:
```
python3
node_df.loc[storage_df["node"],"bottom_level"]
```
%% Cell type:code id: tags:
```
python3
node_sqlite.loc[storage_df["node"],"bottom_level"]
```
%% Cell type:code id: tags:
```
python3
# check the network graph for cycles
# for this we use the networkx package, which has a handy method for building a graph from a pandas dataframe
g = nx.from_pandas_edgelist(reach_df, source='start', target='end',
edge_attr='length',create_using=nx.DiGraph)
for c in nx.simple_cycles(g):
warn("Cycle in network graph, some later evaluations may be incorrect")
print(c)
```
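%% Cell type:markdown id: tags:
If a cycle is printed, the reaches that form it can be looked up in reach_df; a small helper sketch, assuming c is a node list as yielded by nx.simple_cycles:
%% Cell type:code id: tags:
```
python3
# map a cycle given as a node list back to the reach names that connect its nodes
def cycle_reaches(cycle_nodes):
    pairs = set(zip(cycle_nodes, cycle_nodes[1:] + cycle_nodes[:1]))
    on_cycle = reach_df.apply(lambda r: (r["start"], r["end"]) in pairs, axis=1)
    return reach_df.index[on_cycle].tolist()
```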
%% Cell type:code id: tags:
```
python3
# draw a map of the network
color_map = {
"Manhole":"#0000ff",
"Basin":"#00ff00",
"SewerJunction":"#888888",
"Outlet":"#ff0000",
"Link":"#0000ff",
"Weir":"#ff0000",
"Valve":"#8888ff",
"Pump":"#ffff00",
"Orifice":"#000000",
}
# This method translates the network to a geopandas dataframe.
# This is very handy for visualization, but does not contain all information needed for further analysis.
# That's why we used other sources for the network information.
map_df = nwr.network.to_geopandas()
map_df = map_df.merge(node_df[["type"]],left_on="name",right_on="name",how="left")
is_reach = map_df["group"]=="Reach"
has_colon = map_df["name"].str.contains(":")
map_df.loc[is_reach&~has_colon,"type"]="Link"
map_df.loc[is_reach&has_colon,"type"]=map_df["name"].str.split(":",expand=True)[0]
map_df["color"]=map_df["type"].map(color_map)
map_df.loc[(map_df["group"]=="Node")&(map_df["name"].isin(storage_df["node"])),"color"]="#440000"
important = ((map_df["type"]!="Link")&(map_df["type"]!="Manhole")&(map_df["type"]!="SewerJunction"))|map_df["name"].isin(storage_df["node"])
# map_df.explore(color=map_df.loc[:,"color"],marker_kwds={"radius":1.5})
# avoid shadowing the built-in map(); keep a handle for overlaying the important features
network_map = map_df.loc[~important,:].explore(color=map_df.loc[~important,"color"],marker_kwds={"radius":1.5})
map_df.loc[important,:].explore(color=map_df.loc[important,"color"],marker_kwds={"radius":6},m=network_map)
```
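%% Cell type:markdown id: tags:
GeoDataFrame.explore returns a folium map, so the result can also be written to a standalone HTML file for sharing:
%% Cell type:code id: tags:
```
python3
# save the interactive network map next to the notebook
network_map.save("network_map.html")
```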
%% Cell type:markdown id: tags:
# Dynamic Volume
%% Cell type:code id: tags:
```
python3
def read_node_volume(res1d: mr.Res1D, node: str):
    """Reads the volume in a node for every time step in the res1d."""
    try:
        volume = res1d.read(mr.QueryDataNode("WaterVolume", node)).iloc[:, 0]
    except Exception:
        warn(f"Could not read volume for node {node}. Returning 0")
        volume = pd.Series(0, index=res1d.time_index)
    return volume

def read_reach_volume(res1d: mr.Res1D, reach: str):
    """Reads the volume in a reach for every time step in the res1d."""
    # See https://github.com/DHI/mikeio1d/blob/main/notebooks/res1d.ipynb:
    # 'Get time series values summed for all gridpoints in reach with given quantity, i.e. useful for getting total volume in reach.
    # values_sum = res1d_network.get_reach_sum_values("9l1", "Discharge")'
    try:
        vol_values = res1d.get_reach_sum_values(reach, "WaterVolume")
    except Exception:
        warn(f"Could not read volume for reach {reach}. Returning 0")
        vol_values = 0
    return pd.Series(vol_values, index=res1d.time_index)

# we need to read only some nodes/reaches at a time to limit memory usage,
# but reading one by one is too slow. Therefore read in chunks.
def chunk_list(l: list, n: int):
    """Splits l into chunks that are no larger than n."""
    return [l[i:i+n] for i in range(0, len(l), n)]
```
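%% Cell type:markdown id: tags:
A quick usage example of chunk_list:
%% Cell type:code id: tags:
```
python3
# splits a list into consecutive chunks of at most n elements
chunk_list(list(range(7)), 3)  # -> [[0, 1, 2], [3, 4, 5], [6]]
```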
%% Cell type:code id: tags:
```
python3
# make a collection of all upstream nodes and reaches
graph = drainage_system_graph.DrainageSystemGraph(reach_df)
upstream = {}
for storage in storage_df.index:
node = storage_df.loc[storage, "node"]
upstream[storage] = graph.find_upstream_nodes_reaches(
node,storage_df["node"]
)
```
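%% Cell type:markdown id: tags:
find_upstream_nodes_reaches comes from the project-internal entfrachten package, so its exact behavior is not shown here. A rough sketch of the idea, assuming it walks against the flow direction and does not expand past other storage nodes (illustration only, not the real implementation):
%% Cell type:code id: tags:
```
python3
# hypothetical upstream search on the directed graph g from the cycle check above
def upstream_nodes_sketch(g, node, stop_nodes):
    stop = set(stop_nodes) - {node}
    seen, queue = {node}, [node]
    while queue:
        cur = queue.pop()
        for pred in g.predecessors(cur):
            if pred not in seen:
                seen.add(pred)
                if pred not in stop:  # do not expand past other storages
                    queue.append(pred)
    return seen
```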
%% Cell type:code id: tags:
```
python3
# set to True if you want to read dynamic_raw from a previous run to save time.
read_csv = False
if read_csv:
    # parse_dates restores the datetime index that to_csv wrote out
    dynamic_raw_df = pd.read_csv("dynamic_raw.csv", index_col=0, parse_dates=True)
else:
chunk_size = 100
dynamic_parts = []
for res_fp in tqdm(r1d_files):
for storage, (up_nodes, up_reaches) in tqdm(upstream.items()):
# water_level
node = storage_df.loc[storage, "node"]
# res1d is too large to read at once, read only one reach/node at a time
water_level = (
mr.Res1D(str(res_fp), nodes=[node])
.read(mr.QueryDataNode("WaterLevel", node))
.iloc[:, 0]
)
# volume: summed over all upstream nodes and reaches
volume = pd.Series(index=water_level.index, data=0.0)
for un_chunk in tqdm(chunk_list(list(up_nodes), chunk_size)):
chunk_res = mr.Res1D(str(res_fp), nodes=un_chunk)
for un in un_chunk:
volume += read_node_volume(chunk_res, un)
for ur_chunk in tqdm(chunk_list(list(up_reaches), chunk_size)):
chunk_res = mr.Res1D(str(res_fp), reaches=ur_chunk)
for ur in ur_chunk:
volume += read_reach_volume(
chunk_res,
ur,
)
chunk_res = None
# weir discharge
# TODO use sum of weir discharges?
w1 = storage_df.loc[storage, "weir_reach"]
w2 = storage_df.loc[storage, "weir_reach_2"]
if not pd.isnull(w1):
q_weir_1 = mr.Res1D(str(res_fp), reaches=[w1]).get_reach_start_values(
w1, "Discharge"
)
else:
q_weir_1 = pd.Series(np.nan, index=water_level.index)
if not pd.isnull(w2):
q_weir_2 = mr.Res1D(str(res_fp), reaches=[w2]).get_reach_start_values(
w2, "Discharge"
)
else:
q_weir_2 = pd.Series(np.nan, index=water_level.index)
# pump discharge and height
pr = storage_df.loc[storage, "pump_reach"]
if not pd.isnull(pr):
q_pump = mr.Res1D(str(res_fp), reaches=[pr]).get_reach_start_values(
pr, "Discharge"
)
else:
q_pump = pd.Series(np.nan, index=water_level.index)
pn = storage_df.loc[storage, "pump_node"]
if not pd.isnull(pn):
level_pump = (
mr.Res1D(str(res_fp), nodes=[pn])
.read(mr.QueryDataNode("WaterLevel", pn))
.iloc[:, 0]
)
else:
level_pump = pd.Series(np.nan, index=water_level.index)
dynamic_parts.append(
pd.DataFrame(
{
"storage": storage,
"water_level": water_level,
"volume": volume,
"q_weir_1": q_weir_1,
"q_weir_2": q_weir_2,
"q_pump": q_pump,
"level_pump": level_pump,
}
)
)
dynamic_raw_df = pd.concat(dynamic_parts, ignore_index=False)
del dynamic_parts
```
%% Cell type:code id: tags:
```
python3
dynamic_raw_df.to_csv("dynamic_raw.csv")
```
%% Cell type:code id: tags:
```
python3
dynamic_raw_df.describe()
```
%% Cell type:code id: tags:
```
python3
dynamic_raw_df.set_index("storage",append=True).isna().groupby("storage").mean()
```
%% Cell type:code id: tags:
```
python3
# transparent scatter plot of water level vs volume, to get a feel for the distribution
alpha = 0.03
fig, axs = plt.subplots(nrows=len(storage_df),squeeze=False,figsize=(9,3*len(storage_df)))
for s,ax in zip(storage_df.index,axs[:,0]):
dynamic_raw_df.query("storage==@s").plot(kind="scatter",x="water_level",y="volume",s=1,label="raw",color="black",ax=ax,alpha=alpha)
ax.set_title(s)
ax.legend()
fig
```
%% Cell type:code id: tags:
```
python3
dynamic_raw_df["h_group"] = (dynamic_raw_df["water_level"] * 100).round() / 100
dynamic_mean = dynamic_raw_df.groupby(["storage", "h_group"]).quantile(.05).reset_index()
def smooth_fkt(df: pd.DataFrame):
values = df.fillna(0).to_numpy()
smoothed = scipy.ndimage.gaussian_filter1d(values, sigma=5, axis=0, mode="nearest")
return pd.DataFrame(smoothed, index=df.index, columns=df.columns)
dynamic_df = (
dynamic_mean.groupby("storage")
.apply(smooth_fkt, include_groups=False)
.reset_index()
)
```
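%% Cell type:markdown id: tags:
Since the level bins are 1 cm wide, sigma=5 corresponds to a Gaussian kernel with a standard deviation of roughly 5 cm. A toy demonstration of what scipy.ndimage.gaussian_filter1d does to a noisy curve:
%% Cell type:code id: tags:
```
python3
# toy example: smooth a noisy ramp; sigma counts samples, i.e. level bins here
rng = np.random.default_rng(0)
noisy = np.linspace(0, 1, 200) + rng.normal(0, 0.05, 200)
plt.plot(noisy, color="lightgray", label="noisy")
plt.plot(scipy.ndimage.gaussian_filter1d(noisy, sigma=5, mode="nearest"), color="darkblue", label="smoothed")
plt.legend()
```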
%% Cell type:code id: tags:
```
python3
dynamic_mean.to_excel("dynamic_mean.xlsx")
dynamic_df.to_excel("dynamic.xlsx")
# dynamic_lowess.to_excel("dynamic_lowess.xlsx")
```
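%% Cell type:markdown id: tags:
scipy.interpolate is imported above as si but not used so far; one likely next step is turning the smoothed table into a lookup function for the simplified model. A sketch for the first storage, assuming linear interpolation between the binned levels is acceptable:
%% Cell type:code id: tags:
```
python3
# hypothetical level-to-volume lookup for one storage, built from the smoothed curve
s = storage_df.index[0]
curve = dynamic_df.query("storage==@s").sort_values("water_level")
level_to_volume = si.interp1d(
    curve["water_level"], curve["volume"],
    bounds_error=False,
    fill_value=(curve["volume"].iloc[0], curve["volume"].iloc[-1]),
)
level_to_volume(curve["water_level"].median())
```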
%% Cell type:code id: tags:
```
python3
# line plots of small time spans, uncomment if something looks odd and you want to take a closer look
# plot_chunk_size = 3*60
# print(len(dynamic_raw_df)/plot_chunk_size)
# for i in range(0,min(len(dynamic_raw_df),plot_chunk_size*10),plot_chunk_size):
# plt.figure(figsize=(12,4))
# dynamic_raw_df.iloc[i:i+plot_chunk_size][["water_level","volume","q_weir_1"]].plot(subplots=True)
# plt.suptitle(f"{i} to {i+plot_chunk_size} of {len(dynamic_raw_df)}")
```
%% Cell type:code id: tags:
```
python3
def plot_compare(x,y):
fig, axs = plt.subplots(nrows=len(storage_df),squeeze=False,figsize=(9,3*len(storage_df)))
a_dict = {}
for s,ax in zip(storage_df.index,axs[:,0]):
dynamic_raw_df.query("storage==@s").plot(kind="scatter",x=x,y=y,s=1.5,label="raw",color="lightgray",ax=ax)
dynamic_mean.query("storage==@s").plot(kind="scatter",x=x,y=y,s=2,label="avg",color="gray",ax=ax)
dynamic_df.query("storage==@s").plot(kind="line",x=x,y=y,label="smooth",color="darkblue",ax=ax)
# dynamic_lowess.query("storage==@s").plot(kind="line",x=x,y=y,label="lowess",color="red",ax=ax)
ax.set_title(s)
ax.legend()
a_dict[s]=ax
return fig,a_dict
plot_compare(x="water_level",y="volume")
```
%% Cell type:code id: tags:
```
python3
f,ad = plot_compare(x="water_level",y="q_weir_1")
for s,ax in ad.items():
crest_level = storage_df.loc[s,"weir_crest_level"]
ax.axvline(crest_level,color="orange",label="crest_level")
ax.legend()
```
%% Cell type:code id: tags:
```
python3
f,ad = plot_compare(x="water_level",y="q_weir_2")
for s,ax in ad.items():
crest_level = storage_df.loc[s,"weir_crest_level_2"]
ax.axvline(crest_level,color="orange",label="crest_level")
ax.legend()
```
%% Cell type:code id: tags:
```
python3
plot_compare("water_level","q_pump")
```
%% Cell type:code id: tags:
```
python3
plot_compare("water_level","level_pump")
```