Skip to content
Snippets Groups Projects
Commit ec4d7841 authored by Sebastian Kerger's avatar Sebastian Kerger
Browse files

Upload New File

parent 6aab0590
Branches
No related tags found
No related merge requests found
%% Cell type:markdown id: tags:
# Script to read data from the mike+ model that is later used to build a simpler model
%% Cell type:code id: tags:
``` python3
from warnings import warn
import subprocess
from multiprocessing.connection import Client
from time import sleep
from pathlib import Path
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import sqlalchemy as sa
import seaborn as sns
import scipy.signal
import scipy.optimize
import mikeio1d.res1d as mr
import networkx as nx
import scipy.integrate
import scipy.ndimage
import scipy.interpolate as si
from statsmodels.nonparametric.smoothers_lowess import lowess
from entfrachten.analysis import drainage_system_graph
```
%% Cell type:code id: tags:
``` python3
# helper for more concise indexing in pandas
ids = pd.IndexSlice
```
%% Cell type:markdown id: tags:
# Settings
What data is looked at
%% Cell type:code id: tags:
``` python3
# define the path to the model
model_path = Path(R"path\example_model.sqlite")
# define a path to th
r1d_files = list(Path("results_path/").glob("*.res1d"))
network_res1d_path = r1d_files[0]
r1d_files
```
%% Cell type:code id: tags:
``` python3
# define an excel in which all storages for the EQF are listed. Each structure should contain the descriptive name,
# the name in mike (e.g. name of node), weir names if present, the max level of the structure if it has no weir, and also
# names of the outflow pumps or reaches if they are further down the system and not directly at the storage structure
storage_df = pd.read_excel("storages.xlsx")
storage_df = storage_df.set_index("name")
storage_df
```
%% Cell type:markdown id: tags:
# Reading Network Data
%% Cell type:markdown id: tags:
## open files/connections
%% Cell type:code id: tags:
``` python3
# model data is saved as a sqlite, which we can read from
model_db = sa.create_engine("sqlite:///"+str(model_path))
```
%% Cell type:code id: tags:
``` python3
# Nodes and reaches are read from the res1d file.
# A reach in this file can be any connection between nodes, including weirs, pumps, etc. Therefore this is simpler than the model sqlite.
# for reference on reading the data see:
# https://github.com/DHI/mikeio1d/discussions/53#discussion-5231633
nwr = mr.Res1D(str(network_res1d_path))
nwr.info()
```
%% Cell type:code id: tags:
``` python3
nwr.quantities
```
%% Cell type:markdown id: tags:
## Nodes
%% Cell type:code id: tags:
``` python3
# for accessing data see: https://github.com/DHI/mikeio1d/discussions/53
# make a dict of links, where the from_node_id is the key, for searching later
node_records = []
for i, node in enumerate(list(nwr.data.Nodes)):
node_records.append(
{
"name": node.ID,
"r1d_index": i,
"type": str(node.GetType()).split(".")[-1].removeprefix("Res1D"),
"x":node.XCoordinate,
"y": node.YCoordinate,
}
)
node_df = pd.DataFrame.from_records(node_records)
# this is later needed to get the start and end nodes of reaches, where the r1d only contains an index as a number
r1d_node = node_df[["r1d_index","name"]].set_index("r1d_index")["name"]
node_df = node_df.set_index("name")
# some data is not in the res1d, use sqlite
with model_db.begin() as con:
node_sqlite = pd.read_sql("""
SELECT muid as name, groundlevel as ground_level, invertlevel as bottom_level
FROM msm_Node""",con).set_index("name")
assert set(node_df.index)==set(node_sqlite.index)
node_df = node_df.join(node_sqlite)
```
%% Cell type:markdown id: tags:
## Reaches
%% Cell type:code id: tags:
``` python3
reach_records = []
for reach in list(nwr.data.Reaches):
reach_records.append(
{
"name": reach.Name,
"start": r1d_node[reach.StartNodeIndex],
"end": r1d_node[reach.EndNodeIndex],
"length": reach.Length,
}
)
reach_df = pd.DataFrame.from_records(reach_records).set_index("name")
# add type from name, everything other than link has a prefix "{type}:"
has_colon = reach_df.index.to_series().str.contains(":")
reach_df.loc[~has_colon,"type"]="Link"
reach_df.loc[has_colon,"type"]=reach_df.index.to_series().str.split(":",expand=True)[0]
assert reach_df.index.is_unique
```
%% Cell type:markdown id: tags:
# Weirs
%% Cell type:code id: tags:
``` python3
with model_db.begin() as con:
weir_df = pd.read_sql("""
SELECT muid as id, 'Weir:'||muid as reach, fromnodeid as start, tonodeid as end, crestlevel as crest_level
FROM msm_weir;""",con).set_index("id")
```
%% Cell type:markdown id: tags:
# Analysis on Network Structure
%% Cell type:code id: tags:
``` python3
# some storages are located by a reach and some by node.
# for simplicity all should be located by node. For reaches use the end node.
for storage_name, mike_name in storage_df["mike_name"].items():
if mike_name in node_df.index:
storage_df.loc[storage_name,"node"]=mike_name
elif mike_name in reach_df.index:
storage_df.loc[storage_name,"node"]=reach_df.loc[mike_name,"end"]
else:
raise Exception(f"mike name {mike_name} not found in reaches or nodes.")
```
%% Cell type:code id: tags:
``` python3
# put weir height info into storage df
storage_df["weir_crest_level"]=storage_df["weir"].map(weir_df["crest_level"])
storage_df["weir_reach"]=storage_df["weir"].map(weir_df["reach"])
storage_df["weir_crest_level_2"]=storage_df["weir_2"].map(weir_df["crest_level"])
storage_df["weir_reach_2"]=storage_df["weir_2"].map(weir_df["reach"])
# now calculate max level
storage_df["max_level"]=storage_df[["weir_crest_level","weir_crest_level_2","max_level_if_no_weir"]].min(axis="columns")
# add bottom level from nodes
storage_df["bottom_level"]=storage_df["node"].map(node_df["bottom_level"])
storage_df
```
%% Cell type:code id: tags:
``` python3
node_df.loc[storage_df["node"],"bottom_level"]
```
%% Cell type:code id: tags:
``` python3
node_sqlite.loc[storage_df["node"],"bottom_level"]
```
%% Cell type:code id: tags:
``` python3
# check the network graph for circles
# for this we use the networkx package, which has a handy method for making a graph from a pandas dataframe
g = nx.from_pandas_edgelist(reach_df, source='start', target='end',
edge_attr='length',create_using=nx.DiGraph)
for c in nx.simple_cycles(g):
warn("Cycle in network graph, some later evaluations may be incorrect")
print(c)
```
%% Cell type:code id: tags:
``` python3
# draw a map of the network
color_map = {
"Manhole":"#0000ff",
"Basin":"#00ff00",
"SewerJunction":"#888888",
"Outlet":"#ff0000",
"Link":"#0000ff",
"Weir":"#ff0000",
"Valve":"#8888ff",
"Pump":"#ffff00",
"Orifice":"#000000",
}
# This method translates the network to a geopandas dataframe.
# This is very handy for visualization, but does not contain all information needed for further analysis.
# Thats why we used other sources for network information.
map_df = nwr.network.to_geopandas()
map_df = map_df.merge(node_df[["type"]],left_on="name",right_on="name",how="left")
is_reach = map_df["group"]=="Reach"
has_colon = map_df["name"].str.contains(":")
map_df.loc[is_reach&~has_colon,"type"]="Link"
map_df.loc[is_reach&has_colon,"type"]=map_df["name"].str.split(":",expand=True)[0]
map_df["color"]=map_df["type"].map(color_map)
map_df.loc[(map_df["group"]=="Node")&(map_df["name"].isin(storage_df["node"])),"color"]="#440000"
important = ((map_df["type"]!="Link")&(map_df["type"]!="Manhole")&(map_df["type"]!="SewerJunction"))|map_df["name"].isin(storage_df["node"])
# map_df.explore(color=map_df.loc[:,"color"],marker_kwds={"radius":1.5})
map = map_df.loc[~important,:].explore(color=map_df.loc[~important,"color"],marker_kwds={"radius":1.5})
map_df.loc[important,:].explore(color=map_df.loc[important,"color"],marker_kwds={"radius":6},m=map)
```
%% Cell type:markdown id: tags:
# Dynamic Volume
%% Cell type:code id: tags:
``` python3
def read_node_volume(res1d:mr.Res1D,node: str):
"""Reads the volume in a node for every time in the res1d."""
try:
volume = res1d.read(mr.QueryDataNode("WaterVolume",node)).iloc[:,0]
except Exception as ex:
warn(f"Could not read volume for node {node}. Returning 0")
volume = pd.Series(0,index=res1d.time_index)
return volume
def read_reach_volume(res1d:mr.Res1D, reach:str):
"""Reads the volume in a reach for every time in the res1d."""
# See https://github.com/DHI/mikeio1d/blob/main/notebooks/res1d.ipynb:
# 'Get time series values summed for all gridpoints in reach with given quantity, i.e. useful for getting total volume in reach.
# values_sum = res1d_network.get_reach_sum_values("9l1", "Discharge")'
try:
vol_values = res1d.get_reach_sum_values(reach, "WaterVolume")
except Exception as ex:
warn(f"Could not read volume for reach {reach}. Returning 0")
vol_values = 0
return pd.Series(vol_values,index=res1d.time_index)
# we need to read only some nodes/reaches at a time to limit memory usage
# but reading one by one is too slow. Therefore read in chunks.
def chunk_list(l:list,n:int):
""""splits l into chunks that are no larger than n"""
return [l[i:i+n] for i in range(0,len(l),n)]
```
%% Cell type:code id: tags:
``` python3
# make a collection of all upstream nodes and reaches
graph = drainage_system_graph.DrainageSystemGraph(reach_df)
upstream = {}
for storage in storage_df.index:
node = storage_df.loc[storage, "node"]
upstream[storage] = graph.find_upstream_nodes_reaches(
node,storage_df["node"]
)
```
%% Cell type:code id: tags:
``` python3
# set to true if you want to read the dynamic_raw from a previous run to save time.
read_csv = False
if read_csv:
dynamic_raw_df = pd.read_csv("dynamic_raw.csv", index_col=0)
else:
chunk_size = 100
dynamic_parts = []
for res_fp in tqdm(r1d_files):
for storage, (up_nodes, up_reaches) in tqdm(upstream.items()):
# water_level
node = storage_df.loc[storage, "node"]
# res1d is too large to read at once, read only one reach/node at a time
water_level = (
mr.Res1D(str(res_fp), nodes=[node])
.read(mr.QueryDataNode("WaterLevel", node))
.iloc[:, 0]
)
# volume and water_level
volume = pd.Series(index=water_level.index, data=0.0)
for un_chunk in tqdm(chunk_list(list(up_nodes), chunk_size)):
chunk_res = mr.Res1D(str(res_fp), nodes=un_chunk)
for un in un_chunk:
volume += read_node_volume(chunk_res, un)
for ur_chunk in tqdm(chunk_list(list(up_reaches), chunk_size)):
chunk_res = mr.Res1D(str(res_fp), reaches=ur_chunk)
for ur in ur_chunk:
volume += read_reach_volume(
chunk_res,
ur,
)
chunk_res = None
# weir discharge
# TODO use sum of weir discharges?
w1 = storage_df.loc[storage, "weir_reach"]
w2 = storage_df.loc[storage, "weir_reach_2"]
if not pd.isnull(w1):
q_weir_1 = mr.Res1D(str(res_fp), reaches=[w1]).get_reach_start_values(
w1, "Discharge"
)
else:
q_weir_1 = pd.Series(np.nan, index=water_level.index)
if not pd.isnull(w2):
q_weir_2 = mr.Res1D(str(res_fp), reaches=[w2]).get_reach_start_values(
w2, "Discharge"
)
else:
q_weir_2 = pd.Series(np.nan, index=water_level.index)
# pump discharge and height
pr = storage_df.loc[storage, "pump_reach"]
if not pd.isnull(pr):
q_pump = mr.Res1D(str(res_fp), reaches=[pr]).get_reach_start_values(
pr, "Discharge"
)
else:
q_pump = pd.Series(np.nan, index=water_level.index)
pn = storage_df.loc[storage, "pump_node"]
if not pd.isnull(pn):
level_pump = (
mr.Res1D(str(res_fp), nodes=[pn])
.read(mr.QueryDataNode("WaterLevel", pn))
.iloc[:, 0]
)
else:
level_pump = pd.Series(np.nan, index=water_level.index)
dynamic_parts.append(
pd.DataFrame(
{
"storage": storage,
"water_level": water_level,
"volume": volume,
"q_weir_1": q_weir_1,
"q_weir_2": q_weir_2,
"q_pump": q_pump,
"level_pump": level_pump,
}
)
)
dynamic_raw_df = pd.concat(dynamic_parts, ignore_index=False)
del dynamic_parts
```
%% Cell type:code id: tags:
``` python3
dynamic_raw_df.to_csv("dynamic_raw.csv")
```
%% Cell type:code id: tags:
``` python3
dynamic_raw_df.describe()
```
%% Cell type:code id: tags:
``` python3
dynamic_raw_df.set_index("storage",append=True).isna().groupby("storage").mean()
```
%% Cell type:code id: tags:
``` python3
# transparent scatter plot of waterlevel vs volume, to get a feel for the distribution
alpha = 0.03
fig, axs = plt.subplots(nrows=len(storage_df),squeeze=False,figsize=(9,3*len(storage_df)))
for s,ax in zip(storage_df.index,axs[:,0]):
dynamic_raw_df.query("storage==@s").plot(kind="scatter",x="water_level",y="volume",s=1,label="raw",color="black",ax=ax,alpha=alpha)
ax.set_title(s)
ax.legend()
fig
```
%% Cell type:code id: tags:
``` python3
dynamic_raw_df["h_group"] = (dynamic_raw_df["water_level"] * 100).round() / 100
dynamic_mean = dynamic_raw_df.groupby(["storage", "h_group"]).quantile(.05).reset_index()
def smooth_fkt(df: pd.DataFrame):
values = df.fillna(0).to_numpy()
smoothed = scipy.ndimage.gaussian_filter1d(values, sigma=5, axis=0, mode="nearest")
return pd.DataFrame(smoothed, index=df.index, columns=df.columns)
dynamic_df = (
dynamic_mean.groupby("storage")
.apply(smooth_fkt, include_groups=False)
.reset_index()
)
```
%% Cell type:code id: tags:
``` python3
dynamic_mean.to_excel("dynamic_mean.xlsx")
dynamic_df.to_excel("dynamic.xlsx")
# dynamic_lowess.to_excel("dynamic_lowess.xlsx")
```
%% Cell type:code id: tags:
``` python3
# line plots of small time spans, uncomment if something looks odd and you want to take a closer look
# plot_chunk_size = 3*60
# print(len(dynamic_raw_df)/plot_chunk_size)
# for i in range(0,min(len(dynamic_raw_df),plot_chunk_size*10),plot_chunk_size):
# plt.figure(figsize=(12,4))
# dynamic_raw_df.iloc[i:i+plot_chunk_size][["water_level","volume","q_weir_1"]].plot(subplots=True)
# plt.suptitle(f"{i} to {i+plot_chunk_size} of {len(dynamic_raw_df)}")
```
%% Cell type:code id: tags:
``` python3
def plot_compare(x,y):
fig, axs = plt.subplots(nrows=len(storage_df),squeeze=False,figsize=(9,3*len(storage_df)))
a_dict = {}
for s,ax in zip(storage_df.index,axs[:,0]):
dynamic_raw_df.query("storage==@s").plot(kind="scatter",x=x,y=y,s=1.5,label="raw",color="lightgray",ax=ax)
dynamic_mean.query("storage==@s").plot(kind="scatter",x=x,y=y,s=2,label="avg",color="gray",ax=ax)
dynamic_df.query("storage==@s").plot(kind="line",x=x,y=y,label="smooth",color="darkblue",ax=ax)
# dynamic_lowess.query("storage==@s").plot(kind="line",x=x,y=y,label="lowess",color="red",ax=ax)
ax.set_title(s)
ax.legend()
a_dict[s]=ax
return fig,a_dict
plot_compare(x="water_level",y="volume")
```
%% Cell type:code id: tags:
``` python3
f,ad = plot_compare(x="water_level",y="q_weir_1")
for s,ax in ad.items():
crest_level = storage_df.loc[s,"weir_crest_level"]
ax.axvline(crest_level,color="orange",label="crest_level")
ax.legend()
```
%% Cell type:code id: tags:
``` python3
f,ad = plot_compare(x="water_level",y="q_weir_2")
for s,ax in ad.items():
crest_level = storage_df.loc[s,"weir_crest_level_2"]
ax.axvline(crest_level,color="orange",label="crest_level")
ax.legend()
```
%% Cell type:code id: tags:
``` python3
plot_compare("water_level","q_pump")
```
%% Cell type:code id: tags:
``` python3
plot_compare("water_level","level_pump")
```
%% Cell type:code id: tags:
``` python3
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment