Merge branch 'dev_jgn_debug'

510cb9ad · Jingyu Gong · 0442366d · a92aa0cd · 510cb9ad · 510cb9ad
Commit 510cb9ad authored Apr 17, 2023 by Jingyu Gong
--- a/demand_generator/electrical_demand/ElectricalDemand.py
+++ b/demand_generator/electrical_demand/ElectricalDemand.py
@@ -162,10 +162,22 @@ class ElectricalDemand:
                                  self.seasons[p][1], 0, 0)
            b = datetime.datetime(self.year, self.seasons[p][2],
                                  self.seasons[p][3], 23, 59)
+            # Old Version
+            # --------------------------------------------------------
+            # new_df.update(pd.DataFrame.merge(
+            #     tmp_df[tmp_df['period'] == p[:-1]], time_df[a:b],
+            #     left_on=left_cols, right_on=right_cols,
+            #     how='inner', left_index=True).sort_index().drop(
+            #     ['hour_of_day'], 1))
+            # --------------------------------------------------------
+            # New Version
+            # Tested with pandas version 1.3.1
            new_df.update(pd.DataFrame.merge(
                tmp_df[tmp_df['period'] == p[:-1]], time_df[a:b],
-                left_on=left_cols, right_on=right_cols,
+                left_on=left_cols, right_on=right_cols, how='inner').sort_values(
-                how='inner', left_index=True).sort_index().drop(
+                by=["date", "weekday", "hour", "minute"]).set_index(
+                time_df[a:b].index).drop(
                ['hour_of_day'], 1))
        new_df.drop('date', axis=1, inplace=True)

--- a/input_profile_processor/input_profile_processor.py
+++ b/input_profile_processor/input_profile_processor.py
@@ -26,12 +26,18 @@ from datetime import datetime
 from Tooling.demand_generator.electrical_demand.ElectricalDemand import ElectricalDemand
 from Tooling.demand_generator.thermal_demand.ThermalDemand import ThermalDemand
 from Tooling.input_profile_processor.calc_irradiance import generate_g_t_series
+from Tooling.modifier import Modifier
 def process_input_profiles(input_profile_dict, t_start, t_horizon, t_step):
    input_profiles = {}
    for input_profile_name, input_profile_config in input_profile_dict.items():
        if input_profile_config[1] == 'generate':
-            profile = generate_profile(input_profile_config[0], input_profile_config[2:], input_profiles, t_start, t_horizon, t_step)
+            profile = generate_profile(input_profile_config[0], input_profile_config[2:], input_profiles, t_start,
+                                       t_horizon, t_step)
+        elif input_profile_config[1] == 'modify':
+            profile = modify_profile(input_profile_config[0], input_profile_config[2:], input_profiles, t_start,
+                                     t_horizon, t_step)
        else:
            profile = pd.read_csv(input_profile_config[1], index_col=0)
            try:
@@ -50,13 +56,15 @@ def process_input_profiles(input_profile_dict, t_start, t_horizon, t_step):
            phi = 52.21
            psi_f = 0
            beta = 30
-            input_profile = generate_g_t_series(input_profile, beta, psi_f, phi, lambda_st, lambda_1, t_start, t_horizon, t_step)
+            input_profile = generate_g_t_series(input_profile, beta, psi_f, phi, lambda_st, lambda_1, t_start,
+                                                t_horizon, t_step)
        input_profile = input_profile.squeeze()
        input_profile.set_axis(list(range(t_horizon)), inplace = True)
        input_profiles[input_profile_name] = input_profile
    return input_profiles
 def generate_profile(profile_type, parameters, input_profiles, t_start, t_horizon, t_step):
    t_last = t_start + pd.Timedelta(hours=(t_horizon - 1) * t_step)
    years = range(t_start.year, t_last.year + 1)
@@ -64,7 +72,8 @@ def generate_profile(profile_type, parameters, input_profiles, t_start, t_horizo
    if profile_type == 'elec_demand':
        profiles = []
        for year in years:
-            profile = ElectricalDemand(year).get_profile(parameters[0], 'h0', True) # True: with smoothing function for household profiles, False: no smoothing function for household profiles
+            profile = ElectricalDemand(year).get_profile(parameters[0], 'h0',
+                                                         True)  # True: with smoothing function for household profiles, False: no smoothing function for household profiles
            profile = profile.resample(str(t_step) + 'H').mean().interpolate('linear')
            profiles.append(profile)
        timeseries = pd.concat(profiles)
@@ -108,3 +117,27 @@ def generate_profile(profile_type, parameters, input_profiles, t_start, t_horizo
        return timeseries
    else:
        raise Exception("Generator for profile type " + str(profile_type) + " is not implemented!")
+def modify_profile(mod_type: str, parameters, input_profiles, t_start, t_horizon, t_step):
+    """
+    Create an input profile by modifying an existing time series.
+    Only the mod type "prophet" is implemented; it forwards to Modifier.prophet_based_modification with a fixed
+    mod_period of 365.
+    Example entry for use in the runme (element 0 is the mod type, element 1 the keyword 'modify', everything after
+    that arrives here as ``parameters``):
+    'temperature_1': ['prophet', 'modify', 'input_files/data/temperature/temperature.csv', 'temperature',
+                      {"winter1": 0, "spring": 0, "summer": 0, "fall": 0, "winter2": 0},
+                      "Tooling/Modifier/prophet_models/serialized_model_08032023.json", True, True]
+    Parameters
+    ----------
+    mod_type : str
+        Name of the modification method.
+    parameters : sequence
+        For "prophet", read by position: [0] csv_path, [1] feature_name, [2] season_offset, [3] model_path,
+        [4] new, [5] save. For details look at the documentation of prophet_based_modification.
+    input_profiles
+        Not used by this function.
+    t_start
+        Passed on as ``start``, e.g. pd.Timestamp("2021-01-01 00:00:00").
+    t_horizon, t_step
+        Not used by this function; the modified period is hardcoded to 365.
+    Returns
+    -------
+    Whatever Modifier.prophet_based_modification returns.
+    Raises
+    ------
+    Exception
+        If ``mod_type`` is not implemented.
+    """
+    # Guard clause: reject unknown modification types before touching the parameters
+    if mod_type != "prophet":
+        raise Exception("Modification for mod type " + str(mod_type) + " is not implemented!")
+    csv_path = parameters[0]
+    feature_name = parameters[1]
+    season_offset = parameters[2]
+    model_path = parameters[3]
+    create_new_model = parameters[4]
+    save_model = parameters[5]
+    return Modifier.prophet_based_modification(csv_path=csv_path,
+                                               feature_name=feature_name,
+                                               season_offset=season_offset,
+                                               start=t_start,
+                                               model_path=model_path,
+                                               new=create_new_model, mod_period=365, save=save_model)
--- a/modifier/Modifier.py
+++ b/modifier/Modifier.py
+from copy import copy
+import numpy as np
+import pandas as pd
+import seaborn as sns
+from matplotlib import pyplot as plt
+from fbprophet import Prophet as pr
+from fbprophet.serialize import model_to_json, model_from_json
+from sklearn import preprocessing
+from matplotlib.lines import Line2D
+# ----------------------------------------------------------------------------------------------------------------------
+# Prophet Based Modification
+# ----------------------------------------------------------------------------------------------------------------------
+def __create_season_offset_array(ref: pd.DataFrame, season_offset):
+    """
+    Combine a reference dataframe and a given offset for each season into a new dataframe.
+    Should only be used for the prophet based predictions.
+    The season of every timestamp is derived from its month and day, so the result is independent of the year,
+    covers 29 February in leap years and works for any time resolution of the reference index.
+    Parameters
+    ----------
+    ref : pd.Dataframe
+        Reference dataframe with a DatetimeIndex, which can cover multiple seasons and years. Only its
+        "pred_base" column receives offsets, every other column of the result is 0.
+    season_offset : dict{str : float}
+        Offset for each of the seasons, provided as absolute values, e.g:
+        {"winter1": 0.05, "spring": 0, "summer": 0.2, "fall": 0, "winter2": 0}. All five keys are required.
+    Returns
+    -------
+    pd.Dataframe
+        Dataframe with same index and columns as the reference dataframe and the proper offset values for each
+        season in the "pred_base" column. The reference dataframe itself is not modified.
+    """
+    # TODO: Add smoothing to start and end of the seasons
+    # Inclusive season boundaries encoded as month * 100 + day
+    season_bounds = {
+        'winter1': (101, 320),   # winter1: 01.01. to 20.03.
+        'spring': (321, 531),    # spring:  21.03. to 31.05.
+        'summer': (601, 831),    # summer:  01.06. to 31.08.
+        'fall': (901, 1031),     # fall:    01.09. to 31.10.
+        'winter2': (1101, 1231)  # winter2: 01.11. to 31.12.
+    }
+    month_day = np.asarray(ref.index.month) * 100 + np.asarray(ref.index.day)
+    offsets = np.zeros(len(ref.index), dtype=float)
+    for name, (first, last) in season_bounds.items():
+        offsets[(month_day >= first) & (month_day <= last)] = season_offset[name]
+    s_offset = copy(ref)
+    s_offset.iloc[:] = 0
+    # Only the "pred_base" column carries offsets; a reference without it stays all zero
+    if "pred_base" in s_offset.columns:
+        s_offset["pred_base"] = offsets
+    return s_offset
+def prophet_based_modification(csv_path, season_offset, start: pd.Timestamp, feature_name: str = "",
+                               mod_period: int = 365,
+                               new: bool = True,
+                               save: bool = False, model_path: str = None,
+                               vis: bool = True):
+    """
+    Prophet based prediction covering seasonal changes for a given period.
+    # 1.train prophet based on historical data (or load a previously serialized model)
+    # 2.use prophet as predictor for the given time period -> baseline
+    # 3.change baseline with given parameter
+    # 4.reintroduce variance. Currently only random walk, could be expanded with further capability.
+    Parameters
+    ----------
+    csv_path : str
+        path to the reference csv. When a new model is trained the file must contain a 'timestamp' column, which
+        is parsed into the datetime index, next to the feature column. The file is read even if an existing model
+        is loaded.
+    season_offset : dict{str : float}
+        Offset for each of the seasons,
+        in absolute e.g:{"winter1": 0, "spring": 0.5, "summer": 3, "fall": 0, "winter2": 0}
+    start : pd.Timestamp
+        At which point the output should start, must be in the reference or the predicted time period
+    feature_name: str = ""
+        name of the feature to be modified, has to be identical to the column name in the csv path, although it is
+        encouraged to specify the name it can be left empty, in that case the first column besides 'timestamp'
+        will be used as the feature. It is also the name of the column in the returned dataframe.
+    mod_period : int
+        Period for which a prediction is made, given in days. Currently overridden with 365 inside the function.
+    new : bool
+        if True creates and trains a new model, else tries to load an existing model
+    save : bool
+        default false, if true the newly trained model is saved in the specified location by model_path
+    model_path : str
+        location where a model is saved or loaded from
+    vis : bool
+        default true, if true the forecast, its components and the baseline vs. modified series are plotted and
+        shown with plt.show().
+    Returns
+    -------
+    pd.Dataframe
+        Time series predicted based on the given reference time series, which is then modified with seasonal
+        offsets and noise. Contains only the modified prediction in the column named by feature_name.
+    Raises
+    ------
+    ValueError
+        If a model has to be loaded or saved but no model_path is given.
+    """
+    # Fail before the expensive training instead of at open(None)
+    if (save or not new) and model_path is None:
+        raise ValueError("model_path is required to load (new=False) or save (save=True) a model")
+    reference = pd.read_csv(csv_path)
+    # Create new prophet or load existing model
+    if new:
+        reference = reference.set_index(pd.to_datetime(reference['timestamp']))
+        # The timestamps now live in the index; dropping the column keeps it from being picked up as the feature
+        reference = reference.drop(columns=["timestamp"])
+        # Calculate timedelta (in hours) between start and end of reference dataframe
+        lh = (reference.index[-1] - reference.index[0]) / pd.Timedelta('1 hour')
+        if (lh <= 8784 and reference.index[0].is_leap_year) or lh <= 8760:  # Length of 1 year in hours
+            # Chain the dataframe to provide a broader base for the prophet
+            ref_start = reference.index[0]
+            ref_end = reference.index[-1]
+            shifted = reference.copy()
+            shifted.index = shifted.index + pd.Timedelta(days=365)
+            ix = pd.date_range(start=ref_start, end=ref_end + pd.Timedelta(days=365),
+                               freq='H')
+            reference = reference.reindex(ix)
+            # overwrite=False: only fill the gaps created by the reindex, keep the original values
+            reference.update(shifted, overwrite=False)
+        # Change the relevant columns to adhere to the input specification of prophet
+        reference["ds"] = reference.index
+        if feature_name == "":
+            reference.rename(columns={reference.columns[0]: "y"}, inplace=True)
+        else:
+            reference["y"] = reference[feature_name]
+        reference.ffill(inplace=True)
+        m = pr(yearly_seasonality=True, changepoint_prior_scale=0.43, weekly_seasonality=False)
+        m.fit(reference)
+        if save:
+            with open(model_path, 'w') as fout:
+                fout.write(model_to_json(m))  # Save model
+    else:
+        with open(model_path, 'r') as fin:
+            m = model_from_json(fin.read())  # Load model
+    # Hardcoded until further changes are made in other parts of the code, DO NOT CHANGE !!!
+    mod_period = 365
+    future = m.make_future_dataframe(periods=mod_period * 24, freq="H")
+    forecast = m.predict(future)
+    prediction = forecast[["ds", "yhat"]].set_index("ds").rename(columns={"yhat": "pred_base"})
+    in_window = (prediction.index >= start) & (prediction.index < start + pd.Timedelta(days=mod_period))
+    # Explicit copy: the frame is extended below and must not be a view on the forecast
+    prediction = prediction[in_window].copy()
+    # - Array with offset in % random but smooth (Random Walk)
+    l_r = prediction.shape[0]
+    mms = preprocessing.MinMaxScaler(feature_range=(-1, 1))
+    r_off = mms.fit_transform(
+        np.cumsum(np.random.choice([-0.1, 0.1], size=l_r) + 0.05 * np.random.randn(l_r)).reshape(-1, 1)).flatten()
+    # - Array with season specific offsets
+    s_offset = __create_season_offset_array(prediction, season_offset)
+    # - Combine : new = base + season_offset + random_offset
+    prediction[feature_name] = np.add(prediction["pred_base"].to_numpy(dtype=float),
+                                      np.add(s_offset["pred_base"].to_numpy(dtype=float), r_off))
+    # Visualisation
+    if vis:
+        fig1 = m.plot(forecast)
+        fig2 = m.plot_components(forecast)
+        custom_lines = [Line2D([0], [0], color="Blue", lw=4),
+                        Line2D([0], [0], color="Orange", lw=4)]
+        fig, ax = plt.subplots(figsize=(15, 10))
+        sns.lineplot(data=prediction, x=prediction.index, y="pred_base", ax=ax)
+        sns.lineplot(data=prediction, x=prediction.index, y=feature_name, ax=ax)
+        ax.legend(custom_lines, ['Baseline Temperature', 'Modified Temperature'])
+        # Show the offsets that were actually applied instead of a fixed all-zero label
+        ax.set_title("Baseline vs Modified Temperature with offset=" + str(season_offset))
+        ax.set_ylabel(feature_name)
+        ax.set_xlabel("Time")
+        plt.show()
+    return prediction.drop(columns=["pred_base"])
\ No newline at end of file
--- a/modifier/prophet_models/serialized_model.json
+++ b/modifier/prophet_models/serialized_model.json
--- a/modifier/prophet_models/serialized_model_08032023.json
+++ b/modifier/prophet_models/serialized_model_08032023.json
--- a/modifier/prophet_models/serialized_model_23032023.json
+++ b/modifier/prophet_models/serialized_model_23032023.json
--- a/predictor/Predictor.py
+++ b/predictor/Predictor.py
+import numpy as np
+import copy as cp
+# ----------------------------------------------------------------------------------------------------------------------
+# Functions which can be used as predictors for rolling horizon steps in timeseries
+# ----------------------------------------------------------------------------------------------------------------------
+class Predictor:
+    """
+    Holds a time series together with a label and the name of the prediction method that is applied to the
+    not yet fixed time steps of a rolling horizon.
+    Parameters
+    ----------
+    time_series:
+        The original time series that should be used for predictions.
+    type: str
+        Does not have to be unique, should be used so that the nature of the time series can be identified.
+    method: str
+        Name of the prediction method to be used. The default method is "same_as_last_day". Names that rh_update
+        does not know are handled like "time_forward".
+    """
+    def __init__(self, time_series, type: str, method: str = "same_as_last_day"):
+        self.profile, self.type, self.method = time_series, type, method
+    def rh_update(self, time_steps_rh, time_steps_fix):
+        """
+        Apply the configured prediction method to a deep copy of the stored time series.
+        Parameters
+        ----------
+        time_steps_rh:
+            Series of time steps covering the entire horizon
+        time_steps_fix:
+            Series of time steps covering the fix part of the horizon, i.e. the part that should not be predicted.
+        Returns
+        -------
+        The copy of the time series after applying the prediction method.
+        """
+        known_methods = {
+            "time_forward": time_forward,
+            "same_as_last_day": same_as_last_day,
+            "same_as_last_week": same_as_last_week,
+            "perfect_foresight": perfect_foresight,
+        }
+        # Default: Time Forward
+        predict = known_methods.get(self.method, time_forward)
+        return predict(time_steps_rh, time_steps_fix, cp.deepcopy(self.profile))
+    def get_profile(self):
+        """
+        Hand out the stored time series without exposing the internal object.
+        Returns
+        -------
+        Deep copy of the original time series.
+        """
+        profile_copy = cp.deepcopy(self.profile)
+        return profile_copy
+def time_forward(time_steps_rh, time_steps_fix, reference):
+    """
+    Carry the value of the last fixed time step forward over every time step that is in the horizon but not fixed.
+    The passed series is modified in place and returned.
+    """
+    last_fixed_value = reference[time_steps_fix[-1]]
+    open_steps = list(set(time_steps_rh) - set(time_steps_fix))
+    reference.loc[open_steps] = last_fixed_value
+    return reference
+def same_as_last_day(time_steps_rh, time_steps_fix, reference):
+    """
+    Replace every not fixed time step of the horizon with the value found 24 time steps earlier.
+    Assumes that one time step is equal to 1h, should this change this code needs to be adjusted.
+    The replacement values are read from the series as it was passed in, before any step is overwritten.
+    The passed series is modified in place and returned.
+    """
+    lag = 24
+    # Time steps that need to be predicted: time_steps_rh \ time_steps_fix
+    open_steps = np.array(list(set(time_steps_rh) - set(time_steps_fix)))
+    # Steps below the lag are left untouched, because no data from 24h earlier is present in the data
+    has_source = np.abs(open_steps) > lag - 1
+    open_steps = open_steps[has_source]
+    day_before = reference[open_steps - lag].tolist()
+    reference.loc[open_steps] = day_before
+    return reference
+def same_as_last_week(time_steps_rh, time_steps_fix, reference):
+    """
+    Replace every not fixed time step of the horizon with the value found 168 (7*24) time steps earlier.
+    Assumes that one time step is equal to 1h, should this change this code needs to be adjusted.
+    The replacement values are read from the series as it was passed in, before any step is overwritten.
+    The passed series is modified in place and returned.
+    """
+    lag = 168
+    # Time steps that need to be predicted: time_steps_rh \ time_steps_fix
+    open_steps = np.array(list(set(time_steps_rh) - set(time_steps_fix)))
+    # Steps below the lag are left untouched, because no data from one week earlier is present in the data
+    has_source = np.abs(open_steps) > lag - 1
+    open_steps = open_steps[has_source]
+    week_before = reference[open_steps - lag].tolist()
+    reference.loc[open_steps] = week_before
+    return reference
+def perfect_foresight(time_steps_rh, time_steps_fix, reference):
+    """
+    Assume that the future can be "predicted" perfectly: the passed series is returned unchanged and the time
+    step arguments are ignored.
+    """
+    return reference