# calibration_class.py

import json
import shutil
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from aixcalibuha import CalibrationClass, Calibrator, Goals, TunerParas
from ebcpy.data_types import TimeSeriesData
from ebcpy.utils.statistics_analyzer import StatisticsAnalyzer

from energyplus_calibrator.energy_plus_api import EnergyPlusAPI
from energyplus_calibrator.utils import infos_from_calibration_log


class EnergyPlusCalibrator:
    """
    A class for calibrating EnergyPlus models against measured data.

    This class provides functionality to:
    1. Set up and validate simulation parameters
    2. Compare simulation results with real measurements
    3. Optimize model parameters to minimize the difference
    4. Generate validation plots and statistics
    """

    def __init__(self,
                 cd: Path,
                 model_path: Path,
                 epw_file_path: Path,
                 energyplus_install_dir: Path,
                 energyplus_version: str,
                 simulation_setup: Dict[str, Any],
                 tuner_paras: Union[Dict[str, Tuple[float, Tuple[float, float]]], List[str]],
                 goals: List[str],
                 real_data_dataframe: pd.DataFrame,
                 goal_var_convert_dict: Dict[str, str],
                 schedule_vars: Optional[Dict[str, Tuple[int, str]]] = None,
                 calc_after_sim_function: Optional[callable] = None,
                 error_metric: str = 'NMBE',
                 n_cpu: int = 1,
                 force_same: Optional[Dict[str, Tuple[str, float]]] = None,
                 envelope_calibration: bool = False,
                 res_name_add: Optional[str] = None):
        """
        Initialize the EnergyPlus Calibrator.

        Args:
            cd: Working directory for calibration
            model_path: Path to EnergyPlus IDF model file
            epw_file_path: Path to weather file
            energyplus_install_dir: EnergyPlus installation directory
            energyplus_version: EnergyPlus version string
            simulation_setup: Dictionary containing simulation parameters
            tuner_paras: Parameters to be calibrated, either as dict or list
            goals: List of output variables to calibrate against
            real_data_dataframe: Measured data for comparison
            goal_var_convert_dict: Mapping between simulation and measurement variables
            schedule_vars: Dictionary of schedule variables to be calibrated
            calc_after_sim_function: Post-processing function for simulation results
            error_metric: Error metric for calibration ('NMBE', 'CVRMSE', or 'RMSE')
            n_cpu: Number of CPU cores to use
            force_same: Parameters to force to same values
            envelope_calibration: Whether to calibrate envelope parameters
            res_name_add: Additional string for result file names

        Raises:
            NameError: If error_metric is invalid
            ValueError: If data consistency checks fail
        """

        if error_metric not in ['NMBE', 'CVRMSE', 'RMSE']:
            raise NameError(
                'Error metric must be either "NMBE", "CVRMSE" or "RMSE"')
        self.error_metric = error_metric
        self.energy_plus_api = EnergyPlusAPI(
            cd=cd,
            override_cd=True,
            model_path=model_path,
            epw_filepath=epw_file_path,
            delete_temp_files=True,
            energyplus_version=energyplus_version,
            force_same=force_same,
            schedule_vars=schedule_vars,
            calc_after_sim_function=calc_after_sim_function,
            energyplus_install_dir=energyplus_install_dir,
            n_cpu=n_cpu,
            envelope_model=envelope_calibration)
        self.energyplus_version = energyplus_version

        self.model_path = model_path
        self.epw_filepath = epw_file_path
        self.energyplus_install_dir = energyplus_install_dir
        self.envelope_calibration = envelope_calibration

        self.tuner_params = tuner_paras
        self.force_same = force_same
        if schedule_vars is None:
            schedule_vars = []
        self.schedule_vars = schedule_vars

        self._check_tuner_paras()

        if isinstance(tuner_paras, list):
            self.tuner_params = self._get_tuner_paras_dict()

        self.real_data = real_data_dataframe

        self.energy_plus_api.set_sim_setup(sim_setup=simulation_setup)
        self.simulation_setup = simulation_setup
        _temp_dfs = self.energy_plus_api.simulate(
            parameters=None, return_option='time_series')

        # Important to only set here because in the run before, there are no parameters
        self._temp_dfs = _temp_dfs
        self._goals = goals
        self._goal_var_convert_dict = goal_var_convert_dict

        self._check_goals()

        if len(self._goal_var_convert_dict) != 0:
            rename_dict = {val: key for key,
                           val in self._goal_var_convert_dict.items()}
            self.real_data = self.real_data.rename(rename_dict, axis=1)

        self.real_data = self._check_data_consistency(
            sim_setup_dict=simulation_setup,
            sim_df=self._temp_dfs,
            real_df=self.real_data
        )

        self.calibration_class = self._create_aixcalibuha_calibration_class()
        self._simulation_setup_dict = simulation_setup
        self.res_name_add = res_name_add if res_name_add is not None else ''
        # To silence warnings
        self.energy_plus_api.outputs = {i: None for i in self._goals}

    def _check_tuner_paras(self) -> None:
        """
        Validate tuner parameters against the EnergyPlus model structure.

        This method checks that:
        1. All parameters exist in the model
        2. Parameters lead to actual values, not further dictionaries
        3. Schedule variables have proper data structure

        Raises:
            NameError: If parameter not found in model
            ValueError: If parameter structure is invalid
        """

        def check_param(param: str,
                        is_schedule_var: bool = False):
            """
            Check individual parameter validity.

            Args:
                param: Parameter path in model
                is_schedule_var: Whether parameter is a schedule variable

            Raises:
                NameError: If parameter not in model
                ValueError: If parameter structure invalid
            """

            keys = param.split('/')
            _model_dict = self.energy_plus_api.ep_json.copy()

            if is_schedule_var:
                keys = keys[:-1]
            for key in keys:
                if key in _model_dict:
                    _model_dict = _model_dict[key]
                else:
                    raise NameError(
                        f'{param} is not part of the model description')

            if is_schedule_var:
                if 'data' not in _model_dict:
                    raise ValueError(
                        f'{param} is a schedule var, but there is no "data" in the resulting dict')
                return

            if isinstance(_model_dict, dict):
                raise ValueError(f'{param} seems to be missing a description which leads to'
                                 f'a value. It still ends in a dict')

        for param in self.tuner_params:
            is_schedule_var = param in self.schedule_vars
            check_param(param, is_schedule_var=is_schedule_var)

        if self.force_same is not None:
            for force_to, force_from in self.force_same.items():
                check_param(force_to)
                check_param(force_from[0])

    def _get_tuner_paras_dict(self) -> Dict[str, Tuple[float, Tuple[float, float]]]:
        """
        Create tuner parameters dictionary with bounds from current model values.

        Returns:
            Dictionary with parameter names as keys and tuples of
            (initial_value, (lower_bound, upper_bound)) as values

        Note:
            Bounds are set to ±50% of initial value
        """

        new_tuner_paras = {}
        for param in self.tuner_params:
            keys = param.split('/')
            _model_dict = self.energy_plus_api.ep_json.copy()

            for key in keys:
                _model_dict = _model_dict[key]

            # here model_dict is now a value, since the tuner params have been checked
            # before that they result in a value
            initial = _model_dict
            high_bound = 1.5 * initial
            low_bound = 0.5 * initial

            new_tuner_paras[param] = (initial, (low_bound, high_bound))

        return new_tuner_paras

    def _check_goals(self) -> None:
        """
        Validate calibration goals against available data.

        Checks:
        1. Goals exist in simulation output
        2. Goals exist in measurement data (directly or via conversion)

        Raises:
            NameError: If goals not found in simulation or measurement data
        """

        simulated_cols = [i[0] for i in list(self._temp_dfs)]
        for goal in self._goals:
            if goal not in simulated_cols:
                raise NameError(
                    f'{goal} is not part of the simulated dataframe')

            if goal not in self._goal_var_convert_dict:
                if goal not in list(self.real_data):
                    raise NameError(f'{goal} is not part of the real data. Think about '
                                    f'adding an entry in the goal_var_convert_dict')

            goal_in_real_data = self._goal_var_convert_dict[goal]
            if goal_in_real_data not in list(self.real_data):
                raise NameError(
                    f'{goal_in_real_data} is not part of the real dataframe')

    @staticmethod
    def _check_data_consistency(sim_setup_dict: Dict[str, Any],
                                sim_df: pd.DataFrame,
                                real_df: pd.DataFrame) -> pd.DataFrame:
        """
        Check consistency between simulation setup and data.

        Args:
            sim_setup_dict: Simulation setup parameters
            sim_df: Simulation results DataFrame
            real_df: Measurement data DataFrame

        Returns:
            Processed measurement DataFrame with aligned timestamps

        Raises:
            ValueError: If data inconsistencies found
        """

        sim_intervals = sim_setup_dict['sim_intervals']
        evaluation_intervals = sim_setup_dict['evaluation_intervals']

        if evaluation_intervals is not None:
            intervals = evaluation_intervals
        elif sim_intervals is not None:
            intervals = sim_intervals
        else:
            intervals = [
                f'{sim_setup_dict["start_date"]} to {sim_setup_dict["stop_data"]}']
        _intervals = []
        for sim_interval in intervals:
            start, stop = sim_interval.split(' to ')
            start = datetime.strptime(start, "%Y-%m-%d")
            stop = datetime.strptime(stop, "%Y-%m-%d")

            if stop <= start:
                raise ValueError('Start must be before stop in sim intervals')

            _intervals.append((start, stop))

        _df_reals = []

        for start, stop in _intervals:
            stop = stop + pd.Timedelta(days=1)
            _df_sim = sim_df.loc[(sim_df.index > start) &
                                 (sim_df.index <= stop)].copy()
            _df_rea = real_df.loc[(real_df.index > start) &
                                  (real_df.index <= stop)].copy()

            if _df_sim.shape[0] != _df_rea.shape[0]:
                raise ValueError(
                    'Shape mismatch between sim data and real data')

            timedelta_sim = list(set(_df_sim.index.to_series().diff()[1:]))
            if len(timedelta_sim) != 1:
                raise ValueError('Simulation results must be evenly space')

            timedelta_rea = _df_rea.index.to_series().diff()[1:]
            lb = timedelta_sim[0] - pd.Timedelta(minutes=1)
            ub = timedelta_sim[0] + pd.Timedelta(minutes=1)

            lower_then = (timedelta_rea < lb).any()
            higher_then = (timedelta_rea > ub).any()

            if lower_then or higher_then:
                raise ValueError(
                    'TimeIndex of real data is not evenly spaced enough')

            _df_rea.index = _df_sim.index
            _df_reals.append(_df_rea)
        df_real = pd.concat(_df_reals, axis=0)

        if df_real.shape[0] != sim_df.shape[0]:
            raise ValueError('Shape mismatch after pd.concat')
        return df_real

    def _create_aixcalibuha_calibration_class(self) -> CalibrationClass:
        """
        Create calibration configuration using aixcalibuha.

        Returns:
            CalibrationClass instance configured with tuner parameters and goals
        """

        names = []
        initial_values = []
        bounds = []
        for name, (initial_value, _bounds) in self.tuner_params.items():
            names.append(name)
            initial_values.append(initial_value)
            bounds.append(_bounds)

        tuner_paras = TunerParas(
            names=names,
            initial_values=initial_values,
            bounds=bounds
        )

        variable_names = {}

        # At this point, the names in the dataframes are the same for the output variables
        for n, goal in enumerate(self._goals):
            name = f'goal_{n}'
            variable_names[name] = {
                'meas': goal,
                'sim': goal
            }

        meas_target_data = TimeSeriesData(self.real_data.reset_index())
        start_time = meas_target_data.index[0]
        stop_time = meas_target_data.index[-1]
        goals = Goals(
            meas_target_data=TimeSeriesData(self.real_data.reset_index()),
            variable_names=variable_names,
            statistical_measure=self.error_metric
        )

        return CalibrationClass(
            name='default',
            start_time=start_time,
            stop_time=stop_time,
            goals=goals,
            tuner_paras=tuner_paras
        )

    def create_plots(self,
                     parameters_opt: Dict[str, Union[float, str]]) -> List[str]:
        """
        Create visualization plots comparing original, optimized, and measured data.

        Args:
            parameters_opt: Optimized parameter values

        Returns:
            List of created file names
        """

        to_copy = []

        df_sim_opt = self.energy_plus_api.simulate(parameters=parameters_opt)
        df_sim_orig = self._temp_dfs
        res_dict = {}

        for col in list(self.real_data):
            y_pred = df_sim_opt[col].to_numpy().flatten()
            y_pred_orig = df_sim_orig[col].to_numpy().flatten()
            y_true = self.real_data[col].to_numpy().flatten()
            ix = np.arange(self.real_data.shape[0])

            _ix = []
            _x_ticks = []

            for n, i in enumerate(ix):
                tick = self.real_data.index[n]
                if tick.hour != 1:
                    continue
                _ix.append(i)
                _x_ticks.append(tick.strftime('%Y-%m-%d'))

            res_dict[col] = {"before_calibration": {},
                             "after_calibration": {}}

            res_dict[col]["after_calibration"]['NMBE'] =\
                StatisticsAnalyzer.calc_nmbe(meas=y_true,
                                             sim=y_pred) * 100

            res_dict[col]["after_calibration"]['CVRMSE'] =\
                StatisticsAnalyzer.calc_cvrmse(meas=y_true,
                                               sim=y_pred) * 100

            res_dict[col]["before_calibration"]['NMBE'] =\
                StatisticsAnalyzer.calc_nmbe(meas=y_true,
                                             sim=y_pred_orig) * 100

            res_dict[col]["before_calibration"]['CVRMSE'] =\
                StatisticsAnalyzer.calc_cvrmse(meas=y_true,
                                               sim=y_pred_orig) * 100

            plt.figure(figsize=(16, 9))
            plt.plot(ix, y_true, label='measured values')
            plt.plot(ix, y_pred_orig, label='simulated values before calibration')
            plt.plot(ix, y_pred, label='simulated values after calibration')
            plt.ylabel(col)
            plt.xlabel('Date at 01:00')
            plt.xticks(_ix, _x_ticks, rotation=45)
            plt.legend()
            _save_name = col.replace(' ', '_').replace('/', '_').replace(':', '_').replace('[', '_').replace(']', '_').replace('(', '_').replace(')', '_')
            save_name = self.energy_plus_api.working_directory / f'{_save_name}.png'
            to_copy.append(f'{_save_name}.png')

            plt.savefig(save_name, bbox_inches='tight')
            plt.close()

        with open(self.energy_plus_api.working_directory / 'calibration_kpis.json', 'w') as f:
            json.dump(res_dict, f, indent=4)

        to_copy.append('calibration_kpis.json')

        infos_from_calibration_log(result_path=self.energy_plus_api.working_directory,
                                   used_error_type=self.error_metric,
                                   save_path=self.energy_plus_api.working_directory / 'convergence.png')
        to_copy.append('convergence.png')
        return to_copy

    def convert_model(self) -> None:
        """
        Convert calibrated model from JSON to IDF format.

        Updates the EnergyPlus version identifier and converts the model
        using the EnergyPlus converter.
        """

        calibrated_model_path = self.energy_plus_api.working_directory / 'calibrated_model.json'

        with open(calibrated_model_path, 'r') as f:
            model = json.load(f)

        model['Version']['Version 1']['version_identifier'] = self.energyplus_version
        with open(calibrated_model_path, 'w') as f:
            json.dump(model, f)

        self.energy_plus_api.convert_json_to_idf(calibrated_model_path)

    def create_final_files(self) -> None:
        """
        Create and organize final calibration results.

        Creates:
        1. Calibrated parameter file
        2. Calibrated model files (JSON and IDF)
        3. Visualization plots
        4. Copies all files to results directory
        """

        with open(Path(self.energy_plus_api.working_directory) /\
            'calibrated_parameters.json', 'r') as f:
            parameters = json.load(f)

        calibrated_model = self.energy_plus_api.ep_json.copy()
        calibrated_model = self.energy_plus_api.change_model_dict_with_parameters(
            model_dict=calibrated_model,
            parameters=parameters,
            schedule_vars=self.schedule_vars
        )

        parameters_use = parameters.copy()

        for param in parameters:
            if param in self.schedule_vars:
                _, var = self.schedule_vars[param]
                value = parameters[param]
                if var == 'time' and not isinstance(value, str):
                    value = round(value * 2) / 2

                    hours = int(value)
                    minutes = int((value - hours) * 60)

                    # Format the time string with leading zeros
                    time_string = f"{hours:02d}:{minutes:02d}"
                    parameters[param] = time_string

        with open(Path(self.energy_plus_api.working_directory) /\
            'calibrated_parameters.json', 'w') as f:
            json.dump(parameters, f)

        calibrated_model_path = Path(
            self.energy_plus_api.working_directory) / 'calibrated_model.json'

        with open(calibrated_model_path, 'w') as f:
            json.dump(calibrated_model, f)

        self.convert_model()

        now = datetime.now()
        formatted_date = now.strftime("%Y-%m-%d_%H-%M-%S")

        if self.res_name_add is None:
            self.res_name_add = ''
        folder_name = formatted_date + f"_{self.res_name_add}" if len(self.res_name_add) > 0 else formatted_date

        results_folder = Path(
            __file__).parents[1] / 'calibration_results' / folder_name
        results_folder.mkdir(parents=True)

        to_copy = ['calibrated_model.json',
                   'calibrated_model.idf',
                   'calibrated_parameters.json',
                   ]

        try:
            to_copy2 = self.create_plots(parameters_opt=parameters_use)
        except:
            print('Error in creating plots. Do again manually')
            to_copy2 = []

        to_copy += to_copy2

        source_path = Path(self.energy_plus_api.working_directory)
        for file in to_copy:
            _source_path = source_path / file

            shutil.copy(_source_path, results_folder)

    def calibrate(self,
                  framework: str,
                  method: str,
                  max_time: int = 120) -> None:
        """
        Run the calibration process.

        Args:
            framework: Optimization framework to use
            method: Optimization method within the framework
            max_time: Maximum runtime in seconds

        Note:
            Saves calibration results and creates final files
        """

        self.energy_plus_api.use_for_calibration = True
        kwargs_optimization, kwargs_calibrator = self._get_kwargs_optimization(framework=framework,
                                                                               max_time=max_time)

        energyplus_calibrator = Calibrator(
            working_directory=self.energy_plus_api.working_directory,
            sim_api=self.energy_plus_api,
            calibration_class=self.calibration_class,
            **kwargs_calibrator)

        with open(Path(self.energy_plus_api.working_directory) /
                  'kwargs_calibrator.json', 'w') as f:
            json.dump(kwargs_calibrator, f, indent=4)

        with open(Path(self.energy_plus_api.working_directory) /
                  'kwargs_optimization.json', 'w') as f:
            json.dump(kwargs_optimization, f, indent=4)

        with open(Path(self.energy_plus_api.working_directory) /
                  'simulation_setup.json', 'w') as f:
            json.dump(self._simulation_setup_dict, f, indent=4)

        result = energyplus_calibrator.calibrate(
            framework=framework,
            method=method,
            **kwargs_optimization
        )
        with open(Path(self.energy_plus_api.working_directory) /
                  'calibrated_parameters.json', 'w') as f:
            json.dump(result, f)

        self.create_final_files()

    def _get_kwargs_optimization(self,
                                 framework: str,
                                 max_time: int) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        """
        Get optimization configuration based on framework.

        Args:
            framework: Optimization framework to use
            max_time: Maximum runtime in seconds

        Returns:
            Tuple of (optimization_kwargs, calibrator_kwargs)

        Raises:
            TypeError: If framework doesn't support multiprocessing when needed
        """

        kwargs_calibrator = {"timedelta": 0,
                             "save_files": False,  # save files not yet implemented
                             "verbose_logging": False,
                             "show_plot": False,
                             "create_tsd_plot": False,
                             "save_tsd_plot": False,
                             "show_plot_pause_time": 1e-3,
                             "plot_file_type": "png",
                             "fail_on_error": False,
                             "ret_val_on_error": 10_000,
                             # For this example, let's keep the runtime low
                             "max_itercount": 1_000_000,
                             "max_time": max_time
                             }

        kwargs_scipy_dif_evo = {"maxiter": 1_000_000,
                                "popsize": 5,
                                "mutation": (0.5, 1),
                                "recombination": 0.7,
                                "seed": None,
                                "polish": True,
                                "init": 'latinhypercube',
                                "atol": 0}
        kwargs_dlib_min = {"num_function_calls": int(1e9),
                           "solver_epsilon": 0}
        kwargs_scipy_min = {"tol": None,
                            "options": {"maxfun": 1},
                            "constraints": None,
                            "jac": None,
                            "hess": None,
                            "hessp": None}
        kwargs_pymoo = {"pop_size": 50,
                        "n_gen": 1_000_000,
                        "sampling": "real_random",
                        # Notice that changing Hyper-Parameters may change pop size.
                        "selection": "random",
                        "crossover": "real_sbx",
                        "mutation": "real_pm",
                        "eliminate_duplicates": True,
                        "n_offsprings": None}

        kwargs_bayesian = {"random_state": 42,
                           "allow_dublicate_points": True,
                           "init_points": 5,
                           "n_iter": 1_000_000,
                           "kind_of_utility_function": "ei",
                           "xi": 0.4
                           }

        # Merge the dictionaries into one.
        # If you change the solver, also change the solver-kwargs-dict in the line below
        if framework == "scipy_differential_evolution":
            kwargs_optimization = kwargs_scipy_dif_evo
        elif framework == "scipy_minimize":
            kwargs_optimization = kwargs_scipy_min
        elif framework == "dlib_minimize":
            kwargs_optimization = kwargs_dlib_min
        elif framework == "pymoo":
            kwargs_optimization = kwargs_pymoo
        elif framework == "bayesian_optimization":
            kwargs_optimization = kwargs_bayesian
        else:
            kwargs_optimization = {}
        # Check if pymoo is being used for Multiprocessing
        if framework != "pymoo" and self.energy_plus_api.n_cpu > 1:
            raise TypeError(f"Given framework {framework} does not support Multiprocessing."
                            f"Please use pymoo as your framework.")
        return kwargs_optimization, kwargs_calibrator