# calibration_class.py (26.21 KiB)
# Authored by Carl Philipp Klemm
import json
import shutil
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from aixcalibuha import CalibrationClass, Calibrator, Goals, TunerParas
from ebcpy.data_types import TimeSeriesData
from ebcpy.utils.statistics_analyzer import StatisticsAnalyzer
from energyplus_calibrator.energy_plus_api import EnergyPlusAPI
from energyplus_calibrator.utils import infos_from_calibration_log
class EnergyPlusCalibrator:
    """
    Calibrate an EnergyPlus model against measured data.

    This class provides functionality to:
    1. Set up and validate simulation parameters
    2. Compare simulation results with real measurements
    3. Optimize model parameters to minimize the difference
    4. Generate validation plots and statistics
    """

    def __init__(self,
                 cd: Path,
                 model_path: Path,
                 epw_file_path: Path,
                 energyplus_install_dir: Path,
                 energyplus_version: str,
                 simulation_setup: Dict[str, Any],
                 tuner_paras: Union[Dict[str, Tuple[float, Tuple[float, float]]], List[str]],
                 goals: List[str],
                 real_data_dataframe: pd.DataFrame,
                 goal_var_convert_dict: Dict[str, str],
                 schedule_vars: Optional[Dict[str, Tuple[int, str]]] = None,
                 calc_after_sim_function: Optional[callable] = None,
                 error_metric: str = 'NMBE',
                 n_cpu: int = 1,
                 force_same: Optional[Dict[str, Tuple[str, float]]] = None,
                 envelope_calibration: bool = False,
                 res_name_add: Optional[str] = None):
        """
        Initialize the EnergyPlus Calibrator.

        Besides storing the configuration, this runs one simulation without
        parameters and validates tuner parameters, goals and the alignment
        of measured and simulated data.

        Args:
            cd: Working directory for calibration
            model_path: Path to EnergyPlus IDF model file
            epw_file_path: Path to weather file
            energyplus_install_dir: EnergyPlus installation directory
            energyplus_version: EnergyPlus version string
            simulation_setup: Dictionary containing simulation parameters
            tuner_paras: Parameters to be calibrated, either as dict
                ``{name: (initial, (low, high))}`` or as list of names
            goals: List of output variables to calibrate against
            real_data_dataframe: Measured data for comparison
            goal_var_convert_dict: Mapping from simulation variable name to
                the corresponding column name in the measured data
            schedule_vars: Dictionary of schedule variables to be calibrated
            calc_after_sim_function: Post-processing function for simulation results
            error_metric: Error metric for calibration ('NMBE', 'CVRMSE', or 'RMSE')
            n_cpu: Number of CPU cores to use
            force_same: Parameters to force to same values
            envelope_calibration: Whether to calibrate envelope parameters
            res_name_add: Additional string for result file names

        Raises:
            NameError: If error_metric is invalid
            ValueError: If data consistency checks fail
        """
        if error_metric not in ('NMBE', 'CVRMSE', 'RMSE'):
            raise NameError(
                'Error metric must be either "NMBE", "CVRMSE" or "RMSE"')
        self.error_metric = error_metric

        self.energy_plus_api = EnergyPlusAPI(
            cd=cd,
            override_cd=True,
            model_path=model_path,
            epw_filepath=epw_file_path,
            delete_temp_files=True,
            energyplus_version=energyplus_version,
            force_same=force_same,
            schedule_vars=schedule_vars,
            calc_after_sim_function=calc_after_sim_function,
            energyplus_install_dir=energyplus_install_dir,
            n_cpu=n_cpu,
            envelope_model=envelope_calibration)

        self.energyplus_version = energyplus_version
        self.model_path = model_path
        self.epw_filepath = epw_file_path
        self.energyplus_install_dir = energyplus_install_dir
        self.envelope_calibration = envelope_calibration
        self.tuner_params = tuner_paras
        self.force_same = force_same

        # An empty container keeps the ``param in self.schedule_vars`` checks
        # below working when no schedule variables were given.
        if schedule_vars is None:
            schedule_vars = []
        self.schedule_vars = schedule_vars

        self._check_tuner_paras()
        if isinstance(tuner_paras, list):
            # Only names were given: derive initial values and bounds from the model.
            self.tuner_params = self._get_tuner_paras_dict()

        self.real_data = real_data_dataframe
        self.energy_plus_api.set_sim_setup(sim_setup=simulation_setup)
        self.simulation_setup = simulation_setup

        # Reference run without any parameters; also used later as the
        # "before calibration" series in the plots.
        self._temp_dfs = self.energy_plus_api.simulate(
            parameters=None, return_option='time_series')

        self._goals = goals
        self._goal_var_convert_dict = goal_var_convert_dict
        self._check_goals()

        if self._goal_var_convert_dict:
            # Rename measured columns to the simulation variable names.
            rename_dict = {meas_name: sim_name
                           for sim_name, meas_name in self._goal_var_convert_dict.items()}
            self.real_data = self.real_data.rename(rename_dict, axis=1)

        self.real_data = self._check_data_consistency(
            sim_setup_dict=simulation_setup,
            sim_df=self._temp_dfs,
            real_df=self.real_data
        )
        self.calibration_class = self._create_aixcalibuha_calibration_class()
        self._simulation_setup_dict = simulation_setup
        self.res_name_add = res_name_add if res_name_add is not None else ''
        # To silence warnings
        self.energy_plus_api.outputs = dict.fromkeys(self._goals)

    def _check_tuner_paras(self) -> None:
        """
        Validate tuner parameters against the EnergyPlus model structure.

        This method checks that:
        1. All parameters exist in the model
        2. Parameters lead to actual values, not further dictionaries
        3. Schedule variables have proper data structure

        The same checks (without the schedule handling) are applied to both
        sides of every ``force_same`` entry.

        Raises:
            NameError: If parameter not found in model
            ValueError: If parameter structure is invalid
        """
        def check_param(param: str,
                        is_schedule_var: bool = False) -> None:
            """
            Check individual parameter validity.

            Args:
                param: '/'-separated parameter path in model
                is_schedule_var: Whether parameter is a schedule variable

            Raises:
                NameError: If parameter not in model
                ValueError: If parameter structure invalid
            """
            keys = param.split('/')
            if is_schedule_var:
                # The last path element of a schedule variable is not looked
                # up in the model; only its parent is checked for "data".
                keys = keys[:-1]

            node = self.energy_plus_api.ep_json
            for key in keys:
                # A path that tries to descend into a leaf value is reported
                # as "not part of the model" instead of failing with TypeError.
                if not isinstance(node, dict) or key not in node:
                    raise NameError(
                        f'{param} is not part of the model description')
                node = node[key]

            if is_schedule_var:
                if not isinstance(node, dict) or 'data' not in node:
                    raise ValueError(
                        f'{param} is a schedule var, but there is no "data" in the resulting dict')
                return

            if isinstance(node, dict):
                raise ValueError(f'{param} seems to be missing a description which leads to '
                                 f'a value. It still ends in a dict')

        for param in self.tuner_params:
            check_param(param, is_schedule_var=param in self.schedule_vars)

        if self.force_same is not None:
            for force_to, force_from in self.force_same.items():
                check_param(force_to)
                check_param(force_from[0])

    def _get_tuner_paras_dict(self) -> Dict[str, Tuple[float, Tuple[float, float]]]:
        """
        Create tuner parameters dictionary with bounds from current model values.

        Returns:
            Dictionary with parameter names as keys and tuples of
            (initial_value, (lower_bound, upper_bound)) as values

        Note:
            Bounds are set to ±50% of initial value. They are ordered so that
            the lower bound is never above the upper bound, also for negative
            initial values.
        """
        new_tuner_paras = {}
        for param in self.tuner_params:
            # The path was validated by _check_tuner_paras, so it resolves to a value.
            initial = self.energy_plus_api.ep_json
            for key in param.split('/'):
                initial = initial[key]
            low_bound, high_bound = sorted((0.5 * initial, 1.5 * initial))
            new_tuner_paras[param] = (initial, (low_bound, high_bound))
        return new_tuner_paras

    def _check_goals(self) -> None:
        """
        Validate calibration goals against available data.

        Checks:
        1. Goals exist in simulation output
        2. Goals exist in measurement data (directly or via conversion)

        Raises:
            NameError: If goals not found in simulation or measurement data
        """
        # First element of every column label of the simulated frame.
        simulated_cols = {col[0] for col in self._temp_dfs}
        real_cols = list(self.real_data)

        for goal in self._goals:
            if goal not in simulated_cols:
                raise NameError(
                    f'{goal} is not part of the simulated dataframe')

            if goal not in self._goal_var_convert_dict:
                if goal not in real_cols:
                    raise NameError(f'{goal} is not part of the real data. Think about '
                                    f'adding an entry in the goal_var_convert_dict')
                # Goal is directly available in the measured data; there is
                # no conversion entry to look up.
                continue

            goal_in_real_data = self._goal_var_convert_dict[goal]
            if goal_in_real_data not in real_cols:
                raise NameError(
                    f'{goal_in_real_data} is not part of the real dataframe')

    @staticmethod
    def _check_data_consistency(sim_setup_dict: Dict[str, Any],
                                sim_df: pd.DataFrame,
                                real_df: pd.DataFrame) -> pd.DataFrame:
        """
        Check consistency between simulation setup and data.

        The intervals are taken from ``evaluation_intervals``, else from
        ``sim_intervals``, else from ``start_date``/``stop_date``. For every
        interval the measured rows are compared with the simulated rows and
        then re-indexed with the simulation timestamps.

        Args:
            sim_setup_dict: Simulation setup parameters
            sim_df: Simulation results DataFrame
            real_df: Measurement data DataFrame

        Returns:
            Processed measurement DataFrame with aligned timestamps

        Raises:
            ValueError: If data inconsistencies found
        """
        evaluation_intervals = sim_setup_dict.get('evaluation_intervals')
        sim_intervals = sim_setup_dict.get('sim_intervals')
        if evaluation_intervals is not None:
            intervals = evaluation_intervals
        elif sim_intervals is not None:
            intervals = sim_intervals
        else:
            # "stop_data" is kept as a fallback for setups written against
            # the previously used (misspelled) key.
            stop_key = 'stop_date' if 'stop_date' in sim_setup_dict else 'stop_data'
            intervals = [
                f'{sim_setup_dict["start_date"]} to {sim_setup_dict[stop_key]}']

        # Parse and validate all intervals before touching the data.
        parsed_intervals = []
        for interval in intervals:
            start_str, stop_str = interval.split(' to ')
            start = datetime.strptime(start_str, "%Y-%m-%d")
            stop = datetime.strptime(stop_str, "%Y-%m-%d")
            if stop <= start:
                raise ValueError('Start must be before stop in sim intervals')
            parsed_intervals.append((start, stop))

        aligned_parts = []
        for start, stop in parsed_intervals:
            # The stop day is included: select (start, stop + 1 day].
            stop = stop + pd.Timedelta(days=1)
            sim_part = sim_df.loc[(sim_df.index > start) &
                                  (sim_df.index <= stop)]
            real_part = real_df.loc[(real_df.index > start) &
                                    (real_df.index <= stop)].copy()
            if sim_part.shape[0] != real_part.shape[0]:
                raise ValueError(
                    'Shape mismatch between sim data and real data')

            sim_steps = list(set(sim_part.index.to_series().diff().iloc[1:]))
            if len(sim_steps) != 1:
                raise ValueError('Simulation results must be evenly space')

            # Measured time steps may deviate by up to one minute from the
            # simulation time step.
            real_steps = real_part.index.to_series().diff().iloc[1:]
            lower = sim_steps[0] - pd.Timedelta(minutes=1)
            upper = sim_steps[0] + pd.Timedelta(minutes=1)
            if (real_steps < lower).any() or (real_steps > upper).any():
                raise ValueError(
                    'TimeIndex of real data is not evenly spaced enough')

            real_part.index = sim_part.index
            aligned_parts.append(real_part)

        df_real = pd.concat(aligned_parts, axis=0)
        if df_real.shape[0] != sim_df.shape[0]:
            raise ValueError('Shape mismatch after pd.concat')
        return df_real

    def _create_aixcalibuha_calibration_class(self) -> "CalibrationClass":
        """
        Create calibration configuration using aixcalibuha.

        Returns:
            CalibrationClass instance configured with tuner parameters and goals
        """
        names = []
        initial_values = []
        bounds = []
        for name, (initial_value, param_bounds) in self.tuner_params.items():
            names.append(name)
            initial_values.append(initial_value)
            bounds.append(param_bounds)
        tuner_paras = TunerParas(
            names=names,
            initial_values=initial_values,
            bounds=bounds
        )

        # At this point, the names in the dataframes are the same for the output variables
        variable_names = {
            f'goal_{n}': {'meas': goal, 'sim': goal}
            for n, goal in enumerate(self._goals)
        }

        meas_target_data = TimeSeriesData(self.real_data.reset_index())
        # Read the time range before handing the data over to Goals.
        start_time = meas_target_data.index[0]
        stop_time = meas_target_data.index[-1]
        goals = Goals(
            meas_target_data=meas_target_data,
            variable_names=variable_names,
            statistical_measure=self.error_metric
        )
        return CalibrationClass(
            name='default',
            start_time=start_time,
            stop_time=stop_time,
            goals=goals,
            tuner_paras=tuner_paras
        )

    def create_plots(self,
                     parameters_opt: Dict[str, Union[float, str]]) -> List[str]:
        """
        Create visualization plots comparing original, optimized, and measured data.

        For every column of the measured data one PNG is written to the
        working directory. In addition ``calibration_kpis.json`` (NMBE and
        CVRMSE in percent, before and after calibration) and
        ``convergence.png`` are created there.

        Args:
            parameters_opt: Optimized parameter values

        Returns:
            List of created file names (relative to the working directory)
        """
        work_dir = Path(self.energy_plus_api.working_directory)
        to_copy = []
        df_sim_opt = self.energy_plus_api.simulate(parameters=parameters_opt)
        df_sim_orig = self._temp_dfs

        # The x axis is the same for all columns: one tick per day, placed
        # at the sample taken at 01:00.
        positions = np.arange(self.real_data.shape[0])
        tick_positions = []
        tick_labels = []
        for position, stamp in zip(positions, self.real_data.index):
            if stamp.hour == 1:
                tick_positions.append(position)
                tick_labels.append(stamp.strftime('%Y-%m-%d'))

        # Characters of a column name that are replaced in the file name.
        unsafe_chars = ' /:[]()'
        sanitize = str.maketrans(unsafe_chars, '_' * len(unsafe_chars))

        res_dict = {}
        for col in list(self.real_data):
            y_pred = df_sim_opt[col].to_numpy().flatten()
            y_pred_orig = df_sim_orig[col].to_numpy().flatten()
            y_true = self.real_data[col].to_numpy().flatten()

            res_dict[col] = {
                "before_calibration": {
                    'NMBE': StatisticsAnalyzer.calc_nmbe(
                        meas=y_true, sim=y_pred_orig) * 100,
                    'CVRMSE': StatisticsAnalyzer.calc_cvrmse(
                        meas=y_true, sim=y_pred_orig) * 100,
                },
                "after_calibration": {
                    'NMBE': StatisticsAnalyzer.calc_nmbe(
                        meas=y_true, sim=y_pred) * 100,
                    'CVRMSE': StatisticsAnalyzer.calc_cvrmse(
                        meas=y_true, sim=y_pred) * 100,
                },
            }

            file_name = f'{col.translate(sanitize)}.png'
            plt.figure(figsize=(16, 9))
            try:
                plt.plot(positions, y_true, label='measured values')
                plt.plot(positions, y_pred_orig,
                         label='simulated values before calibration')
                plt.plot(positions, y_pred,
                         label='simulated values after calibration')
                plt.ylabel(col)
                plt.xlabel('Date at 01:00')
                plt.xticks(tick_positions, tick_labels, rotation=45)
                plt.legend()
                plt.savefig(work_dir / file_name, bbox_inches='tight')
            finally:
                # Always release the figure, also when plotting fails.
                plt.close()
            to_copy.append(file_name)

        with open(work_dir / 'calibration_kpis.json', 'w') as f:
            json.dump(res_dict, f, indent=4)
        to_copy.append('calibration_kpis.json')

        infos_from_calibration_log(result_path=self.energy_plus_api.working_directory,
                                   used_error_type=self.error_metric,
                                   save_path=work_dir / 'convergence.png')
        to_copy.append('convergence.png')
        return to_copy

    def convert_model(self) -> None:
        """
        Convert calibrated model from JSON to IDF format.

        Updates the EnergyPlus version identifier in
        ``calibrated_model.json`` and converts the model using the
        EnergyPlus converter.
        """
        calibrated_model_path = Path(
            self.energy_plus_api.working_directory) / 'calibrated_model.json'
        with open(calibrated_model_path, 'r') as f:
            model = json.load(f)
        model['Version']['Version 1']['version_identifier'] = self.energyplus_version
        with open(calibrated_model_path, 'w') as f:
            json.dump(model, f)
        self.energy_plus_api.convert_json_to_idf(calibrated_model_path)

    @staticmethod
    def _format_schedule_time(value: float) -> str:
        """Round decimal hours to the nearest half hour and format as ``HH:MM``."""
        half_hour_value = round(value * 2) / 2
        hours = int(half_hour_value)
        minutes = int((half_hour_value - hours) * 60)
        # Format the time string with leading zeros
        return f"{hours:02d}:{minutes:02d}"

    def create_final_files(self) -> None:
        """
        Create and organize final calibration results.

        Reads ``calibrated_parameters.json`` from the working directory and
        creates:
        1. Calibrated parameter file (schedule times written as ``HH:MM``)
        2. Calibrated model files (JSON and IDF)
        3. Visualization plots (best effort; failures are reported, not raised)
        4. Copies all files to a time-stamped results directory
        """
        work_dir = Path(self.energy_plus_api.working_directory)
        parameters_path = work_dir / 'calibrated_parameters.json'
        with open(parameters_path, 'r') as f:
            parameters = json.load(f)

        calibrated_model = self.energy_plus_api.change_model_dict_with_parameters(
            model_dict=self.energy_plus_api.ep_json.copy(),
            parameters=parameters,
            schedule_vars=self.schedule_vars
        )

        # Keep the numeric values for the plot simulation; the stored file
        # gets the human-readable time strings.
        parameters_use = parameters.copy()
        for param in parameters:
            if param not in self.schedule_vars:
                continue
            _, var = self.schedule_vars[param]
            value = parameters[param]
            if var == 'time' and not isinstance(value, str):
                parameters[param] = self._format_schedule_time(value)

        with open(parameters_path, 'w') as f:
            json.dump(parameters, f)

        with open(work_dir / 'calibrated_model.json', 'w') as f:
            json.dump(calibrated_model, f)
        self.convert_model()

        formatted_date = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        if self.res_name_add is None:
            self.res_name_add = ''
        if len(self.res_name_add) > 0:
            folder_name = f"{formatted_date}_{self.res_name_add}"
        else:
            folder_name = formatted_date
        results_folder = Path(
            __file__).parents[1] / 'calibration_results' / folder_name
        results_folder.mkdir(parents=True)

        to_copy = ['calibrated_model.json',
                   'calibrated_model.idf',
                   'calibrated_parameters.json',
                   ]
        try:
            to_copy += self.create_plots(parameters_opt=parameters_use)
        except Exception as err:
            # Plotting is best effort, but interrupts (KeyboardInterrupt,
            # SystemExit) must not be swallowed and the cause is reported.
            print(f'Error in creating plots ({err!r}). Do again manually')

        for file in to_copy:
            shutil.copy(work_dir / file, results_folder)

    def calibrate(self,
                  framework: str,
                  method: str,
                  max_time: int = 120) -> None:
        """
        Run the calibration process.

        Args:
            framework: Optimization framework to use
            method: Optimization method within the framework
            max_time: Maximum runtime in seconds

        Note:
            Writes the used settings and the calibration result as JSON
            files into the working directory and creates the final files.
        """
        self.energy_plus_api.use_for_calibration = True
        kwargs_optimization, kwargs_calibrator = self._get_kwargs_optimization(
            framework=framework,
            max_time=max_time)
        energyplus_calibrator = Calibrator(
            working_directory=self.energy_plus_api.working_directory,
            sim_api=self.energy_plus_api,
            calibration_class=self.calibration_class,
            **kwargs_calibrator)

        work_dir = Path(self.energy_plus_api.working_directory)
        settings_files = (
            ('kwargs_calibrator.json', kwargs_calibrator),
            ('kwargs_optimization.json', kwargs_optimization),
            ('simulation_setup.json', self._simulation_setup_dict),
        )
        for file_name, content in settings_files:
            with open(work_dir / file_name, 'w') as f:
                json.dump(content, f, indent=4)

        result = energyplus_calibrator.calibrate(
            framework=framework,
            method=method,
            **kwargs_optimization
        )
        with open(work_dir / 'calibrated_parameters.json', 'w') as f:
            json.dump(result, f)
        self.create_final_files()

    def _get_kwargs_optimization(self,
                                 framework: str,
                                 max_time: int) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        """
        Get optimization configuration based on framework.

        Args:
            framework: Optimization framework to use
            max_time: Maximum runtime in seconds

        Returns:
            Tuple of (optimization_kwargs, calibrator_kwargs). For an unknown
            framework the optimization kwargs are empty.

        Raises:
            TypeError: If framework doesn't support multiprocessing when needed
        """
        # Check if pymoo is being used for Multiprocessing
        if framework != "pymoo" and self.energy_plus_api.n_cpu > 1:
            raise TypeError(f"Given framework {framework} does not support Multiprocessing. "
                            f"Please use pymoo as your framework.")

        kwargs_calibrator = {"timedelta": 0,
                             "save_files": False,  # save files not yet implemented
                             "verbose_logging": False,
                             "show_plot": False,
                             "create_tsd_plot": False,
                             "save_tsd_plot": False,
                             "show_plot_pause_time": 1e-3,
                             "plot_file_type": "png",
                             "fail_on_error": False,
                             "ret_val_on_error": 10_000,
                             # Iteration limit is set very high; presumably
                             # max_time is meant to end the run.
                             "max_itercount": 1_000_000,
                             "max_time": max_time
                             }
        kwargs_by_framework = {
            "scipy_differential_evolution": {"maxiter": 1_000_000,
                                             "popsize": 5,
                                             "mutation": (0.5, 1),
                                             "recombination": 0.7,
                                             "seed": None,
                                             "polish": True,
                                             "init": 'latinhypercube',
                                             "atol": 0},
            "scipy_minimize": {"tol": None,
                               "options": {"maxfun": 1},
                               "constraints": None,
                               "jac": None,
                               "hess": None,
                               "hessp": None},
            "dlib_minimize": {"num_function_calls": int(1e9),
                              "solver_epsilon": 0},
            "pymoo": {"pop_size": 50,
                      "n_gen": 1_000_000,
                      "sampling": "real_random",
                      # Notice that changing Hyper-Parameters may change pop size.
                      "selection": "random",
                      "crossover": "real_sbx",
                      "mutation": "real_pm",
                      "eliminate_duplicates": True,
                      "n_offsprings": None},
            "bayesian_optimization": {"random_state": 42,
                                      "allow_dublicate_points": True,
                                      "init_points": 5,
                                      "n_iter": 1_000_000,
                                      "kind_of_utility_function": "ei",
                                      "xi": 0.4},
        }
        kwargs_optimization = kwargs_by_framework.get(framework, {})
        return kwargs_optimization, kwargs_calibrator