diff --git a/src/psimpy/inference/active_learning.py b/src/psimpy/inference/active_learning.py
index cdaa58e24075c6bd6366dd64d18bc922cbab4728..c178af67393fcae7a3ed8732784d704304b37e2b 100644
--- a/src/psimpy/inference/active_learning.py
+++ b/src/psimpy/inference/active_learning.py
@@ -1,15 +1,16 @@
 import numpy as np
 import sys
 from scipy import optimize
+from typing import Union
+from beartype.typing import Callable
+from beartype import beartype
 from psimpy.simulator.run_simulator import RunSimulator
 from psimpy.sampler.latin import LHS
 from psimpy.emulator.robustgasp import ScalarGaSP
 from psimpy.utility.util_funcs import check_bounds
-from typing import Union
-from beartype.typing import Callable
-from beartype import beartype
 
 _min_float = 10**(sys.float_info.min_10_exp)
+_max_exp = np.log(sys.float_info.max)
 
 
 class ActiveLearning:
@@ -24,7 +25,8 @@ class ActiveLearning:
         likelihood: Callable,
         lhs_sampler: LHS,
         scalar_gasp: ScalarGaSP,
-        scalar_gasp_mean: str = 'constant',
+        scalar_gasp_trend: Union[str,
+            Callable[[np.ndarray], np.ndarray]] = 'constant',
         indicator: str = 'entropy',
         optimizer: Callable = optimize.brute,
         args_prior: Union[list, None] = None,
@@ -46,31 +48,32 @@
             bounds[:, 0] - lower boundaries of each parameter.
             bounds[:, 1] - upper boundaries of each parameter.
         data : numpy array
-            Observed data for parameter calibration. 1d array of shape (k,)
-            where k is the number of observed data.
+            Observed data for parameter calibration.
         run_sim_obj : instance of class `RunSimulator`
             It has an attribute `simulator` and two methods to run `simulator`,
-            namely `serial_run` and `parrel_run`.
-            For each simulation, `simulator` must return outputs `y` as a 1d
-            numpy array of shape (k,), corresponding to `data`.
+            namely `serial_run` and `parallel_run`. For each simulation,
+            `simulator` must return outputs `y` as a numpy array.
         prior : Callable
             Prior probability density function.
             Call with `prior(x, *args_prior, **kwgs_prior)` and return the
             prior probability density value at x.
         likelihood : Callable
-            Likelihood function constructed based on `data` and corresponding
-            outputs `y` of `simulator` evaluated at `x`.
+            Likelihood function constructed based on `data` and `simulator`
+            outputs `y` evaluated at `x`.
             Call with `likelihood(y, data, *args_likelihood, *kwgs_likelihood)`
             and return the likelihood value at x.
         lhs_sampler : instance of class LHS
             Used to draw initial samples of `x`.
         scalar_gasp : instance of class ScalarGaSP
             Used to build emulators and make predictions.
-        scalar_gasp_mean : str, optional
-            Mean function of `scalar_gasp` emulator.
-            'zero' - mean is set to zero
-            'constant' - mean is set to a constant
-            'linear' - mean is linear to training input
+        scalar_gasp_trend : str or Callable, optional
+            Mean function of `scalar_gasp` emulator to determine the trend at
+            given training / testing_input.
+            'zero' - trend is set to zero
+            'constant' - trend is set to a constant
+            'linear' - trend is linear to training / testing_input
+            Callable - a function that takes training / testing_input as
+            parameter and returns the trend.
         indicator : str, optional
             Indicator of uncertainty. 'entropy' or 'variance'.
         optimizer : Callable, optional
@@ -107,8 +110,6 @@ class ActiveLearning:
         check_bounds(ndim, bounds)
         self.bounds = bounds
 
-        if data.ndim != 1:
-            raise ValueError("data must be a one dimensional numpy array")
         self.data = data
 
         self.run_sim_obj = run_sim_obj
@@ -120,11 +121,13 @@
 
         self.scalar_gasp = scalar_gasp
 
-        if scalar_gasp_mean not in ["zero","constant","linear"]:
-            raise NotImplementedError(
-                f"unsupported scalar_gasp_mean {scalar_gasp_mean}"
-                f"please choose from 'zero', 'constant', and 'linear'")
-        self.scalar_gasp_mean = scalar_gasp_mean
+        if isinstance(scalar_gasp_trend, str):
+            if scalar_gasp_trend not in ["zero","constant","linear"]:
+                raise NotImplementedError(
+                    f"unsupported scalar_gasp_trend {scalar_gasp_trend},"
+                    f" please choose from 'zero', 'constant', and 'linear',"
+                    f" or pass it as a function")
+        self.scalar_gasp_trend = scalar_gasp_trend
 
         if indicator not in ["entropy", "variance"]:
             raise NotImplementedError(
@@ -191,8 +194,8 @@ class ActiveLearning:
             Variable input samples for `n0` initial simulations.
             Shape of (n0, ndim).
         init_sim_outputs : numpy array
-            Outputs of `n0` intial simulations, corrresponding to `data`.
-            Shape of (n0, len(data)).
+            Outputs of `n0` initial simulations.
+            init_sim_outputs.shape[0] is `n0`.
         """
 
         init_var_samples = self.lhs_sampler.sample(n0)
@@ -231,8 +234,8 @@ class ActiveLearning:
             Variable input samples for `ninit` simulations.
             Shape of (ninit, ndim).
         init_sim_outputs : numpy array
-            Outputs of `ninit` simulations, corrresponding to `data`.
-            Shape of (ninit, len(data)).
+            Outputs of `ninit` simulations.
+            init_sim_outputs.shape[0] is `ninit`.
         iter_prefixes : list of str, optional
             Consist of `niter` strings. Each of them is used to name
             corresponding iterative simulation output file.
@@ -244,8 +247,8 @@ class ActiveLearning:
             Variable input samples of `ninit` simulations and `niter`
             iterative simulations. 2d array of shape (ninit+niter, ndim).
         sim_outputs : numpy array
-            Outputs of `ninit` and `niter` simulations, corresponding to `data`.
-            2d array of shape (ninit+niter, len(data)).
+            Outputs of `ninit` and `niter` simulations.
+            sim_outputs.shape[0] is ninit+niter.
         ln_pxl_values : numpy array
             Natural logarithm values of the product of prior and likelihood
             at `ninit` and `niter` simulations.
@@ -254,9 +257,9 @@
 
         if init_var_samples.shape != (ninit, self.ndim):
             raise ValueError("init_var_samples must be of shape (ninit, ndim)")
-        if init_sim_outputs.shape != (ninit, len(self.data)):
+        if init_sim_outputs.shape[0] != ninit:
             raise ValueError(
-                "init_sim_outputs must be of shape (ninit, len(data))")
+                "init_sim_outputs.shape[0] must equal ninit")
 
         if iter_prefixes is None:
             iter_prefixes = [f'iter_sim{i}' for i in range(niter)]
@@ -266,7 +269,7 @@
             raise ValueError("Each item of iter_prefixes must be unique")
 
         ln_pxl_values = [
-            self._compute_ln_pl(init_var_samples[i,:], init_sim_outputs[i,:])
+            self._compute_ln_pxl(init_var_samples[i,:], init_sim_outputs[i,:])
            for i in range(ninit)
         ]
         var_samples = init_var_samples
@@ -295,7 +298,7 @@
             self.run_sim_obj.serial_run(next_var_sample, [iter_prefixes[i]],
                 append=False)
             next_sim_output = self.run_sim_obj.outputs[0]
-            sim_outputs = np.vstack((sim_outputs, next_sim_output))
+            sim_outputs = np.vstack((sim_outputs, np.array([next_sim_output])))
 
             next_ln_pxl_value = self._compute_ln_pxl(
                 next_var_sample.reshape(-1), next_sim_output)
@@ -336,8 +339,7 @@
         x : numpy array
             Variable input of `simulator`. 1d array of shape (ndim,).
         y : numpy array
-            Simulation outputs at `x` corresponding to `data`. 1d array of shape
-            (len(data),).
+            Simulation outputs at `x`.
 
         Returns
         -------
@@ -369,12 +371,15 @@
         """
         n = len(var_samples)
 
-        if self.scalar_gasp_mean == "zero":
-            trend = np.zeros((n, 1))
-        elif self.scalar_gasp_mean == "constant":
-            trend = np.ones((n, 1))
-        elif self.scalar_gasp_mean == "linear":
-            trend = np.c_[np.ones(n), var_samples]
+        if isinstance(self.scalar_gasp_trend, str):
+            if self.scalar_gasp_trend == "zero":
+                trend = np.zeros((n, 1))
+            elif self.scalar_gasp_trend == "constant":
+                trend = np.ones((n, 1))
+            elif self.scalar_gasp_trend == "linear":
+                trend = np.c_[np.ones(n), var_samples]
+        else:
+            trend = self.scalar_gasp_trend(var_samples)
 
         self.scalar_gasp.train(var_samples, ln_pxl_values, trend)
 
@@ -389,25 +394,28 @@
 
         Returns
         -------
-        predictions : numpy array
+        predict : numpy array
             Emulator-prediction at `x`. Shape (1, 4).
-            predictions[0, 0] - mean
-            predictions[0, 1] - low95
-            predictions[0, 2] - upper95
-            predictions[0, 3] - sd
+            predict[0, 0] - mean
+            predict[0, 1] - low95
+            predict[0, 2] - upper95
+            predict[0, 3] - sd
         """
         x = x.reshape((1, self.ndim))
 
-        if self.scalar_gasp_mean == "zero":
-            trend = np.zeros((1, 1))
-        elif self.scalar_gasp_mean == "constant":
-            trend = np.ones((1, 1))
-        elif self.scalar_gasp_mean == "linear":
-            trend = np.c_[np.ones(1), x]
+        if isinstance(self.scalar_gasp_trend, str):
+            if self.scalar_gasp_trend == "zero":
+                trend = np.zeros((1, 1))
+            elif self.scalar_gasp_trend == "constant":
+                trend = np.ones((1, 1))
+            elif self.scalar_gasp_trend == "linear":
+                trend = np.c_[np.ones(1), x]
+        else:
+            trend = self.scalar_gasp_trend(x)
 
-        predictions = self.scalar_gasp.predict(x, trend)
+        predict = self.scalar_gasp.predict(x, trend)
 
-        return predictions
+        return predict
 
     def _uncertainty_indicator(self, x: np.ndarray) -> float:
         """
@@ -431,8 +439,12 @@
         if self.indicator == "entropy":
             neg_val = -(mean + 0.5*np.log(2*np.pi*np.e*std**2))
        elif self.indicator == "variance":
+            if std**2 < _max_exp:
+                exp_std2 = np.exp(std**2)
+            else:
+                exp_std2 = np.exp(_max_exp)
             neg_val = -(2*mean + std**2) - np.log(
-                np.maximum(np.exp(std**2)-1, _min_float)
+                np.maximum(exp_std2-1, _min_float)
                 )
 
         return float(neg_val)
\ No newline at end of file
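
Usage note (a sketch, not part of the patch): the new `scalar_gasp_trend`
parameter accepts a callable that receives the training / testing input and
returns the trend matrix, per the docstring above. The `quadratic_trend`
function and the constructor arguments below are illustrative placeholders,
assuming `ndim`, `bounds`, `data`, `run_sim_obj`, `prior`, `likelihood`,
`lhs_sampler`, and `scalar_gasp` are set up as the docstring describes:

    import numpy as np

    def quadratic_trend(x: np.ndarray) -> np.ndarray:
        # x is the training or testing input, shape (n, ndim).
        # Return an (n, 1 + ndim) basis matrix: a constant column plus
        # squared inputs, used as the emulator's mean (trend) basis.
        return np.c_[np.ones(len(x)), x**2]

    active_learner = ActiveLearning(
        ndim, bounds, data, run_sim_obj, prior, likelihood,
        lhs_sampler, scalar_gasp, scalar_gasp_trend=quadratic_trend)

Because `_train_scalar_gasp` and `_predict_scalar_gasp` apply the same
callable to the training and testing inputs, it must return the same number
of trend columns for any (n, ndim) input. The clamp added in
`_uncertainty_indicator` matters because the 'variance' indicator computes
-(2*mean + std**2) - log(exp(std**2) - 1), the negative log of a lognormal
variance, and exp(std**2) otherwise overflows for large predictive standard
deviations.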