Commit b01f7006 authored by Hu Zhao
feat: allow callable scalar_gasp_trend, loosen restriction on data and simulation outputs

parent 0635d822
 import numpy as np
 import sys
 from scipy import optimize
+from typing import Union
+from beartype.typing import Callable
+from beartype import beartype
 from psimpy.simulator.run_simulator import RunSimulator
 from psimpy.sampler.latin import LHS
 from psimpy.emulator.robustgasp import ScalarGaSP
 from psimpy.utility.util_funcs import check_bounds
-from typing import Union
-from beartype.typing import Callable
-from beartype import beartype

 _min_float = 10**(sys.float_info.min_10_exp)
+_max_exp = sys.float_info.max_exp

 class ActiveLearning:
@@ -24,7 +25,8 @@ class ActiveLearning:
         likelihood: Callable,
         lhs_sampler: LHS,
         scalar_gasp: ScalarGaSP,
-        scalar_gasp_mean: str = 'constant',
+        scalar_gasp_trend: Union[str,
+            Callable[[np.ndarray], np.ndarray]] = 'constant',
         indicator: str = 'entropy',
         optimizer: Callable = optimize.brute,
         args_prior: Union[list, None] = None,
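With this change, `scalar_gasp_trend` accepts either one of the strings 'zero', 'constant', 'linear' or a function mapping an input array to a trend matrix. A minimal sketch of such a callable (the name `quadratic_trend` and the quadratic basis are illustrative, not part of this commit):

    import numpy as np

    def quadratic_trend(x: np.ndarray) -> np.ndarray:
        # x: 2d array of shape (n, ndim); return one trend row per sample,
        # here an intercept plus linear and squared terms.
        return np.c_[np.ones(len(x)), x, x**2]

    # passed in place of the former scalar_gasp_mean string, e.g.
    # ActiveLearning(..., scalar_gasp_trend=quadratic_trend, ...)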
@@ -46,31 +48,32 @@ class ActiveLearning:
             bounds[:, 0] - lower boundaries of each parameter.
             bounds[:, 1] - upper boundaries of each parameter.
         data : numpy array
-            Observed data for parameter calibration. 1d array of shape (k,)
-            where k is the number of observed data.
+            Observed data for parameter calibration.
         run_sim_obj : instance of class `RunSimulator`
             It has an attribute `simulator` and two methods to run `simulator`,
-            namely `serial_run` and `parrel_run`.
-            For each simulation, `simulator` must return outputs `y` as a 1d
-            numpy array of shape (k,), corresponding to `data`.
+            namely `serial_run` and `parrel_run`. For each simulation,
+            `simulator` must return outputs `y` as a numpy array.
         prior : Callable
             Prior probability density function.
             Call with `prior(x, *args_prior, **kwgs_prior)` and return the prior
             probability density value at x.
         likelihood : Callable
-            Likelihood function constructed based on `data` and corresponding
-            outputs `y` of `simulator` evaluated at `x`.
+            Likelihood function constructed based on `data` and `simulator`
+            outputs `y` evaluated at `x`.
             Call with `likelihood(y, data, *args_likelihood, **kwgs_likelihood)`
             and return the likelihood value at x.
         lhs_sampler : instance of class LHS
             Used to draw initial samples of `x`.
         scalar_gasp : instance of class ScalarGaSP
             Used to build emulators and make predictions.
-        scalar_gasp_mean : str, optional
-            Mean function of `scalar_gasp` emulator.
-            'zero' - mean is set to zero
-            'constant' - mean is set to a constant
-            'linear' - mean is linear to training input
+        scalar_gasp_trend : str or Callable, optional
+            Mean function of the `scalar_gasp` emulator, used to determine the
+            trend at given training / testing input.
+            'zero' - trend is set to zero
+            'constant' - trend is set to a constant
+            'linear' - trend is linear to the training / testing input
+            Callable - a function that takes the training / testing input as
+            its parameter and returns the trend.
         indicator : str, optional
             Indicator of uncertainty. 'entropy' or 'variance'.
         optimizer : Callable, optional
@@ -107,8 +110,6 @@ class ActiveLearning:
         check_bounds(ndim, bounds)
         self.bounds = bounds

-        if data.ndim != 1:
-            raise ValueError("data must be a one dimensional numpy array")
         self.data = data

         self.run_sim_obj = run_sim_obj
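Removing the `data.ndim` check means `data` only needs to be whatever the user-supplied `likelihood` can consume, e.g. a 2d field rather than a 1d vector. A hedged sketch of a likelihood taking such inputs (the Gaussian form and the noise level are assumptions for illustration):

    import numpy as np

    def gaussian_likelihood(y: np.ndarray, data: np.ndarray) -> float:
        # y and data share an arbitrary shape, e.g. (nx, ny); the misfit is
        # summed over all cells under an assumed iid noise level sigma.
        sigma = 1.0
        return float(np.exp(-0.5 * np.sum((y - data)**2) / sigma**2))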
@@ -120,11 +121,13 @@ class ActiveLearning:
         self.scalar_gasp = scalar_gasp

-        if scalar_gasp_mean not in ["zero","constant","linear"]:
-            raise NotImplementedError(
-                f"unsupported scalar_gasp_mean {scalar_gasp_mean}"
-                f"please choose from 'zero', 'constant', and 'linear'")
-        self.scalar_gasp_mean = scalar_gasp_mean
+        if isinstance(scalar_gasp_trend, str):
+            if scalar_gasp_trend not in ["zero","constant","linear"]:
+                raise NotImplementedError(
+                    f"unsupported scalar_gasp_trend {scalar_gasp_trend}"
+                    f" please choose from 'zero', 'constant', and 'linear'"
+                    f" or pass it as a function")
+        self.scalar_gasp_trend = scalar_gasp_trend

         if indicator not in ["entropy", "variance"]:
             raise NotImplementedError(
@@ -191,8 +194,8 @@ class ActiveLearning:
             Variable input samples for `n0` initial simulations.
             Shape of (n0, ndim).
         init_sim_outputs : numpy array
-            Outputs of `n0` intial simulations, corrresponding to `data`.
-            Shape of (n0, len(data)).
+            Outputs of `n0` initial simulations.
+            init_sim_outputs.shape[0] is `n0`.
         """
         init_var_samples = self.lhs_sampler.sample(n0)
@@ -231,8 +234,8 @@ class ActiveLearning:
             Variable input samples for `ninit` simulations.
             Shape of (ninit, ndim).
         init_sim_outputs : numpy array
-            Outputs of `ninit` simulations, corrresponding to `data`.
-            Shape of (ninit, len(data)).
+            Outputs of `ninit` simulations.
+            init_sim_outputs.shape[0] is `ninit`.
         iter_prefixes : list of str, optional
             Consists of `niter` strings. Each of them is used to name the
             corresponding iterative simulation output file.
@@ -244,8 +247,8 @@ class ActiveLearning:
             Variable input samples of `ninit` simulations and `niter`
             iterative simulations. 2d array of shape (ninit+niter, ndim).
         sim_outputs : numpy array
-            Outputs of `ninit` and `niter` simulations, corresponding to `data`.
-            2d array of shape (ninit+niter, len(data)).
+            Outputs of `ninit` and `niter` simulations.
+            sim_outputs.shape[0] is ninit+niter.
         ln_pxl_values : numpy array
             Natural logarithm values of the product of prior and likelihood
             at `ninit` and `niter` simulations.
@@ -254,9 +257,9 @@ class ActiveLearning:
         if init_var_samples.shape != (ninit, self.ndim):
             raise ValueError("init_var_samples must be of shape (ninit, ndim)")
-        if init_sim_outputs.shape != (ninit, len(self.data)):
-            raise ValueError(
-                "init_sim_outputs must be of shape (ninit, len(data))")
+        if init_sim_outputs.shape[0] != ninit:
+            raise ValueError(
+                "init_sim_outputs.shape[0] must equal ninit")
         if iter_prefixes is None:
             iter_prefixes = [f'iter_sim{i}' for i in range(niter)]
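Only the leading dimension is validated now, so each simulation may return a vector or a field, as long as indexing `init_sim_outputs[i,:]` still yields one simulation's output. For instance (shapes illustrative):

    import numpy as np

    ninit = 5
    ok_vectors = np.zeros((ninit, 100))    # a length-100 series per simulation
    ok_fields = np.zeros((ninit, 64, 64))  # a 64x64 field per simulation
    # both pass, since shape[0] == ninit; the old check required (ninit, len(data))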
@@ -266,7 +269,7 @@ class ActiveLearning:
             raise ValueError("Each item of iter_prefixes must be unique")

         ln_pxl_values = [
-            self._compute_ln_pl(init_var_samples[i,:], init_sim_outputs[i,:])
+            self._compute_ln_pxl(init_var_samples[i,:], init_sim_outputs[i,:])
             for i in range(ninit)
         ]
         var_samples = init_var_samples
@@ -295,7 +298,7 @@ class ActiveLearning:
             self.run_sim_obj.serial_run(next_var_sample, [iter_prefixes[i]],
                 append=False)
             next_sim_output = self.run_sim_obj.outputs[0]
-            sim_outputs = np.vstack((sim_outputs, next_sim_output))
+            sim_outputs = np.vstack((sim_outputs, np.array([next_sim_output])))

             next_ln_pxl_value = self._compute_ln_pxl(
                 next_var_sample.reshape(-1), next_sim_output)
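Wrapping the new output in `np.array([...])` adds a leading axis of length one, which is what lets `np.vstack` append one more simulation even when each output is itself a 2d field. A quick numpy illustration (shapes made up):

    import numpy as np

    sim_outputs = np.zeros((3, 4, 5))   # 3 simulations, each a 4x5 field
    next_sim_output = np.ones((4, 5))   # one new simulation

    stacked = np.vstack((sim_outputs, np.array([next_sim_output])))
    print(stacked.shape)                # (4, 4, 5)

    # without the wrap, np.vstack((sim_outputs, next_sim_output)) raises a
    # ValueError: a 2d output cannot be concatenated with the 3d stack.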
@@ -336,8 +339,7 @@ class ActiveLearning:
         x : numpy array
             Variable input of `simulator`. 1d array of shape (ndim,).
         y : numpy array
-            Simulation outputs at `x` corresponding to `data`. 1d array of shape
-            (len(data),).
+            Simulation outputs at `x`.

         Returns
         -------
...@@ -369,12 +371,15 @@ class ActiveLearning: ...@@ -369,12 +371,15 @@ class ActiveLearning:
""" """
n = len(var_samples) n = len(var_samples)
if self.scalar_gasp_mean == "zero": if isinstance(self.scalar_gasp_trend, str):
if self.scalar_gasp_trend == "zero":
trend = np.zeros((n, 1)) trend = np.zeros((n, 1))
elif self.scalar_gasp_mean == "constant": elif self.scalar_gasp_trend == "constant":
trend = np.ones((n, 1)) trend = np.ones((n, 1))
elif self.scalar_gasp_mean == "linear": elif self.scalar_gasp_trend == "linear":
trend = np.c_[np.ones(n), var_samples] trend = np.c_[np.ones(n), var_samples]
else:
trend = self.scalar_gasp_trend(var_samples)
self.scalar_gasp.train(var_samples, ln_pxl_values, trend) self.scalar_gasp.train(var_samples, ln_pxl_values, trend)
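Whichever branch runs, the `trend` handed to `ScalarGaSP.train` is a matrix with one row per training sample, so a callable must follow the same convention. The shapes for the built-in options (n and ndim illustrative):

    import numpy as np

    n, ndim = 20, 2
    var_samples = np.random.default_rng(0).random((n, ndim))

    np.zeros((n, 1))                # 'zero'     -> shape (n, 1)
    np.ones((n, 1))                 # 'constant' -> shape (n, 1)
    np.c_[np.ones(n), var_samples]  # 'linear'   -> shape (n, 1 + ndim)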
@@ -389,25 +394,28 @@ class ActiveLearning:
         Returns
         -------
-        predictions : numpy array
+        predict : numpy array
             Emulator-prediction at `x`. Shape (1, 4).
-            predictions[0, 0] - mean
-            predictions[0, 1] - low95
-            predictions[0, 2] - upper95
-            predictions[0, 3] - sd
+            predict[0, 0] - mean
+            predict[0, 1] - low95
+            predict[0, 2] - upper95
+            predict[0, 3] - sd
         """
         x = x.reshape((1, self.ndim))

-        if self.scalar_gasp_mean == "zero":
-            trend = np.zeros((1, 1))
-        elif self.scalar_gasp_mean == "constant":
-            trend = np.ones((1, 1))
-        elif self.scalar_gasp_mean == "linear":
-            trend = np.c_[np.ones(1), x]
+        if isinstance(self.scalar_gasp_trend, str):
+            if self.scalar_gasp_trend == "zero":
+                trend = np.zeros((1, 1))
+            elif self.scalar_gasp_trend == "constant":
+                trend = np.ones((1, 1))
+            elif self.scalar_gasp_trend == "linear":
+                trend = np.c_[np.ones(1), x]
+        else:
+            trend = self.scalar_gasp_trend(x)

-        predictions = self.scalar_gasp.predict(x, trend)
-        return predictions
+        predict = self.scalar_gasp.predict(x, trend)
+        return predict

     def _uncertainty_indicator(self, x: np.ndarray) -> float:
         """
@@ -431,8 +439,12 @@ class ActiveLearning:
         if self.indicator == "entropy":
             neg_val = -(mean + 0.5*np.log(2*np.pi*np.e*std**2))
         elif self.indicator == "variance":
+            if std**2 < _max_exp:
+                exp_std2 = np.exp(std**2)
+            else:
+                exp_std2 = np.exp(_max_exp)
             neg_val = -(2*mean + std**2) - np.log(
-                np.maximum(np.exp(std**2)-1, _min_float)
+                np.maximum(exp_std2-1, _min_float)
             )

         return float(neg_val)
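For context: if the emulated ln(prior x likelihood) at `x` has mean m and standard deviation s, the emulated prior-times-likelihood is lognormal with variance exp(2m + s^2) * (exp(s^2) - 1), whose negative logarithm is exactly the expression above; the new branch caps the exp(s^2) factor so very large predictive variances no longer overflow silently. Note that `sys.float_info.max_exp` is the binary exponent (1024), so `np.exp(_max_exp)` still evaluates to inf. A self-contained sketch with a strictly finite cap (a deviation from the committed guard, for illustration only):

    import sys
    import numpy as np

    _min_float = 10**(sys.float_info.min_10_exp)
    _exp_cap = np.log(sys.float_info.max)  # ~709.78, largest finite exp argument

    def neg_log_variance(mean: float, std: float) -> float:
        # -log Var of a lognormal with ln-scale mean `mean` and sd `std`
        s2 = min(std**2, _exp_cap)
        return float(-(2*mean + std**2)
                     - np.log(np.maximum(np.exp(s2) - 1, _min_float)))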
\ No newline at end of file