Commit b01f7006 authored by Hu Zhao
feat: allow callable scalar_gasp_trend, loosen restriction on data and simulation outputs

parent 0635d822
 import numpy as np
 import sys
 from scipy import optimize
+from typing import Union
+from beartype.typing import Callable
+from beartype import beartype
 from psimpy.simulator.run_simulator import RunSimulator
 from psimpy.sampler.latin import LHS
 from psimpy.emulator.robustgasp import ScalarGaSP
 from psimpy.utility.util_funcs import check_bounds
-from typing import Union
-from beartype.typing import Callable
-from beartype import beartype

 _min_float = 10**(sys.float_info.min_10_exp)
+_max_exp = sys.float_info.max_exp

 class ActiveLearning:
@@ -24,7 +25,8 @@ class ActiveLearning:
         likelihood: Callable,
         lhs_sampler: LHS,
         scalar_gasp: ScalarGaSP,
-        scalar_gasp_mean: str = 'constant',
+        scalar_gasp_trend: Union[str,
+            Callable[[np.ndarray], np.ndarray]] = 'constant',
         indicator: str = 'entropy',
         optimizer: Callable = optimize.brute,
         args_prior: Union[list, None] = None,
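With this change, `scalar_gasp_trend` accepts either one of the strings 'zero', 'constant', 'linear' or a function mapping an input array to a trend matrix. A minimal sketch of such a callable (the name `quadratic_trend` and the quadratic basis are illustrative, not part of this commit):

    import numpy as np

    def quadratic_trend(x: np.ndarray) -> np.ndarray:
        # x: 2d array of shape (n, ndim); return one trend row per sample,
        # here an intercept plus linear and squared terms.
        return np.c_[np.ones(len(x)), x, x**2]

    # passed in place of the former scalar_gasp_mean string, e.g.
    # ActiveLearning(..., scalar_gasp_trend=quadratic_trend, ...)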
@@ -46,31 +48,32 @@ class ActiveLearning:
             bounds[:, 0] - lower boundaries of each parameter.
             bounds[:, 1] - upper boundaries of each parameter.
         data : numpy array
-            Observed data for parameter calibration. 1d array of shape (k,)
-            where k is the number of observed data.
+            Observed data for parameter calibration.
         run_sim_obj : instance of class `RunSimulator`
             It has an attribute `simulator` and two methods to run `simulator`,
-            namely `serial_run` and `parrel_run`.
-            For each simulation, `simulator` must return outputs `y` as a 1d
-            numpy array of shape (k,), corresponding to `data`.
+            namely `serial_run` and `parrel_run`. For each simulation,
+            `simulator` must return outputs `y` as a numpy array.
         prior : Callable
             Prior probability density function.
             Call with `prior(x, *args_prior, **kwgs_prior)` and return the prior
             probability density value at x.
         likelihood : Callable
-            Likelihood function constructed based on `data` and corresponding
-            outputs `y` of `simulator` evaluated at `x`.
+            Likelihood function constructed based on `data` and `simulator`
+            outputs `y` evaluated at `x`.
             Call with `likelihood(y, data, *args_likelihood, **kwgs_likelihood)`
             and return the likelihood value at x.
         lhs_sampler : instance of class LHS
             Used to draw initial samples of `x`.
         scalar_gasp : instance of class ScalarGaSP
             Used to build emulators and make predictions.
-        scalar_gasp_mean : str, optional
-            Mean function of `scalar_gasp` emulator.
-            'zero' - mean is set to zero
-            'constant' - mean is set to a constant
-            'linear' - mean is linear to training input
+        scalar_gasp_trend : str or Callable, optional
+            Mean function of the `scalar_gasp` emulator, used to determine the
+            trend at given training / testing input.
+            'zero' - trend is set to zero
+            'constant' - trend is set to a constant
+            'linear' - trend is linear to the training / testing input
+            Callable - a function that takes the training / testing input as
+            its parameter and returns the trend.
         indicator : str, optional
             Indicator of uncertainty. 'entropy' or 'variance'.
         optimizer : Callable, optional
@@ -107,8 +110,6 @@ class ActiveLearning:
         check_bounds(ndim, bounds)
         self.bounds = bounds

-        if data.ndim != 1:
-            raise ValueError("data must be a one dimensional numpy array")
         self.data = data

         self.run_sim_obj = run_sim_obj
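Removing the `data.ndim` check means `data` only needs to be whatever the user-supplied `likelihood` can consume, e.g. a 2d field rather than a 1d vector. A hedged sketch of a likelihood taking such inputs (the Gaussian form and the noise level are assumptions for illustration):

    import numpy as np

    def gaussian_likelihood(y: np.ndarray, data: np.ndarray) -> float:
        # y and data share an arbitrary shape, e.g. (nx, ny); the misfit is
        # summed over all cells under an assumed iid noise level sigma.
        sigma = 1.0
        return float(np.exp(-0.5 * np.sum((y - data)**2) / sigma**2))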
@@ -120,11 +121,13 @@ class ActiveLearning:
         self.scalar_gasp = scalar_gasp

-        if scalar_gasp_mean not in ["zero","constant","linear"]:
-            raise NotImplementedError(
-                f"unsupported scalar_gasp_mean {scalar_gasp_mean}"
-                f"please choose from 'zero', 'constant', and 'linear'")
-        self.scalar_gasp_mean = scalar_gasp_mean
+        if isinstance(scalar_gasp_trend, str):
+            if scalar_gasp_trend not in ["zero","constant","linear"]:
+                raise NotImplementedError(
+                    f"unsupported scalar_gasp_trend {scalar_gasp_trend}"
+                    f" please choose from 'zero', 'constant', and 'linear'"
+                    f" or pass it as a function")
+        self.scalar_gasp_trend = scalar_gasp_trend

         if indicator not in ["entropy", "variance"]:
             raise NotImplementedError(
@@ -191,8 +194,8 @@ class ActiveLearning:
             Variable input samples for `n0` initial simulations.
             Shape of (n0, ndim).
         init_sim_outputs : numpy array
-            Outputs of `n0` intial simulations, corrresponding to `data`.
-            Shape of (n0, len(data)).
+            Outputs of `n0` initial simulations.
+            init_sim_outputs.shape[0] is `n0`.
         """
         init_var_samples = self.lhs_sampler.sample(n0)
@@ -231,8 +234,8 @@ class ActiveLearning:
             Variable input samples for `ninit` simulations.
             Shape of (ninit, ndim).
         init_sim_outputs : numpy array
-            Outputs of `ninit` simulations, corrresponding to `data`.
-            Shape of (ninit, len(data)).
+            Outputs of `ninit` simulations.
+            init_sim_outputs.shape[0] is `ninit`.
         iter_prefixes : list of str, optional
             Consists of `niter` strings. Each of them is used to name the
             corresponding iterative simulation output file.
@@ -244,8 +247,8 @@ class ActiveLearning:
             Variable input samples of `ninit` simulations and `niter`
             iterative simulations. 2d array of shape (ninit+niter, ndim).
         sim_outputs : numpy array
-            Outputs of `ninit` and `niter` simulations, corresponding to `data`.
-            2d array of shape (ninit+niter, len(data)).
+            Outputs of `ninit` and `niter` simulations.
+            sim_outputs.shape[0] is ninit+niter.
         ln_pxl_values : numpy array
             Natural logarithm values of the product of prior and likelihood
             at `ninit` and `niter` simulations.
@@ -254,9 +257,9 @@ class ActiveLearning:
         if init_var_samples.shape != (ninit, self.ndim):
             raise ValueError("init_var_samples must be of shape (ninit, ndim)")
-        if init_sim_outputs.shape != (ninit, len(self.data)):
-            raise ValueError(
-                "init_sim_outputs must be of shape (ninit, len(data))")
+        if init_sim_outputs.shape[0] != ninit:
+            raise ValueError(
+                "init_sim_outputs.shape[0] must equal ninit")
         if iter_prefixes is None:
             iter_prefixes = [f'iter_sim{i}' for i in range(niter)]
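Only the leading dimension is validated now, so each simulation may return a vector or a field, as long as indexing `init_sim_outputs[i,:]` still yields one simulation's output. For instance (shapes illustrative):

    import numpy as np

    ninit = 5
    ok_vectors = np.zeros((ninit, 100))    # a length-100 series per simulation
    ok_fields = np.zeros((ninit, 64, 64))  # a 64x64 field per simulation
    # both pass, since shape[0] == ninit; the old check required (ninit, len(data))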
@@ -266,7 +269,7 @@ class ActiveLearning:
             raise ValueError("Each item of iter_prefixes must be unique")

         ln_pxl_values = [
-            self._compute_ln_pl(init_var_samples[i,:], init_sim_outputs[i,:])
+            self._compute_ln_pxl(init_var_samples[i,:], init_sim_outputs[i,:])
             for i in range(ninit)
         ]
         var_samples = init_var_samples
@@ -295,7 +298,7 @@ class ActiveLearning:
             self.run_sim_obj.serial_run(next_var_sample, [iter_prefixes[i]],
                 append=False)
             next_sim_output = self.run_sim_obj.outputs[0]
-            sim_outputs = np.vstack((sim_outputs, next_sim_output))
+            sim_outputs = np.vstack((sim_outputs, np.array([next_sim_output])))

             next_ln_pxl_value = self._compute_ln_pxl(
                 next_var_sample.reshape(-1), next_sim_output)
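Wrapping the new output in `np.array([...])` adds a leading axis of length one, which is what lets `np.vstack` append one more simulation even when each output is itself a 2d field. A quick numpy illustration (shapes made up):

    import numpy as np

    sim_outputs = np.zeros((3, 4, 5))   # 3 simulations, each a 4x5 field
    next_sim_output = np.ones((4, 5))   # one new simulation

    stacked = np.vstack((sim_outputs, np.array([next_sim_output])))
    print(stacked.shape)                # (4, 4, 5)

    # without the wrap, np.vstack((sim_outputs, next_sim_output)) raises a
    # ValueError: a 2d output cannot be concatenated with the 3d stack.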
@@ -336,8 +339,7 @@ class ActiveLearning:
         x : numpy array
             Variable input of `simulator`. 1d array of shape (ndim,).
         y : numpy array
-            Simulation outputs at `x` corresponding to `data`. 1d array of shape
-            (len(data),).
+            Simulation outputs at `x`.

         Returns
         -------
...@@ -369,12 +371,15 @@ class ActiveLearning: ...@@ -369,12 +371,15 @@ class ActiveLearning:
""" """
n = len(var_samples) n = len(var_samples)
if self.scalar_gasp_mean == "zero": if isinstance(self.scalar_gasp_trend, str):
if self.scalar_gasp_trend == "zero":
trend = np.zeros((n, 1)) trend = np.zeros((n, 1))
elif self.scalar_gasp_mean == "constant": elif self.scalar_gasp_trend == "constant":
trend = np.ones((n, 1)) trend = np.ones((n, 1))
elif self.scalar_gasp_mean == "linear": elif self.scalar_gasp_trend == "linear":
trend = np.c_[np.ones(n), var_samples] trend = np.c_[np.ones(n), var_samples]
else:
trend = self.scalar_gasp_trend(var_samples)
self.scalar_gasp.train(var_samples, ln_pxl_values, trend) self.scalar_gasp.train(var_samples, ln_pxl_values, trend)
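Whichever branch runs, the `trend` handed to `ScalarGaSP.train` is a matrix with one row per training sample, so a callable must follow the same convention. The shapes for the built-in options (n and ndim illustrative):

    import numpy as np

    n, ndim = 20, 2
    var_samples = np.random.default_rng(0).random((n, ndim))

    np.zeros((n, 1))                # 'zero'     -> shape (n, 1)
    np.ones((n, 1))                 # 'constant' -> shape (n, 1)
    np.c_[np.ones(n), var_samples]  # 'linear'   -> shape (n, 1 + ndim)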
@@ -389,25 +394,28 @@ class ActiveLearning:
         Returns
         -------
-        predictions : numpy array
+        predict : numpy array
             Emulator-prediction at `x`. Shape (1, 4).
-            predictions[0, 0] - mean
-            predictions[0, 1] - low95
-            predictions[0, 2] - upper95
-            predictions[0, 3] - sd
+            predict[0, 0] - mean
+            predict[0, 1] - low95
+            predict[0, 2] - upper95
+            predict[0, 3] - sd
         """
         x = x.reshape((1, self.ndim))

-        if self.scalar_gasp_mean == "zero":
-            trend = np.zeros((1, 1))
-        elif self.scalar_gasp_mean == "constant":
-            trend = np.ones((1, 1))
-        elif self.scalar_gasp_mean == "linear":
-            trend = np.c_[np.ones(1), x]
+        if isinstance(self.scalar_gasp_trend, str):
+            if self.scalar_gasp_trend == "zero":
+                trend = np.zeros((1, 1))
+            elif self.scalar_gasp_trend == "constant":
+                trend = np.ones((1, 1))
+            elif self.scalar_gasp_trend == "linear":
+                trend = np.c_[np.ones(1), x]
+        else:
+            trend = self.scalar_gasp_trend(x)

-        predictions = self.scalar_gasp.predict(x, trend)
-        return predictions
+        predict = self.scalar_gasp.predict(x, trend)
+        return predict

     def _uncertainty_indicator(self, x: np.ndarray) -> float:
         """
@@ -431,8 +439,12 @@ class ActiveLearning:
         if self.indicator == "entropy":
             neg_val = -(mean + 0.5*np.log(2*np.pi*np.e*std**2))
         elif self.indicator == "variance":
+            if std**2 < _max_exp:
+                exp_std2 = np.exp(std**2)
+            else:
+                exp_std2 = np.exp(_max_exp)
             neg_val = -(2*mean + std**2) - np.log(
-                np.maximum(np.exp(std**2)-1, _min_float)
+                np.maximum(exp_std2-1, _min_float)
             )

         return float(neg_val)
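For context: if the emulated ln(prior x likelihood) at `x` has mean m and standard deviation s, the emulated prior-times-likelihood is lognormal with variance exp(2m + s^2) * (exp(s^2) - 1), whose negative logarithm is exactly the expression above; the new branch caps the exp(s^2) factor so very large predictive variances no longer overflow silently. Note that `sys.float_info.max_exp` is the binary exponent (1024), so `np.exp(_max_exp)` still evaluates to inf. A self-contained sketch with a strictly finite cap (a deviation from the committed guard, for illustration only):

    import sys
    import numpy as np

    _min_float = 10**(sys.float_info.min_10_exp)
    _exp_cap = np.log(sys.float_info.max)  # ~709.78, largest finite exp argument

    def neg_log_variance(mean: float, std: float) -> float:
        # -log Var of a lognormal with ln-scale mean `mean` and sd `std`
        s2 = min(std**2, _exp_cap)
        return float(-(2*mean + std**2)
                     - np.log(np.maximum(np.exp(s2) - 1, _min_float)))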
\ No newline at end of file