# Source code for geomstats.learning.wrapped_gaussian_process

r"""Wrapped Gaussian Process.

Lead author: Arthur Pignet

Extension of Gaussian Processes to Riemannian Manifolds,
introduced in [Mallasto]_.

References
----------
.. [Mallasto] Mallasto, A. and Feragen, A.
    “Wrapped gaussian process regression on riemannian manifolds.”
    IEEE/CVF Conference on Computer Vision and Pattern Recognition
    (2018)

"""

from sklearn.base import BaseEstimator, MultiOutputMixin
from sklearn.gaussian_process import GaussianProcessRegressor

import geomstats.backend as gs
from geomstats.learning._sklearn import RegressorMixin



# [docs]
class WrappedGaussianProcess(MultiOutputMixin, RegressorMixin, BaseEstimator):
    r"""Gaussian process regression for manifold-valued targets.

    Implements algorithm 4 of [1]_. The estimator delegates the actual
    regression to a Euclidean ``GaussianProcessRegressor`` and relies on
    ``space.metric`` to move between the manifold and its tangent spaces.

    Parameters
    ----------
    space : Manifold
        Equipped manifold. Its ``metric`` (``exp`` and ``log``) and its
        ``shape`` are used by the estimator.
    prior : callable
        Maps the inputs ``X`` to manifold-valued points, which serve as
        base points of the tangent spaces.

    Attributes
    ----------
    euclidean_gpr : GaussianProcessRegressor
        Euclidean regressor working on tangent vectors.
    tangent_y_train_ : array-like or None
        Tangent training targets; ``None`` at construction.

    References
    ----------
    .. [1] Mallasto, A. and Feragen, A. Wrapped gaussian process
        regression on riemannian manifolds. In 2018 IEEE/CVF
        Conference on Computer Vision and Pattern Recognition
    """

    def __init__(self, space, prior):
        self.space = space
        self.prior = prior

        # Options handed to the underlying Euclidean regressor, spelled out
        # explicitly so they are visible in one place.
        gpr_options = {
            "kernel": None,
            "alpha": 1e-10,
            "optimizer": "fmin_l_bfgs_b",
            "n_restarts_optimizer": 0,
            "normalize_y": False,
            "copy_X_train": True,
            "random_state": None,
        }
        self.euclidean_gpr = GaussianProcessRegressor(**gpr_options)

        self.tangent_y_train_ = None


    # [docs]
    def set(self, **kwargs):
        """Set attributes of the underlying ``euclidean_gpr``.

        Especially useful for one line instantiations, since the estimator
        itself is returned.

        Every name is checked before anything is assigned, so a call that
        raises leaves ``euclidean_gpr`` untouched.

        Parameters
        ----------
        **kwargs : dict
            Names of existing ``euclidean_gpr`` attributes mapped to the
            values to assign.

        Returns
        -------
        self : object
            Returns self.

        Raises
        ------
        ValueError
            If a name is not an attribute of ``euclidean_gpr``.
        """
        gpr = self.euclidean_gpr

        # Validate first: failing half-way through would otherwise leave the
        # regressor with only some of the requested values applied.
        for param_name in kwargs:
            if not hasattr(gpr, param_name):
                raise ValueError(f"Unknown parameter {param_name}.")

        for param_name, value in kwargs.items():
            setattr(gpr, param_name, value)

        return self


    def _get_tangent_targets(self, X, y):
        """Map the targets to tangent vectors at the prior's base points.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features) or list of object
            Inputs handed to ``prior`` to obtain the base points.
        y : array-like of shape (n_samples,) or (n_samples, n_targets) or \
                (n_samples, n1_targets, n2_targets)
            Target values, expected to be points of the manifold ``space``.

        Returns
        -------
        tangent_y : array-like
            Result of ``space.metric.log`` applied to ``y`` at the base
            points given by the prior.
        """
        tangent_origins = self.prior(X)
        riemannian_log = self.space.metric.log
        return riemannian_log(y, base_point=tangent_origins)


    # [docs]
    def fit(self, X, y):
        """Fit the wrapped Gaussian process regression model.

        Steps:

        - compute the tangent targets from ``y`` with the prior
          (kept in ``tangent_y_train_``);
        - flatten each tangent target into a vector;
        - fit ``euclidean_gpr`` on ``X`` and the flattened tangent targets.

        Parameters
        ----------
        X : array-like
            Training input samples, passed to the prior and to
            ``euclidean_gpr.fit``.
        y : array-like, shape=[n_samples, {dim, [n, n]}]
            Training target values. Must expose ``shape``; its first
            dimension is taken as the number of samples.

        Returns
        -------
        self : object
            Returns self.
        """
        tangent_targets = self._get_tangent_targets(X, y)
        # Stored before flattening, i.e. in the shape returned by the log map.
        self.tangent_y_train_ = tangent_targets

        n_train = y.shape[0]
        flat_targets = gs.reshape(tangent_targets, (n_train, -1))
        self.euclidean_gpr.fit(X, flat_targets)

        return self



    # [docs]
    def predict(self, X, return_tangent_std=False, return_tangent_cov=False):
        """Predict manifold-valued outputs at the query points.

        Steps:

        - query ``euclidean_gpr.predict`` for the tangent predictions;
        - compute the base points with the prior;
        - reshape the tangent means to ``(X.shape[0], *space.shape)`` and map
          them to the manifold with the metric's exp at those base points.

        Both flags are forwarded as they are to ``euclidean_gpr.predict``
        (as ``return_std`` and ``return_cov``).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Query points. Must expose ``shape``.
        return_tangent_std : bool, default=False
            If True, the standard deviation computed by the Euclidean
            regressor, i.e. in the tangent space, is returned with the mean.
        return_tangent_cov : bool, default=False
            If True, the covariance computed by the Euclidean regressor,
            i.e. in the tangent space, is returned with the mean.

        Returns
        -------
        y_mean : array-like
            Exp of the predicted tangent means at the prior's base points.
        tangent_std_cov : array-like, optional
            Second output of ``euclidean_gpr.predict`` (standard deviation
            or covariance of the tangent predictions). Only returned, in a
            tuple ``(y_mean, tangent_std_cov)``, when ``return_tangent_std``
            or ``return_tangent_cov`` is True. For matrix-valued targets it
            refers to the vectorized prediction.
        """
        with_uncertainty = return_tangent_std or return_tangent_cov

        gpr_output = self.euclidean_gpr.predict(
            X, return_cov=return_tangent_cov, return_std=return_tangent_std
        )
        # The regressor returns a tuple only when an uncertainty is requested.
        if with_uncertainty:
            flat_means = gs.from_numpy(gpr_output[0])
        else:
            flat_means = gs.from_numpy(gpr_output)

        base_points = self.prior(X)
        point_shape = (X.shape[0], *self.space.shape)
        tangent_means = gs.reshape(flat_means, point_shape)
        y_mean = self.space.metric.exp(tangent_means, base_point=base_points)

        if not with_uncertainty:
            return y_mean

        return (y_mean, gs.from_numpy(gpr_output[1]))



    # [docs]
    def sample_y(self, X, n_samples=1, random_state=0):
        """Draw samples from the wrapped Gaussian process at the points X.

        Steps:

        - draw tangent samples with ``euclidean_gpr.sample_y``;
        - when the draws have more than two dimensions, move the sample axis
          next to the query axis;
        - flatten the draws into a batch of tangent vectors of shape
          ``space.shape`` and repeat each base point of the prior
          ``n_samples`` times, so that a single vectorized exp is enough;
        - map the batch to the manifold with the metric's exp and restore
          the query/sample layout.

        Parameters
        ----------
        X : array-like of shape (n_samples_X, n_features)
            Query points. Must expose ``shape``.
        n_samples : int, default=1
            Number of samples drawn per query point.
        random_state : int, RandomState instance or None, default=0
            Forwarded to ``euclidean_gpr.sample_y``.

        Returns
        -------
        y_samples : array-like of shape \
                (n_samples_X, n_samples, *space.shape), or \
                (n_samples_X, *space.shape, n_samples)
            Sampled manifold points. The second layout is used when the
            tangent draws have more than two dimensions.
        """
        raw_draws = self.euclidean_gpr.sample_y(X, n_samples, random_state)
        tangent_samples = gs.from_numpy(raw_draws)

        # moveaxis keeps the number of dimensions, so this check also holds
        # for the final re-ordering below.
        has_target_axis = gs.ndim(tangent_samples) > 2
        if has_target_axis:
            tangent_samples = gs.moveaxis(tangent_samples, -2, -1)

        flat_tangent_samples = gs.reshape(tangent_samples, (-1, *self.space.shape))

        # One base point per (query point, sample) pair, in the same order as
        # the flattened tangent samples.
        repeated_base_points = gs.repeat(self.prior(X), n_samples, axis=0)

        flat_y_samples = self.space.metric.exp(
            flat_tangent_samples, base_point=repeated_base_points
        )

        sample_layout = (X.shape[0], n_samples, *self.space.shape)
        y_samples = gs.reshape(flat_y_samples, sample_layout)

        if has_target_axis:
            return gs.moveaxis(y_samples, 1, -1)
        return y_samples