# Source code for geomstats.learning.euclidean

"""Linear regression estimators for structured Euclidean data.

This module defines estimators for regression between Euclidean spaces. Inputs
and outputs may be vectors or higher-order tensors; structured data are
flattened before fitting and reshaped back according to their spaces.

It also provides a :class:`LinearRegression` factory that selects the
appropriate estimator from the domain and image spaces.

The wrapped scikit-learn estimator attributes are preserved as returned by
scikit-learn. Shape-aware versions of selected fitted attributes are stored
separately, for example as ``coef_reshaped_``.
"""

import geomstats.backend as gs
from geomstats._sklearngs.linear_model._base import (
    LinearRegression as _LinearRegression,
)

from ._sklearn import EuclideanInputMixin, EuclideanInputOutputMixin



class VectorValuedLinearRegression(EuclideanInputMixin, _LinearRegression):
    """Linear regression from a structured Euclidean space to vector targets.

    Input samples are points of a Euclidean space whose ``shape`` need not be
    one-dimensional. They are flattened before fitting and prediction, and the
    fitted coefficients are additionally exposed in the structured shape of
    the input space.

    Targets use sklearn's usual layout: ``(n_samples,)`` for scalar targets
    and ``(n_samples, n_outputs)`` for vector-valued or multi-output targets.

    Parameters
    ----------
    space : Euclidean
        Euclidean input space. Its ``shape`` attribute gives the structured
        shape of a single input point.
    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model.
    copy_X : bool, default=True
        Whether to copy the input array before fitting.
    tol : float, default=1e-6
        Precision of the solution.
    n_jobs : int or None, default=None
        Number of jobs to use for the computation.
    positive : bool, default=False
        Whether to force the coefficients to be positive.

    Attributes
    ----------
    coef_ : array-like, shape=(n_features,) or (n_targets, n_features)
        Estimated linear coefficients in sklearn's flattened feature
        representation.
    coef_reshaped_ : array-like, shape=space.shape or (n_targets, *space.shape)
        The same coefficients with the trailing feature axis unfolded into
        ``space.shape``. Scalar-valued targets give shape ``space.shape``;
        vector-valued targets keep a leading axis indexing the target
        component.
    intercept_ : float or array-like, shape=(n_targets,)
        Independent term in the linear model: a scalar for scalar-valued
        targets and a vector for vector-valued targets.
    """

    def __init__(
        self,
        space=None,
        *,
        fit_intercept=True,
        copy_X=True,
        tol=1e-6,
        n_jobs=None,
        positive=False,
    ):
        # Hyper-parameters handed through untouched to the wrapped estimator.
        base_params = {
            "fit_intercept": fit_intercept,
            "copy_X": copy_X,
            "tol": tol,
            "n_jobs": n_jobs,
            "positive": positive,
        }
        self.space = space
        super().__init__(**base_params)

    def _reshape_fitted_attrs(self):
        """Store a structured view of ``coef_`` after fitting.

        Only the last (feature) axis of ``coef_`` is unfolded into the input
        shape; any leading axes are kept as they are. When no input shape is
        available, the coefficients are stored unchanged.
        """
        flat_coef = self.coef_
        point_shape = self._input_shape()

        if point_shape is None:
            structured_coef = flat_coef
        else:
            leading_axes = tuple(flat_coef.shape[:-1])
            structured_coef = gs.reshape(
                flat_coef,
                leading_axes + tuple(point_shape),
            )

        self._set_reshaped_attr("coef_", structured_coef)




class TensorValuedLinearRegression(EuclideanInputOutputMixin, _LinearRegression):
    """Linear regression with structured Euclidean inputs and tensor-valued outputs.

    This estimator extends sklearn's linear regression to inputs and outputs
    represented as points in Euclidean spaces with nontrivial shapes. Inputs and
    outputs are flattened before fitting.

    Parameters
    ----------
    space : Euclidean
        Euclidean input space. Its ``shape`` attribute determines the structured
        shape of each input point.
    image_space : Euclidean
        Euclidean output space. Its ``shape`` attribute determines the structured
        shape of each output point. Required, despite the ``None`` default.
    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model.
    copy_X : bool, default=True
        Whether to copy the input array before fitting.
    tol : float, default=1e-6
        Precision of the solution.
    n_jobs : int or None, default=None
        Number of jobs to use for the computation.
    positive : bool, default=False
        Whether to force the coefficients to be positive.

    Attributes
    ----------
    coef_ : array-like, shape=(prod(image_space.shape), prod(space.shape))
        Estimated linear coefficients in sklearn's flattened representation.
        The first axis indexes flattened output coordinates and the second axis
        indexes flattened input coordinates.
    coef_reshaped_ : array-like, shape=(*image_space.shape, *space.shape)
        Estimated linear coefficients reshaped as a linear map from structured
        inputs in ``space`` to structured outputs in ``image_space``. The leading
        axes match ``image_space.shape`` and the trailing axes match
        ``space.shape``.
    intercept_ : array-like, shape=(prod(image_space.shape),)
        Independent term in sklearn's flattened output representation.
    intercept_reshaped_ : array-like, shape=image_space.shape
        Independent term reshaped as a point in the output space. Only stored
        when ``fit_intercept`` is true.

    Raises
    ------
    ValueError
        If ``image_space`` is ``None`` at construction time.
    """

    def __init__(
        self,
        space=None,
        image_space=None,
        *,
        fit_intercept=True,
        copy_X=True,
        tol=1e-6,
        n_jobs=None,
        positive=False,
    ):
        self.space = space

        # ``image_space`` only has a default so that it can follow ``space``
        # positionally; the output shape cannot be recovered without it.
        if image_space is None:
            raise ValueError("Need to define ``image_space``.")
        self.image_space = image_space

        super().__init__(
            fit_intercept=fit_intercept,
            copy_X=copy_X,
            tol=tol,
            n_jobs=n_jobs,
            positive=positive,
        )

    def _reshape_fitted_attrs(self):
        """Store structured versions of ``coef_`` and ``intercept_``.

        ``coef_`` is reshaped to ``(*image_space.shape, *input_shape)``. The
        intercept is reshaped to ``image_space.shape`` only when an intercept
        was fitted.
        """
        # A falsy input shape (e.g. ``None``) falls back to one trailing axis
        # whose length is inferred by the reshape.
        input_shape = self._input_shape() or (-1,)

        coef = gs.reshape(
            self.coef_,
            (*self.image_space.shape, *input_shape),
        )
        self._set_reshaped_attr("coef_", coef)

        if self.fit_intercept:
            intercept = gs.reshape(self.intercept_, self.image_space.shape)
            self._set_reshaped_attr("intercept_", intercept)

    def score(self, X, y, sample_weight=None):
        """Return the coefficient of determination R^2 of the prediction.

        Tensor-valued targets and predictions are flattened per sample, to
        shape ``(n_samples, -1)``, before calling sklearn's ``r2_score``.

        Parameters
        ----------
        X : array-like
            Input points, passed unchanged to ``predict``.
        y : array-like
            True outputs for ``X``; the first axis indexes samples.
        sample_weight : array-like or None, default=None
            Sample weights forwarded to ``r2_score``.

        Returns
        -------
        score : float
            R^2 of ``self.predict(X)`` with respect to ``y``, as computed by
            ``sklearn.metrics.r2_score`` with its default multi-output
            averaging.
        """
        from sklearn.metrics import r2_score

        y_pred = self.predict(X)

        y = gs.reshape(y, (len(y), -1))
        y_pred = gs.reshape(y_pred, (len(y_pred), -1))

        return r2_score(y, y_pred, sample_weight=sample_weight)





def LinearRegression(space=None, image_space=None, **kwargs):
    """Build the linear regression estimator matching the given spaces.

    This factory picks between the two estimators of this module based on the
    output space:

    - When ``image_space`` is ``None``, or its ``shape`` has fewer than two
      axes, a ``VectorValuedLinearRegression`` is built. It accepts structured
      Euclidean inputs and scalar- or vector-valued targets following
      sklearn's standard target conventions. ``image_space`` is not forwarded
      in this case.
    - Otherwise a ``TensorValuedLinearRegression`` is built. It accepts
      structured Euclidean inputs and predicts structured outputs in
      ``image_space``.

    Parameters
    ----------
    space : Euclidean
        Euclidean input space.
    image_space : Euclidean or None, default=None
        Euclidean output space. Selects the estimator as described above.
    **kwargs : dict
        Additional keyword arguments passed to the selected estimator.

    Returns
    -------
    estimator : VectorValuedLinearRegression or TensorValuedLinearRegression
        Linear regression estimator adapted to the provided spaces.
    """
    # Only outputs with at least two axes need the tensor-valued estimator.
    has_tensor_output = image_space is not None and len(image_space.shape) >= 2

    if has_tensor_output:
        return TensorValuedLinearRegression(space, image_space, **kwargs)

    return VectorValuedLinearRegression(space, **kwargs)