"""Linear regression estimators for structured Euclidean data.
This module defines estimators for regression between Euclidean spaces. Inputs
and outputs may be vectors or higher-order tensors; structured data are
flattened before fitting and reshaped back according to their spaces.
It also provides a :class:`LinearRegression` factory that selects the
appropriate estimator from the domain and image spaces.
The wrapped scikit-learn estimator attributes are preserved as returned by
scikit-learn. Shape-aware versions of selected fitted attributes are stored
separately, for example as ``coef_reshaped_``.
"""
import geomstats.backend as gs
from geomstats._sklearngs.linear_model._base import (
LinearRegression as _LinearRegression,
)
from ._sklearn import EuclideanInputMixin, EuclideanInputOutputMixin
[docs]
class VectorValuedLinearRegression(EuclideanInputMixin, _LinearRegression):
    """Linear regression on structured Euclidean inputs with vector targets.

    sklearn's linear regression is extended to inputs that are points of a
    Euclidean space whose shape is not trivial. Inputs are flattened before
    fitting and prediction; the fitted coefficients are additionally made
    available reshaped to the shape of the input space.

    Targets follow sklearn's usual convention: ``(n_samples,)`` for scalar
    targets and ``(n_samples, n_outputs)`` for vector-valued or multi-output
    targets.

    Parameters
    ----------
    space : Euclidean
        Euclidean input space. Its ``shape`` attribute gives the structured
        shape of each input point.
    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model.
    copy_X : bool, default=True
        Whether to copy the input array before fitting.
    tol : float, default=1e-6
        Precision of the solution.
    n_jobs : int or None, default=None
        Number of jobs to use for the computation.
    positive : bool, default=False
        Whether to force the coefficients to be positive.

    Attributes
    ----------
    coef_ : array-like, shape=(n_features,) or (n_targets, n_features)
        Estimated linear coefficients in sklearn's flattened feature
        representation.
    coef_reshaped_ : array-like, shape=space.shape or (n_targets, *space.shape)
        Estimated linear coefficients reshaped to the structured input space.
        With scalar-valued targets the shape is ``space.shape``; with
        vector-valued targets the first axis indexes the target component and
        the remaining axes match ``space.shape``.
    intercept_ : float or array-like, shape=(n_targets,)
        Independent term in the linear model: a scalar for scalar-valued
        targets and a vector for vector-valued targets.
    """

    def __init__(
        self,
        space=None,
        *,
        fit_intercept=True,
        copy_X=True,
        tol=1e-6,
        n_jobs=None,
        positive=False,
    ):
        self.space = space
        # Everything except ``space`` is handed to the wrapped estimator.
        wrapped_params = {
            "fit_intercept": fit_intercept,
            "copy_X": copy_X,
            "tol": tol,
            "n_jobs": n_jobs,
            "positive": positive,
        }
        super().__init__(**wrapped_params)

    def _reshape_fitted_attrs(self):
        """Register ``coef_`` reshaped to the structured input shape.

        The trailing (flattened feature) axis of ``coef_`` is replaced by the
        shape reported by ``_input_shape()``; any leading axes are kept. When
        ``_input_shape()`` returns ``None`` the coefficients are registered
        unchanged.
        """
        flat_coef = self.coef_
        point_shape = self._input_shape()
        if point_shape is None:
            structured_coef = flat_coef
        else:
            leading_axes = flat_coef.shape[:-1]
            structured_coef = gs.reshape(
                flat_coef,
                (*leading_axes, *point_shape),
            )
        self._set_reshaped_attr("coef_", structured_coef)
[docs]
class TensorValuedLinearRegression(EuclideanInputOutputMixin, _LinearRegression):
    """Linear regression with structured Euclidean inputs and tensor-valued outputs.

    This estimator extends sklearn's linear regression to inputs and outputs
    represented as points in Euclidean spaces with nontrivial shapes. Inputs and
    outputs are flattened before fitting.

    Parameters
    ----------
    space : Euclidean
        Euclidean input space. Its ``shape`` attribute determines the structured
        shape of each input point.
    image_space : Euclidean
        Euclidean output space. Its ``shape`` attribute determines the structured
        shape of each output point. Must not be ``None``.
    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model.
    copy_X : bool, default=True
        Whether to copy the input array before fitting.
    tol : float, default=1e-6
        Precision of the solution.
    n_jobs : int or None, default=None
        Number of jobs to use for the computation.
    positive : bool, default=False
        Whether to force the coefficients to be positive.

    Attributes
    ----------
    coef_ : array-like, shape=(prod(image_space.shape), prod(space.shape))
        Estimated linear coefficients in sklearn's flattened representation.
        The first axis indexes flattened output coordinates and the second axis
        indexes flattened input coordinates.
    coef_reshaped_ : array-like, shape=(*image_space.shape, *space.shape)
        Estimated linear coefficients reshaped as a linear map from structured
        inputs in ``space`` to structured outputs in ``image_space``. The leading
        axes match ``image_space.shape`` and the trailing axes match
        ``space.shape``.
    intercept_ : array-like, shape=(prod(image_space.shape),)
        Independent term in sklearn's flattened output representation.
    intercept_reshaped_ : array-like, shape=image_space.shape
        Independent term reshaped as a point in the output space. Only
        registered when ``fit_intercept`` is true.

    Raises
    ------
    ValueError
        If ``image_space`` is ``None`` at construction time.
    """

    def __init__(
        self,
        space=None,
        image_space=None,
        *,
        fit_intercept=True,
        copy_X=True,
        tol=1e-6,
        n_jobs=None,
        positive=False,
    ):
        self.space = space
        # ``image_space`` only has a default so that ``space`` can keep one;
        # an output space is mandatory for this estimator.
        if image_space is None:
            raise ValueError("Need to define ``image_space``.")
        self.image_space = image_space
        super().__init__(
            fit_intercept=fit_intercept,
            copy_X=copy_X,
            tol=tol,
            n_jobs=n_jobs,
            positive=positive,
        )

    def _reshape_fitted_attrs(self):
        """Register shape-aware versions of ``coef_`` and ``intercept_``.

        ``coef_`` is reshaped to ``(*image_space.shape, *input_shape)``. When
        ``_input_shape()`` returns a falsy value, a single inferred trailing
        axis ``(-1,)`` is used instead. ``intercept_`` is reshaped to
        ``image_space.shape`` only when ``fit_intercept`` is true.
        """
        input_shape = self._input_shape() or (-1,)
        coef = gs.reshape(
            self.coef_,
            (*self.image_space.shape, *input_shape),
        )
        self._set_reshaped_attr("coef_", coef)

        if self.fit_intercept:
            intercept = gs.reshape(self.intercept_, self.image_space.shape)
            self._set_reshaped_attr("intercept_", intercept)

    def score(self, X, y, sample_weight=None):
        """Return the coefficient of determination R^2 of the prediction.

        Tensor-valued targets and predictions are flattened per sample, to
        shape ``(n_samples, -1)``, before calling sklearn's ``r2_score``.

        Parameters
        ----------
        X : array-like
            Input points, passed unchanged to ``predict``.
        y : array-like
            True target values, one per sample along the first axis.
        sample_weight : array-like or None, default=None
            Sample weights, forwarded to ``r2_score``.

        Returns
        -------
        score : float
            Value returned by ``r2_score`` for ``self.predict(X)`` against
            ``y``.
        """
        from sklearn.metrics import r2_score

        y_pred = self.predict(X)
        # r2_score works on 2-D arrays at most, so collapse every
        # non-sample axis into one.
        y = gs.reshape(y, (len(y), -1))
        y_pred = gs.reshape(y_pred, (len(y_pred), -1))
        return r2_score(y, y_pred, sample_weight=sample_weight)
[docs]
def LinearRegression(space=None, image_space=None, **kwargs):
    """Create a linear regression estimator for structured Euclidean data.

    This factory returns a linear regression estimator adapted to the input
    and output spaces.

    If ``image_space`` is ``None`` or its ``shape`` has fewer than two axes,
    the returned estimator accepts structured Euclidean inputs and scalar- or
    vector-valued targets following sklearn's standard target conventions. In
    that case ``image_space`` is not forwarded to the estimator.

    Otherwise, the returned estimator accepts structured Euclidean inputs and
    predicts structured outputs in ``image_space``.

    Parameters
    ----------
    space : Euclidean
        Euclidean input space.
    image_space : Euclidean or None, default=None
        Euclidean output space. If ``None`` or if ``len(image_space.shape)``
        is less than 2, a ``VectorValuedLinearRegression`` is returned.
        Otherwise, a ``TensorValuedLinearRegression`` is returned.
    **kwargs : dict
        Additional keyword arguments passed to the selected estimator.

    Returns
    -------
    estimator : VectorValuedLinearRegression or TensorValuedLinearRegression
        Linear regression estimator adapted to the provided spaces.
    """
    # Outputs with zero or one axis already fit sklearn's target layout, so
    # only outputs with two or more axes need the tensor-valued estimator.
    if image_space is None or len(image_space.shape) < 2:
        return VectorValuedLinearRegression(space, **kwargs)
    return TensorValuedLinearRegression(space, image_space, **kwargs)