Source code for geomstats.information_geometry.geometric

"""Statistical Manifold of geometric distributions with the Fisher metric.

Lead author: Tra My Nguyen.
"""

from scipy.stats import geom

import geomstats.backend as gs
from geomstats.geometry.base import VectorSpaceOpenSet
from geomstats.geometry.euclidean import Euclidean
from geomstats.geometry.riemannian_metric import RiemannianMetric
from geomstats.information_geometry.base import (
    InformationManifoldMixin,
    ScipyUnivariateRandomVariable,
)


[docs] class GeometricDistributions(InformationManifoldMixin, VectorSpaceOpenSet): """Class for the manifold of geometric distributions. This is the parameter space of geometric distributions i.e. [0,1] segment. """ def __init__(self, equip=True): super().__init__( dim=1, embedding_space=Euclidean(dim=1), support_shape=(), equip=equip, ) self._scp_rv = GeometricDistributionsRandomVariable(self)
[docs] @staticmethod def default_metric(): """Metric to equip the space with if equip is True.""" return GeometricMetric
[docs] def belongs(self, point, atol=gs.atol): """Evaluate if a point belongs to the manifold of geometric distributions. Parameters ---------- point : array-like, shape=[..., 1] Point to be checked. atol : float Tolerance to evaluate positivity. Optional, default: gs.atol Returns ------- belongs : array-like, shape=[...,] Boolean indicating whether point represents an geometric distribution. """ belongs_shape = self.shape == point.shape[-self.point_ndim :] if not belongs_shape: shape = point.shape[: -self.point_ndim] return gs.zeros(shape, dtype=bool) return gs.squeeze(gs.logical_and(-atol <= point, point <= 1 + atol))
[docs] def random_point(self, n_samples=1): """Sample parameters of geometric distributions. The uniform distribution on (0, 1) is used. Parameters ---------- n_samples : int Number of samples. Optional, default: 1. bound : float Right-end ot the segment where geometric parameters are sampled. Optional, default: 1. Returns ------- samples : array-like, shape=[n_samples,] Sample of points representing geometric distributions. """ size = (n_samples, self.dim) if n_samples != 1 else (self.dim,) return gs.random.rand(*size)
[docs] def projection(self, point, atol=gs.atol): """Project a point in ambient space to the open set. Return a new point belonging to [0,1] segment within the given tolerance. Parameters ---------- point : array-like, shape=[..., 1] Point in ambient space. atol : float Tolerance to evaluate the position with respect to [0,1] interval. Returns ------- projected : array-like, shape=[..., 1] Projected point. """ return gs.where( gs.logical_or(point < atol, point > 1 - atol), (1 - atol) * gs.cast(point > 1 - atol, point.dtype) + atol * gs.cast(point < atol, point.dtype), point, )
[docs] def sample(self, point, n_samples=1): """Sample from the geometric distribution. Sample from the geometric distribution with parameter provided by point. Parameters ---------- point : array-like, shape=[..., 1] Point representing an geometric distribution. n_samples : int Number of points to sample with each parameter in point. Optional, default: 1. Returns ------- samples : array-like, shape=[..., n_samples] Sample from geometric distributions. """ return self._scp_rv.rvs(point, n_samples)
[docs] def point_to_pdf(self, point): """Compute pmf associated to point. Compute the probability mass function of the geometric distribution with parameters provided by point. Parameters ---------- point : array-like, shape=[..., 1] Point representing an geometric distribution (scale). Returns ------- pdf : function Probability mass function of the geometric distribution with scale parameter provided by point. """ def pmf(k): """Generate parameterized function for geometric pmf. Compute the probability mass function of the geometric distribution with parameters provided by point. Parameters ---------- k : array-like, shape=[n_samples,] Point in the sample space, i.e. an integer. Returns ------- pmf_at_k : array-like, shape=[..., n_samples] Probability mass function of the geometric distribution with parameters provided by point. """ k = gs.reshape(gs.array(k), (-1,)) return (1 - point) ** (k - 1) * point return pmf
[docs] class GeometricMetric(RiemannianMetric): """Class for the Fisher information metric on geometric distributions. References ---------- .. [AM1981] Atkinson, C., & Mitchell, A. F. (1981). Rao's distance measure. Sankhyā: The Indian Journal of Statistics, Series A, 345-365. """
[docs] def squared_dist(self, point_a, point_b): """Compute squared distance associated with the geometric metric. Parameters ---------- point_a : array-like, shape=[..., 1] Point representing an geometric distribution. point_b : array-like, shape=[..., 1] Point representing a geometric distribution. Returns ------- squared_dist : array-like, shape=[...,] Squared distance between points point_a and point_b. """ return gs.squeeze( 4 * (gs.arctanh(gs.sqrt(1 - point_a)) - gs.arctanh(gs.sqrt(1 - point_b))) ** 2 )
[docs] def metric_matrix(self, base_point): """Compute the metric matrix at base_point. Parameters ---------- base_point : array-like, shape=[..., 1] Point representing a geometric distribution. Returns ------- mat : array-like, shape=[..., 1, 1] Metric matrix. """ return gs.expand_dims(1 / (base_point**2 * (1 - base_point)), axis=-1)
@staticmethod def _geodesic_path(t, frequency, initial_phase): """Generate parameterized function for geodesic curve. Parameters ---------- t : array-like, shape=[n_times,] Times at which to compute points of the geodesics. Returns ------- geodesic : array-like, shape=[..., n_times, 1] Values of the geodesic at times t. """ return gs.expand_dims(1 - gs.tanh(frequency * t + initial_phase) ** 2, axis=-1) def _geodesic_ivp(self, initial_point, initial_tangent_vec): """Solve geodesic initial value problem. Compute the parameterized function for the geodesic starting at initial_point with initial velocity given by initial_tangent_vec. Parameters ---------- initial_point : array-like, shape=[..., 1] Initial point. initial_tangent_vec : array-like, shape=[..., 1] Tangent vector at initial point. Returns ------- path : function Parameterized function for the geodesic curve starting at initial_point with velocity initial_tangent_vec. """ initial_phase = gs.arctanh(gs.sqrt(1 - initial_point)) frequency = -initial_tangent_vec / ( 2 * initial_point * gs.sqrt(1 - initial_point) ) return lambda t: self._geodesic_path(t, frequency, initial_phase) def _geodesic_bvp(self, initial_point, end_point): """Solve geodesic boundary problem. Compute the parameterized function for the geodesic starting at initial_point and ending at end_point. Parameters ---------- initial_point : array-like, shape=[..., 1] Initial point. end_point : array-like, shape=[..., 1] End point. Returns ------- path : function Parameterized function for the geodesic curve starting at initial_point and ending at end_point. """ initial_phase = gs.arctanh(gs.sqrt(1 - initial_point)) frequency = gs.arctanh(gs.sqrt(1 - end_point)) - initial_phase return lambda t: self._geodesic_path(t, frequency, initial_phase)
[docs] def exp(self, tangent_vec, base_point): """Compute exp map of a base point in tangent vector direction. Parameters ---------- tangent_vec : array-like, shape=[..., 1] Tangent vector at base point. base_point : array-like, shape=[..., 1] Base point. Returns ------- exp : array-like, shape=[..., 1] End point of the geodesic starting at base_point with initial velocity tangent_vec. """ return ( 1 - gs.tanh( -tangent_vec / (2 * base_point * gs.sqrt(1 - base_point)) + gs.arctanh(gs.sqrt(1 - base_point)) ) ** 2 )
[docs] def log(self, point, base_point): """Compute log map using a base point and an end point. Parameters ---------- point : array-like, shape=[..., 1] End point. base_point : array-like, shape=[..., 1] Base point. Returns ------- tangent_vec : array-like, shape=[..., 1] Initial velocity of the geodesic starting at base_point and reaching point at time 1. """ return ( -2 * base_point * gs.sqrt(1 - base_point) * (gs.arctanh(gs.sqrt(1 - point)) - gs.arctanh(gs.sqrt(1 - base_point))) )
[docs] class GeometricDistributionsRandomVariable(ScipyUnivariateRandomVariable): """A geometric random variable.""" def __init__(self, space): super().__init__(space, geom.rvs) @staticmethod def _flatten_params(point, pre_flat_shape): flat_point = gs.reshape(gs.broadcast_to(point, pre_flat_shape), (-1,)) return {"p": flat_point}
[docs] def pdf(self, x, point): """Evaluate the probability density function at x. Parameters ---------- x : array-like, shape=[n_samples, *support_shape] Sample points at which to compute the probability density function. point : array-like, shape=[..., *space.shape] Point representing a distribution. Returns ------- pdf_at_x : array-like, shape=[..., n_samples, *support_shape] Values of pdf at x for each value of the parameters provided by point. """ return gs.from_numpy( geom.pmf(x, point), )