Source code for multinorm

# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""
multinorm - Multivariate Normal Distributions for Humans.

A Python class to work with model fit results
(parameters and the covariance matrix).

- Code: https://github.com/cdeil/multinorm
- Docs: https://multinorm.readthedocs.io
- License: BSD-3-Clause
"""
from pkg_resources import get_distribution, DistributionNotFound
import numpy as np
import scipy.stats
import scipy.linalg

__all__ = ["MultiNorm"]

try:
    __version__ = get_distribution(__name__).version
except DistributionNotFound:
    # package is not installed
    pass


[docs]class MultiNorm(object):
    """Multivariate normal distribution.

    Given ``n`` parameters, the ``mean`` and ``names``
    should be one-dimensional with size ``n``,
    and ``cov`` should be a two-dimensional matrix of shape ``(n, n)``.

    See the documentation.

    Parameters
    ----------
    mean : numpy.ndarray
        Mean vector
    cov : numpy.ndarray
        Covariance matrix
    names : list
        Python list of parameter names (str).
        Default: use "par_i" with ``i = 0 .. N - 1``.
    """

    def __init__(self, mean=None, cov=None, names=None):
        n = _get_n_from_inputs(mean, cov, names)

        if mean is None:
            mean = np.zeros(n, dtype=float)
        else:
            mean = np.asarray(mean, dtype=float)
            if mean.shape != (n,):
                raise ValueError(
                    "mean shape = {!r}, expected ({},)".format(mean.shape, n)
                )

        if cov is None:
            cov = np.eye(n, dtype=float)
        else:
            cov = np.asarray(cov, dtype=float)
            if cov.shape != (n, n):
                raise ValueError(
                    "cov shape = {!r}, expected ({}, {})".format(cov.shape, n, n)
                )

        if names is None:
            names = ["par_{}".format(idx) for idx in range(n)]
        else:
            if len(names) != n:
                raise ValueError("len(names) = {}, expected n={}".format(len(names), n))

        self._n = n
        self._mean = mean
        self._cov = cov
        self._names = names

    def __repr__(self):
        return "{}(n={})".format(self.__class__.__name__, self.n)

    def __str__(self):
        s = repr(self)

        s += "\nnames: "
        s += str(self.names)

        s += "\nmean: "
        s += str(self.mean)

        s += "\nerr: "
        s += str(self.err)

        s += "\ncov:\n"
        s += str(self.cov)

        return s

    # TODO: add `corr=None` option to optionally set correlations
[docs]    @classmethod
    def from_err(cls, mean=None, err=None, names=None):
        r"""Create `MultiNorm` from parameter errors.

        With errors :math:`\sigma_i` this will create a
        diagonal covariance matrix with

        .. math::
            \Sigma_{ii} = \sigma_i^2
        """
        err = np.asarray(err, dtype=float)
        cov = np.diag(np.power(err, 2))
        return cls(mean, cov, names)

[docs]    @classmethod
    def from_points(cls, points, names=None):
        """Create `MultiNorm` from parameter points.

        Usually the points are samples from some distribution
        and creating this `MultiNorm` distribution is an
        estimate / approximation of that distribution of interest.

        See: :ref:`create_from_points`.

        Parameters
        ----------
        points : numpy.ndarray
            Array of data points with shape ``(n, 2)``.
        """
        mean = np.mean(points, axis=None)
        cov = np.cov(points, rowvar=False)
        return cls(mean, cov, names)

[docs]    @classmethod
    def joint(cls, distributions):
        """Create joint `MultiNorm` distribution.

        See :ref:`theory_combine` .

        Parameters
        ----------
        distributions : list
            Python list of `MultiNorm` distributions.

        Returns
        -------
        MultiNorm
            Combined joint distribution
        """
        names = distributions[0].names

        precisions = [_.precision for _ in distributions]
        precision = np.sum(precisions, axis=0)
        cov = np.linalg.inv(precision)

        means_weighted = [_._mean_weighted for _ in distributions]
        means_weighted = np.sum(means_weighted, axis=0)
        mean = np.dot(cov, means_weighted)
        return cls(mean, cov, names)

[docs]    def to_scipy(self):
        """Convert to `scipy.stats.multivariate_normal`_ object.

        The returned object is a "frozen" distribution object,
        with ``mean`` and ``covar`` set. It offers methods
        for ``pdf``, ``logpdf`` to evaluate the probability density
        function at given points, and ``rvs`` to draw random variate
        samples, i.e. random points from the distribution.

        See examples in :ref:`analyse-scipy`.
        """
        return scipy.stats.multivariate_normal(self.mean, self.cov)

[docs]    def to_uncertainties(self):
        """Convert to `uncertainties`_ objects.

        A tuple of numbers with uncertainties
        (one for each parameter) is returned.

        The `uncertainties`_ package makes it easy to
        do error propagation on derived quantities.

        See examples in :ref:`analyse`.
        """
        from uncertainties import correlated_values

        return correlated_values(self.mean, self.cov)

[docs]    def to_mcerp(self):
        """Convert to `mcerp`_ objects.

        TODO: document
        """
        # TODO: implement

[docs]    def to_soerp(self):
        """Convert to `soerp`_ objects.

        TODO: document
        """
        # TODO: implement

[docs]    def to_matplotlib_ellipse(self, n_sigma=1, **kwargs):
        """Create `matplotlib.patches.Ellipse`_.

        See examples in :ref:`plot`.

        Parameters
        ----------
        n_sigma : int
            Number of standard deviations. See :ref:`theory_sigmas`.
        """
        if self.n != 2:
            raise ValueError(
                "Ellipse only available for n=2. "
                "To select parameters, call ``marginal`` or ``conditional`` first."
            )

        from matplotlib.patches import Ellipse

        # See https://stackoverflow.com/questions/12301071
        xy = self.mean

        vals, vecs = self._eigh
        width, height = 2 * n_sigma * np.sqrt(vals)
        angle = np.degrees(np.arctan2(*vecs[:, 0][::-1]))

        return Ellipse(xy=xy, width=width, height=height, angle=angle, **kwargs)

    @property
    def _eigh(self):
        return np.linalg.eigh(self.cov)

    @property
    def n(self):
        """Number of dimensions of the distribution (int).

        Given by the number of parameters.
        """
        return self._n

    @property
    def names(self):
        """Parameter names (`list` of `str`)."""
        return self._names

    @property
    def mean(self):
        """Mean vector (`numpy.ndarray`)."""
        return self._mean

    @property
    def _mean_weighted(self):
        return np.dot(self.precision, self.mean)

    @property
    def cov(self):
        """Covariance matrix (`numpy.ndarray`)."""
        return self._cov

    @property
    def err(self):
        r"""Error vector (`numpy.ndarray`).

        Defined as :math:`\sigma_i = \sqrt{\Sigma_{ii}}`.
        """
        return np.sqrt(np.diag(self.cov))

    @property
    def correlation(self):
        r"""Correlation matrix (`numpy.ndarray`).

        Correlation :math:`C` is related to covariance :math:`\Sigma` via:

        .. math::
            C_{ij} = \frac{ \Sigma_{ij} }{ \sqrt{\Sigma_{ii} \Sigma_{jj}} }
        """
        err = self.err
        return self.cov / np.outer(err, err)

    @property
    def precision(self):
        """Precision matrix (`numpy.ndarray`).

        The inverse of the covariance matrix.

        Sometimes called the "information matrix" or "Hesse matrix".
        """
        return scipy.linalg.pinvh(self.cov)

[docs]    def sigma_distance(self, point):
        """Number of standard deviations from the mean (float).

        Also called the Mahalanobis distance.
        See :ref:`theory_sigmas`.
        """
        point = np.asanyarray(point)
        d = self._mean - point
        sigma = np.dot(np.dot(d.T, self.precision), d)
        return np.sqrt(sigma)

[docs]    def conditional(self, pars):
        """Conditional `MultiNormal` distribution.

        See :ref:`theory_conditional`.

        TODO: document.
        """
        idx = self._pars_to_idx(pars)
        # TODO: implement

[docs]    def marginal(self, pars):
        """Marginal `MultiNormal` distribution.

        See :ref:`theory_marginal`.

        Parameters
        ----------
        pars : list
            List of parameters (integer indices)

        Returns
        -------
        MultiNorm
            Marginal distribution
        """
        idx = self._pars_to_idx(pars)
        mean = self.mean[idx]
        cov = self.cov[np.ix_(idx, idx)]
        names = [self.names[_] for _ in idx]
        return self.__class__(mean, cov, names)

    def _pars_to_idx(self, pars):
        """Create parameter index array.

        Supports scalar, list and array input pars
        Support parameter indices (int) and names (str)
        """
        # TODO: should we support scalar?
        if isinstance(pars, (int, str)):
            pars = [pars]

        idxs = []
        for par in pars:
            # TODO: improve type handling (make str work on Python 2)
            # and give good error messages
            # Probably move this to a separate method.
            if isinstance(par, int):
                idxs.append(par)
            elif isinstance(par, str):
                idx = self.names.index(par)
                idxs.append(idx)
            else:
                raise TypeError()

        return np.asarray(idxs, dtype=int)


def _get_n_from_inputs(mean, cov, names):
    if mean is not None:
        return len(mean)

    if names is not None:
        return len(names)

    if cov is not None:
        return len(cov)

    raise ValueError("Could not determine number of parameters n.")


def sigma_to_containment(sigma, n):
    pass