Source code for pycbc.distributions.gaussian

# Copyright (C) 2016  Christopher M. Biwer, Collin Capano
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
"""
This modules provides classes for evaluating Gaussian distributions.
"""

import numpy
from scipy.special import erf, erfinv
import scipy.stats
from pycbc.distributions import bounded

[docs]class Gaussian(bounded.BoundedDist): r"""A Gaussian distribution on the given parameters; the parameters are independent of each other. Bounds can be provided on each parameter, in which case the distribution will be a truncated Gaussian distribution. The PDF of a truncated Gaussian distribution is given by: .. math:: p(x|a, b, \mu,\sigma) = \frac{1}{\sqrt{2 \pi \sigma^2}}\frac{e^{- \frac{\left( x - \mu \right)^2}{2 \sigma^2}}}{\Phi(b|\mu, \sigma) - \Phi(a|\mu, \sigma)}, where :math:`\mu` is the mean, :math:`\sigma^2` is the variance, :math:`a,b` are the bounds, and :math:`\Phi` is the cumulative distribution of an unbounded normal distribution, given by: .. math:: \Phi(x|\mu, \sigma) = \frac{1}{2}\left[1 + \mathrm{erf}\left(\frac{x-\mu}{\sigma \sqrt{2}}\right)\right]. Note that if :math:`[a,b) = [-\infty, \infty)`, this reduces to a standard Gaussian distribution. Instances of this class can be called like a function. By default, logpdf will be called, but this can be changed by setting the class's __call__ method to its pdf method. Parameters ---------- \**params : The keyword arguments should provide the names of parameters and (optionally) some bounds, as either a tuple or a `boundaries.Bounds` instance. The mean and variance of each parameter can be provided by additional keyword arguments that have `_mean` and `_var` adding to the parameter name. For example, `foo=(-2,10), foo_mean=3, foo_var=2` would create a truncated Gaussian with mean 3 and variance 2, bounded between :math:`[-2, 10)`. If no mean or variance is provided, the distribution will have 0 mean and unit variance. If None is provided for the bounds, the distribution will be a normal, unbounded Gaussian (equivalent to setting the bounds to `[-inf, inf)`). Attributes ---------------- name : 'guassian' The name of this distribution. Examples -------- Create an unbounded Gaussian distribution with zero mean and unit variance: >>> dist = distributions.Gaussian(mass1=None) Create a bounded Gaussian distribution on :math:`[1,10)` with a mean of 3 and a variance of 2: >>> dist = distributions.Gaussian(mass1=(1,10), mass1_mean=3, mass1_var=2) Create a bounded Gaussian distribution with the same parameters, but with cyclic boundary conditions: >>> dist = distributions.Gaussian(mass1=Bounds(1,10, cyclic=True), mass1_mean=3, mass1_var=2) """ name = "gaussian" def __init__(self, **params): # save distribution parameters as dict # calculate the norm and exponential norm ahead of time # and save to self._norm, self._lognorm, and self._expnorm self._bounds = {} self._mean = {} self._var = {} self._norm = {} self._lognorm = {} self._expnorm = {} # pull out specified means, variance mean_args = [p for p in params if p.endswith('_mean')] var_args = [p for p in params if p.endswith('_var')] self._mean = dict([[p[:-5], params.pop(p)] for p in mean_args]) self._var = dict([[p[:-4], params.pop(p)] for p in var_args]) # initialize the bounds super(Gaussian, self).__init__(**params) # check that there are no params in mean/var that are not in params missing = set(self._mean.keys()) - set(params.keys()) if any(missing): raise ValueError("means provided for unknow params {}".format( ', '.join(missing))) missing = set(self._var.keys()) - set(params.keys()) if any(missing): raise ValueError("vars provided for unknow params {}".format( ', '.join(missing))) # set default mean/var for params not specified self._mean.update(dict([[p, 0.] for p in params if p not in self._mean])) self._var.update(dict([[p, 1.] for p in params if p not in self._var])) # compute norms for p,bnds in self._bounds.items(): sigmasq = self._var[p] mu = self._mean[p] a,b = bnds invnorm = scipy.stats.norm.cdf(b, loc=mu, scale=sigmasq**0.5) \ - scipy.stats.norm.cdf(a, loc=mu, scale=sigmasq**0.5) invnorm *= numpy.sqrt(2*numpy.pi*sigmasq) self._norm[p] = 1./invnorm self._lognorm[p] = numpy.log(self._norm[p]) self._expnorm[p] = -1./(2*sigmasq) @property def mean(self): return self._mean @property def var(self): return self._var def _normalcdf(self, param, value): """The CDF of the normal distribution, without bounds.""" mu = self._mean[param] var = self._var[param] return 0.5*(1. + erf((value - mu)/(2*var)**0.5))
[docs] def cdf(self, param, value): """Returns the CDF of the given parameter value.""" a, b = self._bounds[param] if a != -numpy.inf: phi_a = self._normalcdf(param, a) else: phi_a = 0. if b != numpy.inf: phi_b = self._normalcdf(param, b) else: phi_b = 1. phi_x = self._normalcdf(param, value) return (phi_x - phi_a)/(phi_b - phi_a)
def _normalcdfinv(self, param, p): """The inverse CDF of the normal distribution, without bounds.""" mu = self._mean[param] var = self._var[param] return mu + (2*var)**0.5 * erfinv(2*p - 1.) def _cdfinv_param(self, param, p): """Return inverse of the CDF. """ a, b = self._bounds[param] if a != -numpy.inf: phi_a = self._normalcdf(param, a) else: phi_a = 0. if b != numpy.inf: phi_b = self._normalcdf(param, b) else: phi_b = 1. adjusted_p = phi_a + p * (phi_b - phi_a) return self._normalcdfinv(param, adjusted_p) def _pdf(self, **kwargs): """Returns the pdf at the given values. The keyword arguments must contain all of parameters in self's params. Unrecognized arguments are ignored. """ return numpy.exp(self._logpdf(**kwargs)) def _logpdf(self, **kwargs): """Returns the log of the pdf at the given values. The keyword arguments must contain all of parameters in self's params. Unrecognized arguments are ignored. """ if kwargs in self: return sum([self._lognorm[p] + self._expnorm[p]*(kwargs[p]-self._mean[p])**2. for p in self._params]) else: return -numpy.inf
[docs] def rvs(self, size=1, param=None): """Gives a set of random values drawn from this distribution. Parameters ---------- size : {1, int} The number of values to generate; default is 1. param : {None, string} If provided, will just return values for the given parameter. Otherwise, returns random values for each parameter. Returns ------- structured array The random values in a numpy structured array. If a param was specified, the array will only have an element corresponding to the given parameter. Otherwise, the array will have an element for each parameter in self's params. """ if param is not None: dtype = [(param, float)] else: dtype = [(p, float) for p in self.params] arr = numpy.zeros(size, dtype=dtype) for (p,_) in dtype: sigma = numpy.sqrt(self._var[p]) mu = self._mean[p] a,b = self._bounds[p] arr[p][:] = scipy.stats.truncnorm.rvs((a-mu)/sigma, (b-mu)/sigma, loc=self._mean[p], scale=sigma, size=size) return arr
[docs] @classmethod def from_config(cls, cp, section, variable_args): """Returns a Gaussian distribution based on a configuration file. The parameters for the distribution are retrieved from the section titled "[`section`-`variable_args`]" in the config file. Boundary arguments should be provided in the same way as described in `get_param_bounds_from_config`. In addition, the mean and variance of each parameter can be specified by setting `{param}_mean` and `{param}_var`, respectively. For example, the following would create a truncated Gaussian distribution between 0 and 6.28 for a parameter called `phi` with mean 3.14 and variance 0.5 that is cyclic: .. code-block:: ini [{section}-{tag}] min-phi = 0 max-phi = 6.28 phi_mean = 3.14 phi_var = 0.5 cyclic = Parameters ---------- cp : pycbc.workflow.WorkflowConfigParser A parsed configuration file that contains the distribution options. section : str Name of the section in the configuration file. variable_args : str The names of the parameters for this distribution, separated by `prior.VARARGS_DELIM`. These must appear in the "tag" part of the section header. Returns ------- Gaussian A distribution instance from the pycbc.inference.prior module. """ return bounded.bounded_from_config(cls, cp, section, variable_args, bounds_required=False)
__all__ = ['Gaussian']