# Copyright (C) 2019  Collin Capano, Sumit Kumar
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.


#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#
"""
This modules provides classes and functions for using the dynesty sampler
packages for parameter estimation.
"""


from __future__ import absolute_import

import logging
from pycbc.pool import choose_pool
import numpy
import dynesty
from dynesty.utils import resample_equal
from pycbc.inference.io import (DynestyFile, validate_checkpoint_files)
from pycbc.distributions import read_constraints_from_config
from .base import BaseSampler
from .base_mcmc import get_optional_arg_from_config
from .. import models


#
# =============================================================================
#
#                                   Samplers
#
# =============================================================================
#

class DynestySampler(BaseSampler):
    """This class is used to construct a Dynesty sampler from the dynesty
    package.

    Parameters
    ----------
    model : model
        A model from ``pycbc.inference.models``.
    nlive : int
        Number of live points to use in the sampler.
    dlogz : float
        Tolerance limit on the value of logz (also a convergence criterion).
    pool : function with map, Optional
        A provider of a map function that allows a function call to be run
        over multiple sets of arguments and possibly maps them to
        cores/nodes/etc.
    """
    name = "dynesty"
    _io = DynestyFile

    def __init__(self, model, nlive, dlogz, nprocesses=1,
                 loglikelihood_function=None, use_mpi=False, **kwargs):
        self.model = model
        # set up the pool
        model_call = DynestyModel(model, loglikelihood_function)
        if nprocesses > 1:
            # these are used to help parallelize over multiple cores / MPI
            models._global_instance = model_call
            log_likelihood_call = _call_global_loglikelihood
            prior_call = _call_global_logprior
        else:
            prior_call = model_call.prior_transform
            log_likelihood_call = model_call.log_likelihood
        pool = choose_pool(mpi=use_mpi, processes=nprocesses)
        if pool is not None:
            pool.size = nprocesses
        self.nlive = nlive
        self.dlogz = dlogz
        self.names = model.sampling_params
        self.ndim = len(model.sampling_params)
        self.checkpoint_file = None
        # note: dlogz is a stopping criterion of run_nested, not a
        # constructor argument, so it is passed in ``run`` below
        self._sampler = dynesty.NestedSampler(log_likelihood_call,
                                              prior_call, self.ndim,
                                              nlive=self.nlive,
                                              pool=pool, **kwargs)
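
    # A minimal usage sketch (illustrative; ``model`` is assumed to be an
    # already-configured model instance from ``pycbc.inference.models``):
    #
    #   sampler = DynestySampler(model, nlive=1000, dlogz=0.1, nprocesses=4)
    #   sampler.run()
    #   posterior = sampler.samples   # equally-weighted posterior samples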

    def run(self):
        self._sampler.run_nested(dlogz=self.dlogz)

    @property
    def io(self):
        return self._io

    @property
    def niterations(self):
        return len(tuple(self.samples.values())[0])

    @classmethod
    def from_config(cls, cp, model, nprocesses=1,
                    loglikelihood_function=None, use_mpi=False):
        """Loads the sampler from the given config file."""
        section = "sampler"
        # check name
        assert cp.get(section, "name") == cls.name, (
            "name in section [sampler] must match mine")
        # get the number of live points to use
        nlive = int(cp.get(section, "nlive"))
        # get the logz convergence tolerance
        dlogz = float(cp.get(section, "dlogz"))
        loglikelihood_function = \
            get_optional_arg_from_config(cp, section,
                                         'loglikelihood-function')
        obj = cls(model, nlive=nlive, dlogz=dlogz, nprocesses=nprocesses,
                  loglikelihood_function=loglikelihood_function,
                  use_mpi=use_mpi)
        return obj
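
    # For reference, a [sampler] section that ``from_config`` can parse
    # might look like the following (the values shown are illustrative,
    # not recommended defaults):
    #
    #   [sampler]
    #   name = dynesty
    #   nlive = 1000
    #   dlogz = 0.1
    #   ; optional; defaults to the model's ``loglikelihood``:
    #   ; loglikelihood-function = loglikelihood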

    def checkpoint(self):
        pass

    def finalize(self):
        logz = self._sampler.results.logz[-1]
        dlogz = self._sampler.results.logzerr[-1]
        logging.info("log Z, dlog Z: {}, {}".format(logz, dlogz))
        for fn in [self.checkpoint_file]:
            with self.io(fn, "a") as fp:
                fp.write_logevidence(logz, dlogz)
        logging.info("Writing samples to files")
        for fn in [self.checkpoint_file, self.backup_file]:
            self.write_results(fn)
        logging.info("Validating checkpoint and backup files")
        checkpoint_valid = validate_checkpoint_files(
            self.checkpoint_file, self.backup_file)
        if not checkpoint_valid:
            raise IOError("error writing to checkpoint file")

    @property
    def model_stats(self):
        logl = self._sampler.results.logl
        return {'loglikelihood': logl}

    @property
    def samples(self):
        samples_dict = {p: self.posterior_samples[:, i]
                        for i, p in enumerate(self.model.variable_params)}
        return samples_dict

    def set_initial_conditions(self, initial_distribution=None,
                               samples_file=None):
        """Sets up the starting point for the sampler.

        Should also set the sampler's random state.
        """
        pass

    def write_results(self, filename):
        """Writes samples, model stats, acceptance fraction, and random
        state to the given file.

        Parameters
        ----------
        filename : str
            The file to write to. The file is opened using the ``io`` class
            in an append state.
        """
        with self.io(filename, 'a') as fp:
            # write samples
            fp.write_samples(self.samples, self.model.variable_params)
            # write stats
            fp.write_samples(self.model_stats)
            # write log evidence
            fp.write_logevidence(self._sampler.results.logz[-1],
                                 self._sampler.results.logzerr[-1])

    @property
    def posterior_samples(self):
        """Returns posterior samples built from the nested samples and
        weights given by the dynesty sampler.
        """
        dynesty_samples = self._sampler.results['samples']
        weights = numpy.exp(self._sampler.results['logwt'] -
                            self._sampler.results['logz'][-1])
        posterior_dynesty = resample_equal(dynesty_samples, weights)
        return posterior_dynesty

    @property
    def logz(self):
        """Returns the Bayesian evidence estimated by the dynesty sampler."""
        return self._sampler.results.logz[-1]

    @property
    def logz_err(self):
        """Returns the error in the Bayesian evidence estimated by the
        dynesty sampler.
        """
        return self._sampler.results.logzerr[-1]
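
# The ``posterior_samples`` property above implements standard nested-
# sampling importance weighting: each nested sample i carries weight
# w_i = exp(logwt_i - logz_final), and ``resample_equal`` draws from the
# samples in proportion to those weights, so the result can be treated as
# equally-weighted posterior draws. A standalone sketch of the same
# operation on toy arrays (illustrative only, not part of this module):
#
#   import numpy
#   from dynesty.utils import resample_equal
#   logwt = numpy.array([-5.0, -2.0, -1.0])
#   logz_final = numpy.logaddexp.reduce(logwt)
#   weights = numpy.exp(logwt - logz_final)   # sums to 1 by construction
#   samples = numpy.array([[0.0], [1.0], [2.0]])
#   equal_weighted = resample_equal(samples, weights)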

def _call_global_loglikelihood(cube):
    return models._global_instance.log_likelihood(cube)


def _call_global_logprior(cube):
    return models._global_instance.prior_transform(cube)
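
# Note: these module-level wrappers are used (instead of the bound methods)
# when multiple processes are requested, so that each worker can look the
# model up in ``models._global_instance`` rather than having the model
# pickled and shipped with every call; bound methods are also not picklable
# under Python 2's multiprocessing.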

class DynestyModel(object):
    """Wraps a PyCBC Inference model for use with the dynesty sampler.

    Parameters
    ----------
    model : inference.BaseModel instance
        A model instance from pycbc.
    """

    def __init__(self, model, loglikelihood_function=None):
        if model.sampling_transforms is not None:
            raise ValueError("Dynesty does not support sampling transforms")
        self.model = model
        if loglikelihood_function is None:
            loglikelihood_function = 'loglikelihood'
        self.loglikelihood_function = loglikelihood_function

    def log_likelihood(self, cube):
        """Returns the log likelihood evaluated at the given parameter
        values.
        """
        params = {p: v for p, v in zip(self.model.sampling_params, cube)}
        self.model.update(**params)
        if self.model.logprior == -numpy.inf:
            return -numpy.inf
        return getattr(self.model, self.loglikelihood_function)

    def prior_transform(self, cube):
        """Prior transform function for the dynesty sampler.

        Takes a unit cube as input and applies the inverse-CDF transforms
        of the prior distributions to map it to the parameter space.
        """
        prior_dists = self.model.prior_distribution.distributions
        dist_dict = {}
        for dist in prior_dists:
            dist_dict.update({param: dist for param in dist.params})
        for i, param in enumerate(self.model.variable_params):
            cube[i] = dist_dict[param].cdfinv(param, cube[i])
        return cube
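
# To make the unit-cube mapping in ``prior_transform`` concrete: for a
# uniform prior on [a, b], the inverse CDF maps a unit-cube coordinate u
# to a + (b - a) * u. An illustrative check using the same
# ``cdfinv(param, value)`` call pattern as above (the parameter name ``x``
# and its bounds are hypothetical):
#
#   from pycbc.distributions import Uniform
#   dist = Uniform(x=(-10, 10))
#   dist.cdfinv('x', 0.5)   # -> 0.0, the midpoint of the prior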