Source code for pycbc.events.stat

# Copyright (C) 2016 Alex Nitz
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#
"""
This module contains functions for calculating coincident ranking statistic
values.
"""
import logging
import numpy
from . import ranking
from . import coinc_rate


class Stat(object):
    """Base class which should be extended to provide a coincident statistic"""

    def __init__(self, files=None, ifos=None, **kwargs):
        """Create a statistic class instance

        Parameters
        ----------
        files: list of strs
            A list containing the filenames of hdf format files used to help
            construct the coincident statistics. The files must have a 'stat'
            attribute which is used to associate them with the appropriate
            statistic class.
        ifos: list of detector names, optional
        """
        import h5py

        self.files = {}
        files = files or []
        for filename in files:
            f = h5py.File(filename, 'r')
            stat = (f.attrs['stat']).decode()
            if stat in self.files:
                raise RuntimeError("We already have one file with stat attr ="
                                   " %s. Can't provide more than one!" % stat)
            logging.info("Found file %s for stat %s", filename, stat)
            self.files[stat] = f

        # Provide the dtype of the single detector method's output
        # This is used by background estimation codes that need to maintain
        # a buffer of such values.
        self.single_dtype = numpy.float32

        # True if a larger single detector statistic will produce a larger
        # coincident statistic
        self.single_increasing = True

        self.ifos = ifos or []


class NewSNRStatistic(Stat):
    """Calculate the NewSNR coincident detection statistic"""

    def single(self, trigs):
        """Calculate the single detector statistic, here equal to newsnr

        Parameters
        ----------
        trigs: dict of numpy.ndarrays, h5py group (or similar dict-like object)
            Dictionary-like object holding single detector trigger information.

        Returns
        -------
        numpy.ndarray
            The array of single detector values
        """
        return ranking.get_newsnr(trigs)

    def coinc(self, s0, s1, slide, step):  # pylint:disable=unused-argument
        """Calculate the coincident detection statistic.

        Parameters
        ----------
        s0: numpy.ndarray
            Single detector ranking statistic for the first detector.
        s1: numpy.ndarray
            Single detector ranking statistic for the second detector.
        slide: (unused in this statistic)
        step: (unused in this statistic)

        Returns
        -------
        numpy.ndarray
            Array of coincident ranking statistic values
        """
        return (s0 ** 2. + s1 ** 2.) ** 0.5

    def coinc_lim_for_thresh(self, s0, thresh):
        """Calculate the required single detector statistic to exceed
        the threshold for each of the input triggers.

        Parameters
        ----------
        s0: numpy.ndarray
            Single detector ranking statistic for the first detector.
        thresh: float
            The threshold on the coincident statistic.

        Returns
        -------
        numpy.ndarray
            Array of limits on the second detector single statistic to
            exceed thresh.
        """
        s1 = thresh ** 2. - s0 ** 2.
        s1[s1 < 0] = 0
        return s1 ** 0.5

    def coinc_multiifo(self, s, slide, step, to_shift,
                       **kwargs):  # pylint:disable=unused-argument
        """Calculate the coincident detection statistic.

        Parameters
        ----------
        s: list
            List of (ifo, single detector statistic) tuples
        slide: (unused in this statistic)
        step: (unused in this statistic)
        to_shift: list
            List of integers indicating what multiples of the time shift will
            be applied (unused in this statistic)

        Returns
        -------
        numpy.ndarray
            Array of coincident ranking statistic values
        """
        return sum(sngl[1] ** 2. for sngl in s) ** 0.5

    def coinc_multiifo_lim_for_thresh(self, s, thresh, limifo,
                                      **kwargs):  # pylint:disable=unused-argument
        """Calculate the required single detector statistic to exceed
        the threshold for each of the input triggers.

        Parameters
        ----------
        s: list
            List of (ifo, single detector statistic) tuples for all detectors
            except limifo.
        thresh: float
            The threshold on the coincident statistic.
        limifo: string
            The ifo for which the limit is to be found.

        Returns
        -------
        numpy.ndarray
            Array of limits on the limifo single statistic to
            exceed thresh.
        """
        s0 = thresh ** 2. - sum(sngl[1] ** 2. for sngl in s)
        s0[s0 < 0] = 0
        return s0 ** 0.5
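

# Illustrative sketch (not part of the original module): shows how the
# quadrature-sum coinc statistic and its inverse coinc_lim_for_thresh relate.
# The numerical values below are arbitrary placeholders.
def _example_newsnr_coinc_threshold():
    """Demonstrate NewSNRStatistic.coinc and coinc_lim_for_thresh."""
    stat = NewSNRStatistic(files=None, ifos=['H1', 'L1'])
    s0 = numpy.array([6.0, 8.0])
    s1 = numpy.array([7.0, 5.0])
    # Coincident statistic is the quadrature sum of the single-detector values
    cstat = stat.coinc(s0, s1, slide=None, step=None)
    # For a coinc threshold of 10, the minimum second-detector statistic
    # needed given s0 satisfies lim**2 + s0**2 = thresh**2
    lim = stat.coinc_lim_for_thresh(s0, 10.)
    assert numpy.allclose((lim ** 2. + s0 ** 2.) ** 0.5, 10.)
    return cstat, lim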


class NewSNRSGStatistic(NewSNRStatistic):
    """Calculate the NewSNRSG coincident detection statistic"""

    def single(self, trigs):
        """Calculate the single detector statistic, here equal to
        newsnr_sgveto

        Parameters
        ----------
        trigs: dict of numpy.ndarrays, h5py group (or similar dict-like object)
            Dictionary-like object holding single detector trigger information.

        Returns
        -------
        numpy.ndarray
            The array of single detector values
        """
        return ranking.get_newsnr_sgveto(trigs)


class NewSNRSGPSDStatistic(NewSNRSGStatistic):
    """Calculate the NewSNRSGPSD coincident detection statistic"""

    def single(self, trigs):
        """Calculate the single detector statistic, here equal to newsnr
        combined with sgveto and psdvar statistic

        Parameters
        ----------
        trigs: dict of numpy.ndarrays

        Returns
        -------
        numpy.ndarray
            The array of single detector values
        """
        return ranking.get_newsnr_sgveto_psdvar(trigs)


class NewSNRSGPSDScaledStatistic(NewSNRSGStatistic):
    """Calculate the NewSNRSGPSDScaled coincident detection statistic"""

    def single(self, trigs):
        """Calculate the single detector statistic, here equal to newsnr
        combined with sgveto and the scaled psdvar statistic

        Parameters
        ----------
        trigs: dict of numpy.ndarrays

        Returns
        -------
        numpy.ndarray
            The array of single detector values
        """
        return ranking.get_newsnr_sgveto_psdvar_scaled(trigs)


class NewSNRSGPSDScaledThresholdStatistic(NewSNRSGStatistic):
    """Calculate the NewSNRSGPSDScaledThreshold coincident detection
    statistic"""

    def single(self, trigs):
        """Calculate the single detector statistic, here equal to newsnr
        combined with sgveto and the thresholded, scaled psdvar statistic

        Parameters
        ----------
        trigs: dict of numpy.ndarrays

        Returns
        -------
        numpy.ndarray
            The array of single detector values
        """
        return ranking.get_newsnr_sgveto_psdvar_scaled_threshold(trigs)


class NetworkSNRStatistic(NewSNRStatistic):
    """Same as the NewSNR statistic, but using the plain SNR as the
    single-detector statistic, so the coinc statistic is the quadrature sum
    of SNRs"""

    def single(self, trigs):
        """Return the single-detector SNR"""
        return trigs['snr']


class NewSNRCutStatistic(NewSNRStatistic):
    """Same as the NewSNR statistic, but demonstrates a cut of the triggers"""

    def single(self, trigs):
        """Calculate the single detector statistic.

        Parameters
        ----------
        trigs: dict of numpy.ndarrays, h5py group (or similar dict-like object)
            Dictionary-like object holding single detector trigger information.

        Returns
        -------
        newsnr: numpy.ndarray
            Array of single detector values
        """
        newsnr = ranking.get_newsnr(trigs)
        rchisq = trigs['chisq'][:] / (2. * trigs['chisq_dof'][:] - 2.)
        newsnr[numpy.logical_and(newsnr < 10, rchisq > 2)] = -1
        return newsnr

    def coinc(self, s0, s1, slide, step):  # pylint:disable=unused-argument
        """Calculate the coincident detection statistic.

        Parameters
        ----------
        s0: numpy.ndarray
            Single detector ranking statistic for the first detector.
        s1: numpy.ndarray
            Single detector ranking statistic for the second detector.
        slide: (unused in this statistic)
        step: (unused in this statistic)

        Returns
        -------
        cstat: numpy.ndarray
            Array of coincident ranking statistic values
        """
        cstat = (s0 ** 2. + s1 ** 2.) ** 0.5
        cstat[s0 == -1] = 0
        cstat[s1 == -1] = 0
        return cstat

    def coinc_lim_for_thresh(self, s0, thresh):
        """Calculate the required single detector statistic to exceed
        the threshold for each of the input triggers.

        Parameters
        ----------
        s0: numpy.ndarray
            Single detector ranking statistic for the first detector.
        thresh: float
            The threshold on the coincident statistic.

        Returns
        -------
        numpy.ndarray
            Array of limits on the second detector single statistic to
            exceed thresh.
        """
        s1 = thresh ** 2. - s0 ** 2.
        s1[s0 == -1] = numpy.inf
        s1[s1 < 0] = 0
        return s1 ** 0.5


class PhaseTDNewStatistic(NewSNRStatistic):
    """Statistic that re-weights combined newsnr using coinc parameters.

    The weighting is based on the PDF of time delays, phase differences and
    amplitude ratios between triggers in different ifos.
    """

    def __init__(self, files=None, ifos=None, **kwargs):
        NewSNRStatistic.__init__(self, files=files, ifos=ifos, **kwargs)

        self.single_dtype = [('snglstat', numpy.float32),
                             ('coa_phase', numpy.float32),
                             ('end_time', numpy.float64),
                             ('sigmasq', numpy.float32),
                             ('snr', numpy.float32)
                             ]

        # Assign attribute so that it can be replaced with other functions
        self.get_newsnr = ranking.get_newsnr
        self.has_hist = False
        self.hist_ifos = None
        self.ref_snr = 5.0
        self.relsense = {}
        self.swidth = self.pwidth = self.twidth = None
        self.srbmin = self.srbmax = None
        self.max_penalty = None
        self.pdtype = []
        self.weights = {}
        self.param_bin = {}
        self.two_det_flag = (len(ifos) == 2)
        self.two_det_weights = {}

    def get_hist(self, ifos=None):
        """Read in a signal density file for the ifo combination"""
        ifos = ifos or self.ifos

        selected = None
        for name in self.files:
            # Pick out the statistic files that provide phase / time / amp
            # relationships and match to the ifos in use
            if 'phasetd_newsnr' in name:
                ifokey = name.split('_')[2]
                num = len(ifokey) / 2
                if num != len(ifos):
                    continue

                match = [ifo in name for ifo in ifos]
                if False in match:
                    continue
                else:
                    selected = name
                    break

        if selected is None:
            raise RuntimeError("Couldn't figure out which stat file to use")

        logging.info("Using signal histogram %s for ifos %s", selected, ifos)
        histfile = self.files[selected]
        self.hist_ifos = histfile.attrs['ifos']
        n_ifos = len(self.hist_ifos)

        # Histogram bin attributes
        self.twidth = histfile.attrs['twidth']
        self.pwidth = histfile.attrs['pwidth']
        self.swidth = histfile.attrs['swidth']
        self.srbmin = histfile.attrs['srbmin']
        self.srbmax = histfile.attrs['srbmax']

        bin_volume = (self.twidth * self.pwidth * self.swidth) ** (n_ifos - 1)
        self.hist_max = - 1. * numpy.inf

        # Read histogram for each ifo, to use if that ifo has smallest SNR in
        # the coinc
        for ifo in self.hist_ifos:

            weights = histfile[ifo]['weights'][:]
            # renormalise to PDF
            self.weights[ifo] = weights / (weights.sum() * bin_volume)

            param = histfile[ifo]['param_bin'][:]

            if param.dtype == numpy.int8:
                # Older style, incorrectly sorted histogram file
                ncol = param.shape[1]
                self.pdtype = [('c%s' % i, param.dtype) for i in range(ncol)]
                self.param_bin[ifo] = numpy.zeros(len(self.weights[ifo]),
                                                  dtype=self.pdtype)
                for i in range(ncol):
                    self.param_bin[ifo]['c%s' % i] = param[:, i]

                lsort = self.param_bin[ifo].argsort()
                self.param_bin[ifo] = self.param_bin[ifo][lsort]
                self.weights[ifo] = self.weights[ifo][lsort]
            else:
                # New style, efficient histogram file
                # param bin and weights have already been sorted
                self.param_bin[ifo] = param
                self.pdtype = self.param_bin[ifo].dtype

            # Max_penalty is a small number assigned to any bins without
            # histogram entries. All histograms in a given file have the same
            # min entry by design, so use the min of the last one read in.
            self.max_penalty = self.weights[ifo].min()
            self.hist_max = max(self.hist_max, self.weights[ifo].max())

            if self.two_det_flag:
                # The density of signals is computed as a function of 3
                # binned parameters: time difference (t), phase difference
                # (p) and SNR ratio (s). These are computed for each
                # combination of detectors, so for 3 detectors 6 differences
                # are needed. However many combinations of these parameters
                # are highly unlikely and no instances of these combinations
                # occurred when generating the statistic files. Rather than
                # storing a bunch of 0s, these values are just not stored at
                # all. This reduces the size of the statistic file, but means
                # we have to identify the correct value to read for every
                # trigger. For 2 detectors we can expand the weights lookup
                # table here, basically adding in all the "0" values. This
                # makes looking up a value in the "weights" table an O(N)
                # rather than O(NlogN) operation. It sacrifices RAM to do
                # this, so is a good tradeoff for 2 detectors, but not for 3!
                if not hasattr(self, 'c0_size'):
                    self.c0_size = {}
                    self.c1_size = {}
                    self.c2_size = {}

                self.c0_size[ifo] = \
                    2 * (abs(self.param_bin[ifo]['c0']).max() + 1)
                self.c1_size[ifo] = \
                    2 * (abs(self.param_bin[ifo]['c1']).max() + 1)
                self.c2_size[ifo] = \
                    2 * (abs(self.param_bin[ifo]['c2']).max() + 1)

                array_size = [self.c0_size[ifo], self.c1_size[ifo],
                              self.c2_size[ifo]]
                dtypec = self.weights[ifo].dtype
                self.two_det_weights[ifo] = \
                    numpy.zeros(array_size, dtype=dtypec) + self.max_penalty
                id0 = self.param_bin[ifo]['c0'].astype(numpy.int32) \
                    + self.c0_size[ifo] // 2
                id1 = self.param_bin[ifo]['c1'].astype(numpy.int32) \
                    + self.c1_size[ifo] // 2
                id2 = self.param_bin[ifo]['c2'].astype(numpy.int32) \
                    + self.c2_size[ifo] // 2
                self.two_det_weights[ifo][id0, id1, id2] = self.weights[ifo]

        relfac = histfile.attrs['sensitivity_ratios']
        for ifo, sense in zip(self.hist_ifos, relfac):
            self.relsense[ifo] = sense

        self.has_hist = True

    def single(self, trigs):
        """Calculate the single detector statistic & assemble other parameters

        Parameters
        ----------
        trigs: dict of numpy.ndarrays, h5py group or similar dict-like object
            Object holding single detector trigger information. 'snr',
            'chisq', 'chisq_dof', 'coa_phase', 'end_time', and 'sigmasq'
            are required keys.

        Returns
        -------
        numpy.ndarray
            Array of single detector parameter values
        """
        sngl_stat = self.get_newsnr(trigs)
        singles = numpy.zeros(len(sngl_stat), dtype=self.single_dtype)
        singles['snglstat'] = sngl_stat
        singles['coa_phase'] = trigs['coa_phase'][:]
        singles['end_time'] = trigs['end_time'][:]
        singles['sigmasq'] = trigs['sigmasq'][:]
        singles['snr'] = trigs['snr'][:]
        return numpy.array(singles, ndmin=1)

    def logsignalrate(self, s0, s1, shift):
        """Two-detector wrapper around logsignalrate_multiifo"""
        to_shift = [-1, 0]
        stats = {self.ifos[0]: s0, self.ifos[1]: s1}
        return self.logsignalrate_multiifo(stats, shift, to_shift)

    def logsignalrate_multiifo(self, stats, shift, to_shift):
        """Calculate the normalized log rate density of signals via lookup

        Parameters
        ----------
        stats: dict of single-ifo parameter arrays, keyed on the ifos in
            self.ifos
        shift: numpy array of float, size of the time shift vector for each
            coinc to be ranked
        to_shift: list of int, multiple of the time shift to apply ordered as
            self.ifos

        Returns
        -------
        value: log of coinc signal rate density for the given single-ifo
            triggers and time shifts
        """
        # Convert time shift vector to dict, as hist ifos and self.ifos may
        # not be in same order
        to_shift = {ifo: s for ifo, s in zip(self.ifos, to_shift)}

        if not self.has_hist:
            self.get_hist()

        # Figure out which ifo of the contributing ifos has the smallest SNR,
        # to use as reference for choosing the signal histogram.
        snrs = numpy.array([numpy.array(stats[ifo]['snr'], ndmin=1)
                            for ifo in self.ifos])
        smin = numpy.argmin(snrs, axis=0)
        # Store a list of the triggers using each ifo as reference
        rtypes = {ifo: numpy.where(smin == j)[0]
                  for j, ifo in enumerate(self.ifos)}

        # Get reference ifo information
        rate = numpy.zeros(len(shift), dtype=numpy.float32)
        for ref_ifo in self.ifos:
            rtype = rtypes[ref_ifo]
            ref = stats[ref_ifo]
            pref = numpy.array(ref['coa_phase'], ndmin=1)[rtype]
            tref = numpy.array(ref['end_time'], ndmin=1)[rtype]
            sref = numpy.array(ref['snr'], ndmin=1)[rtype]
            sigref = numpy.array(ref['sigmasq'], ndmin=1) ** 0.5
            sigref = sigref[rtype]
            senseref = self.relsense[self.hist_ifos[0]]

            binned = []
            other_ifos = [ifo for ifo in self.ifos if ifo != ref_ifo]
            for ifo in other_ifos:
                sc = stats[ifo]
                p = numpy.array(sc['coa_phase'], ndmin=1)[rtype]
                t = numpy.array(sc['end_time'], ndmin=1)[rtype]
                s = numpy.array(sc['snr'], ndmin=1)[rtype]
                sense = self.relsense[ifo]
                sig = numpy.array(sc['sigmasq'], ndmin=1) ** 0.5
                sig = sig[rtype]

                # Calculate differences
                pdif = (pref - p) % (numpy.pi * 2.0)
                tdif = shift[rtype] * to_shift[ref_ifo] + \
                    tref - shift[rtype] * to_shift[ifo] - t
                sdif = s / sref * sense / senseref * sigref / sig

                # Put into bins
                tbin = (tdif / self.twidth).astype(numpy.int)
                pbin = (pdif / self.pwidth).astype(numpy.int)
                sbin = (sdif / self.swidth).astype(numpy.int)
                binned += [tbin, pbin, sbin]

            # Convert binned to same dtype as stored in hist
            nbinned = numpy.zeros(len(pbin), dtype=self.pdtype)
            for i, b in enumerate(binned):
                nbinned['c%s' % i] = b

            # Read signal weight from precalculated histogram
            if self.two_det_flag:
                # High-RAM, low-CPU option for two-det
                rate[rtype] = numpy.zeros(len(nbinned)) + self.max_penalty
                id0 = nbinned['c0'].astype(numpy.int32) \
                    + self.c0_size[ref_ifo] // 2
                id1 = nbinned['c1'].astype(numpy.int32) \
                    + self.c1_size[ref_ifo] // 2
                id2 = nbinned['c2'].astype(numpy.int32) \
                    + self.c2_size[ref_ifo] // 2

                # look up keys which are within boundaries
                within = (id0 > 0) & (id0 < self.c0_size[ref_ifo])
                within = within & (id1 > 0) & (id1 < self.c1_size[ref_ifo])
                within = within & (id2 > 0) & (id2 < self.c2_size[ref_ifo])
                within = numpy.where(within)[0]
                rate[rtype[within]] = \
                    self.two_det_weights[ref_ifo][id0[within], id1[within],
                                                  id2[within]]
            else:
                # Low[er]-RAM, high[er]-CPU option for >two det
                loc = numpy.searchsorted(self.param_bin[ref_ifo], nbinned)
                loc[loc == len(self.weights[ref_ifo])] = 0
                rate[rtype] = self.weights[ref_ifo][loc]

                # These weren't in our histogram so give them max penalty
                # instead of random value
                missed = numpy.where(
                    self.param_bin[ref_ifo][loc] != nbinned)[0]
                rate[rtype[missed]] = self.max_penalty

            # Scale by signal population SNR
            rate[rtype] *= (sref / self.ref_snr) ** -4.0

        return numpy.log(rate)
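

# Illustrative sketch (not part of the original module): choosing the
# lowest-SNR detector as the reference ifo, as done at the start of
# PhaseTDNewStatistic.logsignalrate_multiifo. The SNR values are arbitrary
# placeholders.
def _example_reference_ifo_selection():
    """Group coincs by which ifo has the smallest SNR."""
    ifos = ['H1', 'L1', 'V1']
    snrs = numpy.array([[6.0, 9.0],     # H1 SNR for two coincs
                        [7.5, 5.5],     # L1
                        [5.0, 8.0]])    # V1
    smin = numpy.argmin(snrs, axis=0)
    rtypes = {ifo: numpy.where(smin == j)[0] for j, ifo in enumerate(ifos)}
    # rtypes: {'H1': array([]), 'L1': array([1]), 'V1': array([0])}
    return rtypes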


class PhaseTDStatistic(NewSNRStatistic):
    """Statistic that re-weights combined newsnr using coinc parameters.

    The weighting is based on the PDF of time delays, phase differences and
    amplitude ratios between triggers in different ifos.
    """

    def __init__(self, files=None, ifos=None, **kwargs):
        NewSNRStatistic.__init__(self, files=files, ifos=ifos, **kwargs)
        self.single_dtype = [('snglstat', numpy.float32),
                             ('coa_phase', numpy.float32),
                             ('end_time', numpy.float64),
                             ('sigmasq', numpy.float32),
                             ('snr', numpy.float32)
                             ]

        # Assign attribute so that it can be replaced with other functions
        self.get_newsnr = ranking.get_newsnr
        self.hist = None
        self.bins = {}
        self.hist_ifos = []

    def get_hist(self, ifos=None, norm='max'):
        """Read in a signal density file for the ifo combination"""
        # default name for old 2-ifo workflow
        if 'phasetd_newsnr' in self.files:
            histfile = self.files['phasetd_newsnr']
        else:
            ifos = ifos or self.ifos  # if None, use the instance attribute
            if len(ifos) != 2:
                raise RuntimeError("Need exactly 2 ifos for the p/t/a "
                                   "statistic! Ifos given were %s" % ifos)
            matching = [k for k in self.files.keys() if
                        'phasetd' in k and (ifos[0] in k and ifos[1] in k)]
            if len(matching) == 1:
                histfile = self.files[matching[0]]
            else:
                raise RuntimeError(
                    "%i statistic files had an attribute matching "
                    "phasetd*%s%s ! Should be exactly 1" %
                    (len(matching), ifos[0], ifos[1]))
            logging.info("Using signal histogram %s for ifos %s", matching,
                         ifos)

        self.hist = histfile['map'][:]
        self.hist_ifos = ifos

        if norm == 'max':
            # Normalize so that peak of hist is equal to unity
            self.hist = self.hist / float(self.hist.max())
            self.hist = numpy.log(self.hist)
        else:
            raise NotImplementedError("Sorry, we have no other normalizations")

        # Bin boundaries are stored in the hdf file
        self.bins['dt'] = histfile['tbins'][:]
        self.bins['dphi'] = histfile['pbins'][:]
        self.bins['snr'] = histfile['sbins'][:]
        self.bins['sigma_ratio'] = histfile['rbins'][:]
        self.hist_max = self.hist.max()

    def single(self, trigs):
        """Calculate the single detector statistic & assemble other parameters

        Parameters
        ----------
        trigs: dict of numpy.ndarrays, h5py group or similar dict-like object
            Object holding single detector trigger information. 'snr',
            'chisq', 'chisq_dof', 'coa_phase', 'end_time', and 'sigmasq'
            are required keys.

        Returns
        -------
        numpy.ndarray
            Array of single detector parameter values
        """
        sngl_stat = self.get_newsnr(trigs)
        singles = numpy.zeros(len(sngl_stat), dtype=self.single_dtype)
        singles['snglstat'] = sngl_stat
        singles['coa_phase'] = trigs['coa_phase'][:]
        singles['end_time'] = trigs['end_time'][:]
        singles['sigmasq'] = trigs['sigmasq'][:]
        singles['snr'] = trigs['snr'][:]
        return numpy.array(singles, ndmin=1)

    def signal_hist(self, td, pd, sn0, sn1, rd):
        """Look up the signal histogram value for given time, phase, SNR and
        sigma-ratio differences"""
        assert self.hist is not None

        # enforce that sigma ratio is < 1 by swapping values
        snr0 = sn0 * 1
        snr1 = sn1 * 1
        snr0[rd > 1] = sn1[rd > 1]
        snr1[rd > 1] = sn0[rd > 1]
        rd[rd > 1] = 1. / rd[rd > 1]

        # Find which bin each coinc falls into
        tv = numpy.searchsorted(self.bins['dt'], td) - 1
        pv = numpy.searchsorted(self.bins['dphi'], pd) - 1
        s0v = numpy.searchsorted(self.bins['snr'], snr0) - 1
        s1v = numpy.searchsorted(self.bins['snr'], snr1) - 1
        rv = numpy.searchsorted(self.bins['sigma_ratio'], rd) - 1

        # Enforce that points fit into the bin boundaries: if a point lies
        # outside the boundaries it is pushed back to the nearest bin.
        for binnum, axis in zip([tv, pv, rv, s0v, s1v],
                                ['dt', 'dphi', 'sigma_ratio', 'snr', 'snr']):
            binend = len(self.bins[axis])
            binnum[binnum < 0] = 0
            binnum[binnum >= binend - 1] = binend - 2

        return self.hist[tv, pv, s0v, s1v, rv]

    def slide_dt(self, singles, shift, slide_vec):
        # Apply time shifts in the multiples specified by slide_vec
        # and return resulting time difference
        assert len(singles) == 2
        assert len(slide_vec) == 2
        dt = singles[0]['end_time'] + shift * slide_vec[0] - \
            (singles[1]['end_time'] + shift * slide_vec[1])
        return dt

    def logsignalrate(self, s0, s1, shift):
        """Calculate the normalized log rate density of signals via lookup"""
        # does not require ifos to be specified, only 1 p/t/a file
        if self.hist is None:
            self.get_hist()

        # for 2-ifo pipeline, add time shift to 2nd ifo ('s1')
        slidevec = [0, 1]
        td = numpy.array(self.slide_dt([s0, s1], shift, slidevec), ndmin=1)
        if numpy.any(td > 1.):
            raise RuntimeError(
                "Time difference bigger than 1 second after applying any "
                "time shifts! This should not happen")
        pd = numpy.array((s0['coa_phase'] - s1['coa_phase']) %
                         (2. * numpy.pi), ndmin=1)
        sn0 = numpy.array(s0['snr'], ndmin=1)
        sn1 = numpy.array(s1['snr'], ndmin=1)
        rd = numpy.array((s0['sigmasq'] / s1['sigmasq']) ** 0.5, ndmin=1)

        return self.signal_hist(td, pd, sn0, sn1, rd)

    def logsignalrate_multiifo(self, s, shift, to_shift):
        """
        Parameters
        ----------
        s: list, length 2
            List of sets of single-ifo trigger parameter values
        shift: numpy.ndarray
            Array of floats giving the time shifts to be applied with
            multiples given by to_shift
        to_shift: list, length 2
            List of time shift multiples
        """
        assert len(s) == 2
        assert len(to_shift) == 2
        # At present for triples use the H/L signal histogram
        hist_ifos = self.ifos if len(self.ifos) == 2 else ['H1', 'L1']

        if self.hist is None:
            self.get_hist(hist_ifos)
        else:
            assert self.hist_ifos == hist_ifos
            logging.info("Using pre-set signal histogram for %s",
                         self.hist_ifos)

        td = self.slide_dt(s, shift, to_shift)
        if numpy.any(td > 1.):
            raise RuntimeError(
                "Time difference bigger than 1 second after applying any "
                "time shifts! This should not happen")
        pd = numpy.array((s[0]['coa_phase'] - s[1]['coa_phase']) %
                         (2. * numpy.pi), ndmin=1)
        sn0 = numpy.array(s[0]['snr'], ndmin=1)
        sn1 = numpy.array(s[1]['snr'], ndmin=1)
        rd = numpy.array((s[0]['sigmasq'] / s[1]['sigmasq']) ** 0.5, ndmin=1)

        return self.signal_hist(td, pd, sn0, sn1, rd)

    def coinc(self, s0, s1, slide, step):
        """Calculate the coincident detection statistic.

        Parameters
        ----------
        s0: numpy.ndarray
            Single detector ranking statistic for the first detector.
        s1: numpy.ndarray
            Single detector ranking statistic for the second detector.
        slide: numpy.ndarray
            Array of ints. These represent the multiple of the timeslide
            interval to bring a pair of single detector triggers into
            coincidence.
        step: float
            The timeslide interval in seconds.

        Returns
        -------
        coinc_stat: numpy.ndarray
            An array of the coincident ranking statistic values
        """
        rstat = s0['snglstat'] ** 2. + s1['snglstat'] ** 2.
        cstat = rstat + 2. * self.logsignalrate(s0, s1, slide * step)
        cstat[cstat < 0] = 0
        return cstat ** 0.5

    def coinc_lim_for_thresh(self, s0, thresh):
        """Calculate the required single detector statistic to exceed
        the threshold for each of the input triggers.

        Parameters
        ----------
        s0: numpy.ndarray
            Single detector ranking statistic for the first detector.
        thresh: float
            The threshold on the coincident statistic.

        Returns
        -------
        numpy.ndarray
            Array of limits on the second detector single statistic to
            exceed thresh.
        """
        if self.hist is None:
            self.get_hist()
        s1 = thresh ** 2. - s0['snglstat'] ** 2.
        # Assume best case scenario and use maximum signal rate
        s1 -= 2. * self.hist_max
        s1[s1 < 0] = 0
        return s1 ** 0.5
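

# Illustrative sketch (not part of the original module): the PhaseTD coinc
# statistic adds twice the log signal-rate density to the quadrature sum of
# the single-detector values before taking the square root. The logr_s value
# below is an arbitrary placeholder standing in for a histogram lookup.
def _example_phasetd_combination():
    """Show the algebra used by PhaseTDStatistic.coinc."""
    snglstat0 = numpy.array([7.0])
    snglstat1 = numpy.array([6.5])
    logr_s = numpy.array([1.2])          # hypothetical logsignalrate output
    rstat = snglstat0 ** 2. + snglstat1 ** 2.
    cstat = rstat + 2. * logr_s
    cstat[cstat < 0] = 0
    return cstat ** 0.5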


class PhaseTDSGStatistic(PhaseTDStatistic):
    """PhaseTDStatistic but with sine-Gaussian veto added to the
    single-detector ranking
    """
    def __init__(self, files=None, ifos=None, **kwargs):
        PhaseTDStatistic.__init__(self, files=files, ifos=ifos, **kwargs)
        self.get_newsnr = ranking.get_newsnr_sgveto


class ExpFitStatistic(NewSNRStatistic):
    """Detection statistic using an exponential falloff noise model.

    Statistic approximates the negative log noise coinc rate density per
    template over single-ifo newsnr values.
    """
    def __init__(self, files=None, ifos=None, **kwargs):
        if not len(files):
            raise RuntimeError("Can't find any statistic files !")
        NewSNRStatistic.__init__(self, files=files, ifos=ifos, **kwargs)

        # the stat file attributes are hard-coded as '%{ifo}-fit_coeffs'
        parsed_attrs = [f.split('-') for f in self.files.keys()]
        self.bg_ifos = [at[0] for at in parsed_attrs if
                        (len(at) == 2 and at[1] == 'fit_coeffs')]
        if not len(self.bg_ifos):
            raise RuntimeError("None of the statistic files has the required "
                               "attribute called {ifo}-fit_coeffs !")
        self.fits_by_tid = {}
        self.alphamax = {}
        for i in self.bg_ifos:
            self.fits_by_tid[i] = self.assign_fits(i)
            self.get_ref_vals(i)

        self.get_newsnr = ranking.get_newsnr
        self.single_increasing = False

    def assign_fits(self, ifo):
        coeff_file = self.files[ifo + '-fit_coeffs']
        template_id = coeff_file['template_id'][:]
        alphas = coeff_file['fit_coeff'][:]
        rates = coeff_file['count_above_thresh'][:]
        # the template_ids and fit coeffs are stored in an arbitrary order
        # create new arrays in template_id order for easier recall
        tid_sort = numpy.argsort(template_id)
        return {'alpha': alphas[tid_sort],
                'rate': rates[tid_sort],
                'thresh': coeff_file.attrs['stat_threshold']
                }

    def get_ref_vals(self, ifo):
        self.alphamax[ifo] = self.fits_by_tid[ifo]['alpha'].max()

    def find_fits(self, trigs):
        """Get fit coeffs for a specific ifo and template id(s)"""
        try:
            tnum = trigs.template_num  # exists if accessed via coinc_findtrigs
            ifo = trigs.ifo
        except AttributeError:
            tnum = trigs['template_id']  # exists for SingleDetTriggers
            assert len(self.ifos) == 1  # Should be exactly one ifo provided
            ifo = self.ifos[0]
        # fits_by_tid is a dictionary of dictionaries of arrays
        # indexed by ifo / coefficient name / template_id
        alphai = self.fits_by_tid[ifo]['alpha'][tnum]
        ratei = self.fits_by_tid[ifo]['rate'][tnum]
        thresh = self.fits_by_tid[ifo]['thresh']
        return alphai, ratei, thresh

    def lognoiserate(self, trigs):
        """Calculate the log noise rate density over single-ifo newsnr

        Read in single trigger information, make the newsnr statistic
        and rescale by the fitted coefficients alpha and rate
        """
        alphai, ratei, thresh = self.find_fits(trigs)
        newsnr = self.get_newsnr(trigs)
        # alphai is constant of proportionality between single-ifo newsnr and
        # negative log noise likelihood in given template
        # ratei is rate of trigs in given template compared to average
        # thresh is stat threshold used in given ifo
        lognoisel = - alphai * (newsnr - thresh) + numpy.log(alphai) + \
            numpy.log(ratei)
        return numpy.array(lognoisel, ndmin=1, dtype=numpy.float32)

    def single(self, trigs):
        """Single-detector statistic, here just equal to the log noise rate"""
        return self.lognoiserate(trigs)

    def coinc(self, s0, s1, slide, step):  # pylint:disable=unused-argument
        """Calculate the final coinc ranking statistic"""
        # Approximate log likelihood ratio by summing single-ifo negative
        # log noise likelihoods
        loglr = - s0 - s1
        # add squares of threshold stat values via idealized Gaussian formula
        threshes = [self.fits_by_tid[i]['thresh'] for i in self.bg_ifos]
        loglr += sum([t ** 2. / 2. for t in threshes])
        # convert back to a coinc-SNR-like statistic
        # via log likelihood ratio \propto rho_c^2 / 2
        return (2. * loglr) ** 0.5

    def coinc_lim_for_thresh(self, s0, thresh):
        """Calculate the required single detector statistic to exceed
        the threshold for each of the input triggers.

        Parameters
        ----------
        s0: numpy.ndarray
            Single detector ranking statistic for the first detector.
        thresh: float
            The threshold on the coincident statistic.

        Returns
        -------
        numpy.ndarray
            Array of limits on the second detector single statistic to
            exceed thresh.
        """
        s1 = - (thresh ** 2.) / 2. - s0
        threshes = [self.fits_by_tid[i]['thresh'] for i in self.bg_ifos]
        s1 += sum([t ** 2. / 2. for t in threshes])
        return s1
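

# Illustrative sketch (not part of the original module): the exponential
# noise model evaluated for assumed fit values. alpha, rate and thresh below
# are placeholder numbers standing in for the per-template fit coefficients
# read from the '{ifo}-fit_coeffs' files.
def _example_lognoiserate():
    """Evaluate log noise rate density = -alpha*(newsnr - thresh)
    + log(alpha) + log(rate) for example values."""
    alpha = 6.0      # hypothetical fitted exponential slope
    rate = 1.5       # hypothetical relative trigger rate for this template
    thresh = 6.0     # hypothetical newsnr threshold used in the fit
    newsnr = numpy.array([6.0, 8.0, 10.0])
    lognoisel = - alpha * (newsnr - thresh) + numpy.log(alpha) + \
        numpy.log(rate)
    return lognoisel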


class ExpFitCombinedSNR(ExpFitStatistic):
    """Reworking of ExpFitStatistic designed to resemble network SNR

    Use a monotonic function of the negative log noise rate density which
    approximates combined (new)snr for coincs with similar newsnr in each ifo
    """
    def __init__(self, files=None, ifos=None, **kwargs):
        ExpFitStatistic.__init__(self, files=files, ifos=ifos, **kwargs)
        # for low-mass templates the exponential slope alpha \approx 6
        self.alpharef = 6.
        self.single_increasing = True

    def use_alphamax(self):
        # take reference slope as the harmonic mean of individual ifo slopes
        inv_alphas = [1. / self.alphamax[i] for i in self.bg_ifos]
        self.alpharef = 1. / (sum(inv_alphas) / len(inv_alphas))

    def single(self, trigs):
        logr_n = self.lognoiserate(trigs)
        _, _, thresh = self.find_fits(trigs)
        # shift by log of reference slope alpha
        logr_n += -1. * numpy.log(self.alpharef)
        # add threshold and rescale by reference slope
        stat = thresh - (logr_n / self.alpharef)
        return numpy.array(stat, ndmin=1, dtype=numpy.float32)

    def single_multiifo(self, s):
        if self.single_increasing:
            sngl_multiifo = s[1]['snglstat']
        else:
            sngl_multiifo = -1.0 * s[1]['snglstat']
        return sngl_multiifo

    def coinc(self, s0, s1, slide, step):  # pylint:disable=unused-argument
        # scale by 1/sqrt(2) to resemble network SNR
        return (s0 + s1) / 2. ** 0.5

    def coinc_lim_for_thresh(self, s0, thresh):
        return thresh * (2. ** 0.5) - s0

    def coinc_multiifo(self, s, slide, step, to_shift,
                       **kwargs):  # pylint:disable=unused-argument
        # scale by 1/sqrt(number of ifos) to resemble network SNR
        return sum(sngl[1] for sngl in s) / len(s) ** 0.5

    def coinc_multiifo_lim_for_thresh(self, s, thresh, limifo,
                                      **kwargs):  # pylint:disable=unused-argument
        return thresh * ((len(s) + 1) ** 0.5) - sum(sngl[1] for sngl in s)


class ExpFitSGCombinedSNR(ExpFitCombinedSNR):
    """ExpFitCombinedSNR but with sine-Gaussian veto added to the single
    detector ranking
    """
    def __init__(self, files=None, ifos=None, **kwargs):
        ExpFitCombinedSNR.__init__(self, files=files, ifos=ifos, **kwargs)
        self.get_newsnr = ranking.get_newsnr_sgveto


class ExpFitSGPSDCombinedSNR(ExpFitCombinedSNR):
    """ExpFitCombinedSNR but with sine-Gaussian veto and PSD variation added
    to the single detector ranking
    """
    def __init__(self, files=None, ifos=None, **kwargs):
        ExpFitCombinedSNR.__init__(self, files=files, ifos=ifos, **kwargs)
        self.get_newsnr = ranking.get_newsnr_sgveto_psdvar


class PhaseTDExpFitStatistic(PhaseTDStatistic, ExpFitCombinedSNR):
    """Statistic combining exponential noise model with signal histogram PDF"""

    # default is 2-ifo operation with exactly 1 'phasetd' file
    def __init__(self, files=None, ifos=None, **kwargs):
        # read in both foreground PDF and background fit info
        ExpFitCombinedSNR.__init__(self, files=files, ifos=ifos, **kwargs)
        # need the self.single_dtype value from PhaseTDStatistic
        PhaseTDStatistic.__init__(self, files=files, ifos=ifos, **kwargs)

    def single(self, trigs):
        # same single-ifo stat as ExpFitCombinedSNR
        sngl_stat = ExpFitCombinedSNR.single(self, trigs)
        singles = numpy.zeros(len(sngl_stat), dtype=self.single_dtype)
        singles['snglstat'] = sngl_stat
        singles['coa_phase'] = trigs['coa_phase'][:]
        singles['end_time'] = trigs['end_time'][:]
        singles['sigmasq'] = trigs['sigmasq'][:]
        singles['snr'] = trigs['snr'][:]
        return numpy.array(singles, ndmin=1)

    def coinc(self, s0, s1, slide, step):
        # logsignalrate function inherited from PhaseTDStatistic
        logr_s = self.logsignalrate(s0, s1, slide * step)
        # rescale by ExpFitCombinedSNR reference slope as for sngl stat
        cstat = s0['snglstat'] + s1['snglstat'] + logr_s / self.alpharef
        # cut off underflowing and very small values
        cstat[cstat < 8.] = 8.
        # scale to resemble network SNR
        return cstat / (2. ** 0.5)

    def coinc_lim_for_thresh(self, s0, thresh):
        # if the threshold is below this value all triggers will
        # pass because of rounding in the coinc method
        if thresh <= (8. / (2. ** 0.5)):
            return -1. * numpy.ones(len(s0['snglstat'])) * numpy.inf
        if self.hist is None:
            self.get_hist()
        # Assume best case scenario and use maximum signal rate
        logr_s = self.hist_max
        s1 = (2. ** 0.5) * thresh - s0['snglstat'] - logr_s / self.alpharef
        return s1


class PhaseTDNewExpFitStatistic(PhaseTDNewStatistic, ExpFitCombinedSNR):
    """Statistic combining exponential noise model with signal histogram PDF"""

    # default is 2-ifo operation with exactly 1 'phasetd' file
    def __init__(self, files=None, ifos=None, **kwargs):
        # read in both foreground PDF and background fit info
        ExpFitCombinedSNR.__init__(self, files=files, ifos=ifos, **kwargs)
        # need the self.single_dtype value from PhaseTDNewStatistic
        PhaseTDNewStatistic.__init__(self, files=files, ifos=ifos, **kwargs)

    def single(self, trigs):
        # same single-ifo stat as ExpFitCombinedSNR
        sngl_stat = ExpFitCombinedSNR.single(self, trigs)
        singles = numpy.zeros(len(sngl_stat), dtype=self.single_dtype)
        singles['snglstat'] = sngl_stat
        singles['coa_phase'] = trigs['coa_phase'][:]
        singles['end_time'] = trigs['end_time'][:]
        singles['sigmasq'] = trigs['sigmasq'][:]
        singles['snr'] = trigs['snr'][:]
        return numpy.array(singles, ndmin=1)

    def coinc(self, s0, s1, slide, step):
        # logsignalrate function inherited from PhaseTDNewStatistic
        logr_s = self.logsignalrate(s0, s1, slide * step)
        # rescale by ExpFitCombinedSNR reference slope as for sngl stat
        cstat = s0['snglstat'] + s1['snglstat'] + logr_s / self.alpharef
        # cut off underflowing and very small values
        cstat[cstat < 8.] = 8.
        # scale to resemble network SNR
        return cstat / (2. ** 0.5)

    def coinc_lim_for_thresh(self, s0, thresh):
        # if the threshold is below this value all triggers will
        # pass because of rounding in the coinc method
        if thresh <= (8. / (2. ** 0.5)):
            return -1. * numpy.ones(len(s0['snglstat'])) * numpy.inf
        if not self.has_hist:
            self.get_hist()
        # Assume best case scenario and use maximum signal rate
        logr_s = self.hist_max
        s1 = (2 ** 0.5) * thresh - s0['snglstat'] - logr_s / self.alpharef
        return s1


class PhaseTDExpFitSGStatistic(PhaseTDExpFitStatistic):
    """Statistic combining exponential noise model with signal histogram PDF
    adding the sine-Gaussian veto to the single detector ranking
    """
    def __init__(self, files=None, ifos=None, **kwargs):
        PhaseTDExpFitStatistic.__init__(self, files=files, ifos=ifos,
                                        **kwargs)
        self.get_newsnr = ranking.get_newsnr_sgveto


class PhaseTDNewExpFitSGStatistic(PhaseTDNewExpFitStatistic):
    """Statistic combining exponential noise model with signal histogram PDF
    adding the sine-Gaussian veto to the single detector ranking
    """
    def __init__(self, files=None, ifos=None, **kwargs):
        PhaseTDNewExpFitStatistic.__init__(self, files=files, ifos=ifos,
                                           **kwargs)
        self.get_newsnr = ranking.get_newsnr_sgveto


class PhaseTDExpFitSGPSDStatistic(PhaseTDExpFitSGStatistic):
    """Statistic combining exponential noise model with signal histogram PDF
    adding the sine-Gaussian veto and PSD variation statistic to the single
    detector ranking
    """
    def __init__(self, files=None, ifos=None, **kwargs):
        PhaseTDExpFitSGStatistic.__init__(self, files=files, ifos=ifos,
                                          **kwargs)
        self.get_newsnr = ranking.get_newsnr_sgveto_psdvar


class PhaseTDExpFitSGPSDScaledStatistic(PhaseTDExpFitSGStatistic):
    """Statistic combining exponential noise model with signal histogram PDF
    adding the sine-Gaussian veto and scaled PSD variation statistic to the
    single detector ranking
    """
    def __init__(self, files=None, ifos=None, **kwargs):
        PhaseTDExpFitSGStatistic.__init__(self, files=files, ifos=ifos,
                                          **kwargs)
        self.get_newsnr = ranking.get_newsnr_sgveto_psdvar_scaled


class MaxContTradNewSNRStatistic(NewSNRStatistic):
    """Combination of NewSNR with the power chisq and auto chisq"""

    def single(self, trigs):
        """Calculate the single detector statistic.

        Parameters
        ----------
        trigs: dict of numpy.ndarrays, h5py group (or similar dict-like object)
            Dictionary-like object holding single detector trigger
            information. 'snr', 'cont_chisq', 'cont_chisq_dof', 'chisq_dof'
            and 'chisq' are required keys for this statistic.

        Returns
        -------
        stat: numpy.ndarray
            The array of single detector values
        """
        chisq_newsnr = ranking.get_newsnr(trigs)
        rautochisq = trigs['cont_chisq'][:] / trigs['cont_chisq_dof'][:]
        autochisq_newsnr = ranking.newsnr(trigs['snr'][:], rautochisq)
        return numpy.array(numpy.minimum(chisq_newsnr, autochisq_newsnr,
                                         dtype=numpy.float32), ndmin=1,
                           copy=False)


class ExpFitSGBgRateStatistic(ExpFitStatistic):
    """Detection statistic using an exponential falloff noise model.

    Statistic calculates the log noise coinc rate for each template over
    single-ifo newsnr values.
    """

    def __init__(self, files=None, ifos=None, benchmark_lograte=-14.6,
                 **kwargs):
        # benchmark_lograte is log of a representative noise trigger rate
        # This comes from H1L1 (O2) and is 4.5e-7 Hz
        super(ExpFitSGBgRateStatistic, self).__init__(files=files, ifos=ifos,
                                                      **kwargs)
        self.benchmark_lograte = benchmark_lograte
        self.get_newsnr = ranking.get_newsnr_sgveto

        # Reassign the rate to be number per time rather than an arbitrarily
        # normalised number
        for ifo in self.bg_ifos:
            self.reassign_rate(ifo)

    def reassign_rate(self, ifo):
        coeff_file = self.files[ifo + '-fit_coeffs']
        template_id = coeff_file['template_id'][:]
        # create arrays in template_id order for easier recall
        tid_sort = numpy.argsort(template_id)
        self.fits_by_tid[ifo]['rate'] = \
            coeff_file['count_above_thresh'][:][tid_sort] / \
            float(coeff_file.attrs['analysis_time'])

    def coinc_multiifo(self, s, slide, step, to_shift,
                       **kwargs):  # pylint:disable=unused-argument
        # ranking statistic is -ln(expected rate density of noise triggers)
        # plus normalization constant
        sngl_dict = {sngl[0]: sngl[1] for sngl in s}
        ln_noise_rate = coinc_rate.combination_noise_lograte(
            sngl_dict, kwargs['time_addition'])
        loglr = - ln_noise_rate + self.benchmark_lograte
        return loglr

    def coinc_multiifo_lim_for_thresh(self, s, thresh, limifo, **kwargs):
        sngl_dict = {sngl[0]: sngl[1] for sngl in s}
        sngl_dict[limifo] = numpy.zeros(len(s[0][1]))
        ln_noise_rate = coinc_rate.combination_noise_lograte(
            sngl_dict, kwargs['time_addition'])
        loglr = - thresh - ln_noise_rate + self.benchmark_lograte
        return loglr


class ExpFitSGFgBgRateStatistic(PhaseTDStatistic, ExpFitSGBgRateStatistic):
    """Statistic combining the exponential noise rate model with the
    phase/time/amplitude signal histogram and a network sensitivity
    normalization.
    """

    def __init__(self, files=None, ifos=None, **kwargs):
        # read in background fit info and store it
        ExpFitSGBgRateStatistic.__init__(self, files=files, ifos=ifos,
                                         **kwargs)
        # if ifos not already set, determine via background fit info
        self.ifos = self.ifos or self.bg_ifos
        # PhaseTD statistic single_dtype plus network sensitivity benchmark
        PhaseTDStatistic.__init__(self, files=files, ifos=self.ifos, **kwargs)
        self.single_dtype.append(('benchmark_logvol', numpy.float32))

        self.get_newsnr = ranking.get_newsnr_sgveto

        for ifo in self.bg_ifos:
            self.assign_median_sigma(ifo)
        # benchmark_logvol is a benchmark sensitivity array over template id
        hl_net_med_sigma = numpy.amin([self.fits_by_tid[ifo]['median_sigma']
                                       for ifo in ['H1', 'L1']], axis=0)
        self.benchmark_logvol = 3.0 * numpy.log(hl_net_med_sigma)
        self.single_increasing = False

    def assign_median_sigma(self, ifo):
        coeff_file = self.files[ifo + '-fit_coeffs']
        template_id = coeff_file['template_id'][:]
        tid_sort = numpy.argsort(template_id)
        self.fits_by_tid[ifo]['median_sigma'] = \
            coeff_file['median_sigma'][:][tid_sort]

    def single(self, trigs):
        # single-ifo stat = log of noise rate
        sngl_stat = self.lognoiserate(trigs)
        # populate other fields to calculate phase/time/amp consistency
        # and sigma comparison
        singles = numpy.zeros(len(sngl_stat), dtype=self.single_dtype)
        singles['snglstat'] = sngl_stat
        singles['coa_phase'] = trigs['coa_phase'][:]
        singles['end_time'] = trigs['end_time'][:]
        singles['sigmasq'] = trigs['sigmasq'][:]
        singles['snr'] = trigs['snr'][:]
        try:
            tnum = trigs.template_num  # exists if accessed via coinc_findtrigs
        except AttributeError:
            tnum = trigs['template_id']  # exists for SingleDetTriggers
            # Should only be one ifo fit file provided
            assert len(self.ifos) == 1
        # store benchmark log volume as single-ifo information since the
        # coinc method does not have access to template id
        singles['benchmark_logvol'] = self.benchmark_logvol[tnum]
        return numpy.array(singles, ndmin=1)

    def coinc_multiifo(self, s, slide, step, to_shift,
                       **kwargs):  # pylint:disable=unused-argument
        sngl_rates = {sngl[0]: sngl[1]['snglstat'] for sngl in s}
        ln_noise_rate = coinc_rate.combination_noise_lograte(
            sngl_rates, kwargs['time_addition'])
        ln_noise_rate -= self.benchmark_lograte

        # Network sensitivity for a given coinc type is approximately
        # determined by the least sensitive ifo
        network_sigmasq = numpy.amin([sngl[1]['sigmasq'] for sngl in s],
                                     axis=0)
        # Volume \propto sigma^3 or sigmasq^1.5
        network_logvol = 1.5 * numpy.log(network_sigmasq)
        # Get benchmark log volume as single-ifo information
        # NB benchmark logvol for a given template is not ifo-dependent
        # - choose the first ifo for convenience
        benchmark_logvol = s[0][1]['benchmark_logvol']
        network_logvol -= benchmark_logvol

        coincifos = [sngl[0] for sngl in s]
        # logsignalrate function from PhaseTDStatistic
        if ('H1' in coincifos and 'L1' in coincifos):
            # apply HL hist for HL & HLV coincs, keep only H/L info
            s_hl = [sngl[1] for sngl in s if sngl[0] in ['H1', 'L1']]
            shift_hl = [sh for sngl, sh in zip(s, to_shift) if
                        sngl[0] in ['H1', 'L1']]
            logr_s = self.logsignalrate_multiifo(s_hl, slide * step, shift_hl)
        else:
            logr_s = self.logsignalrate_multiifo([sngl[1] for sngl in s],
                                                 slide * step, to_shift)

        loglr = logr_s + network_logvol - ln_noise_rate
        # cut off underflowing and very small values
        loglr[loglr < -30.] = -30.
        return loglr

    def coinc_multiifo_lim_for_thresh(self, s, thresh, limifo,
                                      **kwargs):  # pylint:disable=unused-argument
        if self.hist is None:
            self.get_hist()
        # if the threshold is below this value all triggers will
        # pass because of rounding in the coinc method
        if thresh <= -30.:
            return numpy.ones(len(s[0][1]['snglstat'])) * numpy.inf
        sngl_rates = {sngl[0]: sngl[1]['snglstat'] for sngl in s}
        # Add limifo to singles dict so that overlap time is calculated
        # correctly
        sngl_rates[limifo] = numpy.zeros(len(s[0][1]))
        ln_noise_rate = coinc_rate.combination_noise_lograte(
            sngl_rates, kwargs['time_addition'])
        ln_noise_rate -= self.benchmark_lograte

        # Assume best case and use the maximum sigma squared from all triggers
        network_sigmasq = numpy.ones(len(s[0][1])) * kwargs['max_sigmasq']
        # Volume \propto sigma^3 or sigmasq^1.5
        network_logvol = 1.5 * numpy.log(network_sigmasq)
        # Get benchmark log volume as single-ifo information
        # NB benchmark logvol for a given template is not ifo-dependent
        # - choose the first ifo for convenience
        benchmark_logvol = s[0][1]['benchmark_logvol']
        network_logvol -= benchmark_logvol

        loglr = - thresh + self.hist_max + network_logvol - ln_noise_rate
        return loglr


class ExpFitSGFgBgNormNewStatistic(PhaseTDNewStatistic,
                                   ExpFitSGBgRateStatistic):
    """Statistic combining the exponential noise rate model with the
    phase/time/amplitude signal histograms of PhaseTDNewStatistic and a
    network sensitivity normalization.
    """

    def __init__(self, files=None, ifos=None, **kwargs):
        # read in background fit info and store it
        ExpFitSGBgRateStatistic.__init__(self, files=files, ifos=ifos,
                                         **kwargs)
        # if ifos not already set, determine via background fit info
        self.ifos = self.ifos or self.bg_ifos
        # PhaseTD statistic single_dtype plus network sensitivity benchmark
        PhaseTDNewStatistic.__init__(self, files=files, ifos=self.ifos,
                                     **kwargs)
        self.single_dtype.append(('benchmark_logvol', numpy.float32))

        self.get_newsnr = ranking.get_newsnr_sgveto

        for ifo in self.bg_ifos:
            self.assign_median_sigma(ifo)
        # benchmark_logvol is a benchmark sensitivity array over template id
        hl_net_med_sigma = numpy.amin([self.fits_by_tid[ifo]['median_sigma']
                                       for ifo in ['H1', 'L1']], axis=0)
        self.benchmark_logvol = 3.0 * numpy.log(hl_net_med_sigma)
        self.single_increasing = False

    def assign_median_sigma(self, ifo):
        coeff_file = self.files[ifo + '-fit_coeffs']
        template_id = coeff_file['template_id'][:]
        tid_sort = numpy.argsort(template_id)
        self.fits_by_tid[ifo]['median_sigma'] = \
            coeff_file['median_sigma'][:][tid_sort]

    def lognoiserate(self, trigs, alphabelow=6):
        """Calculate the log noise rate density over single-ifo newsnr

        Read in single trigger information, make the newsnr statistic
        and rescale by the fitted coefficients alpha and rate
        """
        alphai, ratei, thresh = self.find_fits(trigs)
        newsnr = self.get_newsnr(trigs)
        # Above the threshold we use the usual fit coefficient (alpha)
        # below threshold use specified alphabelow
        bt = newsnr < thresh
        lognoisel = - alphai * (newsnr - thresh) + numpy.log(alphai) + \
            numpy.log(ratei)
        lognoiselbt = - alphabelow * (newsnr - thresh) + \
            numpy.log(alphabelow) + numpy.log(ratei)
        lognoisel[bt] = lognoiselbt[bt]
        return numpy.array(lognoisel, ndmin=1, dtype=numpy.float32)

    def single(self, trigs):
        # single-ifo stat = log of noise rate
        sngl_stat = self.lognoiserate(trigs)
        # populate other fields to calculate phase/time/amp consistency
        # and sigma comparison
        singles = numpy.zeros(len(sngl_stat), dtype=self.single_dtype)
        singles['snglstat'] = sngl_stat
        singles['coa_phase'] = trigs['coa_phase'][:]
        singles['end_time'] = trigs['end_time'][:]
        singles['sigmasq'] = trigs['sigmasq'][:]
        singles['snr'] = trigs['snr'][:]
        try:
            tnum = trigs.template_num  # exists if accessed via coinc_findtrigs
        except AttributeError:
            tnum = trigs['template_id']  # exists for SingleDetTriggers
            # Should only be one ifo fit file provided
            assert len(self.ifos) == 1
        # Store benchmark log volume as single-ifo information since the
        # coinc method does not have access to template id
        singles['benchmark_logvol'] = self.benchmark_logvol[tnum]
        return numpy.array(singles, ndmin=1)

    def single_multiifo(self, s):
        ln_noise_rate = s[1]['snglstat']
        ln_noise_rate -= self.benchmark_lograte
        network_sigmasq = s[1]['sigmasq']
        network_logvol = 1.5 * numpy.log(network_sigmasq)
        benchmark_logvol = s[1]['benchmark_logvol']
        network_logvol -= benchmark_logvol
        ln_s = -4 * numpy.log(s[1]['snr'] / self.ref_snr)
        loglr = network_logvol - ln_noise_rate + ln_s
        # cut off underflowing and very small values
        loglr[loglr < -30.] = -30.
        return loglr

    def coinc_multiifo(self, s, slide, step, to_shift,
                       **kwargs):  # pylint:disable=unused-argument
        sngl_rates = {sngl[0]: sngl[1]['snglstat'] for sngl in s}
        ln_noise_rate = coinc_rate.combination_noise_lograte(
            sngl_rates, kwargs['time_addition'])
        ln_noise_rate -= self.benchmark_lograte

        # Network sensitivity for a given coinc type is approximately
        # determined by the least sensitive ifo
        network_sigmasq = numpy.amin([sngl[1]['sigmasq'] for sngl in s],
                                     axis=0)
        # Volume \propto sigma^3 or sigmasq^1.5
        network_logvol = 1.5 * numpy.log(network_sigmasq)
        # Get benchmark log volume as single-ifo information :
        # benchmark_logvol for a given template is not ifo-dependent, so
        # choose the first ifo for convenience
        benchmark_logvol = s[0][1]['benchmark_logvol']
        network_logvol -= benchmark_logvol

        # Use prior histogram to get Bayes factor for signal vs noise
        # given the time, phase and SNR differences between IFOs

        # First get signal PDF logr_s
        stat = {ifo: st for ifo, st in s}
        logr_s = self.logsignalrate_multiifo(stat, slide * step, to_shift)

        # Find total volume of phase-time-amplitude space occupied by noise
        # coincs
        # Extent of time-difference space occupied
        noise_twindow = coinc_rate.multiifo_noise_coincident_area(
            self.hist_ifos, kwargs['time_addition'])
        # Volume is the allowed time difference window, multiplied by 2pi for
        # each phase difference dimension and by allowed range of SNR ratio
        # for each SNR ratio dimension : there are (n_ifos - 1) dimensions
        # for both phase and SNR
        n_ifos = len(self.hist_ifos)
        hist_vol = noise_twindow * \
            (2 * numpy.pi * (self.srbmax - self.srbmin) * self.swidth) ** \
            (n_ifos - 1)
        # Noise PDF is 1/volume, assuming a uniform distribution of noise
        # coincs
        logr_n = - numpy.log(hist_vol)

        # Combine to get final statistic: log of
        # ((rate of signals / rate of noise) * PTA Bayes factor)
        loglr = network_logvol - ln_noise_rate + logr_s - logr_n

        # cut off underflowing and very small values
        loglr[loglr < -30.] = -30.
        return loglr

    def coinc_multiifo_lim_for_thresh(self, s, thresh, limifo,
                                      **kwargs):  # pylint:disable=unused-argument
        if not self.has_hist:
            self.get_hist()
        # if the threshold is below this value all triggers will
        # pass because of rounding in the coinc method
        if thresh <= -30:
            return numpy.ones(len(s[0][1]['snglstat'])) * numpy.inf
        sngl_rates = {sngl[0]: sngl[1]['snglstat'] for sngl in s}
        # Add limifo to singles dict so that overlap time is calculated
        # correctly
        sngl_rates[limifo] = numpy.zeros(len(s[0][1]))
        ln_noise_rate = coinc_rate.combination_noise_lograte(
            sngl_rates, kwargs['time_addition'])
        ln_noise_rate -= self.benchmark_lograte

        # Assume best case and use the maximum sigma squared from all triggers
        network_sigmasq = numpy.ones(len(s[0][1])) * kwargs['max_sigmasq']
        # Volume \propto sigma^3 or sigmasq^1.5
        network_logvol = 1.5 * numpy.log(network_sigmasq)
        # Get benchmark log volume as single-ifo information :
        # benchmark_logvol for a given template is not ifo-dependent, so
        # choose the first ifo for convenience
        benchmark_logvol = s[0][1]['benchmark_logvol']
        network_logvol -= benchmark_logvol

        # Assume best case scenario and use maximum signal rate
        logr_s = numpy.log(self.hist_max
                           * (kwargs['min_snr'] / self.ref_snr) ** -4.0)

        # Find total volume of phase-time-amplitude space occupied by noise
        # coincs
        # Extent of time-difference space occupied
        noise_twindow = coinc_rate.multiifo_noise_coincident_area(
            self.hist_ifos, kwargs['time_addition'])
        # Volume is the allowed time difference window, multiplied by 2pi for
        # each phase difference dimension and by allowed range of SNR ratio
        # for each SNR ratio dimension : there are (n_ifos - 1) dimensions
        # for both phase and SNR
        n_ifos = len(self.hist_ifos)
        hist_vol = noise_twindow * \
            (2 * numpy.pi * (self.srbmax - self.srbmin) * self.swidth) ** \
            (n_ifos - 1)
        # Noise PDF is 1/volume, assuming a uniform distribution of noise
        # coincs
        logr_n = - numpy.log(hist_vol)

        loglr = - thresh + network_logvol - ln_noise_rate + logr_s - logr_n
        return loglr


class ExpFitSGPSDFgBgNormStatistic(ExpFitSGFgBgNormNewStatistic):
    """ExpFitSGFgBgNormNewStatistic but with PSD variation added to the
    single-detector ranking
    """
    def __init__(self, files=None, ifos=None, **kwargs):
        ExpFitSGFgBgNormNewStatistic.__init__(self, files=files, ifos=ifos,
                                              **kwargs)
        self.get_newsnr = ranking.get_newsnr_sgveto_psdvar


class ExpFitSGPSDScaledFgBgNormStatistic(ExpFitSGFgBgNormNewStatistic):
    """ExpFitSGFgBgNormNewStatistic but with the scaled PSD variation
    statistic added to the single-detector ranking
    """
    def __init__(self, files=None, ifos=None, **kwargs):
        ExpFitSGFgBgNormNewStatistic.__init__(self, files=files, ifos=ifos,
                                              **kwargs)
        self.get_newsnr = ranking.get_newsnr_sgveto_psdvar_scaled


class ExpFitSGPSDFgBgNormBBHStatistic(ExpFitSGFgBgNormNewStatistic):
    """ExpFitSGFgBgNormNewStatistic with the signal rate reweighted by chirp
    mass, intended for BBH searches; the chirp mass used can be capped via
    max_chirp_mass
    """
    def __init__(self, files=None, ifos=None, max_chirp_mass=None, **kwargs):
        ExpFitSGFgBgNormNewStatistic.__init__(self, files=files, ifos=ifos,
                                              **kwargs)
        self.get_newsnr = ranking.get_newsnr_sgveto_psdvar
        self.mcm = max_chirp_mass
        self.curr_mchirp = None

    def single(self, trigs):
        from pycbc.conversions import mchirp_from_mass1_mass2
        self.curr_mchirp = mchirp_from_mass1_mass2(trigs.param['mass1'],
                                                   trigs.param['mass2'])
        if self.mcm is not None:
            # Careful - input might be a str, so cast to float
            self.curr_mchirp = min(self.curr_mchirp, float(self.mcm))
        return ExpFitSGFgBgNormNewStatistic.single(self, trigs)

    def logsignalrate_multiifo(self, stats, shift, to_shift):
        # model signal rate as uniform over chirp mass, background rate is
        # proportional to mchirp^(-11/3) due to density of templates
        logr_s = ExpFitSGFgBgNormNewStatistic.logsignalrate_multiifo(
            self, stats, shift, to_shift)
        logr_s += numpy.log((self.curr_mchirp / 20.0) ** (11. / 3.0))
        return logr_s


class ExpFitSGPSDSTFgBgNormBBHStatistic(ExpFitSGPSDFgBgNormBBHStatistic):
    """ExpFitSGPSDFgBgNormBBHStatistic using the thresholded, scaled PSD
    variation statistic in the single-detector ranking
    """
    def __init__(self, files=None, ifos=None, max_chirp_mass=None, **kwargs):
        # pass the chirp mass cap through to the parent class
        ExpFitSGPSDFgBgNormBBHStatistic.__init__(
            self, files=files, ifos=ifos, max_chirp_mass=max_chirp_mass,
            **kwargs)
        self.get_newsnr = ranking.get_newsnr_sgveto_psdvar_scaled_threshold


statistic_dict = {
    'newsnr': NewSNRStatistic,
    'network_snr': NetworkSNRStatistic,
    'newsnr_cut': NewSNRCutStatistic,
    'phasetd_newsnr': PhaseTDStatistic,
    'phasetd_newsnr_sgveto': PhaseTDSGStatistic,
    'exp_fit_stat': ExpFitStatistic,
    'exp_fit_csnr': ExpFitCombinedSNR,
    'exp_fit_sg_csnr': ExpFitSGCombinedSNR,
    'exp_fit_sg_csnr_psdvar': ExpFitSGPSDCombinedSNR,
    'phasetd_exp_fit_stat': PhaseTDExpFitStatistic,
    'max_cont_trad_newsnr': MaxContTradNewSNRStatistic,
    'phasetd_exp_fit_stat_sgveto': PhaseTDExpFitSGStatistic,
    'phasetd_new_exp_fit_stat_sgveto': PhaseTDNewExpFitSGStatistic,
    'newsnr_sgveto': NewSNRSGStatistic,
    'newsnr_sgveto_psdvar': NewSNRSGPSDStatistic,
    'phasetd_exp_fit_stat_sgveto_psdvar': PhaseTDExpFitSGPSDStatistic,
    'phasetd_exp_fit_stat_sgveto_psdvar_scaled':
        PhaseTDExpFitSGPSDScaledStatistic,
    'exp_fit_sg_bg_rate': ExpFitSGBgRateStatistic,
    'exp_fit_sg_fgbg_rate': ExpFitSGFgBgRateStatistic,
    'exp_fit_sg_fgbg_norm_new': ExpFitSGFgBgNormNewStatistic,
    '2ogc': ExpFitSGPSDScaledFgBgNormStatistic,  # backwards compatible
    '2ogcbbh': ExpFitSGPSDSTFgBgNormBBHStatistic,  # backwards compatible
    'exp_fit_sg_fgbg_norm_psdvar': ExpFitSGPSDFgBgNormStatistic,
    'exp_fit_sg_fgbg_norm_psdvar_bbh': ExpFitSGPSDFgBgNormBBHStatistic
}

sngl_statistic_dict = {
    'newsnr': NewSNRStatistic,
    'new_snr': NewSNRStatistic,  # For backwards compatibility
    'snr': NetworkSNRStatistic,
    'newsnr_cut': NewSNRCutStatistic,
    'exp_fit_csnr': ExpFitCombinedSNR,
    'exp_fit_sg_csnr': ExpFitSGCombinedSNR,
    'max_cont_trad_newsnr': MaxContTradNewSNRStatistic,
    'newsnr_sgveto': NewSNRSGStatistic,
    'newsnr_sgveto_psdvar': NewSNRSGPSDStatistic,
    'newsnr_sgveto_psdvar_scaled': NewSNRSGPSDScaledStatistic,
    'newsnr_sgveto_psdvar_scaled_threshold':
        NewSNRSGPSDScaledThresholdStatistic,
    'exp_fit_sg_csnr_psdvar': ExpFitSGPSDCombinedSNR
}


def get_statistic(stat):
    """
    Error-handling sugar around dict lookup for coincident statistics

    Parameters
    ----------
    stat : string
        Name of the coincident statistic

    Returns
    -------
    class
        Subclass of Stat base class

    Raises
    ------
    RuntimeError
        If the string is not recognized as corresponding to a Stat subclass
    """
    try:
        return statistic_dict[stat]
    except KeyError:
        raise RuntimeError('%s is not an available detection statistic' % stat)


def get_sngl_statistic(stat):
    """
    Error-handling sugar around dict lookup for single-detector statistics

    Parameters
    ----------
    stat : string
        Name of the single-detector statistic

    Returns
    -------
    class
        Subclass of Stat base class

    Raises
    ------
    RuntimeError
        If the string is not recognized as corresponding to a Stat subclass
    """
    try:
        return sngl_statistic_dict[stat]
    except KeyError:
        raise RuntimeError('%s is not an available detection statistic' % stat)
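

# Illustrative sketch (not part of the original module): resolving a
# statistic class by name and instantiating it. The ifo list and the empty
# file list are placeholder values; statistics that need fit or histogram
# files must be given the corresponding hdf files instead.
def _example_get_statistic():
    """Demonstrate looking up and instantiating a ranking statistic."""
    stat_class = get_statistic('newsnr')
    rank_method = stat_class(files=[], ifos=['H1', 'L1'])
    sngls = numpy.array([5.5, 9.1])
    return rank_method.coinc(sngls, sngls, slide=None, step=None)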