# Source code for threeML.utils.statistics.likelihood_functions

from __future__ import division

from math import log, pi, sqrt

import numpy as np
from numba import njit
from past.utils import old_div

from threeML.utils.statistics.gammaln import logfactorial

# ln(2*pi): constant term of the Gaussian log-normalization
# (used in the Gaussian-background branch of poisson_observed_gaussian_background)
_log_pi_2 = log(2 * pi)

def regularized_log(vector):
    """
    Element-wise log(vector) where vector > 0, and zero otherwise.

    Unlike ``np.where(vector > 0, np.log(vector), 0)``, the log is only
    evaluated on the positive elements (via the ``where=`` argument), so no
    RuntimeWarning is emitted for zero or negative entries.

    :param vector: array-like of values
    :return: float array with log(v) for positive entries, 0 elsewhere
    """
    values = np.asarray(vector, dtype=np.float64)

    out = np.zeros_like(values)

    # Compute the log only where it is defined; other entries keep their 0
    np.log(values, out=out, where=values > 0)

    return out
@njit(fastmath=True, parallel=False)
def xlogy(x, y):
    """
    Element-wise x * log(y), defined as 0 wherever x is 0.

    This avoids the machine result 0 * log(inf) = nan: when x is 0 the term
    is taken to be exactly 0 regardless of y.

    :param x: array of multipliers
    :param y: array of log arguments (same length as x)
    :return: float array of x * log(y) terms
    """
    result = np.zeros_like(x)

    for idx in range(len(x)):

        value = x[idx]

        if value > 0:

            result[idx] = value * log(y[idx])

    return result
@njit(fastmath=True, parallel=False)
def xlogy_one(x, y):
    """
    Scalar x * log(y), defined as 0 when x is 0.

    This avoids the machine result 0 * log(inf) = nan: when x is 0 the term
    is taken to be exactly 0 regardless of y.

    :param x: scalar multiplier
    :param y: scalar log argument
    :return: x * log(y), or 0.0 when x <= 0
    """
    if x <= 0:

        return 0.0

    return x * log(y)
@njit(fastmath=True)
def poisson_log_likelihood_ideal_bkg(
    observed_counts, expected_bkg_counts, expected_model_counts
):
    r"""
    Poisson log-likelihood for the case where the background has no uncertainties:

    L = \sum_{i=0}^{N}~o_i~\log{(m_i + b_i)} - (m_i + b_i) - \log{o_i!}

    :param observed_counts: observed counts per channel
    :param expected_bkg_counts: background counts per channel (treated as exact)
    :param expected_model_counts: model-predicted counts per channel
    :return: (log_like vector, background vector)
    """
    # In this likelihood the background becomes part of the model, which means
    # that the uncertainty in the background is completely neglected

    # BUGFIX: .shape is a tuple; range() needs the integer length, so take
    # the first (only) dimension explicitly
    n = expected_model_counts.shape[0]

    log_likes = np.empty(n, dtype=np.float64)

    predicted_counts = expected_bkg_counts + expected_model_counts

    for i in range(n):
        # xlogy_one handles observed_counts[i] == 0 without producing NaN
        log_likes[i] = (
            xlogy_one(observed_counts[i], predicted_counts[i])
            - predicted_counts[i]
            - logfactorial(observed_counts[i])
        )

    return log_likes, expected_bkg_counts
def poisson_observed_poisson_background_xs(
    observed_counts, background_counts, exposure_ratio, expected_model_counts
):
    """
    Profile log-likelihood (W-stat) for Poisson-distributed observed counts with
    a Poisson-distributed background, as is typical for X-ray analysis with
    aperture photometry.

    Follows the derivation by Keith Arnaud (Xspec manual, Wstat statistic).
    Arnaud computes -log(L) and multiplies by 2 to make it chi^2-like; here the
    sign is flipped at the end so a proper (profile) log(L) is returned, and no
    extra factor of 2 is applied beyond the one in the W-stat definition.

    :param observed_counts: counts in the source region
    :param background_counts: counts in the background region
    :param exposure_ratio: ratio between source and background exposures
    :param expected_model_counts: model-predicted counts in the source region
    :return: vector of log-likelihood contributions, one per channel
    """
    alpha = exposure_ratio

    # Profiled-out (maximum-likelihood) background nuisance parameter:
    # positive root of the quadratic equation d(-logL)/db = 0
    linear_part = (
        alpha * (observed_counts + background_counts)
        - (1 + alpha) * expected_model_counts
    )

    discriminant = np.sqrt(
        linear_part ** 2
        + 4 * alpha * (alpha + 1) * background_counts * expected_model_counts
    )

    profiled_background = (linear_part + discriminant) / (
        2 * alpha * (alpha + 1)
    )

    # Pieces of -log(L); xlogy gives 0 * log(0) = 0, so channels where both the
    # model and the profiled background vanish (which for any sensible model
    # also means zero observed counts) are handled safely
    model_part = expected_model_counts + (1 + alpha) * profiled_background

    source_log_part = -xlogy(
        observed_counts,
        expected_model_counts + alpha * profiled_background,
    )

    background_log_part = -xlogy(background_counts, profiled_background)

    wstat = 2 * (model_part + source_log_part + background_log_part)

    # Data-only terms (independent of the model; make the statistic chi^2-like)
    wstat += 2 * (
        -observed_counts
        + xlogy(observed_counts, observed_counts)
        - background_counts
        + xlogy(background_counts, background_counts)
    )

    # Arnaud's expression is -log(L); return log(L)
    return wstat * (-1)
@njit(fastmath=True)
def poisson_observed_poisson_background(
    observed_counts, background_counts, exposure_ratio, expected_model_counts
):
    """
    Profile log-likelihood for Poisson observed counts with a Poisson background.

    The background rate in each channel is profiled out analytically (its
    conditional maximum-likelihood value given the model), following the same
    W-stat derivation used in poisson_observed_poisson_background_xs.

    :param observed_counts: counts in the source region
    :param background_counts: counts in the background region
    :param exposure_ratio: source/background exposure ratio (alpha)
    :param expected_model_counts: model-predicted counts in the source region
    :return: (log-likelihood vector, profiled background scaled to the source
             region, i.e. alpha * B_mle)
    """
    # TODO: check this with simulations

    a = exposure_ratio

    n_channels = len(expected_model_counts)

    log_like = np.empty(n_channels, dtype=np.float64)
    profiled_bkg = np.empty(n_channels, dtype=np.float64)

    for i in range(n_channels):

        o = observed_counts[i]
        b = background_counts[i]
        m = expected_model_counts[i]

        # Conditional MLE of the background nuisance parameter.
        # NOTE: it is exactly zero whenever b is zero.
        total = o + b

        root = np.sqrt(
            4 * (a + a ** 2) * b * m
            + ((a + 1) * m - a * (total)) ** 2
        )

        profiled_bkg[i] = (
            1
            / (2.0 * a * (1 + a))
            * (a * (total) - (a + 1) * m + root)
        )

        # Profile likelihood: Poisson terms for source and background regions;
        # xlogy_one keeps the 0 * log(0) terms at exactly zero
        log_like[i] = (
            xlogy_one(o, a * profiled_bkg[i] + m)
            + xlogy_one(b, profiled_bkg[i])
            - (a + 1) * profiled_bkg[i]
            - m
            - logfactorial(b)
            - logfactorial(o)
        )

    return log_like, profiled_bkg * a
@njit(fastmath=True)
def poisson_observed_gaussian_background(
    observed_counts, background_counts, background_error, expected_model_counts
):
    """
    Profile log-likelihood for Poisson-distributed observed counts with a
    background measured with Gaussian errors.

    The Gaussian background nuisance parameter is profiled out analytically in
    each channel.

    :param observed_counts: observed counts (Poisson)
    :param background_counts: background estimate per channel
    :param background_error: 1-sigma Gaussian error on the background estimate
    :param expected_model_counts: model-predicted source counts per channel
    :return: (log-likelihood vector, profiled background vector)
    """
    # BUGFIX: .shape is a tuple; range() needs the integer length, so take
    # the first (only) dimension explicitly
    n = background_counts.shape[0]

    log_likes = np.empty(n, dtype=np.float64)
    b = np.empty(n, dtype=np.float64)

    for idx in range(n):

        MB = background_counts[idx] + expected_model_counts[idx]
        s2 = background_error[idx] * background_error[idx]

        # Profiled-out (MLE) background: positive root of the quadratic
        # equation obtained by maximizing the likelihood over the background
        b[idx] = 0.5 * (
            sqrt(MB * MB - 2 * s2 * (MB - 2 * observed_counts[idx]) + s2 * s2)
            + background_counts[idx]
            - expected_model_counts[idx]
            - s2
        )

        # Two branches: when the background estimate is zero we are in the
        # normal situation of a pure Poisson likelihood, otherwise we use the
        # Gaussian-profile likelihood.
        # NOTE: background_error can be 0 only when background_counts = 0, and
        # it is evident from the expression above that in that case b = 0 too.

        if background_counts[idx] > 0:

            log_likes[idx] = (
                -((b[idx] - background_counts[idx]) ** 2) / (2 * s2)
                + observed_counts[idx] * log(b[idx] + expected_model_counts[idx])
                - b[idx]
                - expected_model_counts[idx]
                - logfactorial(observed_counts[idx])
                - 0.5 * _log_pi_2
                - log(background_error[idx])
            )

        else:

            # Pure Poisson likelihood with no background; xlogy_one handles
            # observed_counts = 0 without producing NaN
            log_likes[idx] = (
                xlogy_one(observed_counts[idx], expected_model_counts[idx])
                - expected_model_counts[idx]
                - logfactorial(observed_counts[idx])
            )

    return log_likes, b
@njit(fastmath=True)
def half_chi2(y, yerr, expectation):
    """
    Half of the chi-square statistic.

    The factor of one half makes this the Gaussian log-likelihood (up to a
    constant), so that the delta log-like for a 1-sigma deviation is 0.5
    rather than 1 as it would be for a plain chi^2. This way it can be summed
    consistently with the other likelihood functions in this module.

    :param y: measured values
    :param yerr: 1-sigma errors on the measurements
    :param expectation: model-predicted values
    :return: vector of half-chi2 contributions, one per data point
    """
    # BUGFIX: .shape is a tuple; range() needs the integer length, so take
    # the first (only) dimension explicitly
    n_points = y.shape[0]

    log_likes = np.empty(n_points, dtype=np.float64)

    for i in range(n_points):
        log_likes[i] = (y[i] - expectation[i]) ** 2 / (yerr[i] ** 2)

    return 0.5 * log_likes