from math import log, pi, sqrt
import numpy as np
from numba import njit
from threeML.utils.statistics.gammaln import logfactorial
_log_pi_2 = log(2 * pi)  # log(2*pi), used in the Gaussian normalization term
def regularized_log(vector):
    """
    Return log(vector) where vector > 0, and zero otherwise.

    Note that np.where evaluates np.log on every entry, so non-positive
    entries may still emit a RuntimeWarning even though the result is correct.

    :param vector: input array
    :return: array with the log of the positive entries and zero elsewhere
    """

    return np.where(vector > 0, np.log(vector), 0)
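
# Example (illustrative sketch; the values follow from the definition above,
# using log(e) == 1):
#
#     regularized_log(np.array([0.0, 1.0, np.e]))   # -> array([0., 0., 1.])
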
@njit(fastmath=True, parallel=False)
def xlogy(x, y):
    """
    Return 0 where x is 0, and x * log(y) elsewhere. This works around the
    fact that in floating-point arithmetic 0 * log(inf) evaluates to nan
    instead of 0.

    :param x: input array
    :param y: input array of the same length as x
    :return: array with x * log(y), and zeros where x is zero
    """

    out = np.zeros_like(x)
    n = len(x)

    for i in range(n):
        if x[i] > 0:
            out[i] = x[i] * log(y[i])

    return out
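
# Example (illustrative sketch with toy float64 arrays): log(y) is never
# evaluated where x is zero, so an infinite y in that slot is harmless:
#
#     xlogy(np.array([0.0, 2.0]), np.array([np.inf, np.e]))   # -> array([0., 2.])
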
@njit(fastmath=True, parallel=False)
def xlogy_one(x, y):
    """
    Scalar version of xlogy: return 0 if x is 0, and x * log(y) otherwise.

    :param x: scalar
    :param y: scalar
    :return: x * log(y), or 0 if x is 0
    """

    if x > 0:
        return x * log(y)
    else:
        return 0.0
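
# Example (illustrative sketch):
#
#     xlogy_one(0.0, np.inf)   # -> 0.0
#     xlogy_one(3.0, np.e)     # -> 3.0
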
@njit(fastmath=True)
def poisson_log_likelihood_ideal_bkg(
    observed_counts, expected_bkg_counts, expected_model_counts
):
    r"""
    Poisson log-likelihood for the case where the background has no uncertainties:

    \log L = \sum_{i=1}^{N} o_i \log(m_i + b_i) - (m_i + b_i) - \log(o_i!)

    :param observed_counts: observed counts in each channel
    :param expected_bkg_counts: expected background counts in each channel
    :param expected_model_counts: expected model counts in each channel
    :return: (log_like vector, background vector)
    """

    # In this likelihood the background becomes part of the model, which means
    # that the uncertainty in the background is completely neglected

    n = expected_model_counts.shape[0]

    log_likes = np.empty(n, dtype=np.float64)

    # Model predicted counts
    predicted_counts = expected_bkg_counts + expected_model_counts

    for i in range(n):
        log_likes[i] = (
            xlogy_one(observed_counts[i], predicted_counts[i])
            - predicted_counts[i]
            - logfactorial(observed_counts[i])
        )

    return log_likes, expected_bkg_counts
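
# Example (illustrative sketch with toy float64 arrays): with no observed
# counts and no background, each channel contributes -m_i, so here the
# log-likelihood is -1 and the returned background is 0:
#
#     o = np.array([0.0]); b = np.array([0.0]); m = np.array([1.0])
#     poisson_log_likelihood_ideal_bkg(o, b, m)   # -> (array([-1.]), array([0.]))
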
def poisson_observed_poisson_background_xs(
    observed_counts, background_counts, exposure_ratio, expected_model_counts
):
    """
    Profile log-likelihood for the case where the observed counts are Poisson
    distributed and the background counts are Poisson distributed as well
    (typical for X-ray analysis with aperture photometry). This has been
    derived by Keith Arnaud (see the Xspec manual, wstat statistic).
    """

    # We follow Arnaud (Xspec manual) in the computation: the quantity built
    # below is his W statistic, i.e. -log(L) multiplied by 2 (with the
    # log-factorials replaced by their Stirling approximations) so that it
    # converges to a chi^2. Since we need a log-likelihood rather than a
    # statistic to minimize, we flip the sign at the end.

    # Compute the profiled (nuisance) background parameter

    first_term = (
        exposure_ratio * (observed_counts + background_counts)
        - (1 + exposure_ratio) * expected_model_counts
    )

    second_term = np.sqrt(
        first_term ** 2
        + 4
        * exposure_ratio
        * (exposure_ratio + 1)
        * background_counts
        * expected_model_counts
    )

    background_nuisance_parameter = (first_term + second_term) / (
        2 * exposure_ratio * (exposure_ratio + 1)
    )

    first_term = (
        expected_model_counts + (1 + exposure_ratio) * background_nuisance_parameter
    )

    # xlogy keeps the result finite when expected_model_counts and
    # background_nuisance_parameter are both zero. For any good model this
    # should also mean observed_counts = 0.

    second_term = -xlogy(
        observed_counts,
        expected_model_counts + exposure_ratio * background_nuisance_parameter,
    )

    third_term = -xlogy(background_counts, background_nuisance_parameter)

    ppstat = 2 * (first_term + second_term + third_term)

    ppstat += 2 * (
        -observed_counts
        + xlogy(observed_counts, observed_counts)
        - background_counts
        + xlogy(background_counts, background_counts)
    )

    return ppstat * (-1)
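
# Example (illustrative sketch, derived by hand from the formulas above): for
# a model that reproduces the data exactly the statistic vanishes. With
# alpha = 1, o = 10, b = 5 and m = 5 the profiled background is 5, so the
# source expectation m + alpha * 5 = 10 matches o and every term cancels:
#
#     o = np.array([10.0]); b = np.array([5.0]); m = np.array([5.0])
#     poisson_observed_poisson_background_xs(o, b, 1.0, m)   # -> array of zeros
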
@njit(fastmath=True)
def poisson_observed_poisson_background(
    observed_counts, background_counts, exposure_ratio, expected_model_counts
):
    # TODO: check this with simulations

    # Just a name change to make writing formulas a little easier
    alpha = exposure_ratio

    # b = background_counts
    # o = observed_counts
    # M = expected_model_counts

    n = len(expected_model_counts)

    loglike = np.empty(n, dtype=np.float64)
    B_mle = np.empty(n, dtype=np.float64)

    # Nuisance parameter (profiled background) for the Poisson likelihood
    # NOTE: B_mle is zero when b is zero!

    for idx in range(n):

        o_plus_b = observed_counts[idx] + background_counts[idx]

        sqr = np.sqrt(
            4
            * (alpha + alpha ** 2)
            * background_counts[idx]
            * expected_model_counts[idx]
            + ((alpha + 1) * expected_model_counts[idx] - alpha * o_plus_b) ** 2
        )

        B_mle[idx] = (
            1
            / (2.0 * alpha * (1 + alpha))
            * (alpha * o_plus_b - (alpha + 1) * expected_model_counts[idx] + sqr)
        )

        # Profile likelihood

        loglike[idx] = (
            xlogy_one(
                observed_counts[idx], alpha * B_mle[idx] + expected_model_counts[idx]
            )
            + xlogy_one(background_counts[idx], B_mle[idx])
            - (alpha + 1) * B_mle[idx]
            - expected_model_counts[idx]
            - logfactorial(background_counts[idx])
            - logfactorial(observed_counts[idx])
        )

    return loglike, B_mle * alpha
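
# Example (illustrative sketch, same toy data as above): with alpha = 1,
# o = 10, b = 5 and m = 5 the square root evaluates to 15, giving B_mle = 5,
# so the returned scaled background alpha * B_mle is 5:
#
#     o = np.array([10.0]); b = np.array([5.0]); m = np.array([5.0])
#     loglike, scaled_bkg = poisson_observed_poisson_background(o, b, 1.0, m)
#     scaled_bkg   # -> array([5.])
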
@njit(fastmath=True)
def poisson_observed_gaussian_background(
    observed_counts, background_counts, background_error, expected_model_counts
):
    # This likelihood assumes Gaussian errors on the background and Poisson
    # uncertainties on the observed counts. It is a profile likelihood.

    n = background_counts.shape[0]

    log_likes = np.empty(n, dtype=np.float64)
    b = np.empty(n, dtype=np.float64)

    for idx in range(n):

        MB = background_counts[idx] + expected_model_counts[idx]
        s2 = background_error[idx] * background_error[idx]

        b[idx] = 0.5 * (
            sqrt(MB * MB - 2 * s2 * (MB - 2 * observed_counts[idx]) + s2 * s2)
            + background_counts[idx]
            - expected_model_counts[idx]
            - s2
        )

        # Now there are two branches: when the background is 0 we are in the
        # normal situation of a pure Poisson likelihood, while when the
        # background is not zero we use the profile likelihood

        # NOTE: background_error can be 0 only when background_counts = 0, and
        # it is evident from the expression above that in that case b = 0 too

        # Let's do the branch with background > 0 first

        if background_counts[idx] > 0:

            if observed_counts[idx] > 0:

                log_likes[idx] = (
                    -((b[idx] - background_counts[idx]) ** 2) / (2 * s2)
                    + observed_counts[idx] * log(b[idx] + expected_model_counts[idx])
                    - b[idx]
                    - expected_model_counts[idx]
                    - logfactorial(observed_counts[idx])
                    - 0.5 * _log_pi_2
                    - log(background_error[idx])
                )

            else:

                log_likes[idx] = (
                    -((b[idx] - background_counts[idx]) ** 2) / (2 * s2)
                    - b[idx]
                    - expected_model_counts[idx]
                    - 0.5 * _log_pi_2
                    - log(background_error[idx])
                )

        # Let's do the other branch

        else:

            # Pure Poisson likelihood with no background; xlogy_one keeps the
            # result finite when both the counts and the model are zero

            log_likes[idx] = (
                xlogy_one(observed_counts[idx], expected_model_counts[idx])
                - expected_model_counts[idx]
                - logfactorial(observed_counts[idx])
            )

    return log_likes, b
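
# Example (illustrative sketch): with zero measured background and zero
# background error the profiled background is zero and the pure-Poisson
# branch applies, so a channel with o = 0 and m = 1 contributes -1:
#
#     o = np.array([0.0]); b = np.array([0.0]); be = np.array([0.0])
#     m = np.array([1.0])
#     poisson_observed_gaussian_background(o, b, be, m)   # -> (array([-1.]), array([0.]))
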
@njit(fastmath=True)
def half_chi2(y, yerr, expectation):
    # This is half of a chi2. The reason for the factor of two is that we need
    # this to be the Gaussian log-likelihood, so that the delta log-like for an
    # error of, say, 1 sigma is 0.5 and not 1 as it would be for a plain chi2.
    # This way we can sum it with the other likelihood functions.

    N = y.shape[0]

    log_likes = np.empty(N, dtype=np.float64)

    for n in range(N):
        log_likes[n] = (y[n] - expectation[n]) ** 2 / (yerr[n] ** 2)

    return 0.5 * log_likes
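
# Example (illustrative sketch): a 1-sigma deviation contributes 0.5,
# matching the delta log-like convention described above:
#
#     y = np.array([1.0]); yerr = np.array([1.0]); expectation = np.array([2.0])
#     half_chi2(y, yerr, expectation)   # -> array([0.5])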