Source code for threeML.classicMLE.goodness_of_fit

import collections
from builtins import object

import numpy as np
from astromodels import clone_model

from threeML.classicMLE.joint_likelihood_set import JointLikelihoodSet
from threeML.data_list import DataList



[docs]
class GoodnessOfFit(object):
    def __init__(self, joint_likelihood_instance, like_data_frame=None):
        self._jl_instance = joint_likelihood_instance

        # Make sure we have a fit
        assert (
            self._jl_instance.results is not None
        ), "You have to perform a fit before using GoodnessOfFit"

        if like_data_frame is None:
            like_data_frame = self._jl_instance.results.get_statistic_frame()

        # Restore best fit and store the reference value for the likelihood
        self._jl_instance.restore_best_fit()

        self._reference_like = like_data_frame["-log(likelihood)"]

        # Store best model
        self._best_fit_model = clone_model(self._jl_instance.likelihood_model)


[docs]
    def get_simulated_data(self, id):
        # Make sure we start from the best fit model
        self._jl_instance.restore_best_fit()

        # Generate a new data set for each plugin contained in the data list

        new_datas = []

        for dataset in list(self._jl_instance.data_list.values()):
            new_data = dataset.get_simulated_dataset("%s_sim" % dataset.name)

            new_datas.append(new_data)

        new_data_list = DataList(*new_datas)

        return new_data_list



[docs]
    def get_model(self, id):
        # Make a copy of the best fit model, so that we don't touch the original model
        # during the fit, and we also always restart from the best fit (instead of the
        # last iteration)

        new_model = clone_model(self._best_fit_model)

        return new_model



[docs]
    def by_mc(
        self,
        n_iterations=1000,
        continue_on_failure=False,
        preprocessor=None,
        postprocessor=None,
    ):
        """Compute goodness of fit by generating Monte Carlo datasets and
        fitting the current model on them. The fraction of synthetic datasets
        which have a value for the likelihood larger or equal to the observed
        one is a measure of the goodness of fit.

        :param n_iterations: number of MC iterations to perform
            (default: 1000)
        :param continue_of_failure: whether to continue in the case a
            fit fails (False by default)
        :return: tuple (goodness of fit, frame with all results, frame
            with all likelihood values)
        """

        # Create the joint likelihood set
        jl_set = JointLikelihoodSet(
            self.get_simulated_data,
            self.get_model,
            n_iterations,
            iteration_name="simulation",
            preprocessor=preprocessor,
            postprocessor=postprocessor,
        )

        # Use the same minimizer as in the joint likelihood object
        # NOTE: we use a clone so that the original best fit will not be touched

        jl_set.set_minimizer(self._jl_instance.minimizer_in_use)

        # Run the set
        data_frame, like_data_frame = jl_set.go(continue_on_failure=continue_on_failure)

        # Compute goodness of fit

        gof = collections.OrderedDict()

        # Total
        idx = (
            like_data_frame["-log(likelihood)"][:, "total"].values
            >= self._reference_like["total"]
        )  # type: np.ndarray

        gof["total"] = np.sum(idx) / float(n_iterations)

        for dataset in list(self._jl_instance.data_list.values()):
            sim_name = "%s_sim" % dataset.name

            idx = (
                like_data_frame["-log(likelihood)"][:, sim_name].values
                >= self._reference_like[dataset.name]
            )

            gof[dataset.name] = np.sum(idx) / float(n_iterations)

        return gof, data_frame, like_data_frame