import logging
import os
import time
from pathlib import Path
import numpy as np
from astromodels import ModelAssertionViolation, use_astromodels_memoization
from threeML.bayesian.sampler_base import UnitCubeSampler
from threeML.config.config import threeML_config
from threeML.io.logging import setup_logger
# Detect whether the optional autoemcee dependency is importable; the
# sampler class below refuses to instantiate without it.
try:
    import autoemcee
except ImportError:
    has_autoemcee = False
else:
    has_autoemcee = True

# Detect whether we are running under MPI with more than one process.
# If mpi4py is missing, or MPI initialization fails for any reason,
# fall back to serial mode.
try:
    # see if we have mpi and/or are using parallel
    from mpi4py import MPI

    if MPI.COMM_WORLD.Get_size() > 1:  # need parallel capabilities
        using_mpi = True

        comm = MPI.COMM_WORLD
        rank = comm.Get_rank()
    else:
        using_mpi = False

except Exception:
    using_mpi = False

log = setup_logger(__name__)
class AutoEmceeSampler(UnitCubeSampler):
    """
    threeML interface to autoemcee's ReactiveAffineInvariantSampler: an
    ensemble MCMC sampler that is run repeatedly until convergence
    diagnostics (autocorrelation length, Geweke, Gelman-Rubin) are
    satisfied.
    """

    def __init__(self, likelihood_model=None, data_list=None, **kwargs):

        # NOTE(review): assert is stripped under ``python -O``; kept as-is
        # so the raised exception type (AssertionError) stays unchanged
        # for existing callers.
        assert has_autoemcee, "You must install AutoEmcee to use this sampler"

        super(AutoEmceeSampler, self).__init__(likelihood_model, data_list, **kwargs)

    def setup(
        self,
        num_global_samples=10000,
        num_chains=4,
        num_walkers=None,
        max_ncalls=1000000,
        max_improvement_loops=4,
        num_initial_steps=100,
        min_autocorr_times=0,
    ):
        """
        Configure the sampler to run until the MCMC chains have converged.

        The steps performed by autoemcee are:

        1. Draw *num_global_samples* from the prior. The highest *num_walkers* points are selected.
        2. Set *num_steps* to *num_initial_steps*.
        3. Run *num_chains* MCMC ensembles for *num_steps* steps.
        4. For each walker chain, compute the auto-correlation length (convergence requires *num_steps*/autocorrelation length > *min_autocorr_times*).
        5. For each parameter, compute the Geweke convergence diagnostic (convergence requires \\|z\\| < 2).
        6. For each ensemble, compute the Gelman-Rubin rank convergence diagnostic (convergence requires rhat < 1.2).
        7. If converged, stop and return results.
        8. Increase *num_steps* by 10, and repeat from (3) up to *max_improvement_loops* times.

        :param num_global_samples: number of samples to draw from the prior
        :param num_chains: number of independent ensembles to run. If running
            with MPI, this is set to the number of MPI processes.
        :param num_walkers: ensemble size. If None, max(100, 4 * dim) is used
        :param max_ncalls: maximum number of likelihood function evaluations
        :param max_improvement_loops: number of times MCMC should be
            re-attempted (see above)
        :param num_initial_steps: number of sampler steps to take in the
            first iteration
        :param min_autocorr_times: if positive, additionally require for
            convergence that the number of samples is larger than
            *min_autocorr_times* times the autocorrelation length
        """

        self._num_global_samples = num_global_samples
        self._num_chains = num_chains
        self._num_walkers = num_walkers
        self._max_ncalls = max_ncalls
        self._max_improvement_loops = max_improvement_loops
        self._num_initial_steps = num_initial_steps
        self._min_autocorr_times = min_autocorr_times

        self._is_setup = True

    def sample(self, quiet=False):
        """
        Run the autoemcee sampler (as configured by :meth:`setup`) and
        store the posterior samples on this object.

        :param quiet: if True, do not display the results at the end
        :returns: the posterior samples (only on the processing rank when
            running under MPI)
        :raises RuntimeError: if :meth:`setup` was not called first, or if
            threeML's internal parallel mode is enabled
        """

        if not self._is_setup:
            log.error("You forgot to setup the sampler!")
            raise RuntimeError("Sampler not set up; call setup() first")

        loud = not quiet

        self._update_free_parameters()

        param_names = list(self._free_parameters.keys())

        loglike, autoemcee_prior = self._construct_unitcube_posterior(return_copy=True)

        if threeML_config["parallel"]["use_parallel"]:

            log.error(
                "If you want to run autoemcee in parallel you need to use an ad-hoc method"
            )

            raise RuntimeError("autoemcee cannot run with threeML parallel mode")

        else:

            sampler = autoemcee.ReactiveAffineInvariantSampler(
                param_names,
                loglike,
                transform=autoemcee_prior,
                vectorized=False,
                sampler="goodman-weare",
            )

        # Memoization must be off: the sampler evaluates the likelihood at
        # many distinct points, so caching would only waste memory.
        with use_astromodels_memoization(False):

            log.debug("Start autoemcee run")

            sampler.run(
                self._num_global_samples,
                self._num_chains,
                self._num_walkers,
                self._max_ncalls,
                self._max_improvement_loops,
                self._num_initial_steps,
                self._min_autocorr_times,
                progress=threeML_config.interface.progress_bars,
            )

            log.debug("autoemcee run done")

        process_fit = False

        if using_mpi:

            # if we are running in parallel, only the first rank processes
            # the fit; the other ranks just wait for everything to finish
            if rank != 0:

                # let these guys take a break
                time.sleep(1)

                process_fit = False

            else:

                # wait for a moment to allow it all to turn off
                time.sleep(1)

                process_fit = True

        else:

            process_fit = True

        if process_fit:

            self._sampler = sampler

            # Map the unit-cube chains back to parameter space and
            # concatenate the flattened chains of all independent ensembles.
            self._raw_samples = np.concatenate(
                [
                    sampler.transform(s.get_chain(flat=True))
                    for s in self._sampler.samplers
                ]
            )

            # The sampler stores log-posterior values; subtract the log
            # prior to recover the log-likelihood.
            log_prior = np.array([self._log_prior(x) for x in self._raw_samples])

            self._log_probability_values = np.concatenate(
                [s.get_log_prob(flat=True) for s in self._sampler.samplers]
            )

            self._log_like_values = self._log_probability_values - log_prior

            self._marginal_likelihood = None

            self._build_samples_dictionary()

            self._build_results()

            # Display results
            if loud:
                self._results.display()

            return self.samples