import logging
import os
import time
from pathlib import Path
import numpy as np
from astromodels import ModelAssertionViolation, use_astromodels_memoization
from threeML.bayesian.sampler_base import UnitCubeSampler
from threeML.config.config import threeML_config
from threeML.io.logging import setup_logger
# Detect whether the optional autoemcee dependency is importable; the
# sampler class below refuses to instantiate without it.
try:
    import autoemcee
except ImportError:
    has_autoemcee = False
else:
    has_autoemcee = True

# Detect whether we are running under MPI with more than one process.
# If mpi4py is missing, or MPI initialization fails for any reason,
# fall back to serial mode.
try:
    # see if we have mpi and/or are using parallel
    from mpi4py import MPI

    if MPI.COMM_WORLD.Get_size() > 1:  # need parallel capabilities
        using_mpi = True

        comm = MPI.COMM_WORLD
        rank = comm.Get_rank()
    else:
        using_mpi = False

except Exception:
    using_mpi = False

log = setup_logger(__name__)
class AutoEmceeSampler(UnitCubeSampler):
    """
    threeML interface to autoemcee's ReactiveAffineInvariantSampler: an
    ensemble MCMC sampler that is run repeatedly until convergence
    diagnostics (autocorrelation length, Geweke, Gelman-Rubin) are
    satisfied.
    """

    def __init__(self, likelihood_model=None, data_list=None, **kwargs):

        # NOTE(review): assert is stripped under ``python -O``; kept as-is
        # so the raised exception type (AssertionError) stays unchanged
        # for existing callers.
        assert has_autoemcee, "You must install AutoEmcee to use this sampler"

        super(AutoEmceeSampler, self).__init__(likelihood_model, data_list, **kwargs)

    def setup(
        self,
        num_global_samples=10000,
        num_chains=4,
        num_walkers=None,
        max_ncalls=1000000,
        max_improvement_loops=4,
        num_initial_steps=100,
        min_autocorr_times=0,
    ):
        """
        Configure the sampler to run until the MCMC chains have converged.

        The steps performed by autoemcee are:

        1. Draw *num_global_samples* from the prior. The highest *num_walkers* points are selected.
        2. Set *num_steps* to *num_initial_steps*.
        3. Run *num_chains* MCMC ensembles for *num_steps* steps.
        4. For each walker chain, compute the auto-correlation length (convergence requires *num_steps*/autocorrelation length > *min_autocorr_times*).
        5. For each parameter, compute the Geweke convergence diagnostic (convergence requires \\|z\\| < 2).
        6. For each ensemble, compute the Gelman-Rubin rank convergence diagnostic (convergence requires rhat < 1.2).
        7. If converged, stop and return results.
        8. Increase *num_steps* by 10, and repeat from (3) up to *max_improvement_loops* times.

        :param num_global_samples: number of samples to draw from the prior
        :param num_chains: number of independent ensembles to run. If running
            with MPI, this is set to the number of MPI processes.
        :param num_walkers: ensemble size. If None, max(100, 4 * dim) is used
        :param max_ncalls: maximum number of likelihood function evaluations
        :param max_improvement_loops: number of times MCMC should be
            re-attempted (see above)
        :param num_initial_steps: number of sampler steps to take in the
            first iteration
        :param min_autocorr_times: if positive, additionally require for
            convergence that the number of samples is larger than
            *min_autocorr_times* times the autocorrelation length
        """

        self._num_global_samples = num_global_samples
        self._num_chains = num_chains
        self._num_walkers = num_walkers
        self._max_ncalls = max_ncalls
        self._max_improvement_loops = max_improvement_loops
        self._num_initial_steps = num_initial_steps
        self._min_autocorr_times = min_autocorr_times

        self._is_setup = True

    def sample(self, quiet=False):
        """
        Run the autoemcee sampler (as configured by :meth:`setup`) and
        store the posterior samples on this object.

        :param quiet: if True, do not display the results at the end
        :returns: the posterior samples (only on the processing rank when
            running under MPI)
        :raises RuntimeError: if :meth:`setup` was not called first, or if
            threeML's internal parallel mode is enabled
        """

        if not self._is_setup:
            log.error("You forgot to setup the sampler!")
            raise RuntimeError("Sampler not set up; call setup() first")

        loud = not quiet

        self._update_free_parameters()

        param_names = list(self._free_parameters.keys())

        loglike, autoemcee_prior = self._construct_unitcube_posterior(return_copy=True)

        if threeML_config["parallel"]["use_parallel"]:

            log.error(
                "If you want to run autoemcee in parallel you need to use an ad-hoc method"
            )

            raise RuntimeError("autoemcee cannot run with threeML parallel mode")

        else:

            sampler = autoemcee.ReactiveAffineInvariantSampler(
                param_names,
                loglike,
                transform=autoemcee_prior,
                vectorized=False,
                sampler="goodman-weare",
            )

        # Memoization must be off: the sampler evaluates the likelihood at
        # many distinct points, so caching would only waste memory.
        with use_astromodels_memoization(False):

            log.debug("Start autoemcee run")

            sampler.run(
                self._num_global_samples,
                self._num_chains,
                self._num_walkers,
                self._max_ncalls,
                self._max_improvement_loops,
                self._num_initial_steps,
                self._min_autocorr_times,
                progress=threeML_config.interface.progress_bars,
            )

            log.debug("autoemcee run done")

        process_fit = False

        if using_mpi:

            # if we are running in parallel, only the first rank processes
            # the fit; the other ranks just wait for everything to finish
            if rank != 0:

                # let these guys take a break
                time.sleep(1)

                process_fit = False

            else:

                # wait for a moment to allow it all to turn off
                time.sleep(1)

                process_fit = True

        else:

            process_fit = True

        if process_fit:

            self._sampler = sampler

            # Map the unit-cube chains back to parameter space and
            # concatenate the flattened chains of all independent ensembles.
            self._raw_samples = np.concatenate(
                [
                    sampler.transform(s.get_chain(flat=True))
                    for s in self._sampler.samplers
                ]
            )

            # The sampler stores log-posterior values; subtract the log
            # prior to recover the log-likelihood.
            log_prior = np.array([self._log_prior(x) for x in self._raw_samples])

            self._log_probability_values = np.concatenate(
                [s.get_log_prob(flat=True) for s in self._sampler.samplers]
            )

            self._log_like_values = self._log_probability_values - log_prior

            self._marginal_likelihood = None

            self._build_samples_dictionary()

            self._build_results()

            # Display results
            if loud:
                self._results.display()

            return self.samples