from __future__ import print_function
from future import standard_library
standard_library.install_aliases()
from builtins import map
import urllib.request, urllib.parse, urllib.error
import os
import astropy.time as astro_time
import datetime
import astropy.io.votable as votable
from threeML.io.file_utils import (
sanitize_filename,
if_directory_not_existing_then_make,
file_existing_and_readable,
)
import warnings
import yaml
import codecs
[docs]def get_heasarc_table_as_pandas(heasarc_table_name, update=False, cache_time_days=1):
"""
Obtain a a VO table from the HEASARC archives and return it as a pandas table indexed
by object/trigger names. The heasarc_table_name values are the ones referenced at:
https://heasarc.gsfc.nasa.gov/docs/archive/vo/
In order to speed up the processing of the tables, 3ML can cache the XML table in a cache
that is updated every cache_time_days. The cache can be forced to update, i.e, reload from
the web, by setting update to True.
:param heasarc_table_name: the name of a HEASARC browse table
:param update: force web read of the table and update cache
:param cache_time_days: number of days to hold the current cache
:return: pandas DataFrame with results and astropy table
"""
# make sure the table is a string
assert type(heasarc_table_name) is str
# point to the cache directory and create it if it is not existing
cache_directory = os.path.join(os.path.expanduser("~"), ".threeML", ".cache")
if_directory_not_existing_then_make(cache_directory)
cache_file = os.path.join(cache_directory, "%s_cache.yml" % heasarc_table_name)
cache_file_sanatized = sanitize_filename(cache_file)
# build and sanitize the votable XML file that will be saved
file_name = os.path.join(cache_directory, "%s_votable.xml" % heasarc_table_name)
file_name_sanatized = sanitize_filename(file_name)
if not file_existing_and_readable(cache_file_sanatized):
print(
"The cache for %s does not yet exist. We will try to build it\n"
% heasarc_table_name
)
write_cache = True
cache_exists = False
else:
with open(cache_file_sanatized) as cache:
# the cache file is two lines. The first is a datetime string that
# specifies the last time the XML file was obtained
yaml_cache = yaml.load(cache, Loader=yaml.SafeLoader)
cached_time = astro_time.Time(
datetime.datetime(*list(map(int, yaml_cache["last save"].split("-"))))
)
# the second line how many seconds to keep the file around
cache_valid_for = float(yaml_cache["cache time"])
# now we will compare it to the current time in UTC
current_time = astro_time.Time(datetime.datetime.utcnow(), scale="utc")
delta_time = current_time - cached_time
if delta_time.sec >= cache_valid_for:
# ok, this is an old file, we will update it
write_cache = True
cache_exists = True
else:
# we
write_cache = False
cache_exists = True
if write_cache or update:
print("Building cache for %s.\n" % heasarc_table_name)
# go to HEASARC and get the requested table
heasarc_url = (
"http://heasarc.gsfc.nasa.gov/cgi-bin/W3Browse/getvotable.pl?name=%s"
% heasarc_table_name
)
try:
urllib.request.urlretrieve(heasarc_url, filename=file_name_sanatized)
except (IOError):
warnings.warn(
"The cache is outdated but the internet cannot be reached. Please check your connection"
)
else:
# # Make sure the lines are interpreted as Unicode (otherwise some characters will fail)
with open(file_name_sanatized) as table_file:
# might have to add this in for back compt J MICHAEL
# new_lines = [x. for x in table_file.readlines()]
new_lines = table_file.readlines()
# now write the decoded lines back to the file
with codecs.open(file_name_sanatized, "w+", "utf-8") as table_file:
table_file.write("".join(new_lines))
# save the time that we go this table
with open(cache_file_sanatized, "w") as cache:
yaml_dict = {}
current_time = astro_time.Time(datetime.datetime.utcnow(), scale="utc")
yaml_dict["last save"] = current_time.datetime.strftime(
"%Y-%m-%d-%H-%M-%S"
)
seconds_in_day = 86400.0
yaml_dict["cache time"] = seconds_in_day * cache_time_days
yaml.dump(yaml_dict, stream=cache, default_flow_style=False)
# use astropy routines to read the votable
with warnings.catch_warnings():
warnings.simplefilter("ignore")
vo_table = votable.parse(file_name_sanatized)
table = vo_table.get_first_table().to_table(use_names_over_ids=True)
# make sure we do not use this as byte code
table.convert_bytestring_to_unicode()
# create a pandas table indexed by name
pandas_df = table.to_pandas().set_index("name")
del vo_table
return pandas_df