Source code for threeML.utils.data_download.Fermi_LAT.download_LAT_data

from __future__ import print_function
from future import standard_library

standard_library.install_aliases()
from builtins import str
import html.parser
import re
import socket
import time
import urllib.request, urllib.parse, urllib.error
import os
import glob

import astropy.io.fits as pyfits

from threeML.io.file_utils import sanitize_filename
from threeML.config.config import threeML_config
from threeML.utils.unique_deterministic_tag import get_unique_deterministic_tag
from threeML.io.download_from_http import ApacheDirectory

# Set default timeout for operations
socket.setdefaulttimeout(120)


class DivParser(html.parser.HTMLParser):
    """
    Extract data from a <div></div> tag
    """

    def __init__(self, desiredDivName):

        html.parser.HTMLParser.__init__(self)

        self.recording = 0

        self.data = []

        self.desiredDivName = desiredDivName
    def handle_starttag(self, tag, attributes):

        if tag != "div":
            return

        if self.recording:
            self.recording += 1
            return

        for name, value in attributes:

            if name == "id" and value == self.desiredDivName:
                break

        else:

            return

        self.recording = 1
    def handle_endtag(self, tag):

        if tag == "div" and self.recording:
            self.recording -= 1
    def handle_data(self, data):

        if self.recording:
            self.data.append(data)
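
# A minimal sketch of how DivParser is used below in download_LAT_data
# ("sec-wrapper" is one of the two div ids actually parsed by this module):
#
#     parser = DivParser("sec-wrapper")
#     parser.feed('<div id="sec-wrapper">The state of your query is 2</div>')
#     parser.data  # -> ['The state of your query is 2']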
# Keyword name to store the unique ID for the download
_uid_fits_keyword = "QUERYUID"
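
# A minimal sketch of how this keyword round-trips through a FITS primary
# header, assuming a hypothetical file name (download_LAT_data does exactly
# this on the real FT1/FT2 files):
#
#     with pyfits.open("L12345_PH00.fits", mode="update") as f:  # hypothetical file
#         f[0].header.set(_uid_fits_keyword, "some_unique_tag")
#
#     with pyfits.open("L12345_PH00.fits") as f:
#         f[0].header.get(_uid_fits_keyword)  # -> 'some_unique_tag'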
def merge_LAT_data(ft1s, destination_directory=".", outfile="ft1_merged.fits"):

    outfile = os.path.join(destination_directory, outfile)

    if os.path.exists(outfile):

        print(
            "Existing merged event file %s corresponds to the same selection. "
            "We assume you did not tamper with it, so we will return it instead of merging again. "
            "If you want to redo the merged FT1 file, remove it from the outdir" % (outfile)
        )

        return outfile

    if len(ft1s) == 1:

        print("Only one FT1 file provided. Skipping the merge...")

        import shutil

        shutil.copyfile(ft1s[0], outfile)

        return outfile

    # Write the list of FT1 files to a text file, which gtselect reads via the @ syntax
    _filelist = "_filelist.txt"
    infile = os.path.join(destination_directory, _filelist)

    with open(infile, "w") as infile_list:

        for ft1 in ft1s:
            infile_list.write(ft1 + "\n")

    # Merging requires gtselect from the Fermi Science Tools
    from GtApp import GtApp

    gtselect = GtApp("gtselect")
    gtselect["infile"] = "@" + infile
    gtselect["outfile"] = outfile
    gtselect["ra"] = "INDEF"
    gtselect["dec"] = "INDEF"
    gtselect["rad"] = "INDEF"
    gtselect["tmin"] = "INDEF"
    gtselect["tmax"] = "INDEF"
    gtselect["emin"] = "30"
    gtselect["emax"] = "1000000"
    gtselect["zmax"] = 180
    gtselect.run()

    return outfile
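
# A minimal usage sketch for merge_LAT_data, assuming hypothetical file names.
# Note that merging more than one FT1 file requires the Fermi Science Tools,
# since gtselect is imported from GtApp:
#
#     merged_ft1 = merge_LAT_data(
#         ["L12345_PH00.fits", "L12345_PH01.fits"],
#         destination_directory="my_new_data",
#         outfile="ft1_merged.fits",
#     )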
def download_LAT_data(
    ra,
    dec,
    radius,
    tstart,
    tstop,
    time_type,
    data_type="Photon",
    destination_directory=".",
):
    """
    Download data from the public LAT data server (of course you need a working internet connection). Data are
    selected in a circular Region of Interest (cone) centered on the provided coordinates.

    Example:

    ```
    > download_LAT_data(195.6, -35.4, 12.0, '2008-09-16 01:00:23', '2008-09-18 01:00:23',
    time_type='Gregorian', destination_directory='my_new_data')
    ```

    :param ra: R.A. (J2000) of the center of the ROI
    :param dec: Dec. (J2000) of the center of the ROI
    :param radius: radius (in degrees) of the ROI (use a larger radius than what you will need in the analysis)
    :param tstart: start time for the data
    :param tstop: stop time for the data
    :param time_type: type of the time input (one of MET, Gregorian or MJD)
    :param data_type: type of data to download. Use Photon if you use Source or cleaner classes, Extended otherwise.
        Default is Photon.
    :param destination_directory: directory where you want to save the data (default: current directory)
    :return: the paths to the downloaded FT1 and FT2 files
    """

    _known_time_types = ["MET", "Gregorian", "MJD"]

    assert time_type in _known_time_types, "Time type must be one of %s" % ",".join(
        _known_time_types
    )

    valid_classes = ["Photon", "Extended"]
    assert data_type in valid_classes, "Data type must be one of %s" % ",".join(
        valid_classes
    )

    assert radius > 0, "Radius of the Region of Interest must be > 0"
    assert 0 <= ra <= 360.0, "R.A. must be 0 <= ra <= 360"
    assert -90 <= dec <= 90, "Dec. must be -90 <= dec <= 90"

    # Create the output directory if it does not exist
    destination_directory = sanitize_filename(destination_directory, abspath=True)

    if not os.path.exists(destination_directory):
        os.makedirs(destination_directory)

    # This will automatically complete the form available at
    # http://fermi.gsfc.nasa.gov/cgi-bin/ssc/LAT/LATDataQuery.cgi
    # After submitting the form, an html page will inform about
    # the identifier assigned to the query and the time which will be
    # needed to process it. After retrieving the query number,
    # this function will wait for the files to be completed on the server,
    # then it will download them

    url = threeML_config["LAT"]["query form"]

    # Save the parameters for the query in a dictionary
    query_parameters = {}
    query_parameters["coordfield"] = "%.4f,%.4f" % (ra, dec)
    query_parameters["coordsystem"] = "J2000"
    query_parameters["shapefield"] = "%s" % radius
    query_parameters["timefield"] = "%s,%s" % (tstart, tstop)
    query_parameters["timetype"] = "%s" % time_type
    query_parameters["energyfield"] = "30,1000000"  # Download everything, we will choose later
    query_parameters["photonOrExtendedOrNone"] = data_type
    query_parameters["destination"] = "query"
    query_parameters["spacecraft"] = "checked"

    # Compute a unique ID for this query
    query_unique_id = get_unique_deterministic_tag(str(query_parameters))

    # Look for FT1 and FT2 files in the output directory matching this unique ID
    ft1s = glob.glob(os.path.join(destination_directory, "*PH??.fits"))
    ft2s = glob.glob(os.path.join(destination_directory, "*SC??.fits"))

    # Loop over all FT1 files and collect those matching the unique ID
    prev_downloaded_ft1s = []
    prev_downloaded_ft2 = None

    for ft1 in ft1s:

        with pyfits.open(ft1) as f:

            this_query_uid = f[0].header.get(_uid_fits_keyword)

            if this_query_uid == query_unique_id:

                # Found one!
                # Append it to the list, as there might be others
                prev_downloaded_ft1s.append(ft1)

    if len(prev_downloaded_ft1s) > 0:

        for ft2 in ft2s:

            with pyfits.open(ft2) as f:

                this_query_uid = f[0].header.get(_uid_fits_keyword)

                if this_query_uid == query_unique_id:

                    # Found it! (FT2 is a single file)
                    prev_downloaded_ft2 = ft2

                    break

    else:
        # No need to look any further: if there is no FT1 file, there should not be any FT2 file either
        pass

    # If we have both FT1 and FT2 files matching the ID, we do not need to download again
    if len(prev_downloaded_ft1s) > 0 and prev_downloaded_ft2 is not None:

        print(
            "Existing event files %s and Spacecraft file %s correspond to the same selection. "
            "We assume you did not tamper with them, so we will return those instead of downloading them again. "
            "If you want to download them again, remove them from the outdir"
            % (prev_downloaded_ft1s, prev_downloaded_ft2)
        )

        return (
            merge_LAT_data(
                prev_downloaded_ft1s,
                destination_directory,
                outfile="L%s_FT1.fits" % query_unique_id,
            ),
            prev_downloaded_ft2,
        )

    # Print out the query parameters
    print("Query parameters:")

    for k, v in query_parameters.items():
        print("%30s = %s" % (k, v))

    # POST encoding
    postData = urllib.parse.urlencode(query_parameters).encode("utf-8")

    temporaryFileName = "__temp_query_result.html"

    # Remove the temporary file if present
    try:

        os.remove(temporaryFileName)

    except OSError:

        pass

    # This is to avoid caching
    urllib.request.urlcleanup()

    # Submit the compiled form
    try:

        urllib.request.urlretrieve(url, temporaryFileName, lambda x, y, z: 0, postData)

    except socket.timeout:

        raise RuntimeError(
            "Time out when connecting to the server. Check your internet connection, or that the "
            "form at %s is accessible, then retry" % url
        )

    except Exception as e:

        print(e)

        raise RuntimeError(
            "Problems with the download. Check your internet connection, or that the "
            "form at %s is accessible, then retry" % url
        )

    # Now open the file, parse it and get the query ID
    with open(temporaryFileName) as htmlFile:

        lines = []

        for line in htmlFile:
            lines.append(line)

        html = " ".join(lines).strip()

    os.remove(temporaryFileName)

    # Extract data from the response
    parser = DivParser("sec-wrapper")
    parser.feed(html)

    if parser.data == []:

        parser = DivParser("right-side")
        parser.feed(html)

    try:

        # Get the line containing the time estimate
        estimatedTimeLine = [
            x
            for x in parser.data
            if x.find("The estimated time for your query to complete is") == 0
        ][0]

        # Extract the time estimate
        estimatedTimeForTheQuery = re.findall(
            "The estimated time for your query to complete is ([0-9]+) seconds",
            estimatedTimeLine,
        )[0]

    except IndexError:

        raise RuntimeError(
            "Problems with the download. Empty or wrong answer from the LAT server. "
            "Please retry later."
        )

    else:

        print(
            "\nEstimated completion time for your query: %s seconds"
            % estimatedTimeForTheQuery
        )

    http_address = [
        x for x in parser.data if x.find("https://fermi.gsfc.nasa.gov") >= 0
    ][0]

    print(
        "\nIf this download fails, you can find your data at %s (when ready)\n"
        % http_address
    )

    # Now periodically check whether the query is complete
    startTime = time.time()
    timeout = max(1.5 * max(5.0, float(estimatedTimeForTheQuery)), 120)  # seconds
    refreshTime = min(float(estimatedTimeForTheQuery) / 2.0, 5.0)  # seconds

    # Precompile the regular expression matching the download links
    regexpr = re.compile("wget (.*.fits)")

    # Fetch the status of the query every refreshTime seconds, until we get status=2 (success)
    links = None
    fakeName = "__temp__query__result.html"

    while time.time() <= startTime + timeout:

        # Try and fetch the html with the results
        try:

            _ = urllib.request.urlretrieve(http_address, fakeName)

        except socket.timeout:

            urllib.request.urlcleanup()

            raise RuntimeError(
                "Time out when connecting to the server. Check your internet connection, or that "
                "you can access %s, then retry" % threeML_config["LAT"]["query form"]
            )

        except Exception as e:

            print(e)

            urllib.request.urlcleanup()

            raise RuntimeError(
                "Problems with the download. Check your connection or that you can access "
                "%s, then retry." % threeML_config["LAT"]["query form"]
            )

        with open(fakeName) as f:

            html = " ".join(f.readlines())

        status = re.findall("The state of your query is ([0-9]+)", html)[0]

        if status == "2":

            # Success! Get the download links
            links = regexpr.findall(html)

            # Remove the temporary file
            os.remove(fakeName)

            # We're done
            break

        else:

            # Clean up and try again after a while
            os.remove(fakeName)

            urllib.request.urlcleanup()

            time.sleep(refreshTime)

    remotePath = "%s/queries/" % threeML_config["LAT"]["public HTTP location"]

    if links is not None:

        filenames = [x.split("/")[-1] for x in links]

        print("\nDownloading FT1 and FT2 files...")

        downloader = ApacheDirectory(remotePath)

        downloaded_files = [
            downloader.download(filename, destination_directory)
            for filename in filenames
        ]

    else:

        raise RuntimeError("Could not download LAT Standard data")

    # Now separate the FT1 files from the FT2 file (they might be out of order)
    FT1 = []
    FT2 = None

    for fits_file in downloaded_files:

        # Open the FITS file and write the unique key for this query, so that the download will not be
        # repeated if not necessary
        with pyfits.open(fits_file, mode="update") as f:

            f[0].header.set(_uid_fits_keyword, query_unique_id)

        if re.match(".+SC[0-9][0-9].fits", str(fits_file)) is not None:

            FT2 = fits_file

        else:

            FT1.append(fits_file)

    return (
        merge_LAT_data(
            FT1, destination_directory, outfile="L%s_FT1.fits" % query_unique_id
        ),
        FT2,
    )
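
# A minimal end-to-end sketch, reusing the coordinates and times from the
# docstring example above; the two return values are the (possibly merged)
# FT1 event file and the FT2 spacecraft file:
#
#     ft1, ft2 = download_LAT_data(
#         195.6,
#         -35.4,
#         12.0,
#         "2008-09-16 01:00:23",
#         "2008-09-18 01:00:23",
#         time_type="Gregorian",
#         destination_directory="my_new_data",
#     )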