#!/usr/bin/env python
#
# Copyright (C) 2010--2014  Kipp Cannon, Chad Hanna
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

## @file
# A program to compute the likelihood ratios of inspiral triggers
#
# ### Command line interface
#
#	+ `--input-cache` [filename]: Also process the files named in this LAL cache.  See lalapps_path2cache for information on how to produce a LAL cache file.
#	+ `--likelihood-url` [URL]: Set the name of the likelihood ratio data file to use.  Can be given more than once.  Filenames and URLs are accepted.
#	+ `--likelihood-cache` [filename]: Also load the likelihood ratio data files listed in this LAL cache.  See lalapps_path2cache for information on how to produce a LAL cache file.
#	+ `--tmp-space` [path]: Path to a directory suitable for use as a work area while manipulating the database file.  The database file will be worked on in this directory, and then moved to the final location when complete.  This option is intended to improve performance when running in a networked environment, where there might be a local disk with higher bandwidth than is available to the filesystem on which the final output will reside.
#	+ `--vetoes-name` [name]: Set the name of the segment lists to use as vetoes (default = do not apply vetoes).
#	+ `--force`: Force recomputation of likelihood values.
#	+ `--verbose`: Be verbose.
#	+ `--write-likelihood` [filename]: Write merged raw likelihood data to this file.
#	+ `--trim-database`: Delete events that are found to be below the pipeline's likelihood ratio threshold (default = do not delete them).  Deleting the events saves a significant amount of disk space but is inconvenient during pipeline development and tuning as it makes it impossible to rerank the events later with a different ranking statistic.

#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#


from optparse import OptionParser
import sys


from glue import iterutils
from glue.lal import CacheEntry
from glue.text_progress_bar import ProgressBar
from glue.ligolw import ligolw
from glue.ligolw import lsctables
from glue.ligolw import utils as ligolw_utils
from glue.ligolw.utils import process as ligolw_process
from glue.ligolw.utils import search_summary as ligolw_search_summary
from glue.ligolw.utils import segments as ligolw_segments
from glue import segments
from pylal import ligolw_burca2
from pylal import ligolw_thinca
from pylal import snglcoinc
from gstlal import far


__author__ = "Kipp Cannon <kipp.cannon@ligo.org>"
__version__ = "git id %s" % ""	# FIXME
__date__ = ""	# FIXME


#
# =============================================================================
#
#                                 Command Line
#
# =============================================================================
#


def parse_command_line():
	"""
	Parse the command line (sys.argv) and return the tuple (options,
	filenames).

	In addition to the attributes created by the option parser, the
	returned options object carries a likelihood_urls attribute:  the
	list of URLs given with --likelihood-url followed by the URLs read
	from the --likelihood-cache file, if any.  filenames is the list of
	positional arguments followed by the paths read from the
	--input-cache file, if any.

	Raises ValueError if no likelihood URL was supplied by either
	mechanism.
	"""
	parser = OptionParser(
		version = "Name: %%prog\n%s" % "" # FIXME
	)
	parser.add_option("-c", "--input-cache", metavar = "filename", help = "Also process the files named in this LAL cache.  See lalapps_path2cache for information on how to produce a LAL cache file.")
	parser.add_option("-l", "--likelihood-url", metavar = "URL", action = "append", help = "Set the name of the likelihood ratio data file to use.  Can be given more than once.  Filenames and URLs are accepted.")
	parser.add_option("--likelihood-cache", metavar = "filename", help = "Also load the likelihood ratio data files listsed in this LAL cache.  See lalapps_path2cache for information on how to produce a LAL cache file.")
	parser.add_option("-t", "--tmp-space", metavar = "path", help = "Path to a directory suitable for use as a work area while manipulating the database file.  The database file will be worked on in this directory, and then moved to the final location when complete.  This option is intended to improve performance when running in a networked environment, where there might be a local disk with higher bandwidth than is available to the filesystem on which the final output will reside.")
	parser.add_option("--vetoes-name", metavar = "name", help = "Set the name of the segment lists to use as vetoes (default = do not apply vetoes).")
	parser.add_option("-f", "--force", action = "store_true", help = "Force recomputation of likelihood values.")
	parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")
	parser.add_option("--trim-database", action = "store_true", help = "Delete events that are found to be below the pipeline's likelihood ratio threshold (default = do not delete them).  Deleting the events saves a significant amount of disk space but is inconvenient during pipeline development and tuning as it makes it impossible to rerank the events later with a different ranking statistic.")
	options, filenames = parser.parse_args()

	# merge the URLs from both sources into one list.  the "append"
	# action leaves likelihood_url set to None when the option is never
	# given, so it is that attribute (not the freshly created list) that
	# must be tested before extending
	options.likelihood_urls = []
	if options.likelihood_url is not None:
		options.likelihood_urls += options.likelihood_url
	if options.likelihood_cache is not None:
		# one cache entry per line;  close the file when done
		with open(options.likelihood_cache) as cache_file:
			options.likelihood_urls += [CacheEntry(line).url for line in cache_file]
	if not options.likelihood_urls:
		raise ValueError("no likelihood URLs specified")

	if options.input_cache:
		with open(options.input_cache) as cache_file:
			filenames += [CacheEntry(line).path for line in cache_file]

	return options, filenames


#
# =============================================================================
#
#                   Support Funcs for Likelihood Ratio Code
#
# =============================================================================
#


def sngl_inspiral_veto_func(event, vetoseglists):
	"""
	Decide whether a single-detector event survives the vetoes.

	Returns True if the event should be *retained*:  either there is no
	veto entry for the event's instrument (event.ifo) in vetoseglists,
	or the event's end time (event.get_end()) is not contained in that
	instrument's entry.  Returns False otherwise.
	"""
	# no entry for this instrument means nothing can veto the event
	if event.ifo not in vetoseglists:
		return True
	return event.get_end() not in vetoseglists[event.ifo]


#
# =============================================================================
#
#                                     Main
#
# =============================================================================
#


#
# command line
#


options, filenames = parse_command_line()


#
# load parameter distribution data
#


# fold the parameter distribution data and the segment lists from every
# likelihood file into a single object of each kind
seglists = segments.segmentlistdict()
coincparamsdistributions = None
for n, url in enumerate(options.likelihood_urls, start = 1):
	if options.verbose:
		print >>sys.stderr, "%d/%d:" % (n, len(options.likelihood_urls)),
	xmldoc = ligolw_utils.load_url(url, contenthandler = far.ThincaCoincParamsDistributions.LIGOLWContentHandler, verbose = options.verbose)
	# only the first and last elements of the parsed result are used
	url_distributions, _, url_seglists = far.parse_likelihood_control_doc(xmldoc)
	xmldoc.unlink()
	if url_distributions is None:
		raise ValueError("%s does not contain parameter distribution data" % url)
	if coincparamsdistributions is not None:
		coincparamsdistributions += url_distributions
	else:
		# first file:  adopt its distributions as the running total
		coincparamsdistributions = url_distributions
	seglists |= url_seglists
if options.verbose:
	livetimes = ("%s = %s s" % (instrument, str(abs(segs))) for instrument, segs in seglists.items())
	print >>sys.stderr, "total livetime:\n\t%s" % ",\n\t".join(livetimes)

# Compute the probability of instruments given signal
coincparamsdistributions.populate_prob_of_instruments_given_signal(segs = seglists, n = 1.0, verbose = options.verbose)

# compute the instrument combination counts
coincparamsdistributions.add_instrument_combination_counts(segs = seglists, verbose = options.verbose)

#
# rebuild event parameter PDFs (+= method has not constructed these
# correctly, and we might have added additional priors to the histograms),
# then initialize likelihood ratio evaluator
#


coincparamsdistributions.finish(verbose = options.verbose)
ln_likelihood_ratio_func = snglcoinc.LnLikelihoodRatio(coincparamsdistributions)


#
# iterate over files
#


# names of input files that could not be loaded;  reported after the loop
# so that the remaining files still get processed
failed = []
for n, filename in enumerate(filenames, 1):
	#
	# Open the file.  Be lazy and use the content handler for the
	# distribution data files because it's fine for this, too.  If a
	# file can't be loaded because of a filesystem failure or CRC
	# failure, or whatever, try to do the rest of the files before
	# exiting instead of crashing right away to reduce the time spent
	# in rescue dags.
	#

	if options.verbose:
		print >>sys.stderr, "%d/%d:" % (n, len(filenames)),
	try:
		xmldoc = ligolw_utils.load_filename(filename, contenthandler = far.ThincaCoincParamsDistributions.LIGOLWContentHandler, verbose = options.verbose)
	except Exception as e:
		# NOTE:  the reason for the failure is only shown in verbose
		# mode;  the filename itself is always recorded
		if options.verbose:
			print >>sys.stderr, "failed to load '%s': %s.  trying to continue with remaining files" % (filename, str(e))
		failed.append(filename)
		continue

	# unless --force was given, leave alone any document that already
	# records a process entry for this program
	if not options.force and ligolw_process.doc_includes_process(xmldoc, u"gstlal_inspiral_calc_likelihood"):
		if options.verbose:
			print >>sys.stderr, "already processed, skipping"
		xmldoc.unlink()
		continue

	#
	# Summarize the database, and record our passage.
	#

	# look up (without creating) the coinc_def_id for inspiral
	# coincidences;  a KeyError is taken to mean the document has none
	try:
		coinc_def_id = lsctables.CoincDefTable.get_table(xmldoc).get_coinc_def_id(ligolw_thinca.InspiralCoincDef.search, ligolw_thinca.InspiralCoincDef.search_coinc_type, create_new = False)
	except KeyError:
		if options.verbose:
			print >>sys.stderr, "document does not contain inspiral coincidences.  skipping."
		xmldoc.unlink()
		continue

	process = ligolw_process.register_to_xmldoc(xmldoc, u"gstlal_inspiral_calc_likelihood", {})
	# NOTE:  the returned search_summary row is not used again in this loop
	search_summary = ligolw_search_summary.append_search_summary(xmldoc, process, ifos = seglists.keys(), inseg = seglists.extent_all(), outseg = seglists.extent_all())

	if options.verbose:
		print >>sys.stderr, "indexing document ...",
	# event_id --> sngl_inspiral row
	sngl_inspiral_table_index = dict((row.event_id, row) for row in lsctables.SnglInspiralTable.get_table(xmldoc))
	# coinc_event_id --> list of sngl_inspiral rows, restricted to coincs
	# with the inspiral coinc_def_id found above.  the lists start empty
	# and are filled from the coinc_event_map table below
	coinc_event_map_index = dict((row.coinc_event_id, []) for row in lsctables.CoincTable.get_table(xmldoc) if row.coinc_def_id == coinc_def_id)
	for row in lsctables.CoincMapTable.get_table(xmldoc):
		# ignore map rows belonging to coincs that are not in the index
		if row.coinc_event_id not in coinc_event_map_index:
			continue
		coinc_event_map_index[row.coinc_event_id].append(sngl_inspiral_table_index[row.event_id])
	# the by-event_id index was only needed to build the map above
	del sngl_inspiral_table_index

	offset_vectors = lsctables.TimeSlideTable.get_table(xmldoc).as_dict()

	# use the named veto segment lists from the document if requested,
	# otherwise an empty collection so that nothing is vetoed
	if options.vetoes_name is not None:
		vetoseglists = ligolw_segments.segmenttable_get_by_name(xmldoc, options.vetoes_name).coalesce()
	else:
		vetoseglists = segments.segmentlistdict()
	if options.verbose:
		print >>sys.stderr, "done"

	#
	# Run likelihood ratio calculation.
	#

	# events_func ignores its first argument and answers from the index
	# built above
	ligolw_burca2.assign_likelihood_ratios_xml(
		xmldoc = xmldoc,
		coinc_def_id = coinc_def_id,
		offset_vectors = offset_vectors,
		vetoseglists = vetoseglists,
		events_func = lambda _, coinc_event_id: coinc_event_map_index[coinc_event_id],
		veto_func = sngl_inspiral_veto_func,
		ln_likelihood_ratio_func = ln_likelihood_ratio_func,
		likelihood_params_func = coincparamsdistributions.coinc_params,
		verbose = options.verbose
	)

	#
	# Delete low significance events to reduce database size
	#

	if options.trim_database:
		# inspiral coincs whose likelihood is below the threshold
		coinc_ids_to_delete = set(row.coinc_event_id for row in lsctables.CoincTable.get_table(xmldoc) if row.coinc_def_id == coinc_def_id and row.likelihood < far.RankingData.ln_likelihood_ratio_threshold)
		# FIXME:  don't hard-code program and parameter names
		# the one-element unpacking fails unless exactly one value is
		# returned
		sngls_snr_threshold, = ligolw_process.get_process_params(xmldoc, u"gstlal_inspiral", u"--singles-threshold")
		# singles that participate in a deleted coinc and whose SNR is
		# below the singles threshold
		sngl_ids_to_delete = set(sngl.event_id for coinc_event_id in coinc_ids_to_delete for sngl in coinc_event_map_index[coinc_event_id] if sngl.snr < sngls_snr_threshold)

		# remove the selected rows from each table, in place
		iterutils.inplace_filter((lambda row: row.coinc_event_id not in coinc_ids_to_delete), lsctables.CoincTable.get_table(xmldoc))
		iterutils.inplace_filter((lambda row: row.coinc_event_id not in coinc_ids_to_delete), lsctables.CoincInspiralTable.get_table(xmldoc))
		iterutils.inplace_filter((lambda row: row.coinc_event_id not in coinc_ids_to_delete), lsctables.CoincMapTable.get_table(xmldoc))
		iterutils.inplace_filter((lambda row: row.event_id not in sngl_ids_to_delete), lsctables.SnglInspiralTable.get_table(xmldoc))

	#
	# Close out process metadata.
	#

	ligolw_process.set_process_end_time(process)

	#
	# Clean up.
	#

	# the document is written back to the same filename it was loaded
	# from, gzip-compressed if that name ends in ".gz".
	# NOTE(review):  the "or" guard presumably covers a None filename
	# meaning stdin/stdout --- confirm
	ligolw_utils.write_filename(xmldoc, filename, gz = (filename or "stdout").endswith(".gz"), verbose = options.verbose)
	xmldoc.unlink()


#
# Crash if any input files were broken
#


if failed:
	# name every input file that could not be loaded in a single error
	failed_names = ", ".join("'%s'" % name for name in failed)
	raise ValueError("%s could not be processed" % failed_names)
