#!/usr/bin/env python
#
# Copyright (C) 2011  Chad Hanna
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

"""
This program makes a dag for a gstlal inspiral low latency pipeline
"""

__author__ = 'Chad Hanna <channa@caltech.edu>'

##############################################################################
# import standard modules and append the lalapps prefix to the python path
import copy
import getpass
import os
import shutil
import stat
import sys

##############################################################################
# import the modules we need to build the pipeline
from glue import pipeline
from glue import lal
import glue.ligolw.utils as utils
from optparse import OptionParser
from gstlal import inspiral
from gstlal import inspiral_pipe
from gstlal import dagparts
from gstlal import datasource

## @file gstlal_ll_trigger_pipe
# This program will make create a HTCondor DAG to automate the running of low-latency, online gstlal_inspiral jobs; see gstlal_ll_trigger_pipe

## @package gstlal_ll_trigger_pipe
#
# ### Graph of the HTCondor DAG
#
# - gray boxes are optional and depend on the command line given
#
# @dot
# digraph G {
#       // graph properties
#
#       rankdir=LR;
#       compound=true;
#       node [shape=record fontsize=10 fontname="Verdana"];     
#       edge [fontsize=8 fontname="Verdana"];
#	gstlal_inspiral [URL="\ref gstlal_inspiral"];
#	gstlal_llcbcsummary [URL="\ref gstlal_llcbcsummary"];
#	gstlal_llcbcnode [URL="\ref gstlal_llcbcnode"];
#	gstlal_inspiral_marginalize_likelihoods_online [URL="\ref gstlal_inspiral_marginalize_likelihoods_online"];
#	lvalert_listen [style=filled, color=lightgrey, URL="https://www.lsc-group.phys.uwm.edu/daswg/docs/howto/lvalert-howto.html"];
# }
# @enddot
#
# ### Usage cases
#
# - Typical usage case
#
# ### Command line options
#
#	+ `--psd-fft-length` [int] (s): FFT length, default 16s
#	+ `--reference-psd` [filename]: Set the reference psd file
#	+ `--bank-cache` [filenames]: Set the bank cache files in format H1=H1.cache,H2=H2.cache, etc
#	+ `--channel` [IFO=CHANNEL-NAME]: Set the name of the channel to process (optional).  The default is "LSC-STRAIN" for all detectors. Override with IFO=CHANNEL-NAME can be given multiple times
#	+ `--ht-gate-threshold` [float]: Set the h(t) gate threshold to reject glitches
#	+ `--do-iir-pipeline`: Run the iir pipeline instead of lloid
#	+ `--num-banks` [str]: The number of banks per job. can be given as a list like 1,2,3,4 then it will split up the bank cache into N groups with M banks each
#	+ `--max-jobs` [num]: Stop parsing the cache after reaching a certain number of jobs to limit what is submitted to the HTCondor pool
#	+ `--likelihood-file` [filename]: Set the likelihood file, required if --copy-likelihoods is used
#	+ `--marginalized-likelihood-file` [filename]: Set the marginalized likelihood file, required
#	+ `--control-peak-time` [int] (s): Set the control peak time, default 4
#	+ `--fir-stride` [int] (s): Set the fir bank stride, default 4
#	+ `--thinca-interval` [int] (s): Set the thinca interval, default 10
#	+ `--gracedb-far-threshold` [float] (Hz): False alarm rate threshold for gracedb (Hz). Set to -1 to disable uploads
#	+ `--gracedb-type` [str]: Set the gracedb type, default LowMass
#	+ `--gracedb-group` [str]: Set the gracedb group, default Test
#	+ `--data-source` [lvshm|framexmit]: Where to get the data from. Default lvshm
#	+ `--copy-likelihoods`: Copy the likelihood files from a seed, must give --likelihood-file: WARNING overwrites existing files
#	+ `--injections` [filename]: Set the name of the LIGO light-weight XML file from which to load injections (optional)
#	+ `--veto-segments-file` [filename]: Set the name of the LIGO light-weight XML file from which to load vetoes (optional)
#	+ `--veto-segments-name` [name]: Set the name of the segments to extract from the segment tables and use as the veto list. Default is "vetoes"
#	+ `--state-vector-on-bits` [IFO=bits]: Set the state vector on bits to process (optional).  The default is 0x7 for all detectors. Override with IFO=bits can be given multiple times
#	+ `--state-vector-off-bits` [IFO=bits]: Set the state vector off bits to process (optional).  The default is 0x160 for all detectors. Override with IFO=bits can be given multiple times
#	+ `--lvalert-listener-program` [program]: Set the programs to respond to lvalerts from this analysis, can be given multiple times
#

#
# gstlal_inspiral
#


class gstlal_inspiral_job(pipeline.CondorDAGJob):
	"""
	HTCondor job class for the online gstlal_inspiral jobs.
	"""
	def __init__(self, executable=dagparts.which('gstlal_inspiral'), tag_base='gstlal_inspiral'):
		"""
		Configure the submit-file properties for a vanilla-universe
		gstlal_inspiral job.
		"""
		self.__prog__ = 'gstlal_inspiral'
		self.__executable = executable
		self.__universe = 'vanilla'
		pipeline.CondorDAGJob.__init__(self, self.__universe, self.__executable)
		# export the user environment, allow a clean shutdown on
		# kill_sig 15 (SIGTERM), and restrict the job to machines
		# advertising the Online_CBC_SVD attribute
		for cmd, val in (
				('getenv', 'True'),
				('want_graceful_removal', 'True'),
				('kill_sig', '15'),
				('+Online_CBC_SVD', 'True'),
				('Requirements', '(TARGET.Online_CBC_SVD =?= True)')):
			self.add_condor_cmd(cmd, val)
		self.tag_base = tag_base
		self.set_sub_file('%s.sub' % tag_base)
		logbase = 'logs/%s-$(macrojobtag)-$(macronodename)-$(cluster)-$(process)' % tag_base
		self.set_stdout_file(logbase + '.out')
		self.set_stderr_file(logbase + '.err')
		# running counter used to hand out unique per-node job tags
		self.number = 1

#
# gstlal_iir_inspiral
#


class gstlal_iir_inspiral_job(pipeline.CondorDAGJob):
	"""
	HTCondor job class for the online gstlal_iir_inspiral jobs.
	"""
	def __init__(self, executable=dagparts.which('gstlal_iir_inspiral'), tag_base='gstlal_iir_inspiral'):
		"""
		Configure the submit-file properties for a vanilla-universe
		gstlal_iir_inspiral job.
		"""
		self.__prog__ = 'gstlal_iir_inspiral'
		self.__executable = executable
		self.__universe = 'vanilla'
		pipeline.CondorDAGJob.__init__(self, self.__universe, self.__executable)
		# export the user environment, allow a clean shutdown on
		# kill_sig 15 (SIGTERM), and tag the job with the
		# Online_CBC_IIR attribute
		for cmd, val in (
				('getenv', 'True'),
				('want_graceful_removal', 'True'),
				('kill_sig', '15'),
				('+Online_CBC_IIR', 'True')):
			self.add_condor_cmd(cmd, val)
		self.tag_base = tag_base
		self.set_sub_file('%s.sub' % tag_base)
		logbase = 'logs/%s-$(macrojobtag)-$(macronodename)-$(cluster)-$(process)' % tag_base
		self.set_stdout_file(logbase + '.out')
		self.set_stderr_file(logbase + '.err')
		# running counter used to hand out unique per-node job tags
		self.number = 1

class gstlal_inspiral_marginalize_likelihoods_online_job(pipeline.CondorDAGJob):
	"""
	HTCondor job class for gstlal_inspiral_marginalize_likelihoods_online.
	"""
	def __init__(self, executable=dagparts.which('gstlal_inspiral_marginalize_likelihoods_online'), tag_base='gstlal_inspiral_marginalize_likelihoods_online'):
		"""
		Configure the submit-file properties for a local-universe
		marginalization job.
		"""
		self.__prog__ = 'gstlal_inspiral_marginalize_likelihoods_online'
		self.__executable = executable
		self.__universe = 'local'
		pipeline.CondorDAGJob.__init__(self, self.__universe, self.__executable)
		# export the user environment and allow a clean shutdown on
		# kill_sig 15 (SIGTERM)
		for cmd, val in (
				('getenv', 'True'),
				('want_graceful_removal', 'True'),
				('kill_sig', '15')):
			self.add_condor_cmd(cmd, val)
		self.tag_base = tag_base
		self.set_sub_file('%s.sub' % tag_base)
		logbase = 'logs/%s-$(macroid)-$(macronodename)-$(cluster)-$(process)' % tag_base
		self.set_stdout_file(logbase + '.out')
		self.set_stderr_file(logbase + '.err')

class gstlal_inspiral_marginalize_likelihoods_online_node(pipeline.CondorDAGNode):
	"""
	DAG node for the gstlal_inspiral_marginalize_likelihoods_online job.
	"""
	def __init__(self, job, dag, output):
		pipeline.CondorDAGNode.__init__(self, job)
		# the program receives the working directory and the output
		# file name as two positional arguments
		workdir = os.getcwd()
		self.add_var_arg("%s %s" % (workdir, output))
		dag.add_node(self)

class lvalert_listen_job(pipeline.CondorDAGJob):
	"""
	A lvalert_listen job.

	Besides configuring the HTCondor submit-file properties, constructing
	this job writes two helper files into the current working directory:

	- lvalert.sh: an executable shell script that tees each lvalert
	  message on stdin to every program listed in progs
	- lvalert.ini: the lvalert_listen configuration; its single section is
	  named "<gracedb_group>_<gracedb_type>" (lower-cased) and points at
	  lvalert.sh

	WARNING: both files are overwritten if they already exist.
	"""
	def __init__(self, executable=dagparts.which('lvalert_listen'), tag_base='lvalert_listen', gracedb_group = "CBC", gracedb_type = "LowMass", progs = ("gstlal_inspiral_lvalert_psd_plotter", "gstlal_inspiral_followups_from_gracedb")):
		"""
		"""
		self.__prog__ = 'lvalert_listen'
		self.__executable = executable
		self.__universe = 'local'
		pipeline.CondorDAGJob.__init__(self,self.__universe,self.__executable)
		self.add_condor_cmd('getenv','True')
		# these jobs gracefully shutdown on kill_sig 15 (SIGTERM)
		self.add_condor_cmd("want_graceful_removal", "True")
		self.add_condor_cmd("kill_sig", "15")
		self.tag_base = tag_base
		self.set_sub_file(tag_base+'.sub')
		self.set_stdout_file('logs/'+tag_base+'-$(macroid)-$(macronodename)-$(cluster)-$(process).out')
		self.set_stderr_file('logs/'+tag_base+'-$(macroid)-$(macronodename)-$(cluster)-$(process).err')

		# produce the lvalert processor script.  use context managers so
		# the files are flushed and closed even if an error occurs
		# part-way through writing them
		with open("lvalert.sh", "w") as f:
			f.write("#!/bin/bash \n")
			f.write('cat <&0 | tee ')
			for prog in progs:
				f.write(">(%s) " % dagparts.which(prog))
		# mark the script executable for the owner
		os.chmod('lvalert.sh', os.stat('lvalert.sh').st_mode | stat.S_IEXEC)

		with open("lvalert.ini", "w") as f:
			#FIXME gracedb server code sets up nodes based on this convention
			f.write("[%s_%s]\n" % (gracedb_group.lower(), gracedb_type.lower()))
			f.write("executable=./lvalert.sh")


class lvalert_listen_node(pipeline.CondorDAGNode):
	"""
	lvalert_listen node.

	Prompts interactively for the lvalert credentials when the DAG is
	built.  The password is read with getpass.getpass() so it is not
	echoed to the terminal or left in the scrollback.
	"""
	def __init__(self, job, dag):
		pipeline.CondorDAGNode.__init__(self,job)
		self.add_var_opt("username", raw_input("lvalert username: "))
		# getpass suppresses terminal echo for the password prompt
		self.add_var_opt("password", getpass.getpass("lvalert password: "))
		self.add_var_opt("config-file", "lvalert.ini")
		dag.add_node(self)


class gstlal_ll_inspiral_get_urls_job(pipeline.CondorDAGJob):
	"""
	HTCondor job class for gstlal_ll_inspiral_get_urls.
	"""
	def __init__(self, executable=dagparts.which('gstlal_ll_inspiral_get_urls'), tag_base='gstlal_ll_inspiral_get_urls'):
		"""
		Configure the submit-file properties for a local-universe
		gstlal_ll_inspiral_get_urls job.
		"""
		self.__prog__ = 'gstlal_ll_inspiral_get_urls'
		self.__executable = executable
		self.__universe = 'local'
		pipeline.CondorDAGJob.__init__(self, self.__universe, self.__executable)
		# export the user environment and allow a clean shutdown on
		# kill_sig 15 (SIGTERM)
		for cmd, val in (
				('getenv', 'True'),
				('want_graceful_removal', 'True'),
				('kill_sig', '15')):
			self.add_condor_cmd(cmd, val)
		self.tag_base = tag_base
		self.set_sub_file('%s.sub' % tag_base)
		logbase = 'logs/%s-$(macroid)-$(macronodename)-$(cluster)-$(process)' % tag_base
		self.set_stdout_file(logbase + '.out')
		self.set_stderr_file(logbase + '.err')


class gstlal_ll_inspiral_get_urls_node(pipeline.CondorDAGNode):
	"""
	gstlal_ll_inspiral_get_urls node.
	"""
	def __init__(self, job, dag, numjobs):
		pipeline.CondorDAGNode.__init__(self, job)
		# positional arguments: current directory, the job-tag range
		# 0001 through numjobs, and 25 (presumably a polling cadence —
		# confirm against gstlal_ll_inspiral_get_urls)
		arg = ". 0001,%04d 25" % (numjobs,)
		self.add_var_arg(arg)
		dag.add_node(self)

class gstlal_inspiral_node(pipeline.CondorDAGNode):
	"""
	A gstlal_inspiral node.

	Configures one online gstlal_inspiral job: sets its command-line
	options from the given arguments, assigns it a unique 4-digit job tag
	taken from (and incrementing) job.number, and adds it to the DAG.

	likelihood_file is required: each node gets its own copy of the
	likelihood file named "<jobtag>_<basename>" in the current directory,
	seeded from likelihood_file when copy_likelihoods is set (WARNING:
	overwrites an existing file).
	"""
	#FIXME add frame segments, name and veto segments name
	def __init__(self, job, dag, channel_dict, reference_psd, svd_bank, tmp_space=dagparts.log_path(), ht_gate_thresh=10.0, control_peak_time = 5, fir_stride = 5, thinca_interval = 10, likelihood_file = None, marginalized_likelihood_file = None, gracedb_far_threshold = None, gracedb_group = None, gracedb_type = None, injections = None, data_source = "lvshm", copy_likelihoods = False, veto_segments_file = None, veto_segments_name = None, state_vector_on_off_dict = None, psd_fft_length = 16, p_node=()):

		# fail early with a clear message instead of an AttributeError
		# from os.path.split(None) below
		if likelihood_file is None:
			raise ValueError("likelihood_file is required")
		pipeline.CondorDAGNode.__init__(self,job)
		self.add_var_opt("psd-fft-length", psd_fft_length)
		# FIXME add as an argument rather than hard code
		self.add_var_opt("likelihood-snapshot-interval", 14400)
		self.add_var_opt("channel-name", datasource.pipeline_channel_list_from_channel_dict(channel_dict))
		if reference_psd is not None:
			self.add_var_opt("reference-psd", reference_psd)
		self.add_var_opt("svd-bank", svd_bank)
		self.add_var_opt("tmp-space", tmp_space)
		self.add_var_opt("track-psd", "")
		self.add_var_opt("data-source", data_source)
		self.add_var_opt("control-peak-time", control_peak_time)
		self.add_var_opt("fir-stride", fir_stride)
		self.add_var_opt("thinca-interval", thinca_interval)
		# the 4-digit job tag identifies this node in log files and
		# output; macrojobtag feeds the per-node stdout/stderr names
		self.add_var_opt("job-tag", "%04d" % (job.number,))
		self.add_macro("macrojobtag", "%04d" % (job.number,))
		if ht_gate_thresh is not None:
			self.add_var_opt("ht-gate-threshold", ht_gate_thresh)
		if gracedb_far_threshold is not None:
			self.add_var_opt("gracedb-far-threshold", gracedb_far_threshold)
		if gracedb_group is not None:
			self.add_var_opt("gracedb-group", gracedb_group)
		if gracedb_type is not None:
			self.add_var_opt("gracedb-type", gracedb_type)
		if injections is not None:
			self.add_var_opt("injections", injections)
		if veto_segments_file is not None:
			self.add_var_opt("veto-segments-file", veto_segments_file)
		if veto_segments_name is not None:
			self.add_var_opt("veto-segments-name", veto_segments_name)
		# self.add_var_opt("verbose", "") #Put this in for debugging
		# FIXME, the way the likelihood file is handled here is a mess.
		likefile = os.path.split(likelihood_file)[1]
		path = os.getcwd()
		# make a new per-job likelihood file in the working directory
		likefile = "%s/%04d_%s" % (path, job.number, likefile)
		if copy_likelihoods:
			shutil.copyfile(likelihood_file, likefile)
		self.add_var_opt("likelihood-file", likefile)
		self.add_var_opt("marginalized-likelihood-file", marginalized_likelihood_file)
		if state_vector_on_off_dict is not None:
			on, off = inspiral.state_vector_on_off_list_from_bits_dict(state_vector_on_off_dict)
			self.add_var_opt("state-vector-on-bits", on)
			self.add_var_opt("state-vector-off-bits", off)
		self.set_name("gstlal_inspiral_%04d" % job.number)
		job.number += 1
		for p in p_node:
			self.add_parent(p)
		dag.add_node(self)


#
# Parse the command line
#


def parse_command_line():
	"""
	Parse the command line.

	Returns a tuple (options, filenames, bankcache, channel_dict) where
	bankcache is the parsed --bank-cache dictionary and channel_dict maps
	instrument names to channel names.  Raises ValueError if a required
	option is missing or if --copy-likelihoods is given without
	--likelihood-file.
	"""
	parser = OptionParser(description = __doc__)
	parser.add_option("--psd-fft-length", metavar = "s", default = 16, type = "int", help = "FFT length, default 16s")
	parser.add_option("--reference-psd", metavar = "filename", help = "Set the reference psd file.")
	parser.add_option("--bank-cache", metavar = "filenames", help = "Set the bank cache files in format H1=H1.cache,H2=H2.cache, etc..")
	parser.add_option("--channel", metavar = "name", default=[], action = "append", help = "Set the name of the channel to process (optional).  The default is \"LSC-STRAIN\" for all detectors. Override with IFO=CHANNEL-NAME can be given multiple times")
	parser.add_option("--ht-gate-threshold", metavar = "float", help = "Set the h(t) gate threshold to reject glitches", type="float")
	parser.add_option("--do-iir-pipeline", action = "store_true", help = "run the iir pipeline instead of lloid")
	parser.add_option("--num-banks", metavar = "str", help = "the number of banks per job. can be given as a list like 1,2,3,4 then it will split up the bank cache into N groups with M banks each.")
	parser.add_option("--max-jobs", metavar = "num", type = "int", help = "stop parsing the cache after reaching a certain number of jobs to limit what is submitted to the HTCondor pool")
	parser.add_option("--likelihood-file", help = "set the likelihood file, required if --copy-likelihoods is used")
	parser.add_option("--marginalized-likelihood-file", help = "set the marginalized likelihood file, required")
	parser.add_option("--control-peak-time", default = 4, metavar = "secs", help = "set the control peak time, default 4")
	parser.add_option("--fir-stride", default = 4, metavar = "secs", help = "set the fir bank stride, default 4")
	parser.add_option("--thinca-interval", default = 10, metavar = "secs", help = "set the thinca interval, default 10")
	parser.add_option("--gracedb-far-threshold", type = "float", help = "false alarm rate threshold for gracedb (Hz), if not given gracedb events are not sent")
	parser.add_option("--gracedb-type", default = "LowMass", help = "gracedb type, default LowMass")
	parser.add_option("--gracedb-group", default = "Test", help = "gracedb group, default Test")
	parser.add_option("--data-source", metavar = "[lvshm|framexmit]", default = "lvshm", help = "Where to get the data from. Default lvshm")
	parser.add_option("--copy-likelihoods", action = "store_true", help = "Copy the likelihood files from a seed, must give --likelihood-file: WARNING overwrites existing files")
	parser.add_option("--injections", metavar = "filename", help = "Set the name of the LIGO light-weight XML file from which to load injections (optional).")
	parser.add_option("--veto-segments-file", metavar = "filename", help = "Set the name of the LIGO light-weight XML file from which to load vetoes (optional).")
	parser.add_option("--veto-segments-name", metavar = "name", help = "Set the name of the segments to extract from the segment tables and use as the veto list.", default = "vetoes")
	parser.add_option("--state-vector-on-bits", metavar = "name", default = [], action = "append", help = "Set the state vector on bits to process (optional).  The default is 0x7 for all detectors. Override with IFO=bits can be given multiple times")
	parser.add_option("--state-vector-off-bits", metavar = "name", default = [], action = "append", help = "Set the state vector off bits to process (optional).  The default is 0x160 for all detectors. Override with IFO=bits can be given multiple times")
	parser.add_option("--lvalert-listener-program", action = "append", default = [], metavar = "program", help = "set the programs to respond to lvalerts from this analysis, can be given multiple times")

	options, filenames = parser.parse_args()

	# check the required options before using them so that a missing
	# option produces a clear error instead of an AttributeError from
	# None.split() below
	fail = ""
	for option in ("bank_cache", "gracedb_far_threshold", "num_banks"):
		if getattr(options, option) is None:
			fail += "must provide option %s\n" % (option)
	if fail:
		raise ValueError(fail)

	options.num_banks = [int(v) for v in options.num_banks.split(",")]

	if options.copy_likelihoods and options.likelihood_file is None:
		raise ValueError("Must include --likelihood-file when giving --copy-likelihoods")

	#FIXME add consistency check?
	bankcache = inspiral_pipe.parse_cache_str(options.bank_cache)
	channel_dict = datasource.channel_dict_from_channel_list(options.channel)

	options.state_vector_on_off_dict = inspiral.state_vector_on_off_dict_from_bit_lists(options.state_vector_on_bits, options.state_vector_off_bits)

	return options, filenames, bankcache, channel_dict


###############################################################################
# MAIN
###############################################################################

options, filenames, bank_cache, channel_dict = parse_command_line()

# the logs directory may already exist from a previous run; only an
# OSError (e.g. EEXIST) is expected and safely ignorable here
try: os.mkdir("logs")
except OSError: pass
dag = dagparts.CondorDAG("trigger_pipe")

#
# setup the job classes
#

# --do-iir-pipeline is a store_true option: it is True when given and
# None otherwise
if options.do_iir_pipeline is not None:
	gstlalInspiralJob = gstlal_iir_inspiral_job()
else:
	gstlalInspiralJob = gstlal_inspiral_job()

# A local universe job that will run in a loop marginalizing all of the likelihoods
margJob = gstlal_inspiral_marginalize_likelihoods_online_job()
margNode = gstlal_inspiral_marginalize_likelihoods_online_node(margJob, dag, options.marginalized_likelihood_file)

# an lvalert_listen job
listenJob = lvalert_listen_job(gracedb_group = options.gracedb_group, gracedb_type = options.gracedb_type, progs = options.lvalert_listener_program)
listenNode = lvalert_listen_node(listenJob, dag)

# a local universe job to fetch urls from the running analysis
urlsJob = gstlal_ll_inspiral_get_urls_job()

###############################################################################
# loop over banks to run gstlal inspiral pre clustering and far computation
###############################################################################

for num_insp_nodes, (s, trials_factor) in enumerate(inspiral_pipe.build_bank_string(bank_cache, options.num_banks, options.max_jobs)):
	gstlal_inspiral_node(gstlalInspiralJob, dag, channel_dict, reference_psd=options.reference_psd, svd_bank=s, ht_gate_thresh = options.ht_gate_threshold, fir_stride = options.fir_stride, thinca_interval = options.thinca_interval, likelihood_file = options.likelihood_file, marginalized_likelihood_file = options.marginalized_likelihood_file, control_peak_time = options.control_peak_time, gracedb_far_threshold = options.gracedb_far_threshold, gracedb_group = options.gracedb_group, gracedb_type = options.gracedb_type, injections = options.injections, data_source = options.data_source, copy_likelihoods = options.copy_likelihoods, veto_segments_file = options.veto_segments_file, veto_segments_name = options.veto_segments_name, state_vector_on_off_dict = options.state_vector_on_off_dict, psd_fft_length = options.psd_fft_length)

# num_insp_nodes is the zero-based index of the last inspiral node, so
# the total number of jobs is num_insp_nodes + 1
gstlal_ll_inspiral_get_urls_node(urlsJob, dag, num_insp_nodes+1)

dag.write_sub_files()
# we probably want these jobs to retry indefinitely on dedicated nodes. A user
# can intervene and fix a problem without having to bring the dag down and up.
# There are few enough total jobs that this really shouldn't bog down the
# scheduler. For now 1000 will be considered indefinite
[node.set_retry(1000) for node in dag.get_nodes()]
dag.write_dag()
dag.write_script()
dag.write_cache()

# set up the webpage
# FIXME don't hardcode this stuff
shutil.copy2(dagparts.which('gstlal_llcbcsummary'), os.path.expanduser("~/public_html/cgi-bin"))
shutil.copy2(dagparts.which('gstlal_llcbcnode'), os.path.expanduser("~/public_html/cgi-bin"))

print >>sys.stderr, "\n\n NOTE! You can monitor the analysis at this url: https://ldas-jobs.ligo.caltech.edu/~%s/cgi-bin/gstlal_llcbcsummary?id=0001,%04d&dir=%s \n\n" % (os.environ['USER'], num_insp_nodes+1, os.getcwd())
