#!/usr/bin/env python

#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#

from optparse import OptionParser
try:
    import sqlite3
except ImportError:
    # pre 2.5.x
    from pysqlite2 import dbapi2 as sqlite3
import sys
import os

from pylal import ligolw_sqlutils as sqlutils

from glue import git_version
from glue.ligolw import ligolw
from glue.ligolw import dbtables
from glue.ligolw import lsctables
from glue.ligolw.utils import process
from glue.ligolw.utils import ligolw_sqlite

# Program name; recorded in the process table when the script registers itself.
__prog__ = "ligolw_cbc_cluster_coincs"
__author__ = "Collin Capano <cdcapano@physics.syr.edu>"

# Summary handed to optparse as the program description (shown by --help).
description = (
    "Clusters coincident triggers in a database and writes result out "
    "to a new database."
)


# =============================================================================
#
#                                   Set Options
#
# =============================================================================


def parse_command_line():
    """
    Parse the command line and check the options for consistency.

    Returns a tuple (options, args), where options is the optparse values
    object and args is the raw argument list, sys.argv[1:].

    Raises ValueError if a required option (--input, --output, --tmp-space,
    --cluster-window, --time-column, --ranking-table, --ranking-stat,
    --rank-by) is missing, if --rank-by is not MAX or MIN, if only one of
    --param-name and --param-ranges is given, or if both --exclude-coincs and
    --include-only-coincs are given.
    """
    parser = OptionParser(
        version = git_version.verbose_msg,
        usage   = "%prog [options]",
        description = description
        )

    # following are related to file input and output naming
    parser.add_option( "-i", "--input", action = "store", type = "string", default = None,
        help =
            "Input database to read. Can only input one at a time."
            )
    parser.add_option( "-o", "--output", action = "store", type = "string", default = None,
        help =
            "Name of output database to save to."
            )
    parser.add_option( "-t", "--tmp-space", action = "store", type = "string", default = None,
        metavar = "PATH",
        help =
            "Required. Location of local disk on which to do work. " +
            "This is used to enhance performance in a networked " +
            "environment, and to protect against accidentally " +
            "overwriting the input database."
            )
    parser.add_option( "-v", "--verbose", action = "store_true", default = False,
        help =
            "Print progress information"
            )
    parser.add_option( "-D", "--debug", action = "store_true", default = False,
        help =
            "Print SQLite queries used and the approximate time taken to run each one."
            )

    # following are generic inspiral_sql options
    parser.add_option( "", "--param-name", action = "store", default = None,
        metavar = "PARAMETER",
        help =
            "Can be any parameter in the param table (see --param-table). " +
            "Specifying this and param-ranges will only cluster " +
            "triggers that fall within the parameter range. Not " +
            "specifying will cause all triggers in the database to " +
            "be clustered together."
            )
    parser.add_option( "", "--param-ranges", action = "store", default = None,
        metavar = " [ LOW1, HIGH1 ); ( LOW2, HIGH2]; etc.",
        help =
            "Requires --param-name. Specify the parameter ranges " +
            "to cluster triggers in. A '(' or ')' implies an open " +
            "boundary, a '[' or ']' a closed boundary. To specify " +
            "multiple ranges, separate each range by a ';'. Any " +
            "coincidences that fall outside of the union of all " +
            "the specified ranges will be deleted."
            )
    parser.add_option( "", "--exclude-coincs", action = "store", type = "string", default = None,
        metavar = " [COINC_INSTRUMENTS1 + COINC_INSTRUMENTS2 in INSTRUMENTS_ON1];" +
            "[ALL in INSTRUMENTS_ON2]; etc.",
        help =
            "Exclude coincident types in specified detector times, " +
            "e.g., '[H2,L1 in H1,H2,L1]'. Some rules: " +
                "* Coinc-types and detector time must be separated by " +
                "an ' in '. When specifying a coinc_type or detector " +
                "time, detectors and/or ifos must be separated by " +
                "commas, e.g. 'H1,L1' not 'H1L1'. " +
                "* To specify multiple coinc-types in one type of time, " +
                "separate each coinc-type by a '+', e.g., " +
                "'[H1,H2 + H2,L1 in H1,H2,L1]'. " +
                "* To exclude all coincs in a specified detector time " +
                "or specific coinc-type in all times, use 'ALL'. E.g., " +
                "to exclude all H1,H2 triggers, use '[H1,H2 in ALL]' " +
                "or to exclude all H2,L1 time use '[ALL in H2,L1]'. " +
                "* To specify multiple exclusions, separate each " +
                "bracket by a ';'. " +
                "* Order of the instruments nor case of the letters " +
                "matter. So if your pinky is broken and you're " +
                "dyslexic you can type '[h2,h1 in all]' without a " +
                "problem. "
            )
    parser.add_option( "", "--include-only-coincs", action = "store", type = "string", default = None,
        metavar = " [COINC_INSTRUMENTS1 + COINC_INSTRUMENTS2 in INSTRUMENTS_ON1];" +
            "[ALL in INSTRUMENTS_ON2]; etc.",
        help =
            "Opposite of --exclude-coincs: only the specified coincs " +
            "will be clustered (all other coincs will be deleted " +
            "from the output database). To avoid confusing overlaps, " +
            "cannot specify both --exclude-coincs and --include-only-coincs."
            )
    parser.add_option( "", "--vacuum", action = "store_true", default = False,
        help =
            "If turned on, will vacuum the database before saving. " +
            "This cleans any fragmentation and removes empty space " +
            "left behind by all the DELETEs, making the output " +
            "database smaller and more efficient. " +
            "WARNING: Since this requires rebuilding the entire " +
            "database, this can take awhile for larger files."
            )
    parser.add_option( "", "--time-column", action = "store", type = "string", default = None,
        help =
            "Required. What column to use for the gps-time of triggers. Should be set to end_time for inspiral searches " +
            "or start_time for ringdown searches. It is assumed that the column specified has a corresponding _ns column."
            )
    parser.add_option("", "--ranking-table", action="store", type="string",
        default=None,
        help =
          "Required. What table to get the ranking-statistic from. Can be any table with a coinc_event_id. Ex. coinc_inspiral."
        )
    parser.add_option("", "--ifos-table", action="store", type="string",
        default=None,
        help =
          "What table to look in for the coincident ifos. Can be any table with a coinc_event_id and an ifos column. " +
          "Default is to use whatever ranking-table is set to. If, however, the specified ranking-table does not have an " +
          "ifos column (e.g., coinc_event), then this must be specified."
        )
    parser.add_option("", "--param-table", action="store", type="string",
        default=None,
        help =
          "If specifying a param-name and ranges, what table to look in for the param values. Can be any table with a coinc_event_id. " +
          "Default is to use whatever ranking-table is set to."
        )
    parser.add_option("", "--time-table", action="store", type="string",
        default=None,
        help =
          "What table to look in for the time column (see --time-column). Can be any table with a coinc_event_id and the specified time column. " +
          "Default is to use whatever ranking-table is set to. If, however, the specified ranking-table does not have the " +
          "time column (e.g., the coinc_event table), then this must be specified."
        )

    # following are options specific to this program
    parser.add_option( "", "--cluster-window", action = "store", type = "float", default = 0,
        help =
            "Required. Time window, in ms, to cluster triggers in."
            )
    parser.add_option( "", "--ranking-stat", action = "store", type = "string", default = None,
        help =
            "Required. The statistic to cluster on. Can be any " +
            "statistic in the ranking table (see --ranking-table)."
            )
    parser.add_option( "", "--rank-by", action = "store", type = "string", default = None,
        metavar = "MAX or MIN",
        help =
            "Required. Options are MAX or MIN. " +
            "This specifies whether to cluster triggers by maximum (MAX) or " +
            "minimum (MIN) stat value. Use MIN if clustering on stats in " +
            "which smaller is better (e.g., chisq, far). Otherwise, use MAX. "
            )
    parser.add_option( "-G", "--group-by-ifos", action = "store_true", default = False,
        help =
            "Turning on will cause triggers to be grouped by coincident ifos when " +
            "clustering."
            )
    parser.add_option( "-M", "--group-by-multiplicity", action = "store_true", default = False,
        help =
            "Turning on will cause triggers to be grouped by the number of coincident ifos when " +
            "clustering. For example, doubles will be grouped with doubles, triples with triples, etc. " +
            "Note: if both --group-by-ifos and --group-by-multiplicity on, group-by-ifos takes precedence."
            )

    (options, args) = parser.parse_args()

    # check for required options and for self-consistency
    if not options.input:
        raise ValueError("No input specified.")
    if not options.output:
        raise ValueError("No output specified.")
    if not options.tmp_space:
        raise ValueError("--tmp-space is a required argument.")
    # the default of 0 doubles as the "not given" marker, so a zero-width
    # window is rejected as well
    if not options.cluster_window:
        raise ValueError("No cluster window specified.")
    if not options.time_column:
        raise ValueError("No time-column specified.")
    if not options.ranking_table:
        raise ValueError("No ranking-table specified.")
    if not options.ranking_stat:
        raise ValueError("No ranking stat specified.")
    # rank_by defaults to None; guard against that before normalising so that
    # a missing --rank-by gives the ValueError below rather than an
    # AttributeError from None.strip()
    if options.rank_by is None or options.rank_by.strip().upper() not in ('MAX', 'MIN'):
        raise ValueError("--rank-by must be specified and set to either MAX or MIN.")
    if options.param_name and not options.param_ranges:
        raise ValueError("param-name requires param-ranges")
    if options.param_ranges and not options.param_name:
        raise ValueError("param-ranges requires param-name")
    if options.exclude_coincs and options.include_only_coincs:
        raise ValueError("Cannot specify both --exclude-coincs and --include-only-coincs.")

    return options, sys.argv[1:]


# =============================================================================
#
#                       Function Definitions
#
# =============================================================================
    
def gps_time_in_ns( gps_time, gps_time_ns ):
    """
    Combine a GPS time split into seconds and nanoseconds into a single
    number of nanoseconds.

    @gps_time: the seconds part of the time
    @gps_time_ns: the nanoseconds part of the time

    The seconds are scaled by an integer factor rather than the float 1e9:
    present-day GPS times are of order 1e18 ns, which is beyond the range in
    which a double can represent every integer, so a float product would
    round away the low-order nanoseconds. With integer inputs the result is
    an exact integer; with float inputs the result is the same float as
    before.
    """
    return gps_time * 1000000000 + gps_time_ns

def get_ifo_multiplicity( ifos ):
    """
    Count the instruments named in an 'ifos' string.

    @ifos: a string of comma-separated ifos

    The string is turned into an instrument set by
    lsctables.instrument_set_from_ifos and the size of that set is returned.
    """
    instruments = lsctables.instrument_set_from_ifos( ifos )
    return len( instruments )


#
# DB content handler
#


class ContentHandler(ligolw.LIGOLWContentHandler):
    """
    Content handler passed to ligolw_sqlite.insert_from_url when the input is
    an XML file. The main script stores the working sqlite3 connection on
    this class as ContentHandler.connection.
    """

# presumably hooks the dbtables (database-backed) table classes into the
# handler -- confirm against glue.ligolw.dbtables.use_in
dbtables.use_in(ContentHandler)


# =============================================================================
#
#                                     Main
#
# =============================================================================

#
#       Generic Initialization
#

opts, args = parse_command_line()

# get input database filename
filename = opts.input
if not os.path.isfile( filename ):
    raise ValueError("The input file, %s, cannot be found." % filename)

# only report tracebacks from SQL callback functions when debugging
sqlite3.enable_callback_tracebacks(opts.debug)

# Setup working databases and connections
# A database input is opened through a working filename derived from the input;
# an XML input is loaded into a working database derived from the output name.
# (presumably get_connection_filename returns a scratch file under tmp_path --
# confirm against glue.ligolw.dbtables)
if opts.verbose: 
    print >> sys.stderr, "Setting up temp. database..."
if filename.endswith('.sql') or filename.endswith('.sqlite'):
    working_filename = dbtables.get_connection_filename( 
        filename, tmp_path = opts.tmp_space, verbose = opts.verbose )
    ContentHandler.connection = sqlite3.connect( working_filename )
elif filename.endswith('.xml') or filename.endswith('.xml.gz'):
    working_filename = dbtables.get_connection_filename(opts.output, tmp_path = opts.tmp_space, verbose = opts.verbose)
    ContentHandler.connection = sqlite3.connect( working_filename )
    ligolw_sqlite.insert_from_url(filename, contenthandler = ContentHandler, verbose=opts.verbose)
else:
    # without this branch an unsupported extension would only show up on the
    # next line as an AttributeError on ContentHandler.connection
    raise ValueError("Unrecognized input file type for %s; the file name must end in .sql, .sqlite, .xml or .xml.gz." % filename)
connection = ContentHandler.connection

# get needed table names; the ifos, param and time tables all default to the
# ranking table when not given explicitly
ranking_table = sqlutils.validate_option( opts.ranking_table )
if opts.ifos_table is None:
    ifos_table = ranking_table
else:
    ifos_table = sqlutils.validate_option( opts.ifos_table )
if opts.param_table is None:
    param_table = ranking_table
else:
    param_table = sqlutils.validate_option( opts.param_table )
if opts.time_table is None:
    time_table = ranking_table
else:
    time_table = sqlutils.validate_option( opts.time_table )
time_column = sqlutils.validate_option( opts.time_column )

# Add program to process and process params table

# FIXME: remove the following two lines once boolean type
# has been properly handled
# (they make booleans use the same ligolw type mapping as integers)
from glue.ligolw import types as ligolwtypes
ligolwtypes.FromPyType[type(True)] = ligolwtypes.FromPyType[type(8)]

# create an xmldoc representation of the database for writing the
# process and process-params
xmldoc = dbtables.get_xml(connection)
# Add entries to process and process_params tables for this program
proc_id = process.register_to_xmldoc(xmldoc, __prog__, opts.__dict__, version = git_version.id)

# Get param and param-ranges if specified
# (param_parser and param_name are only defined when --param-name is given)
if opts.param_name:
    param_parser = sqlutils.parse_param_ranges( param_table, opts.param_name, 
      opts.param_ranges, verbose = opts.verbose )
    param_name = param_parser.get_param_name()
    param_filters = param_parser.get_param_filters()
else:
    param_filters = None

# Get exclude_coincs list if specified
if opts.exclude_coincs:
    exclude_coinc_filters = sqlutils.parse_coinc_options( opts.exclude_coincs, 
        verbose = opts.verbose ).get_coinc_filters( ifos_table )
else:
    exclude_coinc_filters = None

# Get include_coincs list if specified
if opts.include_only_coincs:
    include_coinc_filters = sqlutils.parse_coinc_options( opts.include_only_coincs, 
        verbose = opts.verbose ).get_coinc_filters( ifos_table )
else:
    include_coinc_filters = None


# Clear coinc table of triggers outside of interested ranges
if param_filters or exclude_coinc_filters or include_coinc_filters:
    sqlutils.apply_inclusion_rules_to_coinc_table( connection, ranking_table,
        exclude_coincs = exclude_coinc_filters,
        include_coincs = include_coinc_filters, 
        param_filters = param_filters, verbose = opts.verbose )

#
#         Program-specific Initialization
# 

# Fully-qualified column (table.column) holding the statistic to cluster on
stat_column = sqlutils.validate_option(opts.ranking_stat)
ranking_stat = ranking_table + '.' + stat_column

# Translate the requested ranking direction into the SQL comparison used to
# decide whether a reference trigger beats the trigger under test
rank_direction = opts.rank_by.strip().upper()
if rank_direction == "MIN":
    rank_by = "<"
elif rank_direction == "MAX":
    rank_by = ">"

# The cluster window is given in ms; the times it is compared with are in ns
window = 1e6 * opts.cluster_window

# Make param_parser.group_by_param_range() callable from SQL; without a
# parameter every trigger lands in the same constant group
if not opts.param_name:
    param_grouping = '0'
else:
    connection.create_function("group_by_param", 1, param_parser.group_by_param_range)
    param_grouping = 'group_by_param(' + param_name + ')'

# Choose how triggers are grouped by instruments; --group-by-ifos wins over
# --group-by-multiplicity when both are set
if opts.group_by_ifos:
    ifo_grouping = ifos_table + '.ifos'
elif opts.group_by_multiplicity:
    connection.create_function('get_ifo_multiplicity', 1, get_ifo_multiplicity)
    ifo_grouping = 'get_ifo_multiplicity(' + ifos_table + '.ifos)'
else:
    ifo_grouping = '"ALL_IFOS"'

# Every table that differs from the ranking table has to be joined in on
# coinc_event_id; the set removes duplicates among the three
extra_tables = set([name for name in (ifos_table, param_table, time_table) if name != ranking_table])
join_clauses = []
for extra_table in extra_tables:
    join_clauses.append(
        '\n\n    JOIN\n        %s ON\n        experiment_map.coinc_event_id == %s.coinc_event_id'
        % (extra_table, extra_table) )
add_join = ''.join(join_clauses)

# Make the gps-time-to-ns conversion callable from SQL
connection.create_function("gps_time_in_ns", 2, gps_time_in_ns)

#
#         Cluster
# 

if opts.verbose:
    print >> sys.stderr, "Clustering triggers..."

# Create a temporary scratch table that only includes the info we need;
# we'll do all the clustering in this table, then delete every row of the
# experiment_map table whose (coinc_event_id, experiment_summ_id) pair
# qualifies.
# Next, we'll delete triggers in the coinc table that no longer
# have coinc_event_ids in the experiment_map table. We do it this way because
# zero-lag coincs will be mapped to both all_data and either exclude_play or playground
# esids (i.e. one coinc event will be mapped to two different esids in two different
# rows in the experiment_map table); however, it may happen that while a coinc would
# be deleted in all_data, it would not be deleted in exclude_play or playground.
#
# A trigger is deleted when another trigger exists with the same esid, ifos
# grouping and param grouping, a better ranking stat (per rank_by), and a time
# in the half-open interval (t - window, t + window] around it.

# Also, a note about the WHERE clause below:
# You would expect to be able to do:
# WHERE coinc_event_id, experiment_summ_id IN (
#   SELECT ceid, esid
#   FROM clustered AS deltrigs .... )
# and you could, if this was db2. Unfortunately, SQLite doesn't appear to
# let you compare multiple values using the IN statement, so we hack around this by
# constructing a string from the two values and comparing the strings.
#
# The window is spliced in with repr() (what the old backtick syntax did).
sqlscript = ''.join(["""
    CREATE TEMP TABLE clustered AS
        SELECT 
            experiment_map.experiment_summ_id AS esid,
            experiment_map.coinc_event_id AS ceid,
            """, ifo_grouping, """ AS ifos,
            gps_time_in_ns( """, time_table, """.""",time_column, """, """, time_table, """.""", time_column, """_ns ) AS gps_time,
            """,
            param_grouping, """ AS param_grouping, 
            """,
            ranking_stat, """ AS ranking_stat
        FROM
            """, ranking_table, """
        JOIN 
            experiment_map 
        ON
            experiment_map.coinc_event_id == """, ranking_table, """.coinc_event_id""", add_join, """;
    CREATE INDEX cl_sipt_index ON clustered (esid, ifos, param_grouping, gps_time);
    DELETE 
    FROM 
        experiment_map
    WHERE (cast(coinc_event_id AS char) || "," || cast(experiment_summ_id AS char)) IN (
        SELECT 
            (cast(ceid AS char) || "," || cast(esid AS char)) 
        FROM clustered AS deltrigs
        WHERE EXISTS (
            SELECT *
            FROM
                clustered AS reftrigs
            WHERE
                reftrigs.esid == deltrigs.esid
                AND reftrigs.ifos == deltrigs.ifos
                AND reftrigs.param_grouping == deltrigs.param_grouping
                AND reftrigs.ranking_stat """, rank_by, """ deltrigs.ranking_stat
                AND reftrigs.gps_time > ( deltrigs.gps_time - """, repr(window),""" )
                AND reftrigs.gps_time <= ( deltrigs.gps_time + """, repr(window),""" )
            LIMIT 1)
        );"""])

# In debug mode the query is echoed and the local hour, minute and second are
# printed before and after it runs. Each timestamp comes from one localtime()
# snapshot so its three fields cannot straddle a clock tick.
if opts.debug:
    import time
    print >> sys.stderr, sqlscript
    now = time.localtime()
    print >> sys.stderr, now[3], now[4], now[5]

connection.cursor().executescript( sqlscript )

if opts.debug:
    now = time.localtime()
    print >> sys.stderr, now[3], now[4], now[5]

sqlscript = ''.join(["""
    -- delete triggers from the coinc table
    DELETE
    FROM
        """, ranking_table, """
    WHERE 
        coinc_event_id NOT IN (
            SELECT
                coinc_event_id
            FROM
                experiment_map
        );"""])

if opts.debug:
    print >> sys.stderr, sqlscript
    now = time.localtime()
    print >> sys.stderr, now[3], now[4], now[5]

connection.cursor().executescript( sqlscript )

if opts.debug:
    now = time.localtime()
    print >> sys.stderr, now[3], now[4], now[5]


# Propagate the clustering to the rest of the database: drop entries in the
# other tables whose coincidences are gone from the coinc (ranking) table.
# experiment_map is left alone here because the clustering step above already
# deleted from it directly.
sqlutils.clean_using_coinc_table(
    connection,
    ranking_table,
    verbose = opts.verbose,
    clean_experiment_map = False,
    clean_coinc_event_table = True,
    clean_coinc_definer = True,
    clean_coinc_event_map = True,
    clean_mapped_tables = True,
    selected_tables = []
    )

# Bring the event counts in the experiment summary up to date
sqlutils.update_experiment_summ_nevents( connection, verbose = opts.verbose )

# Optionally rebuild the database file to reclaim the space freed by the
# deletions
if opts.vacuum:
    vacuum_cursor = connection.cursor()
    if opts.verbose:
        print >> sys.stderr, "Vacuuming database..."
    vacuum_cursor.execute( 'VACUUM' )
    if opts.verbose:
        print >> sys.stderr, "done."

#
#       Save and Exit
#

# commit the outstanding changes to the working database and release it
connection.commit()
connection.close()

# write output database
# (hands the working file over to the requested output name; presumably this
# moves it out of the temp space -- confirm against
# glue.ligolw.dbtables.put_connection_filename)
dbtables.put_connection_filename(opts.output, working_filename, verbose = opts.verbose)

if opts.verbose: 
    print >> sys.stdout, "Finished!"

# set process end time
# NOTE(review): this runs after the connection has been closed and the output
# database written, so the end time likely never reaches the output file --
# confirm, and if so record it before the commit above.
process.set_process_end_time(proc_id)
sys.exit(0)
