#! /usr/bin/env python

import argparse
import os
import re
import sys

# Command-line interface.
parser = argparse.ArgumentParser(
    description="Compare benches generated by 'vcsn score'.",
    epilog='Install the `colorama` Python module to get colored output.')
parser.add_argument('file', nargs='+', type=str, default=None,
                    help='Bench file (from vcsn score) to compare')
parser.add_argument('--all', action='store_true',
                    help='Report also benches with no differences')
parser.add_argument('--no-color', dest='color',
                    action='store_false', default=True,
                    help='Do not use colors in the output')
# Note: argparse runs `type` (re.compile) on a string default too.
parser.add_argument('--only', metavar='RE', type=re.compile, default='.*',
                    help='Report only benches whose title is matched by RE')
parser.add_argument('--threshold', metavar='FLOAT', type=float, default=0.1,
                    help='''Show good and bad scores with associated threshold.
                    Defaults to 0.1.''')
parser.add_argument('--csv', type=str,
                    help='Create a csv file')
args = parser.parse_args()

# Colors support: empty strings (no-op escapes) unless enabled and
# colorama is available.
green = ''
red   = ''
std   = ''
if args.color:
    try:
        from colorama import Fore, Style
        green = Fore.GREEN + Style.BRIGHT
        red   = Fore.RED   + Style.BRIGHT
        std   = Style.RESET_ALL
    except ImportError:
        # colorama is optional (see --help's epilog): fall back to
        # plain output.  Catch only ImportError, not everything.
        pass

# bench[title][file-name] -> score (a string, possibly "FAIL").
bench = dict()
# NOTE(review): apparently unused (typo for bench_csv?) — kept as-is
# in case another part of the project reads it.
benc_csv = dict()

def normalize(s):
    '''Normalize a bench title: repair old typos and track API
    renamings so that the same bench matches across score files.
    '''
    # Use " # " as the one and only separator.
    s = " # ".join(p.strip() for p in s.split('#', 2))

    # Repeated '&' is the proper symbol.
    s = re.sub(r'a\*\*(\d+) ', r'a & \1', s)
    # Misspelling.
    s = re.sub(r'de_buijn', 'de_bruijn', s)
    # Extraneous closing paren.
    s = re.sub(r'ladybird\(21\)\)', 'ladybird(21)', s)
    # An incorrect use of .format was once displayed verbatim.
    s = re.sub(r'(a.(?:product|shuffle)\(a\) # a = std\(\{\}\).format\(r\))',
               lambda m: m.group(1).replace('{}', '[a-e]?{50})'),
               s)
    # Strings are now denoted by 's', no longer by 'a'.
    s = re.sub(r'read\(a\) # a =', r'read(s) # s =', s)
    # The number of repetitions is now displayed.
    s = re.sub(r'(# a = de_bruijn\(150\))$', r'\1, 1000x', s)
    s = re.sub(r'(# e = "\(\\e\+a\)" \* 500)$', r'\1, 100x', s)
    s = re.sub(r'(# r = b\.expression\("\(\\e\+a\)" \* 500\))$',
               r'\1, 1000x', s)
    # "   on [a-z]  -> Z" is now written ", c = [a-z] -> Z".
    s = re.sub(r' +on (\[.*?\][?*]?) *-> *([BQZ])', r', c = \1 -> \2', s)
    s = re.sub(r'a = lal\(a-zA-Z0-9\).ladybird\(18\)',
               r'a = ladybird(18), c = [a-zA-Z0-9] -> B', s)
    # Q was a typo in score: we only ever benched B, which is good
    # enough and more relevant anyway.
    s = re.sub(r'(determinize.*de_bruijn\(\d+\)), c = \[abc\] -> [BQ]',
               r'\1', s)

    # derived_term now takes its algorithm as an argument.
    s = s.replace("derived_term()", 'derived_term("derivation")')
    s = s.replace("linear()", 'derived_term("expansion")')

    # "a.sort() # a = std([a-e]?{600})" was displayed while
    # "a.shortest(5)" was actually run...
    s = re.sub(r'a.sort\(\) (# a = std\(\[a-e\]\?\{600\}\))',
               r'a.shortest(5) \1', s)
    # ...and its context, although not B, was not reported.
    s = re.sub(r'(a\.shortest\(5\) # a = std\(\[a-e\]\?\{600\}\))$',
               r'\1, c = [a-e] -> Z', s)

    # New syntax for contexts.
    s = re.sub(r'lal_char\(abc\)(_|, )b', '[abc] -> B', s)

    # Renamed API entry points.
    s = s.replace('ratexp', 'expression')
    s = s.replace('a.num_sccs', 'a.scc')
    s = s.replace('a.accessible ', 'a.accessible() ')

    # The context is now displayed.
    s = re.sub(r'(a.minimize\("(moore|signature)"\) # a = std\(.*?\))$',
               r'\1, c = [a-k] -> B', s)

    s = s.replace('product', 'conjunction')

    # expression() now takes identities, and an algorithm.
    s = s.replace('a.expression()', 'a.expression("associative")')
    s = re.sub(r'(a\.expression\("\w+")\)', r'\1, "naive")', s)

    # has_twins_property is benched on an expression using the
    # associative identities.
    s = re.sub(r'(a.has_twins_property.* # a = std\([^,]*?)\)',
               r'\1, "associative")', s)

    # Pad the title so that the separators line up.
    return "{:20s} # {}".format(*(p.strip() for p in s.split('#', 2)))

def read(fn):
    '''Read a `vcsn score` generated file named `fn`.  Store in `bench`.

    Each line looks like:

        0.12s: a.is_proper()        # a = "", 200000x

    i.e., a duration, a colon, and a bench title.
    '''
    with open(fn) as f:
        for line in f:
            duration, title = [x.strip() for x in line.split(':', 1)]
            # Drop the trailing "s": the unit is implied.
            if duration[-1] == 's':
                duration = duration[:-1]
            # Repair/modernize the bench title.
            title = normalize(title)
            bench.setdefault(title, dict())[fn] = duration

def csv(keys, fn):
    '''Save the benches about `keys` in CSV format in the file named
    `fn` ('' or '-' meaning the standard output).

    First row: an empty cell, then one double-quoted bench title per
    column.  Then one row per input file: its base name, then its
    scores (empty cell when the bench is missing from that file).
    '''
    # Fixes: honor the `fn` parameter (was reading `args.csv`), and
    # open with 'w' ('wa' is an invalid mode and raises under Python 3).
    out = sys.stdout if fn in ['', '-'] else open(fn, 'w')
    try:
        # Header: quote the titles, doubling embedded quotes.
        quoted = ['"' + k.replace('"', '""') + '"' for k in keys]
        out.write(' ,' + ','.join(quoted) + '\n')
        for f in args.file:
            row = [os.path.basename(f)]
            row += [bench[k].get(f, '') if k in bench else ''
                    for k in keys]
            out.write(','.join(row) + '\n')
    finally:
        # Close what we opened, but not a stream we do not own.
        if out is not sys.stdout:
            out.close()

def text(keys):
    '''Print a comparison table of the benches in `keys`.

    One line per bench: one column per input file, then the title.
    Scores more than `--threshold` below the row average are green,
    those above it red.  Unless `--all`, rows where every file agrees
    are skipped.
    '''
    for k in keys:
        # All the benches as strings.
        bs = [bench[k][f] if f in bench[k] else "N/A" for f in args.file]
        # If all the same, there is nothing to say.
        if not args.all and len(set(bs)) == 1:
            continue
        # All the benches as floats.
        bfs = [float(b) for b in bs if b not in ["N/A", "FAIL"]]
        # Average of the numeric benches.  Guard against a row with no
        # numeric entry at all (e.g., --all with only N/A/FAIL): the
        # loop below then prints no number, so the value is unused.
        avg = sum(bfs) / len(bfs) if bfs else 0.0
        for b in bs:
            if b in ["FAIL", "N/A"]:
                print("{:>5}".format(b), end=' ')
            else:
                color = ''
                score = float(b)
                # More than `threshold` below average: good (green);
                # more than `threshold` above: bad (red).
                if score <= avg - args.threshold * avg:
                    color = green
                elif args.threshold * avg + avg <= score:
                    color = red
                print(color + "{:5.2f}".format(score) + std, end=' ')
        print(k)

# Load every score file given on the command line.
for path in args.file:
    read(path)

# Restrict to the benches whose title matches --only.
keys = [k for k in sorted(bench.keys()) if args.only.search(k)]

# Emit either the CSV file or the colored text table.
if args.csv is None:
    text(keys)
else:
    csv(keys, args.csv)
