#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdarg.h>

#include <time.h>

#include "config.h"



#undef DEBUG
#define BUFSIZE 255

/*
 * A single recorded request: when it happened and which URL it was for.
 * Hits belonging to one session are chained into a doubly linked list
 * ordered by time.  The 'url' pointer is stored as given, not copied.
 */
class hit
{
public:
	unsigned long timestamp;
	char * url;

	/* note that these mean *chronologically* next and previous */
	hit * previous;
	hit * next;

	/* both links default to NULL, i.e. an unlinked node */
	hit (unsigned long n_timestamp, char * n_url, 
			hit * n_previous = NULL, hit * n_next = NULL)
		: timestamp (n_timestamp),
		  url (n_url),
		  previous (n_previous),
		  next (n_next)
	{
	}
};

class session
{
public:
	session (char * n_session, hit * n_hits, 
			session * n_left = NULL, session * n_right = NULL)
	{
		name = n_session;
		hits = n_hits;
		left = n_left;
		right = n_right;
	}
	char * name;
	hit * hits;
	session * left;
	session * right;
};

/* 
 * Inserts 'newhit' into the hit list at its chronological position
 * somewhere before 'hitnode'.
 *
 * The hits of a session form a doubly linked list; the session keeps
 * a pointer to the newest node, since we expect newer hits to arrive
 * most of the time.  'previous' links lead to older hits, 'next'
 * links to newer ones.
 *
 * Before calling this function, the caller has verified that
 * 'hitnode->timestamp' is higher than 'newhit->timestamp'.
 */
void add_hit(hit * hitnode, hit * newhit)
{
	/* walk back in time until the hit before 'hitnode' is not newer
	 * than 'newhit' (or there is no older hit at all) */
	while (hitnode->previous != NULL
			&& hitnode->previous->timestamp > newhit->timestamp)
	{
		hitnode = hitnode->previous;
	}

	/* splice 'newhit' in between 'older' (possibly NULL) and 'hitnode'.
	 * The forward link must always point at 'hitnode': leaving it NULL
	 * when 'newhit' becomes the oldest hit cuts every newer hit off from
	 * a walk that starts at the oldest node. */
	hit * older = hitnode->previous;
	newhit->previous = older;
	newhit->next = hitnode;
	if (older != NULL)
		older->next = newhit;
	hitnode->previous = newhit;
}

/*
 * Records a hit (timestamp, url) for the session called 'name'.
 *
 * Descends the session tree starting at 'treenode', comparing names
 * with strcmp.  If no session with that name exists yet, a new leaf
 * session is created holding the single new hit.  Otherwise the hit is
 * added to the existing session's hit list, whose head ('hits') is the
 * newest hit.
 *
 * 'name' and 'url' are stored as pointers, not copied; 'name' is only
 * stored when a new session is created.
 */
void add_hit(session * treenode, char * name, unsigned long timestamp, char * url) 
{
	/* locate the session, creating it as a new leaf when missing */
	for (;;)
	{
		const int order = strcmp(name, treenode->name);
		if (order == 0)
			break;

		session ** child = (order < 0) ? &treenode->left : &treenode->right;
		if (*child == NULL)
		{
			hit * first_hit = new hit (timestamp, url);
			*child = new session (name, first_hit, NULL, NULL);
			return;
		}
		treenode = *child;
	}

	/* add a new hit to the hitlist for this session */
	hit * fresh = new hit (timestamp, url);
	hit * newest = treenode->hits;

	if (newest == NULL)
	{
		treenode->hits = fresh;
		return;
	}

	if (newest->timestamp <= timestamp)
	{
		/* the common case, we are recording a newer hit */
		newest->next = fresh;
		fresh->previous = newest;
		treenode->hits = fresh;
		return;
	}

	/* out-of-order hit: let the list insertion find its place */
	add_hit (newest, fresh);
}

/* converts month name into month number 
 * (1-12, 0 is error)
 *
 * Only as many characters as are needed to tell the English
 * three-letter abbreviations apart are inspected.  'month' does not
 * need to be NUL-terminated, but must hold at least three characters.
 * An unrecognised name is reported on stderr. */
int getMonth (char month [4]) 
{
	int number = 0;

	/* every case ends in a break: an unrecognised second or third
	 * letter must yield 0, not fall through into the next month */
	switch (month[0])
	{
		case 'J': // Jan / Jun / Jul 
			if (month[1] == 'a')
				number = 1;
			else if (month[2] == 'n')
				number = 6;
			else if (month[2] == 'l')
				number = 7;
			break;
		case 'F': // Feb
			number = 2;
			break;
		case 'M': // Mar / May
			if (month[2] == 'r')
				number = 3;
			else if (month[2] == 'y')
				number = 5;
			break;
		case 'A': // Apr / Aug
			if (month[1] == 'p')
				number = 4;
			else if (month[1] == 'u')
				number = 8;
			break;
		case 'S': // Sep
			number = 9;
			break;
		case 'O': // Oct
			number = 10;
			break;
		case 'N': // Nov
			number = 11;
			break;
		case 'D': // Dec
			number = 12;
			break;
		default:
			break;
	}

	if (number == 0)
	{
		/* %.3s: the caller's buffer may lack a terminating NUL */
		fprintf(stderr, "Unrecognised month: %.3s\n", month);
	}
	return number;
}

/* converts time into the number of seconds since EPOCH
 *
 * 's' is expected to look like "[day/Mon/year:hour:min:sec"; anything
 * after the seconds is not looked at.  The broken-down time is handed
 * to mktime(), so it is interpreted as local time.
 *
 * Returns 0 (after printing a diagnostic on stderr) when 's' is NULL,
 * cannot be parsed, names an unknown month, or cannot be represented. */
int convertTime(char * s)
{
	if (s == NULL)
	{
		fprintf (stderr, "Warning! - Error parsing logline (no time found).\n");
		return 0;
	}

	/* mktime() reads every field, including those sscanf never fills;
	 * start from all zeroes and let it work out DST itself */
	struct tm tm;
	memset(&tm, 0, sizeof tm);
	tm.tm_isdst = -1;

	/* %3c does not append a NUL, so the buffer is zeroed up front */
	char month[4] = { 0, 0, 0, 0 };
	int results = sscanf(s, "[%d/%3c/%d:%d:%d:%d", &(tm.tm_mday), month, &(tm.tm_year), 
			&(tm.tm_hour), &(tm.tm_min), &(tm.tm_sec));
	if (results != 6)
	{
		/* diagnostics belong on stderr; stdout carries the program's output */
		fprintf(stderr, "Couldn't parse timestamp %s (got %d tokens).\n", s, results);
		fprintf(stderr, "Read string %s\n", month);
		return 0;
	}

	int month_number = getMonth(month);
	if (month_number == 0)
		return 0;	/* getMonth has already reported the problem */

	tm.tm_year -= 1900;
	tm.tm_mon = month_number - 1;

	time_t seconds = mktime(&tm);
	if (seconds == (time_t) -1)
		return 0;

	return (int) seconds;
}

/*
 * Prints all hits of one session to stdout, oldest first, one per
 * line as "session<TAB>timestamp<TAB>url".  'top' may be any node of
 * the list (or NULL, in which case nothing is printed).
 */
void printHits (hit * top, char * session)
{
	if (top == NULL)
		return;

	/* we first walk chronologically to the back */
	hit * oldest = top;
	for (; oldest->previous != NULL; oldest = oldest->previous)
		;

	/* and then, printing, back to the front */
	for (hit * node = oldest; node != NULL; node = node->next)
		printf("%s\t%lu\t%s\n", session, node->timestamp, node->url);
}

/*
 * Prints the hits of every session in the (sub)tree rooted at 'tree',
 * visiting the sessions in order: left subtree, the node itself, then
 * the right subtree.
 */
void printSessions (session * tree)
{
	/* recurse to the left, iterate to the right */
	for (session * node = tree; node != NULL; node = node->right)
	{
		printSessions(node->left);
		printHits(node->hits, node->name);
	}
}

#if USE_IGNORE
/*
 * Tells whether 'source' matches any of the ignore patterns in
 * 'config->ignores'.  The patterns are tried in list order with
 * regexec(); no sub-match information is requested.
 *
 * Returns true on the first match, false when no pattern matches.
 * Any non-zero regexec() result is treated as "no match".
 */
int ignore (char * source, Config * config)
{
	for (ignorer * current = config->ignores; current != NULL; current = current->next)
	{
		/* nmatch 0 / pmatch NULL: only the yes/no answer is needed */
		if (regexec(current->regexp, source, 0, NULL, 0) == 0)
		{
			// match!
			return true;
		}
	}	
	return false;
}
#endif

#if USE_UNIFY
/*
 * Maps 'source' to its unified form.  The unifiers in
 * 'config->unifiers' are tried in list order; the 'to' string of the
 * first one whose 'from' pattern matches 'source' is returned.
 *
 * Returns 'source' itself when nothing matches, so the caller can
 * compare pointers to see whether a substitution took place.  Any
 * non-zero regexec() result is treated as "no match".
 */
char * unify (char * source, Config * config)
{
	for (unifier * current = config->unifiers; current != NULL; current = current->next)
	{
		/* nmatch 0 / pmatch NULL: only the yes/no answer is needed */
		if (regexec(current->from, source, 0, NULL, 0) == 0)
		{
			// match!
			return current->to;
		}
	}
	
	return source;
}
#endif

void discardUntilNewline(FILE * f)
{
	unsigned char match = '\n';
	int last = 42;

	while ((last != EOF) && (((unsigned char)last) != match))
	{
		last = fgetc(f);
	}
}

/*
 * Allocation guard: returns normally when 'memory' is non-NULL,
 * otherwise prints "Out of memory." and terminates the program with
 * exit status 1.
 */
void check_notnull (void * memory)
{
	if (memory != NULL)
		return;

	printf("Out of memory.\n");
	exit(1);
}

/* Returns a heap-allocated copy of 's', or of the empty string when
 * 's' is NULL.  Exits via check_notnull() when out of memory. */
static char * dup_or_empty (const char * s)
{
	char * copy = strdup(s != NULL ? s : "");
	check_notnull(copy);
	return copy;
}

/*
 * Reads log lines from stdin, groups the requests by session and
 * prints every session's hits in chronological order on stdout.
 *
 * The configuration is read from the file named by the first command
 * line argument, or from "pathalizer.conf" when none is given.
 *
 * From each line the following is extracted:
 *   - the session name: the first space-separated field;
 *   - the timestamp: the fourth space-separated field;
 *   - the url: the second word of the first double-quoted string
 *     (the first word being GET/POST/whatever).
 * (presumably web server access-log lines - TODO confirm exact format)
 *
 * Lines longer than the buffer are truncated; the remainder of the
 * line is discarded.
 */
int main (int argc, char ** argv)
{
	Config * config; 
	if (argc > 1)
	  config = ReadConfig (argv[1]);
	else
	  config = ReadConfig ("pathalizer.conf");

	session * tree = new session("top", NULL);
	char buffer [BUFSIZE];
	long lineno = 0;
	while (fgets(buffer, BUFSIZE, stdin))
	{
		lineno++;
		/* handle loglines larger than BUFSIZE */
		/* if buffer[BUFSIZE-2] == '\n', we've exactly filled the buffer*/
		if ((strlen(buffer) == BUFSIZE-1) && (buffer[BUFSIZE-2] != '\n'))
		{
			discardUntilNewline(stdin);
		}

#ifdef DEBUG
		fprintf(stderr, "line: %s\n", buffer);
#endif
		char * name, * url;
		unsigned long timestamp;

		/* get the URL string: the text between the first pair of quotes.
		 * This cuts 'buffer' off at the first quote, which is fine as the
		 * fields read further down all come before it. */
		strtok(buffer, "\"");
		char * request = strtok(NULL, "\"");

		/* strip GET/POST/whatever */
		char * path = NULL;
		if (request != NULL && strtok(request, " ") != NULL)
			path = strtok(NULL, " ");
		url = dup_or_empty(path);
#ifdef DEBUG
		fprintf(stderr, "url: %s\n", url);
#endif

		/* the session name is the first field of the line */
		name = dup_or_empty(strtok(buffer, " "));

		/* skip the second and third field; the fourth is the time */
		strtok(NULL, " ");
		strtok(NULL, " ");
		timestamp = convertTime(strtok(NULL, " "));
		if (timestamp == 0)
		{
			fprintf(stderr, "Error on line %ld for session %s.\n", lineno, name);
		}
#if USE_IGNORE
		if (ignore(url, config))
		{
			/* nothing will refer to these copies any more */
			free(name);
			free(url);
			continue;
		}
#endif /* USE_IGNORE */
#if USE_UNIFY
		{
			char * unified = unify(url, config);
			if (unified != url)
			{
				/* the replacement belongs to the config; drop our copy */
				free(url);
				url = unified;
			}
		}
#endif /* USE_UNIFY */
		/* 'url' is kept by the new hit.  'name' is only kept when a new
		 * session gets created for it; add_hit does not report which
		 * case applied, so the copy cannot be released here. */
		add_hit(tree, name, timestamp, url);
	}

	/* print from the root itself rather than from its two children:
	 * hits of a session that happens to be called "top" are stored on
	 * the root node and would otherwise be silently dropped */
	printSessions(tree);

	return 0;
}
