/* SHSQLJOIN.C 
 * Copyright 1998-2002 Stephen C. Grubb  
 * This code is covered under the GNU General Public License (GPL);
 * see the file ./Copyright for details. */

/* invoked by shsql select command - not generally invoked directly */

/* for debugging, set "dbdebugmode: 1" in config file */
/* for additional debug output uncomment print statements labelled as DEBUG herein */

#include <ctype.h>
#include  "tdhkit.h"
#include "shsql.h"

/* WHEREMAX is the size of the where-clause and select-command buffers in main(),
   and of the where-clause line buffer in parse_comfile().. */
#define WHEREMAX 3000	/* buffer size increased, scg 9/9/03 */
/* KEYMAX is the size of the key field arrays (k1, k2, keyprinted) in main().. */
#define KEYMAX 10

/* declared here directly rather than through a header */
extern int getpid(), unlink();
extern int TDH_dequote();

/* State shared by main(), gln1() and gln2().  Suffix 1 = left side of the join, 2 = right side.. */
char **rows1, **rows2;				/* row storage filled in by SHSQL_fetchrows() and passed to SHSQL_row() */
struct selectparms sp1, sp2;			/* per-side query descriptors (nitems, nrows, itemlist, fldpos, intermed are used here) */
char *fields1[ MAXITEMS ], *fields2[ MAXITEMS ];	/* fields of the current row on each side, set by gln1() / gln2() */
int reading1, reading2;				/* 1 while a side may still have rows; cleared when SHSQL_row() returns nonzero */
int nf1, nf2;					/* number of fields per row on each side (copied from sp1.nitems / sp2.nitems) */
int maxrows = 5000;				/* row limit passed to SHSQL_fetchrows(); overwritten from the command file by parse_comfile() */

/* old-style forward declarations of the helpers defined later in this file */
static int gln1(), gln2(), parse_comfile(), buildsel();

/* MAIN - entry point of shsql_join.
 *
 * Command line:  shsql_join -comfile <commandfile> -config <configfile>
 * Argument scanning stops at the first argument that is neither of these.
 *
 * Processing:
 *  1. parse_comfile() reads the join specification (FROM clause, WHERE clause, settings).
 *  2. buildsel() builds one "select * from .. order by <keyfields>" command per side;
 *     each is submitted with SHSQL_command() and its rows fetched with SHSQL_fetchrows().
 *  3. The two row streams are stepped through in parallel, comparing the key fields, and
 *     result rows are written to stdout, each field followed by SHSQL_delim.
 *     A header line of field names is written first.
 *
 * Exit status: 0 on success, 20 if the config file cannot be loaded, 1 on any other error.
 * The command file is removed on success unless SHSQL_debug is set.
 */
int
main( argc, argv )
int argc;
char *argv[];
{
int i, stat, ix, diff, null1, null2;
char tbl1[MAXPATH], rightside[512]; 			/* left side (always a table), and right 
							   side (either a table or another join command) */
char kf1[255], kf2[255];				/* comma-separated key field names for each side */
int nk1, nk2;						/* number of key fields found for each side */
char jointype[20], alias1[MAXPATH], alias2[MAXPATH];
int dups1, dups2, dleft, dright;
char whereclause[WHEREMAX];
char selcom[WHEREMAX];
char tok[256];
int k1[KEYMAX], k2[KEYMAX];				/* field positions of the key fields on each side */
char comfile[ MAXPATH ];
char *keyprinted[ KEYMAX ];				/* key values of the most recently printed matching row */
int subjoin;
int jtlen;


TDH_errprog( "shsql_join" );

/* set defaults */
dups1 = dups2 = 0;
dleft = dright = 0;
diff = 0;
strcpy( tbl1, "" ); strcpy( rightside, "" );
strcpy( kf1, "" ); strcpy( kf2, "" );
strcpy( jointype, "" ); strcpy( alias1, "" ); strcpy( alias2, "" );
strcpy( comfile, "" );
for( i = 0; i < KEYMAX; i++ ) keyprinted[i] = "";
subjoin = 0;


/* get command line parameters.. */
if( argc < 3 ) { err( 381, "shsql_join usage error", "" ); exit(1); }
for( i = 1; i < argc; i++ ) {
	if( strcmp( argv[i], "-comfile" )==0 ) {   /* -comfile is always required */
		if( i < argc-1 ) { 
			i++; 
			/* refuse a name that would not fit in comfile[].. */
			if( strlen( argv[i] ) >= sizeof( comfile ) ) { 
				err( 381, "shsql_join usage error", "-comfile name too long" ); 
				exit(1); 
				}
			strcpy( comfile, argv[i] ); 
			}
		}
	else if( strcmp( argv[i], "-config" )==0 ) {    /* -config is always required */
		if( i < argc-1 ) { 
			i++; 
			/* "file=" + name + terminator must fit in tok[].. */
			if( strlen( argv[i] ) + 6 > sizeof( tok ) ) { 
				err( 381, "shsql_join usage error", "-config name too long" ); 
				exit(1); 
				}
			sprintf( tok, "file=%s", argv[i] );
			/* read config file */
			stat = TDH_readconfig( tok );
			stat += SHSQL_readconfig();
			if( stat != 0 ) { err( 382, "Cannot load config file", "" ); exit( 20 ); }
			}
		}

	else break;
	}

/* parse the 'from' clause and 'where' clause passed from qlsel.. */
/* if rightside is a subjoin, the command will be returned in 'rightside', and subjoin set to 1.. */
/* parse_comfile has already reported the problem through err() when it returns nonzero.. */
stat = parse_comfile( comfile, jointype, tbl1, alias1, kf1, rightside, alias2, kf2, &subjoin, whereclause );
if( stat != 0 ) exit( 1 );

if( SHSQL_debug ) {
	fprintf( stderr, "\njoin %d: starting; left side is table %s (alias %s) (keyfields %s)\n", getpid(), tbl1, alias1, kf1 );
	if( subjoin ) fprintf( stderr, "...right side is a subjoin: %s ...(alias %s) (keyfields %s)\n", rightside, alias2, kf2 );
	else fprintf( stderr, "...right side is table %s (alias %s) (keyfields %s)\n", rightside, alias2, kf2 );
	}



dups2 = 1;
/* default is inner */
if( strnicmp( jointype, "left", 4 )==0 ) dleft = 1; 
else if( strnicmp( jointype, "right", 5 )==0 ) dright = 1;
else if( strnicmp( jointype, "outer", 5 )==0 ) { dleft = 1; dright = 1; }

/* a join type ending in "dl" means the duplicates are on the left side.. */
/* (only look at the last two characters when there are at least two) */
jtlen = strlen( jointype );
if( jtlen >= 2 && stricmp( &jointype[ jtlen-2 ], "dl" )==0 ) { dups1 = 1; dups2 = 0; }

/* check args.. */
if( tbl1[0] == '\0' ) { err( 383, "expecting leftside table", "" ); exit( 1 ); }
if( rightside[0] == '\0' ) { err( 384, "expecting rightside table or 2nd join", "" ); exit( 1 ); }

if( kf1[0] == '\0' ) { err( 385, "expecting keyfields1", "" ); exit( 1 ); }
if( kf2[0] == '\0' ) { err( 386, "expecting keyfields2", "" ); exit( 1 ); }


/* build and execute select command for left side.. */
buildsel( selcom, tbl1, alias1, kf1, whereclause );
if( SHSQL_debug ) fprintf( stderr, "\njoin %d: submitting command 1: %s\n", getpid(), selcom ); 
SHSQL_query_already_dequoted( 1 ); /* inform query processor that quoted strings have already been converted.. */
SHSQL_command( selcom );
SHSQL_fetchrows( &rows1, maxrows, &sp1 );
nf1 = sp1.nitems;
if( SHSQL_debug ) fprintf( stderr, "join %d: command 1 retrieved %d rows\n", getpid(), sp1.nrows ); 

/* build and execute select command for right side.. */
buildsel( selcom, rightside, alias2, kf2, whereclause );
if( SHSQL_debug ) fprintf( stderr, "\njoin %d: submitting command 2: %s\n", getpid(), selcom );
SHSQL_command( selcom ); 
SHSQL_query_already_dequoted( 0 ); /* restore */
SHSQL_fetchrows( &rows2, maxrows, &sp2 ); 
nf2 = sp2.nitems;
if( SHSQL_debug ) fprintf( stderr, "join %d: command 2 retrieved %d rows\n", getpid(), sp2.nrows );

/* get list of field positions for kf1.. */
for( nk1 = 0, ix = 0; ; ) {
	GL_getseg( tok, kf1, &ix, "," );
	if( tok[0] == '\0' ) break;
	/* k1[] holds KEYMAX entries.. */
	if( nk1 >= KEYMAX ) { err( 389, "too many join fields", tok ); exit( 1 ); }
	for( i = 0; i < sp1.nitems; i++ ) {
		if( strcmp( tok, sp1.itemlist[i] )==0 ) {
			k1[ nk1++ ] = sp1.fldpos[i];
			break;
			}
		}
	if( i >= sp1.nitems ) { err( 387, "unrecognized field name in join clause", tok ); exit( 1 ); }
	}

/* get list of field positions for kf2.. */
for( nk2 = 0, ix = 0; ; ) {
	GL_getseg( tok, kf2, &ix, "," );
	if( tok[0] == '\0' ) break;
	/* k2[] holds KEYMAX entries.. */
	if( nk2 >= KEYMAX ) { err( 389, "too many join fields", tok ); exit( 1 ); }
	for( i = 0; i < sp2.nitems; i++ ) {
		if( strcmp( tok, sp2.itemlist[i] )==0 ) {
			k2[ nk2++ ] = sp2.fldpos[i];
			break;
			}
		}
	if( i >= sp2.nitems ) { err( 388, "unrecognized field name in join clause", tok ); exit( 1 ); }
	}

if( nk1 < 1 || nk2 < 1 ) { err( 389, "error in join fields", "" ); exit( 1 ); }
if( nk1 != nk2 ) { err( 390, "mismatched number of join fields", "" ); exit( 1 ); }


/* print field name header.. */
for( i = 0; i < sp1.nitems; i++ ) printf( "%s.%s%c", alias1, sp1.itemlist[i], SHSQL_delim ); 

/* field names coming back from a subjoin are printed as is; otherwise they get the alias prefix.. */
if( subjoin ) for( i = 0; i < sp2.nitems; i++ ) printf( "%s%c", sp2.itemlist[i], SHSQL_delim ); 
else for( i = 0; i < sp2.nitems; i++ ) printf( "%s.%s%c", alias2, sp2.itemlist[i], SHSQL_delim ); 

printf( "\n" );


/* indicate that the join result is intermediate.. suppress conversions on result fields. */
/* note- this must be done after submitting the command (above) but before getting any rows */
sp1.intermed = 1;
sp2.intermed = 1;

/* processing loop */
reading1 = reading2 = 1;
gln1( ); gln2( );

while( reading1 || reading2 ) {

	/* compare the key fields of the two current rows; diff is <0, 0 or >0.. */
	for( i = 0; i < nk1; i++ ) {
		/* changed to accommodate the fact that sqlsel puts nulls at top of sorted result 5/3/02 */
		null1 = strcmp( fields1[ k1[ i ]], TDH_dbnull );
		null2 = strcmp( fields2[ k2[ i ]], TDH_dbnull );
		if( null1 + null2 == 0 ) diff = 0;
		else if( null1 == 0 ) diff = -1;
		else if( null2 == 0 ) diff = 1;
		else

		   diff = stricmp( fields1[ k1[ i ]], fields2[ k2[ i ]] );  /* changed to case insensitive 6/29/01*/
		
		if( diff != 0 ) break;
		}

	/* DEBUG: uncomment the following (lots of output.. best for small test data files).. */ 
	/* fprintf( stderr, "join %d: %s vs. %s diff=%d\n", getpid(), fields1[ k1[0]], fields2[ k2[0]], diff );  */

	if( diff == 0 ) {
		/* keys match.. print left fields followed by right fields.. */
		for( i = 0; i < nf1; i++ ) printf( "%s%c", fields1[i], SHSQL_delim );		/* datadelim */
		for( i = 0; i < nf2; i++ ) printf( "%s%c", fields2[i], SHSQL_delim );		/* datadelim */
		printf( "\n" );

		/* Save this key so we know we printed a rec for it.. */
		for( i = 0; i < nk1; i++ ) keyprinted[i] = fields1[ k1[ i ]];

		/* If one file does not have dups, get a record from the other which may have dups */
		if( !dups2 ) gln1();
		if( !dups1 ) gln2();
		}

	else if( diff < 0 ) { 
		/* left row has no partner on the right.. */
		if( dleft ) {
			/* Only print if not yet printed (based on key fields).. */
			for(i = 0; i < nk1; i++) {
				if( strcmp( fields1[ k1[ i ]], keyprinted[i] ) != 0 ) break;
			 	}
			if( i < nk1 ) {
				for( i = 0; i < nf1; i++ ) printf( "%s%c", fields1[i], SHSQL_delim );		/* datadelim */
				for( i = 0; i < nf2; i++ ) printf( "%s%c", TDH_dbnull, SHSQL_delim );		/* datadelim */
				printf( "\n" );
				}
			}
		gln1(); 
		}

	else if( diff > 0 ) { 
		/* right row has no partner on the left.. */
		if( dright ) {
			/* Only print if not yet printed (based on key fields).. */
			for(i = 0; i < nk2; i++) {
				if( strcmp( fields2[ k2[ i ]], keyprinted[i] ) != 0 ) break;
			 	}
			if( i < nk2 ) {
				for( i = 0; i < nf1; i++ ) printf( "%s%c", TDH_dbnull, SHSQL_delim );		/* datadelim */
				for( i = 0; i < nf2; i++ ) printf( "%s%c", fields2[i], SHSQL_delim );		/* datadelim */
				printf( "\n" );
				}
			}
		gln2(); 
		}
	}

/* DEBUG - in debug mode the comfile will be retained in the tmp dir.
	If you then run: shsql_join -comfile <commandfilename> -config configfile
	(use full paths), you will see result of join.
 */
if( !SHSQL_debug ) unlink( comfile );

exit( 0 );
}

/* ========================== */
/* GLN1 - get a line from file 1 (the left side of the join).
   While reading1 is set, the next row is loaded into fields1 with SHSQL_row(); a nonzero 
   status from SHSQL_row() clears reading1.  Once reading1 is clear, all nf1 entries of 
   fields1 are pointed at a string of tildes (presumably so that a finished side compares 
   higher than any real key value in main's comparison).  Always returns 0.  */

static int
gln1()
{ 
int fld;

/* fprintf( stderr, "[g1 %d]", reading1 ); */
if( reading1 && SHSQL_row( fields1, rows1, &sp1 ) != 0 ) reading1 = 0;

/* DEBUG - to trace rows as they are read, print fields1[0] .. fields1[nf1-1] to stderr here when reading1 is still set */

if( reading1 ) return( 0 );

/* no (more) rows on this side.. */
for( fld = 0; fld < nf1; fld++ ) fields1[ fld ] = "~~~~~~~~~~~~~~~~~";
return( 0 );
}

/* ========================= */
/* GLN2 - get a line from file 2 (the right side of the join).
   While reading2 is set, the next row is loaded into fields2 with SHSQL_row(); a nonzero 
   status from SHSQL_row() clears reading2.  Once reading2 is clear, all nf2 entries of 
   fields2 are pointed at a string of tildes (presumably so that a finished side compares 
   higher than any real key value in main's comparison).  Always returns 0.  */

static int
gln2()
{ 
int fld;

/* fprintf( stderr, "[g2 %d]", reading2 ); */
if( reading2 && SHSQL_row( fields2, rows2, &sp2 ) != 0 ) reading2 = 0;

/* DEBUG - to trace rows as they are read, print fields2[0] .. fields2[nf2-1] to stderr here when reading2 is still set */

if( reading2 ) return( 0 );

/* no (more) rows on this side.. */
for( fld = 0; fld < nf2; fld++ ) fields2[ fld ] = "~~~~~~~~~~~~~~~~~";
return( 0 );
}


/* ============================================ */
/* PC_ADDKEY - append one key field name to a comma-separated key field list.
   If qualify is nonzero the name is written as tabname.fldname.  Always returns 0. */

static int
pc_addkey( kflist, tabname, fldname, qualify )
char *kflist;		/* list being built; must already be a terminated string */
char *tabname;		/* table alias, used only when qualify is nonzero */
char *fldname;		/* field name */
int qualify;
{
/* a separator is needed whenever the list already has an entry, however short.. */
if( kflist[0] != '\0' ) strcat( kflist, "," );
if( qualify ) { strcat( kflist, tabname ); strcat( kflist, "." ); }
strcat( kflist, fldname );
return( 0 );
}

/* ============================================ */
/* PARSE_COMFILE - parse a FROM line where join(s) are specified 
 *
 * The command file is read as three lines:
 *   line 1: the FROM clause, i.e.  tbl1 [as a1] [inner|left|right|outer] join tbl2 [as a2]
 *           [ [inner|left|right|outer] join tbl3 [as a3] ] on x.f = y.g [and ...]
 *   line 2: the where clause, or "none"
 *   line 3: four label/value pairs giving the temp table prefix, maxrows, debug flag
 *           and error message mode (the labels themselves are skipped)
 *
 * When a second join is present, the text from tbl2 onward, plus the 'on' bindings that
 * reference tbl3, is returned in rightside as a join command of its own and *subjoin is
 * set to 1.  (The caller is expected to have set *subjoin to 0 beforehand.)
 *
 * Side effects: sets SHSQL_tmptblpfx and maxrows, may turn SHSQL_debug on, calls TDH_errmode().
 *
 * Returns 0 if ok, otherwise the value returned by err() for the problem found.
 */

static int
parse_comfile( commandfile, jointype, tbl1, alias1, kf1, rightside, alias2, kf2, subjoin, whereclause )
char *commandfile;		/* holds incoming sql syntax */
char *jointype;  		/* inner, left, right, outer */
char *tbl1, *rightside;		/* left side (always a table) and right side (either a table or 2nd join) */
char *kf1, *kf2;		/* keyfields list for table 1 and 2 */
char *alias1, *alias2; 		/* table aliases, if any, for table 1 and 2 */
int *subjoin;			/* returned as 1 if rightside contains a 2nd join */
char *whereclause;		/* where clause or "none" if none */
{
int i, ix, ixhold, len;
char buf[512], buf2[WHEREMAX];
char tok[256];
FILE *fpcom;
char s1[MAXPATH], s2[50]; 
int stat, debug;
int wordcount, andbreak, n_rs_chunks, rslen, n_chunks;
char jointype2[20], tbl3[MAXPATH], alias3[MAXPATH];
char lhs[MAXPATH], rhs[MAXPATH];
int lhstab, rhstab;
char errmsgmode[20];
int nlines;


strcpy( jointype, "" );
strcpy( alias1, "" ); strcpy( alias2, "" );
strcpy( kf1, "" ); strcpy( kf2, "" );

strcpy( jointype2, "" ); strcpy( alias3, "" ); strcpy( tbl3, "" );

lhstab = rhstab = 0;
debug = 0;
strcpy( errmsgmode, "" );

fpcom = fopen( commandfile, "r" );
if( fpcom == NULL ) return( err( 391, "cannot open join command file", commandfile ));

/* read the three lines, then close the file right away so that no return path leaks it.. */
strcpy( buf, "" ); strcpy( buf2, "" ); strcpy( tok, "" );
nlines = 0;
if( fgets( buf, 511, fpcom ) != NULL ) {
	nlines++;
	if( fgets( buf2, WHEREMAX-1, fpcom ) != NULL ) {
		nlines++;
		if( fgets( tok, 255, fpcom ) != NULL ) nlines++;
		}
	}
fclose( fpcom );
/* the FROM line and the where line are both required.. */
if( nlines < 2 ) return( err( 391, "join command file is incomplete", commandfile ));

/* remove the trailing newline, if there is one.. */
len = strlen( buf );
if( len > 0 && buf[ len-1 ] == '\n' ) buf[ len-1 ] = '\0';
len = strlen( buf2 );
if( len > 0 && buf2[ len-1 ] == '\n' ) buf2[ len-1 ] = '\0';

/* convert quoted strings now in where clause.. shsql engine will be informed that this has been done.. */
TDH_valuesubst_settings( "sqlmode", 1 );
stat = TDH_dequote( whereclause, buf2, "QS" );
TDH_valuesubst_settings( "sqlmode", 0 );
if( stat != 0 ) return( err( 392, "where clause quote error", buf ));


/* settings line.. values that are missing keep the defaults set above.. */
sscanf( tok, "%*s %s %*s %d %*s %d %*s %19s", SHSQL_tmptblpfx, &maxrows, &debug, errmsgmode );
if( SHSQL_debug == 0 && debug ) SHSQL_debug = 1;
if( errmsgmode[0] != '\0' ) TDH_errmode( errmsgmode );


ix = 0;

/* table 1.. */
strcpy( tbl1, GL_getok( buf, &ix ) );
strcpy( alias1, tbl1 );
strcpy( tok, GL_getok( buf, &ix ) );
if( stricmp( tok, "as" )==0 || strcmp( tok, "=" )==0 ) {
	strcpy( alias1, GL_getok( buf, &ix ) );
	strcpy( tok, GL_getok( buf, &ix ) );
	}

/* INNER.. etc */
if( GL_slmember( tok, "inner* left* right* outer*" )) {
	/* the match is a wildcard match, so make sure the word fits.. */
	/* (jointype is declared in main with the same size as jointype2) */
	if( strlen( tok ) >= sizeof( jointype2 ) ) 
		return( err( 393, "sql syntax error: expecting inner, left, right, or outer join", tok ) );
	strcpy( jointype, tok );
	strcpy( tok, GL_getok( buf, &ix ) );
	if( stricmp( tok, "join" )!= 0 ) return( err( 392, "sql syntax error: expecting 'join'", "" ) );
	}
else if( stricmp( tok, "join" )==0 ) strcpy( jointype, "inner" );
else return( err( 393, "sql syntax error: expecting inner, left, right, or outer join", tok ) );


/* table 2.. */
ixhold = ix;
strcpy( rightside, GL_getok( buf, &ix ) );  /* note, rightside will be obliterated below.. */
strcpy( alias2, rightside );  
strcpy( tok, GL_getok( buf, &ix ) );
if( stricmp( tok, "as" )==0 || strcmp( tok, "=" ) ==0 ) {
	strcpy( alias2, GL_getok( buf, &ix ) );
	strcpy( tok, GL_getok( buf, &ix ) );
	}


/* optional 2nd join.. */
/* INNER.. etc */
if( GL_slmember( tok, "inner* left* right* outer*" )) {
	if( strlen( tok ) >= sizeof( jointype2 ) ) 
		return( err( 395, "sql syntax error: expecting 2nd inner, left, right, or outer join", tok ) );
	strcpy( jointype2, tok );
	strcpy( tok, GL_getok( buf, &ix ) );
	if( stricmp( tok, "join" )!= 0 ) return( err( 394, "sql syntax error: expecting 2nd 'join'", "" ) );
	}
else if( stricmp( tok, "join" )==0 ) strcpy( jointype2, "inner" );
else if( stricmp( tok, "on" )==0 ) goto ONCLAUSE;
else return( err( 395, "sql syntax error: expecting 2nd inner, left, right, or outer join", tok ) );


/* table 3.. */
strcpy( tbl3, GL_getok( buf, &ix ) );
strcpy( alias3, tbl3 );
strcpy( tok, GL_getok( buf, &ix ) );
if( stricmp( tok, "as" )==0 || strcmp( tok, "=" ) ==0 ) {
	strcpy( alias3, GL_getok( buf, &ix ) );
	strcpy( tok, GL_getok( buf, &ix ) );
	}

/* start building rightside join command.. */
/* rslen is deliberately not advanced past the " on " placeholder: the text copied from buf */
/* already ends with the token just read, and the first table 3 binding overwrites the placeholder.. */
*subjoin = 1;
rslen = ix - ixhold;
strncpy( rightside, &buf[ixhold], rslen );
strcpy( &rightside[rslen], " on " );


/* ON */
ONCLAUSE:
if( GL_slmember( tok, "inner* left* right* outer* join" )) return( err( 396, "attempt to join more than 3 tables", "" ));
if( stricmp( tok, "on" )!=0 ) return( err( 396, "'on' expected", "" ));

wordcount = 0;
andbreak = ix;
n_chunks = 0;
n_rs_chunks = 0;

/* parse out the ON clause.. each binding is 3 words ( lhs = rhs ), bindings are separated by 'and'.. */
while( 1 ) {
	
	strcpy( tok, GL_getok( buf, &ix ));
	wordcount++;

	if( tok[0] == '\0' ) {
		if( wordcount != 4 ) return( err( 397, "sql join: incomplete 'on' clause", "" ));
		else break;
		}

	if( strcmp( tok, "=" )==0 ) {
		if( wordcount != 2 ) return( err( 378, "sql join: syntax error in the 'on' clause", "" ));
		else continue;
		}
	else if( wordcount == 2 ) return( err( 398, "sql join: 'on' clause bindings must use '='", "" ));
	else if( GL_smemberi( tok, "and &&" )) {
		if( wordcount != 4 )  return( err( 377, "sql join: syntax error in the 'on' clause", "" ));
		else	{
			andbreak = ix;
			wordcount = 0;
			continue;
			}
		}

	/* tok is a fieldname, either lhs or rhs.. */
	/* dots are turned into blanks so that table and field can be picked up as two words.. */
	if( wordcount == 1 ) {
		for( i = 0; tok[i] != '\0'; i++ ) { if( tok[i] == '.' ) lhs[i] = BLANK; else lhs[i] = tok[i]; }
		lhs[i] = '\0';
		s1[0] = '\0'; s2[0] = '\0';
		sscanf( lhs, "%s %49s", s1, s2 );
		lhstab = 0;
		if( strcmp( s1, alias1 )==0 ) lhstab = 1;
		else if( strcmp( s1, alias2 )==0 ) lhstab = 2;
		else if( strcmp( s1, alias3 )==0 ) lhstab = 3;
		}
	else if( wordcount == 3 ) {
		for( i = 0; tok[i] != '\0'; i++ ) { if( tok[i] == '.' ) rhs[i] = BLANK; else rhs[i] = tok[i]; }
		rhs[i] = '\0';
		s1[0] = '\0'; s2[0] = '\0';
		sscanf( rhs, "%s %49s", s1, s2 );
		rhstab = 0;
		if( strcmp( s1, alias1 )==0 ) rhstab = 1;
		else if( strcmp( s1, alias2 )==0 ) rhstab = 2;
		else if( strcmp( s1, alias3 )==0 ) rhstab = 3;
		}

	/* we now have a pair.. */
	if( wordcount == 3 ) { 
		if( lhstab == 0 || rhstab == 0 ) 
			return( err( 375, "sql join: 'on' binding uses unrecognized table", "" ));
		if( ( lhstab == 1 && rhstab == 3 ) || ( lhstab == 3 && rhstab == 1 ) ) 
			return( err( 376, "sql join: 'on' binding references tables that aren't being joined", "" ));

		/* if this pair references table3, add this chunk to the ON clause that we're building.. */
		if( lhstab == 3 || rhstab == 3 ) {
			if( n_rs_chunks > 0 ) {
				strcpy( &rightside[rslen], " and " );
				rslen += 5;
				}
			strncpy( &rightside[rslen], &buf[andbreak], ix-andbreak );
			rslen += (ix-andbreak);
			rightside[rslen] = '\0';
			n_rs_chunks++;
			continue;
			}

		/* otherwise, it is associated with the current join.. parse out the names of key fields.. */
		/* (both sides refer to table 1 or table 2 at this point) */
		/* for a subjoin the table 2 key is kept in alias.field form.. */
		n_chunks++;
		s1[0] = '\0'; s2[0] = '\0';
		sscanf( lhs, "%s %49s", s1, s2 );
		if( s2[0] == '\0' ) return( err( 378, "sql join: syntax error in the 'on' clause", "" ));
		if( lhstab == 1 ) pc_addkey( kf1, s1, s2, 0 );
		else if( lhstab == 2 ) pc_addkey( kf2, s1, s2, *subjoin );
		
		s1[0] = '\0'; s2[0] = '\0';
		sscanf( rhs, "%s %49s", s1, s2 );
		if( s2[0] == '\0' ) return( err( 378, "sql join: syntax error in the 'on' clause", "" ));
		if( rhstab == 1 ) pc_addkey( kf1, s1, s2, 0 );
		else if( rhstab == 2 ) pc_addkey( kf2, s1, s2, *subjoin );
		}
	}


/* at least one binding must belong to this join itself.. */
if( n_chunks < 1 ) return( err( 379, "sql join: invalid 'on' clause", "" ));


return( 0 );
}

/* ==================================== */
/* BUILDSEL - build a primary select command */
static int
buildsel( selcom, tbl, alias, kflist, whereclause )
char *selcom;
char *tbl;
char *alias;
char *kflist;
char *whereclause;
{
int ix;
char buf[512], tok[256];
int tlen, alen;
char tblper[MAXPATH], alper[MAXPATH];
int nclauses;
int othref, tref;
int dotpos, swc;
int first;
int tconstruct;

sprintf( tblper, "%s.", tbl );
tlen = strlen( tblper );
sprintf( alper, "%s.", alias );
alen = strlen( alper );

strcpy( selcom, "select * from " );
strcat( selcom, tbl );
swc = strlen( selcom );
strcat( selcom, " where " );


nclauses = 0;
ix = 0;


/* where */
strcpy( tok, GL_getok( whereclause, &ix ) ); /* 'where' */
if( strcmp( tok, "none" )==0 ) {
	selcom[ swc ] = '\0'; /* truncate */
	goto ORDERBY;
	}

while( 1 ) {
	strcpy( buf, "" );
	othref = 0;
	tref = 0;
	/* get chunk (delimited by 'and') */
	while( 1 ) {
		strcpy( tok, GL_getok( whereclause, &ix ) );
		if( tok[0] == '\0' ) break;
		if( GL_smember( tok, "and &&" )) break;

		/* if token is a tbl.fieldname or alias.fieldname construct, see if it references tbl or not.. */
		/* must be sure that it is not a decimal number, including negative numbers! */
		dotpos = GL_member( '.', &tok[1] );
		if( dotpos > 0 && !isdigit( (int) tok[0] ) && tok[0] != '-' ) tconstruct = 1;
		else tconstruct = 0;

		if( tconstruct ) {
			if( strncmp( tok, tblper, tlen ) ==0 || strncmp( tok, alper, alen ) ==0 ) tref = 1;
			else othref = 1;
			}

		strcat( tok, " " );

		if( tconstruct ) strcat( buf, &tok[ dotpos+1 ] );
		else strcat( buf, tok );
		}
	
	/* buf contains a chunk */
	/* fprintf( stderr, "[tref=%d othref=%d]", tref, othref ); */
	if( tref && !othref ) {
		if( nclauses > 0 ) strcat( selcom, " and " );
		strcat( selcom, buf );
		nclauses++;
		}
	if( tok[0] == '\0' ) break;
	}
if( nclauses == 0 ) selcom[ swc ] = '\0'; /* truncate */

/* order by */
ORDERBY:
strcat( selcom, " order by " );
ix = 0;
first = 1;
while( 1 ) {
	GL_getchunk( tok, kflist, &ix, "," );
	if( tok[0] == '\0' ) break;
	if( !first ) strcat( selcom, ", " );
	strcat( selcom, tok );
	first = 0;
	}
	
return( 0 );
}

/* ====================== */
/* CUSTOM_FUNCTION - placeholder that does nothing and returns 0.
   NOTE(review): presumably present so that a reference to custom_function() from the 
   libraries linked with this program can be resolved - confirm. */
int
custom_function()
{ 
int status;

status = 0;
return( status ); 
}

