/*
Program:   nettee.c
Version:   0.1.6
Date:      11-MAY-2005
Author:    David Mathog, Biology Division, Caltech
email:     mathog@caltech.edu
Copyright: 2005 David Mathog and California Institute of Technology (Caltech)
License:   GNU General Public License 2.
Description:

    A network capable "tee" program.

    Compiles cleanly with:

     % gcc -Wall -pedantic -std=c99 -DNOUSLEEP -D_LARGEFILE64_SOURCE -o nettee nettee.c
     
    except for a warning about gethostname (which can usually be ignored).
    However that uses select() for subsecond waits and that can be
    a bit of a CPU hog.  Alternatively allow usleep and compile like:
    
     % gcc -Wall -D_LARGEFILE64_SOURCE -o nettee nettee.c
     
     The LARGEFILE64_SOURCE part is for support for files >2GB, if your
     OS supports it.  This is probably different for other compilers.
   
Parentage:  nettee is derived from Felix Rauch-Valenti's dolly [0.58C].
            Copyright: Felix Rauch <rauch@inf.ethz.ch> and ETH Zurich.
            nettee differs from dolly in that it is "lighter":  nettee
            passes configuration information only from the command line,
            whereas dolly sends this info down from the server node to
            each node.

Changes: 
   V 0.1.6
         Changed handling of bad byte count to be more in line with conwf,
         colwf.  Added flags.  Changed MSK to ERR_.  Made a distinction
         between errors somewhere in chain (which propagates) and error
         in or just below node, which triggers a name emission on the error
         message but does not propagate further.  This is to allow the
         development of better postmortem tools to figure out which 
         nodes, if any, did something wrong in a chain.
         
         Tru Huynh reported that strlen() used as an argument in
         sprintf causes problems on 64bit systems.  Apparently size_t
         there is an 8 byte integer.  Used an intermediate unsigned int
         variable to work around this.
   V 0.1.5
         Added -conwf and -colwf (Continue on Net/Local write failure). 
         The idea being that if a node fails
         partially it won't kill the whole chain.  Without these a write
         error to either the network or a disk causes the entire chain
         to fold up immediately. With -conwf the node stops writing
         to the failed network(s), effectively converting to _EOC_
         so it and the ones above it can complete the transfer.  With -colwf
         the affected node converts to a relay, not attempting any future
         disk writes.  A node can suffer BOTH failures at which point
         it reads from the upstream node but does nothing further with
         the data.  The node originating the data cannot have either -conwf
         or -colwf specified.
         
         Status handling has been slightly improved.  The package
         which calls nettee should write a status value to a
         permanent file so that it can be read later by some other means
         if a postmortem is desired.  The node sending data can only
         determine if a bytecount failure has occurred and/or if 
         any node in the chain triggered a colwf or conwf condition.
         Ie, from the top one can learn whether or not it is necessary
         to scan the nodes to figure out what an error was.  Status
         returned is now in this format:
         
         XXXXXXXXXXXXXXXX DONE     mask         # done @ bytes read
         
         where the mask indicates any nodes which suffered a (1) conwf or (2)
         colwf, both (3), or neither 0.
         
   V 0.1.4 Does not exist, that of the package changed only documentation
         and scripts.
   V 0.1.3
         Added -stm EOS.  Causes nettee to stream from the input
         source until it encounters the terminator STRING.  The purpose
         of this is to allow nettee to stream commands down a chain with
         little delay.  Without it the read section would wait for a full buffer
         or a socket shutdown.  See -hexamples for more info.
   V 0.1.2
         Added -in socket, -out socket and -cmd 'COMMAND'.  The
         first two are mutually exclusive and COMMAND is what feeds
         into, reads from, the socket.  Since pipe input is
         typically constricted to a few thousand bytes using sockets
         to move the data should typically be faster than a pipe
         when, for instance, using tar at both ends.  Your mileage
         may vary. This code follows an example posted by Grant Taylor
         on Jan 12 2005 in comp.os.linux.development.system with subject:
         "Re:  pipe buffer sizes".
   V 0.1.1 
         Fixed a bug in the open for -out (forgot O_CREAT).
         Added "-w".  This allows the upstream node to wait for
         the next to boot or be attached to the network.  If not
         specified and the next doesn't have a working net the
         data or control connection will fail.
   V 0.1 24-MAR-2005 David Mathog <mathog@caltech.edu>
         The main idea is to strip off the whistles and bells and
         reduce dolly to a simple command line program.
         
         Messages supported for backflow (to ctrlin) are:
         XXXXXXXXXXXXXXXX READY <nodename>  # the number of ready nodes
         XXXXXXXXXXXXXXXX DONE              # done @ bytes read
         Where XX...X is 16X hex format long long.
         
         Messages supported for downflow (to ctrlout) are NONE.
         In Dolly there was a size value sent down.  In this version
         the server keeps track of size and the children just compare the
         DONE value with what they have.  If these differ then the child
         throws an error and passes the bad value up the line.  The children
         see that the data is done when the node above does a shutdown()
         on the socket, which results in a read of 0.
                                              
         NOTE:  The message passing mechanism isn't very robust.  Two
         fast messages might be appended to each other if the reader
         hasn't had a chance to read the buffer before the second one arrives.
         For this early version there is no passing of more complicated
         messages up/down the control stream. 

           
	  
   If you change the history, then please also change the version_string
   right below!  */

/* Version banner; keep it in step with the newest entry of the change history above.
   NOTE(review): the file header gives 11-MAY-2005 while this string says
   12-MAY-2005 - confirm which date is intended. */
static const char version_string[] = "0.1.6, 12-MAY-2005";
/* Copyright notices: c_r_1 for nettee, c_r_2 for dolly, from which nettee is derived. */
static const char c_r_1[] =  "Copyright: 2005 David Mathog and California Institute of Technology";
static const char c_r_2[] =  "Copyright: Felix Rauch <rauch@inf.ethz.ch> and ETH Zurich";

#include <unistd.h>
#include <stdio.h>
#include <errno.h>
#include <limits.h>
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <sys/poll.h>
#include <fcntl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <sys/time.h>
#include <sys/mman.h>
#include <netinet/tcp.h>
#include <assert.h>
#include <ctype.h>
#include <signal.h>


/* Definitions */
#define MAXFANOUT 8

#ifdef  _LARGEFILE64_SOURCE
#define FLSUPPORT "Yes"
#define FLOPT O_LARGEFILE
#else
#define FLSUPPORT "No"
#define FLOPT 0
#endif

/* have mercy on anybody porting this to a nonUnixy OS by placing these
here rather than in the code */

#define DEVSTDIN  "/dev/stdin"
#define DEVSTDOUT "/dev/stdout"
#define DEVSTDERR "/dev/stderr"

/* Size of blocks transf. to/from net/disk and one less than that
On linux it 2.6.8 it speeds up until 4096 and stays the same above
that.
 */
#define T_B_SIZE   4096
#define T_B_SIZEM1 (T_B_SIZE - 1)

#define NETTEE_NONBLOCK 1                 /* Use nonblocking network sockets */

/* verbose levels */
#define VERB_NONE     0
#define VERB_ERROR    1
#define VERB_PARAM    2
#define VERB_MESG     4
#define VERB_STATUS   8
#define VERB_SAYNAME 16

/* special processing for handle_messages */
#define M_EOL    1   /* terminate message with an EOL */
#define M_FFLUSH 2   /* flush stream */

/* log destinations */
#define LOG_FILE   0 /* destination for messages (warnings and error) */
#define LOG_SERVER 1 /* RESERVED FOR FUTURE EXPANSION, logging of messages back up chain*/

/* some status bits.  Those that must be reported to the next node
up the chain are marked with a Y, others with an N.  COLWF must be logged
upwards because the odds are nothing permanent can be written to the
local device to retain a record of this error. */

#define ERR_CHNWF 0x0001 /* Y there were CONWF errors in the chain */
#define ERR_CHLWF 0x0002 /* Y there were COLWF errors in the chain */
#define ERR_CHBBC 0x0004 /* Y there were BB    errors in the chain */
#define ERR_CHBST 0x0008 /* Y there were BSTAT errors in the chain */
#define ERR_CONWF 0x1000 /* N node suffered a network write failure (on the data stream) */
#define ERR_COLWF 0x2000 /* Y node suffered a local   write failure (on the data stream) */
#define ERR_BBC   0x4000 /* N node detected a bad byte count in a child */
#define ERR_BSTAT 0x8000 /* N node received a bad status message from the child */
#define ERR_LMASK 0xF000 /* all local bitf*/
#define ERR_CMASK 0x0FFF /* bit mask which ANDs to remove local error codes */
#define ERR_UMASK 0x2FFF /* all bits that need to be sent upward */

#define SCKBUFSIZE 98304  /* TCP socket buffer sizes (approx. 100KB, multiple of 4 KB) */
#define MYMAXFILENAME 256 /* maximum file name from command line*/
#define MYMAXMESSAGE  256 /* longest control message (including null) */
#define SCANTIME        1 /* wait time while select() for next, 0 seconds */
#define SCANUTIME       0 /* wait time while select() for next, 100000 useconds */
#define NAMESIZE      128

static int  meserver;   /* If true, this machine sends the data. */
static int  melast;     /* If true this machine doesn't have children to send data */
static int  usesocket;  /* 1, read socket, 2 write socket, 0 no sockets */
static int  colwf=0;    /* If true, continue relaying data after a local write failure */
static int  conwf=0;    /* If true, continue storing data locally after a network write failure */
static char bmyhostname[NAMESIZE] = "";  /* storage behind myhostname */
static char *myhostname=&bmyhostname[0];
static char msg[MYMAXMESSAGE] = "";      /* scratch buffer for control messages */
static char msgbuf[MYMAXMESSAGE] = "";   /* scratch buffer for text passed to handle_message() */
static char *cmdstring=NULL;             /* printed by print_params() as "sock. cmd" when set */
static char *shutdown_stream=NULL;       /* printed as "stm EOS"; when non-NULL readnbytes() returns after each successful read */

/* Clients need the ports before they can listen, so we use defaults. */
static unsigned int dataport = 9998;
static unsigned int ctrlport = 9997;

/* File descriptors for file I/O.  Both start out as -1 (written as EOF in
   the original notes).  Meanings:
  for input:
    EOF  nettee (presumably: data arrives from the upstream nettee - confirm)
    valid file descriptor, stdin or a file
  for output
    EOF  none
    valid file descriptor, stdout or a file
*/
static int input  = -1;    /* data stream from stdin             */
static int output = -1;    /* local output data stream to stdout */


/* TCP Segment Size (useful for benchmarking only) */
static int segsize = 0;


/* Normal sockets for data transfer */
static int datain[MAXFANOUT], dataout[MAXFANOUT];
static int datasock = -1;   /* listening socket on which datain[0] is accepted */

/* A place to keep track of socket status and names of next nodes */
static int socketok[MAXFANOUT];
static char *socketname[MAXFANOUT];

/* Special sockets for control information */
static int ctrlin = -1, ctrlout[MAXFANOUT];
static int ctrlsock = -1;   /* listening socket on which ctrlin is accepted */

static unsigned long long maxbytes = 0; /* max bytes to transfer */
static int verbose           = 0;       /* verbose (OR of VERB_* bits) */
static int timeout           = 0;       /* Timeout for startup */
static int waitfornext       = 0;       /* wait for next to boot or attach to net */
static int verbignoresignals = 1;       /* warn on ignore signal errors */

/* Default storage for the file/node names; the pointers below start out
   aimed at these buffers. */
static char tinfile[MYMAXFILENAME];
static char toutfile[MYMAXFILENAME];
static char tlogfile[MYMAXFILENAME];
static char tnextnode[MYMAXFILENAME];
static char *infile   = &tinfile[0];
static char *outfile  = &toutfile[0];
static char *logfile  = &tlogfile[0];
static char *nextnode = &tnextnode[0];
static int nr_children = 0;      /* nodes that receive data directly */
static int descendents = 0;      /* nodes that receive data direct + indirect */
static int ready_mach  = 0;      /* Number of ready machines current */
static FILE *flog;               /* stream that alarm_handler() writes its timeout message to */
static int logdest = LOG_FILE; /* default, log messages to stderr */

/* Prototypes */
/* handle_message, hexamples, insane, insane_perror and
   process_command_line_args are defined outside this part of the file. */
void handle_message(char *string, const int verb_level, const int process);
void hexamples(void);
void insane(char *string);
void insane_perror(char *string);
int  lcl_strcasecmp(char *s1, char *s2);
/* movebytes() was replaced by the readnbytes()/writenbytes() pair; its
   prototype is retained here only as a comment.
static int movebytes(int fd, int dir, char *addr, unsigned int n);
*/
static int readnbytes(int fd, char *addr, unsigned int n);
static int writenbytes(int fd, char *addr, unsigned int n);
void ostrcpy(char *dst, char *src);
void process_command_line_args(int argc, char **argv);
void setinonnegnumeric(int *val,int *numarg,int argc,char **argv,char * label);
void setstring(char **val,int *numarg,int argc,char **argv,char * label);

/* Timeout handler: writes a "Timeout reached" notice to flog (prefixed with
   this node's name when VERB_SAYNAME is set) and terminates the program
   with EXIT_FAILURE.  The argument is not used.
   NOTE(review): presumably installed for SIGALRM - confirm at the call site. */
static void alarm_handler(int arg)
{
  const int say_name = ((verbose & VERB_SAYNAME) != 0);

  (void) arg;
  if(say_name){
    (void) fprintf(flog,"%s: ",myhostname);
  }
  fprintf(flog, "Timeout reached (was set to %d seconds).\nTerminating.\n",
          timeout);
  exit(EXIT_FAILURE);
}

/*
 * Case-insensitive comparison of two NUL-terminated strings.
 *
 * Returns -1 if s1 sorts before s2, 1 if it sorts after, and 0 when the
 * two strings are equal ignoring case.  Neither string is modified.
 *
 * Each character is converted to unsigned char before being handed to
 * toupper(): passing a negative plain char (any byte >= 0x80 where char is
 * signed) to the <ctype.h> functions is undefined behaviour.
 */
int  lcl_strcasecmp(char *s1, char *s2){
  for(; ;s1++,s2++){
    const int c1=toupper((unsigned char) *s1);
    const int c2=toupper((unsigned char) *s2);
    if(c1 < c2)return -1;
    if(c1 > c2)return  1;
    if(c1 == 0)return  0;  /* c2 is also 0 here: both strings ended together */
  }
}

/* Overlay copy: writes the characters of src over the start of dst but
never writes a string terminator, so the tail of dst survives.  Copying
stops at the end of whichever string is shorter, so dst never grows. */

void ostrcpy(char *dst, char *src){
  char *to;
  char *from;

  for(to = dst, from = src; *to != '\0' && *from != '\0'; to++, from++){
    *to = *from;
  }
}

/*
 * Consume the next command line argument as a non-negative decimal integer.
 *
 *   val     receives the parsed value (written only on success)
 *   numarg  index of the option being processed; incremented so that it
 *           indexes the value argument on return
 *   argc    number of entries in argv
 *   argv    command line arguments
 *   label   option name used in error messages
 *
 * Prints a message to stderr and exits with EXIT_FAILURE when the argument
 * is missing, is not an integer, does not fit in an int, or is negative.
 *
 * strtol() is used instead of sscanf("%d") so that trailing garbage
 * ("30s") is rejected rather than silently ignored, and so that values
 * outside the int range are detected instead of invoking undefined
 * behaviour.
 */
void setinonnegnumeric(int *val,int *numarg,int argc,char **argv,char * label){
      char *end = NULL;
      long parsed;

      (*numarg)++;
      if( ( *numarg >= argc ) || (argv[*numarg] == NULL)){
        (void) fprintf( stderr, "nettee: fatal error: %s: missing argument\n",label);
        exit(EXIT_FAILURE);
      }
      errno = 0;
      parsed = strtol(argv[*numarg], &end, 10);
      if(end == argv[*numarg] || *end != '\0' || errno == ERANGE ||
         parsed > INT_MAX || parsed < INT_MIN){
        (void) fprintf(stderr,"nettee: fatal error: Bad integer argument/parameter [%s %s] \n",label,argv[*numarg]);
        exit(EXIT_FAILURE);
      }
      if(parsed < 0){
        (void) fprintf(stderr,"nettee: fatal error: Illegal nonnegative integer argument/parameter [%s %s] \n",label,argv[*numarg]);
        exit(EXIT_FAILURE);
      }
      *val = (int) parsed;
}

/* Consume the next command line argument as a string: advances *numarg and
   points *val at argv[*numarg] (no copy is made).  When there is no such
   argument a message naming label goes to stderr and the program exits
   with EXIT_FAILURE. */
void setstring(char **val,int *numarg,int argc,char **argv,char * label){
      int idx;

      *numarg += 1;
      idx = *numarg;
      if(idx < argc && argv[idx] != NULL){
        *val = argv[idx];
        return;
      }
      (void) fprintf( stderr, "nettee: fatal error: %s: missing argument\n",label);
      exit(EXIT_FAILURE);
}


/* Dump the run-time configuration to stderr, one setting per line.
   Optional settings (socket command, stream terminator, TCP segment
   size) are listed only when they have been set.  Mostly a debugging aid. */
static void print_params(void)
{
  FILE *out = stderr;
  const char *role;

  (void) fprintf(out, "Parameters: \n");
  (void) fprintf(out, "infile    %s\n", infile);
  (void) fprintf(out, "outfile   %s\n", outfile);
  (void) fprintf(out, "logfile   %s\n", logfile);
  if(cmdstring != NULL){
    (void) fprintf(out, "sock. cmd %s\n", cmdstring);
  }
  if(shutdown_stream != NULL){
    (void) fprintf(out, "stm EOS   %s\n", shutdown_stream);
  }
  (void) fprintf(out, "myname    %s\n", myhostname);
  (void) fprintf(out, "nextnode  %s\n", nextnode);
  (void) fprintf(out, "data port %u\n", dataport);
  (void) fprintf(out, "ctrl port %u\n", ctrlport);
  (void) fprintf(out, "timeout   %d\n", timeout);
  (void) fprintf(out, "wait      %d\n", waitfornext);
  if(segsize > 0) {
    (void) fprintf(out, "TCP segment size = %d\n", segsize);
  }

  /* Position of this node in the chain; the server role takes precedence. */
  if(meserver){
    role = "I'm the server = first link in all chains.\n";
  }
  else if(melast){
    role = "I'm at the end of a chain.\n";
  }
  else {
    role = "I'm an internal link in a chain.\n";
  }
  (void) fprintf(out, "%s", role);

  (void) fprintf(out, "Using transfer size %d bytes.\n", T_B_SIZE);
  (void) fprintf(out, "end of Parameters list\n");
}

/*
 * Create the two listening sockets: datasock on dataport and ctrlsock on
 * ctrlport, both bound to INADDR_ANY with a backlog of 1.
 *
 * The data socket additionally gets SO_REUSEADDR, TCP_NODELAY, an optional
 * TCP_MAXSEG (when segsize > 0) and a receive buffer of SCKBUFSIZE bytes.
 * TCP_NODELAY / TCP_MAXSEG failures are reported through handle_message()
 * and are not fatal; a SO_RCVBUF failure terminates the program.  Other
 * failures are handed to insane_perror().
 *
 * SO_RCVBUF takes a pointer to an int holding the requested size, with an
 * option length of sizeof(int).  The earlier code passed the address of a
 * malloc'ed pointer and a length of SCKBUFSIZE, so the kernel interpreted
 * part of a pointer value as the size, and the allocation was leaked.
 */
static void open_insocks(void)
{
  struct sockaddr_in addr;
  int optval;
  int rcvbuf_req = SCKBUFSIZE;
  int recv_size = 0;
  socklen_t optlen = sizeof(recv_size);

  /* All machines have an incoming data link */
  datasock = socket(PF_INET, SOCK_STREAM, 0);
  if(datasock == -1) {
    insane_perror("Opening input data socket");
  }
  optval = 1;
  if(setsockopt(datasock, SOL_SOCKET, SO_REUSEADDR,
                &optval, sizeof(optval)) == -1) {
    insane_perror("setsockopt on datasock");
  }

  /* Attempt to set TCP_NODELAY */
  optval = 1;
  if(setsockopt(datasock, IPPROTO_TCP, TCP_NODELAY,
                &optval, sizeof(optval)) < 0) {
    snprintf(msgbuf, sizeof(msgbuf), "setsockopt: TCP_NODELAY failed! errno = %d", errno);
    handle_message(msgbuf,VERB_ERROR,M_EOL); /*an error, but not a fatal one */
  }

  if(segsize > 0) {
    /* Attempt to set TCP_MAXSEG */
    snprintf(msgbuf, sizeof(msgbuf), "Set TCP_MAXSEG to %d bytes", segsize);
    handle_message(msgbuf,VERB_MESG,M_EOL);
    if(setsockopt(datasock, IPPROTO_TCP, TCP_MAXSEG,
                  &segsize, sizeof(segsize)) < 0) {
      snprintf(msgbuf, sizeof(msgbuf), "setsockopt: TCP_MAXSEG failed! errno=%d", errno);
      handle_message(msgbuf,VERB_ERROR,M_EOL); /*an error, but not a fatal one */
    }
  }

  /* MATHOG, set a large buffer for the data socket, this section is
     taken from NETPIPE. */
  /* Attempt to set input BUFFER sizes */
  if(setsockopt(datasock, SOL_SOCKET, SO_RCVBUF,
                &rcvbuf_req, sizeof(rcvbuf_req)) < 0) {
    (void) fprintf(stderr, "setsockopt: SO_RCVBUF failed! errno = %d\n",
                   errno);
    /* Historic exit code; only its low 8 bits (44) reach the parent. */
    exit(556);
  }
  /* Report what the system actually granted; failing to query it is not fatal. */
  if(getsockopt(datasock, SOL_SOCKET, SO_RCVBUF, &recv_size, &optlen) < 0) {
    snprintf(msgbuf, sizeof(msgbuf), "getsockopt: SO_RCVBUF failed! errno = %d", errno);
    handle_message(msgbuf,VERB_ERROR,M_EOL);
  }
  else {
    snprintf(msgbuf, sizeof(msgbuf), "Receive buffer is %d bytes", recv_size);
    handle_message(msgbuf,VERB_MESG,M_EOL);
  }

  memset(&addr, 0, sizeof(addr));   /* also clears the sin_zero padding */
  addr.sin_family = AF_INET;
  addr.sin_port = htons(dataport);
  addr.sin_addr.s_addr = htonl(INADDR_ANY);
  if (bind(datasock, (struct sockaddr *) &addr, sizeof(addr)) == -1) {
    insane_perror("binding input data socket");
  }
  if(listen(datasock, 1) == -1) {
    insane_perror("listen input data socket");
  }

  /* All machines have an incoming control link */
  ctrlsock = socket(PF_INET, SOCK_STREAM, 0);
  if(ctrlsock == -1) {
    insane_perror("Opening input control socket");
  }
  optval = 1;
  if(setsockopt(ctrlsock, SOL_SOCKET, SO_REUSEADDR,
                &optval, sizeof(optval)) == -1) {
    insane_perror("setsockopt on ctrlsock");
  }
  memset(&addr, 0, sizeof(addr));
  addr.sin_family = AF_INET;
  addr.sin_port = htons(ctrlport);
  addr.sin_addr.s_addr = htonl(INADDR_ANY);
  if (bind(ctrlsock, (struct sockaddr *) &addr, sizeof(addr)) == -1) {
    insane_perror("binding input control socket");
  }
  if(listen(ctrlsock, 1) == -1) {
    insane_perror("listen input control socket");
  }
}

/*
 * Connect this node to each of its children.
 *
 * The global nextnode string is split in place (strtok, separators
 * space , tab :) into at most MAXFANOUT host names; socketname[i] keeps
 * pointing into that string.  For every child i a data socket dataout[i]
 * and a control socket ctrlout[i] are created, the data socket is tuned
 * (TCP_NODELAY, optional TCP_MAXSEG, SCKBUFSIZE send buffer) and both are
 * connected to dataport / ctrlport on the child.  nr_children is set to the
 * number of children found.
 *
 * connect() is retried while the child refuses the connection (and, when
 * waitfornext is set, while the host is unreachable or down); any other
 * connect error terminates the program.  With NETTEE_NONBLOCK the data
 * socket is switched to non-blocking mode once connected.
 *
 * Fixes relative to the earlier version:
 *  - SO_SNDBUF is given an int size rather than the address of a malloc'ed
 *    pointer (which also leaked one buffer per child), and the resulting
 *    size is read back with SO_SNDBUF instead of SO_RCVBUF;
 *  - host names and messages are formatted with bounded writes, since the
 *    node list can come from the command line at arbitrary length;
 *  - a non-AF_INET lookup result is fatal instead of being copied into a
 *    sockaddr_in that is too small for it;
 *  - the select() timeout is re-initialised on every pass, because select()
 *    may modify it, which turned the retry wait into a busy loop.
 */
static void open_outsocks(void)
{
  struct hostent *hent;
  struct sockaddr_in addrdata, addrctrl;
  int ret;
  int dataok, ctrlok;
  int i;
  int n;
  int optval;
  char hn[256+32];
  int sndbuf_req;
  int send_size;
  socklen_t optlen;
  char *onenextnode;
  char *lnextnode;

  nr_children=0;
  lnextnode=nextnode;
  for(i = 0; i < MAXFANOUT; i++) {  /* For all children we have */
    onenextnode=strtok(lnextnode," ,\t:");
    lnextnode=NULL;
    if(onenextnode==NULL)break;

    n = snprintf(hn, sizeof(hn), "%s", onenextnode);
    if(n < 0 || (size_t) n >= sizeof(hn)) {
      (void) fprintf(stderr, "nettee: fatal error: next node name is too long: %s\n",
                     onenextnode);
      exit(EXIT_FAILURE);
    }
    nr_children++;

    hent = gethostbyname(hn);
    if(hent == NULL) {
      char str[sizeof(hn) + 64];
      snprintf(str, sizeof(str), "gethostbyname for host '%s' error %d",
               hn, h_errno);
      herror(str);
      exit(EXIT_FAILURE);
    }
    /* Only IPv4 addresses fit into the sockaddr_in filled in below. */
    if(hent->h_addrtype != AF_INET ||
       hent->h_length > (int) sizeof(addrdata.sin_addr)) {
      fprintf(stderr, "Expected h_addrtype of AF_INET, got %d\n",
              hent->h_addrtype);
      exit(EXIT_FAILURE);
    }

    snprintf(msgbuf, sizeof(msgbuf), "Connecting to host %s... ",hn);
    handle_message(msgbuf, VERB_MESG, M_EOL);

    /* The creation of the sockets must be before the connect loop! */
    dataout[i] = socket(PF_INET, SOCK_STREAM, 0);
    if(dataout[i] == -1) {
      handle_message("Opening output data socket", VERB_MESG,M_EOL);
      exit(EXIT_FAILURE);
    }
    socketok[i]=1;
    socketname[i]=onenextnode;

    /* Every child gets its own control socket.  (The former guard
       "nr_children > 1 || i == 0" was true on every pass.) */
    ctrlout[i] = socket(PF_INET, SOCK_STREAM, 0);
    if(ctrlout[i] == -1) {
      handle_message("Opening output control socket", VERB_MESG,M_EOL);
      exit(EXIT_FAILURE);
    }

    /* Attempt to set TCP_NODELAY */
    optval = 1;
    if(setsockopt(dataout[i], IPPROTO_TCP, TCP_NODELAY,
                  &optval, sizeof(optval)) < 0) {
      snprintf(msgbuf, sizeof(msgbuf), "setsockopt: TCP_NODELAY failed! errno = %d\n",errno);
      handle_message(msgbuf, VERB_ERROR, M_EOL); /* Nonfatal error */
    }

    if(segsize > 0) {
      /* Attempt to set TCP_MAXSEG */
      snprintf(msgbuf, sizeof(msgbuf), "Set TCP_MAXSEG to %d bytes\n", segsize);
      handle_message(msgbuf, VERB_MESG, M_EOL);
      if(setsockopt(dataout[i], IPPROTO_TCP, TCP_MAXSEG,
                    &segsize, sizeof(segsize)) < 0) {
        snprintf(msgbuf, sizeof(msgbuf), "setsockopt: TCP_MAXSEG failed! errno = %d\n",errno);
        handle_message(msgbuf, VERB_ERROR, M_EOL); /* Nonfatal error */
      }
    }

    /* MATHOG, set a large buffer for the data socket, this section is
       taken from NETPIPE */

    /* Attempt to set output BUFFER sizes */
    sndbuf_req = SCKBUFSIZE;
    if(setsockopt(dataout[i], SOL_SOCKET, SO_SNDBUF,
                  &sndbuf_req, sizeof(sndbuf_req)) < 0) {
      (void) fprintf(stderr,
                     "setsockopt: SO_SNDBUF failed! errno = %d\n",
                     errno);
      /* Historic exit code; only its low 8 bits (44) reach the parent. */
      exit(556);
    }
    /* Report what the system actually granted; failing to query it is not fatal. */
    send_size = 0;
    optlen = sizeof(send_size);
    if(getsockopt(dataout[i], SOL_SOCKET, SO_SNDBUF, &send_size, &optlen) < 0) {
      snprintf(msgbuf, sizeof(msgbuf), "getsockopt: SO_SNDBUF failed! errno = %d", errno);
      handle_message(msgbuf,VERB_ERROR,M_EOL);
    }
    else {
      snprintf(msgbuf, sizeof(msgbuf), "Send buffer %d is %d bytes", i, send_size);
      handle_message(msgbuf,VERB_MESG,M_EOL);
    }

    /* Setup data port */
    memset(&addrdata, 0, sizeof(addrdata));
    addrdata.sin_family = hent->h_addrtype;
    addrdata.sin_port = htons(dataport);
    memcpy(&addrdata.sin_addr, hent->h_addr, hent->h_length);

    /* Setup control port */
    memset(&addrctrl, 0, sizeof(addrctrl));
    addrctrl.sin_family = hent->h_addrtype;
    addrctrl.sin_port = htons(ctrlport);
    memcpy(&addrctrl.sin_addr, hent->h_addr, hent->h_length);

    /* Wait until we connected to everything... */
    dataok  = ctrlok = 0;
    while(1) {
      if(!dataok) {
        ret = connect(dataout[i],
                      (struct sockaddr *)&addrdata, sizeof(addrdata));
        if(ret != -1){
          dataok = 1;
#ifdef NETTEE_NONBLOCK
          ret = fcntl(dataout[i], F_SETFL, O_NONBLOCK);
          if(ret == -1) {
            perror("fcntl");
          }
#endif /* NETTEE_NONBLOCK */
          handle_message("Data connected",VERB_MESG,M_EOL | M_FFLUSH);
        }
        else {
          /* For ECONNREFUSED keep polling until it connects or the timeout expires */
          if(errno != ECONNREFUSED){
            /* If waitfornext is set then we also ignore
               No route to host and/or host is down errors */
            if(!waitfornext || (errno != EHOSTUNREACH && errno != EHOSTDOWN)){
              (void) fprintf(stderr,"data connection failed\n");
              exit(EXIT_FAILURE);
            }
          }
        }
      }
      if(!ctrlok) {
        ret = connect(ctrlout[i],
                      (struct sockaddr *)&addrctrl, sizeof(addrctrl));
        if(ret != -1){
          ctrlok = 1;
          if(verbose & VERB_MESG) {
            handle_message("Control connected",VERB_MESG,M_EOL | M_FFLUSH);
          }
        }
        else {
          /* For ECONNREFUSED keep polling until it connects or the timeout expires */
          if(errno != ECONNREFUSED) {
            /* If waitfornext is set then we also ignore
               No route to host and/or host is down errors */
            if(!waitfornext || (errno != EHOSTUNREACH && errno != EHOSTDOWN)){
              (void) fprintf(stderr,"control connection failed\n");
              exit(EXIT_FAILURE);
            }
          }
        }
      }
      if(dataok + ctrlok == 2)break;
#ifdef NOUSLEEP
      {
        /* select() may overwrite the timeout, so build it afresh each pass */
        struct timeval ssleep;
        ssleep.tv_sec = SCANTIME;
        ssleep.tv_usec= SCANUTIME;
        (void) select(1,NULL,NULL,NULL,&ssleep);
      }
#else
      usleep(100000);
#endif
    } /* End of while waiting for connections */
  }
}

/*
 * If "try_hard" is 1, call must be succesful.
 * If try_hard is 1 and an input file can't be opened, the program terminates.
 * If try_hard is not 1 and an input file can't be opened, -1 is returend.
 *
 * Note that "socket" opens stdin.  stdin is ignored for the child process
 * which sends its output to stdout which is hooked to stdin of the parent.
 * So the parent proceeds as if "-in -" instead of "-in socket".
 */
static int open_infile(int try_hard)
{
  char name[256+16];

  strcpy(name, infile);
  
  /* Input is from file or stdin */
  if(lcl_strcasecmp(name,"-")==0   ||
     lcl_strcasecmp(name,"socket")==0
     ){
    input = open(DEVSTDIN, O_RDONLY);
  }
  else {
    input = open(name, O_RDONLY | FLOPT );
  }
  if(input == -1) {
    if(try_hard == 1) {
      char str[256];
      sprintf(str, "nettee: fatal error: could not open input file '%s'", name);
      insane(str);
    } else {
      return -1;
    }
  }
  return 0;
}

/*
  Note that "socket" opens stdout.  stdout is ignored for the child process
  which reads from stdin (hooked to the parent's stdout) and then writes directly
  to an output file or files.
  So the parent proceeds as if "-out -" instead of "-out socket".
*/
static int open_outfile(int try_hard)
{
  char name[256+16];

  strcpy(name, outfile);
  /* Setup the output files. */
  /* Output is to stdout, nowhere, or a local file */
  if(lcl_strcasecmp(name,"-")==0   ||
     lcl_strcasecmp(name,"socket")==0
    ){
    output = open(DEVSTDOUT, O_WRONLY);
  }
  else if(lcl_strcasecmp(outfile,"none")==0){
    output = -1;
    return 0;  /* there may be no local output, subsequent commands must handle this*/
  }
  else{
    output = open(name, O_CREAT | O_WRONLY | O_TRUNC | FLOPT);
  }
  if(output == -1) {
    if(try_hard == 1) {
      char str[256];
      sprintf(str, "nettee: fatal error: could not open output file '%s'", name);
      insane(str);
      exit(EXIT_FAILURE);
    } else {
      return -1;
    }
  }
  return 0;
}

/* Direction codes for movebytes(), which now exists only as the
   commented-out code below.
   NOTE(review): no other use is visible in this part of the file - confirm
   before removing. */
#define WRITE 1
#define READ 2

/*
static int movebytes(int fd, int dir, char *addr, unsigned int n)
{
  int ret, bytes;
  
  bytes = 0;
  
  while(0 != n) {
    if(dir == WRITE) {
      ret = write(fd, addr, n);
    } else if(dir == READ) {
      ret = read(fd, addr, n);
    } else {
      fprintf(stderr, "Bad direction in movebytes!\n");
      ret = 0;
    }
    if(ret == -1) {
#ifdef NETTEE_NONBLOCK
      if(errno == EAGAIN) {
	continue;
      }
#endif 
      insane_perror("movebytes read/write");
    } else if(ret == 0){
      break;
    } else {
      addr += ret;
      n -= ret;
      bytes += ret;
      if(dir == READ && shutdown_stream != NULL){
         addr[bytes]='\0';
         break;
      }
    }
  }
  return bytes;
}
*/

/* MATHOG, tested the return values for a chain and a roughly
500 Mb file and found:
                    -1  0  >0
   head node: 48964074  2  425837
   int. node:  2575630  3  624735
   tail node:        0  2  508213
   
   gprof showed;
   
   head node spent 98% in movebytes
   int. node spent some time 51% in transmit, 49% in movebytes
   tail node spent some time 52% in transmit, 48% in movebytes
*/

/*
 * Write n bytes starting at addr to fd, looping over short writes.
 *
 * Returns the number of bytes written: n on complete success, less if
 * write() reported 0.  Returns -1 on a write error.
 *
 * A write interrupted by a signal (EINTR) is retried instead of being
 * reported as a failure.  With NETTEE_NONBLOCK, when the descriptor is not
 * ready (EAGAIN/EWOULDBLOCK) the function waits in poll() until it becomes
 * writable rather than spinning on write(), which consumed a full CPU while
 * the peer was slow.
 */
static int writenbytes(int fd, char *addr, unsigned int n)
{
  int bytes = 0;

  while(0 != n) {
    ssize_t ret = write(fd, addr, n);
    if(ret > 0) {
      addr += ret;
      n -= (unsigned int) ret;
      bytes += (int) ret;
      continue;
    }
    if(ret == 0) {
      break;
    }
    if(errno == EINTR) {
      continue;
    }
#ifdef NETTEE_NONBLOCK
    if(errno == EAGAIN || errno == EWOULDBLOCK) {
      struct pollfd pfd;
      pfd.fd = fd;
      pfd.events = POLLOUT;
      pfd.revents = 0;
      /* The result is deliberately ignored: whatever poll() reports, the
         retried write() returns the authoritative status. */
      (void) poll(&pfd, 1, -1);
      continue;
    }
#endif /* NETTEE_NONBLOCK */
    return -1;
  }
  return bytes;
}

/*
 * Read up to n bytes from fd into addr.
 *
 * Normal mode (shutdown_stream == NULL): keeps reading until n bytes have
 * arrived or read() returns 0 (end of data).
 * Stream mode (shutdown_stream != NULL): returns after the first
 * successful read and places a '\0' directly after the bytes received, so
 * the buffer must have room for n + 1 bytes.
 * NOTE(review): the buffer is allocated by the caller, outside this part
 * of the file - confirm it is at least n + 1 bytes.
 *
 * Returns the number of bytes read (0 at end of data), or -1 on a read
 * error.
 *
 * Fixes relative to the earlier version:
 *  - the terminator was stored at addr[bytes] after addr had already been
 *    advanced by the same amount, i.e. at twice the intended offset, which
 *    could fall outside the buffer and left the data unterminated;
 *  - EINTR is retried instead of being reported as an error;
 *  - with NETTEE_NONBLOCK, EAGAIN/EWOULDBLOCK waits in poll() for data
 *    instead of spinning on read().
 */
static int readnbytes(int fd, char *addr, unsigned int n)
{
  int bytes = 0;

  while(0 != n) {
    ssize_t ret = read(fd, addr, n);
    if(ret > 0) {
      addr += ret;
      n -= (unsigned int) ret;
      bytes += (int) ret;
      if(shutdown_stream != NULL){
        *addr = '\0';   /* addr now points just past the received data */
        break;
      }
      continue;
    }
    if(ret == 0) {
      break;
    }
    if(errno == EINTR) {
      continue;
    }
#ifdef NETTEE_NONBLOCK
    if(errno == EAGAIN || errno == EWOULDBLOCK) {
      struct pollfd pfd;
      pfd.fd = fd;
      pfd.events = POLLIN;
      pfd.revents = 0;
      /* The result is deliberately ignored: whatever poll() reports, the
         retried read() returns the authoritative status. */
      (void) poll(&pfd, 1, -1);
      continue;
    }
#endif /* NETTEE_NONBLOCK */
    return -1;
  }
  return bytes;
}

static void buildchains(void)
{
  socklen_t size;
  int ret, i;
  int answered=0;
  char info_buf[1024];
  unsigned long long templong;  /* Number of nodes sent up from below which are said to be ready */
  char *p;
  unsigned int mlen;
  int maxsetnr = -1;
  fd_set real_set, cur_set;

  datain[0]=0;
  if(!meserver) {
    /* Open the input sockets and wait for connections... */
    open_insocks();
    if(verbose & VERB_MESG) {
      fprintf(stderr, "Accepting...\n");
      fflush(stderr);
    }
    
    /* All except the first accept a connection now */
    ctrlin = accept(ctrlsock, NULL, &size);
    if(ctrlin == -1) {
      insane_perror("accept input control socket");
    }
    /*After this handle_message could return a message to sender */
    handle_message("ControlIn ok", VERB_MESG, M_FFLUSH | M_EOL);
    
    datain[0] = accept(datasock, NULL, &size);
    if(datain[0] == -1) {
      insane_perror("accept input data socket");
    }
    handle_message("DataIn ok", VERB_MESG, M_FFLUSH | M_EOL);
    /* The input sockets are now connected. */

  }

  if(meserver) {
    open_infile(1);
  } else {
    open_outfile(1);
  }

  /* Finally, all but the leaf nodes accept a backflow connection
     and open output sockets.  The leaf nodes skip this and just send
     an I'm ready to the nodes upstream from them.  That signal is
     "00...01 READY <nodename>" (as text).  So the initialization goes
     something like this:
     
     node0:waiting <-> node1:waiting <-> node2:waiting <-> node3(terminal):ready
     node0:waiting <-> node1:waiting <-> node2:ready   <-> node3(terminal):ready
     node0:waiting <-> node1:ready   <-> node2:ready   <-> node3(terminal):ready
     node0:ready   <-> node1:ready   <-> node2:ready   <-> node3(terminal):ready
     
     As each node picks up a ready from the nodes underneath it and progresses
     to the ready state it adds one to the sum it received from below.  This way
     the top node can learn how many nodes are in the complete distribution chain.
     Ie, the top chain will receive something like: 0...0123 READY and will emit,
     if verbose is set that way "123 nodes in distribution chain below server.
     

  */
  if(!melast) {
    open_outsocks();


    /* Wait for backflow-information or the data socket connection */
    handle_message("Waiting for node chain(s) to build...", VERB_MESG, M_EOL);
    FD_ZERO(&real_set);
    maxsetnr = -1;
    for(i = 0; i < nr_children; i++) {
      FD_SET(ctrlout[i], &real_set);
      if(ctrlout[i] > maxsetnr) {
	maxsetnr = ctrlout[i];
      }
    }
    maxsetnr++;
    while(ready_mach < nr_children){
      cur_set = real_set;
      ret = select(maxsetnr, &cur_set, NULL, NULL, NULL);
      if(ret == -1) {
	handle_message("nettee: fatal error: select in buildchains()",VERB_ERROR,M_EOL);
	exit(EXIT_FAILURE);
      }
      for(i = 0; i < nr_children; i++) {
	if(FD_ISSET(ctrlout[i], &cur_set)) {
	  ret = read(ctrlout[i], info_buf, 1024);
	  if(ret == -1) {
	    handle_message("nettee: fatal error: read status -1 in backflow in buildchains",VERB_ERROR,M_EOL);
	    exit(EXIT_FAILURE);
	  }
	  if(ret == 0) {
	    handle_message("nettee: fatal error: read status 0 in backflow in buildchains",VERB_ERROR,M_EOL);
	    exit(EXIT_FAILURE);
	  } else {
          
	    
	    p = info_buf;
	    info_buf[ret] = 0;
            if(sscanf(p,"%llx %s",&templong,msg)!=2){
	      sprintf(msgbuf, "nettee: fatal error: downstream node sent bad READY >%s<",p);
	      handle_message(msgbuf,VERB_ERROR, M_EOL);
              exit(EXIT_FAILURE);
            }
            /* This section should only see READY, MESG or ERROR messages */
            if(strcmp(msg,"READY")==0){
              if(templong==0){
	        sprintf(msgbuf, "nettee: fatal error: downstream node sent bad READY >%s<",p);
	        handle_message(msgbuf,VERB_ERROR, M_EOL);
                exit(EXIT_FAILURE);
              }
              ready_mach += templong;
              answered++;
	      if(verbose & VERB_MESG) {
                sprintf(msgbuf, "Children waiting:  %d",nr_children - answered);
                handle_message(msgbuf, VERB_MESG,M_EOL);
              }
            }
            else if (strcmp(msg,"MESG")==0){
              if(meserver){ ostrcpy(p,"CHAIN:"); }
              handle_message(p, VERB_MESG, M_EOL);
            }
            else if (strcmp(msg,"ERROR")==0){
              handle_message(p, VERB_ERROR, M_EOL);
              exit(EXIT_FAILURE);
            }
            else { /* Done or some whacky message, neither should happen */
              handle_message("bad return message in setup section", VERB_ERROR, M_EOL);
              handle_message(p, VERB_ERROR, M_EOL);
              exit(EXIT_FAILURE);
            }
	  }
	}
      } /* For all children */
      sprintf(msgbuf, "Children ready:    %d", answered);
      handle_message(msgbuf, VERB_MESG, M_EOL);
    }
  }

  handle_message("Connections Accepted", VERB_MESG, M_EOL);
  
  descendents=ready_mach;
  
  if(!meserver) {
     ready_mach++;    /* This node is now also ready */
    
    /* Give information back to server */

      
    templong=ready_mach;
    mlen=sprintf(msg, "%llx READY %s\n",templong,myhostname);
    ret = writenbytes(ctrlin, msg, mlen);
    if(ret != mlen) {
      sprintf(msgbuf, "Couldn't write ready-message back to server "
	      "(sent %d instead of %u bytes)", ret, mlen);
      handle_message(msgbuf, VERB_ERROR, M_EOL);
      exit(EXIT_FAILURE);
    }
  }
}

/*
 * The main transmitting function.
 *
 * Phases, in order:
 *   1. If usesocket is set, create a socketpair and fork.  The child
 *      attaches its end to stdout (usesocket==1) or stdin (usesocket==2)
 *      and execs "sh -c cmdstring"; the parent uses its end as input
 *      (usesocket==1) or output (usesocket==2).
 *   2. Allocate the transfer buffer.
 *   3. Transfer loop.
 *        Server: read from input, write each chunk to every dataout[i].
 *                The loop ends when the shutdown_stream string is seen,
 *                or on a zero-length read when no shutdown_stream is set.
 *        Client: select() on datain[0], read a chunk, forward it to every
 *                child whose socketok[i] is set and to output (unless
 *                output is -1).  The loop ends when the read returns <= 0.
 *      When the loop ends the dataout sockets are shut down.
 *   4. Report byte counts and timing through handle_message().
 *   5. Unless melast is set, read one status message from each child's
 *      ctrlout[i], compare its byte count with maxbytes and collect
 *      error bits.
 *   6. Unless meserver is set, send "<bytes> DONE <status_mask>" to ctrlin
 *      and shut down ctrlin and datain[0].
 *
 * Takes no parameters and returns nothing; all configuration and all
 * descriptors come from file-level variables.  Fatal conditions terminate
 * the process through insane(), insane_perror() or exit().
 */
static void transmit(void)
{
  char *buf_addr, *buf;
  unsigned long long  transbytes = 0, lastout = 0;
  unsigned long long cmaxbytes = 0; /* byte count reported by a child */
  unsigned int bytes = T_B_SIZE;
  int ret = 1, maxsetnr = 0;
  unsigned long td = 0, tdlast = 0; /* elapsed microseconds: total, since last report */
  int i,cycle;
  int foundfd;
  struct timeval tv1, tv2, tv3;     /* start, latest, previous report */
  int setmaxbytes=0;  /* a signal for when the end of file has been read */
  fd_set real_set, cur_set;
  int sv[2],child;              /* these are used for the socket, maybe */
  int status_mask=0;            /* error bits accumulated on this node */
  int child_smask;              /* error bits for the child being examined */
  int oret;
  unsigned int mlen;

  if(usesocket){

    /* Linux-specific anonymous Unix domain socket */
    if (!socketpair(AF_UNIX, SOCK_STREAM, 0, sv)) {

/* For now this code uses the default socket buffer sizes since
   default = max on linux (currently).  To increase buffer size up
   to system max utilize the following code:

      int sov=2048*1024;
      setsockopt(sv[0], SOL_SOCKET, SO_SNDBUF, &sov, sizeof(int));
      setsockopt(sv[1], SOL_SOCKET, SO_RCVBUF, &sov, sizeof(int));
*/
      child=fork();
      if (child > 0) {
        /* parent  && usesocket = 1:  read from socket
           parent  && usesocket = 2;  write to  socket
        */
        if(usesocket==1){
          /* read from socket */
          close(sv[1]);    /* close unused side of the socket */
          input=sv[0];     /* input will be from socket */
        }
        else if(usesocket==2){
          /* write to socket */
          close(sv[0]);     /* close unused side of the socket */
          output=sv[1];     /* output will be to socket */
        }
      } else if (child == 0) {
        /* child   && usesocket = 1:  read from disk write to stdout
           child   && usesocket = 2;  read from stdin and write to disk
        */
        if(usesocket==1){
          /* child runs tar or something and sends output to stdout */
          close(sv[0]);     /* close the parent's side of the socket */
          dup2(sv[1], 1);   /* the socket becomes stdout */
        }
        else if(usesocket==2){
          /* child runs a program that reads from stdin and sends output to disk */
          close(sv[1]);     /* close the parent's side of the socket */
          dup2(sv[0], 0);   /* the socket becomes stdin */
        }
        /* The sentinel of a variadic exec call must be a null *pointer*;
           a bare NULL may be a plain integer 0. */
        execlp("sh", "sh", "-c", cmdstring, (char *)NULL);
        insane("nettee: fatal error: exec for -cmd failed");
      } else {
        insane("nettee: fatal error: fork for -cmd failed");
      }
    } else {
      insane("nettee: fatal error: could not create a socket for -cmd");
    }
  }

  /* Over-allocate, then round the working pointer up to a boundary.
     NOTE(review): this presumes T_B_SIZEM1 is T_B_SIZE-1 with T_B_SIZE a
     power of two - confirm against the definitions. */
  buf_addr = (char *)malloc(2 * T_B_SIZEM1);
  if(buf_addr == NULL){
    insane("nettee: fatal error: could not allocate the transfer buffer");
  }
  buf = (char *)((unsigned long)(buf_addr + T_B_SIZEM1) & (~T_B_SIZEM1));

  maxbytes = 0;
#define ADJ_MAXSET(a) if((a)>maxsetnr){maxsetnr=(a);}
  if(!meserver) {
    FD_ZERO(&real_set);
    FD_SET(datain[0], &real_set);
    ADJ_MAXSET(datain[0]);
    maxsetnr++;          /* select() wants highest descriptor + 1 */
  }
#undef ADJ_MAXSET

  gettimeofday(&tv1, NULL);
  tv2 = tv1;

  cycle=1;
  while(cycle) {
    /* The server writes data as long has it can to the data stream.
       When there's nothing left it shuts down the dataout socket.
       The clients read that shutdown as the end of data.
     */
    if(meserver) {
      /*
       * Server part
       */
      ret = readnbytes(input, buf, bytes);
      maxbytes += ret;
      if(ret > 0) {
        /* NOTE(review): the comparison covers ret-1 bytes, presumably to
           ignore a trailing newline; a chunk that is only a prefix of the
           terminator also compares equal - confirm this is intended. */
        if( (shutdown_stream != NULL) &&
            (strncmp(buf,shutdown_stream,ret-1)==0)  ){
          /* Do NOT write the terminator string to the data stream
             Force an exit from the transmit loop
             Fix up maxbytes, it is ret too high */
          maxbytes -= ret;
          cycle=0;
          handle_message("Encountered the End Of Stream string",VERB_MESG, M_EOL);
        }
        else {
          for(i = 0; i < nr_children; i++) {
            oret=writenbytes(dataout[i], buf, ret);
            if(oret<0){
              insane_perror("nettee: fatal error writing to child");
            }
          }
        }
      }
      else if(ret==0) {
        /* If shutdown_stream is defined then we expect a lot of
           ret=0 returns while it waits for input.  If not defined
           then force an exit. */
        if(shutdown_stream == NULL){ cycle=0; }
      }
      else {
        insane_perror("nettee: fatal read on input");
      }
      if(cycle==0){
        /* Flush any remaining output data in dataout, whatever the cause of the exit */
        for(i = 0; i < nr_children; i++) {
          if(socketok[i]){
            if(0!=shutdown(dataout[i], SHUT_RDWR)){
              sprintf(msgbuf,"Error (1) on shutdown of dataout[%d] to %s",
                   i,socketname[i]);
              handle_message(msgbuf,VERB_ERROR, M_EOL);
            }
          }
        }
        setmaxbytes=1;
      }
      /* periodic progress line, at most once per 10,000,000 bytes */
      if((verbose & VERB_STATUS) && (maxbytes - lastout >= 10000000)) {
        tv3=tv2;
        gettimeofday(&tv2, NULL);
        td = (tv2.tv_sec*1000000 + tv2.tv_usec)
          - (tv1.tv_sec*1000000 + tv1.tv_usec);
        tdlast = (tv2.tv_sec*1000000 + tv2.tv_usec)
          - (tv3.tv_sec*1000000 + tv3.tv_usec);
        sprintf(msgbuf,
                "\rSent MB: %.0f, MB/s: %.3f, Current MB/s: %.3f      ",
                (float)maxbytes/1000000,
                (float)maxbytes/td,(float)(maxbytes - lastout)/tdlast);
        handle_message(msgbuf, VERB_STATUS, M_FFLUSH);
        lastout = maxbytes;
      }
      if(setmaxbytes){
        sprintf(msgbuf, "\nRead %llu bytes from file(s).", maxbytes);
        handle_message(msgbuf, VERB_MESG, M_EOL);
      }
    } else {
      /*
       * Client part
       */
      unsigned int i;      /* deliberately local; shadows the outer i */
      cur_set = real_set;  /* No need to use FD_CLR since cur_set is copied each time */
      ret = select(maxsetnr, &cur_set, NULL, NULL, NULL);
      if(ret == -1) {
        if(errno != EINTR) {
          /* Anything other than an interrupted call is fatal. */
          insane_perror("select");
        }
        /* EINTR: some signal was received, don't have a handler, ignore it. */
        if(verbignoresignals) {
          sprintf(msgbuf,
                        "\nIgnoring unhandled signal (select() returned %d.",
                        ret);
          handle_message(msgbuf, VERB_MESG, M_EOL);
        }
        continue;
      }

      foundfd  = 0;

      /* Unlike Dolly nettee has neither downflow nor backflow messages
         during the transmit phase. */

      if(FD_ISSET(datain[0], &cur_set)) {
        ret = readnbytes(datain[0], buf, T_B_SIZE);
        if(ret > 0) {
          /* forward to every child that is still writable */
          for(i = 0; i < nr_children; i++) {
            if(socketok[i]){
              oret=writenbytes(dataout[i], buf, ret);
              if(oret<0){
                if(conwf){
                  socketok[i]=0;
                  status_mask |= ERR_CONWF ; /* block out all further network writes */
                  status_mask |= ERR_CHNWF ; /* this bit will propagate up the chain */
                }
                else {
                  insane_perror("nettee: fatal error writing to child");
                }
              }
            }
          }
          /* local copy, unless disabled or a previous local write failed */
          if(output != -1){
            if((status_mask & ERR_COLWF) == 0){
              oret=writenbytes(output, buf, ret);
              if(oret<0){
                if(colwf){
                  status_mask |= ERR_COLWF; /* block out all further local writes */
                  status_mask |= ERR_CHLWF; /* this bit will propagate up the chain */
                  handle_message("Severe error: continuing after local write failure",VERB_ERROR, M_EOL);
                }
                else {
                  insane_perror("nettee: fatal error writing to output");
                }
              }
            }
          }
          transbytes   += ret;
        }
        else {
          cycle=0;
          maxbytes=transbytes;
          /* datain socket closed, flush our dataout */
          for(i = 0; i < nr_children; i++) {
            if(socketok[i]){
              if(0!=shutdown(dataout[i], SHUT_RDWR)){
                sprintf(msgbuf,"Error (2) on shutdown of dataout[%d] to %s",
                     i,socketname[i]);
                handle_message(msgbuf,VERB_ERROR, M_EOL);
              }
            }
          }
        }
        foundfd++;
      }

      /* select() returned but datain[0] was not ready: treat as fatal.
         NOTE(review): an older comment here said the message should bypass
         handle_message; the code does go through handle_message. */
      if(foundfd == 0) {
        handle_message("Fatal select() error",VERB_MESG, M_EOL);
        exit(EXIT_FAILURE);
      }

      /* periodic progress line, at most once per 10,000,000 bytes */
      if((verbose & VERB_STATUS)  && (transbytes - lastout >= 10000000)) {
        tv3=tv2;
        gettimeofday(&tv2, NULL);
        td = (tv2.tv_sec*1000000 + tv2.tv_usec)
           - (tv1.tv_sec*1000000 + tv1.tv_usec);
        tdlast = (tv2.tv_sec*1000000 + tv2.tv_usec)
           - (tv3.tv_sec*1000000 + tv3.tv_usec);
        sprintf(msgbuf,"\rTransfered MB: %.0f, MB/s: %.3f, Current MB/s: %.3f      ", (float)transbytes/1000000, (float)transbytes/td,(float)(transbytes - lastout)/tdlast);
        handle_message(msgbuf,VERB_STATUS, M_FFLUSH);
        lastout = transbytes;
      }
    }
    alarm(0);  /* We did something, so turn off the timeout-alarm */
  } /* end while */

  gettimeofday(&tv2, NULL);
  td = (tv2.tv_sec*1000000 + tv2.tv_usec) - (tv1.tv_sec*1000000 + tv1.tv_usec);

  if(meserver) {
      sprintf(msgbuf,"Server Sent MB: %.0f.", (float)maxbytes/1000000);
  }
  else {
      sprintf(msgbuf,"Transfered MB: %.0f, MB/s: %.3f",
            (float)transbytes/1000000, (float)transbytes/td);
  }
  handle_message(msgbuf,VERB_MESG, M_EOL);
  sprintf(msgbuf,"Children    = %8d", nr_children);
  handle_message(msgbuf,VERB_MESG, M_EOL);
  sprintf(msgbuf,"Descendents = %8d", descendents);
  handle_message(msgbuf,VERB_MESG, M_EOL);

  if(output != -1){ close(output); }

  sprintf(msgbuf,"Bytes:    %lld", maxbytes);
  handle_message(msgbuf,VERB_MESG, M_EOL);
  /* td is in microseconds, so the fractional part needs six zero-padded
     digits; with only three, 1.000500 s would be shown as "1.500". */
  sprintf(msgbuf,"Time: %lu.%06lu", td / 1000000, td % 1000000);
  handle_message(msgbuf,VERB_MESG, M_EOL);
  sprintf(msgbuf,"MBytes/s: %0.3f", (double)maxbytes / td);
  handle_message(msgbuf,VERB_MESG, M_EOL);

  if(!melast) {
    /*
      Inner and top nodes wait for the DONE message from their children
      and compare the value with what they have internally.
    */
    for(i = 0; i < nr_children; i++) {
      child_smask=0;
      if(! socketok[i]){
        /* Impossible or unlikely that we can read the status from the
           next node because a previous network write failure indicates
           that that node has problems.  So don't even try.  PRETEND
           that we read the values and set a few values.
        */
        child_smask |= ERR_CONWF;   /* Bad network write TO child */
      }
      else {
        sprintf(msgbuf,"Waiting for exit of child %d",i);
        handle_message(msgbuf,VERB_MESG, M_EOL);
        /* the next line will lock until the data is read or the socket dies */
        ret = readnbytes(ctrlout[i], buf, MYMAXMESSAGE);
        if(ret <= 0 ){
          sprintf(msgbuf,"nettee: fatal error: child %d exited abnormally\n",i);
          handle_message(msgbuf,VERB_ERROR, M_EOL);
          exit(EXIT_FAILURE);
        }
        buf[ret]='\0';   /* make the status message a C string for sscanf */

        if(0!=shutdown(ctrlout[i], SHUT_RDWR)){
          handle_message("Error on shutdown of ctrlout[i] 2",VERB_ERROR, M_EOL);
        }

        /* expected form: <hex byte count> <word> <decimal status mask> */
        if(sscanf(buf,"%llx %s %d",&cmaxbytes,msg,&child_smask)!=3){
          child_smask |= ERR_BSTAT;  /* Bad status returned by child */
          status_mask |= ERR_CHBST;  /* this bit will propagate up the chain */
        }
        else {
          if(cmaxbytes != maxbytes){
            child_smask |= ERR_BBC;   /* Bad Byte count in a child */
            status_mask |= ERR_CHBBC; /* this bit will propagate up the chain */
          }

          if(0 != strcmp(msg,"DONE") ){
            child_smask |= ERR_BSTAT;  /* Bad status returned by child */
            status_mask |= ERR_CHBST;  /* this bit will propagate up the chain */
          }
        }
      }

      /* Announce all errors detected on the child */

      sprintf(msgbuf,"Failures detected in child %d [%s]:",i,socketname[i]);
      if((child_smask & ERR_LMASK)==0){ strcat(msgbuf," NONE"); }
      if(child_smask & ERR_CONWF){      strcat(msgbuf," NWF");  }
      if(child_smask & ERR_COLWF){      strcat(msgbuf," LWF");  }
      if(child_smask & ERR_BBC){        strcat(msgbuf," BBC");   }
      if(child_smask & ERR_BSTAT){      strcat(msgbuf," BSTAT");}
      if(child_smask){
        handle_message(msgbuf,VERB_ERROR, M_EOL);
      }
      else {
        handle_message(msgbuf,VERB_MESG, M_EOL);
      }
      child_smask &= ERR_CMASK; /* clear all local bits, retain chain bits */
      status_mask |= child_smask;
      status_mask &= ERR_UMASK; /* send only bits that should prop. upwards */
      continue;
    }
  }

  /*
    internal nodes echo the "DONE" up the chain only after
    all children are done.   End nodes use this code too, only
    the server doesn't send a count upwards.  All nodes also check
    the status bits and announce errors if any are found - helpful
    for a postmortem if logged somewhere.
  */

  strcpy(msgbuf,"Failures detected in chain:");
  if(status_mask==0){          strcat(msgbuf," NONE");}
  if(status_mask & ERR_CHNWF){ strcat(msgbuf," NWF");}
  if(status_mask & ERR_CHLWF){ strcat(msgbuf," LWF");}
  if(status_mask & ERR_CHBBC){ strcat(msgbuf," BBC");}
  if(status_mask & ERR_CHBST){ strcat(msgbuf," BSTAT");}
  if(status_mask){
    handle_message(msgbuf,VERB_ERROR, M_EOL);
  }
  else {
    handle_message(msgbuf,VERB_MESG, M_EOL);
  }

  if(!meserver){
    mlen=sprintf(msg,"%16llx DONE %d",maxbytes,status_mask);
    oret=writenbytes(ctrlin, msg, mlen);
    if(oret<0){ insane_perror("nettee: fatal error writing status to previous node");}

    if(0!=shutdown(ctrlin, SHUT_RDWR)){
      handle_message("Error on shutdown of ctrlin",VERB_ERROR, M_EOL);
    }
    if(0!=shutdown(datain[0], SHUT_RDWR)){
      handle_message("Error on shutdown of datain",VERB_ERROR, M_EOL);
    }
  }

  handle_message("Data Transmission Completed.",VERB_MESG, M_EOL);
  free(buf_addr);

}

/*
 * Report a fatal condition and terminate.
 * Writes the given text followed by a newline to stderr, then exits with
 * EXIT_FAILURE.  Does not return.
 */
void insane(char *string)
{
  (void) fprintf(stderr, "%s\n", string);
  exit(EXIT_FAILURE);
}

/*
 * Report a fatal condition together with the current errno text and
 * terminate.  The message is produced by perror(), so errno must still
 * hold the value from the failed call.  Exits with EXIT_FAILURE and does
 * not return.
 */
void insane_perror(char *string)
{
  perror(string);
  exit(EXIT_FAILURE);
}

/*
 * Print the online help to stderr.
 * Fixed text is emitted in a few fputs() calls; the three lines that embed
 * a value (MAXFANOUT, NAMESIZE-1 and the current ctrlport) use fprintf().
 * Each literal run is kept well below the minimum string-literal length a
 * C99 compiler is required to accept.
 */
static void usage(void)
{
  fputs("\n"
        "nettee passes a data stream to one or more child nodes\n"
        "   using a daisychain method that typically runs at full network speed.\n"
        "   On each node it optionally directs the stream to a file or pipe\n\n"
        "Usage: nettee [command line options]\n"
        "   -in nettee    read data from an upstream nettee feed [DEFAULT]\n"
        "       -         read data stream from stdin\n"
        "       socket    read data generated by cmd from a socket\n"
        "       filename  read data from file (cannot be \"nettee\" or \"-\")\n"
        "   -out none     write nowhere locally [DEFAULT on the source node]\n"
        "       -         write data to stdout [DEFAULT on destination nodes]\n"
        "       socket    pass data through a socket to cmd\n"
        "       filename  write data to a file (cannot be \"none\" or \"-\")\n"
        "   -next host[,host2[,host3[...]]]\n",
        stderr);

  fprintf(stderr, "                 1-%d remote data stream destination[s].\n",MAXFANOUT);

  fputs("                 End of Chain is indicated by a host value of\n"
        "                   <empty string>, ., or _EOC_, or by omitting -next.\n"
        "                 [REQUIRED except for the last node in a chain.]\n"
        "   -cmd COMMAND  specifies the command executed for a socket.\n"
        "                 Input  COMMAND reads from disk  and writes to stdout.\n"
        "                 Output COMMAND reads from stdin and writes to disk.\n"
        "                 (-in socket and -out socket are mutually exclusive.)\n"
        "   -stm EOS      stream text through the nettee chain until an EOS\n"
        "                 string is encountered, then exit.  This allows short text\n"
        "                 messages to traverse the chain without waiting for a full buffer.\n",
        stderr);

  fprintf(stderr, "   -name string  node name used in messages (<=%d characters)\n",NAMESIZE-1);

  fputs("   -log -        write messages to stderr [DEFAULT]\n"
        "        filename write messages to a file\n",
        stderr);

  fprintf(stderr, "   -p,-port N    first of two consecutive ports to use [DEFAULT is %d]\n",ctrlport);

  fputs("   -v N          level of verbosity (bit mask) [use 0 for silent]\n"
        "                  1  show error messages [DEFAULT]\n"
        "                  2  show command line settings\n"
        "                  4  show messages\n"
        "                  8  show periodic status messages during transfer\n"
        "                 16  prepend nodename to all messages\n"
        "   -q            Suppresss \"ignored signal\" messages\n"
        "   -t s          Wait up to s seconds for input data before exiting\n"
        "                 with a failure status.  [DEFAULT = 0 = wait forever]\n"
        "   -w            wait for next node to boot or attach to the network.\n"
        "   -colwf        Continue On Local Write Failure.  Allows a chain to\n"
        "                 to keep processing past a node with a failed or full disk.\n"
        "                 [DEFAULT: entire chain exits on a local write failure.]\n"
        "   -conwf        Continue On Network Write Failure.  Allows a chain to\n"
        "                 to keep processing down to the node above the one that failed.\n"
        "                 [DEFAULT: entire chain exits on a network write failure.]\n"
        "   -help, -h     print the online help\n"
        "   -version, -i  print the version information\n"
        "   -hexamples    print examples\n",
        stderr);
}

/*
 * Print worked usage examples to stderr, one line per table entry.
 *
 * Corrections relative to the earlier text:
 *   - the socket example ran all three commands on "A:"; they belong to
 *     nodes A, B and C respectively;
 *   - the last socket command used "tar -Xpf -" where extraction
 *     ("-xpf") is meant;
 *   - the file senders started on A named "-next A"; the first receiver
 *     in that chain is B.
 */
void hexamples(void){
  static const char *const example_lines[] = {
    "Use rsh, ssh, etc. to start the remote nettee commands.\n\n",
    "Copy a file FOO from node A to nodes B, C, D in one chain\n",
    "   A:   nettee -next B <FOO\n",
    "   B:   nettee -next C >FOO\n",
    "   C:   nettee -next D >FOO\n",
    "   D:   nettee         >FOO\n",
    "\nCopy a file FOO from node A to nodes B, C, D directly.\n",
    "If the copy is bandwidth limited this will take three\n",
    "times longer than the preceding example.\n",
    "   A:   nettee -next B,C,D <FOO\n",
    "   B:   nettee -next .     >FOO\n",
    "   C:   nettee -next \".\"   >FOO\n",
    "   D:   nettee -next \"\"    >FOO\n",
    "\nCopy a file FOO from node A to nodes C, D on one subnet, nodes F,G\n",
    "on another, through nodes B and E, which do not make a local copy of FOO.\n",
    "   A:   nettee -next B,E   -in  FOO\n",
    "   B:   nettee -next C     -out none\n",
    "   C:   nettee -next D     -out FOO\n",
    "   D:   nettee             -out FOO\n",
    "   E:   nettee -next F     -out none\n",
    "   F:   nettee -next G     -out FOO\n",
    "   G:   nettee -next _EOC_ -out FOO\n",
    "\nCopy a directory structure BAR from node A:/FOO to /tmp/BAR\n",
    "on nodes B, C, D in one chain\n",
    "   A:   cd /FOO; tar -cf - BAR   | nettee -next B\n",
    "   B:   cd /tmp; nettee -next C  | tar xpf -\n",
    "   C:   cd /tmp; nettee -next D  | tar xpf -\n",
    "   D:   cd /tmp; nettee -next .  | tar xpf -\n",
    "\nCopy a directory structure BAR from node A:/FOO to B:/GOO\n",
    "on nodes B, C using a socket.  This may be faster than\n",
    "using a pipe (as in the previous example).\n",
    "   A:   nettee -in  socket -next B -cmd 'cd /FOO; tar -cf  - BAR'\n",
    "   B:   nettee -out socket -next C -cmd 'cd /GOO; tar -xpf -'\n",
    "   C:   nettee -out socket         -cmd 'cd /GOO; tar -xpf -'\n",
    "\nCreate a command chain A,B,C,...,Z on a different port:\n",
    "   A:   mkfifo cmdfifo    ; nettee -p 11000 -next B -in cmdfifo -stm EOS\n",
    "   B:   export NEXTNODE=C ; nettee -p 11000 -next $NEXTNODE -stm EOS | execinput \n",
    "   C-Y: ...\n",
    "   Z:   export NEXTNODE=. ; nettee -p 11000 -next $NEXTNODE -stm EOS | execinput \n",
    "Use this command chain to quickly set up nettee on each node and\n",
    "then use the second nettee to transfer 3 different files to B...Z.\n",
    "Lastly force the command chain to close by sending the EOS string.\n",
    "   A:   echo 'nettee -next $NEXTNODE -out foobar.txt' >>cmdfifo\n",
    "   A:   nettee -next B -in foobar.txt\n",
    "   A:   echo 'nettee -next $NEXTNODE -out goobar.txt' >>cmdfifo\n",
    "   A:   nettee -next B -in goobar.txt\n",
    "   A:   echo 'nettee -next $NEXTNODE -out woobar.txt' >>cmdfifo\n",
    "   A:   nettee -next B -in woobar.txt\n",
    "   A:   echo 'nettee EOS' >>cmdfifo; rm -f cmdfifo\n"
  };
  const size_t line_count = sizeof example_lines / sizeof example_lines[0];
  size_t idx;

  /* None of the lines contains a conversion, so fputs() is sufficient. */
  for(idx = 0; idx < line_count; idx++){
    fputs(example_lines[idx], stderr);
  }
}

/*
 * Parse the command line and fill in the file-level configuration.
 *
 *   argc, argv  as received by main(); argv[0] is skipped.
 *
 * Steps:
 *   1. Reset the configuration variables to their defaults.
 *   2. Walk argv.  Options taking a value hand the scan index to
 *      setstring()/setinonnegnumeric(), which presumably consume the
 *      following argument and use the trailing label in their own
 *      diagnostics (their bodies are not visible here).
 *      -h/-help, -hexamples and -version/-i print and exit successfully.
 *      An unrecognized option is fatal.
 *   3. Derive meserver/melast and reject inconsistent combinations.
 *   4. Apply "-" defaults for input and output, open the log file if one
 *      was named, and settle on a node name.
 *
 * Fatal problems end the process through insane() or exit(EXIT_FAILURE).
 */
void  process_command_line_args(int argc, char **argv){
  int   numarg=0;
  char *arg;

  /* Set the default parameters */
  *infile ='\0';                /* no infile set yet */
  *outfile='\0';                /* no outfile set yet */
  *logfile='\0';                /* no logfile set yet */
  nextnode[0]='\0';             /* no next node */
  verbose   = VERB_ERROR;
  timeout   = 0;
  flog      = stderr;
  logdest   = LOG_FILE;
  bmyhostname[0]='\0';
  conwf     = 0;
  colwf     = 0;

  while( ++numarg < argc){
    arg = argv[numarg];   /* the option word; value options advance numarg */

    if( (lcl_strcasecmp(arg, "-h")==0)     ||
        (lcl_strcasecmp(arg, "-?")==0)     ||
        (lcl_strcasecmp(arg, "?")==0)      ||
        (lcl_strcasecmp(arg, "-help")==0)  ||
        (lcl_strcasecmp(arg, "--help")==0) ){
      usage();
      exit(EXIT_SUCCESS);
    }
    else if(lcl_strcasecmp(arg, "-hexamples")==0){
      hexamples();
      exit(EXIT_SUCCESS);
    }
    else if(lcl_strcasecmp(arg, "-name")==0){
      setstring(&myhostname,&numarg,argc,argv,"-name");
    }
    else if(lcl_strcasecmp(arg, "-in")==0){
      if(*infile != '\0'){
         insane("nettee: fatal error: multiple -in parameters\n");
      }
      setstring(&infile,&numarg,argc,argv,"-in");
      /* meserver is recomputed from infile in the sanity section below,
         so the value assigned here for "nettee" does not survive. */
      if(lcl_strcasecmp(infile, "nettee")==0){meserver=1;}
      if(lcl_strcasecmp(infile, "socket")==0){
        if(usesocket){
          insane("nettee: fatal error: multiple uses of socket option");
        }
        usesocket=1;
        meserver=1;
      }
    }
    else if(lcl_strcasecmp(arg, "-out")==0){
      if(*outfile != '\0'){
         insane("nettee: fatal error: multiple -out parameters\n");
      }
      setstring(&outfile,&numarg,argc,argv,"-out");
      if(lcl_strcasecmp(outfile, "socket")==0){
        if(usesocket){
          insane("nettee: fatal error: multiple uses of socket option");
        }
        usesocket=2;
      }
    }
    else if(lcl_strcasecmp(arg, "-cmd")==0){
      if(cmdstring != NULL){
         insane("nettee: fatal error: multiple -cmd parameters\n");
      }
      setstring(&cmdstring,&numarg,argc,argv,"-cmd");
    }
    else if(lcl_strcasecmp(arg, "-stm")==0){
      if(shutdown_stream != NULL){
         insane("nettee: fatal error: multiple -stm parameters\n");
      }
      /* The label must name this option; it previously said "-cmd". */
      setstring(&shutdown_stream,&numarg,argc,argv,"-stm");
    }
    else if(lcl_strcasecmp(arg, "-next")==0){
      if(*nextnode != '\0'){
         insane("nettee: fatal error: multiple -next parameters\n");
      }
      setstring(&nextnode,&numarg,argc,argv,"-next");
      /* end of chain can be "", "." or "_EOC_"
         For now "-" is not defined. In some
         future version it will indicate some sort of default
         "next" node. */
      if(strcmp(nextnode, "_EOC_")==0){ *nextnode='\0'; }
      else if(strcmp(nextnode,".")==0){ *nextnode='\0'; }
    }
    else if(lcl_strcasecmp(arg, "-log")==0){
      if(*logfile != '\0'){
        insane("nettee: fatal error: multiple -log parameters\n");
      }
      setstring(&logfile,&numarg,argc,argv,"-log");
    }
    else if(lcl_strcasecmp(arg, "-t")==0){
      setinonnegnumeric(&timeout,&numarg,argc,argv,"-t");
      signal(SIGALRM, alarm_handler);
    }
    else if(lcl_strcasecmp(arg, "-w")==0){
      waitfornext=1;
    }
    else if(lcl_strcasecmp(arg, "-colwf")==0){
      colwf=1;
    }
    else if(lcl_strcasecmp(arg, "-conwf")==0){
      conwf=1;
    }
    else if( (lcl_strcasecmp(arg, "-port")==0) ||
             (lcl_strcasecmp(arg, "-p")==0)  ){
      setinonnegnumeric((int *) &ctrlport,&numarg,argc,argv,"-port");
      dataport=ctrlport+1;      /* the data port is always the next one up */
    }
    else if((lcl_strcasecmp(arg, "-v")==0)  ){
      setinonnegnumeric(&verbose,&numarg,argc,argv,"-v");
    }
    else if( (lcl_strcasecmp(arg, "-version")==0) ||
             (lcl_strcasecmp(arg, "-i")==0)  ){
      (void)fprintf(stderr,"Version:     %s\n",version_string);
      (void)fprintf(stderr,"bugs to:     mathog@caltech.edu\n");
      (void)fprintf(stderr,"Copyright:   %s\n",c_r_1);
      (void)fprintf(stderr,"Copyright:   %s\n",c_r_2);
      (void)fprintf(stderr,"License:     GPL 2\n");
      (void)fprintf(stderr,"Large Files: %s\n",FLSUPPORT);
      exit(EXIT_SUCCESS);
    }
    else if(lcl_strcasecmp(arg, "-q")==0){
      verbignoresignals = 0;
    }
    else {
      (void) fprintf(stderr,"nettee: fatal error: Unrecognized command line option: %s\n",arg);
      exit(EXIT_FAILURE);
    }
  }

  /* sanity checking */

  /* A node is the server unless its input is the upstream nettee feed
     (named explicitly, or implied by giving no -in at all). */
  if(lcl_strcasecmp(infile,"nettee")==0){  meserver=0; }
  else if(*infile=='\0'){                  meserver=0; }
  else {                                   meserver=1; }
  if(nextnode[0]){                         melast=0;   }
  else {                                   melast=1;   }
  if(meserver==1){
    if(melast==1){
      insane("nettee: fatal error: another node must be specified as input or nextnode");
    }
    if(lcl_strcasecmp(logfile,"nettee")==0){
      insane("nettee: fatal error: cannot log to nettee from the server node");
    }
    if( (conwf | colwf) ==1){
      insane("nettee: fatal error: server node cannot use -colwf or -conwf");
    }
  }
  if((cmdstring == NULL)  && usesocket){
    insane("nettee: fatal error: socket needs -cmd");
  }
  if((cmdstring != NULL)  && usesocket==0){
    insane("nettee: fatal error: -cmd needs socket");
  }
  if(*infile =='\0'){ (void) strcpy(infile, "-"); }  /* read from stdin */
  if(*outfile=='\0'){ (void) strcpy(outfile,"-"); }  /* write to stdout */
  if(*logfile == '\0' || (lcl_strcasecmp(logfile,"-")==0) ){
    /* log to stderr, flog defaulted to that */
    logdest=LOG_FILE;
  }
  else {                        /* send it to a log file */
    logdest=LOG_FILE;
    flog=fopen(logfile,"w");
    if(flog==NULL){  /* obviously can't log this error to the log file... */
      if( verbose >= VERB_ERROR) {
        (void) fprintf(stderr,"nettee: fatal error: could not open log file %s\n",logfile);
      }
      exit(EXIT_FAILURE);
    }
  }

  /* Node name: -name, then $MYNODENAME, then $HOSTNAME, then
     gethostname(), then the literal "some_node".  An environment variable
     that is set but empty counts as unset.  The final fallback is only
     ever written into bmyhostname: the string returned by getenv() must
     not be modified and may be too short to hold the fallback name. */
  if(*myhostname == '\0'){ /* no hostname was assigned, try MYNODENAME */
    myhostname = getenv("MYNODENAME");
  }
  if(myhostname == NULL || *myhostname == '\0'){ /* still none, try HOSTNAME */
    myhostname = getenv("HOSTNAME");
  }
  if(myhostname == NULL || *myhostname == '\0'){ /* still none, try gethostname */
    myhostname=&bmyhostname[0];
    if(gethostname(myhostname, NAMESIZE-1) != 0){
      bmyhostname[0]='\0';  /* discard anything a failed call left behind */
    }
  }
  if(*myhostname == '\0'){ /* give up, use "some_node" */
    /* only reachable with myhostname pointing at bmyhostname */
    strcpy(myhostname,"some_node");
  }

}

/*
 * Single exit point for every message the program emits.
 *
 *   string      text to emit
 *   verb_level  VERB_* bit(s); nothing happens unless one of them is also
 *               set in the global verbose mask
 *   options     M_EOL appends a newline, M_FFLUSH flushes the log stream
 *
 * With logdest == LOG_FILE the text goes to flog, preceded by
 * "<myhostname>: " when VERB_SAYNAME is set in verbose.
 * With logdest == LOG_SERVER the function reports that upstream logging
 * is unimplemented and terminates the process.
 * Any other logdest value emits nothing.
 */
void handle_message(char *string, const int verb_level, const int options){
  if(!(verbose & verb_level)){
    return;                       /* this class of message is switched off */
  }

  if(logdest == LOG_SERVER){
    (void) fprintf(stderr,"FATAL ERROR, unimplemented upstream logging function\n");
    exit(EXIT_FAILURE);
  }

  if(logdest != LOG_FILE){
    return;
  }

  if(verbose & VERB_SAYNAME){
    (void) fprintf(flog,"%s: ",myhostname);
  }
  (void) fprintf(flog, (options & M_EOL) ? "%s\n" : "%s", string);
  if(options & M_FFLUSH){
    fflush(flog);
  }
}


int main(int argc, char **argv){
/* set all default values */

  int i;

  /* Parse command line arguments */
  process_command_line_args(argc,argv);

  /* Did we get the parameters we need? */

  if( verbose & VERB_PARAM) {
    print_params();
  }


  handle_message("Trying to build node chain(s) ...",VERB_MESG, M_EOL);
  
  alarm(timeout);

  buildchains();
  
  if(meserver) { handle_message("Sending...",  VERB_MESG, M_EOL);    }
  else {         handle_message("Receiving...",VERB_MESG, M_EOL);    }
 

  transmit();
  
  if(datain[0]!=0){
    if(0!=close(datain[0])){ handle_message("Error on close of datain",VERB_ERROR, M_EOL); }
    if(0!=close(ctrlin)){    handle_message("Error on close of ctrlin",VERB_ERROR, M_EOL); }
  }
  
  for(i = 0; i < nr_children; i++) {
    if(0!=close(ctrlout[i])){ handle_message("Error on close of ctrlout[i]",VERB_ERROR, M_EOL);   }
    if(0!=close(dataout[i])){ handle_message("Error (3) on close of dataout[i]",VERB_ERROR, M_EOL); }
  }
 
  if(logdest == LOG_FILE){
    fclose(flog);
  }
 
  exit(EXIT_SUCCESS);
}
