/*
  Copyright(C) 2007-2012 National Institute of Information and Communications Technology
*/

/*
  svmtools
  *.fv to *.ft converter
*/


#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "exception.h"
#include "hashtable.h"
#include "split.h"


#define BUF_SIZE (32 * 1024)


/*
 * Reads feature vectors (*.fv) from stdin, one per line, and writes every
 * distinct feature name to stdout (*.ft), one per line, in order of first
 * appearance.
 *
 * Each line is split on single spaces.  The first field is skipped
 * (presumably the label -- confirm against the *.fv format); every other
 * non-empty field must contain a ':' and the text before the last ':' is
 * taken as the feature name.  Lines whose first character is '#' are
 * ignored.
 *
 * Takes no command-line arguments: prints a usage message and returns 1 if
 * any are given, returns 0 on success.  Errors are reported through
 * exception(), which this code assumes does not return when its condition
 * is true.  Every input line, including the last one, must end with '\n'.
 */
int main(int argc, char **argv) {
  int i;
  int buf_size;
  char *buf;
  int nelem;
  char **elem;
  HT *ht;
  int line;
  int len;

  if (argc != 1) {
    fprintf(stderr, "usage: %s < <*.fv> > <*.ft>\n", argv[0]);
    return 1;
  }

  /* Initialization */
  buf_size = BUF_SIZE;
  buf = smalloc(sizeof(char) * buf_size);
  elem = smalloc(sizeof(char *) * (buf_size / 2));
  ht = ht_new((int (*)(const void *, const void *))strcmp, (size_t (*)(const void *))strhash);
  exception(ht == NULL, "ht_new() failed");

  /* Read and convert one line at a time */
  for (line = 1; ; line++) {
    /* Read one vector, growing the buffers until the whole line fits */
    if (fgets(buf, buf_size, stdin) == NULL) break;
    for (;;) {
      len = (int)strlen(buf);
      /* An empty string here means the chunk started with a NUL byte;
         reject it instead of reading buf[-1]. */
      exception(len == 0, "invalid data (NUL byte) (line: %d)", line);
      if (buf[len - 1] == '\n') break;

      /* Guard the doubling below against signed overflow. */
      exception(buf_size > (INT_MAX - 1) / 2, "line too long (line: %d)", line);
      buf_size = 2 * buf_size + 1;
      buf = srealloc(buf, sizeof(char) * buf_size);
      elem = srealloc(elem, sizeof(char *) * (buf_size / 2));
      exception(fgets(buf + len, buf_size - len, stdin) == NULL, "unexpected EOF (line: %d)", line);
    }

    /* Strip the newline, then an optional '\r' and one trailing space */
    buf[--len] = '\0';
    if (len > 0 && buf[len - 1] == '\r') buf[--len] = '\0';
    if (len > 0 && buf[len - 1] == ' ') buf[--len] = '\0';

    /* Comment line */
    if (buf[0] == '#') continue;

    /* Split into space-separated fields */
    nelem = split(buf, ' ', elem, buf_size / 2);

    /* Vector elements; field 0 is skipped */
    for (i = 1; i < nelem; i++) {
      char *s;

      if (elem[i][0] == '\0') continue;  /* two or more consecutive spaces */

      /* strrchr() is the standard equivalent of the legacy rindex() */
      s = strrchr(elem[i], ':');
      exception(s == NULL, "invalid data (no colon) (line: %d)", line);

      /* Cut the field at its last ':' so elem[i] holds only the name */
      *s = '\0';
      if (ht_get(ht, elem[i]) == NULL) {
        char *str;

        /* First occurrence: print it and remember a private copy, since
           buf is overwritten by the next line. */
        printf("%s\n", elem[i]);
        str = sstrdup(elem[i]);
        exception(ht_put(ht, str, str), "ht_put() failed");
      }
    }
  }

  return 0;
}
