#!/usr/bin/gawk -f

#
# Copyright(C) 2007-2012 National Institute of Information and Communications Technology
#

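# NOTE: the original Japanese comment on this line is garbled in this copy; it mentions EUC-JP, presumably declaring the file's character encoding.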

# Startup rule: select tab-separated input and refuse to run when any
# command-line operand is present.
BEGIN {
  # Split every input record on tab characters.
  FS = "\t";

  # ARGC includes ARGV[0] (the interpreter name), so 2 or more means at least
  # one operand was given on the command line.
  # NOTE(review): the usage text advertises an optional *.tsv operand, yet this
  # check rejects any operand -- confirm which of the two is intended.
  if (ARGC >= 2) {
    # SCRIPT is not assigned anywhere in this rule; presumably the caller
    # supplies it (e.g. via -v SCRIPT=...) -- TODO confirm.
    printf("usage: %s [<*.tsv>]\n", SCRIPT) > "/dev/stderr";

    # Record the status in the global EXIT as well as exiting with it, so that
    # code outside this rule can still see the value.
    exit (EXIT = 1);
  }
}

# Per-record rule: converts one tab-separated input record into one line of
# sparse features written to standard output.
#
# The line starts with $1 formatted as an integer, followed by space-separated
# "name=value:1" tokens built from the arrays filled by ma():
#   s/b/c/f,     ss/bb/cc/ff      -- every index 1..n
#   S/B/C/F,     SS/BB/CC/FF      -- indices PSTART..PEND
#   Sp/Bp/Cp/Fp, SSp/BBp/CCp/FFp  -- indices before PSTART
#   Ss/Bs/Cs/Fs, SSs/BBs/CCs/FFs  -- indices after PEND
# Unigram tokens use one index; bigram tokens join indices i and i+1 with "_".
#
# ma(), position() and showError() are defined outside this rule.  PSTART and
# PEND are not assigned here; presumably position() sets them -- TODO confirm.
{
  # Count of records seen so far; only used in the diagnostic below.
  nlines += 1;

  # Copy the four input fields into named globals.
  cls = $1;
  xpr = $2;
  sen = $3;
  mrp = $4;

  # Fill surf/base/cpos/fpos from the fourth field.  The return value is used
  # below as the highest valid index of those arrays (indices start at 1).
  n = ma(mrp, surf, base, cpos, fpos);

  # Locate xpr within the record; -1 is treated as "not found".
  # NOTE(review): processing continues after the diagnostic, so the PSTART/PEND
  # ranges below use whatever values are current at that point -- confirm that
  # this is intended.
  p = position(sen, n, surf, xpr, 0);
  if (p == -1) {
    showError("xpr !in sen");
    printf(" = Line:%d\n", nlines) > "/dev/stderr";
  }

  # Build the whole output line in memory; it is written once at the end.
  featline = sprintf("%d", cls);

  # Whole record: unigrams.
  for (i = 1; i <= n; i++) {
    featline = featline sprintf(" s=%s:1 b=%s:1 c=%s:1 f=%s:1",
                                surf[i], base[i], cpos[i], fpos[i]);
  }
  # Whole record: bigrams of adjacent indices.
  for (i = 1; i <= n - 1; i++) {
    featline = featline sprintf(" ss=%s_%s:1 bb=%s_%s:1 cc=%s_%s:1 ff=%s_%s:1",
                                surf[i], surf[i + 1],
                                base[i], base[i + 1],
                                cpos[i], cpos[i + 1],
                                fpos[i], fpos[i + 1]);
  }

  # Span PSTART..PEND: unigrams.
  for (i = PSTART; i <= PEND; i++) {
    featline = featline sprintf(" S=%s:1 B=%s:1 C=%s:1 F=%s:1",
                                surf[i], base[i], cpos[i], fpos[i]);
  }
  # Span PSTART..PEND: bigrams.
  for (i = PSTART; i <= PEND - 1; i++) {
    featline = featline sprintf(" SS=%s_%s:1 BB=%s_%s:1 CC=%s_%s:1 FF=%s_%s:1",
                                surf[i], surf[i + 1],
                                base[i], base[i + 1],
                                cpos[i], cpos[i + 1],
                                fpos[i], fpos[i + 1]);
  }

  # Before the span (indices 1..PSTART-1): unigrams.
  for (i = 1; i <= PSTART - 1; i++) {
    featline = featline sprintf(" Sp=%s:1 Bp=%s:1 Cp=%s:1 Fp=%s:1",
                                surf[i], base[i], cpos[i], fpos[i]);
  }
  # Before the span: bigrams that lie entirely before PSTART.
  for (i = 1; i <= PSTART - 1 - 1; i++) {
    featline = featline sprintf(" SSp=%s_%s:1 BBp=%s_%s:1 CCp=%s_%s:1 FFp=%s_%s:1",
                                surf[i], surf[i + 1],
                                base[i], base[i + 1],
                                cpos[i], cpos[i + 1],
                                fpos[i], fpos[i + 1]);
  }

  # After the span (indices PEND+1..n): unigrams.
  for (i = PEND + 1; i <= n; i++) {
    featline = featline sprintf(" Ss=%s:1 Bs=%s:1 Cs=%s:1 Fs=%s:1",
                                surf[i], base[i], cpos[i], fpos[i]);
  }
  # After the span: bigrams.
  for (i = PEND + 1; i <= n - 1; i++) {
    featline = featline sprintf(" SSs=%s_%s:1 BBs=%s_%s:1 CCs=%s_%s:1 FFs=%s_%s:1",
                                surf[i], surf[i + 1],
                                base[i], base[i + 1],
                                cpos[i], cpos[i + 1],
                                fpos[i], fpos[i + 1]);
  }

  # Emit the finished feature line.
  printf "%s\n", featline;
}
