#!/usr/bin/gawk -f

#
# Copyright(C) 2007-2012 National Institute of Information and Communications Technology
#

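# Converts *.tsv data into the CRF++ input format.
# NOTE(review): the original comment was mis-encoded; judging from the code,
# when an expression cannot be located in the analysed sentence, an error is
# reported on stderr together with the input line number.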

# Input records are split into fields on tab characters.
BEGIN { FS = "\t" }

# Main rule, run for every input record: for each item of field 8, print one
# block of token lines (six tab-separated columns) followed by a blank line.
#
# Fields used: $5 sentence text, $7 and $8 parallel lists whose items are
# separated by a literal backslash followed by "n", $10 input handed to ma().
# ma(), position(), showError() and error() are not defined in this part of
# the file.  PSTART / PEND are read right after each position() call and are
# presumably set by it -- TODO confirm.
# Variable names are deliberately left as they were: awk variables are
# global, so the helpers may read them.
{
  # Count every record, including the ones skipped just below.
  nlines++;
  if ($8 == "") {
    next;
  }

  mrp = $10;
  xprall = $8;
  srcall = $7;
  sen = $5;

  nxpr = split(xprall, xprelem, /\\n/);
  nsrc = split(srcall, srcelem, /\\n/);
  if (nxpr != nsrc) {
    error("nsrc != nxpr");
  }

  # n is used below as the number of entries in surf/base/cpos/fpos.
  n = ma(mrp, surf, base, cpos, fpos);

  for (z = 1; z <= nxpr; z++) {
    xpr = xprelem[z];
    src = srcelem[z];
    if (src == "[]") {
      continue;
    }
    # Greedy match: removes everything from the first "[" to the last "]".
    gsub(/\[.*\]/, "", src);

    # Column 5: "p" before the xpr span, "x" inside it, "s" after it.
    i = 1;
    while (i <= n) {
      expr[i] = "0";
      i++;
    }
    p = position(sen, n, surf, xpr, 0);
    if (p == -1) {
      showError("xpr !in sen");
      printf(" = Line:%d\n", nlines) > "/dev/stderr";
      continue;
    }
    i = 1;
    while (i < PSTART) {
      expr[i] = "p";
      i++;
    }
    i = PSTART;
    while (i <= PEND) {
      expr[i] = "x";
      i++;
    }
    i = PEND + 1;
    while (i <= n) {
      expr[i] = "s";
      i++;
    }

    # Column 6: B/I/O tags; everything stays "O" when src is empty.
    i = 1;
    while (i <= n) {
      tag[i] = "O";
      i++;
    }
    if (src != "") {
      p = position(sen, n, surf, src, 0);
      if (p == -1) {
        showError("src !in sen");
        printf(" = Line:%d\n", nlines) > "/dev/stderr";
        continue;
      }
      tag[PSTART] = "B";
      i = PSTART + 1;
      while (i <= PEND) {
        tag[i] = "I";
        i++;
      }
    }

    # Emit one line per token, then a blank line to close the block.
    i = 1;
    while (i <= n) {
      printf("%s\t%s\t%s\t%s\t%s\t%s\n", surf[i], base[i], cpos[i], fpos[i], expr[i], tag[i]);
      i++;
    }
    printf("\n");
  }
}
