#!/bin/env perl

#
# Copyright(C) 2007-2012 National Institute of Information and Communications Technology
#

use strict;
use warnings;

use tsv;
use utf8;
use Encode;
# I/O layers: STDIN and STDOUT carry EUC-JP text; diagnostics on STDERR are UTF-8.
binmode STDIN,  ":encoding(euc-jp)";
binmode STDOUT, ":encoding(euc-jp)";
binmode STDERR, ":encoding(utf8)";

# Validate the arguments: exactly one or two are accepted (filename [topic]).
if ( @ARGV < 1 || @ARGV > 2 ) {

    # $0 is passed as an argument instead of being interpolated into the
    # format, so a '%' in the script path is not parsed as a conversion.
    printf STDERR "Usage: %s filename [topic]\n", $0;
    exit -1;
}

# Optional topic (second argument); defaults to the empty string.
my $topic = "";
if ( defined( $ARGV[1] ) ) {
    $topic = $ARGV[1];

    # Decode the argument in place as UTF-8.
    # NOTE(review): assumes the caller passes the topic as UTF-8 bytes - confirm.
    utf8::decode($topic);
}

# Open the input file named by the first argument, decoding it as EUC-JP
# (the file is expected to have been converted to EUC-JP beforehand).
# On failure, report which file could not be opened and why.
open my $in, "<:encoding(euc-jp)", $ARGV[0]
    or die "Cannot open '$ARGV[0]': $!";
my $id        = 0;    # running ID assigned to each input line
my $lineCount = 0;    # number of lines read so far

printf STDERR "変換処理を開始します...\n";

# Main loop: process the input one line at a time and emit one output record
# (TSV or blank) per line.
while ( my $s = <$in> ) {

    # Count every line read, including the ones skipped below.
    $lineCount++;

    chomp $s;

    # Disabled: round-trip through EUC-JP (encode with a fallback, then decode).
    #$s = encode( 'euc-jp', $s, sub { sprintf "", $_[0] } );
    #$s = decode( 'euc-jp', $s );

    # Convert half-width alphanumerics and symbols in the input to full-width.
    $s = tsv::valueChangeZenkaku($s);

    # Remove full-width and half-width space characters.
    $s =~ s/[　 ]//g;

    my $tsv = tsv->new(
        {
            topic      => $topic,
            documentID => $ARGV[0],
            sentence   => $s
        }
    );

    # Sample and sentence IDs share the same running counter.
    $id++;
    $tsv->setVal( { sampleID   => $id } );
    $tsv->setVal( { sentenceID => $id } );

    # Skip blank lines. After chomp and space removal a blank line is the
    # empty string, so that case must be tested explicitly; the two original
    # comparisons are kept for backward compatibility.
    if ( $s eq "" || $s eq "\n" || $s eq "\\n" ) {
        $tsv->printOUTBlank();
        next;
    }

    my $execsts = $tsv->setknpresult($s);
    if ( $execsts == 1 ) {
        if ( $tsv->checkTSV() == -1 ) {

            # Report the number of the line that failed the check.
            printf STDERR " = Line:%d\n", $id;
        }
        $tsv->printTSV();
    }
    elsif ( $execsts == -2 || $execsts == 0 ) {

        # The analysis reported an error: show the line number and emit a
        # blank record in place of the TSV.
        printf STDERR " skip: KNP Error = Line:%d\n", $lineCount;
        $tsv->printOUTBlank();
    }
    elsif ( $execsts == -3 ) {

        # NOTE(review): the message says "Stop" but the loop keeps running,
        # no blank record is emitted, and the message has no trailing
        # newline - confirm the intended behavior for this status.
        printf STDERR " Stop at line:%d", $lineCount;
    }
    else {

        # Any other status: report the line number and emit a blank record.
        printf STDERR " = Line:%d\n", $lineCount;
        $tsv->printOUTBlank();
    }

}

# Release the input handle now that every line has been consumed.
close $in;

printf STDERR "変換処理が終了しました...\n";
