#!/bin/env perl

#
# Copyright(C) 2007-2012 National Institute of Information and Communications Technology
#

use strict;
use warnings;

use tsv;
use Text::CSV_XS;
use IO::Handle;
use Encode;
use utf8;
binmode STDIN,  ":utf8";
binmode STDOUT, ":encoding(euc-jp)";
binmode STDERR, ":utf8";

# Turn one raw CSV cell into a Perl character string.
#
# Argument: the cell value as read from the CSV (may be undef).
# Returns:  "" for an undefined or empty cell; otherwise the value decoded
#           from the "sjis" encoding.
sub csv2str($) {
    my ($raw) = @_;

    # Missing and empty cells both normalise to the empty string.
    return "" unless defined($raw) && $raw ne "";

    my $text = Encode::decode( "sjis", $raw );
    return $text;
}

# Argument check: exactly one CSV file name, optionally followed by a topic.
if ( @ARGV < 1 || @ARGV > 2 ) {

    # Use print, not printf: $0 is arbitrary text and must not be treated
    # as a format string.
    print STDERR "Usage: $0 filename [topic]\n";
    exit -1;
}

# binary => 1 lets the parser accept the non-ASCII bytes in the cells; the
# cells are decoded later by csv2str.
my $tc = Text::CSV_XS->new( { binary => 1 } )
  or die "Error: cannot create CSV parser: " . Text::CSV_XS->error_diag() . "\n";
open( my $fh, "<", $ARGV[0] )
  or die "Error: cannot open $ARGV[0]: $!\n";
printf STDERR "%s\n", $ARGV[0];

# The optional topic arrives as raw bytes on the command line; decode it from
# UTF-8 in place so it is handled as characters.
my $topic = "";
if ( defined( $ARGV[1] ) ) {
    $topic = $ARGV[1];
    utf8::decode($topic);
}

#my $sampleID = 1;
# Number of CSV records read so far (the header counts as record 1).
my $lineCount = 0;

# Read the CSV record by record, normalise each field, and print one TSV
# entry per record that passes all checks. Every skipped or failed record is
# reported on STDERR together with its record number.
LINE:
while ( my $fields = $tc->getline($fh) ) {
    last LINE unless @$fields;

    $lineCount++;

    # The first record is the header: report it and move on.
    if ( $lineCount == 1 ) {
        printf STDERR " skip: Header = Line:%d\n", $lineCount;
        next LINE;
    }

    # Load the fields. Values are converted to full-width to match the CSV
    # items; only the evaluation holder uses [] brackets.
    my $sentenceID = csv2str( $fields->[0] );    # sentence ID
    my $sentence   = csv2str( $fields->[1] );    # sentence text

    # A sentence cell containing the literal "#NAME?" is skipped.
    if ( $sentence eq "#NAME?" ) {
        printf STDERR " skip: Sentence [%s] = Line:%d\n", $sentence, $lineCount;
        next LINE;
    }
    $sentence = &tsv::valueChangeZenkaku($sentence);
    my $holder = csv2str( $fields->[2] );        # evaluation holder

    # The holder cell may contain several entries separated by a newline, a
    # literal backslash-n, or the full-width form of backslash-n. The -1
    # limit keeps trailing empty entries.
    my @holderName  = split( /\n|\\n|￥ｎ/, $holder, -1 );
    my $holderErr   = 0;    # set when an unbracketed holder is not in the sentence
    my $checkResult = 0;    # set when at least one entry had to be rewritten

  HOLDER:
    for my $name (@holderName) {

        # Position just past the first "]"; 0 means there is no "]".
        my $closeEnd = index( $name, "]" ) + 1;

        # "[xxx]" (the first "]" is the last character) needs no work. An
        # empty entry also lands here because both sides are 0.
        next HOLDER if $closeEnd == length($name);

        if ( $closeEnd == 0 ) {

            # A holder not wrapped in [] must occur in the sentence text.
            if ( index( $sentence, $name ) < 0 ) {
                printf STDERR " skip: Holder is Not Found = Line:%d\n",
                  $lineCount;
                $holderErr = 1;
                last HOLDER;
            }
            next HOLDER;
        }

        # The entry has text after a "]" (forms like "[x]y" or "y[x]z[w]").
        # Cut it after every "]" and join the pieces with a literal "\n";
        # the original comment notes that the check step otherwise rejects
        # such entries with "Number mismatch".
        $checkResult = 1;
        my @pieces;
        my $rest = $name;
        while ( length($rest) > 0 ) {
            my $cut = index( $rest, "]" ) + 1;
            my $piece;
            if ( $cut == 0 ) {

                # No further "]": the remainder is the final piece.
                $piece = &tsv::valueChangeZenkaku($rest);
                $rest  = "";
            }
            else {
                $piece = &tsv::valueChangeZenkaku( substr( $rest, 0, $cut ) );
                $rest  = substr( $rest, $cut );
            }
            push @pieces, $piece;
        }

        # $name aliases the array element, so this updates @holderName.
        $name = join( "\\n", @pieces );

        # Turn full-width brackets back into half-width ones after the
        # full-width conversion above.
        $name =~ tr{［］}{[]};
    }

    # A record reported as skipped must really be skipped, even when an
    # earlier holder entry of the same record was rewritten.
    next LINE if $holderErr;

    if ($checkResult) {
        $holder = join( "\\n", @holderName );
    }

    my $expression = csv2str( $fields->[3] );    # evaluation expression
    $expression = &tsv::valueChangeZenkaku($expression);
    my $type = csv2str( $fields->[4] );          # evaluation type

    # A type cell containing the literal "#NAME?" is skipped.
    if ( $type eq "#NAME?" ) {
        printf STDERR " skip: Type [%s] = Line:%d\n", $type, $lineCount;
        next LINE;
    }

    # For the types "要望" and "当為" the polarity is not output: any entry
    # that starts with one of them is reduced to that bare type name.
    my @typeList = split( /\n|\\n|￥ｎ/, $type );
    for my $entry (@typeList) {
        if ( index( $entry, "要望" ) == 0 ) {
            $entry = "要望";
        }
        if ( index( $entry, "当為" ) == 0 ) {
            $entry = "当為";
        }
    }
    $type = join( "\n", @typeList );
    my $target = csv2str( $fields->[5] );        # evaluation target

    my $tsv = tsv->new(
        {
            topic      => $topic,
            sentenceID => $sentenceID,
            sentence   => $sentence
        }
    );
    my $execsts = $tsv->setknpresult2($sentence);
    if ( $execsts == 1 ) {
        $tsv->setVal( { holder => $holder } );

        # Adjust the range of the evaluation expression to bunsetsu
        # boundaries; an empty expression is stored as is.
        if ( $expression ne "" ) {
            &setExpressionBunid( $tsv, $expression );
        }
        else {
            $tsv->setVal( { expression => $expression } );
        }
        $tsv->setVal( { type   => $type } );
        $tsv->setVal( { target => $target } );
        if ( $tsv->checkTSV() == -1 ) {

            # Append the record number to the error output.
            # NOTE(review): presumably checkTSV has already printed the
            # error text without a trailing newline; confirm in tsv.pm.
            printf STDERR " = Line:%d\n", $lineCount;
        }
        else {

            # The sample ID is the CSV record number without the header.
            $tsv->setVal( { sampleID => $lineCount - 1 } );
            $tsv->printTSV();
        }
    }
    elsif ( $execsts == -2 || $execsts == 0 ) {
        printf STDERR " skip: KNP Error = Line:%d\n", $lineCount;
    }
    else {

        # Any other status: append the record number to the error output.
        printf STDERR " = Line:%d\n", $lineCount;
    }

}

# getline returns undef both at end of file and on a parse error. Without
# this check a malformed record would silently drop the rest of the file.
# Code 2012 is the parser's own end-of-data code and is not an error.
if ( !$tc->eof ) {
    my ( $diagCode, $diagText ) = $tc->error_diag();
    if ( $diagCode && $diagCode != 2012 ) {
        printf STDERR "Error: CSV parse error %s (%s) after Line:%d\n",
          $diagCode, $diagText, $lineCount;
    }
}
close($fh);

# Align each fragment of the evaluation expression with bunsetsu boundaries
# and store the result on the object via setVal({ expression => ... }).
#
# Arguments:
#   $pkg        - object providing {sentence}, {knp}{bunid}, {knp}{string}
#                 and a setVal method
#   $expression - expression text; several fragments may be separated by a
#                 newline, a literal backslash-n, or its full-width form
#
# NOTE(review): this assumes {knp}{bunid} maps a character offset in
# {sentence} to a bunsetsu index and {knp}{string} holds the text of each
# bunsetsu. That is inferred from how the arrays are indexed here; confirm
# against tsv.pm.
#
# A fragment that cannot be located in the sentence, is empty, or maps
# outside the bunid table is kept unchanged. Previously index()'s -1 was used
# directly as an array index, which silently read from the end of the table.
#
# No prototype: the sub takes two arguments, and the old ($) prototype only
# worked because the caller uses the &-form, which bypasses prototypes.
sub setExpressionBunid {
    my ( $pkg, $expression ) = @_;
    my $bunidOf  = $pkg->{knp}->{bunid};
    my $stringOf = $pkg->{knp}->{string};

    my @fragments = split( /\n|\\n|￥ｎ/, $expression );

    # split yields nothing when the text consists only of separators; fall
    # back to the whole text so the stored value is never undefined.
    @fragments = ($expression) unless @fragments;

    my @aligned;
    for my $fragment (@fragments) {
        my $firstChar = index( $pkg->{sentence}, $fragment );
        my $lastChar  = $firstChar + length($fragment) - 1;

        my $firstBun;
        my $lastBun;
        if ( $firstChar >= 0 && $lastChar >= $firstChar ) {
            $firstBun = $bunidOf->[$firstChar];
            $lastBun  = $bunidOf->[$lastChar];
        }

        if ( !defined($firstBun) || !defined($lastBun) ) {

            # Not alignable: leave the fragment as it was given.
            push @aligned, $fragment;
            next;
        }

        # Concatenate every bunsetsu the fragment touches.
        push @aligned, join( "", @{$stringOf}[ $firstBun .. $lastBun ] );
    }

    # Fragments are joined with a literal backslash-n, not a real newline.
    $pkg->setVal( { expression => join( "\\n", @aligned ) } );
    return;
}

