/*====================================================================

		Reading morphological analysis output and grouping it into bunsetsu

                                               S.Kurohashi 91. 6.25
                                               S.Kurohashi 93. 5.31

    $Id: read_data.c,v 1.148.2.1 2009/10/19 23:34:31 kawahara Exp $

====================================================================*/
#include "knp.h"

/* Per-morpheme markers filled by read_mrph(): entry k is set to 1 when a
   "*" line (Bnst_start) or a "+" line (Tag_start) precedes the k-th
   morpheme of the sentence being read. */
int Bnst_start[MRPH_MAX];
int Tag_start[MRPH_MAX];
/* Values parsed by read_mrph() from "+ %d%c" lines, one entry per "+" line.
   NOTE(review): Tag_type is an int array but is filled through a %c
   conversion, which stores a single char -- confirm this is intended. */
int Tag_dpnd[TAG_MAX];
int Tag_type[TAG_MAX];
/* Feature lists built by read_mrph() from the trailing text of "*" / "+"
   lines; only filled when OptReadFeature is set. */
FEATURE *Input_bnst_feature[BNST_MAX];
FEATURE *Input_tag_feature[TAG_MAX];

/* Number parsed by read_mrph() from the latest "# S-ID" comment line, and
   the value it had for the previous such line (used to detect a change). */
int ArticleID = 0;
int preArticleID = 0;

/* Defined in another translation unit. */
extern char CorpusComment[BNST_MAX][DATA_LEN];

/*==================================================================*/
	void selected_imi2feature(char *str, MRPH_DATA *m_ptr)
/*==================================================================*/
{
    /* Assign the space-separated tokens of the semantic-information string
       `str` to m_ptr->f as features, skipping every token that starts with
       one of the prefixes tested below.

       str   : semantic-information string.  "NIL" means nothing to do.  If
               it starts with a double quote, only the text up to the next
               double quote is used.  `str` itself is not modified; a private
               copy is tokenized.
       m_ptr : morpheme whose feature list receives the tokens.

       NOTE: uses strtok(), so it must not be called while another strtok()
       scan is in progress. */
    char *buf, *token, *cp, *imip;

    if (!strcmp(str, "NIL")) {
	return;
    }

    buf = strdup(str);
    if (buf == NULL) {
	/* out of memory: give up on these features instead of crashing */
	fprintf(stderr, ";; Can't allocate memory (selected_imi2feature)\n");
	return;
    }

    /* strip the surrounding double quotes, if any */
    if (buf[0] == '\"') {
	imip = &buf[1];
	if ((cp = strchr(imip, '\"')) != NULL) {
	    *cp = '\0';
	}
    }
    else {
	imip = buf;
    }

    for (token = strtok(imip, " "); token; token = strtok(NULL, " ")) {
	/* assign everything except tokens with the following prefixes */
	if (strncmp(token, "ɽɽ", strlen("ɽɽ")) && 
	    strncmp(token, "ǽư", strlen("ǽư")) && 
	    strncmp(token, "ɤ", strlen("ɤ")) &&
	    strncmp(token, "ƥ", strlen("ƥ")) &&
	    strncmp(token, "ɥᥤ", strlen("ɥᥤ"))) {
	    assign_cfeature(&(m_ptr->f), token, FALSE);
	}
    }

    free(buf);
}

/*==================================================================*/
    void assign_feature_alt_mrph(FEATURE **fpp, MRPH_DATA *m_ptr)
/*==================================================================*/
{
    /* Serialize the morpheme m_ptr as a feature string of the form
       "ALT-<Goi2>-<Yomi>-<Goi>-<Hinshi>-<Bunrui>-<Katuyou_Kata>-<Katuyou_Kei>-<Imi>"
       and assign it to the feature list *fpp.

       The buffer is sized for the worst case of the four integers:
       "ALT-" (4) + seven '-' separators (7) + four ints, each needing at
       most 3 * sizeof(int) digits plus a sign + the terminating NUL (1).
       The previous fixed slack of 20 bytes only had room for two characters
       per integer. */
    enum { ALT_FIXED_LEN = 4 + 7 + 4 * (3 * sizeof(int) + 1) + 1 };
    char *buf;

    buf = malloc_data(strlen(m_ptr->Goi2) + 
		      strlen(m_ptr->Yomi) + 
		      strlen(m_ptr->Goi) + 
		      strlen(m_ptr->Imi) + ALT_FIXED_LEN, "assign_feature_alt_mrph");
    sprintf(buf, "ALT-%s-%s-%s-%d-%d-%d-%d-%s", 
	    m_ptr->Goi2, m_ptr->Yomi, m_ptr->Goi, 
	    m_ptr->Hinshi, m_ptr->Bunrui, 
	    m_ptr->Katuyou_Kata, m_ptr->Katuyou_Kei, 
	    m_ptr->Imi);
    assign_cfeature(fpp, buf, FALSE);
    free(buf);
}

/*==================================================================*/
		 char *get_mrph_rep(MRPH_DATA *m_ptr)
/*==================================================================*/
{
    /* Locate the key searched for below inside m_ptr->Imi and return a
       pointer to the text that immediately follows it (a pointer into
       m_ptr->Imi, not a copy).  Returns NULL when the key is absent. */
    char *hit = strstr(m_ptr->Imi, "ɽɽ:");

    return hit ? hit + strlen("ɽɽ:") : NULL;
}

/*==================================================================*/
	char *get_mrph_rep_from_f(MRPH_DATA *m_ptr, int flag)
/*==================================================================*/
{
    /* Look up a value in the feature list m_ptr->f.

       When `flag` is non-zero the first feature name below is tried first;
       if it is present, the text after its "name:" prefix is returned.
       Otherwise (or when flag is zero) the second feature name is looked up
       the same way.  Returns NULL when no feature matches.  The returned
       pointer is derived from what check_feature() returns. */
    char *found;

    if (flag) {
	found = check_feature(m_ptr->f, "ɽɽѹ");
	if (found) {
	    return found + strlen("ɽɽѹ:");
	}
    }

    found = check_feature(m_ptr->f, "ɽɽ");
    if (!found) {
	return NULL;
    }
    return found + strlen("ɽɽ:");
}

/*==================================================================*/
	       int get_mrph_rep_length(char *rep_strt)
/*==================================================================*/
{
    /* Return the length of the value that starts at rep_strt.

       The value ends at the first space; if there is no space, at the first
       double quote; if there is neither, it runs to the end of the string.
       (Previously the last case subtracted from a NULL pointer.)
       Returns 0 when rep_strt is NULL. */
    char *rep_end;

    if (rep_strt == NULL) {
	return 0;
    }

    rep_end = strchr(rep_strt, ' ');
    if (rep_end == NULL) {
	rep_end = strchr(rep_strt, '\"');
    }
    if (rep_end == NULL) {
	return (int)strlen(rep_strt);
    }

    return (int)(rep_end - rep_strt);
}

/*==================================================================*/
 char *get_bnst_head_canonical_rep(BNST_DATA *ptr, int compound_flag)
/*==================================================================*/
{
    /* Return the value part of a feature of ptr->f, or NULL if none is found.

       With compound_flag set, the first lookup below is attempted and its
       value returned on success; in every other case the second lookup
       decides the result.  The two lookups are kept separate on purpose. */
    char *hit;

    if (compound_flag) {
	hit = check_feature(ptr->f, "缭ɽɽ");
	if (hit) {
	    return hit + strlen("缭ɽɽ:");
	}
    }

    hit = check_feature(ptr->f, "缭ɽɽ");
    return hit ? hit + strlen("缭ɽɽ:") : NULL;
}

/*==================================================================*/
	     int assign_rep_f_from_imi(MRPH_DATA *m_ptr)
/*==================================================================*/
{
    /* Copy the "key:value" entry searched for below out of m_ptr->Imi and
       assign it to m_ptr->f as a feature.

       Returns TRUE when the entry was found and assigned, FALSE when the
       entry is absent or its length is not usable (negative, or too long
       for the local buffer). */
    char *cp, buf[IMI_MAX];
    int length;

    cp = strstr(m_ptr->Imi, "ɽɽ:");
    if (cp == NULL) {
	return FALSE;
    }

    /* length of the entry, measured from the start of the key */
    length = get_mrph_rep_length(cp);
    if (length < 0 || length >= (int)sizeof(buf)) {
	return FALSE;
    }

    strncpy(buf, cp, length);
    buf[length] = '\0';
    assign_cfeature(&(m_ptr->f), buf, FALSE);
    return TRUE;
}

/*==================================================================*/
		 char *make_mrph_rn(MRPH_DATA *m_ptr)
/*==================================================================*/
{
    /* Build the string "<Goi>/<Yomi>" for m_ptr in a freshly allocated
       buffer; the caller must free() the result.

       When both Katuyou_Kata and Katuyou_Kei are positive, the ending of
       the current form (Form[Kata][Kei].gobi) is cut from the end of the
       string and the ending of the form returned by
       get_form_id(BASIC_FORM, Kata) is appended instead.  If the ending is
       longer than the string, the string is left as it is (previously this
       indexed before the start of the buffer). */
    char *buf;
    size_t buf_len, gobi_len;

    /* SMALL_DATA_LEN extra bytes: the appended ending may be longer than
       the one removed */
    buf = (char *)malloc_data(strlen(m_ptr->Goi) + strlen(m_ptr->Yomi) + SMALL_DATA_LEN, "make_mrph_rn");
    sprintf(buf, "%s/%s", m_ptr->Goi, m_ptr->Yomi);

    if (m_ptr->Katuyou_Kata > 0 && m_ptr->Katuyou_Kei > 0) {
	buf_len = strlen(buf);
	gobi_len = strlen(Form[m_ptr->Katuyou_Kata][m_ptr->Katuyou_Kei].gobi);
	if (gobi_len <= buf_len) {
	    buf[buf_len - gobi_len] = '\0'; /* cut down to the stem */
	    strcat(buf, Form[m_ptr->Katuyou_Kata][get_form_id(BASIC_FORM, m_ptr->Katuyou_Kata)].gobi); /* add the BASIC_FORM ending */
	}
    }
    return buf;
}

/*==================================================================*/
		void rn2canonical_rn(MRPH_DATA *m_ptr)
/*==================================================================*/
{
    /* Take the value returned by get_mrph_rep_from_f(m_ptr, FALSE), prepend
       the feature-name prefix used below, and assign the resulting string
       to m_ptr->f.  Does nothing when the lookup returns NULL. */
    char *rep, *feature;

    rep = get_mrph_rep_from_f(m_ptr, FALSE);
    if (!rep) {
	return;
    }

    feature = (char *)malloc_data(strlen("ɽɽ:") + strlen(rep) + 1, "rn2canonical_rn");
    sprintf(feature, "%s%s", "ɽɽ:", rep);
    assign_cfeature(&(m_ptr->f), feature, FALSE);
    free(feature);
}

/*==================================================================*/
	   void assign_cc_feature_to_bp(SENTENCE_DATA *sp)
/*==================================================================*/
{
    /* For every tag unit of the sentence, join the values of selected
       morphemes with '+' and assign the joined string to the tag unit as a
       feature.

       A morpheme contributes when it carries one of the two features tested
       first, does not carry the third one, and has the feature whose value
       is collected.  The first contribution is copied including its
       "name:" prefix; later ones are appended without the prefix.

       The work buffer now grows until the text fits (it used to be doubled
       at most once, and the first copy was not checked at all). */
    int i, j, merged_rep_size = DATA_LEN;
    char *cp, *merged_rep;
    FEATURE *mf;

    merged_rep = (char *)malloc_data(merged_rep_size, "assign_cc_feature_to_bp");

    for (i = 0; i < sp->Tag_num; i++) { /* every tag unit */
	*merged_rep = '\0';
	for (j = 0; j < (sp->tag_data + i)->mrph_num; j++) {
	    mf = ((sp->tag_data + i)->mrph_ptr + j)->f;
	    if (!(check_feature(mf, "Ƹ") || 
		  check_feature(mf, "Ƹ")) || 
		check_feature(mf, "ü󸫽и") || 
		!(cp = check_feature(mf, "ɽɽ"))) {
		continue;
	    }

	    if (*merged_rep) {
		/* +2: the '+' separator and the terminating NUL */
		while (strlen(merged_rep) + strlen(cp + strlen("ɽɽ:")) + 2 > (size_t)merged_rep_size) {
		    merged_rep = (char *)realloc_data(merged_rep, merged_rep_size *= 2, "assign_cc_feature_to_bp");
		}
		strcat(merged_rep, "+");
		strcat(merged_rep, cp + strlen("ɽɽ:"));
	    }
	    else {
		while (strlen(cp) + 1 > (size_t)merged_rep_size) {
		    merged_rep = (char *)realloc_data(merged_rep, merged_rep_size *= 2, "assign_cc_feature_to_bp");
		}
		strcpy(merged_rep, cp);
	    }
	}

	if (*merged_rep) {
	    assign_cfeature(&((sp->tag_data + i)->f), merged_rep, FALSE); /* joined value */
	}
    }

    free(merged_rep);
}

/*==================================================================*/
	  void assign_cc_feature_to_bnst(SENTENCE_DATA *sp)
/*==================================================================*/
{
    /* For every bunsetsu of the sentence, derive features from the features
       of its tag units:

       1. the values found on its tag units, joined with '+', are assigned
          to the bunsetsu (first value with its "name:" prefix, later ones
          without);
       2. the feature of the last tag unit that had one is assigned again
          under a rewritten name (see the in-place rewrite below);
       3. if the bunsetsu has more than one tag unit and the last one carries
          the feature tested below, the values of the last two tag units are
          joined and assigned under a rewritten name as well.

       The work buffer now grows until the text fits; previously it was
       doubled at most once and the loop of step 3 did no checking. */
    int i, j, merged_rep_size = DATA_LEN, error_flag;
    char *cp, *merged_rep, *last_rep;

    merged_rep = (char *)malloc_data(merged_rep_size, "assign_cc_feature_to_bnst");

    for (i = 0; i < sp->Bnst_num; i++) { /* every bunsetsu */
	*merged_rep = '\0';
	last_rep = NULL;
	for (j = 0; j < (sp->bnst_data + i)->tag_num; j++) {
	    if ((cp = check_feature(((sp->bnst_data + i)->tag_ptr + j)->f, "ɽɽ"))) {
		if (*merged_rep) {
		    /* +2: the '+' separator and the terminating NUL */
		    while (strlen(merged_rep) + strlen(cp + strlen("ɽɽ:")) + 2 > (size_t)merged_rep_size) {
			merged_rep = (char *)realloc_data(merged_rep, merged_rep_size *= 2, "assign_cc_feature_to_bnst");
		    }
		    strcat(merged_rep, "+");
		    strcat(merged_rep, cp + strlen("ɽɽ:"));
		}
		else {
		    while (strlen(cp) + 1 > (size_t)merged_rep_size) {
			merged_rep = (char *)realloc_data(merged_rep, merged_rep_size *= 2, "assign_cc_feature_to_bnst");
		    }
		    strcpy(merged_rep, cp);
		}
		last_rep = cp;
	    }
	}

	if (*merged_rep) {
	    assign_cfeature(&((sp->bnst_data + i)->f), merged_rep, FALSE); /* joined value */
	}

	if (last_rep) {
	    /* Temporarily overwrite part of the tag unit's feature string in
	       place, assign the rewritten string to the bunsetsu, then write
	       the original bytes back.
	       NOTE(review): this relies on the exact byte lengths of the
	       literals involved -- do not alter them. */
	    strncpy(last_rep + strlen(""), "缭", strlen("缭"));
	    assign_cfeature(&((sp->bnst_data + i)->f), last_rep + strlen(""), FALSE);
	    strncpy(last_rep + strlen(""), "", strlen(""));
	}

	/* last tag unit carries the feature below: also build a value from
	   the last two tag units */
	if ((sp->bnst_data + i)->tag_num > 1 && 
	    check_feature(((sp->bnst_data + i)->tag_ptr + (sp->bnst_data + i)->tag_num - 1)->f, "ʸ")) {
	    *merged_rep = '\0';
	    error_flag = 0;
	    for (j = (sp->bnst_data + i)->tag_num - 2; j < (sp->bnst_data + i)->tag_num; j++) {
		if ((cp = check_feature(((sp->bnst_data + i)->tag_ptr + j)->f, "ɽɽ"))) {
		    if (*merged_rep) {
			while (strlen(merged_rep) + strlen(cp + strlen("ɽɽ:")) + 2 > (size_t)merged_rep_size) {
			    merged_rep = (char *)realloc_data(merged_rep, merged_rep_size *= 2, "assign_cc_feature_to_bnst");
			}
			strcat(merged_rep, "+");
			strcat(merged_rep, cp + strlen("ɽɽ:"));
		    }
		    else {
			while (strlen(cp) + 1 > (size_t)merged_rep_size) {
			    merged_rep = (char *)realloc_data(merged_rep, merged_rep_size *= 2, "assign_cc_feature_to_bnst");
			}
			strcpy(merged_rep, cp);
		    }
		}
		else {
		    /* one of the two tag units has no value: give up */
		    error_flag = 1;
		    break;
		}
	    }

	    if (!error_flag) {
		/* same in-place rename / restore as above, on the work buffer */
		strncpy(merged_rep, "缭", strlen("缭"));
		assign_cfeature(&((sp->bnst_data + i)->f), merged_rep, FALSE);
		strncpy(merged_rep, "", strlen(""));
	    }
	}
    }

    free(merged_rep);
}

/*==================================================================*/
	 void assign_canonical_rep_to_mrph(SENTENCE_DATA *sp)
/*==================================================================*/
{
    /* For every morpheme of the sentence that has a value from
       get_mrph_rep(), build a feature consisting of the prefix below, that
       value, and -- joined with '?' -- the corresponding value of each
       "ALT-..." feature of the morpheme that differs from the morpheme's
       own value.  The result is assigned to the morpheme's feature list.

       Changes from the earlier version: an "ALT-" feature is only used when
       all eight fields parse (otherwise an unset or stale Imi was read), and
       the work buffer grows until the text fits.
       NOTE(review): the %[^-] conversions have no field width, so an
       over-long ALT field could still overflow the fields of `m`. */
    FEATURE *fp;
    MRPH_DATA m, *m_ptr = sp->mrph_data;
    char *rep_strt, *rep_strt2, *merged_rep;
    int i, rep_length, rep_length2, merged_rep_size = DATA_LEN;

    merged_rep = (char *)malloc_data(merged_rep_size, "assign_canonical_rep_to_mrph");

    for (i = 0; i < sp->Mrph_num; i++, m_ptr++) {

	/* value of the morpheme itself */
	rep_strt = get_mrph_rep(m_ptr);
	rep_length = get_mrph_rep_length(rep_strt);
	if (rep_length < 1) {
	    continue;
	}

	strcpy(merged_rep, "ɽɽ:");
	while (strlen(merged_rep) + rep_length + 1 > (size_t)merged_rep_size) {
	    merged_rep = (char *)realloc_data(merged_rep, merged_rep_size *= 2, "assign_canonical_rep_to_mrph");
	}
	strncat(merged_rep, rep_strt, rep_length);

	for (fp = m_ptr->f; fp; fp = fp->next) {
	    if (strncmp(fp->cp, "ALT-", 4)) {
		continue;
	    }
	    /* decode the alternative morpheme stored in the feature */
	    if (sscanf(fp->cp + 4, "%[^-]-%[^-]-%[^-]-%d-%d-%d-%d-%[^\n]", 
		       m.Goi2, m.Yomi, m.Goi, 
		       &m.Hinshi, &m.Bunrui, 
		       &m.Katuyou_Kata, &m.Katuyou_Kei, m.Imi) != 8) {
		continue;
	    }
	    rep_strt2 = get_mrph_rep(&m);
	    rep_length2 = get_mrph_rep_length(rep_strt2);
	    if (rep_length2 > 0 && 
		(rep_length != rep_length2 || strncmp(rep_strt, rep_strt2, rep_length))) {
		/* differs from the morpheme's own value: append after '?' */
		while (strlen(merged_rep) + rep_length2 + 2 > (size_t)merged_rep_size) {
		    merged_rep = (char *)realloc_data(merged_rep, merged_rep_size *= 2, "assign_canonical_rep_to_mrph");
		}
		strcat(merged_rep, "?");
		strncat(merged_rep, rep_strt2, rep_length2);
	    }
	}

	assign_cfeature(&(m_ptr->f), merged_rep, FALSE);
    }

    free(merged_rep);
}

/*==================================================================*/
void lexical_disambiguation(SENTENCE_DATA *sp, MRPH_DATA *m_ptr, int homo_num)
/*==================================================================*/
{
    /* Reduce a run of homograph candidates m_ptr[0 .. homo_num-1] to a
       single morpheme stored in m_ptr[0].

       sp       : sentence being read (sp->mrph_data is used to compute how
                  many morphemes precede the candidates).
       m_ptr    : first candidate of the run.
       homo_num : number of candidates; clamped to HOMO_MAX.

       Steps: pick a preferred candidate with the rules in HomoRuleArray,
       mark it with features, record every other candidate on it as an
       "ALT-..." feature, and finally move the preferred candidate's data
       into m_ptr[0].

       NOTE(review): loop_ptr2, cp, cp2 and merged_rep_size are declared but
       not used in this function. */
    int i, j, k, flag, orig_amb_flag, pref_mrph, pref_rule;
    int bw_length;
    int real_homo_num;
    int uniq_flag[HOMO_MAX];		/* 1 for each candidate that takes part (all are set to 1 below) */
    int matched_flag[HOMO_MRPH_MAX];	/* TRUE once element k of the current rule
					   pattern has been matched by a candidate */
    int rep_length, rep_length2, merged_rep_size = DATA_LEN;
    HomoRule	*r_ptr;
    MRPH_DATA	*loop_ptr, *loop_ptr2;
    char fname[SMALL_DATA_LEN2], *cp, *cp2, *rep_strt, *rep_strt2;

    /* ignore candidates beyond the supported maximum */
    if (homo_num > HOMO_MAX) {
	homo_num = HOMO_MAX;
    }

    /* Flag the candidates to consider.  As written, every candidate is
       flagged (the original comment carries a change note dated 2006/10/16) */

    uniq_flag[0] = 1;
    real_homo_num = 1;
    for (i = 1; i < homo_num; i++) {
	uniq_flag[i] = 1;
	if (uniq_flag[i]) real_homo_num++;
    }

    /* nothing to disambiguate with a single candidate */

    if (real_homo_num == 1) return;

    /* Find the first rule in HomoRuleArray that applies:
         - every flagged candidate must match some element of the rule pattern
         - every element of the rule pattern must have been matched
         - the candidate that matched pattern element 0 becomes pref_mrph
    */

    flag = FALSE;
    pref_mrph = 0;
    pref_rule = 0;
    for (i = 0, r_ptr = HomoRuleArray; i < CurHomoRuleSize; i++, r_ptr++) {
	if (r_ptr->pattern->mrphsize > HOMO_MRPH_MAX) {
	    fprintf(stderr, ";; The number of Rule morphs is too large in HomoRule.\n");
	    exit(1);
	}
	
	/* check the context before the candidates (backward matching) */
	bw_length = m_ptr - sp->mrph_data;
	if ((r_ptr->pre_pattern == NULL &&	/* garbled in source; likely "differs" */
	     bw_length != 0) ||
	    (r_ptr->pre_pattern != NULL &&
	     regexpmrphs_match(r_ptr->pre_pattern->mrph + 
			       r_ptr->pre_pattern->mrphsize - 1,
			       r_ptr->pre_pattern->mrphsize,
			       m_ptr - 1, 
			       bw_length,	/* garbled in source; likely "differs" */
			       BW_MATCHING, 
			       ALL_MATCHING,/* garbled in source; likely "differs" */
			       SHORT_MATCHING) == -1)) {
	    continue;
	}
	
	pref_mrph = 0;
	for (k = 0; k < r_ptr->pattern->mrphsize; k++) matched_flag[k] = FALSE;
	for (j = 0, loop_ptr = m_ptr; j < homo_num; j++, loop_ptr++) {
	    if (uniq_flag[j] == 0) continue;
	    flag = FALSE;
	    for (k = 0; k < r_ptr->pattern->mrphsize; k++) {
		/* an already matched element is reused only if it has AST_FLG */
		if (matched_flag[k] && (r_ptr->pattern->mrph + k)->ast_flag != AST_FLG)
		    continue;
		if (regexpmrph_match(r_ptr->pattern->mrph + k, loop_ptr) 
		    == TRUE) {
		    flag = TRUE;
		    if (k == 0) pref_mrph = j;
		    matched_flag[k] = TRUE;
		    break;
		}
	    }
	    /* a candidate matched no element: this rule fails */
	    if (flag == FALSE) break;
	}
	if (flag == TRUE) {
	    /* all candidates matched; require every element to be matched too */
	    for (k = 0; k < r_ptr->pattern->mrphsize; k++) {
		if (matched_flag[k] == FALSE) {
		    flag = FALSE;
		    break;
		}
	    }
	    if (flag == TRUE) {
		pref_rule = i;
		break;
	    }
	}
    }
    /* NOTE(review): when the last rule tried fails after setting pref_mrph,
       pref_mrph keeps that value although flag is FALSE -- confirm that
       this is intended. */

    /* mark the preferred candidate with a feature */
    assign_cfeature(&((m_ptr+pref_mrph)->f), "ۣ", FALSE);

    if (flag == TRUE) { /* a rule matched */
	/* assign the features written in the rule (the original comment
	   appears to note that a rule may change the mark assigned above) */
	assign_feature(&((m_ptr+pref_mrph)->f), &((HomoRuleArray + pref_rule)->f), m_ptr, 0, 1, FALSE);

	/* disabled debug output (condition starts with 0) */
	if (0 && OptDisplay == OPT_DEBUG) {
	    fprintf(Outfp, "Lexical Disambiguation "
		    "(%dth mrph -> %dth homo by %dth rule : %s :", 
		    m_ptr - sp->mrph_data, pref_mrph, pref_rule, 
		    (m_ptr+pref_mrph)->Goi2);
	    for (i = 0, loop_ptr = m_ptr; i < homo_num; i++, loop_ptr++)
		if (uniq_flag[i]) 
		    fprintf(Outfp, " %s", 
			    Class[loop_ptr->Hinshi][loop_ptr->Bunrui].id);
	    fprintf(Outfp, ")\n");
	}
    }
    else {
	/* NOTE(review): m_ptr - sp->mrph_data is a pointer difference
	   printed with %d */
	if (OptDisplay == OPT_DEBUG) {
	    fprintf(Outfp, ";; Cannot disambiguate lexical ambiguities by rules"
		    " (%dth mrph : %s ?", m_ptr - sp->mrph_data,
		    (m_ptr+pref_mrph)->Goi2);
	    for (i = 0, loop_ptr = m_ptr; i < homo_num; i++, loop_ptr++)
		if (uniq_flag[i]) 
		    fprintf(Outfp, " %s", 
			    Class[loop_ptr->Hinshi][loop_ptr->Bunrui].id);
	    fprintf(Outfp, ")\n");
	}
    }

    /* If the mark is still present on the preferred candidate, save every
       other candidate on it as an ALT feature and add the other candidates'
       semantic-information tokens to it */
    orig_amb_flag = 0;
    if (check_feature((m_ptr+pref_mrph)->f, "ۣ")) {
	/* value of the preferred candidate, for comparison */
	rep_strt = get_mrph_rep(m_ptr + pref_mrph);
	rep_length = get_mrph_rep_length(rep_strt);

	for (i = 0; i < homo_num; i++) {
	    if (i != pref_mrph) {
		/* set orig_amb_flag when this candidate's value differs from
		   that of the preferred candidate */
		rep_strt2 = get_mrph_rep(m_ptr+i);
		rep_length2 = get_mrph_rep_length(rep_strt2);
		if (rep_length > 0 && 
		    (rep_length != rep_length2 || strncmp(rep_strt, rep_strt2, rep_length))) {
		    orig_amb_flag = 1;
		}

		/* keep the candidate's data as an <ALT-...> feature */
		assign_feature_alt_mrph(&((m_ptr+pref_mrph)->f), m_ptr + i);

		/* add the candidate's semantic-information tokens (filtered
		   by selected_imi2feature) to the preferred candidate */
		selected_imi2feature((m_ptr+i)->Imi, m_ptr+pref_mrph);
	    }
	}

	/* one feature per candidate, built from the id of its class */
	for (i = 0; i < homo_num; i++) {
	    if (uniq_flag[i] == 0) continue;
	    sprintf(fname, "ۣ-%s", 
		    Class[(m_ptr+i)->Hinshi][(m_ptr+i)->Bunrui].id);
	    assign_cfeature(&((m_ptr+pref_mrph)->f), fname, FALSE);
	}
    }

    /* mark the preferred candidate when some candidate had a different value */
    if (orig_amb_flag) {
	assign_cfeature(&((m_ptr+pref_mrph)->f), "ۣ", FALSE);
    }

    /* move the preferred candidate into the first slot; its feature list is
       handed over, not copied */
    if (pref_mrph != 0) {
	strcpy(m_ptr->Goi2, (m_ptr+pref_mrph)->Goi2);
	strcpy(m_ptr->Yomi, (m_ptr+pref_mrph)->Yomi);
	strcpy(m_ptr->Goi, (m_ptr+pref_mrph)->Goi);
	m_ptr->Hinshi = (m_ptr+pref_mrph)->Hinshi;
	m_ptr->Bunrui = (m_ptr+pref_mrph)->Bunrui;
	m_ptr->Katuyou_Kata = (m_ptr+pref_mrph)->Katuyou_Kata;
	m_ptr->Katuyou_Kei = (m_ptr+pref_mrph)->Katuyou_Kei;
	strcpy(m_ptr->Imi, (m_ptr+pref_mrph)->Imi);
	clear_feature(&(m_ptr->f));
	m_ptr->f = (m_ptr+pref_mrph)->f;
	(m_ptr+pref_mrph)->f = NULL;
	m_ptr->length = (m_ptr+pref_mrph)->length;
    }
}

/*==================================================================*/
		       int readtoeos(FILE *fp)
/*==================================================================*/
{
    /* Discard input lines from fp up to and including the next "EOS" line.
       Returns FALSE once that line has been consumed, or EOF when the input
       ends first. */
    U_CHAR line[DATA_LEN];

    while (fgets(line, DATA_LEN, fp) != NULL) {
	if (str_eq(line, "EOS\n")) {
	    return FALSE;
	}
    }
    return EOF;
}

/*==================================================================*/
			int readtonl(FILE *fp)
/*==================================================================*/
{
    /* Discard characters from fp up to and including the next newline.
       Returns FALSE after the newline has been consumed, or EOF when the
       input ends first. */
    int ch;

    while ((ch = fgetc(fp)) != EOF) {
	if (ch == '\n') {
	    return FALSE;
	}
    }
    return EOF;
}

/*==================================================================*/
	     int read_mrph_file(FILE *fp, U_CHAR *buffer)
/*==================================================================*/
{
    /* Read one line (at most DATA_LEN - 1 characters) from fp into buffer.

       Returns EOF at end of input, TRUE otherwise.  On _WIN32 the line is
       passed through toStringEUC() and copied back into buffer.  In
       SERVER_MODE a trailing "\r\n" is rewritten to "\n" (only for lines
       longer than two characters), and a line whose first character equals
       EOf is reported as EOF. */
    int n;
#ifdef _WIN32
    char *EUCbuffer;
#endif

    if (fgets(buffer, DATA_LEN, fp) == NULL) {
	return EOF;
    }

#ifdef _WIN32
    EUCbuffer = toStringEUC(buffer);
    strcpy(buffer, EUCbuffer);
    free(EUCbuffer);
#endif

    if (OptMode != SERVER_MODE) {
	return TRUE;
    }

    /* server mode: normalize the line ending */
    n = strlen(buffer);
    if (n > 2 && buffer[n-2] == '\r' && buffer[n-1] == '\n') {
	buffer[n-2] = '\n';
	buffer[n-1] = '\0';
    }

    return (buffer[0] == EOf) ? EOF : TRUE;
}

/*==================================================================*/
	     int imi2feature(char *str, MRPH_DATA *m_ptr)
/*==================================================================*/
{
    /* Split `str` at spaces and assign every token to m_ptr->f as a feature.

       `str` is modified in place by strtok().  Always returns TRUE; the
       function is declared int but previously had no return statement. */
    char *token;

    for (token = strtok(str, " "); token; token = strtok(NULL, " ")) {
	assign_cfeature(&(m_ptr->f), token, FALSE);
    }

    return TRUE;
}

/*==================================================================*/
	   void delete_existing_features(MRPH_DATA *m_ptr)
/*==================================================================*/
{
    /* Call delete_cfeature() on m_ptr->f once for each feature name in the
       table below, in table order. */
    static char *names[] = {
	"ƥ",
	"ɥᥤ",
	"ǽư",
	"ɤ",
	"",
	"ư",
	"ºư",
	"ǫư",
	"ɸ",
	"ά"
    };
    size_t k;

    for (k = 0; k < sizeof(names) / sizeof(names[0]); k++) {
	delete_cfeature(&(m_ptr->f), names[k]);
    }
}

/*==================================================================*/
	    void copy_mrph(MRPH_DATA *dst, MRPH_DATA *src)
/*==================================================================*/
{
    /* Copy the lexical fields (Goi, Yomi, Goi2, Hinshi, Bunrui,
       Katuyou_Kata, Katuyou_Kei, Imi) from src to dst, then assign the
       space-separated tokens of src->Imi to dst->f as features.  If Imi
       starts with a double quote, only the text up to the next double quote
       is tokenized.

       The tokens are taken from a private copy, so src->Imi is left intact
       (it used to be cut up in place by the quote stripping and by the
       strtok() inside imi2feature()). */
    char *buf, *imip, *cp;

    strcpy(dst->Goi, src->Goi);
    strcpy(dst->Yomi, src->Yomi);
    strcpy(dst->Goi2, src->Goi2);
    dst->Hinshi = src->Hinshi;
    dst->Bunrui = src->Bunrui;
    dst->Katuyou_Kata = src->Katuyou_Kata;
    dst->Katuyou_Kei = src->Katuyou_Kei;
    strcpy(dst->Imi, src->Imi);

    buf = strdup(src->Imi);
    if (buf == NULL) {
	/* out of memory: the fields are copied, the features are skipped */
	fprintf(stderr, ";; Can't allocate memory (copy_mrph)\n");
	return;
    }

    /* strip the surrounding double quotes, if any */
    if (buf[0] == '\"') {
	imip = &buf[1];
	if ((cp = strchr(imip, '\"')) != NULL) {
	    *cp = '\0';
	}
    }
    else {
	imip = buf;
    }

    imi2feature(imip, dst);
    free(buf);
}

/*==================================================================*/
	     int feature_string2f(char *str, FEATURE **f)
/*==================================================================*/
{
    /* Split `str` at '<' and '>' characters and assign every piece to the
       feature list *f.

       `str` is modified in place by strtok().  Always returns TRUE; the
       function is declared int but previously had no return statement. */
    char *token;

    for (token = strtok(str, "><"); token; token = strtok(NULL, "><")) {
	assign_cfeature(f, token, FALSE);
    }

    return TRUE;
}

/*==================================================================*/
int store_one_annotation(SENTENCE_DATA *sp, TAG_DATA *tp, char *token)
/*==================================================================*/
{
    /* Parse one annotation entry of the form
       "<rel>/<flag>/<word>/<tag_n>/<sent_n>/..." starting at `token` and
       append it to tp->c_cpm_ptr (which must already be allocated).

       Returns FALSE when the element table is full, which tells the caller
       to stop; TRUE otherwise, including when the entry is skipped because
       it cannot be parsed or its relation name is unknown.

       flag 'E' or 'U': the element gets no target (NULL pointers).
       otherwise      : with sent_n <= 0 the target is tag unit tag_n of the
                        current sentence (range-checked later by
                        check_annotation()); with sent_n > 0 no target is
                        stored, as the lookup in earlier sentences is not
                        implemented here.
       elem_b_num is set to the element index for flag 'C', -1 for 'N' and
       -2 for everything else.

       The sscanf() result is now checked: tag_n / sent_n used to be read
       uninitialised when those fields were not numeric.
       NOTE(review): the %[^/] conversions have no field width, so an
       over-long <rel> or <word> can still overflow the local buffers. */
    char flag = '\0', rel[SMALL_DATA_LEN], word[BNST_LENGTH_MAX];
    int tag_n = 0, sent_n = 0, items, n;
    CF_PRED_MGR *cpm = tp->c_cpm_ptr;

    n = cpm->cf.element_num;
    if (n >= CF_ELEMENT_MAX) {
	return FALSE;
    }

    items = sscanf(token, "%[^/]/%c/%[^/]/%d/%d/%*[^;]", rel, &flag, word, &tag_n, &sent_n);
    if (items < 2 || 
	(flag != 'E' && flag != 'U' && items < 5)) {
	/* relation/flag missing, or a target is needed but not given */
	if (OptDisplay == OPT_DEBUG) fprintf(stderr, ";; Invalid annotation (%d items)\n", items);
	return TRUE;
    }

    cpm->cf.pp[n][0] = pp_kstr_to_code(rel);
    cpm->cf.pp[n][1] = END_M;

    if (cpm->cf.pp[n][0] == END_M) {
	if (OptDisplay == OPT_DEBUG) fprintf(stderr, ";; Unknown case <%s>\n", rel);
	return TRUE;
    }

    if (flag == 'E' || flag == 'U') {
	cpm->elem_b_ptr[n] = NULL;
	cpm->elem_s_ptr[n] = NULL;
    }
    else if (sent_n <= 0) {
	/* target in the sentence being read */
	cpm->elem_b_ptr[n] = sp->tag_data + tag_n;
	cpm->elem_s_ptr[n] = sp;
    }
    /* sent_n > 0: target pointers are left as they are */

    if (flag == 'C') {
	cpm->elem_b_num[n] = n;
    }
    else if (flag == 'N') {
	cpm->elem_b_num[n] = -1;
    }
    else {
	cpm->elem_b_num[n] = -2;
    }

    cpm->cf.element_num++;
    if (cpm->cf.element_num >= CF_ELEMENT_MAX) {
	return FALSE;
    }

    return TRUE;
}

/*==================================================================*/
	 int read_annotation(SENTENCE_DATA *sp, TAG_DATA *tp)
/*==================================================================*/
{
    /* If tp carries the feature looked up below, allocate a zero-filled
       CF_PRED_MGR for tp->c_cpm_ptr and fill it from the feature value.

       The value is expected to look like "<name>:<field>:<entries>", with
       one more ':'-terminated field before the entries when OptAnaphora is
       set; the entries are separated by ';' and handed one by one to
       store_one_annotation().

       Returns FALSE as soon as store_one_annotation() returns FALSE, TRUE
       otherwise.  A value without the expected ':' separators is ignored
       (TRUE, nothing allocated); this used to dereference NULL + 1.  An
       empty last entry is skipped. */
    char *cp, *start_cp;

    cp = check_feature(tp->f, "ʲϷ");
    if (cp == NULL) {
	return TRUE;
    }

    /* skip "<name>:" and the following ':'-terminated field(s) */
    cp += strlen("ʲϷ:");
    cp = strchr(cp, ':');
    if (cp != NULL && OptAnaphora) {
	cp = strchr(cp + 1, ':');
    }
    if (cp == NULL) {
	return TRUE;
    }
    cp++;

    tp->c_cpm_ptr = (CF_PRED_MGR *)malloc_data(sizeof(CF_PRED_MGR), "read_annotation");
    memset(tp->c_cpm_ptr, 0, sizeof(CF_PRED_MGR));

    start_cp = cp;
    for (; *cp; cp++) {
	if (*cp == ';') {
	    if (store_one_annotation(sp, tp, start_cp) == FALSE) {
		return FALSE;
	    }
	    start_cp = cp + 1;
	}
    }
    /* last entry (not terminated by ';') */
    if (*start_cp && store_one_annotation(sp, tp, start_cp) == FALSE) {
	return FALSE;
    }

    return TRUE;
}

/*==================================================================*/
	       int check_annotation(SENTENCE_DATA *sp)
/*==================================================================*/
{
    /* Validate the annotation elements of every tag unit of sp that has a
       c_cpm_ptr.

       An element that points into the current sentence (elem_s_ptr == sp)
       but at a tag index >= sp->Tag_num is reported on stderr and dropped;
       the remaining elements are packed to the front.  When nothing
       remains, the CF_PRED_MGR is freed and c_cpm_ptr set to NULL.

       Always returns TRUE; the function is declared int but previously had
       no return statement.
       NOTE(review): the diagnostic dereferences the out-of-range tag unit
       (head_ptr->Goi, num); whether that memory is valid cannot be told
       from this function. */
    int i, j, k, check[CF_ELEMENT_MAX];
    TAG_DATA *tp;
    CF_PRED_MGR *cpm;

    for (i = 0; i < sp->Tag_num; i++) {
	tp = sp->tag_data + i;
	cpm = tp->c_cpm_ptr;
	if (cpm == NULL) {
	    continue;
	}

	for (j = 0; j < cpm->cf.element_num; j++) {
	    /* target in this sentence: is the tag index in range? */
	    if (sp == cpm->elem_s_ptr[j] && 
		(cpm->elem_b_ptr[j] - sp->tag_data) >= sp->Tag_num) {
		fprintf(stderr, ";; discarded inappropriate annotation: %s/?/%s/%d/0\n", 
			pp_code_to_kstr(cpm->cf.pp[j][0]), 
			cpm->elem_b_ptr[j]->head_ptr->Goi, 
			cpm->elem_b_ptr[j]->num);
		check[j] = FALSE;
	    }
	    else {
		check[j] = TRUE;
	    }
	}

	/* pack the surviving elements to the front */
	k = 0;
	for (j = 0; j < cpm->cf.element_num; j++) {
	    if (check[j] != TRUE) {
		continue;
	    }
	    if (k != j) {
		cpm->cf.pp[k][0] = cpm->cf.pp[j][0];
		cpm->elem_b_ptr[k] = cpm->elem_b_ptr[j];
		cpm->elem_s_ptr[k] = cpm->elem_s_ptr[j];
		cpm->elem_b_num[k] = cpm->elem_b_num[j];
	    }
	    k++;
	}

	if (k) {
	    cpm->cf.element_num = k;
	}
	else { /* nothing left: release the manager */
	    free(tp->c_cpm_ptr);
	    tp->c_cpm_ptr = NULL;
	}
    }

    return TRUE;
}

/*==================================================================*/
	      int read_mrph(SENTENCE_DATA *sp, FILE *fp)
/*==================================================================*/
{
    /* Read the lines of one sentence from fp into sp.

       Lines are dispatched on how they start:
         OptIgnoreChar  echoed to Outfp and otherwise ignored
         '#'            comment line: fills sp->KNPSID / sp->Comment and,
                        for "# S-ID" lines, updates ArticleID
         '*'            already-parsed input: stores head/type into
                        sp->Best_mgr->dpnd and counts sp->Bnst_num
         '+'            already-parsed input: stores Tag_dpnd/Tag_type and
                        counts sp->Tag_num
         "EOS"          end of sentence
         anything else  one morpheme, stored at sp->mrph_data[sp->Mrph_num]
       Morpheme lines starting with "@ " (but not "@ @") are homograph
       candidates of the preceding morpheme; each run of candidates is
       reduced to one morpheme by lexical_disambiguation().

       Returns TRUE when "EOS" is reached with at least one morpheme, EOF at
       end of input, FALSE for an empty sentence or an over-long line
       without newline; on other errors the value of readtoeos() or
       readtonl() after skipping input (FALSE or EOF). */
    U_CHAR input_buffer[DATA_LEN], rev_ibuffer[DATA_LEN], rest_buffer[DATA_LEN], Hinshi_str[DATA_LEN], Bunrui_str[DATA_LEN];
    U_CHAR Katuyou_Kata_str[DATA_LEN], Katuyou_Kei_str[DATA_LEN];
    MRPH_DATA  *m_ptr = sp->mrph_data;
    int homo_num, offset, mrph_item, bnst_item, tag_item, i, j, homo_flag;

    sp->Mrph_num = 0;
    homo_num = 0;
    ErrorComment = NULL;
    PM_Memo[0] = '\0';
    /* sentinel: only overwritten when a read fills the whole buffer */
    input_buffer[DATA_LEN-1] = '\n';

    while (1) {
	if (read_mrph_file(fp, input_buffer) == EOF) return EOF;

	/* sentinel gone: the line did not fit; drop the rest of the line */
	if (input_buffer[DATA_LEN-1] != '\n') {
	    input_buffer[DATA_LEN-1] = '\0';
	    fprintf(stderr, ";; Too long mrph <%s> !\n", input_buffer);
	    return readtonl(fp);
	}
	/* NOTE(review): strlen(input_buffer)-1 assumes a non-empty line */
	else if (input_buffer[strlen(input_buffer)-1] != '\n') {
	    fprintf(stderr, ";; Too long mrph <%s> !\n", input_buffer);
	    return FALSE;
	}

	/* line starting with the ignore character: echo it and go on */
	if (OptIgnoreChar && *input_buffer == OptIgnoreChar) {
	    fprintf(Outfp, "%s", input_buffer);
	    fflush(Outfp);
	    continue;
	}

	/* comment line starting with '#' */

	if (input_buffer[0] == '#') {
	    input_buffer[strlen(input_buffer)-1] = '\0';
	    /* NOTE(review): earlier values of sp->Comment / sp->KNPSID are
	       overwritten without being freed here -- confirm they are
	       released elsewhere */
	    sp->Comment = (char *)malloc_data(strlen(input_buffer), "read_mrph");
	    sp->KNPSID = (char *)malloc_data(strlen(input_buffer) + 3, "read_mrph");
	    sscanf(input_buffer, "# %s %[^\n]", sp->KNPSID, sp->Comment);

	    /* "# S-ID" line whose rest contains '-': detect a change of
	       article (the part of the ID before the last '-') */
	    if (!strncmp(input_buffer, "# S-ID", 6) && 
		strchr(input_buffer+6, '-')) { /* ID has the form <article>-<sentence> */

		/* Build the first word of the line (up to the first blank
		   other than the one at index 1) in reverse, so that sscanf
		   can read the ID from its end.  ArticleID therefore holds
		   the digits in reversed order (the original comment gives
		   135531 as an example) */
		i = strlen(input_buffer);
		j = 0;
		while (i > 0) {
		    i--;
		    rev_ibuffer[j++] = input_buffer[i];
		    /* a blank (except the one at index 1, after '#') discards */
		    /* everything collected so far, i.e. what follows the ID */
		    if (i != 1 && (input_buffer[i] == ' ' || input_buffer[i] == '\t')) {
			j = 0;
		    }
		}
		rev_ibuffer[j++] = '\0';

		/* an int only holds up to 2147483647, so at most 9 digits are read */
		sscanf(rev_ibuffer, "%*d-%9d", &ArticleID);

		if (ArticleID && preArticleID && ArticleID != preArticleID) {
		    if (OptDisplay == OPT_DEBUG) fprintf(stderr, "New Article %s\n", input_buffer);
		    if (OptNE) {
			clear_ne_cache();
		    }
		}
		preArticleID = ArticleID;
	    }
	}

	/* already-parsed input */
	/* line starting with '*' */
	else if (input_buffer[0] == '*') {
	    /* first such line of the sentence: switch to parsed-input mode
	       and reset the per-sentence tables */
	    if (sp->Mrph_num == 0) {
		OptInput |= OPT_PARSED;
		if (OptEllipsis) {
		    OptAnalysis = OPT_CASE2;
		}
		sp->Bnst_num = 0;
		sp->Tag_num = 0;
		memset(Bnst_start, 0, sizeof(int)*MRPH_MAX);
		memset(Tag_start, 0, sizeof(int)*MRPH_MAX);
		if (OptReadFeature) {
		    memset(Input_bnst_feature, 0, sizeof(FEATURE *) *BNST_MAX);
		    memset(Input_tag_feature, 0, sizeof(FEATURE *) *TAG_MAX);
		}
	    }

	    if (OptInput == OPT_RAW) {
		fprintf(stderr, ";; Invalid input <%s> !\n", input_buffer);
		return readtoeos(fp);
	    }

	    /* NOTE(review): no check that sp->Bnst_num stays below the
	       capacity of the arrays written here */
	    bnst_item = sscanf(input_buffer, "* %d%c %[^\n]", 
			       &(sp->Best_mgr->dpnd.head[sp->Bnst_num]),
			       &(sp->Best_mgr->dpnd.type[sp->Bnst_num]),
			       rest_buffer);

	    /* trailing text on the line: optionally read it as features */
	    if (bnst_item == 3) {
		if (OptReadFeature) { 
		    /* split the text at '<' and '>' and store the pieces */
		    feature_string2f(rest_buffer, &Input_bnst_feature[sp->Bnst_num]);
		}
	    }
	    else if (bnst_item != 2) {
		fprintf(stderr, ";; Invalid input <%s> !\n", input_buffer);
		OptInput = OPT_RAW;
		return readtoeos(fp);
	    }

	    /* index of the next morpheme, not counting pending homograph candidates */
	    Bnst_start[sp->Mrph_num - homo_num] = 1;
	    sp->Bnst_num++;
	}
	/* line starting with '+' */
	else if (input_buffer[0] == '+') {
	    if (OptInput == OPT_RAW) {
		fprintf(stderr, ";; Invalid input <%s> !\n", input_buffer);
		return readtoeos(fp);
	    }

	    /* NOTE(review): %c stores a char into an element of the int
	       array Tag_type */
	    tag_item = sscanf(input_buffer, "+ %d%c %[^\n]", 
			      &Tag_dpnd[sp->Tag_num],
			      &Tag_type[sp->Tag_num],
			      rest_buffer);

	    /* trailing text on the line: optionally read it as features */
	    if (tag_item == 3) {
		if (OptReadFeature) { 
		    /* split the text at '<' and '>' and store the pieces */
		    feature_string2f(rest_buffer, &Input_tag_feature[sp->Tag_num]);
		}
	    }
	    else if (tag_item != 2) {
		fprintf(stderr, ";; Invalid input <%s> !\n", input_buffer);
		OptInput = OPT_RAW;
		return readtoeos(fp);
	    }

	    Tag_start[sp->Mrph_num - homo_num] = 1;
	    sp->Tag_num++;
	}

	/* end of sentence */

	else if (str_eq(input_buffer, "EOS\n")) {
	    /* no morpheme was read */
	    if (sp->Mrph_num == 0) {
		return FALSE;
	    }

	    /* parsed input without any '+' line */
	    if ((OptInput & OPT_PARSED) && sp->Tag_num == 0) {
		OptInput |= OPT_INPUT_BNST;
	    }

	    if (homo_num) {	/* a run of homograph candidates is pending: resolve it */
		lexical_disambiguation(sp, m_ptr - homo_num - 1, homo_num + 1);
		sp->Mrph_num -= homo_num;
		m_ptr -= homo_num;
		for (i = 0; i < homo_num; i++) {
		    clear_feature(&((m_ptr+i)->f));
		}
		homo_num = 0;
	    }
	    else if (sp->Mrph_num > 0) { /* no pending candidates: finish the last morpheme */
		rn2canonical_rn(m_ptr - 1);
	    }

	    /* no KNPSID yet (no "# S-ID" line seen): make one up */
	    if (!sp->KNPSID) {
		/* size: log10(Sen_num) for the digits plus 10 spare bytes
		   NOTE(review): log(0) if sp->Sen_num can be 0 here */
		sp->KNPSID = (char *)malloc_data(log(sp->Sen_num) / log(10) + 10, "read_mrph");
		sprintf(sp->KNPSID, "S-ID:%d", sp->Sen_num);
	    }

	    return TRUE;
	}

	/* ordinary morpheme line */

	else {

	    /* homograph candidate? ("@ " followed by something other than '@') */
	    if (input_buffer[0] == '@' && input_buffer[1] == ' ' && input_buffer[2] != '@') {
		homo_flag = 1;
	    }
	    else {
		homo_flag = 0;
	    }
	    
	    if (homo_flag == 0 && homo_num) {

		/* a line without the homograph mark ends the pending run of
	           candidates: resolve it with lexical_disambiguation */		   

		lexical_disambiguation(sp, m_ptr - homo_num - 1, homo_num + 1);
		sp->Mrph_num -= homo_num;
		m_ptr -= homo_num;
		for (i = 0; i < homo_num; i++) {
		    clear_feature(&((m_ptr+i)->f));
		}
		homo_num = 0;
	    }
	    /* NOTE(review): this branch is also taken for candidate lines
	       (homo_flag set) -- confirm that is intended */
	    else if (sp->Mrph_num > 0) { /* finish the previous morpheme */
		rn2canonical_rn(m_ptr - 1);
	    }

	    /* refuse more morphemes than the array can hold */
	    if (sp->Mrph_num >= MRPH_MAX) {
		/* NOTE(review): sp->mrph_data and sp->mrph_data+1 are passed
		   to %s -- confirm the struct layout makes that valid */
		fprintf(stderr, ";; Too many mrph (%s %s%s...)!\n", 
			sp->Comment ? sp->Comment : "", sp->mrph_data, sp->mrph_data+1);
		return readtoeos(fp);
	    }

	    /* fields of a morpheme line, in order:
	       Goi2 Yomi Goi
	       Hinshi(name number) Bunrui(name number) Katuyou_Kata(name number) Katuyou_Kei(name number)
	       rest of line (semantic information, optional)
	    */

	    offset = homo_flag ? 2 : 0;
	    mrph_item = sscanf(input_buffer + offset,
			       "%s %s %s %s %d %s %d %s %d %s %d %[^\n]", 
			       m_ptr->Goi2, m_ptr->Yomi, m_ptr->Goi, 
			       Hinshi_str, &(m_ptr->Hinshi), Bunrui_str, &(m_ptr->Bunrui), 
			       Katuyou_Kata_str, &(m_ptr->Katuyou_Kata), 
			       Katuyou_Kei_str, &(m_ptr->Katuyou_Kei), 
			       rest_buffer);
	    m_ptr->type = IS_MRPH_DATA;
	    m_ptr->num = sp->Mrph_num;
	    m_ptr->length = strlen(m_ptr->Goi2);

	    if (mrph_item == 12) {
		char *imip, *cp, *rep_buf;
		/* semantic information present: store it and turn it into features */
		if (strncmp(rest_buffer, "NIL", 3)) {

		    /* normally enclosed in double quotes */
		    if (rest_buffer[0] == '\"') {
			imip = &rest_buffer[1];
			if (cp = strchr(imip, '\"')) {
			    *cp = '\0';
			}
			/* for word classes other than the four listed, and
			   when the entry is not there yet, append an entry
			   built by make_mrph_rn() */
			if (strcmp(Hinshi_str, "ü") && strcmp(Hinshi_str, "Ƚ") && 
			    strcmp(Hinshi_str, "ư") && strcmp(Hinshi_str, "") && 
			    !strstr(imip, "ɽɽ")) {
			    rep_buf = make_mrph_rn(m_ptr);
			    if (strlen(imip) + strlen(" ɽɽ ɽɽ:") +
				strlen(rep_buf) + 2 < DATA_LEN) {
				strcat(imip, " ɽɽ ɽɽ:");
				strcat(imip, rep_buf);
			    }
			    free(rep_buf);
			}
			/* NOTE(review): imip can be up to DATA_LEN long; the
			   capacity of m_ptr->Imi is not checked here */
			sprintf(m_ptr->Imi, "\"%s\"", imip);
		    }
		    else {
			/* unquoted: keep only the text before the first space */
			imip = rest_buffer;
			if (cp = strchr(imip, ' ')) {
			    *cp = '\0';
			}
			strcpy(m_ptr->Imi, imip);
		    }

		    imi2feature(imip, m_ptr);
		}
		else { /* semantic information starts with "NIL" */
		    /* build an entry with make_mrph_rn() unless the word
		       class is one of the four listed */
		    rep_buf = make_mrph_rn(m_ptr);			
		    if (strcmp(Hinshi_str, "ü") && strcmp(Hinshi_str, "Ƚ") && 
			strcmp(Hinshi_str, "ư") &&	strcmp(Hinshi_str, "") && 
			strlen(" ɽɽ ɽɽ:") + strlen(rep_buf) + 1 < DATA_LEN) {
			imip = rest_buffer;		    
			*imip = '\0';
			strcat(imip, "ɽɽ ɽɽ:");
			strcat(imip, rep_buf);
			sprintf(m_ptr->Imi, "\"%s\"", imip);
			imi2feature(imip, m_ptr);
		    }
		    else {
			strcpy(m_ptr->Imi, "NIL");
		    }
		    free(rep_buf);
		}
	    }
	    else if (mrph_item == 11) {
		/* no semantic information on the line */
		strcpy(m_ptr->Imi, "NIL");
	    }
	    else {
		fprintf(stderr, ";; Invalid input (%d items)<%s> !\n", 
			mrph_item, input_buffer);
		if (sp->Comment) fprintf(stderr, "(%s)\n", sp->Comment);
		return readtoeos(fp);
	    }   

	    /* parsed input: recompute the numeric ids from the names */
	    if (OptInput & OPT_PARSED) {
		m_ptr->Hinshi = get_hinsi_id(Hinshi_str);
		m_ptr->Bunrui = get_bunrui_id(Bunrui_str, m_ptr->Hinshi);
		m_ptr->Katuyou_Kata = get_type_id(Katuyou_Kata_str);
		m_ptr->Katuyou_Kei = get_form_id(Katuyou_Kei_str, m_ptr->Katuyou_Kata);
	    }

	    /* clear_feature(&(m_ptr->f)); is not called here
	       (the original note, partly garbled, appears to say it was
	       moved to the start of the per-sentence loop in main) */

	    /* homograph candidates are stored in sp->mrph_data for now */
	    if (homo_flag) homo_num++;

	    sp->Mrph_num++;
	    m_ptr++;
	}
    }
}

/*==================================================================*/
	      void change_one_mrph_imi(MRPH_DATA *m_ptr)
/*==================================================================*/
{
    /* Record the current state of the morpheme before it is changed:

       1. a feature "<name>:<Goi2>-<Yomi>-<Goi>-<Hinshi>-<Bunrui>-
          <Katuyou_Kata>-<Katuyou_Kei>-<Imi>" is assigned to m_ptr->f;
       2. if m_ptr->Imi contains a double quote, the same record without the
          Imi part is inserted before the last double quote of m_ptr->Imi.

       Step 2 is skipped when the result would not fit into IMI_MAX bytes,
       the capacity change_one_mrph_rep() also assumes for Imi; it used to
       append without any check. */
    char org_buffer[DATA_LEN], *cp;

    sprintf(org_buffer, "ʻѹ:%s-%s-%s-%d-%d-%d-%d-%s", 
	    m_ptr->Goi2, m_ptr->Yomi, m_ptr->Goi, 
	    m_ptr->Hinshi, m_ptr->Bunrui, 
	    m_ptr->Katuyou_Kata, m_ptr->Katuyou_Kei, m_ptr->Imi);
    assign_cfeature(&(m_ptr->f), org_buffer, FALSE); /* into the feature list */

    cp = strrchr(m_ptr->Imi, '\"');
    if (cp == NULL) {
	return;
    }

    /* record without the Imi part, closed by the double quote it replaces */
    sprintf(org_buffer, " ʻѹ:%s-%s-%s-%d-%d-%d-%d\"", 
	    m_ptr->Goi2, m_ptr->Yomi, m_ptr->Goi, 
	    m_ptr->Hinshi, m_ptr->Bunrui, 
	    m_ptr->Katuyou_Kata, m_ptr->Katuyou_Kei);

    /* text kept before the quote + record + terminating NUL */
    if ((size_t)(cp - m_ptr->Imi) + strlen(org_buffer) + 1 <= (size_t)IMI_MAX) {
	*cp = '\0';
	strcat(m_ptr->Imi, org_buffer); /* into Imi */
    }
}

/*==================================================================*/
void change_one_mrph_rep(MRPH_DATA *m_ptr, int modify_feature_flag, char suffix_char)
/*==================================================================*/
{
    /* Rewrite the "<a>/<b>" value stored in m_ptr->Imi after the key below
       so that both parts carry the ending of the morpheme's current form
       instead of the ending of its BASIC_FORM, followed by suffix_char.
       The previous value is kept in Imi under a second key.

       m_ptr               : morpheme to change; nothing happens unless both
                             Katuyou_Kata and Katuyou_Kei are non-zero and
                             Imi contains the key.
       modify_feature_flag : when non-zero, the features of m_ptr->f are
                             updated the same way.
       suffix_char         : character appended after the new value.

       The function now returns without changes when the value is not of
       the form "<a>/<b>" or when a part is shorter than the ending to be
       removed; those cases used to read unset buffers or index out of
       range.  Imi itself is only rewritten when the result fits into
       IMI_MAX bytes (unchanged).
       NOTE(review): the two sprintf() calls into `pre` at the end are still
       unchecked. */
    int offset;
    size_t basic_len;
    char pre[IMI_MAX], str1[IMI_MAX], str2[IMI_MAX], post[IMI_MAX], orig_rep[IMI_MAX], *cp;

    /* only inflecting morphemes */
    if (m_ptr->Katuyou_Kata == 0 || m_ptr->Katuyou_Kei == 0) {
	return;
    }

    cp = strstr(m_ptr->Imi, "ɽɽ:");
    if (cp == NULL) {
	return;
    }
    cp += strlen("ɽɽ:");

    /* <a>: text up to the '/' */
    if (sscanf(cp, "%[^/]", str1) != 1) {
	return;
    }
    offset = strlen(str1);
    if (cp[offset] != '/') {
	return;
    }
    offset++;

    /* <b>: text up to the next space or double quote */
    if (sscanf(cp + offset, "%[^ \"]", str2) != 1) {
	return;
    }
    offset += strlen(str2);

    /* text before and after the value */
    pre[0] = '\0';
    strncat(pre, m_ptr->Imi, cp - m_ptr->Imi);
    post[0] = '\0';
    strcat(post, cp + offset);

    /* keep the original value under the second key */
    strcpy(orig_rep, "ɽɽѹ:");
    if (strlen(orig_rep) + offset >= (size_t)IMI_MAX) {
	return;
    }
    strncat(orig_rep, cp, offset);

    /* cut both parts down to the stem */
    basic_len = strlen(Form[m_ptr->Katuyou_Kata][get_form_id(BASIC_FORM, m_ptr->Katuyou_Kata)].gobi);
    if (basic_len > strlen(str1) || basic_len > strlen(str2)) {
	return;
    }
    str1[strlen(str1) - basic_len] = '\0';
    str2[strlen(str2) - basic_len] = '\0';

    /* add the ending of the current form */
    strcat(str1, Form[m_ptr->Katuyou_Kata][m_ptr->Katuyou_Kei].gobi);
    strcat(str2, Form[m_ptr->Katuyou_Kata][m_ptr->Katuyou_Kei].gobi);

    /* update Imi: new value followed by the saved original value
       (+4: '/', suffix_char, ' ' and the terminating NUL) */
    if (strlen(pre) + strlen(str1) + strlen(str2) + strlen(orig_rep) + strlen(post) + 4 <= IMI_MAX) {
	sprintf(m_ptr->Imi, "%s%s/%s%c %s%s", pre, str1, str2, suffix_char, orig_rep, post);
    }

    /* update the features */
    if (modify_feature_flag) {
	if ((cp = check_feature(m_ptr->f, "ɽɽ")) != NULL) { /* save the current feature value */
	    cp += strlen("ɽɽ:");
	    sprintf(pre, "ɽɽѹ:%s", cp);
	    assign_cfeature(&(m_ptr->f), pre, FALSE);
	}
	sprintf(pre, "ɽɽ:%s/%s%c", str1, str2, suffix_char); /* new feature value */
	assign_cfeature(&(m_ptr->f), pre, FALSE);
    }
}

/*==================================================================*/
void change_one_mrph(MRPH_DATA *m_ptr, FEATURE *f)
/*==================================================================*/
{
    /*
     * Overwrite the part-of-speech ids of m_ptr from the feature string
     * f->cp, which has the form  <key>:<hinshi>[:<bunrui>[:<kata>[:<kei>]]].
     *
     * All four ids are reset to 0 first; each id is then set only when
     * its field was actually parsed.  A bunrui of "*" means 0.
     * When the result has no conjugation type, Goi is replaced by Goi2.
     */
    char h_buffer[62], b_buffer[62], kata_buffer[62], kei_buffer[62];
    int num;

    m_ptr->Hinshi = 0;
    m_ptr->Bunrui = 0;
    m_ptr->Katuyou_Kata = 0;
    m_ptr->Katuyou_Kei = 0;

    /* field widths are limited to the buffer size minus the terminator */
    num = sscanf(f->cp, "%*[^:]:%61[^:]:%61[^:]:%61[^:]:%61[^:]",
                 h_buffer, b_buffer, kata_buffer, kei_buffer);

    /* sscanf may return EOF or 0: only use buffers that were filled */
    if (num >= 1) {
        m_ptr->Hinshi = get_hinsi_id(h_buffer);
    }
    if (num >= 2) {
        if (!strcmp(b_buffer, "*"))
            m_ptr->Bunrui = 0;
        else
            m_ptr->Bunrui = get_bunrui_id(b_buffer, m_ptr->Hinshi);
    }
    if (num >= 3) {
        m_ptr->Katuyou_Kata = get_type_id(kata_buffer);
    }
    if (num >= 4) {
        m_ptr->Katuyou_Kei = get_form_id(kei_buffer, m_ptr->Katuyou_Kata);
    }

    /* without conjugation the base form becomes the surface form;
       the opposite direction (gaining conjugation) is not handled here */
    if (m_ptr->Katuyou_Kata == 0) {
        strcpy(m_ptr->Goi, m_ptr->Goi2);
    }
}

/*==================================================================*/
void change_alt_mrph(MRPH_DATA *m_ptr, FEATURE *f)
/*==================================================================*/
{
    /*
     * Apply the same rewrite as change_mrph() to every "ALT-..." feature
     * of m_ptr.  Each ALT feature is decoded into a temporary morpheme,
     * rewritten, re-encoded through assign_feature_alt_mrph() into a
     * separate list, and unlinked from m_ptr->f; the new list is appended
     * at the end.
     *
     * An ALT feature whose eight fields cannot all be decoded is left in
     * place unchanged instead of being processed from an uninitialized
     * temporary.
     */
    FEATURE **fpp = &(m_ptr->f), *ret_fp = NULL;
    MRPH_DATA m;
    int fields;

    m.f = NULL;
    while (*fpp) {
        fields = 0;
        if (!strncmp((*fpp)->cp, "ALT-", 4)) {
            fields = sscanf((*fpp)->cp + 4, "%[^-]-%[^-]-%[^-]-%d-%d-%d-%d-%[^\n]",
                            m.Goi2, m.Yomi, m.Goi,
                            &m.Hinshi, &m.Bunrui,
                            &m.Katuyou_Kata, &m.Katuyou_Kei, m.Imi);
        }

        if (fields == 8) {
            change_one_mrph_imi(&m);
            change_one_mrph_rep(&m, 0, 'v');
            change_one_mrph(&m, f);
            assign_feature_alt_mrph(&ret_fp, &m);

            /* drop the old ALT entry */
            /* NOTE(review): only the string is freed; the node itself is
               unlinked but not released here - confirm how FEATURE nodes
               are allocated.  Features that change_one_mrph_imi() adds to
               m.f are not released here either. */
            free((*fpp)->cp);
            *fpp = (*fpp)->next;
        }
        else {
            fpp = &((*fpp)->next);
        }
    }

    /* attach the rebuilt ALT entries */
    if (ret_fp) {
        append_feature(&(m_ptr->f), ret_fp);
    }
}

/*==================================================================*/
void change_mrph(MRPH_DATA *m_ptr, FEATURE *f)
/*==================================================================*/
{
    /* Rewrite one morpheme according to feature f.  The order matters:
       the first two steps record the state that the third overwrites. */

    /* 1. save the current analysis in Imi and as a feature */
    change_one_mrph_imi(m_ptr);

    /* 2. rewrite the key:str1/str2 entry, features included */
    change_one_mrph_rep(m_ptr, 1, 'v');

    /* 3. replace the part-of-speech ids from f */
    change_one_mrph(m_ptr, f);

    /* 4. do the same for the ALT-... features */
    change_alt_mrph(m_ptr, f);
}

/*==================================================================*/
int get_Bunrui(char *cp)
/*==================================================================*/
{
    /*
     * Look up cp among the ids stored in Class[6][1..] and return the
     * matching index.  The table is scanned until an entry with a null
     * id is reached.
     *
     * Returns 0 when no entry matches.  Valid indices start at 1, so 0
     * cannot be confused with a hit; previously the function fell off
     * its end without returning a value in that case.
     */
    int j;

    for (j = 1; Class[6][j].id; j++) {
        if (str_eq(Class[6][j].id, cp))
            return j;
    }
    return 0;
}

/*==================================================================*/
int break_feature(FEATURE *fp)
/*==================================================================*/
{
    /* Scan a feature list for the first "&break..." directive.
       "&break:jump" yields RLOOP_BREAK_JUMP; any other string starting
       with "&break" (including "&break:normal") yields
       RLOOP_BREAK_NORMAL.  Without a directive the result is
       RLOOP_BREAK_NONE. */
    FEATURE *cur;

    for (cur = fp; cur != NULL; cur = cur->next) {
        if (strcmp(cur->cp, "&break:jump") == 0) {
            return RLOOP_BREAK_JUMP;
        }
        if (strncmp(cur->cp, "&break", strlen("&break")) == 0) {
            return RLOOP_BREAK_NORMAL;
        }
    }
    return RLOOP_BREAK_NONE;
}

/*==================================================================*/
void assign_mrph_feature(MrphRule *s_r_ptr, int r_size,
                         MRPH_DATA *s_m_ptr, int m_length,
                         int mode, int break_mode, int direction,
                         int also_assign_flag, int temp_assign_flag)
/*==================================================================*/
{
    /*
     * Match r_size morpheme rules against a run of m_length morphemes and
     * attach the features of every matching rule.
     *
     * direction is used as the step between positions; for RtoL the scan
     * starts from the last morpheme of the run.
     *
     * RLOOP_MRM: outer loop over positions, inner loop over rules.
     *   A NORMAL break (from break_mode or from the rule's own "&break"
     *   feature) stops trying further rules at this position; a JUMP
     *   break additionally skips the positions covered by the match.
     * RLOOP_RMM: outer loop over rules, inner loop over positions.
     *   Either kind of break stops scanning positions for that rule.
     *
     * also_assign_flag is accepted for signature compatibility with the
     * tag and bunsetsu variants and is not used here.
     */
    int pos, rule_idx, k, match_length, feature_break_mode;
    MrphRule *r_ptr;
    MRPH_DATA *base;

    if (direction == RtoL) {
        s_m_ptr += m_length - 1;
    }

    if (mode == RLOOP_MRM) {
        for (pos = 0; pos < m_length; pos++) {
            base = s_m_ptr + (pos * direction);
            for (rule_idx = 0, r_ptr = s_r_ptr; rule_idx < r_size; rule_idx++, r_ptr++) {
                match_length = regexpmrphrule_match(r_ptr, base,
                                                    direction == LtoR ? pos : m_length - pos - 1,
                                                    direction == LtoR ? m_length - pos : pos + 1);
                if (match_length == -1) {
                    continue;
                }

                for (k = 0; k < match_length; k++) {
                    assign_feature(&((base + k)->f), &(r_ptr->f),
                                   base, k, match_length - k, temp_assign_flag);
                }

                feature_break_mode = break_feature(r_ptr->f);
                if (break_mode == RLOOP_BREAK_NORMAL ||
                    feature_break_mode == RLOOP_BREAK_NORMAL) {
                    break;
                }
                if (break_mode == RLOOP_BREAK_JUMP ||
                    feature_break_mode == RLOOP_BREAK_JUMP) {
                    pos += match_length - 1;
                    break;
                }
            }
        }
    }
    else if (mode == RLOOP_RMM) {
        for (rule_idx = 0, r_ptr = s_r_ptr; rule_idx < r_size; rule_idx++, r_ptr++) {
            feature_break_mode = break_feature(r_ptr->f);
            for (pos = 0; pos < m_length; pos++) {
                base = s_m_ptr + (pos * direction);
                match_length = regexpmrphrule_match(r_ptr, base,
                                                    direction == LtoR ? pos : m_length - pos - 1,
                                                    direction == LtoR ? m_length - pos : pos + 1);
                if (match_length == -1) {
                    continue;
                }

                for (k = 0; k < match_length; k++) {
                    assign_feature(&((base + k)->f), &(r_ptr->f),
                                   base, k, match_length - k, temp_assign_flag);
                }

                if (break_mode == RLOOP_BREAK_NORMAL ||
                    break_mode == RLOOP_BREAK_JUMP ||
                    feature_break_mode == RLOOP_BREAK_NORMAL ||
                    feature_break_mode == RLOOP_BREAK_JUMP) {
                    break;
                }
            }
        }
    }
}

/*==================================================================*/
void assign_tag_feature(BnstRule *s_r_ptr, int r_size,
                        TAG_DATA *s_b_ptr, int b_length,
                        int mode, int break_mode, int direction,
                        int also_assign_flag, int temp_assign_flag)
/*==================================================================*/
{
    /*
     * Match r_size rules against a run of b_length tag units and attach
     * the features of every matching rule.  When also_assign_flag is set
     * the same features are also attached to the unit's b_ptr.
     *
     * direction is used as the step between positions; for RtoL the scan
     * starts from the last unit of the run.
     *
     * RLOOP_MRM: outer loop over positions, inner loop over rules.
     *   A NORMAL break stops trying further rules at this position; a
     *   JUMP break additionally skips the positions covered by the match.
     * RLOOP_RMM: outer loop over rules, inner loop over positions.
     *   Either kind of break stops scanning positions for that rule.
     */
    int pos, rule_idx, k, match_length, feature_break_mode;
    BnstRule *r_ptr;
    TAG_DATA *base;

    if (direction == RtoL) {
        s_b_ptr += b_length - 1;
    }

    if (mode == RLOOP_MRM) {
        for (pos = 0; pos < b_length; pos++) {
            base = s_b_ptr + (pos * direction);
            for (rule_idx = 0, r_ptr = s_r_ptr; rule_idx < r_size; rule_idx++, r_ptr++) {
                match_length = regexptagrule_match(r_ptr, base,
                                                   direction == LtoR ? pos : b_length - pos - 1,
                                                   direction == LtoR ? b_length - pos : pos + 1);
                if (match_length == -1) {
                    continue;
                }

                for (k = 0; k < match_length; k++) {
                    assign_feature(&((base + k)->f), &(r_ptr->f),
                                   base, k, match_length - k, temp_assign_flag);
                    if (also_assign_flag) {
                        /* propagate to the structure the unit points back to */
                        assign_feature(&((base + k)->b_ptr->f), &(r_ptr->f),
                                       base, k, match_length - k, temp_assign_flag);
                    }
                }

                feature_break_mode = break_feature(r_ptr->f);
                if (break_mode == RLOOP_BREAK_NORMAL ||
                    feature_break_mode == RLOOP_BREAK_NORMAL) {
                    break;
                }
                if (break_mode == RLOOP_BREAK_JUMP ||
                    feature_break_mode == RLOOP_BREAK_JUMP) {
                    pos += match_length - 1;
                    break;
                }
            }
        }
    }
    else if (mode == RLOOP_RMM) {
        for (rule_idx = 0, r_ptr = s_r_ptr; rule_idx < r_size; rule_idx++, r_ptr++) {
            feature_break_mode = break_feature(r_ptr->f);
            for (pos = 0; pos < b_length; pos++) {
                base = s_b_ptr + (pos * direction);
                match_length = regexptagrule_match(r_ptr, base,
                                                   direction == LtoR ? pos : b_length - pos - 1,
                                                   direction == LtoR ? b_length - pos : pos + 1);
                if (match_length == -1) {
                    continue;
                }

                for (k = 0; k < match_length; k++) {
                    assign_feature(&((base + k)->f), &(r_ptr->f),
                                   base, k, match_length - k, temp_assign_flag);
                    if (also_assign_flag) {
                        /* propagate to the structure the unit points back to */
                        assign_feature(&((base + k)->b_ptr->f), &(r_ptr->f),
                                       base, k, match_length - k, temp_assign_flag);
                    }
                }

                if (break_mode == RLOOP_BREAK_NORMAL ||
                    break_mode == RLOOP_BREAK_JUMP ||
                    feature_break_mode == RLOOP_BREAK_NORMAL ||
                    feature_break_mode == RLOOP_BREAK_JUMP) {
                    break;
                }
            }
        }
    }
}

/*==================================================================*/
void assign_bnst_feature(BnstRule *s_r_ptr, int r_size,
                         BNST_DATA *s_b_ptr, int b_length,
                         int mode, int break_mode, int direction,
                         int also_assign_flag, int temp_assign_flag)
/*==================================================================*/
{
    /*
     * Match r_size rules against a run of b_length bunsetsu and attach
     * the features of every matching rule.  When also_assign_flag is set
     * the same features are also attached to the last tag unit of the
     * bunsetsu (tag_ptr + tag_num - 1).
     *
     * direction is used as the step between positions; for RtoL the scan
     * starts from the last bunsetsu of the run.
     *
     * RLOOP_MRM: outer loop over positions, inner loop over rules.
     *   A NORMAL break stops trying further rules at this position; a
     *   JUMP break additionally skips the positions covered by the match.
     * RLOOP_RMM: outer loop over rules, inner loop over positions.
     *   Either kind of break stops scanning positions for that rule.
     */
    int pos, rule_idx, k, match_length, feature_break_mode;
    BnstRule *r_ptr;
    BNST_DATA *base, *cur;

    if (direction == RtoL) {
        s_b_ptr += b_length - 1;
    }

    if (mode == RLOOP_MRM) {
        for (pos = 0; pos < b_length; pos++) {
            base = s_b_ptr + (pos * direction);
            for (rule_idx = 0, r_ptr = s_r_ptr; rule_idx < r_size; rule_idx++, r_ptr++) {
                match_length = regexpbnstrule_match(r_ptr, base,
                                                    direction == LtoR ? pos : b_length - pos - 1,
                                                    direction == LtoR ? b_length - pos : pos + 1);
                if (match_length == -1) {
                    continue;
                }

                for (k = 0; k < match_length; k++) {
                    cur = base + k;
                    assign_feature(&(cur->f), &(r_ptr->f),
                                   base, k, match_length - k, temp_assign_flag);
                    if (also_assign_flag) {
                        /* also mark the last tag unit of this bunsetsu */
                        assign_feature(&((cur->tag_ptr + cur->tag_num - 1)->f), &(r_ptr->f),
                                       base, k, match_length - k, temp_assign_flag);
                    }
                }

                feature_break_mode = break_feature(r_ptr->f);
                if (break_mode == RLOOP_BREAK_NORMAL ||
                    feature_break_mode == RLOOP_BREAK_NORMAL) {
                    break;
                }
                if (break_mode == RLOOP_BREAK_JUMP ||
                    feature_break_mode == RLOOP_BREAK_JUMP) {
                    pos += match_length - 1;
                    break;
                }
            }
        }
    }
    else if (mode == RLOOP_RMM) {
        for (rule_idx = 0, r_ptr = s_r_ptr; rule_idx < r_size; rule_idx++, r_ptr++) {
            feature_break_mode = break_feature(r_ptr->f);
            for (pos = 0; pos < b_length; pos++) {
                base = s_b_ptr + (pos * direction);
                match_length = regexpbnstrule_match(r_ptr, base,
                                                    direction == LtoR ? pos : b_length - pos - 1,
                                                    direction == LtoR ? b_length - pos : pos + 1);
                if (match_length == -1) {
                    continue;
                }

                for (k = 0; k < match_length; k++) {
                    cur = base + k;
                    assign_feature(&(cur->f), &(r_ptr->f),
                                   base, k, match_length - k, temp_assign_flag);
                    if (also_assign_flag) {
                        /* also mark the last tag unit of this bunsetsu */
                        assign_feature(&((cur->tag_ptr + cur->tag_num - 1)->f), &(r_ptr->f),
                                       base, k, match_length - k, temp_assign_flag);
                    }
                }

                if (break_mode == RLOOP_BREAK_NORMAL ||
                    break_mode == RLOOP_BREAK_JUMP ||
                    feature_break_mode == RLOOP_BREAK_NORMAL ||
                    feature_break_mode == RLOOP_BREAK_JUMP) {
                    break;
                }
            }
        }
    }
}

/*==================================================================*/
void assign_general_feature(void *data, int size, int flag, int also_assign_flag, int temp_assign_flag)
/*==================================================================*/
{
    /*
     * Apply every rule set in GeneralRuleArray whose type equals flag to
     * the given data.
     *
     * data   array of morphemes, tag units or bunsetsu; which one is
     *        implied by flag, and it is passed through untyped
     * size   number of elements in data
     * flag   rule type; selects assign_mrph_feature, assign_tag_feature
     *        or assign_bnst_feature
     *
     * A flag outside the known rule types selects no function; in that
     * case nothing is applied (previously an uninitialized function
     * pointer could be called).
     */
    int i;
    void (*assign_function)() = NULL;

    /* pick the matcher for morpheme, tag-unit or bunsetsu rules */
    if (flag == MorphRuleType || flag == PreProcessMorphRuleType || flag == NeMorphRuleType) {
        assign_function = assign_mrph_feature;
    }
    else if (flag == TagRuleType || flag == AfterDpndTagRuleType || flag == PostProcessTagRuleType) {
        assign_function = assign_tag_feature;
    }
    else if (flag == BnstRuleType || flag == AfterDpndBnstRuleType) {
        assign_function = assign_bnst_feature;
    }

    if (assign_function == NULL) {
        return;
    }

    for (i = 0; i < GeneralRuleNum; i++) {
        if ((GeneralRuleArray + i)->type == flag) {
            assign_function((GeneralRuleArray+i)->RuleArray,
                            (GeneralRuleArray+i)->CurRuleSize,
                            data, size,
                            (GeneralRuleArray+i)->mode,
                            (GeneralRuleArray+i)->breakmode,
                            (GeneralRuleArray+i)->direction,
                            also_assign_flag, temp_assign_flag);
        }
    }
}

/*==================================================================*/
BNST_DATA *init_bnst(SENTENCE_DATA *sp, MRPH_DATA *m_ptr)
/*==================================================================*/
{
    /*
     * Take the next free bunsetsu slot of the sentence, initialize it so
     * that it starts at morpheme m_ptr with no morphemes counted yet, and
     * return it.
     *
     * Returns NULL, after printing a message and resetting sp->Bnst_num
     * to 0, when BNST_MAX bunsetsu are already in use.  The capacity test
     * is done before the slot is touched, so nothing is written at index
     * BNST_MAX.
     *
     * The feature list of the slot is not cleared here.
     */
    BNST_DATA *b_ptr;

    if (sp->Bnst_num >= BNST_MAX) {
        /* NOTE(review): the last two arguments are morpheme structures
           printed with %s - confirm that their first member is the
           string intended for display. */
        fprintf(stderr, ";; Too many bnst (%s %s%s...)!\n",
                sp->Comment ? sp->Comment : "", sp->mrph_data, sp->mrph_data+1);
        sp->Bnst_num = 0;
        return NULL;
    }

    b_ptr = sp->bnst_data + sp->Bnst_num;
    b_ptr->type = IS_BNST_DATA;
    b_ptr->num = sp->Bnst_num;
    sp->Bnst_num++;

    b_ptr->mrph_ptr = m_ptr;
    b_ptr->mrph_num = 0;

    b_ptr->BGH_num = 0;
    b_ptr->SM_num = 0;

    b_ptr->para_key_type = PARA_KEY_O;
    b_ptr->para_top_p = FALSE;
    b_ptr->para_type = PARA_NIL;
    b_ptr->to_para_p = FALSE;

    b_ptr->cpm_ptr = NULL;
    b_ptr->voice = 0;

    b_ptr->space = 0;

    b_ptr->pred_b_ptr = NULL;

    memset(b_ptr->SCASE_code, 0, SCASE_CODE_SIZE);

    return b_ptr;
}

/*==================================================================*/
void make_Jiritu_Go(SENTENCE_DATA *sp, BNST_DATA *ptr)
/*==================================================================*/
{
    /* Build ptr->Jiritu_Go by concatenating the Goi of every morpheme
       from the first one up to and including head_ptr, skipping those
       that carry the feature tested below.  Stops with a message as soon
       as the next piece would exceed BNST_LENGTH_MAX. */
    MRPH_DATA *cur;

    ptr->Jiritu_Go[0] = '\0';

    for (cur = ptr->mrph_ptr; cur <= ptr->head_ptr; cur++) {
        if (check_feature(cur->f, "Ƭ")) {
            continue;
        }
        if (strlen(ptr->Jiritu_Go) + strlen(cur->Goi) + 2 > BNST_LENGTH_MAX) {
            fprintf(stderr, ";; Too big bunsetsu (%s %s...)!\n", 
                    sp->Comment ? sp->Comment : "", ptr->mrph_ptr);
            return;
        }
        strcat(ptr->Jiritu_Go, cur->Goi);
    }
}

/*==================================================================*/
void decide_head_ptr(BNST_DATA *ptr)
/*==================================================================*/
{
    /*
     * Choose head_ptr: the last morpheme of the unit that carries one of
     * the two features tested below.  If none does, the last morpheme of
     * the unit is used.
     *
     * For a bunsetsu (type other than IS_TAG_DATA) a morpheme carrying
     * the exclusion feature is never chosen, and the chosen morpheme
     * additionally receives a marker feature.
     */
    int i;
    int is_tag = (ptr->type == IS_TAG_DATA);
    MRPH_DATA *cand;

    for (i = ptr->mrph_num - 1; i >= 0; i--) {
        cand = ptr->mrph_ptr + i;

        /* bunsetsu only: skip excluded morphemes */
        if (!is_tag && check_feature(cand->f, "ü󸫽и")) {
            continue;
        }

        /* two feature keys are tested here, one after the other */
        if (check_feature(cand->f, "Ƹ") || 
            check_feature(cand->f, "Ƹ")) {
            ptr->head_ptr = cand;
            if (!is_tag) {
                assign_cfeature(&(ptr->head_ptr->f), "ʸ缭", FALSE);
            }
            return;
        }
    }

    /* no candidate found: fall back to the last morpheme */
    ptr->head_ptr = ptr->mrph_ptr + ptr->mrph_num - 1;
}

/*==================================================================*/
int calc_bnst_length(SENTENCE_DATA *sp, BNST_DATA *b_ptr)
/*==================================================================*/
{
    /* Store in b_ptr->length the summed strlen of Goi2 over the unit's
       morphemes.  Returns FALSE, after printing a message, as soon as the
       running total exceeds BNST_LENGTH_MAX; TRUE otherwise. */
    int idx;
    MRPH_DATA *first = b_ptr->mrph_ptr;

    b_ptr->length = 0;
    for (idx = 0; idx < b_ptr->mrph_num; idx++) {
        b_ptr->length += strlen(first[idx].Goi2);

        if (b_ptr->length > BNST_LENGTH_MAX) {
            fprintf(stderr, ";; Too big bunsetsu (%s %s...)!\n", 
                    sp->Comment ? sp->Comment : "", b_ptr->mrph_ptr);
            return FALSE;
        }
    }
    return TRUE;
}

/*==================================================================*/
int make_bunsetsu(SENTENCE_DATA *sp)
/*==================================================================*/
{
    /*
     * Group the morphemes of the sentence into bunsetsu.  A new bunsetsu
     * is opened at every morpheme carrying the start feature; every
     * morpheme is counted into the bunsetsu currently open.  Afterwards
     * the length of each bunsetsu is computed.
     *
     * If the very first morpheme lacks the start feature, a bunsetsu is
     * opened there anyway (with a message) so that the count below never
     * goes through a null pointer.
     *
     * Returns FALSE when init_bnst() or calc_bnst_length() fails,
     * TRUE otherwise.
     */
    int i, starts;
    MRPH_DATA *m_ptr;
    BNST_DATA *b_ptr = NULL;

    sp->Bnst_num = 0;
    sp->Max_New_Bnst_num = 0;

    for (i = 0, m_ptr = sp->mrph_data; i < sp->Mrph_num; i++, m_ptr++) {
        starts = check_feature(m_ptr->f, "ʸ") != NULL;
        if (!starts && b_ptr == NULL) {
            fprintf(stderr, ";; morpheme %d must start a bunsetsu! (%s)\n", i,
                    sp->KNPSID ? sp->KNPSID : "?");
            starts = 1;
        }
        if (starts) {
            if ((b_ptr = init_bnst(sp, m_ptr)) == NULL) return FALSE;
        }
        b_ptr->mrph_num++;
    }

    for (i = 0, b_ptr = sp->bnst_data; i < sp->Bnst_num; i++, b_ptr++) {
        if (calc_bnst_length(sp, b_ptr) == FALSE) {
            return FALSE;
        }
    }
    return TRUE;
}

/*==================================================================*/
int make_bunsetsu_pm(SENTENCE_DATA *sp)
/*==================================================================*/
{
    /*
     * Build the bunsetsu array from the boundaries recorded in
     * Bnst_start[]: a non-zero entry opens a bunsetsu at that morpheme,
     * a zero entry adds the morpheme to the bunsetsu currently open.
     * sp->Bnst_num is not modified here.
     *
     * Afterwards each bunsetsu optionally takes its feature list from
     * Input_bnst_feature[] (when OptReadFeature is set), receives a
     * marker feature, and has its length computed.
     *
     * Returns FALSE when calc_bnst_length() fails, TRUE otherwise.
     */
    int idx;
    MRPH_DATA *mp = sp->mrph_data;
    BNST_DATA *bp = sp->bnst_data;

    for (idx = 0; idx < sp->Mrph_num; idx++, mp++) {
        if (!Bnst_start[idx]) {
            bp->mrph_num++;
            continue;
        }

        /* the first bunsetsu uses slot 0; later ones advance */
        if (idx != 0) {
            bp++;
        }
        bp->type = IS_BNST_DATA;
        bp->num = bp - sp->bnst_data;
        bp->mrph_ptr = mp;
        bp->mrph_num = 1;
        bp->cpm_ptr = NULL;
        bp->voice = 0;
        bp->pred_b_ptr = NULL;
        memset(bp->SCASE_code, 0, SCASE_CODE_SIZE);
        /* the feature list is not cleared here */
    }

    for (idx = 0, bp = sp->bnst_data; idx < sp->Bnst_num; idx++, bp++) {
        if (OptReadFeature) {
            bp->f = Input_bnst_feature[idx];
        }
        assign_cfeature(&(bp->f), "Ϻ", FALSE);
        if (calc_bnst_length(sp, bp) == FALSE) {
            return FALSE;
        }
    }
    return TRUE;
}

/*==================================================================*/
void push_tag_units(TAG_DATA *tp, MRPH_DATA *mp)
/*==================================================================*/
{
    /* Count morpheme mp into tag unit tp.  Depending on its features the
       morpheme goes to the settou, jiritu or fuzoku group; the first
       morpheme of each group is remembered in the group's pointer.
       mrph_num is incremented in every case. */
    enum { GROUP_SETTOU, GROUP_JIRITU, GROUP_FUZOKU } group;

    if (check_feature(mp->f, "ΩƬ")) {
        group = GROUP_SETTOU;
    }
    else if (check_feature(mp->f, "Ω") || 
             check_feature(mp->f, "Ƹ")) {
        group = GROUP_JIRITU;
    }
    else {
        group = GROUP_FUZOKU;
    }

    switch (group) {
    case GROUP_SETTOU:
        if (tp->settou_num == 0) tp->settou_ptr = mp;
        tp->settou_num++;
        break;
    case GROUP_JIRITU:
        if (tp->jiritu_num == 0) tp->jiritu_ptr = mp;
        tp->jiritu_num++;
        break;
    default:
        if (tp->fuzoku_num == 0) tp->fuzoku_ptr = mp;
        tp->fuzoku_num++;
        break;
    }

    tp->mrph_num++;
}

/*==================================================================*/
	     void after_make_tag_units(SENTENCE_DATA *sp)
/*==================================================================*/
{
    /* Finish the tag units of a sentence once their boundaries are known:
       set type and head, attach features, compute lengths, then apply the
       tag-unit rules and sm2feature(). */
    int i;
    TAG_DATA *tp;

    for (i = 0; i < sp->Tag_num; i++) {
	tp = sp->tag_data + i;

	tp->type = IS_TAG_DATA;

	/* the tag unit is handled through the bunsetsu interface */
	decide_head_ptr((BNST_DATA *)tp);

	if (OptReadFeature) {
	    /* take the feature list read from the input */
	    tp->f = Input_tag_feature[i];
	    read_annotation(sp, tp);
	}
	else {
	    tp->c_cpm_ptr = NULL;
	}

	/* again cast to BNST_DATA (the original author marks this as tricky) */
	get_bnst_code_all((BNST_DATA *)tp);

	if (tp->inum != 0) {
	    /* inum != 0: two marker features are attached
	       (the original comment says the first is used by case_analysis.rule) */
	    assign_cfeature(&(tp->f), "ʸ", FALSE);
	    assign_cfeature(&(tp->f), ":ʸ", FALSE);
	}
	else {
	    /* inum == 0: copy the features of the owning bunsetsu (b_ptr),
	       then remove one of them from the copy */
	    copy_feature(&(tp->f), tp->b_ptr->f);
	    delete_cfeature(&(tp->f), "");

	    /* when the head morpheme carries the feature tested here,
	       one more copied feature is removed */
	    if (check_feature(tp->head_ptr->f, "ü󸫽и")) {
		delete_cfeature(&(tp->f), "Ѹ");
	    }
	}

	/* compute the length of each tag unit; the result is not checked */
	calc_bnst_length(sp, (BNST_DATA *)tp);
    }

    /* when the first bunsetsu has more than one tag unit, move the
       feature from its last tag unit to the first tag unit of the sentence */
    /* NOTE(review): sp->bnst_data is read even when the sentence has no
       bunsetsu - confirm callers never get here in that state. */
    if (sp->bnst_data->tag_num > 1) {
	delete_cfeature(&((sp->bnst_data->tag_ptr + sp->bnst_data->tag_num - 1)->f), "ʸƬ");
	assign_cfeature(&(sp->tag_data->f), "ʸƬ", FALSE);
    }

    /* apply the tag-unit rules */
    assign_general_feature(sp->tag_data, sp->Tag_num, TagRuleType, FALSE, FALSE);

    /* final per-sentence step, implemented elsewhere */
    sm2feature(sp);
}

/*==================================================================*/
void make_mrph_set_inum(SENTENCE_DATA *sp, int num)
/*==================================================================*/
{
    /* Walk backwards from morpheme num-1, numbering inum 0, 1, 2, ...
       The walk stops after the first morpheme whose tnum is >= 0
       (that morpheme is numbered too). */
    int idx = num - 1;
    int dist = 0;
    MRPH_DATA *mp;

    while (idx >= 0) {
        mp = sp->mrph_data + idx;
        mp->inum = dist;
        dist++;
        if (mp->tnum >= 0) {
            break;
        }
        idx--;
    }
}

/*==================================================================*/
void make_tag_unit_set_inum(SENTENCE_DATA *sp, int num)
/*==================================================================*/
{
    /* Walk backwards from tag unit num-2, numbering inum 1, 2, 3, ...
       The walk stops after the first unit whose bnum is >= 0
       (that unit is numbered too).  Unit num-1 is not touched. */
    int idx = num - 2;
    int dist = 0;
    TAG_DATA *tp;

    while (idx >= 0) {
        tp = sp->tag_data + idx;
        dist++;
        tp->inum = dist;
        if (tp->bnum >= 0) {
            break;
        }
        idx--;
    }
}

/*==================================================================*/
		void make_tag_units(SENTENCE_DATA *sp)
/*==================================================================*/
{
    /* Split the morphemes of the sentence into tag units.  A unit starts
       at every morpheme carrying the start feature and at every morpheme
       that begins a bunsetsu.  Units are linked to their bunsetsu, inum
       values are assigned, and after_make_tag_units() completes the job. */
    int i;
    char *flag;
    MRPH_DATA *mp;
    TAG_DATA *tp = NULL;
    BNST_DATA *bp = sp->bnst_data, *pre_bp;

    sp->Tag_num = 0;

    for (i = 0; i < sp->Mrph_num; i++) {
	mp = sp->mrph_data + i;
	flag = check_feature(mp->f, "ñ̻");

	/* a bunsetsu-initial morpheme starts a unit even without the feature */
	if (flag || 
	    (bp != NULL && bp->mrph_ptr == mp)) {
	    tp = sp->tag_data + sp->Tag_num;

	    /* report the missing feature, but carry on */
	    if (flag == NULL) {
		fprintf(stderr, ";; morpheme %d must be <ñ̻>! (%s)\n", i, 
			sp->KNPSID ? sp->KNPSID : "?");
	    }

	    memset(tp, 0, sizeof(TAG_DATA));
	    tp->num = sp->Tag_num;
	    tp->mrph_ptr = mp;
	    mp->tnum = tp->num;
	    /* number the morphemes of the unit that has just ended */
	    make_mrph_set_inum(sp, i);

	    /* the unit starts exactly at a bunsetsu boundary */
	    if (bp != NULL && bp->mrph_ptr == tp->mrph_ptr) {
		/* the previous bunsetsu had several units: number them now */
		if (sp->Tag_num > 0 && (tp - 1)->bnum < 0) {
		    make_tag_unit_set_inum(sp, sp->Tag_num);
		}
		tp->bnum = bp->num;
		tp->b_ptr = bp;		/* tag unit -> bunsetsu */
		bp->tag_ptr = tp;	/* bunsetsu -> its first tag unit */
		bp->tag_num = 1;
		pre_bp = bp;
		if (bp->num < sp->Bnst_num - 1) {
		    bp++;
		}
		else {
		    /* that was the last bunsetsu */
		    bp = NULL;
		}
	    }
	    else {
		/* unit inside a bunsetsu: attach it to the bunsetsu seen last */
		/* NOTE(review): pre_bp has no initial value; this branch relies on a
		   bunsetsu boundary having been seen earlier - confirm. */
		tp->bnum = -1;
		tp->b_ptr = pre_bp;
		pre_bp->tag_num++;
	    }
	    sp->Tag_num++;
	}
	else {
	    mp->tnum = -1;
	}
	/* NOTE(review): tp is still NULL here if no unit has been opened yet. */
	push_tag_units(tp, mp);
    }

    /* the last bunsetsu ended with several units: number them */
    /* NOTE(review): reads tag_data[-1] when Tag_num is 0 - confirm that an
       empty sentence never reaches this function. */
    if ((sp->tag_data + sp->Tag_num - 1)->bnum < 0) {
	make_tag_unit_set_inum(sp, sp->Tag_num);
    }
    /* number the morphemes of the final unit */
    make_mrph_set_inum(sp, sp->Mrph_num);

    after_make_tag_units(sp);
}

/*==================================================================*/
	      void make_tag_units_pm(SENTENCE_DATA *sp)
/*==================================================================*/
{
    /* Variant of make_tag_units() driven by the boundaries recorded in
       Tag_start[] instead of morpheme features.  sp->Tag_num is read but
       not set here, and the tnum/inum fields of morphemes are not touched. */
    int i;
    MRPH_DATA *mp;
    TAG_DATA *tp = sp->tag_data;
    BNST_DATA *bp = sp->bnst_data, *pre_bp;

    for (i = 0; i < sp->Mrph_num; i++) {
	mp = sp->mrph_data + i;

	if (Tag_start[i]) {
	    /* the first unit uses slot 0; later ones advance */
	    if (i != 0) tp++;

	    /* report a boundary morpheme without the start feature, but carry on */
	    if (check_feature(mp->f, "ñ̻") == NULL) {
		fprintf(stderr, ";; morpheme %d must be <ñ̻>! (%s)\n", i, 
			sp->KNPSID ? sp->KNPSID : "?");
	    }

	    memset(tp, 0, sizeof(TAG_DATA));
	    tp->num = tp - sp->tag_data;
	    tp->mrph_ptr = mp;

	    /* the unit starts exactly at a bunsetsu boundary */
	    if (bp != NULL && bp->mrph_ptr == tp->mrph_ptr) {
		/* the previous bunsetsu had several units: number them now */
		if (tp->num > 0 && (tp - 1)->bnum < 0) {
		    make_tag_unit_set_inum(sp, tp->num);
		}
		tp->bnum = bp->num;
		tp->b_ptr = bp;		/* tag unit -> bunsetsu */
		bp->tag_ptr = tp;	/* bunsetsu -> its first tag unit */
		bp->tag_num = 1;
		pre_bp = bp;
		if (bp->num < sp->Bnst_num - 1) {
		    bp++;
		}
		else {
		    /* that was the last bunsetsu */
		    bp = NULL;
		}
	    }
	    else {
		/* unit inside a bunsetsu: attach it to the bunsetsu seen last */
		/* NOTE(review): pre_bp has no initial value; this branch relies on a
		   bunsetsu boundary having been seen earlier - confirm. */
		tp->bnum = -1;
		tp->b_ptr = pre_bp;
		pre_bp->tag_num++;
	    }
	}
	/* NOTE(review): if Tag_start[0] is 0, slot 0 is counted into before it
	   has been initialized - confirm the input guarantees a start at 0. */
	push_tag_units(tp, mp);
    }

    /* the last bunsetsu ended with several units: number them */
    if ((sp->tag_data + sp->Tag_num - 1)->bnum < 0) {
	make_tag_unit_set_inum(sp, sp->Tag_num);
    }

    after_make_tag_units(sp);
}


/*==================================================================*/
void dpnd_info_to_tag_pm(SENTENCE_DATA *sp)
/*==================================================================*/
{
    /* Copy the dependency head and type recorded in Tag_dpnd[] and
       Tag_type[] into the corresponding tag units of the sentence. */
    int idx;
    TAG_DATA *tp = sp->tag_data;

    for (idx = 0; idx < sp->Tag_num; idx++, tp++) {
        tp->dpnd_head = Tag_dpnd[idx];
        tp->dpnd_type = Tag_type[idx];
    }
}

/*==================================================================*/
void reset_mrph(SENTENCE_DATA *sp)
/*==================================================================*/
{
    /*
     * Compact the morpheme array after merging.  Morphemes whose Goi is
     * empty (index 1 and above) are dropped; the remaining ones are moved
     * towards the front by the number of dropped entries before them, and
     * Mrph_num is reduced accordingly.
     *
     * From the first dropped entry onwards, clear_feature() is called on
     * every morpheme before anything is moved.
     */
    int idx, removed = 0, shift[MRPH_MAX];
    MRPH_DATA *cur;

    /* pass 1: compute how far each surviving morpheme must move */
    for (idx = 1; idx < sp->Mrph_num; idx++) {
        cur = sp->mrph_data + idx;

        if (cur->Goi[0] == '\0') {
            shift[idx] = 0;     /* dropped entries do not move */
            removed++;
        }
        else {
            shift[idx] = removed;
        }

        if (removed) {
            clear_feature(&(cur->f));
        }
    }

    /* pass 2: move the survivors into place */
    for (idx = 1; idx < sp->Mrph_num; idx++) {
        if (shift[idx] > 0) {
            copy_mrph(sp->mrph_data + idx - shift[idx], sp->mrph_data + idx);
        }
    }

    sp->Mrph_num -= removed;
}

/*==================================================================*/
void merge_mrph_rep(MRPH_DATA *dst, MRPH_DATA *src)
/*==================================================================*/
{
    /*
     * Merge the "key:str1/str2" entry of src->Imi into the one of
     * dst->Imi: the two first halves are concatenated, and so are the two
     * second halves.  The text around the entry in dst->Imi is kept.
     *
     * dst->Imi is left unchanged when either Imi lacks the entry, when an
     * entry is malformed (empty half or missing '/'), or when the merged
     * result would not fit in IMI_MAX bytes.
     */
    int offset;
    char src_str1[IMI_MAX], src_str2[IMI_MAX];
    char dst_pre[IMI_MAX], dst_str1[IMI_MAX], dst_str2[IMI_MAX], dst_post[IMI_MAX];
    char *cp;

    /* entry of the morpheme being merged in */
    cp = strstr(src->Imi, "ɽɽ:");
    if (cp == NULL) {
        return;
    }
    cp += strlen("ɽɽ:");
    if (sscanf(cp, "%[^/]", src_str1) != 1) {
        return;
    }
    offset = strlen(src_str1);
    if (cp[offset] != '/') {
        return; /* no separator: do not read past the terminator */
    }
    if (sscanf(cp + offset + 1, "%[^ \"]", src_str2) != 1) {
        return;
    }

    /* entry of the morpheme being merged into */
    cp = strstr(dst->Imi, "ɽɽ:");
    if (cp == NULL) {
        return;
    }
    cp += strlen("ɽɽ:");
    if (sscanf(cp, "%[^/]", dst_str1) != 1) {
        return;
    }
    offset = strlen(dst_str1);
    if (cp[offset] != '/') {
        return;
    }
    offset++;
    if (sscanf(cp + offset, "%[^ \"]", dst_str2) != 1) {
        return;
    }
    offset += strlen(dst_str2);

    /* text before the entry value, and text after it */
    dst_pre[0] = '\0';
    strncat(dst_pre, dst->Imi, cp - dst->Imi);
    strcpy(dst_post, cp + offset);

    /* concatenate the halves only when both results fit */
    if (strlen(dst_str1) + strlen(src_str1) >= IMI_MAX || 
        strlen(dst_str2) + strlen(src_str2) >= IMI_MAX) {
        return;
    }
    strcat(dst_str1, src_str1);
    strcat(dst_str2, src_str2);

    /* rewrite Imi only when the result fits: '/' and NUL = 2 */
    if (strlen(dst_pre) + strlen(dst_str1) + strlen(dst_str2) + strlen(dst_post) + 2 <= IMI_MAX) {
        sprintf(dst->Imi, "%s%s/%s%s", dst_pre, dst_str1, dst_str2, dst_post);
    }
}

/*==================================================================*/
int merge_mrph(SENTENCE_DATA *sp, int start_num, int length)
/*==================================================================*/
{
    /*
     * Merge `length` consecutive morphemes, starting at index start_num,
     * into the first of them.  Goi, Yomi and Goi2 are concatenated, the
     * key:str1/str2 entries of Imi are merged, and each absorbed morpheme
     * gets an empty Goi as a "merged" mark.  Finally the ALT features of
     * the head are deleted and assign_rep_f_from_imi() is run on it.
     *
     * Returns FALSE without changing anything when any of the three
     * concatenated strings would exceed WORD_LEN_MAX; TRUE otherwise.
     */
    int k;
    int total_goi = 0, total_yomi = 0, total_goi2 = 0;
    MRPH_DATA *head = sp->mrph_data + start_num;
    MRPH_DATA *part;

    /* check the merged lengths first */
    for (k = 0; k < length; k++) {
        part = head + k;
        total_goi  += strlen(part->Goi);
        total_yomi += strlen(part->Yomi);
        total_goi2 += strlen(part->Goi2);
    }
    if (total_goi > WORD_LEN_MAX || total_yomi > WORD_LEN_MAX || total_goi2 > WORD_LEN_MAX) {
        return FALSE;
    }

    /* absorb the following morphemes into the head, one by one */
    for (k = 1; k < length; k++) {
        part = head + k;
        strcat(head->Goi,  part->Goi);
        strcat(head->Yomi, part->Yomi);
        strcat(head->Goi2, part->Goi2);
        merge_mrph_rep(head, part);

        part->Goi[0] = '\0';    /* mark as merged */
    }

    delete_alt_feature(&(head->f));
    assign_rep_f_from_imi(head);
    return TRUE;
}

/*==================================================================*/
void preprocess_mrph(SENTENCE_DATA *sp)
/*==================================================================*/
{
    /*
     * Apply the pre-process morpheme rules, then merge every maximal run
     * of consecutive morphemes that carry the same merge feature (a
     * feature whose text starts with the prefix tested below).
     *
     * A run ends when the next morpheme has no merge feature or a
     * different one, or at the end of the sentence.  If merge_mrph()
     * refuses a run, the merge feature is removed from its morphemes
     * instead.  reset_mrph() finally compacts the morpheme array.
     */
    int i, start_num;
    char *cp, merge_type[SMALL_DATA_LEN];
    FEATURE *fp;

    assign_general_feature(sp->mrph_data, sp->Mrph_num, PreProcessMorphRuleType, FALSE, FALSE);

    merge_type[0] = '\0';   /* empty: no run is open */
    for (i = 0; i < sp->Mrph_num; i++) {
        /* find the merge feature of this morpheme; the first one wins */
        cp = NULL;
        for (fp = (sp->mrph_data + i)->f; fp; fp = fp->next) {
            if (strncmp(fp->cp, "Ϣ-", strlen("Ϣ-"))) {
                continue;
            }
            if (cp) {
                fprintf(stderr, ";; Both %s and %s are assigned to %s\n", cp, fp->cp, (sp->mrph_data + i)->Goi);
            }
            else {
                cp = fp->cp;
            }
        }

        /* close the open run if this morpheme does not continue it */
        if (merge_type[0] && (cp == NULL || strcmp(merge_type, cp))) {
            if (merge_mrph(sp, start_num, i - start_num) == FALSE) {
                delete_cfeature_from_mrphs(sp->mrph_data + start_num, i - start_num, merge_type);
            }
            merge_type[0] = '\0';
        }

        /* open a new run at this morpheme */
        if (cp && !merge_type[0]) {
            start_num = i;
            strcpy(merge_type, cp);
        }
    }

    /* a run still open at the end of the sentence */
    if (merge_type[0]) {
        if (merge_mrph(sp, start_num, i - start_num) == FALSE) {
            delete_cfeature_from_mrphs(sp->mrph_data + start_num, i - start_num, merge_type);
        }
    }

    reset_mrph(sp);
}

/*====================================================================
				 END
====================================================================*/
