/*
 * Decompiled with CFR 0.152.
 */
package org.omegat.tokenizer;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.cjk.CJKBigramFilter;
import org.apache.lucene.analysis.cjk.CJKWidthFilter;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.omegat.tokenizer.BaseTokenizer;
import org.omegat.tokenizer.Tokenizer;

/**
 * Tokenizer registered, via the {@link Tokenizer} annotation, for the language
 * codes {@code zh}, {@code ja} and {@code ko}.
 *
 * <p>Token streams are assembled from Lucene's CJK analysis components; which
 * components are used depends on the flags passed to
 * {@link #getTokenStream(String, boolean, boolean)}.
 */
@Tokenizer(languages={"zh", "ja", "ko"})
public class LuceneCJKTokenizer
extends BaseTokenizer {

    /**
     * Builds the token stream for the given text.
     *
     * <ul>
     *   <li>When {@code stemsAllowed} is {@code false}, the standard token stream
     *       supplied by the base class is wrapped, innermost to outermost, in a
     *       {@link CJKWidthFilter}, a {@link CJKBigramFilter} and a
     *       {@link LowerCaseFilter}. {@code stopWordsAllowed} is not consulted on
     *       this path.</li>
     *   <li>When {@code stemsAllowed} is {@code true}, a new {@link CJKAnalyzer}
     *       is created for this call and asked for a stream over the text, using
     *       an empty field name. The analyzer is given
     *       {@link CJKAnalyzer#getDefaultStopSet()} if {@code stopWordsAllowed}
     *       is {@code true}, and {@link CharArraySet#EMPTY_SET} otherwise.</li>
     * </ul>
     *
     * @param strOrig          the text to tokenize
     * @param stemsAllowed     selects between the analyzer path ({@code true}) and
     *                         the explicit filter chain ({@code false})
     * @param stopWordsAllowed selects the stop set handed to the analyzer; only
     *                         read when {@code stemsAllowed} is {@code true}
     * @return the assembled token stream
     * @throws IOException if one of the underlying Lucene or base-class calls reports it
     */
    @Override
    protected TokenStream getTokenStream(String strOrig, boolean stemsAllowed, boolean stopWordsAllowed) throws IOException {
        if (!stemsAllowed) {
            // Explicit chain: standard stream -> width filter -> bigram filter -> lower-casing.
            TokenStream standardStream = super.getStandardTokenStream(strOrig);
            TokenStream widthFiltered = new CJKWidthFilter(standardStream);
            TokenStream bigrams = new CJKBigramFilter(widthFiltered);
            return new LowerCaseFilter(bigrams);
        }

        // Analyzer path: choose the stop set first, then let CJKAnalyzer build the stream.
        CharArraySet stopSet;
        if (stopWordsAllowed) {
            stopSet = CJKAnalyzer.getDefaultStopSet();
        } else {
            stopSet = CharArraySet.EMPTY_SET;
        }
        CJKAnalyzer analyzer = new CJKAnalyzer(stopSet);
        return analyzer.tokenStream("", new StringReader(strOrig));
    }
}

