/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.tagging.ca;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import morfologik.stemming.DictionaryLookup;
import morfologik.stemming.IStemmer;
import org.jetbrains.annotations.Nullable;
import org.languagetool.AnalyzedToken;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.Language;
import org.languagetool.chunking.ChunkTag;
import org.languagetool.language.Catalan;
import org.languagetool.language.ValencianCatalan;
import org.languagetool.tagging.BaseTagger;
import org.languagetool.tools.StringTools;

/**
 * Tagger for Catalan that reads its dictionary from
 * {@code /ca/<short code with country and variant>.dict}.
 *
 * <p>On top of the plain dictionary lookup, {@link #tag(List)} retries words with
 * different casing and, for words that are still unknown, asks
 * {@link #additionalTags(String, IStemmer)} to guess readings from the word's shape
 * ("-ment" adverbs, "auto-" prefixed verbs, words written with the single-character
 * "l·" ligature, and "-iste" forms when the language has a variant).
 */
public class CatalanTagger
extends BaseTagger {
    public static final CatalanTagger INSTANCE_VAL = new CatalanTagger(new ValencianCatalan());
    public static final CatalanTagger INSTANCE_CAT = new CatalanTagger(new Catalan());
    /** POS tags accepted as the base of a "-ment" adverb. */
    private static final Pattern ADJ_PART_FS = Pattern.compile("VMP00SF.|A[QO].[FC]S.");
    /** Any POS tag starting with "V". */
    private static final Pattern VERB = Pattern.compile("V.+");
    /** "auto" followed by a remainder that contains at least two vowels; group 1 is the prefix, group 2 the remainder. */
    private static final Pattern PREFIXES_FOR_VERBS = Pattern.compile("(auto)(.*[aeiou\u00e0\u00e9\u00e8\u00ed\u00f2\u00f3\u00ef\u00fc].+[aeiou\u00e0\u00e9\u00e8\u00ed\u00f2\u00f3\u00ef\u00fc].*)", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
    /** All-uppercase words that are also looked up with only the first letter capitalised, even if other readings exist. */
    private static final List<String> ALLUPPERCASE_EXCEPTIONS = Arrays.asList("ARNAU", "CRISTIAN", "TOM\u00c0S");
    /** Value of {@code language.getVariant()} at construction time; may be {@code null}. */
    private final String variant;

    /**
     * Creates a tagger for the given language.
     *
     * @param language supplies the dictionary file name and the variant
     */
    public CatalanTagger(Language language) {
        super("/ca/" + language.getShortCodeWithCountryAndVariant() + ".dict", new Locale("ca"), false);
        this.variant = language.getVariant();
    }

    /**
     * @return always {@code false}
     */
    public boolean overwriteWithManualTagger() {
        return false;
    }

    /**
     * Tags every token of a sentence.
     *
     * <p>For each token the lookups are, in order: the NFC-normalised word as written;
     * its lowercase form (unless the word is already lowercase or is mixed case); the
     * first-letter-uppercase form (only for all-uppercase words that are still untagged
     * or listed in {@link #ALLUPPERCASE_EXCEPTIONS}); and finally
     * {@link #additionalTags(String, IStemmer)} for untagged, non-mixed-case words.
     * A token with no reading at all gets a single reading with null tag and lemma.
     *
     * <p>Typographic apostrophes (U+2019) are replaced by {@code '} before lookup. Tokens
     * longer than one character receive the chunk tag {@code containsTypewriterApostrophe}
     * and/or {@code containsTypographicApostrophe} according to what they originally contained.
     *
     * @param sentenceTokens the tokens of one sentence, in order
     * @return one {@link AnalyzedTokenReadings} per input token
     */
    public List<AnalyzedTokenReadings> tag(List<String> sentenceTokens) {
        List<AnalyzedTokenReadings> tokenReadings = new ArrayList<AnalyzedTokenReadings>();
        int pos = 0;
        // One lookup object per call, shared by all additionalTags() invocations below.
        IStemmer dictLookup = new DictionaryLookup(this.getDictionary());
        for (String token : sentenceTokens) {
            String originalWord = token;
            boolean containsTypewriterApostrophe = false;
            boolean containsTypographicApostrophe = false;
            if (originalWord.length() > 1) {
                // Must be checked before the replacement below introduces typewriter apostrophes.
                containsTypewriterApostrophe = originalWord.contains("'");
                if (originalWord.contains("\u2019")) {
                    containsTypographicApostrophe = true;
                    originalWord = originalWord.replace("\u2019", "'");
                }
            }
            String normalizedWord = StringTools.normalizeNFC(originalWord);
            List<AnalyzedToken> l = new ArrayList<AnalyzedToken>();
            String lowerWord = normalizedWord.toLowerCase(this.locale);
            boolean isLowercase = normalizedWord.equals(lowerWord);
            boolean isMixedCase = StringTools.isMixedCase(normalizedWord);
            boolean isAllUpper = StringTools.isAllUppercase(normalizedWord);

            // 1. The word exactly as written.
            List<AnalyzedToken> taggerTokens = this.asAnalyzedTokenListForTaggedWords(originalWord, this.getWordTagger().tag(normalizedWord));
            this.addTokens(taggerTokens, l);

            // 2. The lowercase form, for capitalised or all-uppercase words.
            if (!isLowercase && !isMixedCase) {
                List<AnalyzedToken> lowerTaggerTokens = this.asAnalyzedTokenListForTaggedWords(originalWord, this.getWordTagger().tag(lowerWord));
                this.addTokens(lowerTaggerTokens, l);
            }

            // 3. First letter uppercase, for all-uppercase words without readings or in the exception list.
            if ((l.isEmpty() || ALLUPPERCASE_EXCEPTIONS.contains(normalizedWord)) && isAllUpper) {
                String firstUpper = StringTools.uppercaseFirstChar(lowerWord);
                List<AnalyzedToken> firstupperTaggerTokens = this.asAnalyzedTokenListForTaggedWords(originalWord, this.getWordTagger().tag(firstUpper));
                this.addTokens(firstupperTaggerTokens, l);
            }

            // 4. Shape-based guesses for words that are still unknown.
            if (l.isEmpty() && !isMixedCase) {
                this.addTokens(this.additionalTags(originalWord, dictLookup), l);
            }

            // 5. Nothing found: keep the token with an empty reading.
            if (l.isEmpty()) {
                l.add(new AnalyzedToken(originalWord, null, null));
            }

            AnalyzedTokenReadings atr = new AnalyzedTokenReadings(l, pos);
            // Collect both markers in one list so that a token containing both kinds of
            // apostrophe keeps both tags instead of the second call replacing the first.
            List<ChunkTag> listChunkTags = new ArrayList<ChunkTag>();
            if (containsTypewriterApostrophe) {
                listChunkTags.add(new ChunkTag("containsTypewriterApostrophe"));
            }
            if (containsTypographicApostrophe) {
                listChunkTags.add(new ChunkTag("containsTypographicApostrophe"));
            }
            if (!listChunkTags.isEmpty()) {
                atr.setChunkTags(listChunkTags);
            }
            tokenReadings.add(atr);
            pos += originalWord.length();
        }
        return tokenReadings;
    }

    /**
     * Guesses readings for a word from its shape. The checks run in this order and the
     * first one that applies decides the result:
     * <ol>
     *   <li>word ends in "ment" and the part before it has a reading matching
     *       {@link #ADJ_PART_FS}: a single "RG" reading with the lowercase word as lemma;</li>
     *   <li>word matches {@link #PREFIXES_FOR_VERBS}: one reading per verb reading of the
     *       remainder, with "auto" prepended to the lemma (possibly an empty list);</li>
     *   <li>word contains U+0140 or U+013F: the readings of the word with U+0140 replaced
     *       by "l" + U+00B7;</li>
     *   <li>the language has a variant and the word ends in "iste": readings derived from
     *       the corresponding "-ista" entry, with the common-gender tags mapped to
     *       "NCMS000" / "AQ0MS0".</li>
     * </ol>
     *
     * @param word    the word to analyse
     * @param stemmer dictionary lookup to use; when {@code null} a new
     *                {@link DictionaryLookup} on this tagger's dictionary is created
     * @return the guessed readings, or {@code null} when no check produced a result
     */
    @Nullable
    protected List<AnalyzedToken> additionalTags(String word, IStemmer stemmer) {
        // Reuse the caller's lookup rather than allocating a new one for every unknown word.
        IStemmer dictLookup = stemmer != null ? stemmer : new DictionaryLookup(this.getDictionary());
        List<AnalyzedToken> additionalTaggedTokens = new ArrayList<AnalyzedToken>();
        String lowerWord = StringTools.normalizeNFC(word.toLowerCase(this.locale));

        // Adverbs ending in "-ment".
        if (lowerWord.endsWith("ment")) {
            String possibleAdj = lowerWord.replaceAll("^(.+)ment$", "$1");
            List<AnalyzedToken> adjTokens = this.asAnalyzedTokenList(possibleAdj, dictLookup.lookup(possibleAdj));
            for (AnalyzedToken taggerToken : adjTokens) {
                String posTag = taggerToken.getPOSTag();
                if (posTag != null && ADJ_PART_FS.matcher(posTag).matches()) {
                    additionalTaggedTokens.add(new AnalyzedToken(word, "RG", lowerWord));
                    return additionalTaggedTokens;
                }
            }
        }

        // Verbs with the "auto" prefix.
        Matcher matcher = PREFIXES_FOR_VERBS.matcher(word);
        if (matcher.matches()) {
            // Lowercase with the tagger's locale, as everywhere else in this class, so the
            // result does not depend on the JVM default locale.
            String prefix = matcher.group(1).toLowerCase(this.locale);
            String possibleVerb = StringTools.normalizeNFC(matcher.group(2).toLowerCase(this.locale));
            List<AnalyzedToken> verbTokens = this.asAnalyzedTokenList(possibleVerb, dictLookup.lookup(possibleVerb));
            for (AnalyzedToken taggerToken : verbTokens) {
                String baseLemma = taggerToken.getLemma();
                String posTag = taggerToken.getPOSTag();
                // A reading without a lemma cannot yield a prefixed lemma; skip it instead of failing.
                if (baseLemma == null || baseLemma.equals("n\u00f2mer")) {
                    continue;
                }
                if (posTag == null || !VERB.matcher(posTag).matches()) {
                    continue;
                }
                additionalTaggedTokens.add(new AnalyzedToken(word, posTag, prefix.concat(baseLemma)));
            }
            return additionalTaggedTokens;
        }

        // Words written with the single-character ligature (U+0140 / U+013F).
        if (word.contains("\u0140") || word.contains("\u013f")) {
            String possibleWord = lowerWord.replace("\u0140", "l\u00b7");
            return this.asAnalyzedTokenList(word, dictLookup.lookup(possibleWord));
        }

        // "-iste" forms, only when the language reports a variant.
        if (this.variant != null && lowerWord.endsWith("iste")) {
            String possibleAdjNoun = lowerWord.replaceAll("^(.+)iste$", "$1ista");
            List<AnalyzedToken> istaTokens = this.asAnalyzedTokenList(possibleAdjNoun, dictLookup.lookup(possibleAdjNoun));
            for (AnalyzedToken taggerToken : istaTokens) {
                String posTag = taggerToken.getPOSTag();
                if (posTag == null) {
                    continue;
                }
                if (posTag.equals("NCCS000")) {
                    additionalTaggedTokens.add(new AnalyzedToken(word, "NCMS000", possibleAdjNoun));
                }
                if (posTag.equals("AQ0CS0")) {
                    additionalTaggedTokens.add(new AnalyzedToken(word, "AQ0MS0", possibleAdjNoun));
                }
                // Stop at the first reading that produced a result.
                if (!additionalTaggedTokens.isEmpty()) {
                    return additionalTaggedTokens;
                }
            }
        }
        return null;
    }

    /**
     * Appends {@code taggedTokens} to {@code l}; a {@code null} source list is ignored.
     */
    private void addTokens(List<AnalyzedToken> taggedTokens, List<AnalyzedToken> l) {
        if (taggedTokens != null) {
            l.addAll(taggedTokens);
        }
    }
}

