/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.tagging.uk;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.function.UnaryOperator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import org.languagetool.AnalyzedToken;
import org.languagetool.rules.uk.LemmaHelper;
import org.languagetool.tagging.BaseTagger;
import org.languagetool.tagging.TaggedWord;
import org.languagetool.tagging.WordTagger;
import org.languagetool.tagging.uk.CompoundTagger;
import org.languagetool.tagging.uk.IPOSTag;
import org.languagetool.tagging.uk.PosTagHelper;
import org.languagetool.tokenizers.uk.UkrainianWordTokenizer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Part-of-speech tagger for Ukrainian built on top of {@link BaseTagger}.
 *
 * <p>On top of the plain dictionary lookup done by the superclass this class:
 * <ul>
 *   <li>tags tokens that look like numbers, Roman numerals, times, dates and hashtags
 *       ({@link #additionalTags(String, WordTagger)});</li>
 *   <li>delegates hyphenated and other unknown words to {@link CompoundTagger};</li>
 *   <li>retries the dictionary lookup with an adjusted spelling when a word is not found
 *       ({@link #getAnalyzedTokens(String)}).</li>
 * </ul>
 */
public class UkrainianTagger extends BaseTagger {
    private static final Logger logger = LoggerFactory.getLogger(UkrainianTagger.class);

    /** Case-insensitive matching that also folds non-ASCII (Cyrillic) letters. */
    private static final int IGNORE_CASE_UNICODE = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;

    /**
     * Optionally signed integer or comma-decimal, optionally followed by a dash and a second
     * number (a range), or digit groups of three separated by a (non-breaking) space.
     */
    private static final Pattern NUMBER = Pattern.compile("[-+\u00b1]?[0-9]+(,[0-9]+)?([-\u2013\u2014][0-9]+(,[0-9]+)?)?|\\d{1,3}([\\s\u00a0\u202f]\\d{3})+");
    /** Roman numeral written with Latin letters (at least one numeral letter is required). */
    private static final Pattern LATIN_NUMBER = Pattern.compile("(?=[MDCLXVI])M*(C[MD]|D?C*)(X[CL]|L?X*)(I[XV]|V?I*)");
    /** Tokens made only of the letters M and D; these are excluded from the Roman numeral tagging. */
    private static final Pattern LATIN_NUMBER_MD_ONLY = Pattern.compile("[MD]+");
    /**
     * Roman-looking numeral built from I, X, V and the Cyrillic letters U+0406 / U+0425,
     * optionally followed by a hyphen and a short lowercase Cyrillic suffix.
     */
    private static final Pattern LATIN_NUMBER_CYR = Pattern.compile("[IX\u0406\u0425V]{2,4}(-[\u0430-\u044f\u0456]{1,4})?|[IX\u0406\u0425V](-[\u0430-\u044f\u0456]{1,4})");
    /** Hash sign followed by a letter or underscore and then letters, digits or underscores. */
    private static final Pattern HASHTAG = Pattern.compile("#[\u0430-\u044f\u0456\u0457\u0454\u0491a-z_][\u0430-\u044f\u0456\u0457\u0454\u0491a-z0-9_]*", IGNORE_CASE_UNICODE);
    /** Date of the form d.m.yyyy (one or two digits for day and month). */
    private static final Pattern DATE = Pattern.compile("[\\d]{1,2}\\.[\\d]{1,2}\\.[\\d]{4}");
    /** Time of day, hours 0-23 and minutes 00-59, separated by a dot or a colon. */
    private static final Pattern TIME = Pattern.compile("([01]?[0-9]|2[0-3])[.:][0-5][0-9]");
    /** En dash (U+2013) used inside a word where a hyphen would be expected. */
    private static final Pattern ALT_DASHES_IN_WORD = Pattern.compile("[\u0430-\u044f\u0456\u0457\u0454\u04910-9a-z]\u2013[\u0430-\u044f\u0456\u0457\u0454\u0491]|[\u0430-\u044f\u0456\u0457\u0454\u0491]\u2013[0-9]", IGNORE_CASE_UNICODE);
    /** Hyphen immediately followed by an opening quotation mark. */
    private static final Pattern COMPOUND_WITH_QUOTES_REGEX = Pattern.compile("-[\u00ab\"\u201e]");
    /** Quotation marks that are stripped from a compound before it is looked up. */
    private static final Pattern QUOTE_CHARS = Pattern.compile("[\u00ab\u00bb\"\u201e\u201c]");
    /** Same vowel repeated three or more times in a row; group 1 is the vowel. */
    private static final Pattern REPEATED_VOWELS = Pattern.compile("([\u0430\u0435\u0454\u0438\u0456\u0457\u043e\u0443\u044e\u044f])\\1{2,}", IGNORE_CASE_UNICODE);
    /** POS tag filter: tags starting with "adj". */
    private static final Pattern ADJ_POS_TAG = Pattern.compile("adj.*");
    /** POS tag filter: noun tags that contain ":prop". */
    private static final Pattern PROPER_NOUN_POS_TAG = Pattern.compile("noun.*?:prop.*");
    /** POS tag filter: everything except "noun...:prop" tags and tags containing "abbr". */
    private static final Pattern NOT_PROPER_NOUN_OR_ABBR_POS_TAG = Pattern.compile("(?!noun.*:prop|.*abbr).*");

    private final CompoundTagger compoundTagger;

    /** Creates a tagger backed by the {@code /uk/ukrainian.dict} dictionary resource. */
    public UkrainianTagger() {
        super("/uk/ukrainian.dict", new Locale("uk", "UA"), false);
        this.compoundTagger = new CompoundTagger(this, this.wordTagger, this.locale);
    }

    /**
     * Produces tags for a word using pattern checks and the compound tagger.
     *
     * <p>The checks run in a fixed order and the first one that applies wins: number, Latin
     * Roman numeral, Roman numeral with Cyrillic letters, time, date, entity containing
     * {@code (} or {@code /}, hashtag, hyphenated compound, and finally
     * {@link CompoundTagger#guessOtherTags}.
     *
     * @param word the token text to tag
     * @param wordTagger not used by this implementation
     * @return the tokens produced by the first matching check; for hyphenated words whatever
     *         {@link CompoundTagger#guessCompoundTag} returns, or an empty list if it throws
     */
    public List<AnalyzedToken> additionalTags(String word, WordTagger wordTagger) {
        if (NUMBER.matcher(word).matches()) {
            return singleTokenList(word, IPOSTag.number.getText());
        }
        if (LATIN_NUMBER.matcher(word).matches() && !LATIN_NUMBER_MD_ONLY.matcher(word).matches()) {
            return singleTokenList(word, "number:latin");
        }
        if (LATIN_NUMBER_CYR.matcher(word).matches()) {
            return singleTokenList(word, "number:latin:bad");
        }
        if (TIME.matcher(word).matches()) {
            return singleTokenList(word, IPOSTag.time.getText());
        }
        if (DATE.matcher(word).matches()) {
            return singleTokenList(word, IPOSTag.date.getText());
        }

        if (word.indexOf('(') > 0 || word.indexOf('/') > 0) {
            Set<AnalyzedToken> newAnalyzedTokens = this.compoundTagger.generateEntities(word);
            if (newAnalyzedTokens.size() > 0) {
                return new ArrayList<>(newAnalyzedTokens);
            }
        }

        if (word.startsWith("#") && HASHTAG.matcher(word).matches()) {
            return singleTokenList(word, IPOSTag.hashtag.getText());
        }

        if (word.length() >= 3 && word.indexOf('-') > 0) {
            if (word.length() >= 6 && COMPOUND_WITH_QUOTES_REGEX.matcher(word).find()) {
                // Look the compound up without its quotation marks, but keep the original token text.
                String adjustedWord = QUOTE_CHARS.matcher(word).replaceAll("");
                return this.getAdjustedAnalyzedTokens(word, adjustedWord, null, null, null);
            }
            try {
                return this.compoundTagger.guessCompoundTag(word);
            } catch (Exception e) {
                // Best effort: a failure on one compound must not abort tagging of the text.
                logger.error("Failed to tag \"" + word + "\"", e);
                return new ArrayList<>();
            }
        }

        return this.compoundTagger.guessOtherTags(word);
    }

    /**
     * Looks the word up via the superclass and, if that yields no tag, retries with a number
     * of spelling adjustments.
     *
     * <p>A backtick after the first character is treated as an apostrophe. Words shorter than
     * two characters get the superclass result unchanged. Independently of whether the word
     * was found, all-uppercase words are additionally looked up as capitalized proper nouns,
     * and hyphenated words whose parts are all capitalized are looked up as adjectives.
     *
     * @param word the token text
     * @return the analyzed tokens; the list returned by the superclass may be extended in place
     */
    protected List<AnalyzedToken> getAnalyzedTokens(String word) {
        if (word.indexOf('`') > 0) {
            word = word.replace('`', '\'');
        }

        List<AnalyzedToken> tokens = super.getAnalyzedTokens(word);
        if (word.length() < 2) {
            return tokens;
        }

        if (tokens.get(0).hasNoTag() && word.length() > 2) {
            if (word.indexOf('\u2013') > 0 && ALT_DASHES_IN_WORD.matcher(word).find()) {
                // Retry with hyphens in place of en dashes; from here on the hyphenated
                // spelling is the word used by all remaining checks in this method.
                String origWord = word;
                word = origWord.replace('\u2013', '-');
                List<AnalyzedToken> hyphenTokens = super.getAnalyzedTokens(word);
                if (hyphenTokens.size() > 0 && !hyphenTokens.get(0).hasNoTag()) {
                    // Append an untagged token that carries the original (en dash) spelling.
                    hyphenTokens.add(new AnalyzedToken(origWord, null, null));
                    tokens = hyphenTokens;
                }
            } else {
                tokens = this.tagSpellingVariant(tokens, word);
            }

            if (tokens.get(0).hasNoTag()) {
                tokens = this.tagWithHeuristics(tokens, word);
            }
        }

        if (word.length() > 2 && LemmaHelper.isAllUppercaseUk(word)) {
            String newWord = LemmaHelper.capitalizeProperName(word);
            List<AnalyzedToken> properNameTokens = this.getAdjustedAnalyzedTokens(word, newWord, PROPER_NOUN_POS_TAG, null, null);
            if (properNameTokens.size() > 0) {
                if (tokens.get(0).hasNoTag()) {
                    tokens = properNameTokens;
                } else {
                    tokens.addAll(properNameTokens);
                }
            }
        }

        List<AnalyzedToken> capitalizedAdjTokens = this.analyzeAllCapitamizedAdj(word);
        if (capitalizedAdjTokens.size() > 0) {
            if (tokens.get(0).hasNoTag()) {
                tokens = capitalizedAdjTokens;
            } else {
                for (AnalyzedToken token : capitalizedAdjTokens) {
                    if (!tokens.contains(token)) {
                        tokens.add(token);
                    }
                }
            }
        }
        return tokens;
    }

    /**
     * Tags a hyphenated word whose parts are all capitalized by looking up its lowercase form
     * and keeping only the adjective readings.
     *
     * @param word the token text
     * @return adjective readings carrying the original token text, or an empty list when the
     *         word does not qualify or its lowercase form has no "adj" tag
     */
    protected List<AnalyzedToken> analyzeAllCapitamizedAdj(String word) {
        if (word.indexOf('-') > 1 && !word.endsWith("-")) {
            String[] parts = word.split("-");
            if (Stream.of(parts).allMatch(LemmaHelper::isCapitalized)) {
                // Use the tagger's locale so the lookup does not depend on the JVM default locale.
                String lowerCasedWord = word.toLowerCase(this.locale);
                List<TaggedWord> wdList = this.wordTagger.tag(lowerCasedWord);
                if (PosTagHelper.hasPosTagPart2(wdList, "adj")) {
                    List<AnalyzedToken> analyzedTokens = this.asAnalyzedTokenListForTaggedWordsInternal(word, wdList);
                    return PosTagHelper.filter(analyzedTokens, ADJ_POS_TAG);
                }
            }
        }
        return new ArrayList<>();
    }

    /** Builds a new mutable list holding one token whose lemma equals the word itself. */
    private static List<AnalyzedToken> singleTokenList(String word, String posTag) {
        List<AnalyzedToken> result = new ArrayList<>();
        result.add(new AnalyzedToken(word, posTag, word));
        return result;
    }

    /**
     * Retries the lookup with one alternative spelling. Only the first rule whose condition
     * matches the word is tried, even if its lookup finds nothing.
     *
     * @return the converted tokens, or {@code tokens} unchanged when no rule applies or the
     *         adjusted spelling is not found
     */
    private List<AnalyzedToken> tagSpellingVariant(List<AnalyzedToken> tokens, String word) {
        if (word.contains("\u0491") || word.contains("\u0490")) {
            return this.convertTokens(tokens, word, "\u0491", "\u0433", ":alt");
        }
        if (word.contains("\u0456\u044f")) {
            return this.convertTokens(tokens, word, "\u0456\u044f", "\u0456\u0430", ":alt");
        }
        if (word.endsWith("\u0442\u0435\u0440")) {
            return this.convertTokens(tokens, word, "\u0442\u0435\u0440", "\u0442\u0440", ":alt");
        }
        if (word.contains("\u043b\u044c\u043e")) {
            return this.convertTokens(tokens, word, "\u043b\u044c\u043e", "\u043b\u043e", ":alt");
        }
        if (word.startsWith("\u0441\u044c\u0432\u044f")) {
            return this.convertTokens(tokens, word, "\u0441\u044c\u0432\u044f", "\u0441\u0432\u044f", ":arch");
        }
        if (word.startsWith("\u0441\u044c\u0432\u0456")) {
            return this.convertTokens(tokens, word, "\u0441\u044c\u0432\u0456", "\u0441\u0432\u0456", ":arch");
        }
        if (word.contains("\u044c\u0441\u043a")
                && !word.endsWith("\u0441\u043a\u0430\u044f")
                && !word.equals("\u041a\u043e\u043c\u0441\u043e\u043c\u043e\u043b\u044c\u0441\u043a\u043e\u043c")) {
            return this.convertTokens(tokens, word, "\u044c\u0441\u043a", "\u044c\u0441\u044c\u043a", ":bad");
        }
        return tokens;
    }

    /**
     * Applies three fallback lookups in order, each only while the word is still untagged:
     * a prefixed adjective split by {@link CompoundTagger#LEFT_O_ADJ_INVALID_PATTERN},
     * a word with a vowel repeated three or more times, and a word containing square brackets.
     *
     * @return the tokens from the first successful lookup, or {@code tokens} unchanged
     */
    private List<AnalyzedToken> tagWithHeuristics(List<AnalyzedToken> tokens, String word) {
        List<AnalyzedToken> result = tokens;

        if (word.length() >= 9) {
            Matcher leftAdjMatcher = CompoundTagger.LEFT_O_ADJ_INVALID_PATTERN.matcher(word);
            if (leftAdjMatcher.matches()) {
                String prefix = leftAdjMatcher.group(1);
                String adjustedWord = leftAdjMatcher.group(2);
                // Look up the second part as an adjective and glue the prefix back onto the lemma.
                List<AnalyzedToken> found = this.getAdjustedAnalyzedTokens(word, adjustedWord, ADJ_POS_TAG, null, lemma -> prefix + lemma);
                if (!found.isEmpty()) {
                    result = found;
                }
            }
        }

        if (result.get(0).hasNoTag() && !word.equalsIgnoreCase("\u0456\u0456\u0456")) {
            Matcher repeatedVowels = REPEATED_VOWELS.matcher(word);
            if (repeatedVowels.find()) {
                // Collapse every run of a repeated vowel into a single vowel.
                String adjustedWord = repeatedVowels.replaceAll("$1");
                List<AnalyzedToken> found = this.getAdjustedAnalyzedTokens(word, adjustedWord, NOT_PROPER_NOUN_OR_ABBR_POS_TAG, ":alt", UnaryOperator.identity());
                if (!found.isEmpty()) {
                    result = found;
                }
            }
        }

        if (result.get(0).hasNoTag()
                && word.indexOf('[') != -1
                && word.indexOf(']') != -1
                && UkrainianWordTokenizer.WORDS_WITH_BRACKETS_PATTERN.matcher(word).find()) {
            String adjustedWord = word.replace("[", "").replace("]", "");
            List<AnalyzedToken> found = this.getAdjustedAnalyzedTokens(word, adjustedWord, null, ":alt", UnaryOperator.identity());
            if (!found.isEmpty()) {
                result = found;
            }
        }

        return result;
    }

    /**
     * Looks the word up with {@code str} replaced by {@code dictStr} and maps the found lemmas
     * back by replacing {@code dictStr} with {@code str}.
     *
     * @param origTokens tokens to fall back to when the adjusted word is not found
     * @param word the original token text
     * @param str the fragment as written in the text
     * @param dictStr the fragment to look up instead
     * @param additionalTag tag suffix added to every found reading
     * @return the readings of the adjusted word, or {@code origTokens} if there are none
     */
    private List<AnalyzedToken> convertTokens(List<AnalyzedToken> origTokens, String word, String str, String dictStr, String additionalTag) {
        String adjustedWord = word.replace(str, dictStr);
        if (str.length() == 1) {
            // Single-letter substitutions are applied to the uppercase letter as well.
            adjustedWord = adjustedWord.replace(str.toUpperCase(this.locale), dictStr.toUpperCase(this.locale));
        }
        List<AnalyzedToken> newTokens = this.getAdjustedAnalyzedTokens(word, adjustedWord, null, additionalTag, lemma -> lemma.replace(dictStr, str));
        return newTokens.isEmpty() ? origTokens : newTokens;
    }

    /**
     * Looks up {@code adjustedWord} via the superclass and re-labels the readings with the
     * original token text.
     *
     * @param word the original token text put on the returned tokens
     * @param adjustedWord the spelling that is actually looked up
     * @param posTagRegex if non-null, only readings whose POS tag fully matches are kept
     * @param additionalTag if non-null, added to each POS tag via {@link PosTagHelper#addIfNotContains}
     * @param lemmaFunction if non-null, applied to each lemma
     * @return the derived readings; empty when the adjusted word has no tag or nothing passes the filter
     */
    private List<AnalyzedToken> getAdjustedAnalyzedTokens(String word, String adjustedWord, Pattern posTagRegex, String additionalTag, UnaryOperator<String> lemmaFunction) {
        List<AnalyzedToken> newTokens = super.getAnalyzedTokens(adjustedWord);
        if (newTokens.get(0).hasNoTag()) {
            return new ArrayList<>();
        }

        List<AnalyzedToken> derivedTokens = new ArrayList<>();
        for (AnalyzedToken analyzedToken : newTokens) {
            // Skip readings whose token text differs from the spelling that was looked up.
            if (!adjustedWord.equals(analyzedToken.getToken())) {
                continue;
            }
            String posTag = analyzedToken.getPOSTag();
            if (posTagRegex != null && !posTagRegex.matcher(posTag).matches()) {
                continue;
            }

            String lemma = analyzedToken.getLemma();
            if (lemmaFunction != null) {
                lemma = lemmaFunction.apply(lemma);
            }
            if (additionalTag != null) {
                posTag = PosTagHelper.addIfNotContains(posTag, additionalTag);
            }
            derivedTokens.add(new AnalyzedToken(word, posTag, lemma));
        }
        return derivedTokens;
    }

    /** Exposes the superclass conversion of tagged words to analyzed tokens within this package. */
    List<AnalyzedToken> asAnalyzedTokenListForTaggedWordsInternal(String word, List<TaggedWord> taggedWords) {
        return super.asAnalyzedTokenListForTaggedWords(word, taggedWords);
    }

    /** Returns the word tagger of the superclass, made publicly accessible. */
    public WordTagger getWordTagger() {
        return super.getWordTagger();
    }
}

