/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.trees.international.pennchinese;

import edu.stanford.nlp.io.EncodingPrintWriter;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.regex.Pattern;

public class ChineseUtils {
    /** Redwood logging channels for this class. */
    private static final Redwood.RedwoodChannels log = Redwood.channels(ChineseUtils.class);
    // Always false, and not read by any code visible in this file.
    // NOTE(review): presumably it once selected normalizeBMP over normalizeUnicode - confirm.
    private static final boolean ONLY_BMP = false;
    /** Regex character class for a single whitespace character ({@code \s}) or Unicode space separator ({@code \p{Zs}}). */
    public static final String ONEWHITE = "[\\s\\p{Zs}]";
    /** The {@link #ONEWHITE} character class repeated zero or more times. */
    public static final String WHITE = "[\\s\\p{Zs}]*";
    /** The {@link #ONEWHITE} character class repeated one or more times. */
    public static final String WHITEPLUS = "[\\s\\p{Zs}]+";
    /**
     * Regex character class of the ten Chinese numerals for one through ten:
     * U+4E00, U+4E8C, U+4E09, U+56DB, U+4E94, U+516D, U+4E03, U+516B, U+4E5D, U+5341.
     */
    public static final String NUMBERS = "[\u4e00\u4e8c\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d\u5341]";
    /**
     * Regex character class of the characters treated as a "mid dot": U+00B7 MIDDLE DOT,
     * U+0387 GREEK ANO TELEIA, U+2022 BULLET, U+2024 ONE DOT LEADER, U+2027 HYPHENATION POINT,
     * U+2219 BULLET OPERATOR, U+22C5 DOT OPERATOR and U+30FB KATAKANA MIDDLE DOT.
     * This is the same set that isMidDot tests numerically.
     */
    public static final String MID_DOT_REGEX_STR = "[\u00b7\u0387\u2022\u2024\u2027\u2219\u22c5\u30fb]";

    // ---- Option values for the ascii / spaceChar / midDot arguments of normalize ----

    /** Option value 0: leave the affected characters unchanged. */
    public static final int LEAVE = 0;
    /** Option value 1 for {@code ascii}: fullwidth forms U+FF01..U+FF5E are narrowed to ASCII U+0021..U+007E. */
    public static final int ASCII = 1;
    /**
     * Option value 1 for {@code spaceChar} and {@code midDot}: space characters become U+0020 and
     * mid dots become U+00B7. Shares its numeric value with {@link #ASCII}.
     */
    public static final int NORMALIZE = 1;
    /**
     * Option value 2: ASCII U+0021..U+007E is widened to fullwidth forms ({@code ascii}), space
     * characters become U+3000 ({@code spaceChar}), mid dots become U+30FB ({@code midDot}).
     */
    public static final int FULLWIDTH = 2;
    /**
     * Option value 3: the affected characters are deleted. Handled for {@code spaceChar} and
     * {@code midDot}; for {@code ascii} the normalization code throws IllegalArgumentException.
     */
    public static final int DELETE = 3;
    /**
     * Option value 4, handled only for {@code spaceChar}: a space character is deleted unless both of
     * its neighbours are ASCII or fullwidth-ASCII printable characters.
     */
    public static final int DELETE_EXCEPT_BETWEEN_ASCII = 4;
    /** Largest value that normalize accepts for {@code ascii} and {@code spaceChar} in its up-front range check. */
    public static final int MAX_LEGAL = 4;

    // ---- Patterns used by shapeOf; each one must match the entire input ----

    // One or more of U+5E74, U+6708, U+65E5 (the year / month / day characters).
    private static final Pattern dateChars = Pattern.compile("[\u5e74\u6708\u65e5]+");
    // Same as dateChars with U+53F7 added; shapeOf only consults it when augmentedDateChars is true.
    private static final Pattern dateCharsPlus = Pattern.compile("[\u5e74\u6708\u65e5\u53f7]+");
    // One or more numeric characters: ASCII digits, fullwidth digits U+FF10..U+FF19, the numerals in
    // NUMBERS, several further CJK number characters and circle glyphs, and the ranges
    // U+3021..U+3029 and U+3038..U+303A.
    private static final Pattern numberChars = Pattern.compile("[0-9\uff10-\uff19\u4e00\u4e8c\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d\u5341\u96f6\u3007\u767e\u5343\u4e07\u4ebf\u5169\u25cb\u25ef\u3021-\u3029\u3038-\u303a]+");
    // One or more Latin letters, ASCII (A-Z, a-z) or fullwidth (U+FF21..U+FF3A, U+FF41..U+FF5A).
    private static final Pattern letterChars = Pattern.compile("[A-Za-z\uff21-\uff3a\uff41-\uff5a]+");
    // One or more period-like characters: U+FE52, U+2027, U+FF0E, ASCII '.', and U+70B9.
    private static final Pattern periodChars = Pattern.compile("[\ufe52\u2027\uff0e.\u70b9]+");
    // One or more punctuation characters reported with shape "S": ASCII brackets, quotes, comparison
    // signs and similar, the CJK punctuation ranges U+3001..U+3003, U+3008..U+3011, U+3014..U+301F,
    // U+3030, and the fullwidth counterparts of the ASCII characters.
    private static final Pattern separatingPuncChars = Pattern.compile("[]!\"(),;:<=>?\\[\\\\`{|}~^\u3001-\u3003\u3008-\u3011\u3014-\u301f\u3030\uff3d\uff01\uff02\uff08\uff09\uff0c\uff1b\uff1a\uff1c\uff1d\uff1e\uff1f\uff3b\uff3c\uff40\uff5b\uff5c\uff5d\uff5e\uff3e]+");
    // One or more punctuation characters reported with shape "A": - # $ % & ' * + / @ _ and their
    // fullwidth counterparts.
    private static final Pattern ambiguousPuncChars = Pattern.compile("[-#$%&'*+/@_\uff0d\uff03\uff04\uff05\uff06\uff07\uff0a\uff0b\uff0f\uff20\uff3f]+");
    // One or more characters from the MID_DOT_REGEX_STR set.
    private static final Pattern midDotPattern = Pattern.compile("[\u00b7\u0387\u2022\u2024\u2027\u2219\u22c5\u30fb]+");

    /** Not instantiable: every member of this class is static. */
    private ChineseUtils() {
        // intentionally empty
    }

    /** {@link #NUMBERS} compiled once, so that {@link #isNumber(char)} does not hand a regex string to a matcher helper on every call. */
    private static final Pattern NUMBERS_PATTERN = Pattern.compile(NUMBERS);

    /**
     * Tests whether {@code c} is a number character.
     *
     * @param c the character to test
     * @return {@code true} if {@code c} is a digit according to {@link Character#isDigit(char)} or is
     *         one of the Chinese numerals listed in {@link #NUMBERS}; {@code false} otherwise
     */
    public static boolean isNumber(char c) {
        // Cheap table lookup first; the regex is only consulted for non-digits.
        return Character.isDigit(c) || NUMBERS_PATTERN.matcher(String.valueOf(c)).matches();
    }

    /**
     * Normalizes {@code in} with the default options: ASCII printable characters are widened to their
     * fullwidth forms, space characters are mapped to U+0020, and mid dots are left untouched.
     *
     * @param in the text to normalize
     * @return the normalized text
     */
    public static String normalize(String in) {
        final int asciiOption = FULLWIDTH;
        final int spaceOption = NORMALIZE;
        return normalize(in, asciiOption, spaceOption);
    }

    /**
     * Normalizes {@code in} with the given ASCII and space handling, leaving mid dots untouched.
     *
     * @param in the text to normalize
     * @param ascii how ASCII / fullwidth-ASCII characters are treated (one of the option constants)
     * @param spaceChar how space characters are treated (one of the option constants)
     * @return the normalized text
     * @throws IllegalArgumentException if {@code ascii} or {@code spaceChar} is outside 0..{@link #MAX_LEGAL}
     */
    public static String normalize(String in, int ascii, int spaceChar) {
        final int midDotOption = LEAVE;
        return normalize(in, ascii, spaceChar, midDotOption);
    }

    /**
     * Normalizes {@code in} according to the three option arguments, working on whole code points.
     *
     * <p>Only {@code ascii} and {@code spaceChar} are range-checked here. {@code midDot}, and the
     * {@code ascii} values that pass this check but are not implemented, are rejected by
     * {@code normalizeUnicode} while it processes a code point, so an empty {@code in} never
     * triggers those later checks.
     *
     * @param in the text to normalize
     * @param ascii how ASCII / fullwidth-ASCII characters are treated
     * @param spaceChar how space characters are treated
     * @param midDot how mid-dot characters are treated
     * @return the normalized text
     * @throws IllegalArgumentException if {@code ascii} or {@code spaceChar} is outside
     *         0..{@link #MAX_LEGAL}, or if an unsupported option is met during normalization
     */
    public static String normalize(String in, int ascii, int spaceChar, int midDot) {
        boolean asciiInRange = ascii >= LEAVE && ascii <= MAX_LEGAL;
        boolean spaceInRange = spaceChar >= LEAVE && spaceChar <= MAX_LEGAL;
        if (!(asciiInRange && spaceInRange)) {
            throw new IllegalArgumentException("ChineseUtils: Unknown parameter option");
        }
        return normalizeUnicode(in, ascii, spaceChar, midDot);
    }

    /**
     * Normalizes {@code in} one UTF-16 {@code char} at a time. No code visible in this class calls
     * this method.
     *
     * <p>A high surrogate only produces a log warning; both halves of a surrogate pair are then
     * processed as independent chars. A private-use character produces a warning on
     * {@code EncodingPrintWriter.err}. For every char the ASCII, space and mid-dot options are
     * applied in that order.
     *
     * @param in the text to normalize
     * @param ascii {@link #LEAVE}, {@link #ASCII} or {@link #FULLWIDTH}
     * @param spaceChar {@link #LEAVE}, {@link #NORMALIZE}, {@link #FULLWIDTH}, {@link #DELETE} or
     *        {@link #DELETE_EXCEPT_BETWEEN_ASCII}; any other value behaves like {@link #LEAVE}
     * @param midDot {@link #LEAVE}, {@link #NORMALIZE}, {@link #FULLWIDTH} or {@link #DELETE}
     * @return the normalized text
     * @throws IllegalArgumentException if {@code ascii} or {@code midDot} is not one of the listed
     *         values and {@code in} is non-empty
     */
    private static String normalizeBMP(String in, int ascii, int spaceChar, int midDot) {
        // Distance between a fullwidth form (U+FF01..U+FF5E) and its ASCII twin (U+0021..U+007E).
        final int fullwidthShift = 0xFEE0;
        final int length = in.length();
        final StringBuilder result = new StringBuilder();

        for (int idx = 0; idx < length; idx++) {
            char ch = in.charAt(idx);

            if (Character.isHighSurrogate(ch)) {
                String hex = Integer.toHexString(Character.codePointAt(in, idx));
                if (idx + 1 < length) {
                    log.warn("ChineseUtils.normalize warning: non-BMP codepoint U+" + hex + " in " + in);
                } else {
                    log.warn("ChineseUtils.normalize warning: unmatched high surrogate character U+" + hex + " in " + in);
                }
            }

            Character.UnicodeBlock block = Character.UnicodeBlock.of(ch);
            boolean privateUse = block == Character.UnicodeBlock.PRIVATE_USE_AREA
                    || block == Character.UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A
                    || block == Character.UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B;
            if (privateUse) {
                EncodingPrintWriter.err.println("ChineseUtils.normalize warning: private use area codepoint U+" + Integer.toHexString(ch) + " in " + in);
            }

            // Step 1: ASCII <-> fullwidth conversion.
            if (ascii == ASCII) {
                if (ch >= '\uff01' && ch <= '\uff5e') {
                    ch = (char) (ch - fullwidthShift);
                }
            } else if (ascii == FULLWIDTH) {
                if (ch >= '!' && ch <= '~') {
                    ch = (char) (ch + fullwidthShift);
                }
            } else if (ascii != LEAVE) {
                throw new IllegalArgumentException("ChineseUtils: Unsupported parameter option: ascii=" + ascii);
            }

            // Step 2: space handling.
            boolean drop = false;
            switch (spaceChar) {
                case NORMALIZE:
                    if (Character.isSpaceChar(ch)) {
                        ch = ' ';
                    }
                    break;
                case FULLWIDTH:
                    if (Character.isSpaceChar(ch)) {
                        ch = '\u3000';
                    }
                    break;
                case DELETE:
                    drop = Character.isSpaceChar(ch);
                    break;
                case DELETE_EXCEPT_BETWEEN_ASCII: {
                    // Neighbours are taken from the raw input; NUL stands in at either end of the string.
                    char before = idx > 0 ? in.charAt(idx - 1) : '\u0000';
                    char after = idx < length - 1 ? in.charAt(idx + 1) : '\u0000';
                    boolean betweenAscii = ChineseUtils.isAsciiLowHigh(before) && ChineseUtils.isAsciiLowHigh(after);
                    drop = Character.isSpaceChar(ch) && !betweenAscii;
                    break;
                }
                default:
                    // LEAVE, and any value not listed above, keeps the character as it is.
                    break;
            }

            // Step 3: mid-dot handling.
            switch (midDot) {
                case LEAVE:
                    break;
                case NORMALIZE:
                    if (ChineseUtils.isMidDot(ch)) {
                        ch = '\u00b7';
                    }
                    break;
                case FULLWIDTH:
                    if (ChineseUtils.isMidDot(ch)) {
                        ch = '\u30fb';
                    }
                    break;
                case DELETE:
                    if (ChineseUtils.isMidDot(ch)) {
                        drop = true;
                    }
                    break;
                default:
                    throw new IllegalArgumentException("ChineseUtils: Unsupported parameter option: midDot=" + midDot);
            }

            if (!drop) {
                result.append(ch);
            }
        }
        return result.toString();
    }

    /**
     * Normalizes {@code in} one Unicode code point at a time, so supplementary characters are read
     * and written as single units.
     *
     * <p>A private-use code point produces a warning on {@code EncodingPrintWriter.err}. For every
     * code point the ASCII, space and mid-dot options are applied in that order. For
     * {@link #DELETE_EXCEPT_BETWEEN_ASCII} the "previous" neighbour is the previous code point
     * <em>after</em> its own normalization (even if it was deleted), while the "next" neighbour is
     * read raw from the input.
     *
     * @param in the text to normalize
     * @param ascii {@link #LEAVE}, {@link #ASCII} or {@link #FULLWIDTH}
     * @param spaceChar {@link #LEAVE}, {@link #NORMALIZE}, {@link #FULLWIDTH}, {@link #DELETE} or
     *        {@link #DELETE_EXCEPT_BETWEEN_ASCII}; any other value behaves like {@link #LEAVE}
     * @param midDot {@link #LEAVE}, {@link #NORMALIZE}, {@link #FULLWIDTH} or {@link #DELETE}
     * @return the normalized text
     * @throws IllegalArgumentException if {@code ascii} or {@code midDot} is not one of the listed
     *         values and {@code in} is non-empty
     */
    private static String normalizeUnicode(String in, int ascii, int spaceChar, int midDot) {
        // Distance between a fullwidth form (U+FF01..U+FF5E) and its ASCII twin (U+0021..U+007E).
        final int fullwidthShift = 0xFEE0;
        final int length = in.length();
        final StringBuilder result = new StringBuilder();

        int previous = 0; // normalized code point of the previous iteration; 0 before the first one
        int pos = 0;
        while (pos < length) {
            int codePoint = in.codePointAt(pos);

            Character.UnicodeBlock block = Character.UnicodeBlock.of(codePoint);
            boolean privateUse = block == Character.UnicodeBlock.PRIVATE_USE_AREA
                    || block == Character.UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A
                    || block == Character.UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B;
            if (privateUse) {
                EncodingPrintWriter.err.println("ChineseUtils.normalize warning: private use area codepoint U+" + Integer.toHexString(codePoint) + " in " + in);
            }

            // Step 1: ASCII <-> fullwidth conversion.
            switch (ascii) {
                case LEAVE:
                    break;
                case ASCII:
                    if (codePoint >= 0xFF01 && codePoint <= 0xFF5E) {
                        codePoint -= fullwidthShift;
                    }
                    break;
                case FULLWIDTH:
                    if (codePoint >= 0x21 && codePoint <= 0x7E) {
                        codePoint += fullwidthShift;
                    }
                    break;
                default:
                    throw new IllegalArgumentException("ChineseUtils: Unsupported parameter option: ascii=" + ascii);
            }

            // Step 2: space handling.
            boolean drop = false;
            if (spaceChar == NORMALIZE) {
                if (Character.isSpaceChar(codePoint)) {
                    codePoint = 0x0020;
                }
            } else if (spaceChar == FULLWIDTH) {
                if (Character.isSpaceChar(codePoint)) {
                    codePoint = 0x3000;
                }
            } else if (spaceChar == DELETE) {
                drop = Character.isSpaceChar(codePoint);
            } else if (spaceChar == DELETE_EXCEPT_BETWEEN_ASCII) {
                int followingPos = pos + Character.charCount(codePoint);
                int following = followingPos < length ? in.codePointAt(followingPos) : 0;
                boolean betweenAscii = ChineseUtils.isAsciiLowHigh(previous) && ChineseUtils.isAsciiLowHigh(following);
                drop = Character.isSpaceChar(codePoint) && !betweenAscii;
            }
            // LEAVE, and any other value, keeps the code point as it is.

            // Step 3: mid-dot handling.
            switch (midDot) {
                case LEAVE:
                    break;
                case NORMALIZE:
                    if (ChineseUtils.isMidDot(codePoint)) {
                        codePoint = 0x00B7;
                    }
                    break;
                case FULLWIDTH:
                    if (ChineseUtils.isMidDot(codePoint)) {
                        codePoint = 0x30FB;
                    }
                    break;
                case DELETE:
                    if (ChineseUtils.isMidDot(codePoint)) {
                        drop = true;
                    }
                    break;
                default:
                    throw new IllegalArgumentException("ChineseUtils: Unsupported parameter option: midDot=" + midDot);
            }

            if (!drop) {
                result.appendCodePoint(codePoint);
            }
            previous = codePoint;
            // Advance by the width of the (possibly rewritten) code point. Every rewrite above maps a
            // BMP code point to another BMP code point, so this equals the width consumed from the input.
            pos += Character.charCount(codePoint);
        }
        return result.toString();
    }

    /**
     * Tests whether {@code cp} is one of the eight code points this class treats as a mid dot.
     *
     * @param cp the code point to test
     * @return {@code true} for a mid-dot code point, {@code false} otherwise
     */
    private static boolean isMidDot(int cp) {
        switch (cp) {
            case 0x00B7: // MIDDLE DOT
            case 0x0387: // GREEK ANO TELEIA
            case 0x2022: // BULLET
            case 0x2024: // ONE DOT LEADER
            case 0x2027: // HYPHENATION POINT
            case 0x2219: // BULLET OPERATOR
            case 0x22C5: // DOT OPERATOR
            case 0x30FB: // KATAKANA MIDDLE DOT
                return true;
            default:
                return false;
        }
    }

    /**
     * Tests whether {@code cp} is a printable ASCII character (U+0021..U+007E) or the fullwidth form
     * of one (U+FF01..U+FF5E).
     *
     * @param cp the code point to test
     * @return {@code true} if {@code cp} lies in either range, {@code false} otherwise
     */
    private static boolean isAsciiLowHigh(int cp) {
        if (cp >= 0x21 && cp <= 0x7E) {
            return true;
        }
        return cp >= 0xFF01 && cp <= 0xFF5E;
    }

    /**
     * Command-line entry point: normalizes words given as arguments, or acts as a stdin filter.
     *
     * <p>The first three arguments are the integer option values for {@code ascii}, {@code spaceChar}
     * and {@code midDot}. Any further arguments are normalized and printed one per line. With exactly
     * three arguments, standard input is read line by line as UTF-8 and each normalized line is
     * printed. With fewer than three arguments a usage message is logged and nothing else happens.
     *
     * @param args option flags followed by optional words
     * @throws IOException if reading standard input fails
     * @throws NumberFormatException if one of the first three arguments is not an integer
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 3) {
            log.info("usage: ChineseUtils ascii space midDot word*");
            log.info("  First 3 args are int flags; a filter or maps args as words; assumes UTF-8");
            return;
        }
        int i = Integer.parseInt(args[0]);
        int j = Integer.parseInt(args[1]);
        int midDot = Integer.parseInt(args[2]);
        if (args.length > 3) {
            for (int k = 3; k < args.length; ++k) {
                EncodingPrintWriter.out.println(ChineseUtils.normalize(args[k], i, j, midDot));
            }
        } else {
            // try-with-resources so the reader is closed even if normalize or readLine throws.
            try (BufferedReader r = IOUtils.readerFromStdin("UTF-8")) {
                String line;
                while ((line = r.readLine()) != null) {
                    EncodingPrintWriter.out.println(ChineseUtils.normalize(line, i, j, midDot));
                }
            }
        }
    }

    /**
     * Classifies {@code input} into a one-letter shape code. Each pattern must match the whole
     * input, and the first rule that applies wins:
     * <ol>
     *   <li>"D" - {@code augmentedDateChars} is set and the input consists only of the augmented
     *       date characters;</li>
     *   <li>"o" - the first character is U+7B2C;</li>
     *   <li>"D" - only date characters;</li>
     *   <li>"N" - only number characters;</li>
     *   <li>"L" - only ASCII or fullwidth Latin letters;</li>
     *   <li>"P" - only period characters;</li>
     *   <li>"S" - only separating punctuation;</li>
     *   <li>"A" - only ambiguous punctuation;</li>
     *   <li>"M" - {@code useMidDotShape} is set and the input consists only of mid dots;</li>
     *   <li>"C" - anything else, including the empty input.</li>
     * </ol>
     *
     * @param input the text to classify; must not be {@code null}
     * @param augmentedDateChars whether to try the augmented date-character set first
     * @param useMidDotShape whether mid-dot-only input gets its own shape "M" instead of "C"
     * @return the shape code, never {@code null}
     */
    public static String shapeOf(CharSequence input, boolean augmentedDateChars, boolean useMidDotShape) {
        if (augmentedDateChars && dateCharsPlus.matcher(input).matches()) {
            return "D";
        }
        // Guard the index: charAt(0) on an empty sequence would throw IndexOutOfBoundsException.
        if (input.length() > 0 && input.charAt(0) == '\u7b2c') {
            return "o";
        }
        if (dateChars.matcher(input).matches()) {
            return "D";
        }
        if (numberChars.matcher(input).matches()) {
            return "N";
        }
        if (letterChars.matcher(input).matches()) {
            return "L";
        }
        if (periodChars.matcher(input).matches()) {
            return "P";
        }
        if (separatingPuncChars.matcher(input).matches()) {
            return "S";
        }
        if (ambiguousPuncChars.matcher(input).matches()) {
            return "A";
        }
        if (useMidDotShape && midDotPattern.matcher(input).matches()) {
            return "M";
        }
        return "C";
    }
}

