/*
 * Decompiled with CFR 0.152.
 */
package org.elasticsearch.xpack.ml.filestructurefinder;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collections;
import java.util.DoubleSummaryStatistics;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
import java.util.SortedMap;
import java.util.stream.Collectors;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.xpack.core.ml.filestructurefinder.FieldStats;
import org.elasticsearch.xpack.core.ml.filestructurefinder.FileStructure;
import org.elasticsearch.xpack.ml.filestructurefinder.FileStructureFinder;
import org.elasticsearch.xpack.ml.filestructurefinder.FileStructureOverrides;
import org.elasticsearch.xpack.ml.filestructurefinder.FileStructureUtils;
import org.elasticsearch.xpack.ml.filestructurefinder.TimeoutChecker;
import org.elasticsearch.xpack.ml.filestructurefinder.TimestampFormatFinder;
import org.supercsv.exception.SuperCsvException;
import org.supercsv.io.CsvListReader;
import org.supercsv.prefs.CsvPreference;
import org.supercsv.util.Util;

public class DelimitedFileStructureFinder
implements FileStructureFinder {
    /**
     * Regex with a single capturing group that matches one character out of
     * backslash, {@code |}, parentheses, square brackets, braces, {@code ^},
     * {@code $}, {@code .}, {@code +}, {@code *} and {@code ?}. It is used with
     * the replacement {@code \\$1} to backslash-escape text before embedding
     * it in a generated regular expression.
     */
    private static final String REGEX_NEEDS_ESCAPE_PATTERN = "([\\\\|()\\[\\]{}^$.+*?])";
    // NOTE(review): presumably the cap on row-to-row Levenshtein comparisons in
    // the header-detection heuristic - confirm against the upstream source.
    private static final int MAX_LEVENSHTEIN_COMPARISONS = 100;
    // NOTE(review): presumably the field length at or above which a column is
    // treated as "long" and left out of the Levenshtein comparison - confirm.
    private static final int LONG_FIELD_THRESHOLD = 100;
    /** Raw text of each sampled record; wrapped as an unmodifiable list by the constructor. */
    private final List<String> sampleMessages;
    /** The structure description built by {@code makeDelimitedFileStructureFinder}. */
    private final FileStructure structure;

    /**
     * Parses a delimited sample and builds a finder holding the per-record
     * sample messages and the deduced {@link FileStructure}.
     *
     * <p>Column names come from the overrides when supplied; otherwise they are
     * taken from the header row (optionally trimmed, with {@code '.'} replaced
     * by {@code '_'}), and blank header cells are named {@code columnN} where N
     * is the 1-based column position. If a timestamp field is found, a
     * multiline start pattern and (when the sample has a header) an
     * exclude-lines pattern are generated as well.
     *
     * @param explanation        list that human-readable reasoning is appended to
     * @param sample             the sample text
     * @param charsetName        charset name recorded in the structure
     * @param hasByteOrderMarker byte-order-marker flag recorded in the structure
     * @param csvPreference      delimiter/quote configuration used for parsing
     * @param trimFields         whether header names and field values are trimmed
     * @param overrides          user-supplied overrides (column names, header row, timestamp)
     * @param timeoutChecker     checked periodically while parsing and analysing
     * @return the populated finder
     * @throws IOException              if reading the sample fails
     * @throws IllegalArgumentException if the number of overridden column names
     *                                  differs from the number of columns in the sample
     */
    static DelimitedFileStructureFinder makeDelimitedFileStructureFinder(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker, CsvPreference csvPreference, boolean trimFields, FileStructureOverrides overrides, TimeoutChecker timeoutChecker) throws IOException {
        Tuple<List<List<String>>, List<Integer>> parsed = DelimitedFileStructureFinder.readRows(sample, csvPreference, timeoutChecker);
        List<List<String>> rows = parsed.v1();
        List<Integer> lineNumbers = parsed.v2();

        // Even when the column names are overridden we still need to know whether
        // the first row is a header, because that decides which rows are records.
        Tuple<Boolean, String[]> headerInfo = DelimitedFileStructureFinder.findHeaderFromSample(explanation, rows, overrides);
        boolean isHeaderInFile = headerInfo.v1();
        String[] header = headerInfo.v2();

        String[] columnNames;
        List<String> overriddenColumnNames = overrides.getColumnNames();
        if (overriddenColumnNames != null) {
            if (overriddenColumnNames.size() != header.length) {
                throw new IllegalArgumentException("[" + overriddenColumnNames.size() + "] column names were specified [" + String.join(",", overriddenColumnNames) + "] but there are [" + header.length + "] columns in the sample");
            }
            columnNames = overriddenColumnNames.toArray(new String[0]);
        } else {
            columnNames = new String[header.length];
            for (int i = 0; i < header.length; ++i) {
                assert (header[i] != null);
                String rawHeader = trimFields ? header[i].trim() : header[i];
                columnNames[i] = rawHeader.isEmpty() ? "column" + (i + 1) : rawHeader.replace('.', '_');
            }
        }

        List<String> sampleLines = Arrays.asList(sample.split("\n"));
        List<String> sampleMessages = new ArrayList<>();
        List<Map<String, ?>> sampleRecords = new ArrayList<>();
        // lineNumbers holds, for each row, the 1-based number of the last line the
        // CSV reader consumed for it. Row i therefore occupies the 0-based line
        // indices [lineNumbers[i - 1], lineNumbers[i]), and the first row starts at 0.
        int prevMessageEndLineNumber = isHeaderInFile ? lineNumbers.get(0) : 0;
        for (int index = isHeaderInFile ? 1 : 0; index < rows.size(); ++index) {
            List<String> row = rows.get(index);
            int lineNumber = lineNumbers.get(index);
            List<String> fields = trimFields ? row.stream().map(field -> field == null ? null : field.trim()).collect(Collectors.toList()) : row;
            Map<String, String> sampleRecord = new LinkedHashMap<>();
            Util.filterListToMap(sampleRecord, columnNames, fields);
            sampleRecords.add(sampleRecord);
            // The CSV reader may count more lines than split("\n") produces (for
            // example when it treats a lone '\r' as a line break), so clamp both
            // ends to keep subList() in range.
            int messageEnd = Math.min(lineNumber, sampleLines.size());
            int messageStart = Math.min(prevMessageEndLineNumber, messageEnd);
            sampleMessages.add(String.join("\n", sampleLines.subList(messageStart, messageEnd)));
            prevMessageEndLineNumber = lineNumber;
        }

        String preamble = String.join("\n", sampleLines.subList(0, Math.min(lineNumbers.get(1), sampleLines.size()))) + "\n";
        // Drop the reference so the split lines can be collected before the
        // (potentially expensive) timestamp and mapping analysis.
        sampleLines = null;

        char delimiter = (char) csvPreference.getDelimiterChar();
        FileStructure.Builder structureBuilder = new FileStructure.Builder(FileStructure.Format.DELIMITED)
            .setCharset(charsetName)
            .setHasByteOrderMarker(hasByteOrderMarker)
            .setSampleStart(preamble)
            .setNumLinesAnalyzed(lineNumbers.get(lineNumbers.size() - 1))
            .setNumMessagesAnalyzed(sampleRecords.size())
            .setHasHeaderRow(isHeaderInFile)
            .setDelimiter(delimiter)
            .setQuote(csvPreference.getQuoteChar())
            .setColumnNames(Arrays.stream(columnNames).collect(Collectors.toList()));
        if (trimFields) {
            structureBuilder.setShouldTrimFields(true);
        }

        Tuple<String, TimestampFormatFinder> timeField = FileStructureUtils.guessTimestampField(explanation, sampleRecords, overrides, timeoutChecker);
        if (timeField != null) {
            String quote = String.valueOf(csvPreference.getQuoteChar());
            String twoQuotes = quote + quote;
            String optQuote = quote.replaceAll(REGEX_NEEDS_ESCAPE_PATTERN, "\\\\$1") + "?";
            // The delimiter goes into generated regexes, so it must be escaped
            // ('|' would otherwise act as alternation); tab is written as \t.
            String delimiterPattern = delimiter == '\t' ? "\\t" : String.valueOf(delimiter).replaceAll(REGEX_NEEDS_ESCAPE_PATTERN, "\\\\$1");

            // The last column is deliberately excluded: if the timestamp is the
            // last column no multiline start pattern is generated.
            String timeLineRegex = null;
            StringBuilder builder = new StringBuilder("^");
            for (String column : Arrays.asList(columnNames).subList(0, columnNames.length - 1)) {
                if (timeField.v1().equals(column)) {
                    builder.append(optQuote);
                    String simpleTimePattern = timeField.v2().getSimplePattern().pattern();
                    builder.append(simpleTimePattern.startsWith("\\b") ? simpleTimePattern.substring(2) : simpleTimePattern);
                    timeLineRegex = builder.toString();
                    break;
                }
                builder.append(".*?").append(delimiterPattern);
            }

            if (isHeaderInFile) {
                // Match the header line using the sample's actual delimiter rather
                // than assuming a comma.
                structureBuilder.setExcludeLinesPattern("^" + Arrays.stream(header).map(column -> optQuote + column.replace(quote, twoQuotes).replaceAll(REGEX_NEEDS_ESCAPE_PATTERN, "\\\\$1") + optQuote).collect(Collectors.joining(delimiterPattern)));
            }

            boolean needClientTimeZone = timeField.v2().hasTimezoneDependentParsing();
            structureBuilder.setTimestampField(timeField.v1())
                .setJodaTimestampFormats(timeField.v2().getJodaTimestampFormats())
                .setJavaTimestampFormats(timeField.v2().getJavaTimestampFormats())
                .setNeedClientTimezone(needClientTimeZone)
                .setIngestPipeline(FileStructureUtils.makeIngestPipelineDefinition(null, Collections.emptyMap(), timeField.v1(), timeField.v2().getJavaTimestampFormats(), needClientTimeZone))
                .setMultilineStartPattern(timeLineRegex);
        }

        Tuple<SortedMap<String, Object>, SortedMap<String, FieldStats>> mappingsAndFieldStats = FileStructureUtils.guessMappingsAndCalculateFieldStats(explanation, sampleRecords, timeoutChecker);
        SortedMap<String, Object> mappings = mappingsAndFieldStats.v1();
        if (timeField != null) {
            mappings.put("@timestamp", FileStructureUtils.DATE_MAPPING_WITHOUT_FORMAT);
        }
        if (mappingsAndFieldStats.v2() != null) {
            structureBuilder.setFieldStats(mappingsAndFieldStats.v2());
        }

        FileStructure structure = structureBuilder.setMappings(mappings).setExplanation(explanation).build();
        return new DelimitedFileStructureFinder(sampleMessages, structure);
    }

    /**
     * Stores the deduced structure together with an unmodifiable view of the
     * sample messages.
     *
     * @param sampleMessages raw text of each sampled record
     * @param structure      the structure deduced from the sample
     */
    private DelimitedFileStructureFinder(List<String> sampleMessages, FileStructure structure) {
        List<String> readOnlyMessages = Collections.unmodifiableList(sampleMessages);
        this.sampleMessages = readOnlyMessages;
        this.structure = structure;
    }

    /**
     * @return the unmodifiable list of sample messages held by this finder
     */
    @Override
    public List<String> getSampleMessages() {
        return sampleMessages;
    }

    /**
     * @return the structure held by this finder
     */
    @Override
    public FileStructure getStructure() {
        return structure;
    }

    /**
     * Parses the sample into rows, recording for each row the reader's line
     * number immediately after the row was read.
     *
     * <p>Rows after the first have trailing {@code null} fields removed while
     * they are wider than the first row. A parse failure that is an
     * "unexpected end of file" inside a quoted column is tolerated (the rows
     * read so far are kept); any other {@link SuperCsvException} is rethrown.
     * Finally, if the last row's width differs from the first row's, the last
     * row and its line number are dropped.
     *
     * @param sample         the sample text
     * @param csvPreference  delimiter/quote configuration
     * @param timeoutChecker checked after every row
     * @return a tuple of (rows, line numbers), index-aligned
     * @throws IOException if the reader fails
     */
    static Tuple<List<List<String>>, List<Integer>> readRows(String sample, CsvPreference csvPreference, TimeoutChecker timeoutChecker) throws IOException {
        List<List<String>> parsedRows = new ArrayList<>();
        List<Integer> endLineNumbers = new ArrayList<>();
        int expectedFieldCount = -1;
        try (CsvListReader reader = new CsvListReader(new StringReader(sample), csvPreference)) {
            try {
                for (List<String> fields = reader.read(); fields != null; fields = reader.read()) {
                    if (expectedFieldCount >= 0) {
                        // Surplus columns are tolerated only when they are empty
                        // (null), so strip those from the end.
                        int last = fields.size() - 1;
                        while (last >= expectedFieldCount && fields.get(last) == null) {
                            fields.remove(last);
                            --last;
                        }
                    } else {
                        expectedFieldCount = fields.size();
                    }
                    parsedRows.add(fields);
                    timeoutChecker.check("delimited record parsing");
                    endLineNumbers.add(reader.getLineNumber());
                }
            } catch (SuperCsvException e) {
                // An incomplete final row is acceptable; anything else is not.
                if (DelimitedFileStructureFinder.notUnexpectedEndOfFile(e)) {
                    throw e;
                }
            }
        }

        assert (!parsedRows.isEmpty());
        assert (endLineNumbers.size() == parsedRows.size());

        List<String> firstParsed = parsedRows.get(0);
        List<String> lastParsed = parsedRows.get(parsedRows.size() - 1);
        if (firstParsed.size() != lastParsed.size()) {
            parsedRows.remove(parsedRows.size() - 1);
            endLineNumbers.remove(endLineNumbers.size() - 1);
        }

        assert (parsedRows.size() > 1);
        return new Tuple<>(parsedRows, endLineNumbers);
    }

    /**
     * Decides whether the first row of the sample is a header and returns the
     * header cells.
     *
     * <p>An explicit override wins. Otherwise the first row is not a header if
     * it contains duplicate non-empty values, is assumed to be a header when
     * there are fewer than three rows, and is otherwise judged by
     * {@code isFirstRowUnusual}.
     *
     * @param explanation list that human-readable reasoning is appended to
     * @param rows        parsed rows; must not be empty
     * @param overrides   user-supplied overrides
     * @return a tuple of (header present?, header cells). The array has one entry
     *         per field of the first row; entries are the first row's values with
     *         {@code null} mapped to {@code ""} when a header is present, and all
     *         {@code ""} otherwise
     * @throws IllegalArgumentException if a header row is specified, no column
     *                                  names are overridden, and the first row
     *                                  contains duplicate non-empty values
     */
    static Tuple<Boolean, String[]> findHeaderFromSample(List<String> explanation, List<List<String>> rows, FileStructureOverrides overrides) {
        assert (!rows.isEmpty());
        List<String> overriddenColumnNames = overrides.getColumnNames();
        List<String> candidate = rows.get(0);

        final boolean headerPresent;
        Boolean headerOverride = overrides.getHasHeaderRow();
        if (headerOverride != null) {
            headerPresent = headerOverride;
            if (headerPresent && overriddenColumnNames == null) {
                String duplicateValue = DelimitedFileStructureFinder.findDuplicateNonEmptyValues(candidate);
                if (duplicateValue != null) {
                    throw new IllegalArgumentException("Sample specified to contain a header row, but the first row contains duplicate values: [" + duplicateValue + "]");
                }
            }
            explanation.add("Sample specified to " + (headerPresent ? "contain" : "not contain") + " a header row");
        } else if (DelimitedFileStructureFinder.findDuplicateNonEmptyValues(candidate) != null) {
            headerPresent = false;
            explanation.add("First row contains duplicate values, so assuming it's not a header");
        } else if (rows.size() < 3) {
            headerPresent = true;
            explanation.add("Too little data to accurately assess whether header is in sample - guessing it is");
        } else {
            headerPresent = DelimitedFileStructureFinder.isFirstRowUnusual(explanation, rows);
        }

        String[] headerCells = new String[candidate.size()];
        int position = 0;
        for (String cell : candidate) {
            headerCells[position++] = (headerPresent && cell != null) ? cell : "";
        }
        return new Tuple<>(headerPresent, headerCells);
    }

    /**
     * Finds the first value in the row that repeats an earlier value, ignoring
     * {@code null} and empty strings.
     *
     * @param row the row to inspect
     * @return the first repeated non-empty value, or {@code null} if there is none
     */
    static String findDuplicateNonEmptyValues(List<String> row) {
        HashSet<String> seen = new HashSet<>();
        for (String candidate : row) {
            boolean blank = candidate == null || candidate.isEmpty();
            if (!blank && !seen.add(candidate)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Heuristically decides whether the first row looks different enough from
     * the remaining rows to be treated as a header.
     *
     * <p>Two tests are applied in order, each adding a line to
     * {@code explanation}:
     * <ol>
     *   <li>Length test: the first row's total field length lies outside the
     *       min..max range of the other rows' lengths, widened by 10% of that
     *       range at each end.</li>
     *   <li>Levenshtein test: the average field-wise edit distance between the
     *       first row and the other rows exceeds 1.2 times the average distance
     *       between pairs of other rows. Only "short" fields take part, and the
     *       number of comparisons is capped.</li>
     * </ol>
     *
     * @param explanation list that human-readable reasoning is appended to
     * @param rows        all parsed rows; at least three are expected
     * @return {@code true} if either test finds the first row unusual
     */
    private static boolean isFirstRowUnusual(List<String> explanation, List<List<String>> rows) {
        assert (rows.size() >= 3);
        List<String> firstRow = rows.get(0);
        List<List<String>> otherRows = rows.subList(1, rows.size());
        int otherRowCount = otherRows.size();

        // --- Length test ---
        double firstRowLength = 0;
        for (String field : firstRow) {
            if (field != null) {
                firstRowLength += field.length();
            }
        }
        DoubleSummaryStatistics otherRowStats = new DoubleSummaryStatistics();
        for (List<String> row : otherRows) {
            int rowLength = 0;
            for (String field : row) {
                if (field != null) {
                    rowLength += field.length();
                }
            }
            otherRowStats.accept(rowLength);
        }
        double tolerance = (otherRowStats.getMax() - otherRowStats.getMin()) / 10.0;
        boolean tooShort = firstRowLength < otherRowStats.getMin() - tolerance;
        boolean tooLong = firstRowLength > otherRowStats.getMax() + tolerance;
        String lengthSummary = "[" + firstRowLength + "] and [" + DelimitedFileStructureFinder.toNiceString(otherRowStats) + "]";
        if (tooShort || tooLong) {
            explanation.add("First row is unusual based on length test: " + lengthSummary);
            return true;
        }
        explanation.add("First row is not unusual based on length test: " + lengthSummary);

        // --- Levenshtein test, restricted to short fields ---
        BitSet shortFieldMask = DelimitedFileStructureFinder.makeShortFieldMask(rows, DelimitedFileStructureFinder.LONG_FIELD_THRESHOLD);

        DoubleSummaryStatistics firstRowStats = new DoubleSummaryStatistics();
        int firstRowComparisons = Math.min(otherRowCount, DelimitedFileStructureFinder.MAX_LEVENSHTEIN_COMPARISONS);
        for (int k = 0; k < firstRowComparisons; ++k) {
            firstRowStats.accept(DelimitedFileStructureFinder.levenshteinFieldwiseCompareRows(firstRow, otherRows.get(k), shortFieldMask));
        }

        // Compare a bounded, pseudo-randomly strided selection of pairs of other
        // rows. The generator is seeded from the first row's hash code.
        otherRowStats = new DoubleSummaryStatistics();
        int proportion = otherRowCount / DelimitedFileStructureFinder.MAX_LEVENSHTEIN_COMPARISONS;
        int stride = 1 + proportion * proportion;
        Random random = new Random(firstRow.hashCode());
        int comparisons = 0;
        for (int i = 0; comparisons < DelimitedFileStructureFinder.MAX_LEVENSHTEIN_COMPARISONS && i < otherRowCount; ++i) {
            int j = i + 1 + random.nextInt(stride);
            while (comparisons < DelimitedFileStructureFinder.MAX_LEVENSHTEIN_COMPARISONS && j < otherRowCount) {
                otherRowStats.accept(DelimitedFileStructureFinder.levenshteinFieldwiseCompareRows(otherRows.get(i), otherRows.get(j), shortFieldMask));
                ++comparisons;
                j += stride;
            }
        }

        boolean unusual = firstRowStats.getAverage() > otherRowStats.getAverage() * 1.2;
        String distanceSummary = "[" + DelimitedFileStructureFinder.toNiceString(firstRowStats) + "] and [" + DelimitedFileStructureFinder.toNiceString(otherRowStats) + "]";
        if (unusual) {
            explanation.add("First row is unusual based on Levenshtein test " + distanceSummary);
        } else {
            explanation.add("First row is not unusual based on Levenshtein test " + distanceSummary);
        }
        return unusual;
    }

    /**
     * Formats the count, minimum, average and maximum of the statistics using
     * the root locale.
     *
     * @param stats the statistics to describe
     * @return a string of the form {@code count=N, min=X, average=Y, max=Z}
     */
    private static String toNiceString(DoubleSummaryStatistics stats) {
        long count = stats.getCount();
        double min = stats.getMin();
        double average = stats.getAverage();
        double max = stats.getMax();
        return String.format(Locale.ROOT, "count=%d, min=%f, average=%f, max=%f", count, min, average, max);
    }

    /**
     * Builds a mask with one bit per column position, set when every row's
     * value in that column is absent (row too short), {@code null}, or shorter
     * than {@code longFieldThreshold}.
     *
     * @param rows               the rows to inspect; must not be empty
     * @param longFieldThreshold length at or above which a field counts as long
     * @return the mask of columns that contain only short fields
     */
    static BitSet makeShortFieldMask(List<List<String>> rows, int longFieldThreshold) {
        assert (!rows.isEmpty());
        BitSet mask = new BitSet();
        int widest = rows.stream().mapToInt(List::size).max().getAsInt();
        for (int column = 0; column < widest; ++column) {
            boolean allShort = true;
            for (List<String> row : rows) {
                if (column >= row.size()) {
                    continue;
                }
                String field = row.get(column);
                if (field != null && field.length() >= longFieldThreshold) {
                    allShort = false;
                    break;
                }
            }
            mask.set(column, allShort);
        }
        return mask;
    }

    /**
     * Sums the edit distances between corresponding fields of two rows,
     * considering every column position up to the wider row's size.
     *
     * @param firstRow  one row
     * @param secondRow the other row
     * @return the total field-wise edit distance, or 0 if both rows are empty
     */
    static int levenshteinFieldwiseCompareRows(List<String> firstRow, List<String> secondRow) {
        int fieldCount = Math.max(firstRow.size(), secondRow.size());
        if (fieldCount >= 1) {
            BitSet everyField = new BitSet();
            everyField.set(0, fieldCount);
            return DelimitedFileStructureFinder.levenshteinFieldwiseCompareRows(firstRow, secondRow, everyField);
        }
        return 0;
    }

    /**
     * Sums the edit distances between corresponding fields of two rows for the
     * column positions whose bit is set in {@code fieldMask}. A position beyond
     * the end of a row is compared as the empty string.
     *
     * @param firstRow  one row
     * @param secondRow the other row
     * @param fieldMask the column positions to compare
     * @return the total edit distance over the selected columns
     */
    static int levenshteinFieldwiseCompareRows(List<String> firstRow, List<String> secondRow, BitSet fieldMask) {
        int total = 0;
        for (int column = fieldMask.nextSetBit(0); column >= 0; column = fieldMask.nextSetBit(column + 1)) {
            String left = column < firstRow.size() ? firstRow.get(column) : "";
            String right = column < secondRow.size() ? secondRow.get(column) : "";
            total += DelimitedFileStructureFinder.levenshteinDistance(left, right);
        }
        return total;
    }

    /**
     * Computes the Levenshtein edit distance between two strings, where
     * insertion, deletion and substitution each cost 1. {@code null} is treated
     * as the empty string.
     *
     * @param first  one string, may be {@code null}
     * @param second the other string, may be {@code null}
     * @return the number of single-character edits needed to turn one string
     *         into the other
     */
    static int levenshteinDistance(String first, String second) {
        int firstLen = first != null ? first.length() : 0;
        int secondLen = second != null ? second.length() : 0;
        if (firstLen == 0 || secondLen == 0) {
            // At least one side is empty, so the distance is the other side's length.
            return firstLen + secondLen;
        }

        // Only two columns of the distance matrix are kept at any time.
        int[] previous = new int[secondLen + 1];
        int[] current = new int[secondLen + 1];
        for (int row = 0; row <= secondLen; ++row) {
            current[row] = row;
        }

        for (int col = 1; col <= firstLen; ++col) {
            // Reuse the column that has just become redundant.
            int[] recycled = previous;
            previous = current;
            current = recycled;

            char firstChar = first.charAt(col - 1);
            current[0] = col;
            for (int row = 1; row <= secondLen; ++row) {
                int diagonal = previous[row - 1];
                if (firstChar == second.charAt(row - 1)) {
                    current[row] = diagonal;
                } else {
                    int cheapest = Math.min(Math.min(previous[row], current[row - 1]), diagonal);
                    current[row] = cheapest + 1;
                }
            }
        }
        return current[secondLen];
    }

    /**
     * Checks whether a line contains a quote character that is neither part of
     * a doubled (escaped) quote nor adjacent to a delimiter. The first and last
     * characters of the line (after doubled quotes are removed) are not
     * examined.
     *
     * @param line          a single line of the sample
     * @param csvPreference supplies the quote and delimiter characters
     * @return {@code true} if such a quote is found
     */
    static boolean lineHasUnescapedQuote(String line, CsvPreference csvPreference) {
        char quote = csvPreference.getQuoteChar();
        int delimiter = csvPreference.getDelimiterChar();
        String stripped = line.replace(String.valueOf(quote) + quote, "");
        int lastIndex = stripped.length() - 1;
        // Hop from quote to quote, looking only at interior positions.
        for (int pos = stripped.indexOf(quote, 1); pos >= 1 && pos < lastIndex; pos = stripped.indexOf(quote, pos + 1)) {
            boolean afterDelimiter = stripped.codePointAt(pos - 1) == delimiter;
            boolean beforeDelimiter = stripped.codePointAt(pos + 1) == delimiter;
            if (!afterDelimiter && !beforeDelimiter) {
                return true;
            }
        }
        return false;
    }

    /*
     * WARNING - Removed back jump from a try to a catch block - possible behaviour change.
     * Unable to fully structure code
     * Enabled aggressive block sorting
     * Enabled unnecessary exception pruning
     * Enabled aggressive exception aggregation
     */
    static boolean canCreateFromSample(List<String> explanation, String sample, int minFieldsPerRow, CsvPreference csvPreference, String formatName) {
        for (String sampleLine : sampleLines = sample.split("\n")) {
            if (!DelimitedFileStructureFinder.lineHasUnescapedQuote(sampleLine, csvPreference)) continue;
            explanation.add("Not " + formatName + " because a line has an unescaped quote that is not at the beginning or end of a field: [" + sampleLine + "]");
            return false;
        }
        try {
            block22: {
                csvReader = new CsvListReader((Reader)new StringReader(sample), csvPreference);
                fieldsInFirstRow = -1;
                fieldsInLastRow = -1;
                numberOfRows = 0;
lbl12:
                // 3 sources

                while (true) {
                    try {
                        row = csvReader.read();
                        if (row == null) ** GOTO lbl50
                        ++numberOfRows;
                        if (fieldsInFirstRow >= 0) ** GOTO lbl40
                        fieldsInFirstRow = fieldsInThisRow;
                        if (fieldsInFirstRow < minFieldsPerRow) {
                            explanation.add("Not " + formatName + " because the first row has fewer than [" + minFieldsPerRow + "] fields: [" + fieldsInFirstRow + "]");
                            var12_19 = false;
                            break;
                        }
                        ** GOTO lbl-1000
                    }
                    catch (SuperCsvException e) {
                        if (DelimitedFileStructureFinder.notUnexpectedEndOfFile(e)) {
                            explanation.add("Not " + formatName + " because there was a parsing exception: [" + e.getMessage() + "]");
                            var11_18 = false;
                            csvReader.close();
                            return var11_18;
                        }
                        break block22;
                    }
                    break;
                }
                csvReader.close();
                return var12_19;
lbl-1000:
                // 1 sources

                {
                    fieldsInLastRow = fieldsInFirstRow;
                    ** GOTO lbl12
lbl40:
                    // 2 sources

                    for (fieldsInThisRow = row.size(); fieldsInThisRow > fieldsInFirstRow && row.get(fieldsInThisRow - 1) == null; --fieldsInThisRow) {
                    }
                    if (fieldsInLastRow == fieldsInFirstRow) ** GOTO lbl-1000
                    explanation.add("Not " + formatName + " because row [" + (numberOfRows - 1) + "] has a different number of fields to the first row: [" + fieldsInFirstRow + "] and [" + fieldsInLastRow + "]");
                    var12_20 = false;
                }
                csvReader.close();
                return var12_20;
lbl-1000:
                // 1 sources

                {
                    fieldsInLastRow = fieldsInThisRow;
                    ** continue;
lbl50:
                    // 1 sources

                    if (fieldsInLastRow <= fieldsInFirstRow) ** GOTO lbl-1000
                    explanation.add("Not " + formatName + " because last row has more fields than first row: [" + fieldsInFirstRow + "] and [" + fieldsInLastRow + "]");
                    var11_18 = false;
                }
                csvReader.close();
                return var11_18;
lbl-1000:
                // 1 sources

                {
                    if (fieldsInLastRow >= fieldsInFirstRow) break block22;
                    --numberOfRows;
                }
            }
            if (numberOfRows <= 1) {
                explanation.add("Not " + formatName + " because fewer than 2 complete records in sample: [" + numberOfRows + "]");
                return false;
            }
            explanation.add("Deciding sample is " + formatName);
            return true;
        }
        catch (IOException e) {
            explanation.add("Not " + formatName + " because there was a parsing exception: [" + e.getMessage() + "]");
            return false;
        }
    }

    /**
     * Tells whether a CSV parsing exception is anything other than the
     * "unexpected end of file while reading quoted column" condition.
     *
     * @param e the parsing exception
     * @return {@code false} if the exception message starts with that phrase,
     *         {@code true} otherwise
     */
    private static boolean notUnexpectedEndOfFile(SuperCsvException e) {
        String message = e.getMessage();
        boolean truncatedQuotedColumn = message.startsWith("unexpected end of file while reading quoted column");
        return !truncatedQuotedColumn;
    }
}

