/*
 * Decompiled with CFR 0.152.
 */
package org.apache.lucene.benchmark.byTask.feeds;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.Locale;
import java.util.Properties;
import java.util.Set;
import org.apache.lucene.benchmark.byTask.feeds.DocData;
import org.apache.lucene.benchmark.byTask.feeds.HTMLParser;
import org.apache.lucene.benchmark.byTask.feeds.TrecContentSource;
import org.cyberneko.html.parsers.SAXParser;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * {@link HTMLParser} implementation that pulls the title, the {@code <meta>} tags and the
 * body text out of an HTML document using the NekoHTML SAX parser.
 */
public class DemoHTMLParser implements HTMLParser {

    /**
     * Parses HTML read from {@code reader} and stores the result in {@code docData}.
     *
     * <p>Delegates to {@link #parse(DocData, String, Date, InputSource, TrecContentSource)}
     * and converts any {@link SAXException} into an {@link IOException} that keeps the
     * original exception as its cause.
     *
     * @param docData the instance that is cleared and refilled with the parsed content
     * @param name    name to set on {@code docData}
     * @param date    date to set on {@code docData} unless the document overrides it
     * @param reader  character source of the HTML document
     * @param trecSrc used to parse a {@code date} meta tag, when the document has one
     * @return the same {@code docData} instance that was passed in
     * @throws IOException if reading fails or the SAX parser reports an error
     */
    @Override
    public DocData parse(DocData docData, String name, Date date, Reader reader, TrecContentSource trecSrc) throws IOException {
        final InputSource source = new InputSource(reader);
        try {
            return parse(docData, name, date, source, trecSrc);
        } catch (SAXException saxe) {
            throw new IOException("SAX exception occurred while parsing HTML document.", saxe);
        }
    }

    /**
     * Parses HTML read from {@code source} and stores the result in {@code docData}.
     *
     * <p>If the document carries a meta tag named {@code date} and
     * {@code trecSrc.parseDate} returns a non-null value for it, that value replaces
     * the {@code date} argument. {@code trecSrc} is only dereferenced when such a meta
     * tag exists.
     *
     * @param docData the instance that is cleared and refilled with the parsed content
     * @param name    name to set on {@code docData}
     * @param date    fallback date used when the document supplies no parseable date
     * @param source  SAX input source of the HTML document
     * @param trecSrc used to parse the {@code date} meta tag value
     * @return the same {@code docData} instance that was passed in
     * @throws IOException  if reading the source fails
     * @throws SAXException if the SAX parser or the content handler reports an error
     */
    public DocData parse(DocData docData, String name, Date date, InputSource source, TrecContentSource trecSrc) throws IOException, SAXException {
        final Parser parsed = new Parser(source);
        final Properties metaTags = parsed.metaTags;

        // A date declared by the document itself wins over the caller-supplied one,
        // but only when it can actually be parsed.
        Date effectiveDate = date;
        final String metaDate = metaTags.getProperty("date");
        if (metaDate != null) {
            final Date fromMeta = trecSrc.parseDate(metaDate);
            if (fromMeta != null) {
                effectiveDate = fromMeta;
            }
        }

        docData.clear();
        docData.setName(name);
        docData.setBody(parsed.body);
        docData.setTitle(parsed.title);
        docData.setProps(metaTags);
        docData.setDate(effectiveDate);
        return docData;
    }

    /**
     * Runs the SAX parse in its constructor and exposes the extracted title, body text
     * and meta tags as public final fields.
     */
    public static final class Parser {
        /** Elements whose end tag adds a newline to the body text. */
        static final Set<String> ENDLINE_ELEMENTS = createElementNameSet("p", "h1", "h2", "h3", "h4", "h5", "h6", "div", "ul", "ol", "dl", "pre", "hr", "blockquote", "address", "fieldset", "table", "form", "noscript", "li", "dt", "dd", "noframes", "br", "tr", "select", "option");
        /** Elements whose character content is kept out of the body text. */
        static final Set<String> SUPPRESS_ELEMENTS = createElementNameSet("style", "script");

        /** Meta tag values keyed by the lower-cased {@code name} (or {@code http-equiv}) attribute. */
        public final Properties metaTags = new Properties();
        /** Text collected inside {@code <title>}, trimmed. */
        public final String title;
        /** Text collected inside {@code <body>}, untrimmed. */
        public final String body;

        /**
         * Parses the HTML document supplied by {@code reader}.
         *
         * @param reader character source of the HTML document
         * @throws IOException  if reading fails
         * @throws SAXException if parsing fails or the document uses framesets
         */
        public Parser(Reader reader) throws IOException, SAXException {
            this(new InputSource(reader));
        }

        /**
         * Parses the HTML document supplied by {@code source}.
         *
         * @param source SAX input source of the HTML document
         * @throws IOException  if reading fails
         * @throws SAXException if parsing fails or the document uses framesets
         */
        public Parser(InputSource source) throws IOException, SAXException {
            final SAXParser neko = new SAXParser();
            neko.setFeature("http://xml.org/sax/features/namespaces", true);
            neko.setFeature("http://cyberneko.org/html/features/balance-tags", true);
            neko.setFeature("http://cyberneko.org/html/features/report-errors", false);
            // Presumably makes the parser report lower-case names, which the lower-case
            // comparisons in the handler rely on -- confirm against the NekoHTML docs.
            neko.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
            neko.setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");

            final StringBuilder titleText = new StringBuilder();
            final StringBuilder bodyText = new StringBuilder();

            final DefaultHandler collector = new DefaultHandler() {
                // Nesting counters; a value above zero means "currently inside".
                private int bodyDepth = 0;
                private int headDepth = 0;
                private int titleDepth = 0;
                private int suppressDepth = 0;

                @Override
                public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException {
                    // Head context takes precedence over body context.
                    if (headDepth > 0) {
                        startInsideHead(localName, atts);
                        return;
                    }
                    if (bodyDepth > 0) {
                        startInsideBody(localName, atts);
                        return;
                    }
                    // Neither in head nor in body: only structural elements matter.
                    if ("body".equals(localName)) {
                        bodyDepth++;
                    } else if ("head".equals(localName)) {
                        headDepth++;
                    } else if ("frameset".equals(localName)) {
                        throw new SAXException("This parser does not support HTML framesets.");
                    }
                }

                /** Handles an opening tag seen while inside {@code <head>}. */
                private void startInsideHead(String localName, Attributes atts) {
                    if ("title".equals(localName)) {
                        titleDepth++;
                        return;
                    }
                    if (!"meta".equals(localName)) {
                        return;
                    }
                    // "name" is preferred; "http-equiv" is the fallback key.
                    String key = atts.getValue("name");
                    if (key == null) {
                        key = atts.getValue("http-equiv");
                    }
                    final String content = atts.getValue("content");
                    if (key == null || content == null) {
                        return;
                    }
                    metaTags.setProperty(key.toLowerCase(Locale.ROOT), content);
                }

                /** Handles an opening tag seen while inside {@code <body>}. */
                private void startInsideBody(String localName, Attributes atts) {
                    if (SUPPRESS_ELEMENTS.contains(localName)) {
                        suppressDepth++;
                        return;
                    }
                    if (!"img".equals(localName)) {
                        return;
                    }
                    // Image alt text is kept as body text, wrapped in square brackets.
                    final String alt = atts.getValue("alt");
                    if (alt != null) {
                        bodyText.append('[').append(alt).append(']');
                    }
                }

                @Override
                public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
                    // Body context takes precedence over head context here.
                    if (bodyDepth > 0) {
                        if ("body".equals(localName)) {
                            bodyDepth--;
                        } else if (ENDLINE_ELEMENTS.contains(localName)) {
                            bodyText.append('\n');
                        } else if (SUPPRESS_ELEMENTS.contains(localName)) {
                            suppressDepth--;
                        }
                        return;
                    }
                    if (headDepth <= 0) {
                        return;
                    }
                    if ("head".equals(localName)) {
                        headDepth--;
                    } else if (titleDepth > 0 && "title".equals(localName)) {
                        titleDepth--;
                    }
                }

                @Override
                public void characters(char[] ch, int start, int length) throws SAXException {
                    final boolean visibleBodyText = bodyDepth > 0 && suppressDepth == 0;
                    if (visibleBodyText) {
                        bodyText.append(ch, start, length);
                    } else if (titleDepth > 0) {
                        titleText.append(ch, start, length);
                    }
                }

                @Override
                public InputSource resolveEntity(String publicId, String systemId) {
                    // Every entity resolves to empty content.
                    // NOTE(review): this handler is registered below only as content and
                    // error handler, not via setEntityResolver -- confirm whether the
                    // parser ever calls this method.
                    return new InputSource(new StringReader(""));
                }
            };

            neko.setContentHandler(collector);
            neko.setErrorHandler(collector);
            neko.parse(source);

            this.title = titleText.toString().trim();
            this.body = bodyText.toString();
        }

        /**
         * Builds an unmodifiable set holding the given element names.
         *
         * @param names element names to include
         * @return an unmodifiable set containing exactly {@code names}
         */
        private static final Set<String> createElementNameSet(String ... names) {
            final Set<String> elementNames = new HashSet<String>(Arrays.asList(names));
            return Collections.unmodifiableSet(elementNames);
        }
    }
}

