package edu.uci.ics.crawler4j.parser;

import ch.qos.logback.classic.net.SyslogAppender;
import com.rapidminer.extension.webtableextraction.microdataparser.BaseNodeVisitor;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:edu/uci/ics/crawler4j/parser/HtmlContentHandler.class */
/**
 * SAX content handler that gathers link and text information from HTML parse events.
 *
 * <p>While a document is being parsed it records:
 * <ul>
 *   <li>every URL found in {@code href}/{@code src} attributes of the elements listed in
 *       {@link Element}, plus URLs announced through {@code refresh}/{@code location} meta tags;</li>
 *   <li>the anchor text of {@code a}, {@code area} and {@code link} elements that carry a link;</li>
 *   <li>all meta tags (keyed by lower-cased {@code http-equiv} or {@code name});</li>
 *   <li>the character data found inside the {@code body} element;</li>
 *   <li>the {@code href} of the first {@code base} element.</li>
 * </ul>
 *
 * <p>Element names are matched case-sensitively against lower-case names, using the local name
 * passed to {@link #startElement} / {@link #endElement}.
 */
public class HtmlContentHandler extends DefaultHandler {
    /** Anchor texts longer than this many characters are cut and suffixed with "...". */
    private static final int MAX_ANCHOR_LENGTH = 100;

    /** Marker that precedes the target URL inside a meta-refresh {@code content} value. */
    private static final String META_REFRESH_URL_KEY = "url=";

    private String base;
    private String metaRefresh;
    private String metaLocation;
    private final Map<String, String> metaTags = new HashMap<String, String>();

    /** Link entry of the anchor-like element currently open, or {@code null} when there is none. */
    private ExtractedUrlAnchorPair curUrl = null;
    private boolean anchorFlag = false;
    private final StringBuilder anchorText = new StringBuilder();
    private boolean isWithinBodyElement = false;
    private final StringBuilder bodyText = new StringBuilder();
    private final List<ExtractedUrlAnchorPair> outgoingUrls = new ArrayList<ExtractedUrlAnchorPair>();

    /** HTML elements this handler reacts to. */
    public enum Element {
        A,
        AREA,
        LINK,
        IFRAME,
        FRAME,
        EMBED,
        IMG,
        BASE,
        META,
        BODY,
        SCRIPT
    }

    /** Lookup table from lower-case element name to {@link Element}. */
    private static class HtmlFactory {
        private static final Map<String, Element> name2Element = new HashMap<String, Element>();

        static {
            for (Element element : Element.values()) {
                // Locale.ROOT keeps the keys plain ASCII; a Turkish default locale would turn
                // the 'I' of IMG/LINK/IFRAME into a dotless 'ı' and break every lookup.
                name2Element.put(element.name().toLowerCase(Locale.ROOT), element);
            }
        }

        private HtmlFactory() {
        }

        /**
         * @param str element name as reported by the parser
         * @return the matching element, or {@code null} if the name is not tracked
         */
        public static Element getElement(String str) {
            return name2Element.get(str);
        }
    }

    /**
     * Records links, meta information and body entry for the element being opened.
     *
     * @param uri        namespace URI (unused)
     * @param localName  local element name; decides how the element is handled
     * @param qName      qualified element name (unused)
     * @param attributes attributes of the element
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes)
            throws SAXException {
        Element element = HtmlFactory.getElement(localName);
        if (element == null) {
            return;
        }
        switch (element) {
            case A:
            case AREA:
            case LINK: {
                // NOTE(review): attribute-name constant comes from an unrelated library,
                // presumably "href" - confirm and inline.
                String href = attributes.getValue(BaseNodeVisitor.VALUE_EXTRACTION_ATTRIBUTE_HREF);
                if (href != null) {
                    this.anchorFlag = true;
                    addToOutgoingUrls(href, localName, attributes);
                }
                break;
            }
            case IMG:
            case IFRAME:
            case FRAME:
            case EMBED:
            case SCRIPT: {
                // NOTE(review): presumably "src" - confirm and inline.
                String src = attributes.getValue(BaseNodeVisitor.VALUE_EXTRACTION_ATTRIBUTE_SRC);
                if (src != null) {
                    addToOutgoingUrls(src, localName);
                }
                break;
            }
            case BASE:
                // Only the first base element that carries the attribute is honoured.
                if (this.base == null) {
                    String href = attributes.getValue(BaseNodeVisitor.VALUE_EXTRACTION_ATTRIBUTE_HREF);
                    if (href != null) {
                        this.base = href;
                    }
                }
                break;
            case META:
                recordMetaTag(localName, attributes);
                break;
            case BODY:
                this.isWithinBodyElement = true;
                break;
            default:
                break;
        }
    }

    /**
     * Stores a meta tag and, for the first {@code refresh} / {@code location} tag, registers
     * the URL it points to as an outgoing link.
     */
    private void recordMetaTag(String tag, Attributes attributes) {
        String name = attributes.getValue("http-equiv");
        if (name == null) {
            name = attributes.getValue("name");
        }
        String content = attributes.getValue("content");
        if (name == null || content == null) {
            return;
        }
        String key = name.toLowerCase(Locale.ROOT);
        this.metaTags.put(key, content);
        if ("refresh".equals(key) && this.metaRefresh == null) {
            // Search the original string: lower-casing can change its length, which would
            // make an index found in the lower-cased copy point at the wrong character.
            int pos = indexOfIgnoreCase(content, META_REFRESH_URL_KEY);
            if (pos != -1) {
                this.metaRefresh = content.substring(pos + META_REFRESH_URL_KEY.length());
                addToOutgoingUrls(this.metaRefresh, tag);
            }
        }
        if ("location".equals(key) && this.metaLocation == null) {
            this.metaLocation = content;
            addToOutgoingUrls(this.metaLocation, tag);
        }
    }

    /** Returns the index of the first case-insensitive occurrence of {@code token}, or -1. */
    private static int indexOfIgnoreCase(String text, String token) {
        int last = text.length() - token.length();
        for (int i = 0; i <= last; i++) {
            if (text.regionMatches(true, i, token, 0, token.length())) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Registers a link that never receives anchor text (images, frames, scripts, meta tags).
     * It deliberately leaves {@code curUrl} untouched so that such an element nested inside
     * an anchor does not steal the anchor's text when the anchor is closed.
     */
    private void addToOutgoingUrls(String str, String str2) {
        ExtractedUrlAnchorPair pair = new ExtractedUrlAnchorPair();
        pair.setHref(str);
        pair.setTag(str2);
        this.outgoingUrls.add(pair);
    }

    /**
     * Registers the link of an anchor-like element, copies all of its attributes onto the
     * entry and makes it the current entry that will receive the anchor text.
     */
    private void addToOutgoingUrls(String str, String str2, Attributes attributes) {
        ExtractedUrlAnchorPair pair = new ExtractedUrlAnchorPair();
        pair.setHref(str);
        pair.setTag(str2);
        for (int i = 0; i < attributes.getLength(); i++) {
            // Read the value by index: getValue(String) looks up by qualified name and
            // can miss, or mismatch, when it is handed a local name.
            pair.setAttribute(attributes.getLocalName(i), attributes.getValue(i));
        }
        this.outgoingUrls.add(pair);
        this.curUrl = pair;
    }

    /**
     * Finishes an anchor-like element by attaching the collected anchor text to its link
     * entry, or leaves the body when {@code body} is closed.
     *
     * @param uri       namespace URI (unused)
     * @param localName local element name
     * @param qName     qualified element name (unused)
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        Element element = HtmlFactory.getElement(localName);
        if (element == Element.BODY) {
            this.isWithinBodyElement = false;
            return;
        }
        if (element != Element.A && element != Element.AREA && element != Element.LINK) {
            return;
        }
        this.anchorFlag = false;
        if (this.curUrl != null) {
            // Newlines and tabs are flattened to spaces. The tab literal stands in for a
            // logging-library constant the decompiled source referenced here.
            String anchor = this.anchorText.toString().replace('\n', ' ').replace('\t', ' ').trim();
            if (!anchor.isEmpty()) {
                if (anchor.length() > MAX_ANCHOR_LENGTH) {
                    anchor = anchor.substring(0, MAX_ANCHOR_LENGTH) + "...";
                }
                this.curUrl.setTag(localName);
                this.curUrl.setAnchor(anchor);
            }
        }
        this.anchorText.setLength(0);
        this.curUrl = null;
    }

    /**
     * Appends character data found inside the body to the body text (chunks separated by a
     * single space) and, while an anchor-like element with a link is open, to the anchor text.
     * Character data outside the body is ignored.
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        if (!this.isWithinBodyElement) {
            return;
        }
        if (this.bodyText.length() > 0) {
            this.bodyText.append(' ');
        }
        this.bodyText.append(ch, start, length);
        if (this.anchorFlag) {
            this.anchorText.append(ch, start, length);
        }
    }

    /** @return the text collected from inside the body element so far */
    public String getBodyText() {
        return this.bodyText.toString();
    }

    /** @return the live list of links collected so far, in document order */
    public List<ExtractedUrlAnchorPair> getOutgoingUrls() {
        return this.outgoingUrls;
    }

    /** @return the {@code href} of the first base element seen, or {@code null} if none was seen */
    public String getBaseUrl() {
        return this.base;
    }

    /** @return the live map of meta tag contents keyed by lower-cased http-equiv/name */
    public Map<String, String> getMetaTags() {
        return this.metaTags;
    }
}
