package org.apache.tika.parser.html;

import com.google.gwt.dom.client.BaseElement;
import com.google.gwt.dom.client.LinkElement;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaMetadataKeys;
import org.apache.tika.sax.TextContentHandler;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

/* loaded from: input_file:org/apache/tika/parser/html/HtmlHandler.class */
/**
 * SAX handler that filters the events of a parsed HTML document before they
 * reach the wrapped {@link XHTMLContentHandler}.
 *
 * <p>What the handler does, based on the (upper-case) qualified element name:
 * <ul>
 *   <li>Before any {@code BODY}/{@code FRAMESET} element: {@code META} tags with
 *       a {@code content} attribute are copied into the {@link Metadata};
 *       {@code BASE} (with {@code href}) and {@code LINK} elements are forwarded;
 *       {@code TITLE} text is collected and stored under the {@code "title"}
 *       metadata key.</li>
 *   <li>Inside {@code BODY}/{@code FRAMESET}: only elements and attributes the
 *       {@link HtmlMapper} maps to a safe name are forwarded, and text is passed
 *       on unless an enclosing element is a discard element.</li>
 *   <li>Values of URI-carrying attributes are resolved against the
 *       {@code Content-Location} metadata entry when one is set.</li>
 * </ul>
 */
class HtmlHandler extends TextContentHandler {

    /** Mapped attribute names whose values are passed through {@link #resolve(String)}. */
    private static final Set<String> URI_ATTRIBUTES = createUriAttributes();

    /**
     * Accepted form of an {@code ICBM} meta value: two decimal numbers
     * (group 1 and group 2) separated by commas and/or whitespace.
     * Compiled once because {@link Pattern} instances are immutable and reusable.
     */
    private static final Pattern ICBM_COORDINATES =
            Pattern.compile("\\s*(-?\\d+\\.\\d+)[,\\s]+(-?\\d+\\.\\d+)\\s*");

    private final HtmlMapper mapper;
    private final XHTMLContentHandler xhtml;
    private final Metadata metadata;

    /** Nesting depth counted from the first BODY/FRAMESET start; 0 means "not in the body". */
    private int bodyLevel = 0;

    /** Nesting depth counted from the outermost open discard element; 0 means "not discarding". */
    private int discardLevel = 0;

    /** Nesting depth counted from the TITLE start; 0 means "not in the title". */
    private int titleLevel = 0;

    /** Title text collected so far. */
    private final StringBuilder title = new StringBuilder();

    /**
     * True while a head-level {@code base} start event has been forwarded without
     * its end event. Guards against emitting an end event for a BASE element whose
     * start was suppressed because it had no {@code href}.
     */
    private boolean baseOpen = false;

    /**
     * Creates the handler and, when no {@code Content-Location} is known yet,
     * tries to derive one from the resource name.
     *
     * @param htmlMapper          decides which elements/attributes are safe or discarded
     * @param xHTMLContentHandler target of the filtered events
     * @param metadata            metadata that is read and updated while parsing
     */
    private HtmlHandler(HtmlMapper htmlMapper, XHTMLContentHandler xHTMLContentHandler, Metadata metadata) {
        super(xHTMLContentHandler);
        this.mapper = htmlMapper;
        this.xhtml = xHTMLContentHandler;
        this.metadata = metadata;

        if (metadata.get("Content-Location") == null) {
            String resourceName = metadata.get(TikaMetadataKeys.RESOURCE_NAME_KEY);
            if (resourceName != null) {
                String candidate = resourceName.trim();
                try {
                    // Constructed only to validate that the name parses as a URL.
                    new URL(candidate);
                    metadata.set("Content-Location", candidate);
                } catch (MalformedURLException ignored) {
                    // Best effort: the resource name is not a URL, so no base
                    // location is recorded and references stay unresolved.
                }
            }
        }
    }

    /**
     * Creates a handler that writes to {@code contentHandler} through a new
     * {@link XHTMLContentHandler}.
     *
     * @param htmlMapper     decides which elements/attributes are safe or discarded
     * @param contentHandler receiver of the resulting XHTML events
     * @param metadata       metadata that is read and updated while parsing
     */
    public HtmlHandler(HtmlMapper htmlMapper, ContentHandler contentHandler, Metadata metadata) {
        this(htmlMapper, new XHTMLContentHandler(contentHandler, metadata), metadata);
    }

    /** Builds the set of attribute names that carry URI references. */
    private static Set<String> createUriAttributes() {
        Set<String> names = new HashSet<String>();
        names.add("src");
        names.add("href");
        names.add("longdesc");
        names.add("usemap");
        names.add("data");
        names.add("cite");
        names.add("codebase");
        names.add("classid");
        return names;
    }

    /**
     * Updates the nesting counters, then handles the element according to
     * whether parsing is still in the head or already in the body.
     *
     * @param uri       namespace URI reported by the parser
     * @param localName local name reported by the parser
     * @param qName     qualified name; compared against upper-case HTML tag names
     * @param atts      attributes of the element
     * @throws SAXException if the wrapped handler rejects a forwarded event
     */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
        // Once a counter is positive, every nested element deepens it so that
        // the matching endElement calls bring it back to zero.
        if ("TITLE".equals(qName) || titleLevel > 0) {
            titleLevel++;
        }
        if ("BODY".equals(qName) || "FRAMESET".equals(qName) || bodyLevel > 0) {
            bodyLevel++;
        }
        if (mapper.isDiscardElement(qName) || discardLevel > 0) {
            discardLevel++;
        }

        if (bodyLevel == 0 && discardLevel == 0) {
            if ("META".equals(qName) && atts.getValue("content") != null) {
                recordMeta(atts);
            } else if ("BASE".equals(qName) && atts.getValue("href") != null) {
                metadata.set("Content-Location", resolve(atts.getValue("href")));
                // NOTE(review): BaseElement.TAG / LinkElement.TAG are GWT constants,
                // presumably substituted by the decompiler for the literal tag
                // names "base" / "link" -- confirm and drop the GWT dependency.
                xhtml.startElement(uri, localName, BaseElement.TAG, atts);
                baseOpen = true;
            } else if ("LINK".equals(qName)) {
                startElementWithSafeAttributes(LinkElement.TAG, atts);
            }
        }

        if (bodyLevel > 0 && discardLevel == 0) {
            String safeName = mapper.mapSafeElement(qName);
            if (safeName != null) {
                startElementWithSafeAttributes(safeName, atts);
            }
        }

        // Every element start discards the title text collected so far.
        title.setLength(0);
    }

    /**
     * Copies a head-level META tag (known to have a {@code content} attribute)
     * into the metadata. {@code http-equiv} takes precedence over {@code name};
     * an {@code ICBM} name additionally populates latitude and longitude when
     * the content matches {@link #ICBM_COORDINATES}.
     */
    private void recordMeta(Attributes atts) {
        String content = atts.getValue("content");

        String httpEquiv = atts.getValue("http-equiv");
        if (httpEquiv != null) {
            metadata.set(httpEquiv, content);
            return;
        }

        String name = atts.getValue("name");
        if (name == null) {
            return;
        }
        metadata.set(name, content);
        if (name.equalsIgnoreCase("ICBM")) {
            Matcher coordinates = ICBM_COORDINATES.matcher(content);
            if (coordinates.matches()) {
                metadata.set(Metadata.LATITUDE, coordinates.group(1));
                metadata.set(Metadata.LONGITUDE, coordinates.group(2));
            }
        }
    }

    /**
     * Forwards a start event for {@code elementName}, keeping only the attributes
     * the mapper maps to a safe name and resolving URI-valued ones.
     *
     * @param elementName name of the element to emit
     * @param attributes  attributes as reported by the parser
     * @throws SAXException if the wrapped handler rejects the event
     */
    private void startElementWithSafeAttributes(String elementName, Attributes attributes) throws SAXException {
        if (attributes.getLength() == 0) {
            xhtml.startElement(elementName);
            return;
        }

        AttributesImpl safe = new AttributesImpl(attributes);
        int index = 0;
        while (index < safe.getLength()) {
            String mappedName = mapper.mapSafeAttribute(elementName, safe.getLocalName(index));
            if (mappedName == null) {
                // Removal shifts the following attributes down, so the same
                // index must be examined again: do not advance.
                safe.removeAttribute(index);
                continue;
            }
            safe.setLocalName(index, mappedName);
            if (URI_ATTRIBUTES.contains(mappedName)) {
                safe.setValue(index, resolve(safe.getValue(index)));
            }
            index++;
        }
        xhtml.startElement(elementName, safe);
    }

    /**
     * Emits the end events that mirror {@link #startElement}, stores the title
     * once the TITLE element closes, and unwinds the nesting counters.
     *
     * @param uri       namespace URI reported by the parser
     * @param localName local name reported by the parser
     * @param qName     qualified name; compared against upper-case HTML tag names
     * @throws SAXException if the wrapped handler rejects a forwarded event
     */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (bodyLevel == 0 && discardLevel == 0) {
            if ("LINK".equals(qName)) {
                xhtml.endElement(LinkElement.TAG);
            } else if ("BASE".equals(qName) && baseOpen) {
                // Only close a base element whose start was actually forwarded;
                // a BASE without href never produced a start event.
                baseOpen = false;
                xhtml.endElement(BaseElement.TAG);
            }
        }

        if (bodyLevel > 0 && discardLevel == 0) {
            String safeName = mapper.mapSafeElement(qName);
            if (safeName != null) {
                xhtml.endElement(safeName);
            } else if (XHTMLContentHandler.ENDLINE.contains(qName.toLowerCase(Locale.ENGLISH))) {
                // The element itself is dropped, but a line break is still emitted for it.
                xhtml.newline();
            }
        }

        if (titleLevel > 0) {
            titleLevel--;
            if (titleLevel == 0) {
                metadata.set("title", title.toString().trim());
            }
        }
        if (bodyLevel > 0) {
            bodyLevel--;
        }
        if (discardLevel > 0) {
            discardLevel--;
        }
    }

    /**
     * Collects title text while in a head-level TITLE, and forwards body text
     * that is not inside a discard element.
     *
     * @param ch     character buffer
     * @param start  offset of the first character
     * @param length number of characters
     * @throws SAXException if the wrapped handler rejects the event
     */
    @Override // org.apache.tika.sax.TextContentHandler, org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void characters(char[] ch, int start, int length) throws SAXException {
        if (titleLevel > 0 && bodyLevel == 0) {
            title.append(ch, start, length);
        }
        if (bodyLevel > 0 && discardLevel == 0) {
            super.characters(ch, start, length);
        }
    }

    /**
     * Forwards ignorable whitespace only for body content that is not inside
     * a discard element.
     *
     * @param ch     character buffer
     * @param start  offset of the first character
     * @param length number of characters
     * @throws SAXException if the wrapped handler rejects the event
     */
    @Override // org.apache.tika.sax.TextContentHandler, org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler, org.xml.sax.DocumentHandler
    public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
        if (bodyLevel > 0 && discardLevel == 0) {
            super.ignorableWhitespace(ch, start, length);
        }
    }

    /**
     * Resolves a URI reference against the {@code Content-Location} metadata entry.
     *
     * <p>The trimmed reference is returned unchanged when no Content-Location is
     * set, when it starts with one of the schemes {@code urn:}, {@code mailto:},
     * {@code tel:}, {@code data:}, {@code javascript:} or {@code about:}
     * (case-insensitive), or when either URL cannot be parsed.
     *
     * @param reference the reference as written in the document; must not be null
     * @return the resolved URL in external form, or the trimmed reference
     */
    private String resolve(String reference) {
        String trimmed = reference.trim();
        String location = metadata.get("Content-Location");
        if (location == null) {
            return trimmed;
        }

        String lower = trimmed.toLowerCase(Locale.ENGLISH);
        if (lower.startsWith("urn:")
                || lower.startsWith("mailto:")
                || lower.startsWith("tel:")
                || lower.startsWith("data:")
                || lower.startsWith("javascript:")
                || lower.startsWith("about:")) {
            return trimmed;
        }

        try {
            URL base = new URL(location.trim());
            String basePath = base.getPath();
            boolean queryOnBaseFile =
                    trimmed.startsWith("?") && basePath.length() > 0 && !basePath.endsWith("/");
            if (queryOnBaseFile) {
                // A query-only reference is appended to the base path as is.
                // NOTE(review): presumably works around java.net.URL replacing the
                // last path segment when resolving "?query" references -- confirm.
                return new URL(base.getProtocol(), base.getHost(), base.getPort(), basePath + trimmed)
                        .toExternalForm();
            }
            return new URL(base, trimmed).toExternalForm();
        } catch (MalformedURLException ignored) {
            // Base or reference is not a parsable URL: fall back to the raw reference.
            return trimmed;
        }
    }
}
