package org.semanticdesktop.aperture.hypertext.linkextractor.html;

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.pdfbox.pdmodel.documentinterchange.taggedpdf.StandardStructureTypes;
import org.jaudiotagger.audio.mp4.Mp4AudioHeader;
import org.jaudiotagger.tag.lyrics3.Lyrics3v2Fields;
import org.modeshape.sequencer.ddl.DdlConstants;
import org.semanticdesktop.aperture.hypertext.linkextractor.LinkExtractor;

/* loaded from: input_file:lib/modeshape-mimetype-detector-aperture-2.8.3.Final-jar-with-dependencies.jar:org/semanticdesktop/aperture/hypertext/linkextractor/html/HtmlLinkExtractor.class */
/**
 * Collects the links found in the start tags of an HTML document.
 *
 * <p>The extractor registers itself as the {@link TokenHandler} of a {@link Tokenizer}: attributes
 * reported for a start tag are buffered and, once the start tag is complete, the attributes that
 * carry URLs for that element are turned into links. Links are resolved against the base URL
 * (supplied by the caller or by a {@code BASE} element) when one is known.
 *
 * <p>Per-extraction state is kept in instance fields, which is why {@link #extractLinks} is
 * synchronized.
 */
public class HtmlLinkExtractor implements LinkExtractor, TokenHandler {
    /** Base against which links are resolved; {@code null} when no base is known. */
    private URL baseURL;

    /** Whether URLs of embedded resources (images, backgrounds, LINK targets) are reported too. */
    private boolean includeEmbeddedResources;

    /** Links collected by the extraction in progress; {@code null} when none is running. */
    private List<String> links;

    /** Upper-cased name of the start tag currently being reported by the tokenizer. */
    private String startTag;

    /** Attributes of the current start tag, keyed by upper-cased attribute name. */
    private final Map<String, String> attributes = new HashMap<String, String>();

    /**
     * Tokenizes the stream and returns every link found in it.
     *
     * @param inputStream the HTML document to scan
     * @param map optional settings; {@code BASE_URL_KEY} may hold a {@link URL} or a {@link String}
     *     and {@code INCLUDE_EMBEDDED_RESOURCES_KEY} a {@link Boolean}. Values of other types are
     *     ignored, and a {@code null} map is treated as "no settings".
     * @return the links as strings, in the order in which they were encountered
     * @throws IOException if the base URL string is malformed or the tokenizer fails to read
     */
    @Override // org.semanticdesktop.aperture.hypertext.linkextractor.LinkExtractor
    public synchronized List extractLinks(InputStream inputStream, Map map) throws IOException {
        Tokenizer tokenizer = new Tokenizer(this);
        List<String> collected = new ArrayList<String>();
        this.links = collected;
        this.baseURL = null;
        this.includeEmbeddedResources = false;
        // Drop anything a previously aborted extraction may have left behind.
        this.startTag = null;
        this.attributes.clear();
        try {
            if (map != null) {
                Object base = map.get(BASE_URL_KEY);
                if (base instanceof URL) {
                    this.baseURL = (URL) base;
                } else if (base instanceof String) {
                    this.baseURL = new URL((String) base);
                }
                Object embedded = map.get(INCLUDE_EMBEDDED_RESOURCES_KEY);
                if (embedded instanceof Boolean) {
                    this.includeEmbeddedResources = ((Boolean) embedded).booleanValue();
                }
            }
            tokenizer.read(inputStream);
            return collected;
        } finally {
            // Always release per-extraction state, also when parsing failed half-way.
            this.links = null;
            this.attributes.clear();
        }
    }

    @Override // org.semanticdesktop.aperture.hypertext.linkextractor.html.TokenHandler
    public void startDocument() {
    }

    @Override // org.semanticdesktop.aperture.hypertext.linkextractor.html.TokenHandler
    public void endDocument() {
    }

    /** Remembers the tag name; Locale.ROOT keeps the comparison with ASCII literals locale-proof. */
    @Override // org.semanticdesktop.aperture.hypertext.linkextractor.html.TokenHandler
    public void startOfStartTag(String str) {
        this.startTag = str.toUpperCase(Locale.ROOT);
    }

    /**
     * Converts the buffered attributes of the finished start tag into links (or, for {@code BASE},
     * into a new base URL) and then clears the attribute buffer.
     */
    @Override // org.semanticdesktop.aperture.hypertext.linkextractor.html.TokenHandler
    public void endOfStartTag() {
        try {
            String tag = this.startTag;
            if ("BASE".equals(tag)) {
                updateBaseURL(this.attributes.get("HREF"));
                return;
            }

            List<String> candidates = new ArrayList<String>();
            if ("META".equals(tag)) {
                String content = this.attributes.get("CONTENT");
                String httpEquiv = this.attributes.get("HTTP-EQUIV");
                if (content != null && httpEquiv != null && httpEquiv.trim().equalsIgnoreCase("REFRESH")) {
                    candidates.add(extractRefreshTarget(content));
                }
            } else if ("A".equals(tag) || "AREA".equals(tag)) {
                candidates.add(this.attributes.get("HREF"));
            } else if ("FRAME".equals(tag) || "IFRAME".equals(tag)) {
                candidates.add(this.attributes.get("SRC"));
                candidates.add(this.attributes.get("LONGDESC"));
            } else if ("HEAD".equals(tag)) {
                candidates.add(this.attributes.get("PROFILE"));
            } else if ("Q".equals(tag) || "BLOCKQUOTE".equals(tag) || "INS".equals(tag) || "DEL".equals(tag)) {
                candidates.add(this.attributes.get("CITE"));
            } else if ("LINK".equals(tag)) {
                if (this.includeEmbeddedResources) {
                    candidates.add(this.attributes.get("HREF"));
                    candidates.add(this.attributes.get("SRC"));
                }
            } else if ("LAYER".equals(tag) || "ILAYER".equals(tag)) {
                candidates.add(this.attributes.get("SRC"));
                if (this.includeEmbeddedResources) {
                    candidates.add(this.attributes.get("BACKGROUND"));
                }
            } else if ("BODY".equals(tag) || "TABLE".equals(tag) || "TR".equals(tag) || "TH".equals(tag) || "TD".equals(tag)) {
                if (this.includeEmbeddedResources) {
                    candidates.add(this.attributes.get("BACKGROUND"));
                }
            } else if ("IMG".equals(tag)) {
                if (this.includeEmbeddedResources) {
                    candidates.add(this.attributes.get("SRC"));
                    candidates.add(this.attributes.get("LOWSRC"));
                }
                candidates.add(this.attributes.get("LONGDESC"));
                candidates.add(this.attributes.get("USEMAP"));
            } else if ("INPUT".equals(tag)) {
                if (this.includeEmbeddedResources) {
                    candidates.add(this.attributes.get("SRC"));
                }
                candidates.add(this.attributes.get("USEMAP"));
            }

            // Absent attributes show up as null entries and are skipped.
            for (String candidate : candidates) {
                if (candidate != null) {
                    addLink(candidate);
                }
            }
        } finally {
            // The buffer must be empty before the attributes of the next tag arrive.
            this.attributes.clear();
        }
    }

    /** Replaces the base URL with {@code href}; a missing or malformed value leaves it untouched. */
    private void updateBaseURL(String href) {
        if (href == null) {
            return;
        }
        try {
            this.baseURL = new URL(href);
        } catch (MalformedURLException ignored) {
            // Best effort: an unusable BASE element must not discard the base we already have.
        }
    }

    /** Resolves entities in {@code rawLink}, makes it absolute when a base is known, and records it. */
    private void addLink(String rawLink) {
        String link = EntityResolver.resolveEntities(rawLink);
        if (this.baseURL != null) {
            try {
                link = new URL(this.baseURL, link).toExternalForm();
            } catch (MalformedURLException ignored) {
                // Best effort: report the link unresolved rather than dropping it.
            }
        }
        this.links.add(link);
    }

    /**
     * Extracts the target from the {@code content} value of a {@code META HTTP-EQUIV="refresh"}
     * element, e.g. {@code "5; url=http://example.org/"}.
     *
     * <p>The target starts after the first {@code ','} or {@code ';'}; an optional
     * case-insensitive {@code url =} prefix and one pair of surrounding quotes are removed.
     *
     * @return the target, or {@code null} when the value only specifies a delay or the target is
     *     empty
     */
    private static String extractRefreshTarget(String content) {
        int comma = content.indexOf(',');
        int semicolon = content.indexOf(';');
        int separator;
        if (comma == -1) {
            separator = semicolon;
        } else if (semicolon == -1) {
            separator = comma;
        } else {
            // Use the first separator: later ones may be part of the URL itself.
            separator = Math.min(comma, semicolon);
        }

        // With no separator this starts at index 0, which tolerates a bare "url=..." value.
        int start = skipWhitespace(content, separator + 1);
        boolean keywordFound = false;
        // regionMatches avoids both locale-dependent case mapping and index shifts from it.
        if (content.regionMatches(true, start, "url", 0, 3)) {
            int afterKeyword = skipWhitespace(content, start + 3);
            if (afterKeyword < content.length() && content.charAt(afterKeyword) == '=') {
                start = afterKeyword + 1;
                keywordFound = true;
            }
            // Without '=' the letters "url" are the beginning of the target itself.
        }
        if (separator == -1 && !keywordFound) {
            // Only a delay was given: the page refreshes itself, there is no link.
            return null;
        }

        String target = content.substring(start).trim();
        if (!target.isEmpty()) {
            char first = target.charAt(0);
            if (first == '\'' || first == '"') {
                int closing = target.indexOf(first, 1);
                String unquoted = closing == -1 ? target.substring(1) : target.substring(1, closing);
                target = unquoted.trim();
            }
        }
        return target.isEmpty() ? null : target;
    }

    /** Returns the first index at or after {@code from} that holds a character above {@code ' '}. */
    private static int skipWhitespace(String text, int from) {
        int index = from;
        while (index < text.length() && text.charAt(index) <= ' ') {
            index++;
        }
        return index;
    }

    @Override // org.semanticdesktop.aperture.hypertext.linkextractor.html.TokenHandler
    public void endTag(String str) {
    }

    /** Attributes without a value cannot carry a URL and are ignored. */
    @Override // org.semanticdesktop.aperture.hypertext.linkextractor.html.TokenHandler
    public void attribute(String str) {
    }

    /** Buffers an attribute of the current start tag under its upper-cased name. */
    @Override // org.semanticdesktop.aperture.hypertext.linkextractor.html.TokenHandler
    public void attribute(String str, String str2) {
        this.attributes.put(str.toUpperCase(Locale.ROOT), str2);
    }

    @Override // org.semanticdesktop.aperture.hypertext.linkextractor.html.TokenHandler
    public void text(String str) {
    }

    @Override // org.semanticdesktop.aperture.hypertext.linkextractor.html.TokenHandler
    public void comment(String str) {
    }

    @Override // org.semanticdesktop.aperture.hypertext.linkextractor.html.TokenHandler
    public void docType(String str, String str2, String str3, String str4) {
    }

    @Override // org.semanticdesktop.aperture.hypertext.linkextractor.html.TokenHandler
    public void error(String str) {
    }
}
