package org.apache.tika.parser.microsoft.ooxml;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Iterator;
import java.util.List;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackageRelationship;
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
import org.apache.poi.openxml4j.opc.TargetMode;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.Ole10Native;
import org.apache.poi.poifs.filesystem.Ole10NativeException;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaMetadataKeys;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.microsoft.OfficeParser;
import org.apache.tika.sax.EmbeddedContentHandler;
import org.apache.tika.sax.XHTMLContentHandler;
import org.apache.xmlbeans.XmlException;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/* loaded from: input_file:lib/modeshape-extractor-tika-2.8.2.Final-jar-with-dependencies.jar:org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.class */
/**
 * Base class for OOXML extractors: wraps a POI text extractor, emits the main
 * document as XHTML via {@link #buildXHTML(XHTMLContentHandler)}, and then hands
 * every internally-embedded part (audio, image, package, OLE object) of the main
 * document parts to an {@link EmbeddedDocumentExtractor}.
 */
public abstract class AbstractOOXMLExtractor implements OOXMLExtractor {
    /** Relationship type URI identifying an embedded audio part. */
    static final String RELATION_AUDIO = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/audio";
    /** Relationship type URI identifying an embedded image part. */
    static final String RELATION_IMAGE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image";
    /** Relationship type URI identifying an embedded OLE object part. */
    static final String RELATION_OLE_OBJECT = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/oleObject";
    /** Relationship type URI identifying an embedded package part. */
    static final String RELATION_PACKAGE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/package";
    /** Content type that, together with {@link #RELATION_OLE_OBJECT}, routes a part to the OLE handler. */
    private static final String TYPE_OLE_OBJECT = "application/vnd.openxmlformats-officedocument.oleObject";

    /** The wrapped POI text extractor; also the source of the document and metadata. */
    protected POIXMLTextExtractor extractor;
    /** Receives every embedded part found in the main document parts. */
    private final EmbeddedDocumentExtractor embeddedExtractor;
    /** Type string passed through to the {@link MetadataExtractor}. */
    private final String type;

    /**
     * Creates the extractor.
     *
     * @param parseContext        context consulted for an {@link EmbeddedDocumentExtractor}; when it
     *                            holds none, a {@link ParsingEmbeddedDocumentExtractor} built from
     *                            this context is used instead
     * @param pOIXMLTextExtractor the POI text extractor to wrap
     * @param str                 type string handed to the metadata extractor
     */
    public AbstractOOXMLExtractor(ParseContext parseContext, POIXMLTextExtractor pOIXMLTextExtractor, String str) {
        this.extractor = pOIXMLTextExtractor;
        this.type = str;
        EmbeddedDocumentExtractor configured =
                (EmbeddedDocumentExtractor) parseContext.get(EmbeddedDocumentExtractor.class);
        this.embeddedExtractor =
                configured != null ? configured : new ParsingEmbeddedDocumentExtractor(parseContext);
    }

    /**
     * Returns the document held by the wrapped POI extractor.
     */
    @Override
    public POIXMLDocument getDocument() {
        return this.extractor.getDocument();
    }

    /**
     * Returns a new {@link MetadataExtractor} for the wrapped extractor and this
     * extractor's type string; a fresh instance is created on every call.
     */
    @Override
    public MetadataExtractor getMetadataExtractor() {
        return new MetadataExtractor(this.extractor, this.type);
    }

    /**
     * Writes the document as XHTML: start of document, the subclass-specific body,
     * the embedded parts, then end of document.
     *
     * @param contentHandler handler receiving the SAX events
     * @param metadata       metadata passed to the {@link XHTMLContentHandler}
     * @param parseContext   parse context (not used by this method itself)
     * @throws SAXException  if the handler rejects an event
     * @throws XmlException  declared for the subclass body builder
     * @throws IOException   if reading a part fails
     * @throws TikaException if the package structure is broken
     */
    @Override
    public void getXHTML(ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws SAXException, XmlException, IOException, TikaException {
        XHTMLContentHandler xhtml = new XHTMLContentHandler(contentHandler, metadata);
        xhtml.startDocument();
        buildXHTML(xhtml);
        // Embedded parts are given the caller's handler, not the XHTML wrapper; the
        // handle* methods wrap it in an EmbeddedContentHandler themselves.
        handleEmbeddedParts(contentHandler);
        xhtml.endDocument();
    }

    /**
     * Walks the relationships of every main document part and dispatches each
     * internal audio/image/package/OLE target to the matching handler.
     *
     * @param contentHandler handler that embedded content is written to
     * @throws TikaException if a relationship or part name is malformed
     * @throws IOException   if reading an embedded part fails
     * @throws SAXException  if the handler rejects an event
     */
    private void handleEmbeddedParts(ContentHandler contentHandler) throws TikaException, IOException, SAXException {
        try {
            for (PackagePart source : getMainDocumentParts()) {
                for (PackageRelationship relationship : source.getRelationships()) {
                    if (relationship.getTargetMode() != TargetMode.INTERNAL) {
                        continue;
                    }
                    URI targetUri = stripFragment(relationship.getTargetURI());
                    PackagePart target =
                            relationship.getPackage().getPart(PackagingURIHelper.createPartName(targetUri));
                    if (target == null) {
                        // The relationship points at a part that is not in the package;
                        // skip it rather than fail with a NullPointerException below.
                        continue;
                    }
                    String relationType = relationship.getRelationshipType();
                    if (RELATION_OLE_OBJECT.equals(relationType) && TYPE_OLE_OBJECT.equals(target.getContentType())) {
                        handleEmbeddedOLE(target, contentHandler);
                    } else if (isEmbeddedFileRelation(relationType)) {
                        handleEmbeddedFile(target, contentHandler);
                    }
                }
            }
        } catch (InvalidFormatException e) {
            throw new TikaException("Broken OOXML file", e);
        }
    }

    /** Returns the URI without its fragment ("#..."), or the URI itself when it has none. */
    private static URI stripFragment(URI uri) throws TikaException {
        if (uri.getFragment() == null) {
            return uri;
        }
        try {
            String text = uri.toString();
            return new URI(text.substring(0, text.indexOf('#')));
        } catch (URISyntaxException e) {
            throw new TikaException("Broken OOXML file", e);
        }
    }

    /** Tells whether the relationship type is one whose target is passed on as a plain embedded file. */
    private static boolean isEmbeddedFileRelation(String relationType) {
        return RELATION_AUDIO.equals(relationType)
                || RELATION_IMAGE.equals(relationType)
                || RELATION_PACKAGE.equals(relationType)
                || RELATION_OLE_OBJECT.equals(relationType);
    }

    /**
     * Handles an embedded OLE object part by opening it as a POIFS file system.
     * Three cases are distinguished: a root holding CONTENTS plus the Ole, CompObj
     * and ObjInfo entries (the CONTENTS stream is parsed), an OLE10 native object
     * (its data buffer is parsed under its label), and anything else (the whole
     * part is passed on as an embedded file).
     *
     * @param packagePart    the OLE object part
     * @param contentHandler handler that embedded content is written to
     * @throws IOException  if the part cannot be read as a POIFS file system
     * @throws SAXException if the handler rejects an event
     */
    private void handleEmbeddedOLE(PackagePart packagePart, ContentHandler contentHandler) throws IOException, SAXException {
        POIFSFileSystem fileSystem = new POIFSFileSystem(packagePart.getInputStream());
        try {
            Metadata metadata = new Metadata();
            DirectoryNode root = fileSystem.getRoot();
            OfficeParser.POIFSDocumentType documentType = OfficeParser.POIFSDocumentType.detectType((DirectoryEntry) root);
            if (root.hasEntry("CONTENTS") && root.hasEntry("\u0001Ole") && root.hasEntry("\u0001CompObj") && root.hasEntry("\u0003ObjInfo")) {
                TikaInputStream contents = TikaInputStream.get((InputStream) fileSystem.createDocumentInputStream("CONTENTS"));
                try {
                    if (this.embeddedExtractor.shouldParseEmbedded(metadata)) {
                        this.embeddedExtractor.parseEmbedded(contents, new EmbeddedContentHandler(contentHandler), metadata, false);
                    }
                } finally {
                    contents.close();
                }
            } else if (OfficeParser.POIFSDocumentType.OLE10_NATIVE == documentType) {
                Ole10Native nativeObject = Ole10Native.createFromEmbeddedOleObject(fileSystem);
                metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, nativeObject.getLabel());
                byte[] data = nativeObject.getDataBuffer();
                if (data != null) {
                    TikaInputStream payload = TikaInputStream.get(data);
                    try {
                        if (this.embeddedExtractor.shouldParseEmbedded(metadata)) {
                            this.embeddedExtractor.parseEmbedded(payload, new EmbeddedContentHandler(contentHandler), metadata, false);
                        }
                    } finally {
                        payload.close();
                    }
                }
            } else {
                handleEmbeddedFile(packagePart, contentHandler);
            }
        } catch (FileNotFoundException ignored) {
            // Deliberately best-effort: an expected entry could not be opened, so this
            // embedded object is skipped and the rest of the document is still processed.
        } catch (Ole10NativeException ignored) {
            // Deliberately best-effort: the OLE10 native object could not be decoded,
            // so this embedded object is skipped.
        }
    }

    /**
     * Passes a package part to the embedded document extractor as-is. The resource
     * name is the last path segment of the part name and the content type is the
     * part's declared content type.
     *
     * @param packagePart    the part to hand over
     * @param contentHandler handler that embedded content is written to
     * @throws SAXException if the handler rejects an event
     * @throws IOException  if the part's stream cannot be opened, read or closed
     */
    protected void handleEmbeddedFile(PackagePart packagePart, ContentHandler contentHandler) throws SAXException, IOException {
        Metadata metadata = new Metadata();
        String partName = packagePart.getPartName().getName();
        metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, partName.substring(partName.lastIndexOf('/') + 1));
        metadata.set("Content-Type", packagePart.getContentType());
        if (!this.embeddedExtractor.shouldParseEmbedded(metadata)) {
            return;
        }
        TikaInputStream stream = TikaInputStream.get(packagePart.getInputStream());
        try {
            this.embeddedExtractor.parseEmbedded(stream, new EmbeddedContentHandler(contentHandler), metadata, false);
        } finally {
            // Release the part's stream and anything the TikaInputStream allocated for it.
            stream.close();
        }
    }

    /**
     * Writes the format-specific body of the document to the given XHTML handler.
     * Called by {@link #getXHTML} between the start and end of the document.
     *
     * @param xHTMLContentHandler handler to write the body to
     */
    protected abstract void buildXHTML(XHTMLContentHandler xHTMLContentHandler) throws SAXException, XmlException, IOException;

    /**
     * Returns the package parts whose relationships are scanned for embedded content.
     *
     * @return the main document parts
     * @throws TikaException if the parts cannot be determined
     */
    protected abstract List<PackagePart> getMainDocumentParts() throws TikaException;
}
