package org.apache.tika.parser.wacz;

import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipFile;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.EmbeddedDocumentUtil;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/* loaded from: input_file:org/apache/tika/parser/wacz/WACZParser.class */
/**
 * Parser for WACZ (Web Archive Collection Zipped) files.
 *
 * <p>A WACZ file is read as a ZIP archive. Every entry whose name starts with
 * {@code archive/} is handed to the {@link EmbeddedDocumentExtractor} as an embedded
 * document (transparently un-gzipping it when it starts with the gzip magic number).
 * The {@code datapackage.json} entry is recognised but currently not processed.
 */
public class WACZParser extends AbstractParser {
    private static final Set<MediaType> SUPPORTED_TYPES =
            Collections.unmodifiableSet(new HashSet<>(Arrays.asList(MediaType.application("x-wacz"))));

    /** Name prefix of the ZIP entries that are forwarded as embedded documents. */
    private static final String ARCHIVE_PREFIX = "archive/";

    /** Name of the ZIP entry routed to {@link #processDataPackage}. */
    private static final String DATA_PACKAGE_NAME = "datapackage.json";

    /** First two bytes of a gzip stream (0x1f, 0x8b) read as a little-endian short. */
    private static final int GZIP_MAGIC = 0x8b1f;

    /**
     * Returns the media types handled by this parser.
     *
     * @param parseContext the parse context (not used)
     * @return an unmodifiable set containing {@code application/x-wacz}
     */
    @Override
    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        return SUPPORTED_TYPES;
    }

    /**
     * Parses a WACZ container and forwards its {@code archive/} entries to the embedded
     * document extractor found in the parse context.
     *
     * <p>If the stream is a {@link TikaInputStream} that exposes an open {@link ZipFile}
     * container or is backed by a file, random access through {@link ZipFile} is used;
     * otherwise the archive is read sequentially from the stream.
     *
     * @param inputStream    the WACZ content
     * @param contentHandler receiver of the XHTML SAX events
     * @param metadata       metadata of the container document
     * @param parseContext   the parse context
     * @throws IOException   if the archive cannot be read
     * @throws SAXException  if the content handler fails
     * @throws TikaException declared by the parser contract
     */
    @Override
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
        XHTMLContentHandler xhtml = new XHTMLContentHandler(contentHandler, metadata);
        xhtml.startDocument();
        EmbeddedDocumentExtractor extractor = EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(parseContext);
        ZipFile zipFile = openZipFile(inputStream);
        if (zipFile != null) {
            // The ZipFile is closed once processing finishes, whether it was newly opened
            // or obtained from the stream's open container. try-with-resources keeps the
            // primary exception and records a failing close() as suppressed.
            try (ZipFile zip = zipFile) {
                processZip(zip, xhtml, metadata, extractor);
            }
        } else {
            processStream(inputStream, xhtml, metadata, extractor);
        }
        xhtml.endDocument();
    }

    /**
     * Tries to obtain random access to the archive.
     *
     * @param inputStream the stream passed to {@link #parse}
     * @return the stream's open {@link ZipFile} container, a new {@link ZipFile} over the
     *         stream's backing file, or {@code null} when neither is available
     * @throws IOException if the backing file cannot be opened as a ZIP archive
     */
    private static ZipFile openZipFile(InputStream inputStream) throws IOException {
        if (!(inputStream instanceof TikaInputStream)) {
            return null;
        }
        TikaInputStream tis = (TikaInputStream) inputStream;
        // Guard the cast: an open container of some other type must not cause a
        // ClassCastException; fall back to the file or to streaming instead.
        Object container = tis.getOpenContainer();
        if (container instanceof ZipFile) {
            return (ZipFile) container;
        }
        if (tis.hasFile()) {
            return new ZipFile(tis.getFile());
        }
        return null;
    }

    /**
     * Reads the archive sequentially from a plain stream. The caller's stream is shielded
     * from being closed; only the ZIP reader wrapped around it is closed here.
     */
    private void processStream(InputStream inputStream, XHTMLContentHandler xHTMLContentHandler, Metadata metadata, EmbeddedDocumentExtractor embeddedDocumentExtractor) throws SAXException, IOException {
        try (ZipArchiveInputStream zais = new ZipArchiveInputStream(new CloseShieldInputStream(inputStream))) {
            for (ZipArchiveEntry entry = zais.getNextZipEntry(); entry != null; entry = zais.getNextZipEntry()) {
                if (entry.isDirectory()) {
                    continue;
                }
                String name = entry.getName();
                if (name.startsWith(ARCHIVE_PREFIX)) {
                    // processWARC closes the stream it is given. Shield the shared archive
                    // stream so that closing one entry does not end the iteration over
                    // the remaining entries.
                    processWARC(new CloseShieldInputStream(zais), entry, name.substring(ARCHIVE_PREFIX.length()), xHTMLContentHandler, metadata, embeddedDocumentExtractor);
                } else if (DATA_PACKAGE_NAME.equals(name)) {
                    processDataPackage(zais, entry, xHTMLContentHandler, metadata);
                }
            }
        }
    }

    /**
     * Hook for handling the {@code datapackage.json} entry. Currently a no-op: the entry
     * is neither read nor reported. The stream is not closed by this method.
     */
    private void processDataPackage(InputStream inputStream, ZipArchiveEntry zipArchiveEntry, XHTMLContentHandler xHTMLContentHandler, Metadata metadata) throws IOException {
    }

    /**
     * Forwards one {@code archive/} entry to the embedded document extractor.
     *
     * @param inputStream     content of the entry; it is closed by this method
     * @param zipArchiveEntry the ZIP entry the content belongs to
     * @param str             entry name with the {@code archive/} prefix removed
     */
    private void processWARC(InputStream inputStream, ZipArchiveEntry zipArchiveEntry, String str, XHTMLContentHandler xHTMLContentHandler, Metadata metadata, EmbeddedDocumentExtractor embeddedDocumentExtractor) throws IOException, SAXException {
        Metadata entryMetadata = new Metadata();
        entryMetadata.set("resourceName", str);
        // The size is reported as negative when it is not known (e.g. while streaming);
        // do not publish such a value as a content length.
        long size = zipArchiveEntry.getSize();
        if (size >= 0) {
            entryMetadata.set("Content-Length", Long.toString(size));
        }
        // Two resources so that the entry stream is released even if gzip detection or
        // the gzip header parsing fails.
        try (TikaInputStream entryStream = TikaInputStream.get(inputStream);
                InputStream content = getMaybeGzipInputStream(entryStream)) {
            if (embeddedDocumentExtractor.shouldParseEmbedded(entryMetadata)) {
                embeddedDocumentExtractor.parseEmbedded(content, xHTMLContentHandler, entryMetadata, true);
            }
        }
    }

    /**
     * Peeks at the first two bytes of the stream and wraps it in a gzip decompressor if
     * they are the gzip magic number.
     *
     * @param inputStream a stream that supports {@code mark}/{@code reset}
     * @return a decompressing stream over {@code inputStream}, or {@code inputStream}
     *         itself (positioned at its original offset) when it is not gzip data
     * @throws IOException if reading, resetting or parsing the gzip header fails
     */
    private InputStream getMaybeGzipInputStream(InputStream inputStream) throws IOException {
        byte[] header = new byte[2];
        int read;
        inputStream.mark(header.length);
        try {
            read = IOUtils.readFully(inputStream, header);
        } finally {
            // Rewind BEFORE any wrapper is created: the gzip decoder must see the magic
            // bytes itself, and reading further would invalidate the 2-byte mark.
            inputStream.reset();
        }
        if (read < header.length) {
            return inputStream;
        }
        int magic = ((header[1] & 0xff) << 8) | (header[0] & 0xff);
        return magic == GZIP_MAGIC ? new GzipCompressorInputStream(inputStream) : inputStream;
    }

    /**
     * Walks the entries of a randomly accessible archive. The {@code zipFile} itself is
     * not closed here; every entry stream opened by this method is.
     */
    private void processZip(ZipFile zipFile, XHTMLContentHandler xHTMLContentHandler, Metadata metadata, EmbeddedDocumentExtractor embeddedDocumentExtractor) throws IOException, SAXException {
        Enumeration<ZipArchiveEntry> entries = zipFile.getEntries();
        while (entries.hasMoreElements()) {
            ZipArchiveEntry entry = entries.nextElement();
            if (entry.isDirectory()) {
                continue;
            }
            String name = entry.getName();
            if (name.startsWith(ARCHIVE_PREFIX)) {
                // processWARC takes ownership of the entry stream and closes it.
                processWARC(TikaInputStream.get(zipFile.getInputStream(entry)), entry, name.substring(ARCHIVE_PREFIX.length()), xHTMLContentHandler, metadata, embeddedDocumentExtractor);
            } else if (DATA_PACKAGE_NAME.equals(name)) {
                try (InputStream dataPackage = TikaInputStream.get(zipFile.getInputStream(entry))) {
                    processDataPackage(dataPackage, entry, xHTMLContentHandler, metadata);
                }
            }
        }
    }
}
