package org.semanticdesktop.aperture.extractor.util;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Date;
import java.util.StringTokenizer;
import org.apache.poi.hpsf.PropertySetFactory;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.ontoware.rdf2go.model.Model;
import org.ontoware.rdf2go.model.node.Resource;
import org.ontoware.rdf2go.model.node.URI;
import org.ontoware.rdf2go.vocabulary.RDF;
import org.semanticdesktop.aperture.rdf.RDFContainer;
import org.semanticdesktop.aperture.util.UriUtil;
import org.semanticdesktop.aperture.vocabulary.NCO;
import org.semanticdesktop.aperture.vocabulary.NFO;
import org.semanticdesktop.aperture.vocabulary.NIE;
import org.slf4j.Logger;

/* loaded from: input_file:lib/modeshape-mimetype-detector-aperture-2.8.1.Final-jar-with-dependencies.jar:org/semanticdesktop/aperture/extractor/util/PoiUtil.class */
/**
 * Static helpers that use Apache POI to pull metadata and text out of a POIFS-based document and
 * record the results in an {@link RDFContainer}.
 */
public class PoiUtil {

    /** System property that overrides the size (in bytes) of the mark/read-ahead buffer. */
    private static final String BUFFER_SIZE_PROPERTY = "aperture.poiUtil.bufferSize";

    /** Buffer size used when {@link #BUFFER_SIZE_PROPERTY} is unset or blank (4 MiB). */
    private static final int DEFAULT_BUFFER_SIZE = 4 * 1024 * 1024;

    /** Characters that separate individual keywords in the summary's keyword string. */
    private static final String KEYWORD_DELIMITERS = " \t.,;|/\\";

    /**
     * Strategy for obtaining the full text of a document from its POI file system.
     */
    public interface TextExtractor {

        /**
         * Extracts the text of the document held by the given file system.
         *
         * @param fileSystem the POI file system wrapping the document
         * @return the extracted text
         * @throws IOException if the document cannot be read
         */
        String getText(POIFSFileSystem fileSystem) throws IOException;
    }

    /**
     * Reads the summary information property set from the given file system.
     *
     * @param fileSystem the POI file system to read from
     * @return the summary information, or {@code null} when the summary stream is missing, cannot
     *         be parsed, or does not contain a {@link SummaryInformation} property set
     */
    public static SummaryInformation getSummaryInformation(POIFSFileSystem fileSystem) {
        SummaryInformation summary = null;
        DocumentInputStream summaryStream = null;
        try {
            summaryStream = fileSystem.createDocumentInputStream(SummaryInformation.DEFAULT_STREAM_NAME);
            // Check the type instead of relying on a ClassCastException for control flow.
            Object propertySet = PropertySetFactory.create(summaryStream);
            if (propertySet instanceof SummaryInformation) {
                summary = (SummaryInformation) propertySet;
            }
        } catch (Exception ignored) {
            // Best effort: this method reports every failure to obtain the summary as null.
        } finally {
            // Close in finally so the stream is released even when parsing fails.
            if (summaryStream != null) {
                try {
                    summaryStream.close();
                } catch (Exception ignored) {
                    // Nothing useful can be done if closing fails; keep whatever was parsed.
                }
            }
        }
        return summary;
    }

    /**
     * Extracts document metadata from a stream and adds it to the container.
     *
     * @param stream the stream holding the document
     * @param resetStream when {@code true}, the stream is marked before reading (and wrapped in a
     *        {@link BufferedInputStream} if it does not support marking) and reset afterwards
     * @param container the container that receives the metadata
     * @return the stream to continue working with; this is the buffering wrapper when one had to
     *         be created, otherwise the stream that was passed in
     * @throws IOException if the POI file system cannot be created or the stream cannot be reset
     */
    public static InputStream extractMetadata(InputStream stream, boolean resetStream, RDFContainer container)
            throws IOException {
        if (resetStream) {
            int bufferSize = getBufferSize();
            if (!stream.markSupported()) {
                stream = new BufferedInputStream(stream, bufferSize);
            }
            stream.mark(bufferSize);
        }

        extractMetadata(new POIFSFileSystem(stream), container);

        if (resetStream) {
            stream.reset();
        }
        return stream;
    }

    /**
     * Copies the summary information of the given file system into the container. Nothing is
     * added when no summary information is available.
     *
     * @param fileSystem the POI file system wrapping the document
     * @param container the container that receives the metadata
     */
    public static void extractMetadata(POIFSFileSystem fileSystem, RDFContainer container) {
        SummaryInformation summary = getSummaryInformation(fileSystem);
        if (summary == null) {
            return;
        }

        copyString(summary.getTitle(), NIE.title, container);
        copyString(summary.getSubject(), NIE.subject, container);
        copyString(summary.getComments(), NIE.description, container);
        copyString(summary.getApplicationName(), NIE.generator, container);

        copyContact(summary.getAuthor(), NCO.creator, container);
        copyContact(summary.getLastAuthor(), NCO.contributor, container);

        copyDate(summary.getCreateDateTime(), NIE.contentCreated, container);
        copyDate(summary.getLastSaveDateTime(), NIE.contentLastModified, container);

        // NOTE(review): page counts of 0 and 1 are skipped; presumably deliberate - confirm
        // before changing the threshold.
        int pageCount = summary.getPageCount();
        if (pageCount > 1) {
            container.add(RDF.type, NFO.PaginatedTextDocument);
            container.add(NFO.pageCount, pageCount);
        }

        String keywords = summary.getKeywords();
        if (keywords != null) {
            StringTokenizer tokenizer = new StringTokenizer(keywords, KEYWORD_DELIMITERS, false);
            while (tokenizer.hasMoreTokens()) {
                container.add(NIE.keyword, tokenizer.nextToken());
            }
        }
    }

    /**
     * Adds the trimmed value under the given property unless it is {@code null} or blank.
     */
    private static void copyString(String value, URI property, RDFContainer container) {
        if (value == null) {
            return;
        }
        String trimmed = value.trim();
        if (trimmed.length() > 0) {
            container.add(property, trimmed);
        }
    }

    /**
     * Adds the date under the given property unless it is {@code null}.
     */
    private static void copyDate(Date date, URI property, RDFContainer container) {
        if (date != null) {
            container.add(property, date);
        }
    }

    /**
     * Links the described resource to a newly generated {@code NCO.Contact} resource carrying the
     * given name. Names that are {@code null} or blank are skipped and the name is trimmed, to
     * match how {@link #copyString(String, URI, RDFContainer)} treats plain string values.
     */
    private static void copyContact(String name, URI property, RDFContainer container) {
        if (name == null) {
            return;
        }
        String trimmedName = name.trim();
        if (trimmedName.length() == 0) {
            // Avoid creating a contact node whose full name is empty.
            return;
        }

        Model model = container.getModel();
        Resource contact = UriUtil.generateRandomResource(model);
        model.addStatement(container.getDescribedUri(), property, contact);
        model.addStatement(contact, RDF.type, NCO.Contact);
        model.addStatement(contact, NCO.fullname, trimmedName);
    }

    /**
     * Extracts both text and metadata from a document stream. POI-based extraction is tried
     * first; when it yields no text, the stream is reset and a {@link StringExtractor} is applied
     * instead. Extraction failures are logged rather than propagated.
     *
     * @param stream the stream holding the document
     * @param textExtractor the POI-based text extractor, or {@code null} to go straight to the
     *        string-extraction fallback
     * @param container the container that receives text and metadata
     * @param logger the logger used to report extraction problems
     * @return the stream to continue working with (the buffering wrapper when one had to be
     *         created), after an attempt to reset it to the marked position
     */
    public static InputStream extractAll(InputStream stream, TextExtractor textExtractor, RDFContainer container,
            Logger logger) {
        // Mark the stream so it can be rewound for the fallback extractor and for the caller.
        int bufferSize = getBufferSize();
        if (!stream.markSupported()) {
            stream = new BufferedInputStream(stream, bufferSize);
        }
        stream.mark(bufferSize);

        String text = null;
        try {
            POIFSFileSystem fileSystem = new POIFSFileSystem(stream);
            if (textExtractor != null) {
                try {
                    text = textExtractor.getText(fileSystem);
                } catch (Exception e) {
                    // Text extraction failing must not prevent metadata extraction or the fallback.
                    if (logger != null) {
                        logger.debug("POI-based text extraction failed for " + container.getDescribedUri(), e);
                    }
                }
            }
            extractMetadata(fileSystem, container);
        } catch (IOException e) {
            // Without a POI file system there is no metadata; the fallback below may still find text.
            if (logger != null) {
                logger.debug("Unable to create a POI file system for " + container.getDescribedUri(), e);
            }
        }

        if (text == null) {
            if (textExtractor != null) {
                logger.info("regular POI-based processing failed, falling back to heuristic string extraction for "
                        + container.getDescribedUri());
            }
            try {
                stream.reset();
                text = new StringExtractor().extract(stream);
            } catch (IOException e) {
                logger.warn("IOException while processing " + container.getDescribedUri(), e);
            }
        }

        if (text != null) {
            String trimmed = text.trim();
            if (trimmed.length() > 0) {
                container.add(NIE.plainTextContent, trimmed);
            }
        }

        try {
            stream.reset();
        } catch (Exception e) {
            // Best effort: the stream is returned even when it could not be rewound.
            if (logger != null) {
                logger.debug("Unable to reset the stream for " + container.getDescribedUri(), e);
            }
        }
        return stream;
    }

    /**
     * Determines the buffer size from the {@link #BUFFER_SIZE_PROPERTY} system property, falling
     * back to {@link #DEFAULT_BUFFER_SIZE} when the property is unset or blank.
     *
     * @return the buffer size in bytes, never negative
     * @throws NumberFormatException if the property value is not a valid integer
     * @throws IllegalArgumentException if the property value is negative
     */
    private static int getBufferSize() {
        String configured = System.getProperty(BUFFER_SIZE_PROPERTY);
        if (configured == null) {
            return DEFAULT_BUFFER_SIZE;
        }
        String trimmed = configured.trim();
        if (trimmed.length() == 0) {
            return DEFAULT_BUFFER_SIZE;
        }

        int size;
        try {
            size = Integer.parseInt(trimmed);
        } catch (NumberFormatException e) {
            // Same exception type as before, but the message now names the offending property.
            NumberFormatException detailed = new NumberFormatException(
                    "Invalid value for system property " + BUFFER_SIZE_PROPERTY + ": '" + configured + "'");
            detailed.initCause(e);
            throw detailed;
        }
        if (size < 0) {
            throw new IllegalArgumentException("Negative buffer sizes not allowed: " + size);
        }
        return size;
    }
}
