package org.modeshape.extractor.tika;

import java.io.IOException;
import java.io.InputStream;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.tika.detect.DefaultDetector;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.DefaultParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.BodyContentHandler;
import org.modeshape.common.collection.Collections;
import org.modeshape.common.util.Logger;
import org.modeshape.common.util.StringUtil;
import org.modeshape.graph.text.TextExtractor;
import org.modeshape.graph.text.TextExtractorContext;
import org.modeshape.graph.text.TextExtractorOutput;

/* loaded from: input_file:org/modeshape/extractor/tika/TikaTextExtractor.class */
/**
 * A {@link TextExtractor} that delegates text extraction to an Apache Tika {@link DefaultParser}.
 * <p>
 * A MIME type is considered supported when it is not in the excluded set, is among the media types
 * reported by the Tika parser, and (when the included set is non-empty) is also in the included set.
 * The Tika parser is created lazily on first use.
 * </p>
 */
public class TikaTextExtractor implements TextExtractor {
    private static final Logger LOGGER = Logger.getLogger(TikaTextExtractor.class);

    /** MIME types that every new extractor instance excludes by default. */
    public static final Set<String> DEFAULT_EXCLUDED_MIME_TYPES = Collections.unmodifiableSet(new String[]{"application/x-archive", "application/x-bzip", "application/x-bzip2", "application/x-cpio", "application/x-gtar", "application/x-gzip", "application/x-tar", "application/zip", "application/vnd.teiid.vdb"});

    private final Set<String> excludedMimeTypes = new HashSet<String>();
    private final Set<String> includedMimeTypes = new HashSet<String>();
    /** "type/subtype" strings for every media type the Tika parser reports; filled by {@link #initialize()}. */
    private final Set<String> supportedMediaTypes = new HashSet<String>();
    private final Lock initLock = new ReentrantLock();
    /**
     * Lazily created parser. Declared volatile so that the unlocked null check in {@link #initialize()}
     * only ever observes a parser whose supported media types have already been recorded.
     */
    private volatile DefaultParser parser;

    /**
     * Creates an extractor whose excluded set starts out as a copy of {@link #DEFAULT_EXCLUDED_MIME_TYPES}.
     */
    public TikaTextExtractor() {
        this.excludedMimeTypes.addAll(DEFAULT_EXCLUDED_MIME_TYPES);
    }

    /**
     * Determines whether this extractor will handle content of the given MIME type.
     *
     * @param str the MIME type to check
     * @return {@code false} if the type is excluded or not reported by the Tika parser; otherwise {@code true}
     *         when no included types are configured, or when the type is one of the included types
     */
    public boolean supportsMimeType(String str) {
        if (this.excludedMimeTypes.contains(str)) {
            return false;
        }
        // Ensures supportedMediaTypes has been populated from the parser.
        initialize();
        if (!this.supportedMediaTypes.contains(str)) {
            return false;
        }
        return this.includedMimeTypes.isEmpty() || this.includedMimeTypes.contains(str);
    }

    /**
     * Parses the stream with Tika and records the trimmed body text on the supplied output.
     *
     * @param inputStream the content to extract text from
     * @param textExtractorOutput receives the extracted text
     * @param textExtractorContext supplies the MIME type, input path and problem collector
     * @throws IOException if preparing the metadata or parsing the stream fails with an I/O error
     */
    public void extractFrom(InputStream inputStream, TextExtractorOutput textExtractorOutput, TextExtractorContext textExtractorContext) throws IOException {
        DefaultParser tikaParser = initialize();
        Metadata metadata = prepareMetadata(inputStream, textExtractorContext);
        try {
            BodyContentHandler bodyContentHandler = new BodyContentHandler();
            tikaParser.parse(inputStream, bodyContentHandler, metadata, new ParseContext());
            textExtractorOutput.recordText(bodyContentHandler.toString().trim());
        } catch (IOException e) {
            // I/O failures are propagated to the caller unchanged.
            throw e;
        } catch (Throwable th) {
            // Any other failure is recorded as a problem on the context instead of being thrown.
            textExtractorContext.getProblems().addError(th, TikaI18n.errorWhileExtractingTextFrom, new Object[]{textExtractorContext.getInputPath(), th.getMessage()});
        }
    }

    /**
     * Builds the Tika metadata for a parse. When the context carries a non-blank MIME type it is used as
     * the content type; otherwise a warning is logged, the last segment of the input path is set as the
     * resource name, and the content type is detected from the stream.
     *
     * @param inputStream the content, consulted only when the MIME type has to be detected
     * @param textExtractorContext supplies the MIME type and input path
     * @return the populated metadata
     * @throws IOException if detection fails while reading the stream
     */
    private Metadata prepareMetadata(InputStream inputStream, TextExtractorContext textExtractorContext) throws IOException {
        Metadata metadata = new Metadata();
        String mimeType = textExtractorContext.getMimeType();
        if (StringUtil.isBlank(mimeType)) {
            LOGGER.warn(TikaI18n.warnMimeTypeNotSet, new Object[0]);
            metadata.set("resourceName", textExtractorContext.getInputPath().getLastSegment().getString());
            // NOTE(review): detection reads from the same stream that is parsed afterwards; presumably the
            // stream is expected to support mark/reset - confirm against callers.
            metadata.set("Content-Type", new DefaultDetector(getClass().getClassLoader()).detect(inputStream, metadata).toString());
        } else {
            metadata.set("Content-Type", mimeType);
        }
        return metadata;
    }

    /**
     * Returns the Tika parser, creating it and recording its supported media types on first use.
     * <p>
     * The supported media types are recorded before the parser is assigned to the volatile field, so a
     * caller that sees a non-null parser without taking the lock also sees the fully populated set.
     * </p>
     *
     * @return the parser; never {@code null}
     */
    protected DefaultParser initialize() {
        DefaultParser result = this.parser;
        if (result == null) {
            // Acquire outside the try block so that unlock() only runs when the lock is actually held.
            this.initLock.lock();
            try {
                result = this.parser;
                if (result == null) {
                    result = new DefaultParser(getClass().getClassLoader());
                    for (MediaType mediaType : result.getParsers().keySet()) {
                        this.supportedMediaTypes.add(mediaType.getType() + "/" + mediaType.getSubtype());
                    }
                    // Assign last: this write is what makes the populated set visible to other threads.
                    this.parser = result;
                }
            } finally {
                this.initLock.unlock();
            }
        }
        return result;
    }

    /**
     * @return an unmodifiable set of the explicitly included MIME types
     */
    public Set<String> getIncludedMimeTypes() {
        return Collections.unmodifiableSet(this.includedMimeTypes);
    }

    /**
     * Replaces the included MIME types with those listed in the given string, which is split on commas
     * and whitespace. A {@code null} or empty argument leaves the current set untouched.
     *
     * @param str the delimited list of MIME types
     */
    public void setIncludedMimeTypes(String str) {
        if (str == null || str.length() == 0) {
            return;
        }
        this.includedMimeTypes.clear();
        for (String mimeType : str.split("[,\\s]")) {
            includeMimeType(mimeType);
        }
    }

    /**
     * Adds a single MIME type to the included set; {@code null} and empty values are ignored.
     *
     * @param str the MIME type to include
     */
    public void addIncludedMimeType(String str) {
        if (str == null || str.length() == 0) {
            return;
        }
        includeMimeType(str);
    }

    /**
     * Adds the trimmed MIME type to the included set; {@code null} and blank values are ignored.
     *
     * @param str the MIME type to include
     */
    public void includeMimeType(String str) {
        if (str == null) {
            return;
        }
        String trimmed = str.trim();
        if (trimmed.length() != 0) {
            this.includedMimeTypes.add(trimmed);
        }
    }

    /**
     * @return an unmodifiable set of the excluded MIME types
     */
    public Set<String> getExcludedMimeTypes() {
        return Collections.unmodifiableSet(this.excludedMimeTypes);
    }

    /**
     * Replaces the excluded MIME types (including the defaults) with those listed in the given string,
     * which is split on commas and whitespace. A {@code null} or empty argument leaves the current set
     * untouched.
     *
     * @param str the delimited list of MIME types
     */
    public void setExcludedMimeTypes(String str) {
        if (str == null || str.length() == 0) {
            return;
        }
        this.excludedMimeTypes.clear();
        for (String mimeType : str.split("[,\\s]")) {
            excludeMimeType(mimeType);
        }
    }

    /**
     * Adds a single MIME type to the excluded set; {@code null} and empty values are ignored.
     *
     * @param str the MIME type to exclude
     */
    public void addExcludedMimeType(String str) {
        if (str == null || str.length() == 0) {
            return;
        }
        excludeMimeType(str);
    }

    /**
     * Adds the trimmed MIME type to the excluded set; {@code null} and blank values are ignored.
     *
     * @param str the MIME type to exclude
     */
    public void excludeMimeType(String str) {
        if (str == null) {
            return;
        }
        String trimmed = str.trim();
        if (trimmed.length() != 0) {
            this.excludedMimeTypes.add(trimmed);
        }
    }
}
