/*
 * Decompiled with CFR 0.152.
 */
package org.modeshape.extractor.tika;

import java.io.IOException;
import java.io.InputStream;
import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import javax.jcr.RepositoryException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.DefaultParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.BodyContentHandler;
import org.modeshape.common.collection.Collections;
import org.modeshape.common.i18n.I18nResource;
import org.modeshape.common.logging.Logger;
import org.modeshape.common.util.StringUtil;
import org.modeshape.extractor.tika.TikaI18n;
import org.modeshape.jcr.api.Binary;
import org.modeshape.jcr.api.text.TextExtractor;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

public class TikaTextExtractor
extends TextExtractor {
    protected static final Logger LOGGER = Logger.getLogger(TikaTextExtractor.class);
    public static final Set<MediaType> DEFAULT_EXCLUDED_MIME_TYPES = Collections.unmodifiableSet((Object[])new MediaType[]{MediaType.application((String)"x-archive"), MediaType.application((String)"x-bzip"), MediaType.application((String)"x-bzip2"), MediaType.application((String)"x-cpio"), MediaType.application((String)"x-gtar"), MediaType.application((String)"x-gzip"), MediaType.application((String)"x-tar"), MediaType.application((String)"zip"), MediaType.application((String)"vnd.teiid.vdb"), MediaType.image((String)"*"), MediaType.audio((String)"*"), MediaType.video((String)"*")});
    private Set<MediaType> excludedMimeTypes = new HashSet<MediaType>();
    private Set<String> includedMimeTypes = new HashSet<String>();
    private Set<String> supportedMediaTypes = new HashSet<String>();
    private Integer writeLimit;
    private final Lock initLock = new ReentrantLock();
    private DefaultParser parser;

    public TikaTextExtractor() {
        this.excludedMimeTypes.addAll(DEFAULT_EXCLUDED_MIME_TYPES);
    }

    public boolean supportsMimeType(String mimeType) {
        MediaType mediaType = MediaType.parse((String)mimeType);
        if (mediaType == null) {
            this.getLogger().debug("Invalid mime-type:" + mimeType, new Object[0]);
            return false;
        }
        for (MediaType excludedMediaType : this.excludedMimeTypes) {
            if (excludedMediaType.equals((Object)mediaType)) {
                return false;
            }
            if (!excludedMediaType.getSubtype().equalsIgnoreCase("*") || !mediaType.getType().equalsIgnoreCase(excludedMediaType.getType())) continue;
            return false;
        }
        this.initialize();
        return this.includedMimeTypes.isEmpty() ? this.supportedMediaTypes.contains(mimeType) : this.supportedMediaTypes.contains(mimeType) && this.includedMimeTypes.contains(mimeType);
    }

    public void extractFrom(final Binary binary, final TextExtractor.Output output, final TextExtractor.Context context) throws Exception {
        final DefaultParser parser = this.initialize();
        final Integer writeLimit = this.writeLimit;
        this.processStream(binary, (TextExtractor.BinaryOperation)new TextExtractor.BinaryOperation<Object>(){

            public Object execute(InputStream stream) throws Exception {
                Metadata metadata = TikaTextExtractor.this.prepareMetadata(binary, context);
                try {
                    LOGGER.debug("Using TikaTextExtractor to extract text", new Object[0]);
                    BodyContentHandler textHandler = writeLimit == null ? new BodyContentHandler() : new BodyContentHandler(writeLimit + 1);
                    parser.parse(stream, (ContentHandler)textHandler, metadata, new ParseContext());
                    String text = textHandler.toString().trim();
                    output.recordText(text);
                    LOGGER.debug("TikaTextExtractor found text: " + text, new Object[0]);
                }
                catch (SAXException sae) {
                    LOGGER.warn((I18nResource)TikaI18n.parseExceptionWhileExtractingText, new Object[]{sae.getMessage()});
                }
                catch (NoClassDefFoundError ncdfe) {
                    LOGGER.warn((I18nResource)TikaI18n.warnNoClassDefFound, new Object[]{ncdfe.getMessage()});
                }
                catch (Throwable e) {
                    LOGGER.error(e, (I18nResource)TikaI18n.errorWhileExtractingTextFrom, new Object[]{e.getMessage()});
                }
                return null;
            }
        });
    }

    protected final Metadata prepareMetadata(Binary binary, TextExtractor.Context context) throws IOException, RepositoryException {
        Metadata metadata = new Metadata();
        String mimeType = binary.getMimeType();
        if (StringUtil.isBlank((String)mimeType)) {
            mimeType = context.mimeTypeOf(null, binary);
        }
        if (!StringUtil.isBlank((String)mimeType)) {
            metadata.set("Content-Type", mimeType);
        }
        return metadata;
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    protected DefaultParser initialize() {
        if (this.parser == null) {
            try {
                this.initLock.lock();
                if (this.parser == null) {
                    this.parser = new DefaultParser(((Object)((Object)this)).getClass().getClassLoader());
                }
                LOGGER.debug("Initializing TikaTextExtractor", new Object[0]);
                Map parsers = this.parser.getParsers();
                LOGGER.debug("TikaTextExtractor found " + parsers.size() + " parsers", new Object[0]);
                for (MediaType mediaType : parsers.keySet()) {
                    String mimeType = mediaType.getType() + "/" + mediaType.getSubtype();
                    this.supportedMediaTypes.add(mimeType);
                    LOGGER.debug("TikaTextExtractor will support '" + mimeType + "'", new Object[0]);
                }
            }
            finally {
                this.initLock.unlock();
            }
        }
        return this.parser;
    }

    public Set<String> getIncludedMimeTypes() {
        return Collections.unmodifiableSet(this.includedMimeTypes);
    }

    public void setIncludedMimeTypes(String includedMimeTypes) {
        if (includedMimeTypes == null || includedMimeTypes.length() == 0) {
            return;
        }
        this.includedMimeTypes.clear();
        for (String mimeType : includedMimeTypes.split("[,\\s]")) {
            this.includeMimeType(mimeType);
        }
    }

    public void setIncludedMimeTypes(Collection<String> includedMimeTypes) {
        if (includedMimeTypes != null) {
            this.includedMimeTypes = new HashSet<String>(includedMimeTypes);
        }
    }

    private void includeMimeType(String mimeType) {
        if (mimeType == null) {
            return;
        }
        if ((mimeType = mimeType.trim()).length() != 0) {
            this.includedMimeTypes.add(mimeType);
        }
    }

    public Set<String> getExcludedMimeTypes() {
        HashSet<String> result = new HashSet<String>();
        for (MediaType mediaType : this.excludedMimeTypes) {
            result.add(mediaType.toString());
        }
        return Collections.unmodifiableSet(result);
    }

    public void setExcludedMimeTypes(String excludedMimeTypes) {
        if (excludedMimeTypes == null || excludedMimeTypes.length() == 0) {
            return;
        }
        this.excludedMimeTypes.clear();
        for (String mimeType : excludedMimeTypes.split("[,\\s]")) {
            this.excludeMimeType(mimeType);
        }
    }

    public void setExcludedMimeTypes(Collection<String> excludedMimeTypes) {
        if (excludedMimeTypes != null) {
            this.excludedMimeTypes.clear();
            for (String excludedMimeType : excludedMimeTypes) {
                this.excludeMimeType(excludedMimeType);
            }
        }
    }

    private void excludeMimeType(String mimeType) {
        MediaType mediaType = MediaType.parse((String)mimeType);
        if (mediaType == null) {
            this.getLogger().debug("Invalid media type: {0}", new Object[]{mimeType});
            return;
        }
        this.excludedMimeTypes.add(mediaType);
    }

    public void setWriteLimit(Integer writeLimit) {
        this.writeLimit = writeLimit;
    }
}

