/*
 * Decompiled with CFR 0.152.
 */
package org.modeshape.extractor.tika;

import java.io.IOException;
import java.io.InputStream;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import javax.jcr.RepositoryException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.DefaultParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.BodyContentHandler;
import org.modeshape.common.collection.Collections;
import org.modeshape.common.i18n.I18nResource;
import org.modeshape.common.logging.Logger;
import org.modeshape.common.util.StringUtil;
import org.modeshape.extractor.tika.TikaI18n;
import org.modeshape.jcr.api.Binary;
import org.modeshape.jcr.api.text.TextExtractor;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/**
 * A {@link TextExtractor} that delegates to an Apache Tika {@link DefaultParser} to pull the
 * textual content out of binary values.
 *
 * <p>Which MIME types are handled is decided by three sets of media types:
 * <ul>
 *   <li>the <em>excluded</em> types, seeded with {@link #DEFAULT_EXCLUDED_MIME_TYPES} and extended
 *       with whatever {@code getExcludedMimeTypes()} returns at initialization time;</li>
 *   <li>the <em>included</em> types, taken from {@code getIncludedMimeTypes()}; when this set is
 *       empty no include filtering is applied;</li>
 *   <li>the types reported by the Tika parser itself.</li>
 * </ul>
 *
 * <p>The Tika parser is created lazily on first use. The parser field is {@code volatile} and is
 * assigned only after the media-type sets above have been filled in, so a thread that observes a
 * non-null parser also observes the fully populated sets.
 */
public class TikaTextExtractor extends TextExtractor {
    protected static final Logger LOGGER = Logger.getLogger(TikaTextExtractor.class);

    /**
     * Media types excluded by default: common archive/compression formats, Teiid VDBs and every
     * image, audio and video subtype (expressed with the {@code *} subtype wildcard).
     */
    protected static final Set<MediaType> DEFAULT_EXCLUDED_MIME_TYPES = Collections.unmodifiableSet(
            MediaType.application("x-archive"),
            MediaType.application("x-bzip"),
            MediaType.application("x-bzip2"),
            MediaType.application("x-cpio"),
            MediaType.application("x-gtar"),
            MediaType.application("x-gzip"),
            MediaType.application("x-tar"),
            MediaType.application("zip"),
            MediaType.application("vnd.teiid.vdb"),
            MediaType.image("*"),
            MediaType.audio("*"),
            MediaType.video("*"));

    private final Set<MediaType> excludedMediaTypes = new HashSet<MediaType>();
    private final Set<MediaType> includedMediaTypes = new HashSet<MediaType>();
    private final Set<MediaType> parserSupportedMediaTypes = new HashSet<MediaType>();

    /** Optional cap handed to the Tika content handler; {@code null} means no explicit cap is passed. */
    private Integer writeLimit;

    /** Guards the one-time lazy initialization performed by {@link #initialize()}. */
    private final Lock initLock = new ReentrantLock();

    /**
     * The lazily created Tika parser. Declared {@code volatile} and written last during
     * initialization so that the unlocked fast-path read in {@link #initialize()} is safe.
     */
    private volatile DefaultParser parser;

    /**
     * Creates an extractor whose excluded media types start out as
     * {@link #DEFAULT_EXCLUDED_MIME_TYPES}.
     */
    public TikaTextExtractor() {
        this.excludedMediaTypes.addAll(DEFAULT_EXCLUDED_MIME_TYPES);
    }

    /**
     * Determines whether this extractor will process content of the given MIME type.
     *
     * <p>A type is rejected when it cannot be parsed into a {@link MediaType}, when it equals an
     * excluded type, or when an excluded type has the {@code *} subtype and the same primary type.
     * Otherwise it is accepted if the Tika parser supports it and, when any included types are
     * configured, it is also one of the included types.
     *
     * @param mimeType the MIME type string to check
     * @return {@code true} if text extraction should be attempted for this MIME type
     */
    public boolean supportsMimeType(String mimeType) {
        MediaType mediaType = MediaType.parse(mimeType);
        if (mediaType == null) {
            this.logger().debug("Invalid mime-type: {0}", mimeType);
            return false;
        }
        // Make sure the parser-supported / included / excluded sets are populated before reading them.
        this.initialize();
        for (MediaType excludedMediaType : this.excludedMediaTypes) {
            if (excludedMediaType.equals(mediaType)) {
                return false;
            }
            // "type/*" exclusions match every subtype of the same primary type.
            boolean wildcardSubtype = excludedMediaType.getSubtype().equalsIgnoreCase("*");
            if (wildcardSubtype && mediaType.getType().equalsIgnoreCase(excludedMediaType.getType())) {
                return false;
            }
        }
        if (!this.parserSupportedMediaTypes.contains(mediaType)) {
            return false;
        }
        return this.includedMediaTypes.isEmpty() || this.includedMediaTypes.contains(mediaType);
    }

    /**
     * Extracts text from the supplied binary using Tika and records any non-blank result on the
     * given output.
     *
     * <p>Metadata preparation happens before parsing and its exceptions propagate. Anything thrown
     * while Tika is parsing is logged and swallowed; whatever text the handler collected up to
     * that point is still trimmed and, if non-blank, recorded.
     *
     * @param binary the binary value to read
     * @param output receives the extracted text
     * @param context used to detect the MIME type when the binary does not report one
     * @throws Exception if the stream cannot be processed or the metadata cannot be prepared
     */
    public void extractFrom(final Binary binary, final TextExtractor.Output output, final TextExtractor.Context context) throws Exception {
        final DefaultParser tikaParser = this.initialize();
        // Snapshot the limit so the callback uses a single consistent value.
        final Integer limit = this.writeLimit;
        this.processStream(binary, new TextExtractor.BinaryOperation<Object>() {

            public Object execute(InputStream stream) throws Exception {
                Metadata metadata = prepareMetadata(binary, context);
                // NOTE(review): the handler is created with one character more than the configured
                // limit; the reason is not visible in this file - confirm before changing.
                BodyContentHandler textHandler = limit == null ? new BodyContentHandler() : new BodyContentHandler(limit + 1);
                try {
                    LOGGER.debug("Using TikaTextExtractor to extract text");
                    tikaParser.parse(stream, textHandler, metadata, new ParseContext());
                }
                catch (SAXException sae) {
                    LOGGER.warn(TikaI18n.parseExceptionWhileExtractingText, sae.getMessage());
                }
                catch (NoClassDefFoundError ncdfe) {
                    LOGGER.warn(TikaI18n.warnNoClassDefFound, ncdfe.getMessage());
                }
                catch (Throwable e) {
                    // Deliberately best-effort: a parsing failure must not abort the caller.
                    LOGGER.error(e, TikaI18n.errorWhileExtractingTextFrom, e.getMessage());
                }
                finally {
                    // Record whatever was collected, even if parsing stopped early.
                    String text = textHandler.toString().trim();
                    if (!StringUtil.isBlank(text)) {
                        output.recordText(text);
                        // Pass the text as a parameter: the first argument is a "{n}" pattern, so
                        // concatenating document content into it could be misread as placeholders.
                        LOGGER.debug("TikaTextExtractor found text: {0}", text);
                    }
                }
                return null;
            }
        });
    }

    /**
     * Builds the Tika metadata for a binary, setting {@code Content-Type} when a MIME type is
     * available.
     *
     * <p>The binary's own MIME type is preferred; if it is blank the context is asked to detect
     * one. If the result is still blank, no content type is set.
     *
     * @param binary the binary whose MIME type is wanted
     * @param context the fallback MIME type detector
     * @return a new metadata instance, never {@code null}
     * @throws IOException if the MIME type lookup fails while reading
     * @throws RepositoryException if the MIME type lookup fails in the repository
     */
    protected final Metadata prepareMetadata(Binary binary, TextExtractor.Context context) throws IOException, RepositoryException {
        Metadata metadata = new Metadata();
        String mimeType = binary.getMimeType();
        if (StringUtil.isBlank(mimeType)) {
            mimeType = context.mimeTypeOf(null, binary);
        }
        if (!StringUtil.isBlank(mimeType)) {
            metadata.set("Content-Type", mimeType);
        }
        return metadata;
    }

    /**
     * Lazily creates the Tika parser and populates the media-type sets, exactly once.
     *
     * <p>The first caller creates the parser under {@code initLock}, records the media types it
     * supports, and merges the configured excluded/included MIME type strings into the
     * corresponding sets. The parser field is assigned only after all of that has completed, so
     * callers taking the unlocked fast path never see partially populated sets. If initialization
     * throws, the field stays {@code null} and the next call tries again.
     *
     * @return the initialized parser, never {@code null}
     */
    protected DefaultParser initialize() {
        DefaultParser initialized = this.parser;
        if (initialized != null) {
            return initialized;
        }
        this.initLock.lock();
        try {
            // Re-check under the lock: another thread may have finished while we were waiting.
            initialized = this.parser;
            if (initialized == null) {
                LOGGER.debug("Initializing Tika Text Extractor");
                DefaultParser newParser = new DefaultParser(this.getClass().getClassLoader());
                Set<MediaType> supportedTypes = newParser.getParsers().keySet();
                LOGGER.debug("Tika parsers found: {0}", supportedTypes.size());
                for (MediaType mediaType : supportedTypes) {
                    this.parserSupportedMediaTypes.add(mediaType);
                    LOGGER.debug("Tika Text Extractor will support the {0} media-type", mediaType);
                }
                this.convertStringMimeTypesToMediaTypes(this.getExcludedMimeTypes(), this.excludedMediaTypes);
                this.convertStringMimeTypesToMediaTypes(this.getIncludedMimeTypes(), this.includedMediaTypes);
                LOGGER.debug("Initialized {0}", this);
                // Publish last: the volatile write makes the populated sets visible to readers.
                this.parser = newParser;
                initialized = newParser;
            }
        }
        finally {
            this.initLock.unlock();
        }
        return initialized;
    }

    /**
     * Parses MIME type strings into {@link MediaType}s and adds them to the target set.
     *
     * <p>Each entry may hold several MIME types separated by commas and/or whitespace. Blank
     * tokens are skipped and tokens that Tika cannot parse are logged and ignored.
     *
     * @param mimeTypes the configured MIME type strings
     * @param mediaTypes the set that receives the parsed media types
     */
    private void convertStringMimeTypesToMediaTypes(Set<String> mimeTypes, Set<MediaType> mediaTypes) {
        for (String mimeTypeEntry : mimeTypes) {
            for (String mimeType : mimeTypeEntry.split("[,\\s]")) {
                if (StringUtil.isBlank(mimeType)) {
                    continue;
                }
                MediaType mediaType = MediaType.parse(mimeType.trim());
                if (mediaType == null) {
                    this.logger().debug("Invalid media type: {0}", mimeType);
                    continue;
                }
                mediaTypes.add(mediaType);
            }
        }
    }

    /**
     * Sets the limit passed to the Tika content handler during extraction.
     *
     * @param writeLimit the limit, or {@code null} to create the handler without an explicit limit
     */
    protected void setWriteLimit(Integer writeLimit) {
        this.writeLimit = writeLimit;
    }

    /**
     * @return the live, mutable set of excluded media types
     */
    protected Set<MediaType> getExcludedMediaTypes() {
        return this.excludedMediaTypes;
    }

    /**
     * @return the live, mutable set of included media types
     */
    protected Set<MediaType> getIncludedMediaTypes() {
        return this.includedMediaTypes;
    }

    /**
     * @return the live, mutable set of media types reported by the Tika parser
     */
    protected Set<MediaType> getParserSupportedMediaTypes() {
        return this.parserSupportedMediaTypes;
    }

    /**
     * Returns a description listing the three media-type sets and the write limit
     * ({@code "unlimited"} when none is set).
     */
    public String toString() {
        StringBuilder sb = new StringBuilder("TikaTextExtractor{");
        sb.append("excludedMediaTypes=").append(this.excludedMediaTypes);
        sb.append(", includedMediaTypes=").append(this.includedMediaTypes);
        sb.append(", parserSupportedMediaTypes=").append(this.parserSupportedMediaTypes);
        sb.append(", writeLimit=").append(this.writeLimit != null ? this.writeLimit : "unlimited");
        sb.append('}');
        return sb.toString();
    }
}

