/*
 * Decompiled with CFR 0.152.
 */
package org.modeshape.extractor.tika;

import java.io.IOException;
import java.io.InputStream;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.DefaultParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.BodyContentHandler;
import org.modeshape.common.collection.Collections;
import org.modeshape.extractor.tika.TikaI18n;
import org.modeshape.graph.text.TextExtractor;
import org.modeshape.graph.text.TextExtractorContext;
import org.modeshape.graph.text.TextExtractorOutput;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

public class TikaTextExtractor
implements TextExtractor {
    public static final Set<String> DEFAULT_EXCLUDED_MIME_TYPES = Collections.unmodifiableSet((Object[])new String[]{"application/x-archive", "application/x-bzip", "application/x-bzip2", "application/x-cpio", "application/x-gtar", "application/x-gzip", "application/x-tar", "application/zip", "application/vnd.teiid.vdb"});
    private final Set<String> excludedMimeTypes = new HashSet<String>();
    private final Set<String> includedMimeTypes = new HashSet<String>();
    private final Set<String> supportedMediaTypes = new HashSet<String>();
    private final Lock initLock = new ReentrantLock();
    private DefaultParser parser;

    public TikaTextExtractor() {
        this.excludedMimeTypes.addAll(DEFAULT_EXCLUDED_MIME_TYPES);
    }

    public boolean supportsMimeType(String mimeType) {
        if (this.excludedMimeTypes.contains(mimeType)) {
            return false;
        }
        this.initialize();
        return this.includedMimeTypes.isEmpty() ? this.supportedMediaTypes.contains(mimeType) : this.supportedMediaTypes.contains(mimeType) && this.includedMimeTypes.contains(mimeType);
    }

    public void extractFrom(InputStream stream, TextExtractorOutput output, TextExtractorContext context) throws IOException {
        DefaultParser parser = this.initialize();
        String mimeType = context.getMimeType();
        Metadata metadata = new Metadata();
        if (mimeType != null) {
            metadata.set("Content-Type", mimeType);
        }
        BodyContentHandler textHandler = new BodyContentHandler(){
            private char[] space = new char[]{' '};
            private boolean first = true;

            public void characters(char[] ch, int start, int length) throws SAXException {
                if (!this.first) {
                    super.characters(this.space, 0, 1);
                }
                super.characters(ch, start, length);
                this.first = false;
            }
        };
        ParseContext parseContext = new ParseContext();
        try {
            parser.parse(stream, (ContentHandler)textHandler, metadata, parseContext);
            output.recordText(textHandler.toString().trim());
        }
        catch (IOException e) {
            throw e;
        }
        catch (Throwable e) {
            context.getProblems().addError(e, TikaI18n.errorWhileExtractingTextFrom, new Object[]{context.getInputPath(), e.getMessage()});
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    protected DefaultParser initialize() {
        if (this.parser == null) {
            try {
                this.initLock.lock();
                if (this.parser == null) {
                    this.parser = new DefaultParser(this.getClass().getClassLoader());
                }
                Map parsers = this.parser.getParsers();
                for (MediaType mediaType : parsers.keySet()) {
                    String mimeType = mediaType.getType() + "/" + mediaType.getSubtype();
                    this.supportedMediaTypes.add(mimeType);
                }
            }
            finally {
                this.initLock.unlock();
            }
        }
        return this.parser;
    }

    public Set<String> getIncludedMimeTypes() {
        return Collections.unmodifiableSet(this.includedMimeTypes);
    }

    public void setIncludedMimeTypes(String includedMimeTypes) {
        if (includedMimeTypes == null || includedMimeTypes.length() == 0) {
            return;
        }
        this.includedMimeTypes.clear();
        for (String mimeType : includedMimeTypes.split("[,\\s]")) {
            this.includeMimeType(mimeType);
        }
    }

    public void addIncludedMimeType(String includedMimeType) {
        if (includedMimeType == null || includedMimeType.length() == 0) {
            return;
        }
        this.includeMimeType(includedMimeType);
    }

    public void includeMimeType(String mimeType) {
        if (mimeType == null) {
            return;
        }
        if ((mimeType = mimeType.trim()).length() != 0) {
            this.includedMimeTypes.add(mimeType);
        }
    }

    public Set<String> getExcludedMimeTypes() {
        return Collections.unmodifiableSet(this.excludedMimeTypes);
    }

    public void setExcludedMimeTypes(String excludedMimeTypes) {
        if (excludedMimeTypes == null || excludedMimeTypes.length() == 0) {
            return;
        }
        this.excludedMimeTypes.clear();
        for (String mimeType : excludedMimeTypes.split("[,\\s]")) {
            this.excludeMimeType(mimeType);
        }
    }

    public void addExcludedMimeType(String excludedMimeType) {
        if (excludedMimeType == null || excludedMimeType.length() == 0) {
            return;
        }
        this.excludeMimeType(excludedMimeType);
    }

    public void excludeMimeType(String mimeType) {
        if (mimeType == null) {
            return;
        }
        if ((mimeType = mimeType.trim()).length() != 0) {
            this.excludedMimeTypes.add(mimeType);
        }
    }
}

