/*
 * Decompiled with CFR 0.152.
 */
package org.modeshape.extractor.tika;

import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import org.hamcrest.Matcher;
import org.hamcrest.core.Is;
import org.hamcrest.core.IsNull;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.modeshape.common.util.IoUtil;
import org.modeshape.extractor.tika.TikaTextExtractor;
import org.modeshape.jcr.InMemoryTestBinary;
import org.modeshape.jcr.api.Binary;
import org.modeshape.jcr.api.text.TextExtractor;
import org.modeshape.jcr.text.TextExtractorContext;
import org.modeshape.jcr.text.TextExtractorOutput;

/**
 * Tests for {@link TikaTextExtractor}.
 * <p>
 * The extraction tests split both the extractor's output and a reference text resource into
 * terms (see {@link #addWords(List, String)}) and then compare the two sequences term by term.
 * Both sequences are consumed from the head as they are verified.
 */
public class TikaTextExtractorTest {
    private TikaTextExtractor extractor;
    /** Terms obtained from the extractor output, in order; popped as they are verified. */
    private LinkedList<String> extracted = null;
    /** Terms obtained from the reference resource, in order; popped by {@link #expectedTermsThrough(String...)}. */
    private LinkedList<String> expected = null;

    /** Creates a fresh extractor and empty term lists before every test. */
    @Before
    public void beforeEach() {
        this.extractor = new TikaTextExtractor();
        this.extracted = new LinkedList<String>();
        this.expected = new LinkedList<String>();
    }

    @Test
    public void shouldIncludedNoMimeTypesByDefault() {
        Assert.assertThat(this.extractor.getIncludedMimeTypes().isEmpty(), Is.is(true));
    }

    @Test
    public void shouldExcludedPackageTypeMimeTypesByDefault() {
        Assert.assertThat(this.extractor.getExcludedMimeTypes().containsAll(TikaTextExtractor.DEFAULT_EXCLUDED_MIME_TYPES), Is.is(true));
    }

    @Test
    public void shouldSupportExtractingFromTextFiles() throws Exception {
        Assert.assertThat(this.extractor.supportsMimeType("text/plain"), Is.is(true));
    }

    @Test
    public void shouldSupportExtractingFromPdfFiles() throws Exception {
        Assert.assertThat(this.extractor.supportsMimeType("application/pdf"), Is.is(true));
    }

    @Test
    public void shouldNotSupportExtractingFromPostscriptFiles() throws Exception {
        Assert.assertThat(this.extractor.supportsMimeType("application/postscript"), Is.is(false));
    }

    @Test
    public void shouldSupportExtractingFromDocWordFiles() throws Exception {
        Assert.assertThat(this.extractor.supportsMimeType("application/msword"), Is.is(true));
    }

    @Test
    public void shouldSupportExtractingFromDocxWordFiles() throws Exception {
        Assert.assertThat(this.extractor.supportsMimeType("application/vnd.openxmlformats-officedocument.wordprocessingml.document"), Is.is(true));
    }

    @Test
    public void shouldExtractTextFromTextFile1() throws Exception {
        this.extractTermsFrom("modeshape.txt");
        this.loadExpectedFrom("modeshape.txt");
        this.extractedShouldHave(this.remainingExpectedTerms());
    }

    @Test
    public void shouldExtractTextFromTextFile2() throws Exception {
        this.extractTermsFrom("text-file.txt");
        this.loadExpectedFrom("text-file.txt");
        this.extractedShouldHave(this.remainingExpectedTerms());
    }

    @Test
    public void shouldExtractTextFromDocFile() throws Exception {
        this.extractTermsFrom("modeshape.doc");
        this.loadExpectedFrom("modeshape.txt");
        this.extractedShouldHave(this.remainingExpectedTerms());
    }

    @Test
    public void shouldExtractTextFromDocxFile() throws Exception {
        this.extractTermsFrom("modeshape.docx");
        this.loadExpectedFrom("modeshape.txt");
        // Without this comparison the test could never fail on wrong extractor output.
        this.extractedShouldHave(this.remainingExpectedTerms());
    }

    @Test
    public void shouldExtractTextFromPdfFileGS() throws Exception {
        this.extractTermsFrom("modeshape_gs.pdf");
        this.assertExtractedMatchesExpected();
    }

    @Test
    @Ignore(value="Exposes the Tika/PDF box bug that characters get duplicated when parsing pdfs produced by PDF Context")
    public void shouldExtractTextFromPdfFilePdfContext() throws Exception {
        this.extractTermsFrom("modeshape_pdfcontext.pdf");
        this.assertExtractedMatchesExpected();
    }

    /**
     * Verifies the terms extracted from one of the PDF resources against {@code modeshape.txt}.
     * <p>
     * The extracted sequence is expected to contain, in addition to the reference text, the term
     * {@code 2011-01-24} followed by {@code - 1/2 -} before the first part of the text and
     * {@code 2011-01-24} followed by {@code - 2/2 -} before the rest (presumably per-page
     * decorations of the PDF). The first part of the reference text ends at the terms
     * {@code -}, {@code versioning}.
     *
     * @throws IOException if the reference resource cannot be read
     */
    private void assertExtractedMatchesExpected() throws IOException {
        this.loadExpectedFrom("modeshape.txt");
        this.extractedShouldHave("2011-01-24");
        this.extractedShouldHave("-", "1/2", "-");
        this.extractedShouldHave(this.expectedTermsThrough("-", "versioning"));
        this.extractedShouldHave("2011-01-24");
        this.extractedShouldHave("-", "2/2", "-");
        this.extractedShouldHave(this.remainingExpectedTerms());
    }

    /**
     * @return the expected terms that have not yet been consumed; this is the live list, not a copy
     */
    private List<String> remainingExpectedTerms() {
        return this.expected;
    }

    /**
     * Asserts that the next extracted terms equal the given words, in order, consuming them.
     *
     * @param words the terms that must appear next in the extracted sequence
     */
    private void extractedShouldHave(String ... words) {
        for (String word : words) {
            this.assertNextExtractedIs(word);
        }
    }

    /**
     * Asserts that the next extracted terms equal the given words, in order, consuming them.
     *
     * @param words the terms that must appear next in the extracted sequence
     */
    private void extractedShouldHave(List<String> words) {
        for (String word : words) {
            this.assertNextExtractedIs(word);
        }
    }

    /**
     * Pops the next extracted term and asserts that it equals {@code word}.
     * <p>
     * Fails with an assertion error (rather than a {@code NoSuchElementException} from
     * {@code pop()}) when the extracted sequence is already exhausted, so the failure names the
     * term that was missing.
     *
     * @param word the term expected at the head of the extracted sequence
     */
    private void assertNextExtractedIs(String word) {
        Assert.assertThat("No more extracted terms, but expected '" + word + "'", this.extracted.isEmpty(), Is.is(false));
        Assert.assertThat(this.extracted.pop(), Is.is(word));
    }

    /**
     * Removes and returns expected terms from the head of the expected sequence, up to and
     * including the first occurrence of the given consecutive words.
     * <p>
     * If the marker words are never found in full, every remaining expected term is consumed and
     * returned.
     *
     * @param words the consecutive terms that mark the end of the returned run
     * @return the consumed terms in order; empty if {@code words} is null or empty
     */
    private List<String> expectedTermsThrough(String ... words) {
        if (words == null || words.length == 0) {
            return Collections.emptyList();
        }
        LinkedList<String> result = new LinkedList<String>();
        String nextWord = words[0];
        while (nextWord != null && !this.expected.isEmpty()) {
            String word = this.expected.pop();
            result.add(word);
            if (!word.equals(nextWord)) {
                continue;
            }
            // The first marker word matched; the following terms must match the rest of the marker.
            boolean foundAll = true;
            for (int i = 1; i != words.length; ++i) {
                if (this.expected.isEmpty()) {
                    // Ran out of expected terms in the middle of the marker: pop() would throw.
                    foundAll = false;
                    break;
                }
                String next = this.expected.pop();
                result.add(next);
                if (!next.equals(words[i])) {
                    foundAll = false;
                    break;
                }
            }
            if (foundAll) {
                return result;
            }
        }
        return result;
    }

    /**
     * Runs the extractor over a classpath resource and appends the resulting terms to
     * {@link #extracted}.
     *
     * @param resourcePath classpath-relative path of the document to extract text from
     * @throws Exception if the resource cannot be read or the extraction fails
     */
    private void extractTermsFrom(String resourcePath) throws Exception {
        InputStream stream = this.getClass().getClassLoader().getResourceAsStream(resourcePath);
        Assert.assertThat("Test resource not found: " + resourcePath, stream, Is.is(IsNull.notNullValue()));
        try {
            TextExtractorOutput output = new TextExtractorOutput();
            this.extractor.extractFrom(new InMemoryTestBinary(stream), output, new TextExtractorContext());
            this.addWords(this.extracted, output.getText());
        }
        finally {
            stream.close();
        }
    }

    /**
     * Reads a classpath resource as text and appends its terms to {@link #expected}.
     *
     * @param resourcePath classpath-relative path of the reference text
     * @throws IOException if the resource cannot be read or closed
     */
    private void loadExpectedFrom(String resourcePath) throws IOException {
        InputStream stream = this.getClass().getClassLoader().getResourceAsStream(resourcePath);
        Assert.assertThat("Test resource not found: " + resourcePath, stream, Is.is(IsNull.notNullValue()));
        try {
            this.addWords(this.expected, IoUtil.read(stream));
        }
        finally {
            stream.close();
        }
    }

    /**
     * Splits {@code input} on runs of whitespace and double-quote characters and appends every
     * non-empty piece to {@code words}.
     *
     * @param words the list to append to
     * @param input the text to split
     */
    private void addWords(List<String> words, String input) {
        for (String word : input.split("[\\s\"]+")) {
            // split() yields a leading empty string when the input starts with a delimiter.
            if (word.length() > 0) {
                words.add(word);
            }
        }
    }
}

