package org.modeshape.extractor.tika;

import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import org.hamcrest.core.Is;
import org.hamcrest.core.IsNull;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.modeshape.common.collection.Problems;
import org.modeshape.common.collection.SimpleProblems;
import org.modeshape.common.util.IoUtil;
import org.modeshape.graph.ExecutionContext;
import org.modeshape.graph.property.Path;
import org.modeshape.graph.property.Property;
import org.modeshape.graph.text.TextExtractorContext;

/* loaded from: input_file:org/modeshape/extractor/tika/TikaTextExtractorTest.class */
/**
 * Tests for {@link TikaTextExtractor}.
 *
 * <p>The mime-type tests check which resource names the extractor claims to support. The
 * extraction tests run the extractor over a classpath resource, split the produced text into
 * terms and compare those terms, in order, against the terms of a reference text resource.
 */
public class TikaTextExtractorTest {
    private TikaTextExtractor extractor;
    private ExecutionContext execContext;
    /** Path handed to the extractor context; derived from the resource name when not preset. */
    private Path inputPath;
    private Set<Property> inputProperties;
    /** Mime type handed to the extractor context; detected from the resource name when not preset. */
    private String mimeType;
    private Problems problems;
    /** When {@code true}, the extracted text is echoed to standard output. */
    private boolean print = false;
    /** Terms produced by the extractor that have not been checked yet (consumed from the head). */
    private LinkedList<String> extracted = null;
    /** Reference terms that have not been handed out yet (consumed from the head). */
    private LinkedList<String> expected = null;

    /** Creates a fresh extractor, execution context and empty term lists before every test. */
    @Before
    public void beforeEach() {
        this.execContext = new ExecutionContext();
        this.extractor = new TikaTextExtractor();
        this.inputProperties = new HashSet<>();
        this.print = false;
        this.extracted = new LinkedList<>();
        this.expected = new LinkedList<>();
    }

    @Test
    public void shouldIncludedNoMimeTypesByDefault() {
        Assert.assertThat(this.extractor.getIncludedMimeTypes().isEmpty(), Is.is(true));
    }

    @Test
    public void shouldExcludedPackageTypeMimeTypesByDefault() {
        boolean excludesDefaults = this.extractor.getExcludedMimeTypes()
                .containsAll(TikaTextExtractor.DEFAULT_EXCLUDED_MIME_TYPES);
        Assert.assertThat(excludesDefaults, Is.is(true));
    }

    @Test
    public void shouldSupportExtractingFromTextFiles() throws IOException {
        Assert.assertThat(this.extractor.supportsMimeType(mimeTypeOf("modeshape.txt")), Is.is(true));
    }

    @Test
    public void shouldSupportExtractingFromPdfFiles() throws IOException {
        Assert.assertThat(this.extractor.supportsMimeType(mimeTypeOf("modeshape.pdf")), Is.is(true));
    }

    @Test
    public void shouldNotSupportExtractingFromPostscriptFiles() throws IOException {
        Assert.assertThat(this.extractor.supportsMimeType(mimeTypeOf("modeshape.ps")), Is.is(false));
    }

    @Test
    public void shouldSupportExtractingFromDocWordFiles() throws IOException {
        Assert.assertThat(this.extractor.supportsMimeType(mimeTypeOf("modeshape.doc")), Is.is(true));
    }

    @Test
    public void shouldSupportExtractingFromDocxWordFiles() throws IOException {
        Assert.assertThat(this.extractor.supportsMimeType(mimeTypeOf("modeshape.docx")), Is.is(true));
    }

    @Test
    public void shouldExtractTextFromTextFile() throws IOException {
        extractTermsFrom("modeshape.txt");
        loadExpectedFrom("modeshape.txt");
        extractedShouldHave(remainingExpectedTerms());
    }

    @Test
    public void shouldExtractTextFromDocFile() throws IOException {
        extractTermsFrom("modeshape.doc");
        loadExpectedFrom("modeshape.txt");
        extractedShouldHave(remainingExpectedTerms());
    }

    @Test
    public void shouldExtractTextFromDocxFile() throws IOException {
        // NOTE(review): this test only checks that extraction finishes without reported
        // problems; the extracted terms are printed but never compared with the expected
        // terms. Confirm whether the missing comparison is intentional.
        this.print = true;
        extractTermsFrom("modeshape.docx");
        loadExpectedFrom("modeshape.txt");
    }

    @Test
    public void shouldExtractTextFromPdfFile() throws IOException {
        extractTermsFrom("modeshape.pdf");
        loadExpectedFrom("modeshape.txt");
        // The PDF output starts with a date that the reference text does not supply here.
        extractedShouldHave("2011-01-24");
        extractedShouldHave(expectedTermsThrough("-", "versioning"));
        // Presumably a page footer/page number emitted by the PDF — TODO confirm.
        extractedShouldHave("-", "1/2", "-");
        // Appends the reference terms a second time behind whatever is still unconsumed.
        loadExpectedFrom("modeshape.txt");
        // Result deliberately ignored: this only discards expected terms through the sequence.
        expectedTermsThrough("managing", "this", "complex", "and");
        extractedShouldHave(expectedTermsThrough("-", "versioning"));
        extractedShouldHave("2011-01-24");
        extractedShouldHave(remainingExpectedTerms());
    }

    /**
     * Creates a {@link Path} from its string form using the execution context's path factory.
     *
     * @param path the string form of the path
     * @return the path
     */
    protected Path path(String path) {
        return (Path) this.execContext.getValueFactories().getPathFactory().create(path);
    }

    /**
     * Returns the expected terms that have not been consumed yet.
     *
     * @return the live list of remaining expected terms (not a copy)
     */
    protected List<String> remainingExpectedTerms() {
        return this.expected;
    }

    /**
     * Asserts that the next extracted terms equal the given terms, consuming them from the
     * extracted list.
     *
     * @param terms the terms expected next, in order
     */
    protected void extractedShouldHave(String... terms) {
        extractedShouldHave(Arrays.asList(terms));
    }

    /**
     * Asserts that the next extracted terms equal the given terms, consuming one extracted term
     * per given term. The given list itself is only read.
     *
     * @param terms the terms expected next, in order
     */
    protected void extractedShouldHave(List<String> terms) {
        for (String term : terms) {
            Assert.assertThat(this.extracted.pop(), Is.is(term));
        }
    }

    /**
     * Consumes expected terms from the head of the expected list up to and including the first
     * occurrence of the given consecutive sequence of terms.
     *
     * <p>If the sequence never occurs, every remaining expected term is consumed, a diagnostic
     * line is printed to standard output and the consumed terms are returned.
     *
     * @param terms the sequence of consecutive terms that ends the consumed range
     * @return the consumed terms in order; an empty list when {@code terms} is null or empty or
     *         when its first element is null
     */
    protected List<String> expectedTermsThrough(String... terms) {
        if (terms == null || terms.length == 0) {
            return Collections.emptyList();
        }
        LinkedList<String> consumed = new LinkedList<>();
        // A null first term can never match, so nothing is consumed in that case.
        if (terms[0] != null) {
            while (!this.expected.isEmpty()) {
                consumed.add(this.expected.pop());
                // Checking the tail after every pop also finds a sequence that begins with
                // the very term that ended an earlier partial match.
                if (endsWith(consumed, terms)) {
                    return consumed;
                }
            }
        }
        System.out.println("expected terms thru " + Arrays.toString(terms) + " are: " + consumed);
        return consumed;
    }

    /** Returns whether the last {@code suffix.length} elements of {@code terms} equal {@code suffix}. */
    private static boolean endsWith(LinkedList<String> terms, String[] suffix) {
        Iterator<String> fromTail = terms.descendingIterator();
        for (int i = suffix.length - 1; i >= 0; i--) {
            if (!fromTail.hasNext() || !fromTail.next().equals(suffix[i])) {
                return false;
            }
        }
        return true;
    }

    /**
     * Runs the extractor over the named classpath resource and appends the resulting terms to
     * the extracted list. Fails the test if the resource is missing or if the extraction
     * recorded any problems.
     *
     * @param resourceName name of the classpath resource to extract text from
     * @throws IOException if reading the resource or extracting from it fails
     */
    protected void extractTermsFrom(String resourceName) throws IOException {
        try (InputStream stream = getClass().getClassLoader().getResourceAsStream(resourceName)) {
            Assert.assertThat(stream, Is.is(IsNull.notNullValue()));
            // Only fill in the context inputs that have not been preset.
            if (this.inputPath == null) {
                this.inputPath = path(resourceName);
            }
            if (this.mimeType == null) {
                this.mimeType = mimeTypeOf(resourceName);
            }
            if (this.problems == null) {
                this.problems = new SimpleProblems();
            }
            TextExtractorContext context = new TextExtractorContext(this.execContext, this.inputPath,
                    this.inputProperties, this.mimeType, this.problems);
            StringTextExtractorOutput output = new StringTextExtractorOutput();
            this.extractor.extractFrom(stream, output, context);
            String text = output.toString();
            if (this.print) {
                System.out.println("Text extracted from \"" + resourceName + "\"");
                System.out.println("============================================");
                System.out.println(text);
            }
            if (!this.problems.isEmpty()) {
                // Print the problems first so the failure below can be diagnosed.
                System.out.println(this.problems);
                Assert.assertThat(this.problems.size(), Is.is(0));
            }
            addWords(this.extracted, text);
        }
    }

    /**
     * Reads the named classpath resource as text and appends its terms to the expected list.
     * Existing expected terms are kept, so repeated calls accumulate.
     *
     * @param resourceName name of the classpath resource holding the reference text
     * @throws IOException if reading the resource fails
     */
    protected void loadExpectedFrom(String resourceName) throws IOException {
        try (InputStream stream = getClass().getClassLoader().getResourceAsStream(resourceName)) {
            Assert.assertThat(stream, Is.is(IsNull.notNullValue()));
            addWords(this.expected, IoUtil.read(stream));
        }
    }

    /**
     * Splits the text on runs of whitespace and double-quote characters and appends every
     * non-empty piece to the given list.
     *
     * @param words the list to append to
     * @param text the text to split
     */
    protected void addWords(List<String> words, String text) {
        for (String word : text.split("[\\s\"]+")) {
            if (!word.isEmpty()) {
                words.add(word);
            }
        }
    }

    /**
     * Detects the mime type of a resource from its name alone (no content is supplied).
     *
     * @param resourceName the resource name
     * @return the mime type reported by the execution context's detector
     * @throws IOException if detection fails
     */
    protected String mimeTypeOf(String resourceName) throws IOException {
        return this.execContext.getMimeTypeDetector().mimeTypeOf(resourceName, (InputStream) null);
    }
}
