/*
 * Decompiled with CFR 0.152.
 */
package org.kie.pmml.commons.model.expressions;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.stream.IntStream;
import org.apache.commons.text.similarity.LevenshteinDistance;
import org.junit.Assert;
import org.junit.Test;
import org.kie.pmml.api.enums.COUNT_HITS;
import org.kie.pmml.api.enums.LOCAL_TERM_WEIGHTS;
import org.kie.pmml.commons.CommonTestingUtility;
import org.kie.pmml.commons.model.ProcessingDTO;
import org.kie.pmml.commons.model.expressions.KiePMMLConstant;
import org.kie.pmml.commons.model.expressions.KiePMMLExpression;
import org.kie.pmml.commons.model.expressions.KiePMMLFieldRef;
import org.kie.pmml.commons.model.expressions.KiePMMLInlineTable;
import org.kie.pmml.commons.model.expressions.KiePMMLRow;
import org.kie.pmml.commons.model.expressions.KiePMMLTextIndex;
import org.kie.pmml.commons.model.expressions.KiePMMLTextIndexNormalization;
import org.kie.pmml.commons.model.tuples.KiePMMLNameValue;

/**
 * Unit tests for {@link KiePMMLTextIndex}.
 * <p>
 * Covers the full {@code evaluate} path (with and without text index normalizations) and the static
 * building blocks it exposes: {@code evaluateRaw}, {@code evaluateAugmentedNormalizedTermFrequency},
 * the Levenshtein-distance helpers and {@code splitText}.
 */
public class KiePMMLTextIndexTest {
    private static final String TERM_0 = "brown fox";
    private static final String TEXT_0 = "The quick browny fox 234 -. jumps over the lazy dog with another Brown Fox. The brown fox runs away and to be with another  ; : brown-foxy.";
    private static final String NOT_NORMALIZED_TEXT_0 = "The quick blacky fox 234 -. jumps over the lazy dog with trotother Brown Fox. The brown fox runs away and to be with another  ; : again.";
    private static final String TERM_1 = "ui_good";
    private static final String NOT_NORMALIZED_TEXT_1 = "Testing the app for a few days convinced me the interfaces are excellent!";
    private static final String FIELD_NAME = "FIELD_NAME";
    /** Tolerance used when comparing the floating point results of {@code evaluateRaw}. */
    private static final double DELTA = 1.0E-7;

    /**
     * Evaluates a text index without normalizations against {@code TEXT_0}, once per local term
     * weight, and expects the values derived from a frequency of 3 and a max frequency of 2.
     */
    @Test
    public void evaluateNoTextIndex0Normalizations() {
        KiePMMLConstant kiePMMLConstant = new KiePMMLConstant("NAME-1", Collections.emptyList(), (Object) TERM_0, null);
        List<KiePMMLNameValue> kiePMMLNameValues = Collections.singletonList(new KiePMMLNameValue(FIELD_NAME, (Object) TEXT_0));
        ProcessingDTO processingDTO = CommonTestingUtility.getProcessingDTO(kiePMMLNameValues);
        expectedByLocalTermWeights(3.0, 2).forEach((localTermWeights, expected) -> {
            KiePMMLTextIndex kiePMMLTextIndex = (KiePMMLTextIndex) KiePMMLTextIndex.builder(FIELD_NAME, Collections.emptyList(), (KiePMMLExpression) kiePMMLConstant)
                    .withMaxLevenshteinDistance(2)
                    .withLocalTermWeights(localTermWeights)
                    .withIsCaseSensitive(true)
                    .withWordSeparatorCharacterRE("\\s+")
                    .build();
            Assert.assertEquals((Object) expected, (Object) kiePMMLTextIndex.evaluate(processingDTO));
        });
    }

    /**
     * Same expectations as {@link #evaluateNoTextIndex0Normalizations()}, but the input is
     * {@code NOT_NORMALIZED_TEXT_0} and the index is configured with the normalizations returned by
     * {@link #getKiePMMLTextIndexNormalizations()}.
     */
    @Test
    public void evaluateTextIndex0Normalizations() {
        KiePMMLConstant kiePMMLConstant = new KiePMMLConstant("NAME-1", Collections.emptyList(), (Object) TERM_0, null);
        List<KiePMMLNameValue> kiePMMLNameValues = Collections.singletonList(new KiePMMLNameValue(FIELD_NAME, (Object) NOT_NORMALIZED_TEXT_0));
        ProcessingDTO processingDTO = CommonTestingUtility.getProcessingDTO(kiePMMLNameValues);
        expectedByLocalTermWeights(3.0, 2).forEach((localTermWeights, expected) -> {
            KiePMMLTextIndex kiePMMLTextIndex = (KiePMMLTextIndex) KiePMMLTextIndex.builder(FIELD_NAME, Collections.emptyList(), (KiePMMLExpression) kiePMMLConstant)
                    .withMaxLevenshteinDistance(2)
                    .withLocalTermWeights(localTermWeights)
                    .withIsCaseSensitive(true)
                    .withTextIndexNormalizations(getKiePMMLTextIndexNormalizations())
                    .build();
            Assert.assertEquals((Object) expected, (Object) kiePMMLTextIndex.evaluate(processingDTO));
        });
    }

    /**
     * Chains two regex-based normalizations (a stemming table followed by a feature table) and
     * expects a BINARY result of 1.0 when looking for {@code TERM_1} in {@code NOT_NORMALIZED_TEXT_1}
     * with a case-insensitive index.
     */
    @Test
    public void evaluateTextIndex1Normalizations() {
        KiePMMLRow row0_0 = row("string", "interfaces?", "stem", "interface", "true");
        KiePMMLRow row0_1 = row("string", "is|are|seem(ed|s?)|were", "stem", "be", "true");
        KiePMMLRow row0_2 = row("string", "user friendl(y|iness)", "stem", "user_friendly", "true");
        KiePMMLInlineTable inlineTable0 = new KiePMMLInlineTable("inlineTable0", Collections.emptyList(), Arrays.asList(row0_0, row0_1, row0_2));
        KiePMMLTextIndexNormalization indexNormalization0 = (KiePMMLTextIndexNormalization) KiePMMLTextIndexNormalization.builder("indexNormalization0", Collections.emptyList())
                .withInField("string")
                .withOutField("stem")
                .withRegexField("regex")
                .withKiePMMLInlineTable(inlineTable0)
                .build();

        KiePMMLRow row1_0 = row("re", "interface be (user_friendly|well designed|excellent)", "feature", TERM_1, "true");
        KiePMMLInlineTable inlineTable1 = new KiePMMLInlineTable("inlineTable1", Collections.emptyList(), Collections.singletonList(row1_0));
        KiePMMLTextIndexNormalization indexNormalization1 = (KiePMMLTextIndexNormalization) KiePMMLTextIndexNormalization.builder("indexNormalization1", Collections.emptyList())
                .withInField("re")
                .withOutField("feature")
                .withRegexField("regex")
                .withKiePMMLInlineTable(inlineTable1)
                .build();

        KiePMMLFieldRef kiePMMLFieldRef = new KiePMMLFieldRef("term", Collections.emptyList(), null);
        KiePMMLTextIndex kiePMMLTextIndex = (KiePMMLTextIndex) KiePMMLTextIndex.builder("reviewText", Collections.emptyList(), (KiePMMLExpression) kiePMMLFieldRef)
                .withMaxLevenshteinDistance(2)
                .withLocalTermWeights(LOCAL_TERM_WEIGHTS.BINARY)
                .withIsCaseSensitive(false)
                .withTextIndexNormalizations(Arrays.asList(indexNormalization0, indexNormalization1))
                .build();
        List<KiePMMLNameValue> kiePMMLNameValues = Arrays.asList(new KiePMMLNameValue("term", (Object) TERM_1), new KiePMMLNameValue("reviewText", (Object) NOT_NORMALIZED_TEXT_1));
        ProcessingDTO processingDTO = CommonTestingUtility.getProcessingDTO(kiePMMLNameValues);
        Assert.assertEquals((Object) 1.0, (Object) kiePMMLTextIndex.evaluate(processingDTO));
    }

    /**
     * Exercises {@code evaluateRaw} with its second boolean argument set to {@code true}, varying
     * the first boolean argument and the word separator.
     */
    @Test
    public void evaluateRawTokenize() {
        LevenshteinDistance levenshteinDistance = new LevenshteinDistance(2);
        assertEvaluateRaw(expectedByLocalTermWeights(3.0, 2), true, true, "\\s+", levenshteinDistance);
        assertEvaluateRaw(expectedByLocalTermWeights(3.0, 3), false, true, "\\s+", levenshteinDistance);
        // Splitting on '-' as well is expected to yield one more hit.
        assertEvaluateRaw(expectedByLocalTermWeights(4.0, 3), false, true, "[\\s\\-]", levenshteinDistance);
    }

    /**
     * Exercises {@code evaluateRaw} with its second boolean argument set to {@code false}, varying
     * the first boolean argument and the word separator.
     */
    @Test
    public void evaluateRawNoTokenize() {
        LevenshteinDistance levenshteinDistance = new LevenshteinDistance(2);
        assertEvaluateRaw(expectedByLocalTermWeights(3.0, 2), true, false, "\\s+", levenshteinDistance);
        assertEvaluateRaw(expectedByLocalTermWeights(3.0, 3), false, false, "\\s+", levenshteinDistance);
        // Unlike the tokenized case, the expected frequency stays at 3 with the wider separator.
        assertEvaluateRaw(expectedByLocalTermWeights(3.0, 3), false, false, "[\\s\\-]", levenshteinDistance);
    }

    /**
     * Builds a shuffled list in which the most frequent word occurs 23 times and checks that
     * {@code evaluateAugmentedNormalizedTermFrequency} returns {@code 0.5 * (1 + 4 / 23)} for a
     * calculated distance of 4.
     */
    @Test
    public void evaluateAugmentedNormalizedTermFrequency() {
        int maxFrequency = 23;
        // Key: number of occurrences to generate; value: the word to repeat.
        Map<Integer, String> source = new HashMap<>();
        source.put(maxFrequency, "aword");
        source.put(19, "anotherword");
        source.put(5, "adifferentword");
        source.put(3, "lastword");
        List<String> texts = new ArrayList<>();
        source.forEach((count, word) -> IntStream.range(0, count).forEach(i -> texts.add(word)));
        // Order must not matter for the result.
        Collections.shuffle(texts);
        int calculatedLevenshteinDistance = 4;
        // Numeric binary-evaluation term of the expected formula (must be a number, not a boolean).
        double binaryEvaluation = 1.0;
        double expected = 0.5 * (binaryEvaluation + calculatedLevenshteinDistance / (double) maxFrequency);
        Assert.assertEquals(expected, (double) KiePMMLTextIndex.evaluateAugmentedNormalizedTermFrequency(calculatedLevenshteinDistance, texts), 0.0);
    }

    /**
     * Checks the hit counts returned by {@code evaluateLevenshteinDistanceAllHits} for
     * {@code TERM_0} in {@code TEXT_0} with thresholds 0, 1 and 2 and two word separators.
     */
    @Test
    public void evaluateLevenshteinDistanceAllHits() {
        assertAllHits("\\s+", 1L, 2L, 3L);
        assertAllHits("[\\s\\-]", 1L, 3L, 4L);
    }

    /**
     * Checks that {@code evaluateLevenshteinDistanceBestHits} returns 2 for the term {@code "The"}
     * in {@code TEXT_0}, whatever the threshold (0, 1, 2) and word separator.
     */
    @Test
    public void evaluateLevenshteinDistanceBestHits() {
        assertBestHits("\\s+", 2L, 2L, 2L);
        assertBestHits("[\\s\\-]", 2L, 2L, 2L);
    }

    /**
     * Checks {@code evaluateLevenshteinDistance} for {@code TERM_0} against texts that are 0, 1 and
     * 2 edits away; -1 is expected whenever the distance exceeds the threshold.
     */
    @Test
    public void evaluateLevenshteinDistanceSplitText() {
        assertLevenshteinDistances(TERM_0, 0L, 0L, 0L);
        assertLevenshteinDistances("brown foxy", -1L, 1L, 1L);
        assertLevenshteinDistances("browny foxy", -1L, -1L, 2L);
    }

    /**
     * Checks the number of tokens {@code splitText} returns for {@code TEXT_0} and that every token
     * contains at least one alphanumeric character and nothing else but alphanumerics or spaces.
     */
    @Test
    public void splitText() {
        assertSplitText("\\s+", 25L);
        assertSplitText("[\\s\\-]", 26L);
    }

    /**
     * Builds the expected result for each local term weight exercised by these tests.
     *
     * @param frequency    expected number of hits (TERM_FREQUENCY)
     * @param maxFrequency expected frequency of the most frequent word, used by
     *                     AUGMENTED_NORMALIZED_TERM_FREQUENCY
     * @return a new map from local term weight to expected value; BINARY is always 1.0
     */
    private static Map<LOCAL_TERM_WEIGHTS, Double> expectedByLocalTermWeights(double frequency, int maxFrequency) {
        Map<LOCAL_TERM_WEIGHTS, Double> expectedResults = new HashMap<>();
        expectedResults.put(LOCAL_TERM_WEIGHTS.TERM_FREQUENCY, frequency);
        expectedResults.put(LOCAL_TERM_WEIGHTS.BINARY, 1.0);
        expectedResults.put(LOCAL_TERM_WEIGHTS.LOGARITHMIC, Math.log10(1.0 + frequency));
        expectedResults.put(LOCAL_TERM_WEIGHTS.AUGMENTED_NORMALIZED_TERM_FREQUENCY, 0.5 * (1.0 + frequency / (double) maxFrequency));
        return expectedResults;
    }

    /**
     * Asserts, for every entry of {@code expectedResults}, the value returned by
     * {@code evaluateRaw} for {@code TERM_0} in {@code TEXT_0} with {@code COUNT_HITS.ALL_HITS}.
     *
     * @param caseSensitive forwarded as the first boolean argument of {@code evaluateRaw}
     *                      (presumably the case-sensitivity flag - confirm against KiePMMLTextIndex)
     * @param tokenize      forwarded as the second boolean argument of {@code evaluateRaw}
     */
    private static void assertEvaluateRaw(Map<LOCAL_TERM_WEIGHTS, Double> expectedResults, boolean caseSensitive, boolean tokenize, String wordSeparatorCharacterRE, LevenshteinDistance levenshteinDistance) {
        expectedResults.forEach((localTermWeights, expected) -> Assert.assertEquals(
                (double) expected,
                (double) KiePMMLTextIndex.evaluateRaw(caseSensitive, tokenize, TERM_0, TEXT_0, wordSeparatorCharacterRE, localTermWeights, COUNT_HITS.ALL_HITS, levenshteinDistance),
                DELTA));
    }

    /**
     * Asserts the all-hits count of {@code TERM_0} in {@code TEXT_0}; the i-th expected value is
     * checked with a Levenshtein threshold of i.
     */
    private static void assertAllHits(String wordSeparatorCharacterRE, long... expectedByThreshold) {
        Pattern pattern = Pattern.compile(wordSeparatorCharacterRE);
        List<String> terms = KiePMMLTextIndex.splitText(TERM_0, pattern);
        List<String> texts = KiePMMLTextIndex.splitText(TEXT_0, pattern);
        for (int threshold = 0; threshold < expectedByThreshold.length; threshold++) {
            LevenshteinDistance levenshteinDistance = new LevenshteinDistance(threshold);
            Assert.assertEquals("threshold " + threshold, expectedByThreshold[threshold], (long) KiePMMLTextIndex.evaluateLevenshteinDistanceAllHits(levenshteinDistance, terms, texts));
        }
    }

    /**
     * Asserts the best-hits count of {@code "The"} in {@code TEXT_0}; the i-th expected value is
     * checked with a Levenshtein threshold of i.
     */
    private static void assertBestHits(String wordSeparatorCharacterRE, long... expectedByThreshold) {
        Pattern pattern = Pattern.compile(wordSeparatorCharacterRE);
        List<String> terms = KiePMMLTextIndex.splitText("The", pattern);
        List<String> texts = KiePMMLTextIndex.splitText(TEXT_0, pattern);
        for (int threshold = 0; threshold < expectedByThreshold.length; threshold++) {
            LevenshteinDistance levenshteinDistance = new LevenshteinDistance(threshold);
            Assert.assertEquals("threshold " + threshold, expectedByThreshold[threshold], (long) KiePMMLTextIndex.evaluateLevenshteinDistanceBestHits(levenshteinDistance, terms, texts));
        }
    }

    /**
     * Asserts the distance between {@code TERM_0} and {@code toScan}; the i-th expected value is
     * checked with a Levenshtein threshold of i.
     */
    private static void assertLevenshteinDistances(String toScan, long... expectedByThreshold) {
        for (int threshold = 0; threshold < expectedByThreshold.length; threshold++) {
            LevenshteinDistance levenshteinDistance = new LevenshteinDistance(threshold);
            Assert.assertEquals("threshold " + threshold, expectedByThreshold[threshold], (long) KiePMMLTextIndex.evaluateLevenshteinDistance(levenshteinDistance, TERM_0, toScan));
        }
    }

    /**
     * Splits {@code TEXT_0} with the given separator and asserts the token count and the content
     * of each token.
     */
    private static void assertSplitText(String wordSeparatorCharacterRE, long expectedSize) {
        Pattern unwantedPattern = Pattern.compile("[^a-zA-Z0-9 ]");
        Pattern wantedPattern = Pattern.compile("[a-zA-Z0-9]");
        List<String> retrieved = KiePMMLTextIndex.splitText(TEXT_0, Pattern.compile(wordSeparatorCharacterRE));
        Assert.assertEquals(expectedSize, retrieved.size());
        for (String txt : retrieved) {
            Assert.assertFalse(unwantedPattern.matcher(txt).find());
            Assert.assertTrue(wantedPattern.matcher(txt).find());
        }
    }

    /**
     * Creates a row holding an input column, an output column and a {@code "regex"} column.
     *
     * @param regexFlag value stored under the {@code "regex"} key ({@code "true"} or {@code "false"})
     */
    private static KiePMMLRow row(String inField, String inValue, String outField, String outValue, String regexFlag) {
        HashMap<String, String> columnValues = new HashMap<>();
        columnValues.put(inField, inValue);
        columnValues.put(outField, outValue);
        columnValues.put("regex", regexFlag);
        return new KiePMMLRow(columnValues);
    }

    /**
     * Returns the two recursive normalizations used by {@link #evaluateTextIndex0Normalizations()}:
     * the first maps {@code blacky} to {@code browny} (literal) and {@code trot?} to {@code an}
     * (regex); the second maps {@code again|is|are|seem(ed|s?)|were?} to {@code brown-foxy} (regex).
     */
    private List<KiePMMLTextIndexNormalization> getKiePMMLTextIndexNormalizations() {
        KiePMMLRow row0 = row("string", "blacky", "stem", "browny", "false");
        KiePMMLRow row1 = row("string", "trot?", "stem", "an", "true");
        KiePMMLInlineTable inlineTable1 = new KiePMMLInlineTable("inlineTable1", Collections.emptyList(), Arrays.asList(row0, row1));
        KiePMMLTextIndexNormalization indexNormalization1 = (KiePMMLTextIndexNormalization) KiePMMLTextIndexNormalization.builder("indexNormalization1", Collections.emptyList())
                .withKiePMMLInlineTable(inlineTable1)
                .withRecursive(true)
                .build();

        KiePMMLRow row2 = row("string", "again|is|are|seem(ed|s?)|were?", "stem", "brown-foxy", "true");
        KiePMMLInlineTable inlineTable2 = new KiePMMLInlineTable("inlineTable2", Collections.emptyList(), Collections.singletonList(row2));
        KiePMMLTextIndexNormalization indexNormalization2 = (KiePMMLTextIndexNormalization) KiePMMLTextIndexNormalization.builder("indexNormalization2", Collections.emptyList())
                .withKiePMMLInlineTable(inlineTable2)
                .withRecursive(true)
                .build();
        return Arrays.asList(indexNormalization1, indexNormalization2);
    }
}

