package org.kie.pmml.commons.model.expressions;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.stream.IntStream;
import org.apache.commons.text.similarity.LevenshteinDistance;
import org.assertj.core.api.Assertions;
import org.assertj.core.data.Offset;
import org.junit.jupiter.api.Test;
import org.kie.pmml.api.enums.COUNT_HITS;
import org.kie.pmml.api.enums.DATA_TYPE;
import org.kie.pmml.api.enums.LOCAL_TERM_WEIGHTS;
import org.kie.pmml.commons.CommonTestingUtility;
import org.kie.pmml.commons.model.ProcessingDTO;
import org.kie.pmml.commons.model.tuples.KiePMMLNameValue;

/* loaded from: input_file:org/kie/pmml/commons/model/expressions/KiePMMLTextIndexTest.class */
/**
 * Unit tests for {@link KiePMMLTextIndex}.
 *
 * <p>The tests exercise the builder/{@code evaluate} path (with and without text-index
 * normalizations) as well as the static helpers {@code evaluateRaw},
 * {@code evaluateAugmentedNormalizedTermFrequency}, {@code evaluateLevenshteinDistance*} and
 * {@code splitText}.
 */
public class KiePMMLTextIndexTest {

    private static final String TERM_0 = "brown fox";
    private static final String TEXT_0 = "The quick browny fox 234 -. jumps over the lazy dog with another Brown Fox. The brown fox runs away and to be with another  ; : brown-foxy.";
    private static final String NOT_NORMALIZED_TEXT_0 = "The quick blacky fox 234 -. jumps over the lazy dog with trotother Brown Fox. The brown fox runs away and to be with another  ; : again.";
    private static final String TERM_1 = "ui_good";
    private static final String NOT_NORMALIZED_TEXT_1 = "Testing the app for a few days convinced me the interfaces are excellent!";
    private static final String FIELD_NAME = "FIELD_NAME";

    /** Word separator matching runs of whitespace. */
    private static final String WHITESPACE_SEPARATOR = "\\s+";
    /** Word separator matching a single whitespace character or a hyphen. */
    private static final String WHITESPACE_OR_HYPHEN_SEPARATOR = "[\\s\\-]";
    /** Tolerance used when comparing the floating-point results of {@code evaluateRaw}. */
    private static final Offset<Double> TOLERANCE = Offset.offset(1.0E-7d);
    /** Matches any character that is neither alphanumeric nor a space. */
    private static final Pattern NON_ALPHANUMERIC = Pattern.compile("[^a-zA-Z0-9 ]");
    /** Matches any alphanumeric character. */
    private static final Pattern ALPHANUMERIC = Pattern.compile("[a-zA-Z0-9]");

    /** Evaluates a text index built without normalizations for every local term weight. */
    @Test
    void evaluateNoTextIndex0Normalizations() {
        KiePMMLConstant term = new KiePMMLConstant("NAME-1", Collections.emptyList(), TERM_0, (DATA_TYPE) null);
        ProcessingDTO processingDTO = CommonTestingUtility.getProcessingDTO(
                Collections.singletonList(new KiePMMLNameValue(FIELD_NAME, TEXT_0)));
        expectedWeights(3.0d, 2).forEach((localTermWeights, expected) -> {
            Object retrieved = KiePMMLTextIndex.builder(FIELD_NAME, Collections.emptyList(), term)
                    .withMaxLevenshteinDistance(2)
                    .withLocalTermWeights(localTermWeights)
                    .withIsCaseSensitive(true)
                    .withWordSeparatorCharacterRE("\\s+")
                    .build()
                    .evaluate(processingDTO);
            Assertions.assertThat(retrieved).isEqualTo(expected);
        });
    }

    /**
     * Evaluates a text index configured with the normalizations from
     * {@link #getKiePMMLTextIndexNormalizations()} against the not-normalized text; the expected
     * weights are the same as for the already-normalized {@code TEXT_0}.
     */
    @Test
    void evaluateTextIndex0Normalizations() {
        KiePMMLConstant term = new KiePMMLConstant("NAME-1", Collections.emptyList(), TERM_0, (DATA_TYPE) null);
        ProcessingDTO processingDTO = CommonTestingUtility.getProcessingDTO(
                Collections.singletonList(new KiePMMLNameValue(FIELD_NAME, NOT_NORMALIZED_TEXT_0)));
        expectedWeights(3.0d, 2).forEach((localTermWeights, expected) -> {
            Object retrieved = KiePMMLTextIndex.builder(FIELD_NAME, Collections.emptyList(), term)
                    .withMaxLevenshteinDistance(2)
                    .withLocalTermWeights(localTermWeights)
                    .withIsCaseSensitive(true)
                    .withTextIndexNormalizations(getKiePMMLTextIndexNormalizations())
                    .build()
                    .evaluate(processingDTO);
            Assertions.assertThat(retrieved).isEqualTo(expected);
        });
    }

    /**
     * Evaluates a case-insensitive, BINARY-weighted text index whose term comes from a field
     * reference and whose text goes through two chained regex normalizations.
     */
    @Test
    void evaluateTextIndex1Normalizations() {
        List<KiePMMLRow> stemmingRows = Arrays.asList(
                row("string", "interfaces?", "stem", "interface", "true"),
                row("string", "is|are|seem(ed|s?)|were", "stem", "be", "true"),
                row("string", "user friendl(y|iness)", "stem", "user_friendly", "true"));
        KiePMMLTextIndexNormalization stemming = KiePMMLTextIndexNormalization
                .builder("indexNormalization0", Collections.emptyList())
                .withInField("string")
                .withOutField("stem")
                .withRegexField("regex")
                .withKiePMMLInlineTable(new KiePMMLInlineTable("inlineTable0", Collections.emptyList(), stemmingRows))
                .build();

        List<KiePMMLRow> featureRows = Collections.singletonList(
                row("re", "interface be (user_friendly|well designed|excellent)", "feature", TERM_1, "true"));
        KiePMMLTextIndexNormalization featureExtraction = KiePMMLTextIndexNormalization
                .builder("indexNormalization1", Collections.emptyList())
                .withInField("re")
                .withOutField("feature")
                .withRegexField("regex")
                .withKiePMMLInlineTable(new KiePMMLInlineTable("inlineTable1", Collections.emptyList(), featureRows))
                .build();

        ProcessingDTO processingDTO = CommonTestingUtility.getProcessingDTO(Arrays.asList(
                new KiePMMLNameValue("term", TERM_1),
                new KiePMMLNameValue("reviewText", NOT_NORMALIZED_TEXT_1)));
        KiePMMLFieldRef termReference = new KiePMMLFieldRef("term", Collections.emptyList(), (String) null);

        Object retrieved = KiePMMLTextIndex.builder("reviewText", Collections.emptyList(), termReference)
                .withMaxLevenshteinDistance(2)
                .withLocalTermWeights(LOCAL_TERM_WEIGHTS.BINARY)
                .withIsCaseSensitive(false)
                .withTextIndexNormalizations(Arrays.asList(stemming, featureExtraction))
                .build()
                .evaluate(processingDTO);
        Assertions.assertThat(retrieved).isEqualTo(Double.valueOf(1.0d));
    }

    /** Checks {@code evaluateRaw} with its second boolean flag set to {@code true}. */
    @Test
    void evaluateRawTokenize() {
        LevenshteinDistance levenshteinDistance = new LevenshteinDistance(2);
        assertEvaluateRaw(true, true, WHITESPACE_SEPARATOR, levenshteinDistance, expectedWeights(3.0d, 2));
        assertEvaluateRaw(false, true, WHITESPACE_SEPARATOR, levenshteinDistance, expectedWeights(3.0d, 3));
        assertEvaluateRaw(false, true, WHITESPACE_OR_HYPHEN_SEPARATOR, levenshteinDistance, expectedWeights(4.0d, 3));
    }

    /** Checks {@code evaluateRaw} with its second boolean flag set to {@code false}. */
    @Test
    void evaluateRawNoTokenize() {
        LevenshteinDistance levenshteinDistance = new LevenshteinDistance(2);
        assertEvaluateRaw(true, false, WHITESPACE_SEPARATOR, levenshteinDistance, expectedWeights(3.0d, 2));
        assertEvaluateRaw(false, false, WHITESPACE_SEPARATOR, levenshteinDistance, expectedWeights(3.0d, 3));
        assertEvaluateRaw(false, false, WHITESPACE_OR_HYPHEN_SEPARATOR, levenshteinDistance, expectedWeights(3.0d, 3));
    }

    /**
     * Builds a shuffled word list in which the most frequent word occurs 23 times and checks the
     * augmented normalized term frequency computed for a hit count of 4.
     */
    @Test
    void evaluateAugmentedNormalizedTermFrequency() {
        Map<Integer, String> wordsByCount = new HashMap<>();
        wordsByCount.put(23, "aword");
        wordsByCount.put(19, "anotherword");
        wordsByCount.put(5, "adifferentword");
        wordsByCount.put(3, "lastword");
        List<String> texts = new ArrayList<>();
        wordsByCount.forEach((count, word) -> IntStream.range(0, count).forEach(i -> texts.add(word)));
        Collections.shuffle(texts);

        int calculatedLevenshteinDistance = 4;
        int maxFrequency = 23;
        // The division must be floating-point: with two ints, 4 / 23 truncates to 0 and the
        // expected value degenerates to 0.5 regardless of the inputs.
        double expected = 0.5d * (1 + (calculatedLevenshteinDistance / (double) maxFrequency));
        Assertions.assertThat(KiePMMLTextIndex.evaluateAugmentedNormalizedTermFrequency(calculatedLevenshteinDistance, texts))
                .isCloseTo(expected, Offset.offset(0.0d));
    }

    /** Counts all hits of {@code TERM_0} in {@code TEXT_0} for increasing Levenshtein thresholds. */
    @Test
    void evaluateLevenshteinDistanceAllHits() {
        Pattern whitespace = Pattern.compile("\\s+");
        List<String> terms = KiePMMLTextIndex.splitText(TERM_0, whitespace);
        List<String> texts = KiePMMLTextIndex.splitText(TEXT_0, whitespace);
        Assertions.assertThat(KiePMMLTextIndex.evaluateLevenshteinDistanceAllHits(new LevenshteinDistance(0), terms, texts)).isEqualTo(1);
        Assertions.assertThat(KiePMMLTextIndex.evaluateLevenshteinDistanceAllHits(new LevenshteinDistance(1), terms, texts)).isEqualTo(2);
        Assertions.assertThat(KiePMMLTextIndex.evaluateLevenshteinDistanceAllHits(new LevenshteinDistance(2), terms, texts)).isEqualTo(3);

        Pattern whitespaceOrHyphen = Pattern.compile("[\\s\\-]");
        List<String> hyphenSplitTerms = KiePMMLTextIndex.splitText(TERM_0, whitespaceOrHyphen);
        List<String> hyphenSplitTexts = KiePMMLTextIndex.splitText(TEXT_0, whitespaceOrHyphen);
        Assertions.assertThat(KiePMMLTextIndex.evaluateLevenshteinDistanceAllHits(new LevenshteinDistance(0), hyphenSplitTerms, hyphenSplitTexts)).isEqualTo(1);
        Assertions.assertThat(KiePMMLTextIndex.evaluateLevenshteinDistanceAllHits(new LevenshteinDistance(1), hyphenSplitTerms, hyphenSplitTexts)).isEqualTo(3);
        Assertions.assertThat(KiePMMLTextIndex.evaluateLevenshteinDistanceAllHits(new LevenshteinDistance(2), hyphenSplitTerms, hyphenSplitTexts)).isEqualTo(4);
    }

    /** Counts best hits of the term {@code "The"} in {@code TEXT_0}; the result is 2 for every threshold. */
    @Test
    void evaluateLevenshteinDistanceBestHits() {
        Pattern whitespace = Pattern.compile("\\s+");
        List<String> terms = KiePMMLTextIndex.splitText("The", whitespace);
        List<String> texts = KiePMMLTextIndex.splitText(TEXT_0, whitespace);
        Assertions.assertThat(KiePMMLTextIndex.evaluateLevenshteinDistanceBestHits(new LevenshteinDistance(0), terms, texts)).isEqualTo(2);
        Assertions.assertThat(KiePMMLTextIndex.evaluateLevenshteinDistanceBestHits(new LevenshteinDistance(1), terms, texts)).isEqualTo(2);
        Assertions.assertThat(KiePMMLTextIndex.evaluateLevenshteinDistanceBestHits(new LevenshteinDistance(2), terms, texts)).isEqualTo(2);

        Pattern whitespaceOrHyphen = Pattern.compile("[\\s\\-]");
        List<String> hyphenSplitTerms = KiePMMLTextIndex.splitText("The", whitespaceOrHyphen);
        List<String> hyphenSplitTexts = KiePMMLTextIndex.splitText(TEXT_0, whitespaceOrHyphen);
        Assertions.assertThat(KiePMMLTextIndex.evaluateLevenshteinDistanceBestHits(new LevenshteinDistance(0), hyphenSplitTerms, hyphenSplitTexts)).isEqualTo(2);
        Assertions.assertThat(KiePMMLTextIndex.evaluateLevenshteinDistanceBestHits(new LevenshteinDistance(1), hyphenSplitTerms, hyphenSplitTexts)).isEqualTo(2);
        Assertions.assertThat(KiePMMLTextIndex.evaluateLevenshteinDistanceBestHits(new LevenshteinDistance(2), hyphenSplitTerms, hyphenSplitTexts)).isEqualTo(2);
    }

    /**
     * Compares {@code TERM_0} with progressively more distant strings; {@code -1} is expected
     * whenever the distance exceeds the threshold of the given {@link LevenshteinDistance}.
     */
    @Test
    void evaluateLevenshteinDistanceSplitText() {
        Assertions.assertThat(KiePMMLTextIndex.evaluateLevenshteinDistance(new LevenshteinDistance(0), TERM_0, TERM_0)).isEqualTo(0);
        Assertions.assertThat(KiePMMLTextIndex.evaluateLevenshteinDistance(new LevenshteinDistance(1), TERM_0, TERM_0)).isEqualTo(0);
        Assertions.assertThat(KiePMMLTextIndex.evaluateLevenshteinDistance(new LevenshteinDistance(2), TERM_0, TERM_0)).isEqualTo(0);

        String oneEditAway = "brown foxy";
        Assertions.assertThat(KiePMMLTextIndex.evaluateLevenshteinDistance(new LevenshteinDistance(0), TERM_0, oneEditAway)).isEqualTo(-1);
        Assertions.assertThat(KiePMMLTextIndex.evaluateLevenshteinDistance(new LevenshteinDistance(1), TERM_0, oneEditAway)).isEqualTo(1);
        Assertions.assertThat(KiePMMLTextIndex.evaluateLevenshteinDistance(new LevenshteinDistance(2), TERM_0, oneEditAway)).isEqualTo(1);

        String twoEditsAway = "browny foxy";
        Assertions.assertThat(KiePMMLTextIndex.evaluateLevenshteinDistance(new LevenshteinDistance(0), TERM_0, twoEditsAway)).isEqualTo(-1);
        Assertions.assertThat(KiePMMLTextIndex.evaluateLevenshteinDistance(new LevenshteinDistance(1), TERM_0, twoEditsAway)).isEqualTo(-1);
        Assertions.assertThat(KiePMMLTextIndex.evaluateLevenshteinDistance(new LevenshteinDistance(2), TERM_0, twoEditsAway)).isEqualTo(2);
    }

    /** Splits {@code TEXT_0} with both separators and checks token count and token content. */
    @Test
    void splitText() {
        List<String> whitespaceTokens = KiePMMLTextIndex.splitText(TEXT_0, Pattern.compile("\\s+"));
        Assertions.assertThat(whitespaceTokens).hasSize(25);
        assertAlphanumericTokens(whitespaceTokens);

        List<String> hyphenTokens = KiePMMLTextIndex.splitText(TEXT_0, Pattern.compile("[\\s\\-]"));
        Assertions.assertThat(hyphenTokens).hasSize(26);
        assertAlphanumericTokens(hyphenTokens);
    }

    /**
     * Builds the two recursive normalizations used by {@link #evaluateTextIndex0Normalizations()}.
     *
     * @return the normalizations, in the order they are passed to the text index builder
     */
    private List<KiePMMLTextIndexNormalization> getKiePMMLTextIndexNormalizations() {
        List<KiePMMLRow> firstRows = Arrays.asList(
                row("string", "blacky", "stem", "browny", "false"),
                row("string", "trot?", "stem", "an", "true"));
        KiePMMLTextIndexNormalization first = KiePMMLTextIndexNormalization
                .builder("indexNormalization1", Collections.emptyList())
                .withKiePMMLInlineTable(new KiePMMLInlineTable("inlineTable1", Collections.emptyList(), firstRows))
                .withRecursive(true)
                .build();

        List<KiePMMLRow> secondRows = Collections.singletonList(
                row("string", "again|is|are|seem(ed|s?)|were?", "stem", "brown-foxy", "true"));
        KiePMMLTextIndexNormalization second = KiePMMLTextIndexNormalization
                .builder("indexNormalization2", Collections.emptyList())
                .withKiePMMLInlineTable(new KiePMMLInlineTable("inlineTable2", Collections.emptyList(), secondRows))
                .withRecursive(true)
                .build();
        return Arrays.asList(first, second);
    }

    /**
     * Builds the expected result for every {@link LOCAL_TERM_WEIGHTS} value exercised by the tests.
     *
     * @param frequency    expected number of hits of the term
     * @param maxFrequency expected frequency of the most frequent word in the text
     * @return expected value keyed by local term weight
     */
    private static Map<LOCAL_TERM_WEIGHTS, Double> expectedWeights(double frequency, int maxFrequency) {
        Map<LOCAL_TERM_WEIGHTS, Double> expected = new HashMap<>();
        expected.put(LOCAL_TERM_WEIGHTS.TERM_FREQUENCY, frequency);
        expected.put(LOCAL_TERM_WEIGHTS.BINARY, 1.0d);
        expected.put(LOCAL_TERM_WEIGHTS.LOGARITHMIC, Math.log10(1.0d + frequency));
        expected.put(LOCAL_TERM_WEIGHTS.AUGMENTED_NORMALIZED_TERM_FREQUENCY, 0.5d * (1.0d + (frequency / maxFrequency)));
        return expected;
    }

    /**
     * Calls {@code KiePMMLTextIndex.evaluateRaw} on {@code TERM_0}/{@code TEXT_0} with
     * {@code COUNT_HITS.ALL_HITS} for every entry of {@code expectedByWeight} and asserts the
     * result within {@link #TOLERANCE}.
     *
     * @param caseSensitive       first boolean flag of {@code evaluateRaw}
     *                            (presumably case sensitivity - confirm against its signature)
     * @param tokenize            second boolean flag of {@code evaluateRaw}
     *                            (presumably tokenization, going by the test names - confirm)
     * @param wordSeparator       word separator regular expression
     * @param levenshteinDistance distance calculator carrying the threshold
     * @param expectedByWeight    expected value for each local term weight
     */
    private static void assertEvaluateRaw(boolean caseSensitive,
                                          boolean tokenize,
                                          String wordSeparator,
                                          LevenshteinDistance levenshteinDistance,
                                          Map<LOCAL_TERM_WEIGHTS, Double> expectedByWeight) {
        for (Map.Entry<LOCAL_TERM_WEIGHTS, Double> entry : expectedByWeight.entrySet()) {
            Assertions.assertThat(KiePMMLTextIndex.evaluateRaw(caseSensitive, tokenize, TERM_0, TEXT_0, wordSeparator,
                                                               entry.getKey(), COUNT_HITS.ALL_HITS, levenshteinDistance))
                    .isCloseTo(entry.getValue(), TOLERANCE);
        }
    }

    /** Asserts that every token contains an alphanumeric character and nothing but alphanumerics/spaces. */
    private static void assertAlphanumericTokens(List<String> tokens) {
        for (String token : tokens) {
            Assertions.assertThat(NON_ALPHANUMERIC.matcher(token).find()).isFalse();
            Assertions.assertThat(ALPHANUMERIC.matcher(token).find()).isTrue();
        }
    }

    /**
     * Creates an inline-table row with an input column, an output column and a {@code "regex"} column.
     *
     * @param inField  name of the input column
     * @param inValue  value (literal or regular expression) of the input column
     * @param outField name of the output column
     * @param outValue replacement value stored in the output column
     * @param regex    value of the {@code "regex"} column, {@code "true"} or {@code "false"}
     * @return the populated row
     */
    private static KiePMMLRow row(String inField, String inValue, String outField, String outValue, String regex) {
        // NOTE(review): assumes KiePMMLRow accepts Map<String, Object> - confirm against its constructor.
        Map<String, Object> columnValues = new HashMap<>();
        columnValues.put(inField, inValue);
        columnValues.put(outField, outValue);
        columnValues.put("regex", regex);
        return new KiePMMLRow(columnValues);
    }
}
