package org.kie.pmml.commons.model.expressions;

import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.StringJoiner;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.text.similarity.LevenshteinDistance;
import org.kie.pmml.api.enums.COUNT_HITS;
import org.kie.pmml.api.enums.LOCAL_TERM_WEIGHTS;
import org.kie.pmml.api.exceptions.KiePMMLException;
import org.kie.pmml.commons.model.KiePMMLExtension;
import org.kie.pmml.commons.model.ProcessingDTO;
import org.kie.pmml.commons.model.abstracts.AbstractKiePMMLComponent;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.PropertyAccessor;

/* loaded from: input_file:BOOT-INF/lib/kie-pmml-commons-8.14.2-SNAPSHOT.jar:org/kie/pmml/commons/model/expressions/KiePMMLTextIndex.class */
public class KiePMMLTextIndex extends AbstractKiePMMLComponent implements KiePMMLExpression {
    private static final long serialVersionUID = -1946996874918753317L;
    private static final Logger logger = LoggerFactory.getLogger((Class<?>) KiePMMLTextIndex.class);
    /** Regex matching runs of whitespace; the default word separator and the fallback used when {@code tokenize} is false. */
    public static final String DEFAULT_TOKENIZER = "\\s+";
    /** Expression whose evaluation result is cast to {@code String} and used as the term to look for. */
    private final KiePMMLExpression expression;
    /** How the raw hit count is turned into the final weight; the constructor sets {@code TERM_FREQUENCY}. */
    private LOCAL_TERM_WEIGHTS localTermWeights;
    /** When false (the constructor default) both term and text are lower-cased before comparison. */
    private boolean isCaseSensitive;
    /** Threshold handed to {@link LevenshteinDistance}; the constructor sets 0. */
    private int maxLevenshteinDistance;
    /** Selects the all-hits or best-hits counting strategy; the constructor sets {@code ALL_HITS}. */
    private COUNT_HITS countHits;
    /** Regex used to split term and text into words when {@code tokenize} is true; defaults to {@link #DEFAULT_TOKENIZER}. */
    private String wordSeparatorCharacterRE;
    /** When true (the constructor default) {@code wordSeparatorCharacterRE} is used for splitting, otherwise {@link #DEFAULT_TOKENIZER}. */
    private boolean tokenize;
    /** Distance calculator built from {@code maxLevenshteinDistance}; replaced whenever the builder changes the threshold. */
    private LevenshteinDistance levenshteinDistance;
    /** Optional normalizations applied in list order to the text before scoring; stays {@code null} unless set through the builder. */
    private List<KiePMMLTextIndexNormalization> textIndexNormalizations;

    /* loaded from: input_file:BOOT-INF/lib/kie-pmml-commons-8.14.2-SNAPSHOT.jar:org/kie/pmml/commons/model/expressions/KiePMMLTextIndex$Builder.class */
    /**
     * Fluent builder for {@link KiePMMLTextIndex}.
     * <p>
     * Setters that receive a reference type ignore {@code null}, leaving the
     * value assigned by the {@code KiePMMLTextIndex} constructor untouched.
     */
    public static class Builder extends AbstractKiePMMLComponent.Builder<KiePMMLTextIndex> {
        private Builder(String str, List<KiePMMLExtension> list, KiePMMLExpression kiePMMLExpression) {
            super("TextIndex-", () -> new KiePMMLTextIndex(str, list, kiePMMLExpression));
        }

        /**
         * Sets the local term weighting; a {@code null} argument is ignored.
         *
         * @param local_term_weights weighting to apply to the hit count, may be {@code null}
         * @return this builder
         */
        public Builder withLocalTermWeights(LOCAL_TERM_WEIGHTS local_term_weights) {
            if (local_term_weights == null) {
                return this;
            }
            final KiePMMLTextIndex textIndex = (KiePMMLTextIndex) this.toBuild;
            textIndex.localTermWeights = local_term_weights;
            return this;
        }

        /**
         * Sets whether term and text are compared case-sensitively.
         *
         * @param z {@code true} to keep the original casing
         * @return this builder
         */
        public Builder withIsCaseSensitive(boolean z) {
            final KiePMMLTextIndex textIndex = (KiePMMLTextIndex) this.toBuild;
            textIndex.isCaseSensitive = z;
            return this;
        }

        /**
         * Sets the Levenshtein threshold and rebuilds the distance calculator
         * so both fields stay in sync.
         *
         * @param i threshold passed to {@link LevenshteinDistance}
         * @return this builder
         */
        public Builder withMaxLevenshteinDistance(int i) {
            final KiePMMLTextIndex textIndex = (KiePMMLTextIndex) this.toBuild;
            textIndex.maxLevenshteinDistance = i;
            textIndex.levenshteinDistance = new LevenshteinDistance(i);
            return this;
        }

        /**
         * Sets the hit counting strategy; a {@code null} argument is ignored.
         *
         * @param count_hits counting strategy, may be {@code null}
         * @return this builder
         */
        public Builder withCountHits(COUNT_HITS count_hits) {
            if (count_hits == null) {
                return this;
            }
            final KiePMMLTextIndex textIndex = (KiePMMLTextIndex) this.toBuild;
            textIndex.countHits = count_hits;
            return this;
        }

        /**
         * Sets the word separator regular expression; a {@code null} argument is ignored.
         *
         * @param str regular expression used to split words, may be {@code null}
         * @return this builder
         */
        public Builder withWordSeparatorCharacterRE(String str) {
            if (str == null) {
                return this;
            }
            final KiePMMLTextIndex textIndex = (KiePMMLTextIndex) this.toBuild;
            textIndex.wordSeparatorCharacterRE = str;
            return this;
        }

        /**
         * Sets the tokenize flag.
         *
         * @param z {@code true} to split with the configured separator regex
         * @return this builder
         */
        public Builder withTokenize(boolean z) {
            final KiePMMLTextIndex textIndex = (KiePMMLTextIndex) this.toBuild;
            textIndex.tokenize = z;
            return this;
        }

        /**
         * Sets the normalizations applied to the text; a {@code null} argument is ignored.
         * The list is stored as given, without copying.
         *
         * @param list normalizations to apply in order, may be {@code null}
         * @return this builder
         */
        public Builder withTextIndexNormalizations(List<KiePMMLTextIndexNormalization> list) {
            if (list == null) {
                return this;
            }
            final KiePMMLTextIndex textIndex = (KiePMMLTextIndex) this.toBuild;
            textIndex.textIndexNormalizations = list;
            return this;
        }
    }

    /**
     * Creates an instance holding the given expression and the default
     * settings; the {@link Builder} overrides individual settings afterwards.
     *
     * @param str name forwarded to the superclass
     * @param list extensions forwarded to the superclass
     * @param kiePMMLExpression expression that yields the term to search for
     */
    private KiePMMLTextIndex(String str, List<KiePMMLExtension> list, KiePMMLExpression kiePMMLExpression) {
        super(str, list);
        this.expression = kiePMMLExpression;

        // Matching behaviour.
        this.isCaseSensitive = false;
        this.maxLevenshteinDistance = 0;
        this.levenshteinDistance = new LevenshteinDistance(this.maxLevenshteinDistance);

        // Splitting behaviour.
        this.tokenize = true;
        this.wordSeparatorCharacterRE = DEFAULT_TOKENIZER;

        // Scoring behaviour.
        this.countHits = COUNT_HITS.ALL_HITS;
        this.localTermWeights = LOCAL_TERM_WEIGHTS.TERM_FREQUENCY;
    }

    /**
     * Entry point for building a {@link KiePMMLTextIndex}.
     *
     * @param str name of the component
     * @param list extensions attached to the component
     * @param kiePMMLExpression expression that yields the term to search for
     * @return a new builder wrapping an instance with default settings
     */
    public static Builder builder(String str, List<KiePMMLExtension> list, KiePMMLExpression kiePMMLExpression) {
        final Builder textIndexBuilder = new Builder(str, list, kiePMMLExpression);
        return textIndexBuilder;
    }

    /**
     * Scores how often a term occurs in a text.
     * <p>
     * Both strings are optionally lower-cased, split into words, matched with
     * the requested hit-counting strategy and finally weighted.
     *
     * @param z {@code true} to compare case-sensitively; otherwise both strings are lower-cased
     * @param z2 {@code true} to split with {@code str3}; otherwise {@link #DEFAULT_TOKENIZER} is used
     * @param str term to look for
     * @param str2 text to scan
     * @param str3 word separator regular expression, only read when {@code z2} is true
     * @param local_term_weights weighting applied to the hit count
     * @param count_hits hit counting strategy
     * @param levenshteinDistance distance calculator used for matching
     * @return the weighted score
     * @throws IllegalArgumentException if either enum value is not handled
     */
    static double evaluateRaw(boolean z, boolean z2, String str, String str2, String str3, LOCAL_TERM_WEIGHTS local_term_weights, COUNT_HITS count_hits, LevenshteinDistance levenshteinDistance) {
        final String term = z ? str : str.toLowerCase();
        final String text = z ? str2 : str2.toLowerCase();

        final String separatorRegex = z2 ? str3 : DEFAULT_TOKENIZER;
        final Pattern separator = Pattern.compile(separatorRegex);
        final List<String> termWords = splitText(term, separator);
        final List<String> textWords = splitText(text, separator);

        final int hits;
        switch (count_hits) {
            case ALL_HITS:
                hits = evaluateLevenshteinDistanceAllHits(levenshteinDistance, termWords, textWords);
                break;
            case BEST_HITS:
                hits = evaluateLevenshteinDistanceBestHits(levenshteinDistance, termWords, textWords);
                break;
            default:
                throw new IllegalArgumentException("Unknown COUNT_HITS " + count_hits);
        }

        final double weight;
        switch (local_term_weights) {
            case TERM_FREQUENCY:
                weight = hits;
                break;
            case BINARY:
                weight = evaluateBinary(hits);
                break;
            case LOGARITHMIC:
                weight = evaluateLogarithmic(hits);
                break;
            case AUGMENTED_NORMALIZED_TERM_FREQUENCY:
                weight = evaluateAugmentedNormalizedTermFrequency(hits, textWords);
                break;
            default:
                throw new IllegalArgumentException("Unknown LOCAL_TERM_WEIGHTS " + local_term_weights);
        }
        return weight;
    }

    /**
     * Binary term weight: tells whether the term was found at all.
     * <p>
     * The argument is the hit count computed by the hit-counting methods,
     * which is never negative, so the check must be strictly positive;
     * {@code >= 0} would report a match even when nothing was found.
     *
     * @param i number of hits of the term in the text
     * @return 1 if there is at least one hit, 0 otherwise
     */
    static int evaluateBinary(int i) {
        return i > 0 ? 1 : 0;
    }

    /**
     * Logarithmic term weight.
     *
     * @param i number of hits of the term in the text
     * @return base-10 logarithm of {@code 1 + i}
     */
    static double evaluateLogarithmic(int i) {
        final double shiftedHits = i + 1.0d;
        return Math.log10(shiftedHits);
    }

    /**
     * Augmented normalized term frequency:
     * {@code 0.5 * (binary(hits) + hits / maxWordFrequency)}.
     * <p>
     * The ratio is computed in floating point; dividing two ints would
     * truncate every ratio below 1 to 0.
     *
     * @param i number of hits of the term in the text
     * @param list words of the text, used to find the frequency of the most frequent word
     * @return the augmented normalized weight
     * @throws KiePMMLException if {@code list} is empty, so no most frequent word exists
     */
    static double evaluateAugmentedNormalizedTermFrequency(int i, List<String> list) {
        final Map<String, Long> occurrencesByWord = list.stream()
                .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));
        final long maxOccurrences = occurrencesByWord.values().stream()
                .max(Comparator.naturalOrder())
                .orElseThrow(() -> new KiePMMLException("Failed to find most frequent word!"));
        final double normalizedFrequency = i / (double) maxOccurrences;
        return 0.5d * (evaluateBinary(i) + normalizedFrequency);
    }

    /**
     * Counts every window of the text that matches the term.
     * <p>
     * A window is a run of consecutive text words as long as the term; it
     * matches when the distance calculator does not return a negative value.
     *
     * @param levenshteinDistance distance calculator
     * @param list words of the term
     * @param list2 words of the text
     * @return number of matching windows
     */
    static int evaluateLevenshteinDistanceAllHits(LevenshteinDistance levenshteinDistance, List<String> list, List<String> list2) {
        logger.debug("evaluateLevenshteinDistanceAllHits {} {}", list, list2);
        final int windowSize = list.size();
        final String term = String.join(" ", list);
        final int lastStart = list2.size() - windowSize;
        int hits = 0;
        int start = 0;
        while (start <= lastStart) {
            final String window = String.join(" ", list2.subList(start, start + windowSize));
            final int distance = evaluateLevenshteinDistance(levenshteinDistance, term, window);
            if (distance >= 0) {
                hits++;
            }
            start++;
        }
        return hits;
    }

    /**
     * Counts the windows of the text that match the term with the smallest
     * distance found.
     * <p>
     * A window is a run of consecutive text words as long as the term.
     * Windows for which the distance calculator returns a negative value are
     * ignored. When no window matches (including a term longer than the
     * text) the result is 0 rather than an exception.
     *
     * @param levenshteinDistance distance calculator
     * @param list words of the term
     * @param list2 words of the text
     * @return number of windows at the best distance, or 0 if none matched
     */
    static int evaluateLevenshteinDistanceBestHits(LevenshteinDistance levenshteinDistance, List<String> list, List<String> list2) {
        logger.debug("evaluateLevenshteinDistanceBestHits {} {}", list, list2);
        final int windowSize = list.size();
        final int windowCount = (list2.size() - windowSize) + 1;
        final String term = String.join(" ", list);
        int bestDistance = Integer.MAX_VALUE;
        int bestHits = 0;
        for (int start = 0; start < windowCount; start++) {
            final String window = String.join(" ", list2.subList(start, start + windowSize));
            final int distance = evaluateLevenshteinDistance(levenshteinDistance, term, window);
            if (distance < 0) {
                // Negative result: the window is not a match.
                continue;
            }
            if (distance < bestDistance) {
                // A strictly better distance restarts the count.
                bestDistance = distance;
                bestHits = 1;
            } else if (distance == bestDistance) {
                bestHits++;
            }
        }
        return bestHits;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Applies the distance calculator to the two strings, logging both at debug level.
     *
     * @param levenshteinDistance distance calculator
     * @param str first string
     * @param str2 second string
     * @return the value returned by the calculator, unboxed
     */
    public static int evaluateLevenshteinDistance(LevenshteinDistance levenshteinDistance, String str, String str2) {
        logger.debug("evaluateLevenshteinDistance {} {}", str, str2);
        final Integer distance = levenshteinDistance.apply(str, str2);
        return distance.intValue();
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /** Matches every character that is not an ASCII letter, an ASCII digit or a space. */
    private static final Pattern NON_ALPHANUMERIC_OR_SPACE = Pattern.compile("[^a-zA-Z0-9 ]");

    /**
     * Splits a string into cleaned words.
     * <p>
     * The input is split with the given pattern; every piece is stripped of
     * characters other than ASCII letters, digits and spaces, and pieces that
     * end up empty are dropped.
     *
     * @param str string to split
     * @param pattern separator pattern
     * @return the cleaned, non-empty words in their original order
     */
    public static List<String> splitText(String str, Pattern pattern) {
        return pattern.splitAsStream(str)
                // Precompiled pattern: avoids recompiling the regex for every token.
                .map(token -> NON_ALPHANUMERIC_OR_SPACE.matcher(token).replaceAll(""))
                .filter(token -> !token.isEmpty())
                .collect(Collectors.toList());
    }

    /**
     * Evaluates the text index for the given request.
     * <p>
     * The term comes from the wrapped expression; the text is looked up by
     * this component's name. Configured normalizations are applied to the
     * text in order before the score is computed by {@code evaluateRaw}.
     *
     * @param processingDTO request data
     * @return the score as a {@code Double}
     * @throws KiePMMLException if no text is found under this component's name
     */
    @Override // org.kie.pmml.commons.model.expressions.KiePMMLExpression
    public Object evaluate(ProcessingDTO processingDTO) {
        final String term = (String) this.expression.evaluate(processingDTO);
        String text = (String) ExpressionsUtils.getFromPossibleSources(this.name, processingDTO)
                .orElseThrow(() -> new KiePMMLException("No text to scan in " + this));
        if (this.textIndexNormalizations != null) {
            for (KiePMMLTextIndexNormalization normalization : this.textIndexNormalizations) {
                text = normalization.replace(text, this.isCaseSensitive, this.maxLevenshteinDistance, false, DEFAULT_TOKENIZER);
            }
        }
        final double score = evaluateRaw(
                this.isCaseSensitive,
                this.tokenize,
                term,
                text,
                this.wordSeparatorCharacterRE,
                this.localTermWeights,
                this.countHits,
                this.levenshteinDistance);
        return Double.valueOf(score);
    }

    /**
     * Returns a description of the form
     * {@code KiePMMLTextIndex[name='...', localTermWeights=..., ...]}.
     * <p>
     * The opening bracket is a plain literal matching the closing one, so
     * the method does not depend on an unrelated framework constant.
     *
     * @return the name and the configuration settings of this instance
     */
    @Override
    public String toString() {
        final String prefix = KiePMMLTextIndex.class.getSimpleName() + "[";
        return new StringJoiner(", ", prefix, "]")
                .add("name='" + this.name + "'")
                .add("localTermWeights=" + this.localTermWeights)
                .add("isCaseSensitive=" + this.isCaseSensitive)
                .add("maxLevenshteinDistance=" + this.maxLevenshteinDistance)
                .add("countHits=" + this.countHits)
                .add("wordSeparatorCharacterRE='" + this.wordSeparatorCharacterRE + "'")
                .add("tokenize=" + this.tokenize)
                .toString();
    }

    /**
     * Two instances are equal when they are of the same class and agree on
     * case sensitivity, Levenshtein threshold, tokenize flag, term weighting,
     * hit counting and word separator. Other fields are not compared.
     *
     * @param obj object to compare with
     * @return {@code true} if the compared settings match
     */
    @Override
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        if (obj == null || obj.getClass() != getClass()) {
            return false;
        }
        final KiePMMLTextIndex other = (KiePMMLTextIndex) obj;
        if (this.isCaseSensitive != other.isCaseSensitive) {
            return false;
        }
        if (this.maxLevenshteinDistance != other.maxLevenshteinDistance) {
            return false;
        }
        if (this.tokenize != other.tokenize) {
            return false;
        }
        if (this.localTermWeights != other.localTermWeights) {
            return false;
        }
        if (this.countHits != other.countHits) {
            return false;
        }
        return this.wordSeparatorCharacterRE.equals(other.wordSeparatorCharacterRE);
    }

    /**
     * Hash code computed from the same settings that {@code equals} compares.
     *
     * @return the combined hash of the configuration settings
     */
    @Override
    public int hashCode() {
        return Objects.hash(
                this.localTermWeights,
                this.isCaseSensitive,
                this.maxLevenshteinDistance,
                this.countHits,
                this.wordSeparatorCharacterRE,
                this.tokenize);
    }
}
