package org.apache.cassandra.index.sasi.analyzer;

import com.carrotsearch.hppc.IntObjectMap;
import com.carrotsearch.hppc.IntObjectOpenHashMap;
import com.google.common.annotations.VisibleForTesting;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import org.apache.cassandra.db.marshal.AbstractType;
import org.apache.cassandra.db.marshal.UTF8Type;
import org.apache.cassandra.index.sasi.analyzer.filter.BasicResultFilters;
import org.apache.cassandra.index.sasi.analyzer.filter.FilterPipelineBuilder;
import org.apache.cassandra.index.sasi.analyzer.filter.FilterPipelineExecutor;
import org.apache.cassandra.index.sasi.analyzer.filter.FilterPipelineTask;
import org.apache.cassandra.index.sasi.analyzer.filter.StemmingFilters;
import org.apache.cassandra.index.sasi.analyzer.filter.StopWordFilters;
import org.apache.cassandra.io.util.DataInputBuffer;
import org.apache.cassandra.utils.ByteBufferUtil;

/* loaded from: input_file:lib/cassandra-all-3.4.jar:org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.class */
/**
 * Tokenizing analyzer that splits its input into terms with a
 * {@link StandardTokenizerInterface} scanner, drops terms whose length falls
 * outside the configured bounds, and runs every remaining term through a
 * filter pipeline (case folding, stemming, stop-word removal) assembled from
 * {@link StandardTokenizerOptions}.
 *
 * <p>The analyzer must be initialised with one of the {@code init} overloads
 * and pointed at an input with one of the {@code reset} overloads before it is
 * iterated.
 */
public class StandardAnalyzer extends AbstractAnalyzer {
    private AbstractType validator;
    private StandardTokenizerInterface scanner;
    private StandardTokenizerOptions options;
    private FilterPipelineTask filterPipeline;
    protected Reader inputReader = null;

    /**
     * Token categories, keyed by the numeric id the scanner returns from
     * {@code getNextToken()}.
     */
    public enum TokenType {
        EOF(-1),
        ALPHANUM(0),
        NUM(6),
        SOUTHEAST_ASIAN(9),
        IDEOGRAPHIC(10),
        HIRAGANA(11),
        KATAKANA(12),
        HANGUL(13);

        /** Reverse lookup table from numeric id to constant; filled in the static initializer below. */
        private static final IntObjectMap<TokenType> TOKENS = new IntObjectOpenHashMap<>();

        static {
            for (TokenType tokenType : values()) {
                TOKENS.put(tokenType.value, tokenType);
            }
        }

        public final int value;

        TokenType(int i) {
            this.value = i;
        }

        /** @return the numeric id of this token type */
        public int getValue() {
            return this.value;
        }

        /**
         * Looks up the token type registered for a numeric id.
         *
         * @param i numeric id
         * @return the matching constant, or whatever the backing map yields for an
         *         id that was never registered (presumably {@code null} — confirm
         *         against the hppc {@code IntObjectMap.get} contract)
         */
        public static TokenType fromValue(int i) {
            return TOKENS.get(i);
        }
    }

    /** @return the text of the token the scanner is currently positioned on */
    public String getToken() {
        return this.scanner.getText();
    }

    /**
     * Advances the scanner to the next token whose length lies within
     * {@code [minTokenLength, maxTokenLength]} as configured in the options.
     *
     * @return {@code true} if such a token was found, {@code false} once the
     *         scanner reports {@link TokenType#EOF}
     * @throws IOException if the scanner fails to read its input
     */
    public final boolean incrementToken() throws IOException {
        while (TokenType.fromValue(this.scanner.getNextToken()) != TokenType.EOF) {
            int length = this.scanner.yylength();
            if (length <= this.options.getMaxTokenLength() && length >= this.options.getMinTokenLength()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Runs the current token through the filter pipeline. While the pipeline
     * rejects a token (yields {@code null}), the scanner is advanced and the
     * next token is tried.
     *
     * @return the first filtered token that survives the pipeline, or
     *         {@code null} if the input is exhausted before one is found
     * @throws IOException if the scanner fails to read its input
     */
    protected String getFilteredCurrentToken() throws IOException {
        String token = getToken();
        Object filtered;
        while (true) {
            filtered = FilterPipelineExecutor.execute(this.filterPipeline, token);
            if (filtered != null) {
                // The pipeline accepted this token.
                break;
            }
            if (!incrementToken()) {
                // Nothing left to try; report "no token" to the caller.
                break;
            }
            token = getToken();
        }
        return (String) filtered;
    }

    /**
     * Assembles the filter pipeline from the current options. Stages are added
     * in a fixed order: lower-casing, upper-casing, stemming, stop-word removal.
     */
    private FilterPipelineTask getFilterPipeline() {
        FilterPipelineBuilder builder = new FilterPipelineBuilder(new BasicResultFilters.NoOperation());
        // Case folding is applied only when the analyzer is not case sensitive.
        if (!this.options.isCaseSensitive() && this.options.shouldLowerCaseTerms()) {
            builder = builder.add("to_lower", new BasicResultFilters.LowerCase());
        }
        if (!this.options.isCaseSensitive() && this.options.shouldUpperCaseTerms()) {
            builder = builder.add("to_upper", new BasicResultFilters.UpperCase());
        }
        if (this.options.shouldStemTerms()) {
            builder = builder.add("term_stemming", new StemmingFilters.DefaultStemmingFilter(this.options.getLocale()));
        }
        if (this.options.shouldIgnoreStopTerms()) {
            builder = builder.add("skip_stop_words", new StopWordFilters.DefaultStopWordFilter(this.options.getLocale()));
        }
        return builder.build();
    }

    /**
     * Initialises the analyzer from a raw option map.
     *
     * @param map          option map handed to {@link StandardTokenizerOptions#buildFromMap}
     * @param abstractType validator used to turn each produced term into a {@link ByteBuffer}
     */
    @Override // org.apache.cassandra.index.sasi.analyzer.AbstractAnalyzer
    public void init(Map<String, String> map, AbstractType abstractType) {
        init(StandardTokenizerOptions.buildFromMap(map), abstractType);
    }

    /** Initialises the analyzer with the given options and a {@link UTF8Type} validator. */
    @VisibleForTesting
    protected void init(StandardTokenizerOptions standardTokenizerOptions) {
        init(standardTokenizerOptions, UTF8Type.instance);
    }

    /**
     * Initialises the analyzer: stores validator and options, builds the filter
     * pipeline, and creates a scanner over an empty input. Call one of the
     * {@code reset} overloads afterwards to supply real input.
     *
     * @param standardTokenizerOptions tokenizer and filter configuration
     * @param abstractType             validator used to turn each produced term into a {@link ByteBuffer}
     */
    public void init(StandardTokenizerOptions standardTokenizerOptions, AbstractType abstractType) {
        this.validator = abstractType;
        this.options = standardTokenizerOptions;
        this.filterPipeline = getFilterPipeline();
        // Decode explicitly as UTF-8 rather than with the platform default charset.
        Reader reader = new InputStreamReader(
                new DataInputBuffer(ByteBufferUtil.EMPTY_BYTE_BUFFER, false), StandardCharsets.UTF_8);
        this.scanner = new StandardTokenizerImpl(reader);
        this.inputReader = reader;
    }

    /**
     * Advances to the next term that passes both the length bounds and the
     * filter pipeline, and stores its validated form in {@code next}.
     *
     * @return {@code true} if a term was produced; {@code false} when the input
     *         is exhausted or reading it failed
     */
    @Override // java.util.Iterator
    public boolean hasNext() {
        try {
            if (!incrementToken()) {
                return false;
            }
            // Run the pipeline once and reuse the result for both the null check
            // and the conversion below.
            String filtered = getFilteredCurrentToken();
            if (filtered == null) {
                return false;
            }
            this.next = this.validator.fromString(normalize(filtered));
            return true;
        } catch (IOException ignored) {
            // Iterator.hasNext() cannot throw a checked exception; a read failure
            // is deliberately reported as "no more terms".
            return false;
        }
    }

    /**
     * Points the analyzer at new input held in a buffer and clears any pending term.
     *
     * @param byteBuffer bytes to tokenize, decoded as UTF-8
     */
    @Override // org.apache.cassandra.index.sasi.analyzer.AbstractAnalyzer
    public void reset(ByteBuffer byteBuffer) {
        this.next = null;
        Reader reader = new InputStreamReader(new DataInputBuffer(byteBuffer, false), StandardCharsets.UTF_8);
        this.scanner.yyreset(reader);
        this.inputReader = reader;
    }

    /**
     * Points the analyzer at new input read from a stream and clears any pending term.
     *
     * @param inputStream stream to tokenize, decoded as UTF-8
     */
    public void reset(InputStream inputStream) {
        this.next = null;
        Reader reader = new InputStreamReader(inputStream, StandardCharsets.UTF_8);
        this.scanner.yyreset(reader);
        this.inputReader = reader;
    }

    /** @return always {@code true}; this analyzer splits its input into multiple terms */
    @Override // org.apache.cassandra.index.sasi.analyzer.AbstractAnalyzer
    public boolean isTokenizing() {
        return true;
    }
}
