package io.quarkiverse.langchain4j.easyrag.runtime;

import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.loader.FileSystemDocumentLoader;
import dev.langchain4j.data.document.splitter.DocumentSplitters;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.model.embedding.onnx.HuggingFaceTokenizer;
import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.store.embedding.EmbeddingStoreIngestor;
import dev.langchain4j.store.embedding.inmemory.InMemoryEmbeddingStore;
import java.io.IOException;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.nio.file.PathMatcher;
import java.nio.file.attribute.FileAttribute;
import java.util.List;
import org.jboss.logging.Logger;

/* loaded from: input_file:io/quarkiverse/langchain4j/easyrag/runtime/EasyRagIngestor.class */
/**
 * Loads documents from the filesystem location described by an {@link EasyRagConfig}, splits them
 * into segments and ingests those segments into an {@link EmbeddingStore}.
 *
 * <p>When embedding reuse is enabled in the configuration and the store is an
 * {@link InMemoryEmbeddingStore}, the computed embeddings are additionally written to the
 * configured file so that ingestion can be skipped while that file exists.
 */
public class EasyRagIngestor {
    private static final Logger LOGGER = Logger.getLogger(EasyRagIngestor.class);

    private final EmbeddingModel embeddingModel;
    private final EmbeddingStore<TextSegment> embeddingStore;
    private final EasyRagConfig config;

    /**
     * @param embeddingModel model used to compute embeddings for the ingested segments
     * @param embeddingStore store that receives the embedded segments
     * @param easyRagConfig configuration providing the document path, matcher, splitter sizes
     *        and embedding-reuse settings
     */
    public EasyRagIngestor(EmbeddingModel embeddingModel, EmbeddingStore<TextSegment> embeddingStore, EasyRagConfig easyRagConfig) {
        this.embeddingModel = embeddingModel;
        this.embeddingStore = embeddingStore;
        this.config = easyRagConfig;
    }

    /**
     * Runs the ingestion.
     *
     * <p>If embedding reuse is disabled, or the store is not an {@link InMemoryEmbeddingStore},
     * documents are simply ingested from the filesystem. Otherwise ingestion only happens when
     * the configured embeddings file does not exist yet, and the resulting store content is then
     * serialized to that file.
     *
     * @throws RuntimeException if the directory for the embeddings file cannot be created
     */
    public void ingest() {
        if (config.reuseEmbeddings().enabled()
                && embeddingStore instanceof InMemoryEmbeddingStore<TextSegment> inMemoryStore) {
            ingestAndPersist(inMemoryStore);
        } else {
            ingestDocumentsFromFilesystem(config, embeddingStore, embeddingModel);
        }
    }

    /**
     * Ingests into the given in-memory store and serializes it to the configured embeddings file,
     * unless that file already exists.
     */
    private void ingestAndPersist(InMemoryEmbeddingStore<TextSegment> inMemoryStore) {
        Path embeddingsFile = Path.of(config.reuseEmbeddings().file()).toAbsolutePath();
        if (Files.exists(embeddingsFile)) {
            // Nothing to ingest. NOTE(review): presumably the store was already populated from
            // this file by whoever created it - confirm against the store producer.
            return;
        }
        try {
            // getParent() is null for a root path; there is no directory to create in that case.
            Path parent = embeddingsFile.getParent();
            if (parent != null) {
                Files.createDirectories(parent);
            }
        } catch (IOException e) {
            throw new RuntimeException("Unable to create directory for embeddings file " + embeddingsFile, e);
        }
        ingestDocumentsFromFilesystem(config, inMemoryStore, embeddingModel);
        LOGGER.infof("Writing embeddings to %s", embeddingsFile);
        // serializeToFile is declared on InMemoryEmbeddingStore, not on the EmbeddingStore interface.
        inMemoryStore.serializeToFile(embeddingsFile);
    }

    /**
     * Loads the documents matching the configured path matcher (recursively if configured),
     * splits them with a recursive splitter and ingests the resulting segments.
     *
     * @param easyRagConfig configuration providing path, path matcher, recursion flag and
     *        segment/overlap sizes
     * @param embeddingStore store that receives the embedded segments
     * @param embeddingModel model used to compute the embeddings
     */
    private void ingestDocumentsFromFilesystem(EasyRagConfig easyRagConfig, EmbeddingStore<TextSegment> embeddingStore, EmbeddingModel embeddingModel) {
        PathMatcher pathMatcher = FileSystems.getDefault().getPathMatcher(easyRagConfig.pathMatcher());
        LOGGER.infof("Ingesting documents from path: %s, path matcher = %s, recursive = %s",
                easyRagConfig.path(), easyRagConfig.pathMatcher(), easyRagConfig.recursive());

        List<Document> documents = easyRagConfig.recursive()
                ? FileSystemDocumentLoader.loadDocumentsRecursively(easyRagConfig.path(), pathMatcher)
                : FileSystemDocumentLoader.loadDocuments(easyRagConfig.path(), pathMatcher);

        // Each split segment is re-wrapped as a text-only Document before being handed to the
        // ingestor; only the segment text is carried over.
        List<Document> segmentDocuments = DocumentSplitters
                .recursive(easyRagConfig.maxSegmentSize(), easyRagConfig.maxOverlapSize(), new HuggingFaceTokenizer())
                .splitAll(documents)
                .stream()
                .map(segment -> new Document(segment.text()))
                .toList();

        EmbeddingStoreIngestor.builder()
                .embeddingModel(embeddingModel)
                .embeddingStore(embeddingStore)
                .build()
                .ingest(segmentDocuments);
        LOGGER.infof("Ingested %d files as %d documents", documents.size(), segmentDocuments.size());
    }
}
