package org.apache.tika.parser.txt;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.Collections;
import java.util.Set;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/* loaded from: input_file:lib/modeshape-extractor-tika-2.8.2.Final-jar-with-dependencies.jar:org/apache/tika/parser/txt/TXTParser.class */
/**
 * Parser for {@code text/plain} content.
 *
 * <p>The character encoding is chosen by a {@link CharsetDetector}, optionally
 * seeded with an encoding declared in the incoming metadata. The decoded text
 * is then emitted as a single XHTML {@code <p>} element.
 */
public class TXTParser extends AbstractParser {
    private static final long serialVersionUID = -6656102320836888910L;
    private static final Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.TEXT_PLAIN);

    /** Code point of the byte order mark that may lead the decoded text. */
    private static final int BYTE_ORDER_MARK = 0xFEFF;

    /** Number of characters copied to the content handler per read. */
    private static final int CHUNK_SIZE = 4096;

    /**
     * Returns the media types handled by this parser.
     *
     * @param parseContext parse context (not consulted)
     * @return the singleton set containing {@code MediaType.TEXT_PLAIN}
     */
    @Override // org.apache.tika.parser.Parser
    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        return SUPPORTED_TYPES;
    }

    /**
     * Detects the encoding of the given stream, records it in the metadata and
     * forwards the decoded characters to the content handler.
     *
     * <p>On success the metadata has {@code Content-Type} set to
     * {@code text/plain}; {@code Content-Encoding} is overwritten with the
     * first detected charset that this JVM supports, or left untouched when no
     * detected charset is supported. The stream is not closed by this method.
     *
     * @param inputStream    raw document bytes; wrapped in a
     *                       {@link BufferedInputStream} when it does not support mark
     * @param contentHandler receiver of the generated XHTML events
     * @param metadata       document metadata; read for encoding hints and updated
     *                       with the results
     * @param parseContext   parse context (not consulted)
     * @throws IOException   if reading from the stream fails
     * @throws SAXException  if the content handler rejects an event
     * @throws TikaException if no encoding is available after detection, or the
     *                       chosen encoding is not supported by the JVM
     */
    @Override // org.apache.tika.parser.Parser
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
        // The detector is handed the stream before it is decoded below, so make
        // sure mark/reset is available on it.
        InputStream source = inputStream.markSupported() ? inputStream : new BufferedInputStream(inputStream);

        CharsetDetector detector = new CharsetDetector();

        // Encoding hint: an explicit Content-Encoding wins, otherwise fall back
        // to the charset parameter of a parseable Content-Type.
        String declaredEncoding = metadata.get("Content-Encoding");
        String declaredType = metadata.get("Content-Type");
        if (declaredEncoding == null && declaredType != null) {
            MediaType mediaType = MediaType.parse(declaredType);
            if (mediaType != null) {
                declaredEncoding = mediaType.getParameters().get("charset");
            }
        }
        if (declaredEncoding != null) {
            detector.setDeclaredEncoding(declaredEncoding);
        }
        detector.setText(source);

        // Record the first candidate, in the order the detector returns them,
        // that this JVM can actually decode.
        for (CharsetMatch candidate : detector.detectAll()) {
            if (Charset.isSupported(candidate.getName())) {
                metadata.set("Content-Encoding", candidate.getName());
                break;
            }
        }

        // Either the value just recorded, or whatever Content-Encoding the
        // caller supplied when no candidate was usable.
        String encoding = metadata.get("Content-Encoding");
        if (encoding == null) {
            throw new TikaException("Text encoding could not be detected and no encoding hint is available in document metadata");
        }
        metadata.set("Content-Type", "text/plain");

        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(source, encoding));

            // Swallow a leading byte order mark; anything else is pushed back.
            reader.mark(1);
            int first = reader.read();
            if (first != BYTE_ORDER_MARK) {
                reader.reset();
            }

            XHTMLContentHandler xhtml = new XHTMLContentHandler(contentHandler, metadata);
            xhtml.startDocument();
            xhtml.startElement("p");

            char[] chunk = new char[CHUNK_SIZE];
            int count;
            while ((count = reader.read(chunk)) != -1) {
                xhtml.characters(chunk, 0, count);
            }

            xhtml.endElement("p");
            xhtml.endDocument();
        } catch (UnsupportedEncodingException e) {
            throw new TikaException("Unsupported text encoding: " + encoding, e);
        }
    }
}
