package org.jasen.core.token;

import com.sun.mail.smtp.SMTPMessage;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.List;
import javax.mail.Header;
import javax.mail.MessagingException;
import javax.mail.Session;
import javax.mail.internet.MimeMessage;
import org.buni.meldware.mail.imap4.IMAP4Constants;
import org.buni.meldware.mail.message.StandardMailHeaders;
import org.jasen.core.linguistics.LinguisticAnalyzer;
import org.jasen.core.parsers.StandardMimeMessageParser;
import org.jasen.core.parsers.URLParser;
import org.jasen.error.JasenException;
import org.jasen.interfaces.JasenMessage;
import org.jasen.interfaces.MimeMessageTokenizer;
import org.jasen.interfaces.ParserData;
import org.jasen.interfaces.TokenErrorRecorder;
import org.jasen.util.MimeUtils;

/* loaded from: input_file:jasen.jar:org/jasen/core/token/EmailTokenizer.class */
/**
 * Produces the token list for a MIME message by combining three sources:
 * tokens from the parsed body text, tokens from the message header values,
 * and URLs found in the raw HTML and plain-text parts.
 *
 * <p>Tokens that come from a header listed in {@link #INCLUDED_HEADERS} are
 * prefixed with {@link #HEADER_TOKEN_DELIMITER}, the header name, and the
 * delimiter again.
 */
public class EmailTokenizer implements MimeMessageTokenizer {
    /** Character written before and after the header name when a header token is tagged. */
    public static final char HEADER_TOKEN_DELIMITER = 449;

    /**
     * Lower-case names of headers that are never tokenized while {@code ignoreHeaders} is false.
     * Sorted in the static initializer because lookups use {@link Arrays#binarySearch(Object[], Object)}.
     */
    public static String[] IGNORED_HEADERS = {"thread-index", "date", "content-class", "content-type", "received", "mime-version"};

    /**
     * Lower-case names of headers whose tokens get the header-name prefix; when {@code ignoreHeaders}
     * is true these are also the only headers tokenized. Sorted in the static initializer for binary search.
     */
    public static String[] INCLUDED_HEADERS = {"subject", "from", "to", "cc", "bcc", "return-path"};

    /** When true only {@link #INCLUDED_HEADERS} are tokenized; when false everything not rejected by {@code ignoreHeader}. */
    private boolean ignoreHeaders = false;

    /** Value handed to the wrapped tokenizer as its maximum token count. */
    protected int tokenLimit = 20;

    /** Value handed to the wrapped tokenizer as its linguistic limit. */
    protected int linguisticLimit = 3;

    /** Tokenizer that does the actual splitting of body text and header values. */
    private SpamTokenizer tokenizer = new SpamTokenizer();

    static {
        // Both tables are searched with Arrays.binarySearch, which requires sorted input.
        Arrays.sort(IGNORED_HEADERS);
        Arrays.sort(INCLUDED_HEADERS);
    }

    /**
     * Creates a tokenizer and pushes the default limits into the wrapped {@code SpamTokenizer}.
     *
     * @throws IOException declared by the original class; presumably raised while the wrapped
     *                     tokenizer is constructed (not visible here)
     */
    public EmailTokenizer() throws IOException {
        this.tokenizer.maxTokens = this.tokenLimit;
        this.tokenizer.linguisticLimit = this.linguisticLimit;
    }

    /**
     * Builds the combined token array for a message.
     *
     * <p>The result is the tokens of {@code str5}, followed by the tokens of each tokenized header in
     * header order, followed by the URLs found in {@code str} and then {@code str2}.
     *
     * <p>Previously the header loop overwrote {@code str3} with every header value and a later guard
     * tested it, so whether URLs were extracted depended on the last header processed. URL extraction
     * now depends only on {@code str} and {@code str2}.
     *
     * @param mimeMessage        message whose headers are tokenized
     * @param str                raw HTML part scanned for URLs, may be null
     * @param str2               raw text part scanned for URLs, may be null
     * @param str3               parsed text; not read by this implementation (kept for signature compatibility)
     * @param str4               HTML rendered as text; not read by this implementation (kept for signature compatibility)
     * @param str5               body text to tokenize, may be null
     * @param tokenErrorRecorder recorder passed through to the wrapped tokenizer
     * @return the combined tokens; null when no source produced a token array
     * @throws JasenException wrapping any {@link IOException} or {@link MessagingException}
     */
    protected String[] tokenize(MimeMessage mimeMessage, String str, String str2, String str3, String str4, String str5, TokenErrorRecorder tokenErrorRecorder) throws JasenException {
        try {
            String[] tokens = this.tokenizer.tokenize(str5, tokenErrorRecorder);
            Header[] allHeaders = MimeUtils.getAllHeaders(mimeMessage);
            for (int i = 0; i < allHeaders.length; i++) {
                tokens = concat(tokens, tokenizeHeader(allHeaders[i], tokenErrorRecorder));
            }
            return concat(tokens, extractUrls(str, str2));
        } catch (IOException e) {
            throw new JasenException(e);
        } catch (MessagingException e2) {
            throw new JasenException((Throwable) e2);
        }
    }

    /**
     * Tokenizes one header according to the current {@code ignoreHeaders} mode and tags the tokens
     * with the header name when the header is in {@link #INCLUDED_HEADERS}.
     *
     * @return the header's tokens, or null when the header is skipped or the tokenizer returned null
     */
    private String[] tokenizeHeader(Header header, TokenErrorRecorder recorder) throws IOException, MessagingException, JasenException {
        // Header names are ASCII protocol tokens: lower-case them independently of the default locale
        // so that e.g. a Turkish locale cannot turn "I" into a dotless i and miss the lookup tables.
        String name = header.getName().toLowerCase(java.util.Locale.ENGLISH);
        String[] headerTokens = null;
        if (this.ignoreHeaders) {
            if (includeHeader(name)) {
                headerTokens = this.tokenizer.tokenize(header.getValue(), recorder);
            }
        } else if (!ignoreHeader(name)) {
            String value = header.getValue();
            if (name.equalsIgnoreCase(StandardMailHeaders.MESSAGE_ID)) {
                // Only the part from '@' onwards is tokenized; the value is used whole if there is no '@'.
                int at = value.indexOf('@');
                if (at > -1) {
                    value = value.substring(at);
                }
                headerTokens = this.tokenizer.tokenize(value, true, recorder);
            } else if (name.equalsIgnoreCase(StandardMailHeaders.RECEIVED)) {
                // NOTE(review): "received" is in IGNORED_HEADERS, so this branch looks unreachable if the
                // constant is the usual "Received" header name - confirm the constant's value.
                headerTokens = this.tokenizer.tokenize(value, true, recorder);
            } else {
                headerTokens = this.tokenizer.tokenize(value, recorder);
            }
        }
        if (headerTokens != null && includeHeader(name)) {
            // The prefix uses the header name as it appears in the message, not the lower-cased form.
            String prefix = new StringBuilder().append(HEADER_TOKEN_DELIMITER).append(header.getName()).append(HEADER_TOKEN_DELIMITER).toString();
            for (int j = 0; j < headerTokens.length; j++) {
                headerTokens[j] = prefix + headerTokens[j];
            }
        }
        return headerTokens;
    }

    /**
     * Collects the URLs reported by {@code URLParser} for the HTML part followed by the text part.
     *
     * @return the URLs as an array, or null when neither input yielded a URL list
     */
    private static String[] extractUrls(String html, String text) throws IOException, MessagingException, JasenException {
        List urls = null;
        if (html != null) {
            URLParser htmlParser = new URLParser();
            htmlParser.parse(html);
            urls = htmlParser.getUrls();
        }
        if (text != null) {
            URLParser textParser = new URLParser();
            textParser.parse(text);
            List textUrls = textParser.getUrls();
            if (textUrls != null) {
                if (urls == null) {
                    urls = textUrls;
                } else {
                    urls.addAll(textUrls);
                }
            }
        }
        if (urls == null) {
            return null;
        }
        return (String[]) urls.toArray(new String[urls.size()]);
    }

    /**
     * Joins two token arrays. A null argument is treated as "nothing to add": the other argument is
     * returned as-is (so the result is null only when both are null).
     */
    private static String[] concat(String[] first, String[] second) {
        if (first == null) {
            return second;
        }
        if (second == null) {
            return first;
        }
        String[] merged = new String[first.length + second.length];
        System.arraycopy(first, 0, merged, 0, first.length);
        System.arraycopy(second, 0, merged, first.length, second.length);
        return merged;
    }

    /** Returns true when the string is non-null and contains something other than whitespace. */
    private static boolean hasText(String s) {
        return s != null && s.trim().length() > 0;
    }

    /**
     * Tokenizes a parsed message. The body text is the HTML-as-text rendering when it is non-blank,
     * otherwise the parsed text when that is non-blank, otherwise null.
     *
     * @param mimeMessage  message whose headers are tokenized
     * @param jasenMessage supplies the raw HTML and text parts used for URL extraction
     * @param parserData   supplies the parsed text, HTML-as-text and the error recorder; must not be null
     * @return the combined tokens, possibly null
     * @throws JasenException if tokenizing fails
     */
    @Override // org.jasen.interfaces.MimeMessageTokenizer
    public String[] tokenize(MimeMessage mimeMessage, JasenMessage jasenMessage, ParserData parserData) throws JasenException {
        String htmlPart = jasenMessage.getHtmlPart();
        String textPart = jasenMessage.getTextPart();
        String htmlAsText = parserData.getHtmlAsText();
        String textParsed = parserData.getTextParsed();
        String body = null;
        if (hasText(htmlAsText)) {
            body = htmlAsText;
        } else if (hasText(textParsed)) {
            body = textParsed;
        }
        return tokenize(mimeMessage, htmlPart, textPart, textParsed, htmlAsText, body, parserData.getTokenErrorRecorder());
    }

    /** Returns true for a lower-case header name that is in {@link #IGNORED_HEADERS} or starts with "x". */
    private boolean ignoreHeader(String str) {
        return Arrays.binarySearch(IGNORED_HEADERS, str) >= 0 || str.startsWith("x");
    }

    /** Returns true for a lower-case header name that is in {@link #INCLUDED_HEADERS}. */
    private boolean includeHeader(String str) {
        return Arrays.binarySearch(INCLUDED_HEADERS, str) >= 0;
    }

    /** @return the linguistic limit currently configured */
    public int getLinguisticLimit() {
        return this.linguisticLimit;
    }

    /**
     * Sets the linguistic limit and forwards it to the wrapped tokenizer, mirroring what the
     * constructor and {@link #setTokenLimit(int)} do. Previously the new value never reached the tokenizer.
     *
     * @param i the new linguistic limit
     */
    public void setLinguisticLimit(int i) {
        this.linguisticLimit = i;
        if (this.tokenizer != null) {
            this.tokenizer.linguisticLimit = i;
        }
    }

    /** @return true when only {@link #INCLUDED_HEADERS} are tokenized */
    public boolean isIgnoreHeaders() {
        return this.ignoreHeaders;
    }

    /** @param z true to tokenize only {@link #INCLUDED_HEADERS}; false to tokenize every header not ignored */
    public void setIgnoreHeaders(boolean z) {
        this.ignoreHeaders = z;
    }

    /** @return the token limit currently configured */
    public int getTokenLimit() {
        return this.tokenLimit;
    }

    /**
     * Sets the token limit and forwards it to the wrapped tokenizer.
     *
     * @param i the new token limit
     */
    @Override // org.jasen.interfaces.MimeMessageTokenizer
    public void setTokenLimit(int i) {
        this.tokenLimit = i;
        if (this.tokenizer != null) {
            this.tokenizer.setMaxTokens(i);
        }
    }

    /**
     * Manual test driver: parses every regular file in a directory as a mail message and writes the
     * HTML part and tokens of each to an output file. Failures on a single file are printed and the
     * run continues with the next file.
     *
     * @param strArr optional arguments: {@code [0]} input directory, {@code [1]} output file; the
     *               original hard-coded paths are used for any argument that is absent
     */
    public static void main(String[] strArr) {
        String inputDir = strArr != null && strArr.length > 0 ? strArr[0] : "D:\\Projects\\Synetek\\Service\\EveryMail\\core\\poll";
        String outputPath = strArr != null && strArr.length > 1 ? strArr[1] : "c:/output.txt";
        PrintWriter printWriter = null;
        try {
            File[] listFiles = new File(inputDir).listFiles();
            if (listFiles == null) {
                // listFiles() returns null when the path is not a directory or cannot be read.
                System.err.println("Cannot list directory: " + inputDir);
                return;
            }
            File file = new File(outputPath);
            if (file.exists()) {
                file.delete();
            }
            printWriter = new PrintWriter(new FileOutputStream(file));
            for (int i = 0; i < listFiles.length; i++) {
                if (listFiles[i].isFile()) {
                    FileInputStream messageStream = null;
                    try {
                        printWriter.println("*************************************************");
                        printWriter.println("File " + (i + 1) + ": " + listFiles[i].getName());
                        printWriter.println("*************************************************");
                        messageStream = new FileInputStream(listFiles[i]);
                        MimeMessage sMTPMessage = new SMTPMessage((Session) null, messageStream);
                        JasenMessage parse = new StandardMimeMessageParser().parse(sMTPMessage);
                        printWriter.println("HTML: " + parse.getHtmlPart());
                        EmailTokenizer emailTokenizer = new EmailTokenizer();
                        emailTokenizer.setIgnoreHeaders(true);
                        // NOTE(review): tokenize(...) dereferences its ParserData argument, so passing null
                        // ends in a NullPointerException that the catch below prints. A real ParserData is
                        // needed here; how to obtain one is not visible from this class.
                        String[] tokens = emailTokenizer.tokenize(sMTPMessage, parse, null);
                        if (tokens != null) {
                            LinguisticAnalyzer.getInstance();
                            for (int j = 0; j < tokens.length; j++) {
                                printWriter.println("TOKEN: [" + tokens[j] + "]");
                                LinguisticAnalyzer.getInstance().getWordScore(tokens[j]);
                            }
                        }
                    } catch (Exception e) {
                        e.printStackTrace();
                    } finally {
                        if (messageStream != null) {
                            try {
                                messageStream.close();
                            } catch (IOException ignored) {
                                // Nothing useful can be done if closing a read-only stream fails.
                            }
                        }
                    }
                }
                System.out.println("Processed " + (i + 1) + IMAP4Constants.DIR_SEPARATOR + listFiles.length);
            }
        } catch (Exception e2) {
            e2.printStackTrace();
        } finally {
            if (printWriter != null) {
                // Closing the writer flushes it and closes the underlying file stream.
                printWriter.close();
            }
        }
    }
}
