package org.apache.stanbol.enhancer.engines.htmlextractor.impl;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;

/* loaded from: input_file:WEB-INF/lib/org.apache.stanbol.enhancer.engines.htmlextractor-0.10.0.jar:org/apache/stanbol/enhancer/engines/htmlextractor/impl/HtmlParser.class */
/**
 * Parses HTML with Jsoup and converts the result into a W3C DOM
 * {@link Document} via {@code DOMBuilder.jsoup2DOM}.
 *
 * <p>The configured base URI is handed to Jsoup on every parse. Parse
 * failures are logged and reported to the caller as a {@code null} document.
 */
public class HtmlParser {
    private static final Logger LOG = LoggerFactory.getLogger(HtmlParser.class);

    /** Charset used to turn in-memory strings into bytes, and to tell Jsoup how to decode them. */
    private static final String STRING_CHARSET = "UTF-8";

    private String baseURI = StringUtils.EMPTY;

    /**
     * Returns the base URI passed to Jsoup when parsing.
     *
     * @return the current base URI; the empty string unless changed via {@link #setBaseURI(String)}
     */
    public String getBaseURI() {
        return this.baseURI;
    }

    /**
     * Sets the base URI passed to Jsoup when parsing.
     *
     * @param str the new base URI
     */
    public void setBaseURI(String str) {
        this.baseURI = str;
    }

    /**
     * Parses HTML held in a string.
     *
     * <p>The string is encoded as UTF-8 and Jsoup is told to decode it as
     * UTF-8, so the round trip does not depend on the platform default charset
     * or on any charset declared inside the markup.
     *
     * @param str the HTML text, may be {@code null}
     * @return the DOM document, or {@code null} if {@code str} is {@code null} or parsing failed
     */
    public Document getDOM(String str) {
        if (str == null) {
            return null;
        }
        byte[] encoded = str.getBytes(Charset.forName(STRING_CHARSET));
        return getDOM(new ByteArrayInputStream(encoded), STRING_CHARSET);
    }

    /**
     * Parses HTML read from a stream. The stream is not closed by this method.
     *
     * @param inputStream the stream to read the HTML from
     * @param str the charset name handed to {@code Jsoup.parse}; per Jsoup's documentation
     *            {@code null} lets Jsoup determine the charset itself
     * @return the DOM document, or {@code null} if reading or parsing failed (the failure is logged)
     */
    public Document getDOM(InputStream inputStream, String str) {
        try {
            return DOMBuilder.jsoup2DOM(Jsoup.parse(inputStream, str, this.baseURI));
        } catch (IOException e) {
            LOG.error("Failed to read HTML input", e);
        } catch (RuntimeException e) {
            LOG.error("Failed to convert HTML input to DOM", e);
        }
        // Callers rely on a null result to signal failure rather than an exception.
        return null;
    }

    /**
     * Command-line entry point: parses each given HTML file and writes the
     * resulting DOM as XML to {@code <file name>.xml} in the working directory.
     *
     * <p>Usage: {@code [-enc <charset>] file...}. Leading arguments starting
     * with {@code -} other than {@code -enc} are ignored.
     *
     * @param strArr command-line arguments
     * @throws IllegalArgumentException if {@code -enc} is given without a charset name
     * @throws Exception if a file cannot be opened or the XML cannot be written
     */
    public static void main(String[] strArr) throws Exception {
        int firstFile = 0;
        String encoding = null;
        while (firstFile < strArr.length && strArr[firstFile].startsWith("-")) {
            if (strArr[firstFile].equals("-enc")) {
                firstFile++;
                // Guard against "-enc" being the last argument.
                if (firstFile >= strArr.length) {
                    throw new IllegalArgumentException("Option -enc requires a charset name");
                }
                encoding = strArr[firstFile];
            }
            firstFile++;
        }

        HtmlParser htmlParser = new HtmlParser();
        for (int idx = firstFile; idx < strArr.length; idx++) {
            String path = strArr[idx];
            FileInputStream fileInputStream = new FileInputStream(path);
            try {
                Document dom = htmlParser.getDOM(fileInputStream, encoding);
                if (dom == null) {
                    // getDOM already logged the cause; do not create an output file for it.
                    LOG.error("Unable to parse {}; no output written", path);
                    continue;
                }
                FileOutputStream fileOutputStream = new FileOutputStream(new File(path).getName() + ".xml");
                try {
                    DOMUtils.writeXml(dom, "UTF-8", null, fileOutputStream);
                } finally {
                    fileOutputStream.close();
                }
            } finally {
                // Runs on success, on failure and on the 'continue' above.
                fileInputStream.close();
            }
        }
    }
}
