package org.apache.tika.parser.microsoft.ooxml;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.pdfbox.pdmodel.common.PDPageLabelRange;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.model.XWPFCommentsDecorator;
import org.apache.poi.xwpf.model.XWPFHeaderFooterPolicy;
import org.apache.poi.xwpf.usermodel.BodyType;
import org.apache.poi.xwpf.usermodel.IBody;
import org.apache.poi.xwpf.usermodel.IBodyElement;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFHyperlink;
import org.apache.poi.xwpf.usermodel.XWPFHyperlinkRun;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFPicture;
import org.apache.poi.xwpf.usermodel.XWPFPictureData;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.poi.xwpf.usermodel.XWPFStyles;
import org.apache.poi.xwpf.usermodel.XWPFTable;
import org.apache.poi.xwpf.usermodel.XWPFTableCell;
import org.apache.poi.xwpf.usermodel.XWPFTableRow;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.microsoft.WordExtractor;
import org.apache.tika.sax.XHTMLContentHandler;
import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.impl.jam.xml.JamXmlElements;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBookmark;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
import org.xml.sax.SAXException;

/* loaded from: input_file:org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.class */
/**
 * Walks an {@link XWPFDocument} and emits its content as XHTML SAX events:
 * headers, body paragraphs and tables (recursively through table cells),
 * then footers. Paragraph runs are decorated with {@code <a>}, {@code <b>}
 * and {@code <i>} elements, and embedded pictures are referenced through
 * {@code <img src="embedded:...">} elements.
 */
public class XWPFWordExtractorDecorator extends AbstractOOXMLExtractor {
    /** The document being extracted; obtained from the wrapped POI extractor. */
    private final XWPFDocument document;
    /** Style table of {@link #document}; null-checked before use. */
    private final XWPFStyles styles;

    /**
     * @param parseContext      parse context handed to the superclass
     * @param xWPFWordExtractor POI extractor whose document is an {@link XWPFDocument}
     */
    public XWPFWordExtractorDecorator(ParseContext parseContext, XWPFWordExtractor xWPFWordExtractor) {
        super(parseContext, xWPFWordExtractor, "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
        this.document = (XWPFDocument) xWPFWordExtractor.getDocument();
        this.styles = this.document.getStyles();
    }

    /**
     * Emits the document-level headers, then the body, then the document-level footers.
     */
    @Override // org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor
    protected void buildXHTML(XHTMLContentHandler xHTMLContentHandler) throws SAXException, XmlException, IOException {
        XWPFHeaderFooterPolicy headerFooterPolicy = this.document.getHeaderFooterPolicy();
        // extractHeaders/extractFooters tolerate a null policy.
        extractHeaders(xHTMLContentHandler, headerFooterPolicy);
        extractIBodyText(this.document, xHTMLContentHandler);
        extractFooters(xHTMLContentHandler, headerFooterPolicy);
    }

    /**
     * Emits every paragraph and table of the given body, in document order.
     * Body elements of any other kind are ignored.
     */
    private void extractIBodyText(IBody iBody, XHTMLContentHandler xHTMLContentHandler) throws SAXException, XmlException, IOException {
        for (IBodyElement element : iBody.getBodyElements()) {
            if (element instanceof XWPFParagraph) {
                extractParagraph((XWPFParagraph) element, xHTMLContentHandler);
            } else if (element instanceof XWPFTable) {
                extractTable((XWPFTable) element, xHTMLContentHandler);
            }
        }
    }

    /**
     * Emits one paragraph: section headers (if the paragraph carries section
     * properties), the paragraph element with bookmarks, runs, comment text and
     * footnote text, and finally the section footers.
     */
    private void extractParagraph(XWPFParagraph xWPFParagraph, XHTMLContentHandler xHTMLContentHandler) throws SAXException, XmlException, IOException {
        // Section properties attached to the paragraph bring their own header/footer policy.
        XWPFHeaderFooterPolicy sectionPolicy = null;
        if (xWPFParagraph.getCTP().getPPr() != null) {
            CTSectPr sectPr = xWPFParagraph.getCTP().getPPr().getSectPr();
            if (sectPr != null) {
                sectionPolicy = new XWPFHeaderFooterPolicy(this.document, sectPr);
                extractHeaders(xHTMLContentHandler, sectionPolicy);
            }
        }

        // Default to a plain <p>; a resolvable named style may pick another tag and a CSS class.
        String tag = "p";
        String styleClass = null;
        String styleName = lookupStyleName(xWPFParagraph.getStyleID());
        if (styleName != null) {
            WordExtractor.TagAndStyle tagAndStyle = WordExtractor.buildParagraphTagAndStyle(
                    styleName, xWPFParagraph.getPartType() == BodyType.TABLECELL);
            tag = tagAndStyle.getTag();
            styleClass = tagAndStyle.getStyleClass();
        }

        if (styleClass == null) {
            xHTMLContentHandler.startElement(tag);
        } else {
            xHTMLContentHandler.startElement(tag, "class", styleClass);
        }

        // Bookmarks become empty named anchors at the start of the paragraph.
        for (CTBookmark bookmark : xWPFParagraph.getCTP().getBookmarkStartList()) {
            xHTMLContentHandler.startElement("a", "name", bookmark.getName());
            xHTMLContentHandler.endElement("a");
        }

        for (XWPFRun run : xWPFParagraph.getRuns()) {
            extractRun(xWPFParagraph, run, xHTMLContentHandler);
        }

        String commentText = new XWPFCommentsDecorator(xWPFParagraph, null).getCommentText();
        if (commentText != null && commentText.length() > 0) {
            xHTMLContentHandler.characters(commentText);
        }

        String footnoteText = xWPFParagraph.getFootnoteText();
        if (footnoteText != null && footnoteText.length() > 0) {
            xHTMLContentHandler.characters(footnoteText + "\n");
        }

        xHTMLContentHandler.endElement(tag);

        if (sectionPolicy != null) {
            extractFooters(xHTMLContentHandler, sectionPolicy);
        }
    }

    /**
     * Resolves a paragraph style id to the style's name.
     *
     * @return the style name, or {@code null} when the id is null, the document
     *         has no style table, the id is not found in it, or the style has no name
     */
    private String lookupStyleName(String styleId) {
        if (styleId == null || this.styles == null || this.styles.getStyle(styleId) == null) {
            return null;
        }
        return this.styles.getStyle(styleId).getName();
    }

    /**
     * Emits one run: its text wrapped in optional hyperlink/bold/italic elements,
     * followed by an {@code <img>} element for each embedded picture that has data.
     */
    private void extractRun(XWPFParagraph paragraph, XWPFRun run, XHTMLContentHandler xHTMLContentHandler) throws SAXException {
        // Tags opened for this run, in opening order; closed in reverse below.
        List<String> openTags = new ArrayList<String>();

        if (run instanceof XWPFHyperlinkRun) {
            XWPFHyperlinkRun linkRun = (XWPFHyperlinkRun) run;
            XWPFHyperlink hyperlink = linkRun.getHyperlink(this.document);
            if (hyperlink != null && hyperlink.getURL() != null) {
                xHTMLContentHandler.startElement("a", "href", hyperlink.getURL());
                openTags.add("a");
            } else {
                // No external URL: fall back to an in-document anchor, if any.
                String anchor = linkRun.getAnchor();
                if (anchor != null && anchor.length() > 0) {
                    xHTMLContentHandler.startElement("a", "href", "#" + anchor);
                    openTags.add("a");
                }
            }
        }
        if (run.isBold()) {
            xHTMLContentHandler.startElement("b");
            openTags.add("b");
        }
        if (run.isItalic()) {
            xHTMLContentHandler.startElement("i");
            openTags.add("i");
        }

        xHTMLContentHandler.characters(run.toString());

        for (int i = openTags.size() - 1; i >= 0; i--) {
            xHTMLContentHandler.endElement(openTags.get(i));
        }

        for (XWPFPicture picture : run.getEmbeddedPictures()) {
            if (paragraph.getDocument() == null) {
                continue;
            }
            XWPFPictureData pictureData = picture.getPictureData();
            if (pictureData != null) {
                xHTMLContentHandler.startElement("img", "src", "embedded:" + pictureData.getFileName());
                xHTMLContentHandler.endElement("img");
            }
        }
    }

    /**
     * Emits a table as {@code table/tbody/tr/td}, recursing into each cell's body.
     */
    private void extractTable(XWPFTable xWPFTable, XHTMLContentHandler xHTMLContentHandler) throws SAXException, XmlException, IOException {
        xHTMLContentHandler.startElement("table");
        xHTMLContentHandler.startElement("tbody");
        for (XWPFTableRow row : xWPFTable.getRows()) {
            xHTMLContentHandler.startElement("tr");
            for (XWPFTableCell cell : row.getTableCells()) {
                xHTMLContentHandler.startElement("td");
                extractIBodyText(cell, xHTMLContentHandler);
                xHTMLContentHandler.endElement("td");
            }
            xHTMLContentHandler.endElement("tr");
        }
        xHTMLContentHandler.endElement("tbody");
        xHTMLContentHandler.endElement("table");
    }

    /**
     * Emits the first-page, even-page and default footers (those that exist),
     * each as a {@code <p>} element. Does nothing when the policy is null.
     */
    private void extractFooters(XHTMLContentHandler xHTMLContentHandler, XWPFHeaderFooterPolicy xWPFHeaderFooterPolicy) throws SAXException {
        if (xWPFHeaderFooterPolicy == null) {
            return;
        }
        if (xWPFHeaderFooterPolicy.getFirstPageFooter() != null) {
            xHTMLContentHandler.element("p", xWPFHeaderFooterPolicy.getFirstPageFooter().getText());
        }
        if (xWPFHeaderFooterPolicy.getEvenPageFooter() != null) {
            xHTMLContentHandler.element("p", xWPFHeaderFooterPolicy.getEvenPageFooter().getText());
        }
        if (xWPFHeaderFooterPolicy.getDefaultFooter() != null) {
            xHTMLContentHandler.element("p", xWPFHeaderFooterPolicy.getDefaultFooter().getText());
        }
    }

    /**
     * Emits the first-page, even-page and default headers (those that exist),
     * each as a {@code <p>} element. Does nothing when the policy is null.
     */
    private void extractHeaders(XHTMLContentHandler xHTMLContentHandler, XWPFHeaderFooterPolicy xWPFHeaderFooterPolicy) throws SAXException {
        if (xWPFHeaderFooterPolicy == null) {
            return;
        }
        if (xWPFHeaderFooterPolicy.getFirstPageHeader() != null) {
            xHTMLContentHandler.element("p", xWPFHeaderFooterPolicy.getFirstPageHeader().getText());
        }
        if (xWPFHeaderFooterPolicy.getEvenPageHeader() != null) {
            xHTMLContentHandler.element("p", xWPFHeaderFooterPolicy.getEvenPageHeader().getText());
        }
        if (xWPFHeaderFooterPolicy.getDefaultHeader() != null) {
            xHTMLContentHandler.element("p", xWPFHeaderFooterPolicy.getDefaultHeader().getText());
        }
    }

    /**
     * @return a new single-element list holding the document's own package part
     */
    @Override // org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor
    protected List<PackagePart> getMainDocumentParts() {
        List<PackagePart> parts = new ArrayList<PackagePart>();
        parts.add(this.document.getPackagePart());
        return parts;
    }
}
