package org.semanticdesktop.aperture.extractor.openxml;

import com.ctc.wstx.cfg.InputConfigFlags;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.StringTokenizer;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.infinispan.schematic.document.Bson;
import org.modeshape.jcr.RepositoryConfiguration;
import org.ontoware.rdf2go.exception.ModelException;
import org.ontoware.rdf2go.model.Model;
import org.ontoware.rdf2go.model.node.Resource;
import org.ontoware.rdf2go.model.node.URI;
import org.ontoware.rdf2go.vocabulary.RDF;
import org.semanticdesktop.aperture.extractor.Extractor;
import org.semanticdesktop.aperture.extractor.ExtractorException;
import org.semanticdesktop.aperture.rdf.RDFContainer;
import org.semanticdesktop.aperture.util.SimpleSAXAdapter;
import org.semanticdesktop.aperture.util.SimpleSAXListener;
import org.semanticdesktop.aperture.util.SimpleSAXParser;
import org.semanticdesktop.aperture.util.UriUtil;
import org.semanticdesktop.aperture.vocabulary.NCO;
import org.semanticdesktop.aperture.vocabulary.NFO;
import org.semanticdesktop.aperture.vocabulary.NIE;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.SAXException;

/* loaded from: input_file:WEB-INF/lib/aperture-1.1.0.Beta1.jar:org/semanticdesktop/aperture/extractor/openxml/OpenXmlExtractor.class */
public class OpenXmlExtractor implements Extractor {
    // Name of the content-type manifest entry. Not referenced by the visible code;
    // presumably consumed by extract(), whose body was not decompiled - TODO confirm.
    private static final String CONTENT_TYPES_FILE = "[Content_Types].xml";
    // 4 MiB (4 * 1024 * 1024). Not referenced by the visible code.
    private static final int BUFFER_SIZE = 4194304;
    // Platform line separator, falling back to "\n" when the system property is unset.
    private static final String END_OF_LINE = System.getProperty("line.separator", "\n");
    // Set of content types; populated once by the static initializer of this class.
    private static final HashSet<String> TEXT_ELEMENT_TYPES = new HashSet<>();
    // Maps a content type to an element name; populated once by the static initializer.
    // Presumably the element name is handed to AttributeTextCollector - verify against extract().
    private static final HashMap<String, String> TEXT_ATTRIBUTE_TYPES = new HashMap<>();
    // Default/Override content-type registry; replaced on every call to parseContentTypes().
    private ContentTypes contentTypes;
    // Per-instance logger named after the runtime class.
    private Logger logger = LoggerFactory.getLogger(getClass());
    // Buffer that the TextCollector / AttributeTextCollector listeners append extracted text to.
    // NOTE(review): the initial capacity is taken from an unrelated Woodstox flag constant;
    // this looks like a decompiler substitution for a plain numeric literal - confirm the value.
    private StringBuilder fullText = new StringBuilder(InputConfigFlags.CFG_LAZY_PARSING);

    /* loaded from: input_file:WEB-INF/lib/aperture-1.1.0.Beta1.jar:org/semanticdesktop/aperture/extractor/openxml/OpenXmlExtractor$AttributeTextCollector.class */
    /**
     * SAX listener that harvests the {@code name} attribute of every element whose tag equals a
     * configured name and appends it, followed by a line break, to the enclosing extractor's
     * full-text buffer.
     */
    private class AttributeTextCollector extends SimpleSAXAdapter {
        /** Tag name whose {@code name} attribute is collected. */
        private String fullTextTag;

        /**
         * @param str the tag name to watch for
         */
        public AttributeTextCollector(String str) {
            this.fullTextTag = str;
        }

        /**
         * Appends the {@code name} attribute of a matching element to the full text.
         *
         * @param str  the tag name of the element being opened
         * @param map  the element's attributes, looked up by attribute name
         * @param str2 unused by this collector
         */
        @Override
        public void startTag(String str, Map map, String str2) throws SAXException {
            if (!this.fullTextTag.equals(str)) {
                return;
            }
            Object nameAttribute = map.get("name");
            // Only string-valued attributes contribute text; anything else (or a missing
            // attribute) is skipped.
            if (nameAttribute instanceof String) {
                OpenXmlExtractor.this.fullText
                        .append(nameAttribute)
                        .append(OpenXmlExtractor.END_OF_LINE);
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:WEB-INF/lib/aperture-1.1.0.Beta1.jar:org/semanticdesktop/aperture/extractor/openxml/OpenXmlExtractor$ContentTypes.class */
    /**
     * Registry of content types, keyed either by file extension ("defaults") or by full part
     * name ("overrides"). An override always wins over a default.
     */
    public static class ContentTypes {
        /** Content type per file extension (extension stored without the leading dot). */
        private HashMap<String, String> defaults = new HashMap<>();
        /** Content type per exact part name. */
        private HashMap<String, String> overrides = new HashMap<>();

        /** Registers (or replaces) the content type {@code str2} for the extension {@code str}. */
        public void addDefault(String str, String str2) {
            this.defaults.put(str, str2);
        }

        /** Registers (or replaces) the content type {@code str2} for the part name {@code str}. */
        public void addOverride(String str, String str2) {
            this.overrides.put(str, str2);
        }

        /** @return the content type registered for the extension {@code str}, or {@code null} */
        public String getDefault(String str) {
            return this.defaults.get(str);
        }

        /** @return the content type registered for the part name {@code str}, or {@code null} */
        public String getOverride(String str) {
            return this.overrides.get(str);
        }

        /**
         * Resolves the content type of a part.
         *
         * @param str the part name
         * @return the override for that exact name if one exists; otherwise the default for the
         *         text after the last {@code '.'}; {@code null} when the name has no non-empty
         *         extension or nothing is registered for it
         */
        public String getType(String str) {
            String explicitType = getOverride(str);
            if (explicitType == null) {
                int dot = str.lastIndexOf('.');
                // A usable extension needs a dot that is not the final character.
                boolean hasExtension = dot >= 0 && dot < str.length() - 1;
                return hasExtension ? getDefault(str.substring(dot + 1)) : null;
            }
            return explicitType;
        }

        @Override
        public String toString() {
            StringBuilder description = new StringBuilder("ContentTypes[default=");
            description.append(this.defaults);
            description.append(",overrides=");
            description.append(this.overrides);
            description.append("]");
            return description.toString();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:WEB-INF/lib/aperture-1.1.0.Beta1.jar:org/semanticdesktop/aperture/extractor/openxml/OpenXmlExtractor$NonCloseableStream.class */
    /**
     * Stream wrapper whose {@link #close()} does nothing, so that a consumer closing the wrapper
     * leaves the wrapped stream open. All other operations are inherited from
     * {@link FilterInputStream}.
     */
    public static class NonCloseableStream extends FilterInputStream {
        /**
         * @param inputStream the stream to shield from being closed
         */
        public NonCloseableStream(InputStream inputStream) {
            super(inputStream);
        }

        /** Intentionally a no-op: the wrapped stream stays open. */
        @Override
        public void close() throws IOException {
            // Deliberately empty - do not forward the close to the wrapped stream.
        }
    }

    /* loaded from: input_file:WEB-INF/lib/aperture-1.1.0.Beta1.jar:org/semanticdesktop/aperture/extractor/openxml/OpenXmlExtractor$TextCollector.class */
    /**
     * SAX listener that appends the text of recognised text-bearing elements to the enclosing
     * extractor's full-text buffer, inserting tabs and line breaks for the corresponding
     * {@code w:} elements.
     */
    private class TextCollector extends SimpleSAXAdapter {
        /** Length of the full-text buffer when the current document started. */
        private int initialLength;
        /** True while inside a {@code w:tabs} element, where {@code w:tab} emits no tab. */
        private boolean insideTabs;

        private TextCollector() {
            this.insideTabs = false;
        }

        /** Remembers the buffer length so endDocument() can tell whether anything was added. */
        @Override
        public void startDocument() throws SAXException {
            this.initialLength = OpenXmlExtractor.this.fullText.length();
        }

        /**
         * Appends element text (or a tab) depending on the tag.
         *
         * @param str  the tag name of the element being opened
         * @param map  the element's attributes (unused here)
         * @param str2 the text reported for the element
         */
        @Override
        public void startTag(String str, Map map, String str2) throws SAXException {
            StringBuilder text = OpenXmlExtractor.this.fullText;
            if ("w:t".equals(str)) {
                // Run text is appended verbatim, with no separator.
                text.append(str2);
            } else if (isSpaceSeparatedTextTag(str)) {
                text.append(str2).append(' ');
            } else if ("w:tabs".equals(str)) {
                this.insideTabs = true;
            } else if ("w:tab".equals(str) && !this.insideTabs) {
                text.append('\t');
            }
        }

        /** Ends a paragraph with a line break and tracks leaving a {@code w:tabs} element. */
        @Override
        public void endTag(String str) throws SAXException {
            if ("w:tabs".equals(str)) {
                this.insideTabs = false;
            } else if ("w:p".equals(str)) {
                OpenXmlExtractor.this.fullText.append(OpenXmlExtractor.END_OF_LINE);
            }
        }

        /** Adds a trailing line break, but only when this document contributed any text. */
        @Override
        public void endDocument() throws SAXException {
            boolean contributedText = OpenXmlExtractor.this.fullText.length() > this.initialLength;
            if (contributedText) {
                OpenXmlExtractor.this.fullText.append(OpenXmlExtractor.END_OF_LINE);
            }
        }

        /** Tells whether the tag's text is appended followed by a single space. */
        private boolean isSpaceSeparatedTextTag(String tag) {
            return "t".equals(tag)
                    || "p:text".equals(tag)
                    || "a:t".equals(tag)
                    || "st:t".equals(tag)
                    || "v".equals(tag);
        }
    }

    /* JADX WARN: Code restructure failed: missing block: B:13:0x0044, code lost:
    
        parseContentTypes(r0);
     */
    /**
     * Entry point declared by the {@code Extractor} interface.
     *
     * <p>NOTE: the decompiler could not reconstruct this method. As written here it does no
     * extraction at all and unconditionally throws {@link UnsupportedOperationException}; the
     * real body has to be recovered from the original bytecode or sources before this class
     * can be used.
     */
    @Override // org.semanticdesktop.aperture.extractor.Extractor
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    public void extract(org.ontoware.rdf2go.model.node.URI r9, java.io.InputStream r10, java.nio.charset.Charset r11, java.lang.String r12, org.semanticdesktop.aperture.rdf.RDFContainer r13) throws org.semanticdesktop.aperture.extractor.ExtractorException {
        /*
            Method dump skipped, instructions count: 342
            To view this dump add '--comments-level debug' option
        */
        // Placeholder emitted by the decompiler in lieu of the original 342 instructions.
        throw new UnsupportedOperationException("Method not decompiled: org.semanticdesktop.aperture.extractor.openxml.OpenXmlExtractor.extract(org.ontoware.rdf2go.model.node.URI, java.io.InputStream, java.nio.charset.Charset, java.lang.String, org.semanticdesktop.aperture.rdf.RDFContainer):void");
    }

    /**
     * Parses a content-type manifest and rebuilds {@link #contentTypes} from its
     * {@code Default} (per extension) and {@code Override} (per part name) child elements.
     *
     * <p>An element is registered only when it actually carries both required attributes.
     * {@link Element#getAttribute(String)} returns an empty string - never {@code null} - for a
     * missing attribute, so presence is checked with {@link Element#hasAttribute(String)};
     * otherwise incomplete elements would be registered under empty keys or values.
     *
     * @param inputStream stream positioned at the manifest XML; it is not closed by this method
     * @throws ExtractorException if the XML cannot be read or parsed
     */
    private void parseContentTypes(InputStream inputStream) throws ExtractorException {
        Document document = getDocument(inputStream, false);
        this.contentTypes = new ContentTypes();
        NodeList children = document.getDocumentElement().getChildNodes();
        int count = children.getLength();
        for (int i = 0; i < count; i++) {
            Node child = children.item(i);
            if (child.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element element = (Element) child;
            if (!element.hasAttribute("ContentType")) {
                // Without a content type there is nothing to register.
                continue;
            }
            String contentType = element.getAttribute("ContentType");
            String tagName = element.getTagName();
            if ("Default".equals(tagName)) {
                if (element.hasAttribute("Extension")) {
                    this.contentTypes.addDefault(element.getAttribute("Extension"), contentType);
                }
            } else if ("Override".equals(tagName)) {
                if (element.hasAttribute("PartName")) {
                    this.contentTypes.addOverride(element.getAttribute("PartName"), contentType);
                }
            }
        }
    }

    /**
     * Parses the given stream into a DOM {@link Document} without closing the stream.
     *
     * <p>The XML comes from the document being extracted and is therefore untrusted. The parser
     * is configured defensively: secure processing is requested and resolution of external
     * general/parameter entities and external DTDs is switched off, so a crafted document
     * cannot make the parser read local files or remote URLs (XXE). Features a particular
     * parser implementation does not know are skipped rather than treated as fatal.
     *
     * @param inputStream the XML source; wrapped so that the parser cannot close it
     * @param z           whether the parser should be namespace aware
     * @return the parsed document
     * @throws ExtractorException if reading or parsing fails
     * @throws RuntimeException   if no {@code DocumentBuilder} can be created
     */
    private Document getDocument(InputStream inputStream, boolean z) throws ExtractorException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(z);
        factory.setValidating(false);
        factory.setExpandEntityReferences(false);
        setFeatureIfSupported(factory, javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);
        setFeatureIfSupported(factory, "http://xml.org/sax/features/external-general-entities", false);
        setFeatureIfSupported(factory, "http://xml.org/sax/features/external-parameter-entities", false);
        setFeatureIfSupported(factory, "http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        try {
            return factory.newDocumentBuilder().parse(new NonCloseableStream(inputStream));
        } catch (ParserConfigurationException e) {
            throw new RuntimeException("unable to instantiate DocumentBuilder", e);
        } catch (IOException e) {
            throw new ExtractorException(e);
        } catch (SAXException e) {
            throw new ExtractorException(e);
        }
    }

    /** Sets a parser feature, tolerating implementations that do not recognise it. */
    private void setFeatureIfSupported(DocumentBuilderFactory factory, String feature, boolean value) {
        try {
            factory.setFeature(feature, value);
        } catch (ParserConfigurationException e) {
            // Best effort: an unknown feature must not prevent parsing altogether.
            this.logger.debug("XML parser does not support feature " + feature, e);
        }
    }

    /**
     * Ensures a name starts with a slash.
     *
     * @param str the name, with or without a leading {@code "/"}
     * @return {@code str} itself when it already starts with {@code "/"}, otherwise
     *         {@code "/" + str}
     */
    private String toAbsoluteName(String str) {
        if (str.startsWith("/")) {
            return str;
        }
        return "/" + str;
    }

    /**
     * Runs a {@link SimpleSAXParser} over the stream, reporting events to the given listener.
     * Whitespace trimming is switched off, and the stream is wrapped so the parser cannot
     * close it.
     *
     * <p>Parsing is best effort: a failure while parsing is logged as a warning and otherwise
     * ignored. Any other exception raised inside this method is rethrown wrapped in a
     * {@link RuntimeException}.
     *
     * @param inputStream       the XML to parse
     * @param simpleSAXListener receiver of the SAX events
     */
    private void process(InputStream inputStream, SimpleSAXListener simpleSAXListener) {
        try {
            SimpleSAXParser parser = new SimpleSAXParser();
            parser.setListener(simpleSAXListener);
            parser.setTrimWhiteSpace(false);
            parseLeniently(parser, inputStream);
        } catch (Exception e) {
            throw new RuntimeException("unable to instantiate SAXParser", e);
        }
    }

    /** Parses the stream, logging (not propagating) any exception the parse raises. */
    private void parseLeniently(SimpleSAXParser parser, InputStream inputStream) {
        try {
            parser.parse(new NonCloseableStream(inputStream));
        } catch (Exception e) {
            this.logger.warn("Exception while parsing XML", (Throwable) e);
        }
    }

    /**
     * Reads a metadata XML document and records its properties in the RDF container.
     *
     * <p>The container is first typed as {@code NFO.Document}. Then, for every child element of
     * the root that has text (see {@code getText}):
     * <ul>
     *   <li>if the element has a namespace, a statement is added whose predicate is the
     *       namespace URI (with a trailing {@code "/"} ensured) followed by the local name;</li>
     *   <li>the element is additionally passed to {@code mapToApertureProperty}.</li>
     * </ul>
     *
     * @param inputStream  the metadata XML; parsed namespace-aware
     * @param rDFContainer receiver of the extracted statements
     * @throws ExtractorException if the XML cannot be read or parsed
     */
    private void extractMetadata(InputStream inputStream, RDFContainer rDFContainer) throws ExtractorException {
        Element root = getDocument(inputStream, true).getDocumentElement();
        rDFContainer.add(RDF.type, NFO.Document);
        NodeList children = root.getChildNodes();
        int count = children.getLength();
        for (int index = 0; index < count; index++) {
            Node child = children.item(index);
            if (child.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element property = (Element) child;
            String value = getText(property);
            if (value == null) {
                continue;
            }
            String namespace = property.getNamespaceURI();
            if (namespace != null) {
                String predicatePrefix = namespace.endsWith("/") ? namespace : namespace + "/";
                try {
                    rDFContainer.add(rDFContainer.getValueFactory().createURI(predicatePrefix + property.getLocalName()), value);
                } catch (ModelException e) {
                    // A single bad statement must not abort the whole metadata pass.
                    this.logger.error("ModelException while adding statement, ignoring", (Throwable) e);
                }
            }
            mapToApertureProperty(property, value, rDFContainer);
        }
    }

    /**
     * Maps a metadata element onto the matching Aperture vocabulary property, based on the
     * element's local name. Elements with an unrecognised local name are ignored.
     *
     * <p>Date-valued properties ({@code created}, {@code modified}) are added only when the
     * text can be parsed; {@code convertStringToDate} returns {@code null} otherwise, and that
     * {@code null} must not be handed to the container. A non-numeric {@code Pages} value still
     * marks the resource as a paginated document but adds no page count.
     *
     * @param element      the metadata element
     * @param str          the element's text
     * @param rDFContainer receiver of the resulting statements
     */
    private void mapToApertureProperty(Element element, String str, RDFContainer rDFContainer) {
        String localName = element.getLocalName();
        if ("title".equals(localName)) {
            rDFContainer.add(NIE.title, str);
        } else if ("subject".equals(localName)) {
            rDFContainer.add(NIE.subject, str);
        } else if ("created".equals(localName)) {
            addDateIfParseable(NIE.contentCreated, str, rDFContainer);
        } else if ("creator".equals(localName)) {
            addContactStatement(NCO.creator, str, rDFContainer);
        } else if (RepositoryConfiguration.FieldName.DESCRIPTION.equals(localName)) {
            // NOTE(review): this constant comes from an unrelated library and looks like a
            // decompiler substitution for a string literal - confirm its value.
            rDFContainer.add(NIE.description, str);
        } else if ("lastModifiedBy".equals(localName)) {
            addContactStatement(NCO.contributor, str, rDFContainer);
        } else if ("modified".equals(localName)) {
            addDateIfParseable(NIE.contentLastModified, str, rDFContainer);
        } else if ("Application".equals(localName)) {
            rDFContainer.add(NIE.generator, str);
        } else if ("Pages".equals(localName)) {
            rDFContainer.add(RDF.type, NFO.PaginatedTextDocument);
            try {
                rDFContainer.add(NFO.pageCount, Integer.parseInt(str));
            } catch (NumberFormatException ignored) {
                // Best effort: keep the type statement, just leave out the unusable count.
                this.logger.debug("Ignoring non-numeric page count '{}'", str);
            }
        } else if ("keywords".equals(localName)) {
            StringTokenizer keywords = new StringTokenizer(str, " \t.,;|/\\", false);
            while (keywords.hasMoreTokens()) {
                rDFContainer.add(NIE.keyword, keywords.nextToken());
            }
        }
    }

    /** Adds a date-valued statement, skipping (and logging) text that cannot be parsed. */
    private void addDateIfParseable(URI uri, String str, RDFContainer rDFContainer) {
        Date date = convertStringToDate(str);
        if (date == null) {
            this.logger.warn("Ignoring unparseable date '{}' for property {}", str, uri);
            return;
        }
        rDFContainer.add(uri, date);
    }

    /**
     * Parses a timestamp, trying {@code Bson.DATE_FORMAT} first and then
     * {@code yyyy-MM-dd'T'HH:mm:ssZ}.
     *
     * @param str the text to parse
     * @return the parsed date, or {@code null} when neither pattern matches
     */
    private Date convertStringToDate(String str) {
        String[] patterns = {Bson.DATE_FORMAT, "yyyy-MM-dd'T'HH:mm:ssZ"};
        for (String pattern : patterns) {
            try {
                return new SimpleDateFormat(pattern).parse(str);
            } catch (ParseException ignored) {
                // Not in this format - fall through to the next pattern.
            }
        }
        return null;
    }

    /**
     * Links the container's subject to a newly generated contact resource.
     *
     * <p>The contact is created in the container's model, typed as {@code NCO.Contact}, given
     * {@code str} as its {@code NCO.fullname}, and then attached to the container via
     * {@code uri}.
     *
     * @param uri          the property connecting the subject to the contact
     * @param str          the contact's full name
     * @param rDFContainer the container (and model) to add the statements to
     */
    private void addContactStatement(URI uri, String str, RDFContainer rDFContainer) {
        Model model = rDFContainer.getModel();
        Resource contact = UriUtil.generateRandomResource(model);
        // Describe the contact first, then reference it from the container.
        model.addStatement(contact, RDF.type, NCO.Contact);
        model.addStatement(contact, NCO.fullname, str);
        rDFContainer.add(uri, contact);
    }

    /**
     * Returns the text at the start of an element.
     *
     * @param element the element to inspect
     * @return the whole text of the element's first child when that child is a {@link Text}
     *         node; {@code null} when the element has no children or starts with another kind
     *         of node
     */
    private String getText(Element element) {
        Node first = element.getFirstChild();
        return first instanceof Text ? ((Text) first).getWholeText() : null;
    }

    // Populates the two content-type lookup tables once, when the class is loaded.
    static {
        // Content types registered in TEXT_ELEMENT_TYPES.
        String[] elementTypes = {
            "application/vnd.ms-word.document.macroEnabled.main+xml",
            "application/vnd.ms-word.template.macroEnabledTemplate.main+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.comments+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.slide+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.slideMaster+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.notesMaster+xml",
            "application/vnd.openxmlformats-officedocument.presentationml.handoutMaster+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.comments+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.main+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document.glossary+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.template.main+xml",
        };
        for (String contentType : elementTypes) {
            TEXT_ELEMENT_TYPES.add(contentType);
        }

        // {content type, element name} pairs registered in TEXT_ATTRIBUTE_TYPES.
        String[][] attributeTypes = {
            {"application/vnd.openxmlformats-officedocument.presentationml.commentAuthors+xml", "p:cmAuthor"},
            {"application/vnd.openxmlformats-officedocument.spreadsheetml.main+xml", "sheet"},
            {"application/vnd.openxmlformats-officedocument.spreadsheetml.template.main+xml", "sheet"},
            {"application/vnd.openxmlformats-officedocument.spreadsheetml.table+xml", "tableColumn"},
            {"application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml", "p:cSld"},
        };
        for (String[] pair : attributeTypes) {
            TEXT_ATTRIBUTE_TYPES.put(pair[0], pair[1]);
        }
    }
}
