package org.semanticdesktop.aperture.extractor.opendocument;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.text.ParseException;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import javax.jcr.Workspace;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.ontoware.rdf2go.exception.ModelException;
import org.ontoware.rdf2go.model.Model;
import org.ontoware.rdf2go.model.node.Resource;
import org.ontoware.rdf2go.model.node.URI;
import org.ontoware.rdf2go.vocabulary.RDF;
import org.semanticdesktop.aperture.extractor.Extractor;
import org.semanticdesktop.aperture.extractor.ExtractorException;
import org.semanticdesktop.aperture.rdf.RDFContainer;
import org.semanticdesktop.aperture.util.DateUtil;
import org.semanticdesktop.aperture.util.IOUtil;
import org.semanticdesktop.aperture.util.ResourceUtil;
import org.semanticdesktop.aperture.util.SimpleSAXAdapter;
import org.semanticdesktop.aperture.util.SimpleSAXParser;
import org.semanticdesktop.aperture.util.UriUtil;
import org.semanticdesktop.aperture.vocabulary.NCO;
import org.semanticdesktop.aperture.vocabulary.NFO;
import org.semanticdesktop.aperture.vocabulary.NIE;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Attr;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.SAXException;

/* loaded from: input_file:modeshape-unit-test/lib/aperture-1.1.0.Beta1.jar:org/semanticdesktop/aperture/extractor/opendocument/OpenDocumentExtractor.class */
/**
 * Extractor for zip-packaged office documents that carry their body in a
 * {@code content.xml} entry and their metadata in a {@code meta.xml} entry.
 *
 * <p>The body text is stored as {@link NIE#plainTextContent}; every namespaced child of the
 * metadata element is stored verbatim under a URI derived from its namespace and local name,
 * and a selection of well-known properties is additionally mapped to NIE/NCO/NFO terms.
 */
public class OpenDocumentExtractor implements Extractor {
    /** Platform line separator used between extracted paragraphs and headings. */
    private static final String END_OF_LINE = System.getProperty("line.separator", "\n");

    /** System id passed to both XML parsers; the URL of the {@code office.dtd} resource shipped next to this class. */
    private static final String SYSTEM_ID = ResourceUtil.getURL("org/semanticdesktop/aperture/extractor/opendocument/office.dtd", OpenDocumentExtractor.class).toString();

    /** Name of the zip entry holding the document body. */
    private static final String CONTENT_ENTRY = "content.xml";

    /** Name of the zip entry holding the document metadata. */
    private static final String METADATA_ENTRY = "meta.xml";

    /** Separator inserted between a namespace URI and a local name when building property URIs. */
    private static final String NAMESPACE_SEPARATOR = "/";

    private final Logger logger = LoggerFactory.getLogger(getClass());

    /**
     * SAX listener that accumulates the text found inside {@code office:body} or
     * {@code math:math}. Paragraphs ({@code text:p}) are terminated by a line break, headings
     * ({@code text:h}) are surrounded by blank lines, and text of any other element is followed
     * by a single space.
     *
     * <p>The constructor is private, so instances are only created by the enclosing class.
     */
    public static class ContentExtractor extends SimpleSAXAdapter {
        private static final String OFFICE_BODY = "office:body";
        private static final String MATH_MATH = "math:math";
        private static final String TEXT_P = "text:p";
        private static final String TEXT_H = "text:h";

        /** Text collected so far. */
        private final StringBuilder contents = new StringBuilder(4096);

        /** True while the parser is between the start and end tag of a text root element. */
        private boolean insideBody;

        private ContentExtractor() {
        }

        /**
         * @return the text collected so far; never {@code null}, possibly empty
         */
        public String getContents() {
            return contents.toString();
        }

        /**
         * Collects the text of an element, provided it occurs inside a text root element.
         *
         * @param str  qualified name of the element that was opened
         * @param map  attributes of the element; not used
         * @param str2 text reported for the element; {@code null} or empty text is ignored
         */
        @Override
        public void startTag(String str, Map map, String str2) {
            if (isTextRoot(str)) {
                insideBody = true;
                return;
            }
            if (!insideBody || str2 == null || str2.length() == 0) {
                return;
            }

            boolean heading = TEXT_H.equals(str);
            if (heading && contents.length() > 0) {
                // Separate a heading from whatever precedes it with a blank line.
                contents.append(END_OF_LINE).append(END_OF_LINE);
            }

            contents.append(str2);

            if (TEXT_P.equals(str)) {
                contents.append(END_OF_LINE);
            } else if (heading) {
                contents.append(END_OF_LINE).append(END_OF_LINE);
            } else {
                contents.append(' ');
            }
        }

        /**
         * Stops collecting text once a text root element is closed.
         *
         * @param str qualified name of the element that was closed
         */
        @Override
        public void endTag(String str) {
            if (isTextRoot(str)) {
                insideBody = false;
            }
        }

        /** Tells whether the given qualified name is one of the elements that enclose extractable text. */
        private static boolean isTextRoot(String qualifiedName) {
            return OFFICE_BODY.equals(qualifiedName) || MATH_MATH.equals(qualifiedName);
        }
    }

    /**
     * Reads the zip package from {@code inputStream}, types the described resource as
     * {@link NFO#Document} and adds the full text and metadata found in the package.
     *
     * <p>The stream is wrapped in a {@link ZipInputStream} that is closed before this method
     * returns, whether reading succeeded or not. A failure while closing is logged and does
     * not fail the extraction.
     *
     * @param uri          not used by this implementation
     * @param inputStream  stream positioned at the start of the zip package
     * @param charset      not used by this implementation
     * @param str          not used by this implementation (presumably the MIME type defined by
     *                     the {@code Extractor} interface, which is not visible here)
     * @param rDFContainer container receiving the extracted statements
     * @throws ExtractorException if the package cannot be read or one of its XML parts cannot be parsed
     */
    @Override
    public void extract(URI uri, InputStream inputStream, Charset charset, String str, RDFContainer rDFContainer) throws ExtractorException {
        byte[] contentBytes = null;
        byte[] metadataBytes = null;

        // Both parts are buffered fully so the zip stream can be released before parsing starts.
        ZipInputStream zipStream = new ZipInputStream(inputStream);
        try {
            ZipEntry entry;
            while ((entry = zipStream.getNextEntry()) != null) {
                String entryName = entry.getName();
                if (CONTENT_ENTRY.equals(entryName)) {
                    contentBytes = IOUtil.readBytes(zipStream);
                } else if (METADATA_ENTRY.equals(entryName)) {
                    metadataBytes = IOUtil.readBytes(zipStream);
                }
                zipStream.closeEntry();
            }
        } catch (IOException e) {
            throw new ExtractorException(e);
        } finally {
            closeQuietly(zipStream);
        }

        rDFContainer.add(RDF.type, NFO.Document);
        if (contentBytes != null) {
            extractFullText(contentBytes, rDFContainer);
        }
        if (metadataBytes != null) {
            extractMetadata(metadataBytes, rDFContainer);
        }
    }

    /** Closes the stream, logging instead of propagating a failure so it cannot mask an earlier exception. */
    private void closeQuietly(ZipInputStream stream) {
        try {
            stream.close();
        } catch (IOException e) {
            logger.warn("Could not close zip stream, ignoring", e);
        }
    }

    /**
     * Parses the body XML and stores the collected text as {@link NIE#plainTextContent}.
     * Nothing is added when no text was found.
     *
     * @param contentBytes raw bytes of the body XML
     * @param container    container receiving the statement
     * @throws ExtractorException if the XML cannot be read or parsed
     * @throws RuntimeException   if the SAX parser itself cannot be created
     */
    private void extractFullText(byte[] contentBytes, RDFContainer container) throws ExtractorException {
        // Only the parser construction is treated as an internal error; keeping this catch narrow
        // ensures parse failures below surface as ExtractorException and are not rewrapped.
        SimpleSAXParser parser;
        try {
            parser = new SimpleSAXParser();
        } catch (Exception e) {
            throw new RuntimeException("unable to instantiate SAXParser", e);
        }

        ContentExtractor listener = new ContentExtractor();
        parser.setListener(listener);
        try {
            parser.parse(new ByteArrayInputStream(contentBytes), SYSTEM_ID);
        } catch (IOException e) {
            throw new ExtractorException(e);
        } catch (SAXException e) {
            throw new ExtractorException(e);
        }

        String text = listener.getContents();
        if (text.length() > 0) {
            container.add(NIE.plainTextContent, text);
        }
    }

    /**
     * Parses the metadata XML into a DOM tree and processes every child of the first element
     * below the document element.
     *
     * @param metadataBytes raw bytes of the metadata XML
     * @param container     container receiving the statements
     * @throws ExtractorException if the XML cannot be read or parsed
     * @throws RuntimeException   if the DOM parser itself cannot be created
     */
    private void extractMetadata(byte[] metadataBytes, RDFContainer container) throws ExtractorException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(true);
        factory.setValidating(false);
        factory.setExpandEntityReferences(false);
        // NOTE(review): the bytes come from an arbitrary document and the parser is not hardened
        // against external entities (XXE). DOCTYPE declarations are deliberately left enabled
        // because SYSTEM_ID points at a bundled office.dtd; confirm which parser features can be
        // disabled without breaking documents that reference that DTD.

        Node root;
        try {
            root = factory.newDocumentBuilder().parse(new ByteArrayInputStream(metadataBytes), SYSTEM_ID).getDocumentElement();
        } catch (ParserConfigurationException e) {
            throw new RuntimeException("unable to instantiate DocumentBuilder", e);
        } catch (IOException e) {
            throw new ExtractorException(e);
        } catch (SAXException e) {
            throw new ExtractorException(e);
        }

        // Skip whitespace and comments that may precede the metadata element in formatted XML.
        Node metadataElement = firstElementChild(root);
        if (metadataElement == null) {
            return;
        }

        NodeList properties = metadataElement.getChildNodes();
        int propertyCount = properties.getLength();
        for (int index = 0; index < propertyCount; index++) {
            Node property = properties.item(index);
            addOasisMetadataPropertyToRdfContainer(property, container);
            mapToApertureProperty(property, container);
        }
    }

    /**
     * Returns the first child of {@code parent} that is an element.
     *
     * @param parent node whose children are inspected; may be {@code null}
     * @return the first element child, or {@code null} if there is none
     */
    private static Node firstElementChild(Node parent) {
        if (parent == null) {
            return null;
        }
        for (Node child = parent.getFirstChild(); child != null; child = child.getNextSibling()) {
            if (child.getNodeType() == Node.ELEMENT_NODE) {
                return child;
            }
        }
        return null;
    }

    /**
     * Stores the text of a namespaced element or attribute under a property URI built from its
     * namespace URI (with a trailing {@code "/"} added if missing) followed by its local name.
     * Nodes without a namespace or without text are skipped.
     *
     * @param node      element or attribute taken from the metadata tree
     * @param container container receiving the statement
     */
    private void addOasisMetadataPropertyToRdfContainer(Node node, RDFContainer container) {
        String namespace = node.getNamespaceURI();
        if (namespace == null) {
            return;
        }
        String text = getText(node);
        if (text == null) {
            return;
        }
        if (!namespace.endsWith(NAMESPACE_SEPARATOR)) {
            namespace = namespace + NAMESPACE_SEPARATOR;
        }
        try {
            URI property = container.getValueFactory().createURI(namespace + node.getLocalName());
            container.add(property, text);
        } catch (ModelException e) {
            logger.error("ModelException while adding statement, ignoring", e);
        }
    }

    /**
     * Returns the textual value of a node.
     *
     * @param node attribute or element to read
     * @return the attribute value, or the whole text of the first child if that child is a text
     *         node; {@code null} otherwise
     */
    private String getText(Node node) {
        if (node instanceof Attr) {
            return ((Attr) node).getValue();
        }
        Node firstChild = node.getFirstChild();
        if (firstChild instanceof Text) {
            return ((Text) firstChild).getWholeText();
        }
        return null;
    }

    /**
     * Maps well-known metadata elements, identified by their qualified node name, to
     * NIE/NCO/NFO properties. Unknown elements and elements without content are ignored.
     *
     * @param node      child of the metadata element
     * @param container container receiving the statements
     */
    private void mapToApertureProperty(Node node, RDFContainer container) {
        String name = node.getNodeName();
        Node value = node.getFirstChild();

        if ("dc:creator".equals(name) || "meta:initial-creator".equals(name)) {
            // An empty creator element has no child node at all.
            if (value != null) {
                addContactStatement(NCO.creator, value.getNodeValue(), container);
            }
        } else if ("dc:title".equals(name)) {
            addStatement(NIE.title, value, container);
        } else if ("dc:description".equals(name)) {
            addStatement(NIE.description, value, container);
        } else if ("dc:subject".equals(name)) {
            addStatement(NIE.subject, value, container);
        } else if ("dc:date".equals(name)) {
            addDateStatement(NIE.informationElementDate, value, container);
        } else if ("meta:creation-date".equals(name)) {
            addDateStatement(NIE.contentCreated, value, container);
        } else if ("meta:print-date".equals(name)) {
            addDateStatement(NIE.informationElementDate, value, container);
        } else if ("dc:language".equals(name)) {
            addStatement(NIE.language, value, container);
        } else if ("meta:generator".equals(name)) {
            addStatement(NIE.generator, value, container);
        } else if ("meta:user-defined".equals(name)) {
            // Recognised but intentionally not mapped to any property.
        } else if ("meta:keywords".equals(name)) {
            NodeList keywords = node.getChildNodes();
            int keywordCount = keywords.getLength();
            for (int index = 0; index < keywordCount; index++) {
                Node keyword = keywords.item(index);
                if ("meta:keyword".equals(keyword.getNodeName())) {
                    addStatement(NIE.keyword, keyword.getFirstChild(), container);
                }
            }
        } else if ("meta:keyword".equals(name)) {
            addStatement(NIE.keyword, value, container);
        } else if ("meta:document-statistic".equals(name)) {
            NamedNodeMap attributes = node.getAttributes();
            if (attributes == null) {
                return;
            }
            for (int index = 0; index < attributes.getLength(); index++) {
                Node attribute = attributes.item(index);
                addOasisMetadataPropertyToRdfContainer(attribute, container);
                if (attribute instanceof Attr) {
                    mapStatisticsAttributeToApertureProperty((Attr) attribute, container);
                }
            }
        }
    }

    /**
     * Maps the {@code meta:page-count} statistics attribute to {@link NFO#pageCount} and types
     * the resource as {@link NFO#PaginatedTextDocument}. Other attributes are ignored, and a
     * value that is not an integer is logged and skipped.
     *
     * @param attr      attribute of the statistics element
     * @param container container receiving the statements
     */
    private void mapStatisticsAttributeToApertureProperty(Attr attr, RDFContainer container) {
        if (!"meta:page-count".equals(attr.getNodeName())) {
            return;
        }
        String value = attr.getValue();
        if (value == null) {
            return;
        }

        int pageCount;
        try {
            pageCount = Integer.parseInt(value);
        } catch (NumberFormatException e) {
            // Metadata is best effort: a malformed count must not abort the extraction.
            logger.warn("Ignoring non-numeric meta:page-count value '{}'", value);
            return;
        }
        container.add(RDF.type, NFO.PaginatedTextDocument);
        container.add(NFO.pageCount, pageCount);
    }

    /**
     * Adds the node's value as a string statement; does nothing if the node is {@code null}.
     *
     * @param uri       property to set
     * @param node      node supplying the value, may be {@code null}
     * @param container container receiving the statement
     */
    private void addStatement(URI uri, Node node, RDFContainer container) {
        if (node != null) {
            addStatement(uri, node.getNodeValue(), container);
        }
    }

    /**
     * Adds a string statement; does nothing if the value is {@code null}.
     *
     * @param uri       property to set
     * @param value     value to store, may be {@code null}
     * @param container container receiving the statement
     */
    private void addStatement(URI uri, String value, RDFContainer container) {
        if (value != null) {
            container.add(uri, value);
        }
    }

    /**
     * Parses the node's value with {@link DateUtil#string2DateTime} and adds the result.
     * A missing value is skipped silently; an unparsable one is logged and skipped.
     *
     * @param uri       property to set
     * @param node      node supplying the date string, may be {@code null}
     * @param container container receiving the statement
     */
    private void addDateStatement(URI uri, Node node, RDFContainer container) {
        if (node == null) {
            return;
        }
        String value = node.getNodeValue();
        if (value == null) {
            return;
        }
        try {
            container.add(uri, DateUtil.string2DateTime(value));
        } catch (ParseException e) {
            // Metadata is best effort: a malformed date must not abort the extraction.
            logger.warn("Ignoring unparsable date value '{}' for property {}", value, uri);
        }
    }

    /**
     * Creates a new {@link NCO#Contact} resource carrying the given full name and links it to
     * the described resource through {@code uri}. Does nothing if the name is {@code null}.
     *
     * @param uri       property linking the described resource to the contact
     * @param fullName  name of the contact, may be {@code null}
     * @param container container whose model receives the statements
     */
    private void addContactStatement(URI uri, String fullName, RDFContainer container) {
        if (fullName == null) {
            return;
        }
        Model model = container.getModel();
        Resource contact = UriUtil.generateRandomResource(model);
        model.addStatement(contact, RDF.type, NCO.Contact);
        model.addStatement(contact, NCO.fullname, fullName);
        container.add(uri, contact);
    }
}
