package org.semanticdesktop.aperture.extractor.html;

import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Iterator;
import org.ontoware.rdf2go.model.Model;
import org.ontoware.rdf2go.model.node.Resource;
import org.ontoware.rdf2go.model.node.URI;
import org.ontoware.rdf2go.vocabulary.RDF;
import org.semanticdesktop.aperture.extractor.Extractor;
import org.semanticdesktop.aperture.extractor.ExtractorException;
import org.semanticdesktop.aperture.extractor.util.HtmlParserUtil;
import org.semanticdesktop.aperture.rdf.RDFContainer;
import org.semanticdesktop.aperture.util.UriUtil;
import org.semanticdesktop.aperture.vocabulary.NCO;
import org.semanticdesktop.aperture.vocabulary.NFO;
import org.semanticdesktop.aperture.vocabulary.NIE;

/* loaded from: input_file:lib/modeshape-mimetype-detector-aperture-2.8.2.Final-jar-with-dependencies.jar:org/semanticdesktop/aperture/extractor/html/HtmlExtractor.class */
/**
 * {@link Extractor} for HTML input. Parsing is delegated to
 * {@link HtmlParserUtil#parse}; once parsing is finished, the collected text
 * and metadata are written into the supplied {@link RDFContainer}.
 */
public class HtmlExtractor implements Extractor {

    /**
     * Parser callback that copies the values gathered by
     * {@link HtmlParserUtil.ContentExtractor} (text, keywords, title, author,
     * description) into an {@link RDFContainer} when parsing completes.
     */
    private static class TextAndMetadataExtractor extends HtmlParserUtil.ContentExtractor {
        /** Destination for all statements produced by this extractor. */
        private final RDFContainer container;

        /**
         * @param rDFContainer container that receives the extracted statements
         */
        public TextAndMetadataExtractor(RDFContainer rDFContainer) {
            this.container = rDFContainer;
        }

        /**
         * Writes the document type, plain text and metadata into the container.
         * NOTE(review): presumably invoked by HtmlParserUtil once the input has
         * been fully parsed - confirm against HtmlParserUtil.
         */
        public void finishedParsing() {
            this.container.add(RDF.type, NFO.HtmlDocument);
            // The full text is stored as-is (not trimmed, not skipped when empty).
            this.container.add(NIE.plainTextContent, getText());

            // Wildcard instead of a raw Iterator; elements are cast to String below.
            Iterator<?> keywords = getKeywords();
            while (keywords.hasNext()) {
                addProperty(NIE.keyword, (String) keywords.next());
            }

            addProperty(NIE.title, getTitle());
            addContactProperty(NCO.creator, getAuthor());
            addProperty(NIE.description, getDescription());
        }

        /**
         * Links the container's described resource to a newly generated
         * {@code NCO.Contact} resource whose {@code NCO.fullname} is the trimmed
         * value. Nothing is added when the value is {@code null} or blank, so an
         * empty author tag does not produce a nameless contact node.
         *
         * @param uri property that points from the document to the contact
         * @param str full name of the contact; may be {@code null}
         */
        private void addContactProperty(URI uri, String str) {
            if (str == null) {
                return;
            }
            String fullName = str.trim();
            if (fullName.isEmpty()) {
                // Same policy as addProperty: blank values carry no information.
                return;
            }
            Model model = this.container.getModel();
            Resource contact = UriUtil.generateRandomResource(model);
            model.addStatement(contact, RDF.type, NCO.Contact);
            model.addStatement(contact, NCO.fullname, fullName);
            this.container.add(uri, contact);
        }

        /**
         * Adds the trimmed value as a literal for the given property, skipping
         * {@code null} and blank values.
         *
         * @param uri property to set
         * @param str literal value; may be {@code null}
         */
        private void addProperty(URI uri, String str) {
            if (str == null) {
                return;
            }
            String value = str.trim();
            if (!value.isEmpty()) {
                this.container.add(uri, value);
            }
        }
    }

    /**
     * Parses the stream as HTML and stores the extracted text and metadata in
     * {@code rDFContainer}. The {@code uri} and {@code str} arguments are not
     * used by this implementation.
     *
     * @param uri          identifier of the document (unused here)
     * @param inputStream  HTML content to parse
     * @param charset      charset passed through to the parser
     * @param str          unused here; NOTE(review): presumably the MIME type - confirm against Extractor
     * @param rDFContainer container receiving the extracted statements
     * @throws ExtractorException declared by the {@link Extractor} contract
     */
    @Override // org.semanticdesktop.aperture.extractor.Extractor
    public void extract(URI uri, InputStream inputStream, Charset charset, String str, RDFContainer rDFContainer) throws ExtractorException {
        HtmlParserUtil.parse(inputStream, charset, new TextAndMetadataExtractor(rDFContainer));
    }
}
