/*
 * Decompiled with CFR 0.152.
 */
package org.semanticdesktop.aperture.crawler.base;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.Stack;
import org.ontoware.aifbcommons.collection.ClosableIterator;
import org.semanticdesktop.aperture.accessor.AccessData;
import org.semanticdesktop.aperture.accessor.DataAccessorRegistry;
import org.semanticdesktop.aperture.accessor.DataObject;
import org.semanticdesktop.aperture.accessor.RDFContainerFactory;
import org.semanticdesktop.aperture.crawler.CrawlReport;
import org.semanticdesktop.aperture.crawler.Crawler;
import org.semanticdesktop.aperture.crawler.CrawlerHandler;
import org.semanticdesktop.aperture.crawler.ExitCode;
import org.semanticdesktop.aperture.crawler.base.CrawlReportBase;
import org.semanticdesktop.aperture.datasource.DataSource;
import org.semanticdesktop.aperture.datasource.config.ConfigurationUtil;
import org.semanticdesktop.aperture.datasource.config.DomainBoundaries;
import org.semanticdesktop.aperture.subcrawler.SubCrawler;
import org.semanticdesktop.aperture.subcrawler.SubCrawlerException;
import org.semanticdesktop.aperture.subcrawler.SubCrawlerHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Skeleton {@link Crawler} implementation that takes care of the bookkeeping around a crawl:
 * it maintains the {@link CrawlReportBase}, forwards events to the registered
 * {@link CrawlerHandler}, keeps the optional {@link AccessData} up to date and tracks the
 * {@link SubCrawler}s that are currently running so that {@link #stop()} can interrupt them.
 *
 * <p>Subclasses implement {@link #crawlObjects()} and call the {@code report*} methods while
 * they walk the data source.
 *
 * <p>Threading as visible in this class: {@link #crawl()} is {@code synchronized} on the
 * crawler instance, while {@link #stop()} and {@link #runSubCrawler} coordinate through a
 * separate private monitor, so {@code stop()} can be invoked while {@code crawl()} is running.
 */
public abstract class CrawlerBase implements Crawler {

    private final Logger logger = LoggerFactory.getLogger(getClass());

    /** Data source being crawled; its configuration supplies the domain boundaries in {@link #crawl()}. */
    protected DataSource source;

    /** Registry handed to this crawler through {@link #setDataAccessorRegistry}; not used by this class itself. */
    protected DataAccessorRegistry accessorRegistry;

    /** Optional store of information about previous crawls; may be {@code null}. */
    protected AccessData accessData;

    /** Optional file the crawl report is written to and lazily read from; may be {@code null}. */
    protected File crawlReportFile;

    /** Report of the current or most recent crawl; {@code null} until a crawl starts or a report is loaded. */
    private CrawlReportBase crawlReport;

    private CrawlerHandler handler;

    /**
     * Set by {@link #stop()}, reset at the start of {@link #crawl()}.
     * Declared {@code volatile} because {@link #clear()} and {@link #isStopRequested()} read it
     * without holding any lock, while {@link #stop()} writes it from whichever thread calls it.
     */
    protected volatile boolean stopRequested = false;

    /** Domain boundaries of the current crawl; assigned in {@link #crawl()}. */
    private DomainBoundaries domain;

    /** Guards {@link #subCrawlerStack} and makes the stop check in {@link #runSubCrawler} atomic with the push. */
    private final Object subCrawlerMonitor = new Object();

    /** Sub-crawlers currently executing, innermost on top. */
    private final Stack<SubCrawler> subCrawlerStack = new Stack<SubCrawler>();

    public void setDataSource(DataSource dataSource) {
        source = dataSource;
    }

    public DataSource getDataSource() {
        return source;
    }

    public void setDataAccessorRegistry(DataAccessorRegistry dataAccessorRegistry) {
        accessorRegistry = dataAccessorRegistry;
    }

    public DataAccessorRegistry getDataAccessorRegistry() {
        return accessorRegistry;
    }

    public void setAccessData(AccessData accessData) {
        this.accessData = accessData;
    }

    public AccessData getAccessData() {
        return accessData;
    }

    public void setCrawlerHandler(CrawlerHandler crawlerHandler) {
        handler = crawlerHandler;
    }

    public CrawlerHandler getCrawlerHandler() {
        return handler;
    }

    /**
     * Runs one complete crawl.
     *
     * <p>Sequence: create a fresh crawl report and stamp its start time, read the domain
     * boundaries from the data source configuration, reset the stop flag, notify the handler
     * that the crawl started, initialize the access data (if any), delegate to
     * {@link #crawlObjects()}, report untouched objects when the crawl completed and access
     * data is present, and store the access data. Afterwards the exit code and stop time are
     * recorded, the report is persisted and the handler is notified that the crawl stopped.
     *
     * <p>An {@link IOException} raised while working with the access data is logged and turns
     * the exit code into {@link ExitCode#FATAL_ERROR}. Requires a data source and a handler to
     * have been set.
     */
    public synchronized void crawl() {
        crawlReport = new CrawlReportBase();
        crawlReport.setCrawlStarted(System.currentTimeMillis());
        domain = ConfigurationUtil.getDomainBoundaries(source.getConfiguration());
        stopRequested = false;

        ExitCode exitCode = null;
        handler.crawlStarted(this);
        try {
            if (accessData != null) {
                accessData.initialize();
            }
            exitCode = crawlObjects();
            // Only a crawl that ran to completion has seen every object, so only then may
            // untouched entries be treated as removed.
            if (exitCode.equals(ExitCode.COMPLETED) && accessData != null) {
                reportUntouched();
            }
            if (accessData != null) {
                accessData.store();
            }
        } catch (IOException e) {
            logger.error("IOException while accessing AccessData", e);
            exitCode = ExitCode.FATAL_ERROR;
        }

        crawlReport.setExitCode(exitCode);
        crawlReport.setCrawlStopped(System.currentTimeMillis());
        storeCrawlReport();
        handler.crawlStopped(this, exitCode);
    }

    /**
     * Performs the actual crawling of the data source.
     *
     * @return the exit code describing how the crawl ended; {@link #crawl()} dereferences the
     *         result, so it must not be {@code null}
     */
    protected abstract ExitCode crawlObjects();

    /**
     * Requests the crawler to stop: raises the stop flag and calls
     * {@link SubCrawler#stopSubCrawler()} on every sub-crawler currently on the stack.
     * Both steps happen while holding the sub-crawler monitor, so no sub-crawler can be
     * registered by {@link #runSubCrawler} in between.
     */
    public void stop() {
        synchronized (subCrawlerMonitor) {
            stopRequested = true;
            for (SubCrawler running : subCrawlerStack) {
                running.stopSubCrawler();
            }
        }
    }

    public boolean isStopRequested() {
        return stopRequested;
    }

    /**
     * Clears everything this crawler has stored in its access data.
     *
     * <p>Notifies the handler that clearing started, then - if access data is present -
     * initializes it, calls {@link #clear(String)} for each stored ID until a stop is
     * requested, and clears the access data. The handler is finally told the outcome:
     * {@link ExitCode#COMPLETED}, {@link ExitCode#STOP_REQUESTED} when the stop flag was set,
     * or {@link ExitCode#FATAL_ERROR} when an {@link IOException} occurred (which is logged).
     *
     * <p>Note that the access data is cleared even when the loop was cut short by a stop
     * request, and that this method does not reset the stop flag itself.
     */
    public void clear() {
        handler.clearStarted(this);
        ExitCode exitCode = ExitCode.COMPLETED;
        try {
            if (accessData != null) {
                accessData.initialize();
                Iterator<?> storedIds = accessData.getStoredIDs().iterator();
                while (!stopRequested && storedIds.hasNext()) {
                    clear((String) storedIds.next());
                }
                accessData.clear();
                if (stopRequested) {
                    exitCode = ExitCode.STOP_REQUESTED;
                }
            }
        } catch (IOException e) {
            logger.error("IOException while accessing AccessData", e);
            exitCode = ExitCode.FATAL_ERROR;
        }
        handler.clearFinished(this, exitCode);
    }

    /**
     * Clears a single object; this implementation only notifies the handler.
     *
     * @param string the ID of the object being cleared
     */
    protected void clear(String string) {
        handler.clearingObject(this, string);
    }

    public void setCrawlReportFile(File file) {
        crawlReportFile = file;
    }

    public File getCrawlReportFile() {
        return crawlReportFile;
    }

    /**
     * Returns the report of the current or last crawl.
     *
     * <p>When no report is held in memory but a crawl report file is configured and exists,
     * the report is read from that file and cached. A failure to read is logged and leaves the
     * cached report untouched.
     *
     * @return the crawl report, or {@code null} when none is available
     */
    public CrawlReport getCrawlReport() {
        boolean loadable = crawlReport == null && crawlReportFile != null && crawlReportFile.exists();
        if (!loadable) {
            return crawlReport;
        }
        try {
            CrawlReportBase loaded = new CrawlReportBase();
            BufferedInputStream in = new BufferedInputStream(new FileInputStream(crawlReportFile));
            try {
                loaded.read(in);
                // Publish only after a successful read so a broken file never yields a half-read report.
                crawlReport = loaded;
            } finally {
                in.close();
            }
        } catch (IOException e) {
            logger.error("Unable to load crawl report file", e);
        }
        return crawlReport;
    }

    /**
     * Tells the handler that the object with the given ID is about to be accessed.
     *
     * @param string the ID of the object
     */
    protected void reportAccessingObject(String string) {
        handler.accessingObject(this, string);
    }

    /**
     * Reports a newly found data object: touches its ID in the access data (if any),
     * increments the report's new-object count and notifies the handler.
     *
     * @param dataObject the new data object
     */
    protected void reportNewDataObject(DataObject dataObject) {
        touchObject(dataObject.getID().toString());
        crawlReport.increaseNewCount();
        handler.objectNew(this, dataObject);
    }

    /** Touches the given ID in the access data, doing nothing when no access data is set. */
    private void touchObject(String string) {
        if (accessData != null) {
            accessData.touch(string);
        }
    }

    /**
     * Reports a changed data object: touches its ID in the access data (if any),
     * increments the report's changed-object count and notifies the handler.
     *
     * @param dataObject the modified data object
     */
    protected void reportModifiedDataObject(DataObject dataObject) {
        touchObject(dataObject.getID().toString());
        crawlReport.increaseChangedCount();
        handler.objectChanged(this, dataObject);
    }

    /**
     * Reports an unmodified object. The ID is touched recursively in the access data, and
     * for every ID in its aggregated-IDs closure the unchanged count is incremented and the
     * handler is notified. The closure iterator is always closed.
     *
     * <p>Unlike {@link #reportNewDataObject} this method dereferences the access data
     * unconditionally, so it must only be called when access data is set.
     *
     * @param string the ID of the unmodified object
     */
    protected void reportUnmodifiedDataObject(String string) {
        accessData.touchRecursively(string);
        ClosableIterator<?> closure = accessData.getAggregatedIDsClosure(string);
        try {
            while (closure.hasNext()) {
                crawlReport.increaseUnchangedCount();
                handler.objectNotModified(this, closure.next().toString());
            }
        } finally {
            closure.close();
        }
    }

    /**
     * Reports a deleted object. For every ID in the aggregated-IDs closure of the given ID
     * the handler is notified and the removed count is incremented; afterwards the ID is
     * removed from the access data. The closure iterator is closed before the removal.
     *
     * <p>Dereferences the access data unconditionally, so it must only be called when access
     * data is set.
     *
     * @param string the ID of the deleted object
     */
    protected void reportDeletedDataObject(String string) {
        ClosableIterator<?> closure = accessData.getAggregatedIDsClosure(string);
        try {
            while (closure.hasNext()) {
                String removedId = (String) closure.next();
                handler.objectRemoved(this, removedId);
                crawlReport.increaseRemovedCount();
            }
        } finally {
            closure.close();
        }
        accessData.remove(string);
    }

    /**
     * Reports every ID the access data lists as untouched as removed, then drops those IDs
     * from the access data. The iterator is closed before the IDs are dropped.
     *
     * <p>Dereferences the access data unconditionally; {@link #crawl()} only calls it when
     * access data is set.
     */
    protected void reportUntouched() {
        ClosableIterator<?> untouched = accessData.getUntouchedIDsIterator();
        try {
            while (untouched.hasNext()) {
                handler.objectRemoved(this, untouched.next().toString());
                crawlReport.increaseRemovedCount();
            }
        } finally {
            untouched.close();
        }
        accessData.removeUntouchedIDs();
    }

    /**
     * Asks the handler for the {@link RDFContainerFactory} to use for the given ID.
     *
     * @param string the ID passed on to the handler
     * @return whatever the handler returns
     */
    protected RDFContainerFactory getRDFContainerFactory(String string) {
        return handler.getRDFContainerFactory(this, string);
    }

    /**
     * Writes the current crawl report to the crawl report file. Does nothing when either is
     * missing; an {@link IOException} is logged rather than propagated.
     */
    protected void storeCrawlReport() {
        if (crawlReport == null || crawlReportFile == null) {
            return;
        }
        try {
            BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(crawlReportFile));
            try {
                crawlReport.write(out);
            } finally {
                out.close();
            }
        } catch (IOException e) {
            logger.error("Unable to write crawl report file", e);
        }
    }

    /**
     * Checks the given value against the domain boundaries read at the start of
     * {@link #crawl()}.
     *
     * @param string the value to test (handed to {@link DomainBoundaries#inDomain} unchanged)
     * @return the result of the domain boundary check
     */
    protected boolean inDomain(String string) {
        return domain.inDomain(string);
    }

    /**
     * Runs the given sub-crawler on a data object and relays its results through this crawler.
     *
     * <p>If a stop has already been requested, the sub-crawler is not started and the method
     * returns immediately. Otherwise the sub-crawler is pushed on the stack of running
     * sub-crawlers (so that {@link #stop()} can reach it), executed, and popped again no matter
     * how it ends.
     *
     * <p>The stop check and the push happen <em>before</em> the {@code try} block on purpose:
     * the {@code finally} clause must only pop when this invocation actually pushed, otherwise
     * the early return would pop an entry it does not own (or fail on an empty stack).
     *
     * @param subCrawler  the sub-crawler to run
     * @param dataObject  the object being sub-crawled; supplies the ID and parent metadata
     * @param inputStream the stream handed to the sub-crawler
     * @param charset     passed through to {@link SubCrawler#subCrawl}
     * @param string      passed through to {@link SubCrawler#subCrawl} right after the charset
     *                    (presumably the MIME type - confirm against the SubCrawler API)
     * @throws SubCrawlerException if the sub-crawler fails, or if the stack's top entry is not
     *                             the sub-crawler this call pushed; note that in the latter
     *                             case the exception is raised from {@code finally} and
     *                             replaces any exception thrown by the sub-crawler itself
     */
    public void runSubCrawler(SubCrawler subCrawler, DataObject dataObject, InputStream inputStream, Charset charset, String string) throws SubCrawlerException {
        synchronized (subCrawlerMonitor) {
            if (stopRequested) {
                logger.debug("Not starting the subCrawler, the crawler has been requested to stop");
                return;
            }
            subCrawlerStack.push(subCrawler);
        }
        try {
            subCrawler.subCrawl(dataObject.getID(), inputStream, new DefaultSubCrawlerHandler(this, dataObject), source, accessData, charset, string, dataObject.getMetadata());
        } finally {
            synchronized (subCrawlerMonitor) {
                SubCrawler finished = subCrawlerStack.pop();
                if (finished != subCrawler) {
                    logger.error("SubCrawler stack error");
                    throw new SubCrawlerException("SubCrawlerStack error push/pop got desynchronized");
                }
            }
        }
    }

    /**
     * {@link SubCrawlerHandler} that records every object reported by a sub-crawler as
     * aggregated under the sub-crawled parent object (when access data is available) and then
     * forwards the event to the owning crawler's {@code report*} methods.
     */
    private static class DefaultSubCrawlerHandler implements SubCrawlerHandler {

        private final CrawlerBase crawlerBase;

        /** ID of the parent object that is being sub-crawled. */
        private final String subCrawledObjectId;

        public DefaultSubCrawlerHandler(CrawlerBase crawlerBase, DataObject dataObject) {
            this.crawlerBase = crawlerBase;
            this.subCrawledObjectId = dataObject.getID().toString();
        }

        public RDFContainerFactory getRDFContainerFactory(String string) {
            return crawlerBase.handler.getRDFContainerFactory(crawlerBase, string);
        }

        public void objectChanged(DataObject dataObject) {
            registerAggregated(dataObject.getID().toString());
            crawlerBase.reportModifiedDataObject(dataObject);
        }

        public void objectNew(DataObject dataObject) {
            registerAggregated(dataObject.getID().toString());
            crawlerBase.reportNewDataObject(dataObject);
        }

        public void objectNotModified(String string) {
            registerAggregated(string);
            crawlerBase.reportUnmodifiedDataObject(string);
        }

        /** Records {@code childId} as aggregated under the sub-crawled parent, if access data is set. */
        private void registerAggregated(String childId) {
            if (crawlerBase.accessData != null) {
                crawlerBase.accessData.putAggregatedID(subCrawledObjectId, childId);
            }
        }
    }
}

