package websphinx;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.net.MalformedURLException;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.StringTokenizer;
import java.util.Vector;
import net.sf.ehcache.config.TimeoutBehaviorConfiguration;
import org.springframework.beans.factory.xml.BeanDefinitionParserDelegate;
import rcm.util.PriorityQueue;

/* loaded from: input_file:websphinx/Crawler.class */
public class Crawler implements Runnable, Serializable {
    /** Optional link filter; {@link #expand} skips links it rejects. May be null. */
    private LinkPredicate linkPredicate;
    /** Optional page filter; {@link #process} only acts on pages it accepts. May be null. */
    private PagePredicate pagePredicate;
    /** Optional action whose {@code visit} is called by {@link #process}. May be null. */
    private Action action;
    /** Worker threads of the crawl in progress; null while no crawl is running. */
    private transient Worm[] worms;
    /** Links waiting to be downloaded by a worm. Also used as a monitor. */
    private transient PriorityQueue fetchQueue;
    /** Links submitted but not yet fully processed. Also used as the main crawl monitor. */
    private transient PriorityQueue crawlQueue;
    /** Number of links examined by {@link #expand}. */
    private transient int numLinksTested;
    /** Number of pages handled by {@link #process}. */
    private transient int numPagesVisited;
    /** Number of submitted links that have not finished processing. */
    private transient int numPagesLeft;
    /** Registered {@code CrawlListener}s (raw Vector). */
    private transient Vector crawlListeners;
    /** Registered {@code LinkListener}s (raw Vector). */
    private transient Vector linkListeners;
    /** Page URL string -> marker, for every link passed to {@link #markVisited}. */
    private transient Hashtable visitedPages;
    /** robots.txt checker consulted by {@link #fetch} when the download parameters ask for it. */
    private transient RobotExclusion robotExclusion;
    // NOTE(review): the decompiler emitted this constant twice, each initialised from itself, which
    // does not compile and loses the literal. The value below is believed to be the one shipped in
    // the upstream sources -- TODO confirm with `serialver` against the deployed class before
    // relying on compatibility with previously serialized crawlers.
    private static final long serialVersionUID = -3757789861952010450L;
    /** Domain value meaning "no domain restriction" (null disables the domain check in expand). */
    public static final String[] WEB = null;
    /** Domain restricted to links labelled "local". */
    public static final String[] SERVER = {"local"};
    /** Domain restricted to links labelled "sibling" or "descendent". */
    public static final String[] SUBTREE = {"sibling", "descendent"};
    /** Link type restricted to links labelled "hyperlink". */
    public static final String[] HYPERLINKS = {"hyperlink"};
    /** Link type restricted to links labelled "hyperlink" or "image". */
    public static final String[] HYPERLINKS_AND_IMAGES = {"hyperlink", "image"};
    /** Link type value meaning "no type restriction" (null disables the type check in expand). */
    public static final String[] ALL_LINKS = null;
    /** Crawler name; defaults to the runtime class name. Must be initialised before {@link #dp}. */
    private String name = getClass().getName();
    /** Starting links of the crawl; rebuilt from {@link #rootHrefs} on deserialization. */
    private transient Link[] roots = null;
    /** Serialized form of {@link #roots}; only populated while writeObject/readObject run. */
    private String[] rootHrefs = null;
    /** Labels a link must carry (any of) to be followed; null means unrestricted. */
    private String[] domain = WEB;
    /** When true, pages are processed by the thread running {@link #run} instead of the worms. */
    private boolean synchronous = false;
    /** When true, {@link #expand} uses negated page counts as base link priority. */
    private boolean depthFirst = true;
    /** Link-type labels a link must carry (any of) to be followed; null means unrestricted. */
    private String[] type = HYPERLINKS;
    /** When true, {@link #expand} does not resubmit links already marked visited. */
    private boolean ignoreVisitedLinks = true;
    /** Links found on pages at this depth or deeper are not submitted. */
    private int maxDepth = 5;
    /** Default download parameters, derived with the crawler name as user agent. */
    private DownloadParameters dp = new DownloadParameters().changeUserAgent(this.name);
    /** Page classifiers kept in ascending priority order (raw Vector of Classifier). */
    private Vector classifiers = new Vector();
    /** Roots of the crawl that was last started; null after {@link #clear}. */
    private transient Link[] crawledRoots = null;
    /** Lifecycle state; 2 is the value for which sendCrawlEvent reports "cleared". */
    private transient int state = 2;

    /**
     * Creates a crawler with a {@code StandardClassifier} registered and all transient
     * crawl state freshly initialised via {@link #init()}.
     */
    public Crawler() {
        addClassifier(new StandardClassifier());
        init();
    }

    /**
     * Resets every transient field to its start-of-life value. Called from the constructor
     * and again from {@code readObject}, because transient fields are not restored by
     * default deserialization.
     */
    private void init() {
        // Lifecycle state and counters.
        state = 2;
        numLinksTested = 0;
        numPagesVisited = 0;
        numPagesLeft = 0;
        worms = null;

        // Work queues.
        crawlQueue = new PriorityQueue();
        fetchQueue = new PriorityQueue();

        // Listener lists and visited-page bookkeeping.
        crawlListeners = new Vector();
        linkListeners = new Vector();
        visitedPages = new Hashtable();

        robotExclusion = new RobotExclusion(getName());
    }

    /**
     * Serialization hook: the transient {@code roots} are flattened into URL strings in
     * {@code rootHrefs} for the duration of the default write, then the helper field is
     * cleared again.
     *
     * @param objectOutputStream stream the crawler is being written to
     * @throws IOException if the default serialization fails
     */
    private void writeObject(ObjectOutputStream objectOutputStream) throws IOException {
        if (roots == null) {
            rootHrefs = null;
        } else {
            rootHrefs = new String[roots.length];
            for (int idx = 0; idx < roots.length; idx++) {
                Link root = roots[idx];
                rootHrefs[idx] = root.getURL().toString();
            }
        }
        objectOutputStream.defaultWriteObject();
        rootHrefs = null;
    }

    /**
     * Deserialization hook: restores the non-transient fields, rebuilds {@code roots} from
     * the serialized URL strings, swaps {@code domain}/{@code type} back to the shared
     * constant arrays when their contents match, re-creates transient state and reconnects
     * the predicates and action to this crawler.
     *
     * @param objectInputStream stream the crawler is being read from
     * @throws IOException if reading fails or a stored root href cannot be turned into a Link
     * @throws ClassNotFoundException if a serialized class cannot be resolved
     */
    private void readObject(ObjectInputStream objectInputStream) throws IOException, ClassNotFoundException {
        objectInputStream.defaultReadObject();

        if (rootHrefs == null) {
            roots = null;
        } else {
            roots = new Link[rootHrefs.length];
            for (int idx = 0; idx < rootHrefs.length; idx++) {
                roots[idx] = new Link(rootHrefs[idx]);
            }
        }

        // Canonicalise against the well-known constants, in the same order as before.
        String[][] standardDomains = {WEB, SERVER, SUBTREE};
        for (String[] standard : standardDomains) {
            domain = useStandard(standard, domain);
        }
        String[][] standardTypes = {HYPERLINKS, HYPERLINKS_AND_IMAGES, ALL_LINKS};
        for (String[] standard : standardTypes) {
            type = useStandard(standard, type);
        }

        init();

        if (linkPredicate != null) {
            linkPredicate.connected(this);
        }
        if (pagePredicate != null) {
            pagePredicate.connected(this);
        }
        if (action != null) {
            action.connected(this);
        }
    }

    /**
     * Returns {@code strArr} (the canonical constant) when {@code strArr2} has the same
     * length and element-wise equal contents; otherwise returns {@code strArr2} unchanged.
     * If either argument is null, or both are already the same array, {@code strArr2} is
     * returned.
     *
     * @param strArr canonical array to substitute
     * @param strArr2 candidate array, typically just deserialized
     * @return {@code strArr} on a content match, else {@code strArr2}
     */
    private static String[] useStandard(String[] strArr, String[] strArr2) {
        boolean comparable = strArr2 != null && strArr != null && strArr != strArr2;
        if (!comparable || strArr2.length != strArr.length) {
            return strArr2;
        }
        int idx = 0;
        while (idx < strArr2.length) {
            boolean same = strArr2[idx].equals(strArr[idx]);
            if (!same) {
                return strArr2;
            }
            idx++;
        }
        return strArr;
    }

    /**
     * Runs the crawl on the calling thread and returns once the state leaves "started".
     *
     * <p>State codes follow the dispatch in {@link #sendCrawlEvent}: 0 started, 1 stopped,
     * 2 cleared, 3 timed out, 4 paused. A stopped crawler is cleared first; a cleared crawler
     * has its roots submitted with evenly spaced priorities; a paused crawler simply resumes
     * with whatever is still queued.
     *
     * <p>The method starts the crawl timer (if a crawl timeout is configured) and one
     * {@code Worm} per configured thread (at least one), then waits on the crawl queue. In
     * synchronous mode it processes downloaded links itself, in queue order. On exit the
     * worms are told to die and, when paused, their in-flight links are put back on the
     * fetch queue.
     */
    @Override // java.lang.Runnable
    public void run() {
        this.crawledRoots = this.roots;
        if (this.state == 1) {
            clear();
            // clear() sets crawledRoots to null. Without restoring it the roots below would
            // never be submitted and a restarted crawl would stop immediately with no work.
            this.crawledRoots = this.roots;
        }
        // NOTE(review): a crawler left in state 3 (timed out) is neither cleared nor reseeded
        // here, so re-running it finds empty queues -- confirm whether that is intended.
        if (this.state == 2 && this.crawledRoots != null) {
            // Give each root a priority based on its position in the array.
            float f = 0.0f;
            float length = 1.0f / this.crawledRoots.length;
            for (int i = 0; i < this.crawledRoots.length; i++) {
                this.crawledRoots[i].setPriority(f);
                f += length;
            }
            submit(this.crawledRoots);
        }
        this.state = 0;
        sendCrawlEvent(this.state);
        synchronized (this.crawlQueue) {
            CrawlTimer crawlTimer = new CrawlTimer(this);
            int crawlTimeout = this.dp.getCrawlTimeout();
            if (crawlTimeout > 0) {
                crawlTimer.set(crawlTimeout * 1000, false);
            }
            int max = Math.max(this.dp.getMaxThreads(), 1);
            this.worms = new Worm[max];
            for (int i2 = 0; i2 < max; i2++) {
                this.worms[i2] = new Worm(this, i2);
                this.worms[i2].start();
            }
            while (this.state == 0) {
                try {
                    if (this.numPagesLeft == 0) {
                        // Nothing left to download or process: the crawl is complete.
                        this.state = 1;
                        sendCrawlEvent(this.state);
                    } else if (this.synchronous) {
                        Link link = (Link) this.crawlQueue.getMin();
                        // Status 7 is set by fetch() once the page has been downloaded.
                        if (link.getStatus() == 7) {
                            process(link);
                        } else {
                            this.crawlQueue.wait();
                        }
                    } else {
                        this.crawlQueue.wait();
                    }
                } catch (InterruptedException ignored) {
                    // Deliberately ignored: the loop re-checks the state on every wake-up.
                }
            }
            crawlTimer.cancel();
            for (int i3 = 0; i3 < this.worms.length; i3++) {
                this.worms[i3].die();
            }
            if (this.state == 4) {
                // Paused: requeue links the worms were working on so a later run() refetches them.
                synchronized (this.fetchQueue) {
                    for (int i4 = 0; i4 < this.worms.length; i4++) {
                        if (this.worms[i4].link != null) {
                            this.fetchQueue.put(this.worms[i4].link);
                        }
                    }
                }
            }
            this.worms = null;
        }
    }

    /**
     * Stops the crawl if necessary, resets the counters and the visited set, disconnects
     * the previously crawled roots and moves the crawler to state 2 (cleared), notifying
     * crawl listeners.
     */
    public void clear() {
        stop();

        numPagesVisited = 0;
        numLinksTested = 0;
        clearVisited();

        Link[] previousRoots = crawledRoots;
        if (previousRoots != null) {
            for (Link root : previousRoots) {
                root.disconnect();
            }
        }
        crawledRoots = null;

        state = 2;
        sendCrawlEvent(state);
    }

    /**
     * Moves a started crawl (state 0) to state 4 (paused), wakes the thread waiting on the
     * crawl queue, and notifies crawl listeners. Does nothing in any other state.
     */
    public void pause() {
        if (state != 0) {
            return;
        }
        synchronized (crawlQueue) {
            state = 4;
            crawlQueue.notify();
        }
        sendCrawlEvent(state);
    }

    /**
     * Stops a started (0) or paused (4) crawl: sets state 1, empties both queues, zeroes
     * the pages-left counter, wakes the crawl-queue waiter and notifies crawl listeners.
     * Does nothing in any other state.
     */
    public void stop() {
        boolean active = state == 0 || state == 4;
        if (!active) {
            return;
        }
        // Lock order: crawlQueue first, then fetchQueue.
        synchronized (crawlQueue) {
            synchronized (fetchQueue) {
                state = 1;
                fetchQueue.clear();
                crawlQueue.clear();
                numPagesLeft = 0;
                crawlQueue.notify();
            }
        }
        sendCrawlEvent(state);
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Ends a started crawl (state 0) with state 3 (timed out): empties both queues, zeroes
     * the pages-left counter, wakes the crawl-queue waiter and notifies crawl listeners.
     * Does nothing in any other state.
     */
    public void timedOut() {
        if (state != 0) {
            return;
        }
        // Lock order: crawlQueue first, then fetchQueue.
        synchronized (crawlQueue) {
            synchronized (fetchQueue) {
                state = 3;
                fetchQueue.clear();
                crawlQueue.clear();
                numPagesLeft = 0;
                crawlQueue.notify();
            }
        }
        sendCrawlEvent(state);
    }

    /**
     * Returns the current lifecycle state code. Per the dispatch in sendCrawlEvent:
     * 0 started, 1 stopped, 2 cleared, 3 timed out, 4 paused.
     */
    public int getState() {
        return this.state;
    }

    /**
     * Hook called by process() for each page accepted by the page predicate (or for every
     * page when no predicate is set). Empty in this class.
     *
     * @param page the page being processed
     */
    public void visit(Page page) {
    }

    /**
     * Last filter consulted by expand() before a link is submitted. This implementation
     * accepts every link.
     *
     * @param link candidate link
     * @return always {@code true} in this class
     */
    public boolean shouldVisit(Link link) {
        return true;
    }

    /**
     * Examines every link on {@code page} and either submits it for crawling or reports
     * why it was not followed.
     *
     * <p>Each link gets a priority starting at the number of pages visited (negated when
     * depth-first) and increasing by {@code 1/links.length} per link, plus the crawler's
     * download parameters. Link events sent: 2 when the link was already visited and
     * visited links are ignored; 1 when the type, domain, link predicate or
     * {@link #shouldVisit} rejects it; 3 when the page's depth is at or beyond the maximum.
     * Otherwise the link is submitted.
     *
     * @param page page whose links are to be examined
     */
    public void expand(Page page) {
        Link[] links = page.getLinks();
        if (links == null || links.length <= 0) {
            return;
        }

        float priority = depthFirst ? -numPagesVisited : numPagesVisited;
        final float step = 1.0f / links.length;

        for (int idx = 0; idx < links.length; idx++) {
            Link link = links[idx];
            link.setPriority(priority);
            priority += step;
            link.setDownloadParameters(dp);
            numLinksTested++;

            if (ignoreVisitedLinks && visited(link)) {
                sendLinkEvent(link, 2);
                continue;
            }

            // Evaluate the filters lazily, in the same order as the original short-circuit chain.
            boolean rejected = type != null && !link.hasAnyLabels(type);
            if (!rejected) {
                rejected = domain != null && !link.hasAnyLabels(domain);
            }
            if (!rejected) {
                rejected = linkPredicate != null && !linkPredicate.shouldVisit(link);
            }
            if (!rejected) {
                rejected = !shouldVisit(link);
            }

            if (rejected) {
                sendLinkEvent(link, 1);
            } else if (page.getDepth() >= maxDepth) {
                sendLinkEvent(link, 3);
            } else {
                submit(link);
            }
        }
    }

    /** Returns the number of pages handled by process() since the counters were last reset. */
    public int getPagesVisited() {
        return this.numPagesVisited;
    }

    /** Returns the number of links examined by expand() since the counters were last reset. */
    public int getLinksTested() {
        return this.numLinksTested;
    }

    /** Returns the number of submitted links that have not yet been processed or failed. */
    public int getPagesLeft() {
        return this.numPagesLeft;
    }

    /**
     * Counts the worms that currently hold a link.
     *
     * @return number of non-null worms whose {@code link} is non-null; 0 when no crawl
     *         is running
     */
    public int getActiveThreads() {
        // Work on a snapshot: run() may null the field at any time.
        Worm[] snapshot = worms;
        int active = 0;
        if (snapshot != null) {
            for (Worm worm : snapshot) {
                if (worm != null && worm.link != null) {
                    active++;
                }
            }
        }
        return active;
    }

    /** Returns the crawler's name (initially its runtime class name). */
    public String getName() {
        return this.name;
    }

    /**
     * Sets the crawler's name. Only the {@code name} field is updated here.
     *
     * @param str new name
     */
    public void setName(String str) {
        this.name = str;
    }

    /** Returns the crawler's name, as given by {@link #getName()}. */
    public String toString() {
        return getName();
    }

    /**
     * Returns a copy of the configured root links.
     *
     * @return a new array holding the roots; an empty array when none are set
     */
    public Link[] getRoots() {
        // roots is always allocated as a Link[] inside this class, so clone() yields a Link[].
        return roots == null ? new Link[0] : (Link[]) roots.clone();
    }

    /**
     * Returns a copy of the roots of the crawl that was last started.
     *
     * @return a new array holding those roots, or {@code null} when there are none
     *         (for example after {@link #clear()})
     */
    public Link[] getCrawledRoots() {
        // crawledRoots only ever aliases roots, which is always a Link[].
        return crawledRoots == null ? null : (Link[]) crawledRoots.clone();
    }

    /**
     * Returns the external form of every root URL, separated by newline characters.
     *
     * @return the joined URLs; the empty string when no roots are set
     */
    public String getRootHrefs() {
        StringBuilder joined = new StringBuilder();
        if (roots == null) {
            return joined.toString();
        }
        for (Link root : roots) {
            // Separator is driven by what has been written so far, not by the index.
            if (joined.length() > 0) {
                joined.append('\n');
            }
            joined.append(root.getURL().toExternalForm());
        }
        return joined.toString();
    }

    /**
     * Replaces the roots with one link per whitespace-separated token of {@code str}.
     * The roots are only replaced once every token has been turned into a link.
     *
     * @param str whitespace-separated list of hrefs
     * @throws MalformedURLException if a token cannot be turned into a link
     */
    public void setRootHrefs(String str) throws MalformedURLException {
        StringTokenizer tokens = new StringTokenizer(str);
        Link[] parsed = new Link[tokens.countTokens()];
        for (int idx = 0; idx < parsed.length; idx++) {
            parsed[idx] = new Link(tokens.nextToken());
        }
        roots = parsed;
    }

    /**
     * Replaces the roots with the single given link.
     *
     * @param link the only root of subsequent crawls
     */
    public void setRoot(Link link) {
        roots = new Link[] {link};
    }

    /**
     * Replaces the roots with a copy of the given array, so later changes to the caller's
     * array do not affect the crawler.
     *
     * @param linkArr new roots; must not be null
     */
    public void setRoots(Link[] linkArr) {
        // Always allocate a plain Link[] so the stored array's runtime type is fixed.
        this.roots = new Link[linkArr.length];
        System.arraycopy(linkArr, 0, this.roots, 0, linkArr.length);
    }

    /**
     * Appends a link to the roots, creating the root array when none exists yet.
     *
     * @param link root to add
     */
    public void addRoot(Link link) {
        Link[] current = roots;
        if (current == null) {
            setRoot(link);
            return;
        }
        int oldCount = current.length;
        Link[] grown = new Link[oldCount + 1];
        System.arraycopy(current, 0, grown, 0, oldCount);
        grown[oldCount] = link;
        roots = grown;
    }

    /**
     * Returns the domain label array used by expand() to filter links; {@code null}
     * means no domain restriction. The internal array is returned without copying.
     */
    public String[] getDomain() {
        return this.domain;
    }

    /**
     * Sets the domain label array; expand() follows only links carrying any of these
     * labels. {@code null} disables the check. The array is stored without copying.
     *
     * @param strArr labels, e.g. {@link #WEB}, {@link #SERVER} or {@link #SUBTREE}
     */
    public void setDomain(String[] strArr) {
        this.domain = strArr;
    }

    /**
     * Returns the link-type label array used by expand() to filter links; {@code null}
     * means no type restriction. The internal array is returned without copying.
     */
    public String[] getLinkType() {
        return this.type;
    }

    /**
     * Sets the link-type label array; expand() follows only links carrying any of these
     * labels. {@code null} disables the check. The array is stored without copying.
     *
     * @param strArr labels, e.g. {@link #HYPERLINKS}, {@link #HYPERLINKS_AND_IMAGES}
     *               or {@link #ALL_LINKS}
     */
    public void setLinkType(String[] strArr) {
        this.type = strArr;
    }

    /**
     * Returns whether expand() uses the negated pages-visited count as the base priority
     * of newly found links.
     */
    public boolean getDepthFirst() {
        return this.depthFirst;
    }

    /**
     * Sets the depth-first flag read by expand() when assigning link priorities.
     *
     * @param z {@code true} to negate the base priority of newly found links
     */
    public void setDepthFirst(boolean z) {
        this.depthFirst = z;
    }

    /**
     * Returns whether downloaded pages are processed by the thread executing run()
     * (true) or by the worm that downloaded them (false).
     */
    public boolean getSynchronous() {
        return this.synchronous;
    }

    /**
     * Sets the synchronous flag read by run() and fetch().
     *
     * @param z {@code true} to have run() process pages in crawl-queue order
     */
    public void setSynchronous(boolean z) {
        this.synchronous = z;
    }

    /** Returns whether expand() skips links whose page URL was already marked visited. */
    public boolean getIgnoreVisitedLinks() {
        return this.ignoreVisitedLinks;
    }

    /**
     * Sets whether expand() skips already-visited links.
     *
     * @param z {@code true} to skip links whose page URL was already marked visited
     */
    public void setIgnoreVisitedLinks(boolean z) {
        this.ignoreVisitedLinks = z;
    }

    /**
     * Returns the depth limit: expand() does not submit links found on pages whose depth
     * is greater than or equal to this value.
     */
    public int getMaxDepth() {
        return this.maxDepth;
    }

    /**
     * Sets the depth limit applied by expand().
     *
     * @param i page depth at or beyond which outgoing links are no longer submitted
     */
    public void setMaxDepth(int i) {
        this.maxDepth = i;
    }

    /**
     * Returns the crawler's default download parameters. They are assigned to links in
     * expand() and used by fetch() for links that carry none.
     */
    public DownloadParameters getDownloadParameters() {
        return this.dp;
    }

    /**
     * Replaces the crawler's default download parameters.
     *
     * @param downloadParameters new defaults; run() and fetch() dereference them, so they
     *                           must not be null
     */
    public void setDownloadParameters(DownloadParameters downloadParameters) {
        this.dp = downloadParameters;
    }

    /**
     * Installs a new link predicate. Nothing happens when the argument is the same
     * object as, or {@code equals}, the current one. Otherwise the old predicate (if any)
     * is told it was disconnected and the new one (if any) that it was connected.
     *
     * @param linkPredicate new predicate, or {@code null} to remove it
     */
    public void setLinkPredicate(LinkPredicate linkPredicate) {
        LinkPredicate previous = this.linkPredicate;
        if (linkPredicate == previous) {
            return;
        }
        if (linkPredicate != null && linkPredicate.equals(previous)) {
            return;
        }
        if (previous != null) {
            previous.disconnected(this);
        }
        this.linkPredicate = linkPredicate;
        if (linkPredicate != null) {
            linkPredicate.connected(this);
        }
    }

    /** Returns the current link predicate, or {@code null} when none is set. */
    public LinkPredicate getLinkPredicate() {
        return this.linkPredicate;
    }

    /**
     * Installs a new page predicate. Nothing happens when the argument is the same
     * object as, or {@code equals}, the current one. Otherwise the old predicate (if any)
     * is told it was disconnected and the new one (if any) that it was connected.
     *
     * @param pagePredicate new predicate, or {@code null} to remove it
     */
    public void setPagePredicate(PagePredicate pagePredicate) {
        PagePredicate previous = this.pagePredicate;
        if (pagePredicate == previous) {
            return;
        }
        if (pagePredicate != null && pagePredicate.equals(previous)) {
            return;
        }
        if (previous != null) {
            previous.disconnected(this);
        }
        this.pagePredicate = pagePredicate;
        if (pagePredicate != null) {
            pagePredicate.connected(this);
        }
    }

    /** Returns the current page predicate, or {@code null} when none is set. */
    public PagePredicate getPagePredicate() {
        return this.pagePredicate;
    }

    /**
     * Installs a new action. Nothing happens when the argument is the same object as, or
     * {@code equals}, the current one. Otherwise the old action (if any) is told it was
     * disconnected and the new one (if any) that it was connected.
     *
     * @param action new action, or {@code null} to remove it
     */
    public void setAction(Action action) {
        Action previous = this.action;
        if (action == previous) {
            return;
        }
        if (action != null && action.equals(previous)) {
            return;
        }
        if (previous != null) {
            previous.disconnected(this);
        }
        this.action = action;
        if (action != null) {
            action.connected(this);
        }
    }

    /** Returns the current action, or {@code null} when none is set. */
    public Action getAction() {
        return this.action;
    }

    /**
     * Queues a link for crawling: marks it visited, sends link event 4, adds it to both
     * the crawl queue and the fetch queue, increments the pages-left counter and wakes
     * every worm waiting on the fetch queue.
     *
     * @param link link to crawl
     */
    public void submit(Link link) {
        markVisited(link);
        sendLinkEvent(link, 4);
        // Lock order is crawlQueue then fetchQueue, matching stop() and timedOut().
        synchronized (this.crawlQueue) {
            synchronized (this.fetchQueue) {
                this.crawlQueue.put(link);
                this.numPagesLeft++;
                this.fetchQueue.put(link);
                this.fetchQueue.notifyAll();
            }
        }
    }

    /**
     * Queues every link of the array, in array order, via {@link #submit(Link)}.
     *
     * @param linkArr links to crawl; must not be null
     */
    public void submit(Link[] linkArr) {
        int count = linkArr.length;
        for (int idx = 0; idx < count; idx++) {
            submit(linkArr[idx]);
        }
    }

    /** Returns an enumeration over the elements currently in the crawl queue. */
    public Enumeration enumerateQueue() {
        return this.crawlQueue.elements();
    }

    /**
     * Registers a classifier unless it is already present. The list is kept in ascending
     * priority order; a new classifier goes before the first existing one with a strictly
     * greater priority, or at the end when there is none.
     *
     * @param classifier classifier to add
     */
    public void addClassifier(Classifier classifier) {
        if (classifiers.contains(classifier)) {
            return;
        }
        float priority = classifier.getPriority();
        int position = 0;
        // Written as !(a < b) rather than a >= b so NaN priorities behave exactly as before.
        while (position < classifiers.size()
                && !(priority < ((Classifier) classifiers.elementAt(position)).getPriority())) {
            position++;
        }
        // Inserting at index == size() appends.
        classifiers.insertElementAt(classifier, position);
    }

    /**
     * Removes the given classifier from the list, if present.
     *
     * @param classifier classifier to remove
     */
    public void removeClassifier(Classifier classifier) {
        this.classifiers.removeElement(classifier);
    }

    /** Removes every registered classifier, including the one added by the constructor. */
    public void removeAllClassifiers() {
        this.classifiers.removeAllElements();
    }

    /** Returns an enumeration over the registered classifiers, in list order. */
    public Enumeration enumerateClassifiers() {
        return this.classifiers.elements();
    }

    /**
     * Returns the registered classifiers as a new array, in list order.
     *
     * @return a new array; empty when no classifier is registered
     */
    public Classifier[] getClassifiers() {
        Classifier[] classifierArr = new Classifier[this.classifiers.size()];
        this.classifiers.copyInto(classifierArr);
        return classifierArr;
    }

    /**
     * Registers a crawl listener; a listener already in the list is not added twice.
     *
     * @param crawlListener listener to notify of crawl state changes
     */
    public void addCrawlListener(CrawlListener crawlListener) {
        if (!crawlListeners.contains(crawlListener)) {
            crawlListeners.addElement(crawlListener);
        }
    }

    /**
     * Unregisters a crawl listener, if present.
     *
     * @param crawlListener listener to remove
     */
    public void removeCrawlListener(CrawlListener crawlListener) {
        this.crawlListeners.removeElement(crawlListener);
    }

    /**
     * Registers a link listener; a listener already in the list is not added twice.
     *
     * @param linkListener listener to notify of link events
     */
    public void addLinkListener(LinkListener linkListener) {
        if (!linkListeners.contains(linkListener)) {
            linkListeners.addElement(linkListener);
        }
    }

    /**
     * Unregisters a link listener, if present.
     *
     * @param linkListener listener to remove
     */
    public void removeLinkListener(LinkListener linkListener) {
        this.linkListeners.removeElement(linkListener);
    }

    /**
     * Delivers one crawl event to every registered crawl listener. The listener method
     * is chosen by state code: 0 started, 1 stopped, 2 cleared, 3 timedOut, 4 paused.
     * Any other code results in no listener call.
     *
     * @param i state code of the event
     */
    protected void sendCrawlEvent(int i) {
        CrawlEvent event = new CrawlEvent(this, i);
        // The listener count is captured once, before the first callback.
        for (int idx = 0, count = crawlListeners.size(); idx < count; idx++) {
            CrawlListener listener = (CrawlListener) crawlListeners.elementAt(idx);
            if (i == 0) {
                listener.started(event);
            } else if (i == 1) {
                listener.stopped(event);
            } else if (i == 2) {
                listener.cleared(event);
            } else if (i == 3) {
                listener.timedOut(event);
            } else if (i == 4) {
                listener.paused(event);
            }
        }
    }

    /**
     * Records a new status on the link and delivers the corresponding link event to
     * every registered link listener.
     *
     * @param link link the event is about
     * @param i status code stored on the link and carried by the event
     */
    protected void sendLinkEvent(Link link, int i) {
        LinkEvent event = new LinkEvent(this, i, link);
        link.setStatus(i);
        // The listener count is captured once, before the first callback.
        for (int idx = 0, count = linkListeners.size(); idx < count; idx++) {
            LinkListener listener = (LinkListener) linkListeners.elementAt(idx);
            listener.crawled(event);
        }
    }

    /**
     * Records a new status and the failure cause on the link, then delivers the
     * corresponding link event to every registered link listener.
     *
     * @param link link the event is about
     * @param i status code stored on the link and carried by the event
     * @param th cause of the failure; its {@code toString()} is stored on the link under
     *           the {@code "exception"} label, so it must not be null
     */
    protected void sendLinkEvent(Link link, int i, Throwable th) {
        LinkEvent linkEvent = new LinkEvent(this, i, link, th);
        link.setStatus(i);
        // The label key is the plain string "exception". The decompiler had attributed the
        // inlined constant to an unrelated ehcache class with the same value, which made this
        // method depend on a third-party jar for no reason.
        link.setLabel("exception", th.toString());
        int size = this.linkListeners.size();
        for (int i2 = 0; i2 < size; i2++) {
            ((LinkListener) this.linkListeners.elementAt(i2)).crawled(linkEvent);
        }
    }

    /**
     * Tells whether the link's page URL has been recorded by markVisited().
     *
     * @param link link to test
     * @return {@code true} if the string form of its page URL is in the visited table
     */
    public boolean visited(Link link) {
        return this.visitedPages.containsKey(link.getPageURL().toString());
    }

    /**
     * Records the link's page URL as visited.
     *
     * @param link link whose page URL string becomes a key of the visited table
     */
    protected void markVisited(Link link) {
        // Only the key matters; the crawler itself serves as a non-null placeholder value.
        this.visitedPages.put(link.getPageURL().toString(), this);
    }

    /** Forgets every page URL recorded by markVisited(). */
    protected void clearVisited() {
        this.visitedPages.clear();
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Worker loop executed by a {@code Worm}: repeatedly takes the minimum link from the
     * fetch queue, downloads its page and hands it on for processing, until the worm is
     * marked dead.
     *
     * <p>For each link the method sends link event 5, arms the worm timer when a download
     * timeout is configured, honours robot exclusion if the download parameters request it,
     * and constructs the {@code Page}. The timer is always cancelled afterwards. On success
     * it sends link event 7 and either wakes the crawl-queue waiter (synchronous mode) or
     * calls {@link #process} itself. Any failure other than {@code ThreadDeath} is reported
     * as link event 6 and the link is removed from the crawl queue.
     *
     * @param worm the worker thread running this loop
     */
    public void fetch(Worm worm) {
        WormTimer wormTimer = new WormTimer(worm);
        while (!worm.dead) {
            // Take the next link, waiting on the fetch queue while it is empty.
            synchronized (this.fetchQueue) {
                while (!worm.dead) {
                    Link next = (Link) this.fetchQueue.deleteMin();
                    worm.link = next;
                    if (next != null) {
                        break;
                    }
                    try {
                        this.fetchQueue.wait();
                    } catch (InterruptedException ignored) {
                        // Deliberately ignored: the loop re-checks worm.dead on every wake-up.
                    }
                }
            }
            if (worm.dead) {
                return;
            }
            try {
                DownloadParameters downloadParameters = worm.link.getDownloadParameters();
                if (downloadParameters == null) {
                    downloadParameters = this.dp;
                }
                int downloadTimeout = downloadParameters.getDownloadTimeout();
                sendLinkEvent(worm.link, 5);
                try {
                    if (downloadTimeout > 0) {
                        wormTimer.set(downloadTimeout * 1000, false);
                    }
                    if (downloadParameters.getObeyRobotExclusion()
                            && this.robotExclusion.disallowed(worm.link.getURL())) {
                        throw new IOException("disallowed by Robot Exclusion Standard (robots.txt)");
                    }
                    // The instance is not kept here; process() later reads it back through
                    // link.getPage() -- presumably the constructor attaches it to the link.
                    new Page(worm.link, downloadParameters);
                } finally {
                    // The timer must span the robots check and the download, and be disarmed
                    // whether or not they succeed.
                    wormTimer.cancel();
                }
                if (worm.dead) {
                    return;
                }
                sendLinkEvent(worm.link, 7);
                if (this.synchronous) {
                    // run() will call process() when this link reaches the head of the queue.
                    synchronized (this.crawlQueue) {
                        this.crawlQueue.notify();
                    }
                } else {
                    process(worm.link);
                }
                worm.link = null;
            } catch (ThreadDeath e) {
                throw e;
            } catch (Throwable th) {
                // Download failure or an error thrown by user code during processing.
                if (worm.dead) {
                    return;
                }
                sendLinkEvent(worm.link, 6, th);
                synchronized (this.crawlQueue) {
                    this.crawlQueue.delete(worm.link);
                    this.numPagesLeft--;
                    worm.link = null;
                    this.crawlQueue.notify();
                }
            }
        }
    }

    /**
     * Processes a downloaded link: runs every classifier on its page, counts the visit,
     * invokes the action and {@link #visit} when the page predicate allows (or none is set),
     * expands the page's links, sends link event 8, and finally removes the link from the
     * crawl queue and wakes the crawl-queue waiter.
     *
     * @param link link whose page has been downloaded
     */
    void process(Link link) {
        Page page = link.getPage();
        int size = this.classifiers.size();
        for (int i = 0; i < size; i++) {
            ((Classifier) this.classifiers.elementAt(i)).classify(page);
        }
        this.numPagesVisited++;
        if (this.pagePredicate == null || this.pagePredicate.shouldActOn(page)) {
            if (this.action != null) {
                this.action.visit(page);
            }
            visit(page);
        }
        // Expansion happens before the link is retired, so newly submitted links are counted
        // in numPagesLeft before this one is subtracted.
        expand(page);
        sendLinkEvent(link, 8);
        synchronized (this.crawlQueue) {
            this.crawlQueue.delete(link);
            this.numPagesLeft--;
            this.crawlQueue.notify();
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Handles a download timeout for a worm that is still alive: kills the worm, reports
     * link event 6 with an {@code IOException} describing the timeout, removes the link
     * from the crawl queue, and starts a replacement worm in the same slot.
     *
     * @param worm the worm whose download timed out
     * @param i the timeout value reported in the error message, in seconds
     */
    public void fetchTimedOut(Worm worm, int i) {
        if (worm.dead) {
            return;
        }
        worm.die();

        IOException timeout = new IOException("Timeout after " + i + " seconds");
        sendLinkEvent(worm.link, 6, timeout);

        synchronized (crawlQueue) {
            crawlQueue.delete(worm.link);
            numPagesLeft--;
            Worm replacement = new Worm(this, worm.i);
            worms[worm.i] = replacement;
            replacement.start();
            crawlQueue.notify();
        }
    }

    /**
     * Command-line entry point: deserializes a {@code Crawler} from the file named by the
     * first argument, attaches an {@code EventLog} monitor with network-only filtering
     * turned off, and runs the crawl on the calling thread.
     *
     * @param strArr command-line arguments; {@code strArr[0]} is the serialized crawler file
     * @throws Exception if the file cannot be read or deserialized
     */
    public static void main(String[] strArr) throws Exception {
        // NOTE(review): this uses native Java deserialization, so the file must come from a
        // trusted source.
        Crawler crawler;
        FileInputStream fileInputStream = new FileInputStream(strArr[0]);
        try {
            ObjectInputStream objectInputStream = new ObjectInputStream(fileInputStream);
            crawler = (Crawler) objectInputStream.readObject();
        } finally {
            // Closing the underlying stream releases the file even when the object stream
            // header or readObject() fails.
            fileInputStream.close();
        }
        EventLog.monitor(crawler).setOnlyNetworkEvents(false);
        crawler.run();
    }
}
