package com.rapidminer.operator.web.services.google;

import com.coremedia.iso.boxes.sampleentry.SubtitleSampleEntry;
import com.rapidminer.operator.OperatorCreationException;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.web.io.GetWebpageOperator;
import com.rapidminer.operator.web.io.UserAgent;
import com.rapidminer.tools.OperatorService;
import java.util.Iterator;
import java.util.Random;

/* loaded from: input_file:com/rapidminer/operator/web/services/google/GoogleArchiveFetcher.class */
/**
 * Iterates over the result pages of a Google news archive search
 * (see {@link #URL_TEMPLATE}), month by month, between the start and end
 * year/month given in the {@link GoogleArchiveFetcherSettings}.
 *
 * <p>Each call to {@link #next()} fetches one result page. While the fetched
 * page contains the "Next" navigation marker, the following call requests the
 * next page of the same month; otherwise the fetcher moves on to the first
 * page of the following month.
 *
 * <p>The class is both an {@link Iterator} and an {@link Iterable};
 * {@link #iterator()} returns a fresh fetcher built from the same settings.
 * Instances hold mutable paging state and perform no synchronization.
 */
public class GoogleArchiveFetcher implements Iterator<String>, Iterable<String> {
    private static final String QUERY_REPLACEMENT_KEY = "%{query}";
    private static final String YEAR_REPLACEMENT_KEY = "%{year}";
    private static final String MONTH_REPLACEMENT_KEY = "%{month}";
    private static final String LANGUAGE_REPLACEMENT_KEY = "%{language}";
    private static final String START_REPLACEMENT_KEY = "%{start}";

    /** Number of results per page; must match the {@code num=} parameter in {@link #URL_TEMPLATE}. */
    private static final int RESULTS_PER_PAGE = 100;

    private static final String URL_TEMPLATE = "http://news.google.de/archivesearch?as_q=%{query}&num=100&hl=en&as_epq=&as_oq=&as_eq=&as_user_ldate=%{month}%2F%{year}&as_user_hdate=%{month}%2F%{year}&lr=lang_%{language}&as_src=&as_price=p0&as_scoring=a&start=%{start}";

    /** Literal HTML snippet (matched with {@code contains}, not as a regex) that marks a "Next" link. */
    private static final String NEXT_PAGE_MARKER = "<img src=\"nav_next.gif\" width=100 height=26 alt=\"\" border=0><br>Next</a>";

    private final GoogleArchiveFetcherSettings settings;
    private final GetWebpageOperator get;
    private final Random random;
    private int currentYear;
    private int currentMonth;
    private long lastFetchTime = 0;
    private int currentStart = 0;
    private boolean hasNext = true;

    /**
     * Creates a fetcher positioned at the start year/month of the given settings.
     *
     * @param googleArchiveFetcherSettings the search settings; {@code validate()} is called on them first
     * @throws IllegalStateException if the web page operator cannot be created
     */
    public GoogleArchiveFetcher(GoogleArchiveFetcherSettings googleArchiveFetcherSettings) {
        googleArchiveFetcherSettings.validate();
        this.settings = googleArchiveFetcherSettings;
        this.currentYear = googleArchiveFetcherSettings.getStartYear();
        this.currentMonth = googleArchiveFetcherSettings.getStartMonth();
        GetWebpageOperator operator;
        try {
            operator = OperatorService.createOperator(GetWebpageOperator.class);
        } catch (OperatorCreationException e) {
            // Without the operator every fetch would fail with a NullPointerException;
            // fail fast here and keep the real cause.
            throw new IllegalStateException("Could not create the operator used to fetch web pages", e);
        }
        this.get = operator;
        this.random = new Random();
    }

    /**
     * @return {@code true} while there is another page or month left to fetch
     */
    @Override
    public boolean hasNext() {
        return this.hasNext;
    }

    /**
     * Fetches the current result page and advances the paging state.
     *
     * <p>If fetching fails with an {@link OperatorException}, the stack trace is
     * printed, an empty string is returned and the paging state is left
     * untouched, so the following call requests the same page again.
     *
     * @return the fetched page as a string, or {@code ""} if the fetch failed
     * @throws java.util.NoSuchElementException if {@link #hasNext()} is {@code false}
     */
    @Override
    public String next() {
        if (!this.hasNext) {
            throw new java.util.NoSuchElementException("No more archive pages to fetch");
        }
        try {
            String page = fetchPage(this.settings.getQuery(), this.currentYear, this.currentMonth, this.currentStart);
            if (hasMorePages(page)) {
                // More results for the same month: request the next slice.
                this.currentStart += RESULTS_PER_PAGE;
            } else {
                advanceToNextMonth();
            }
            return page;
        } catch (OperatorException e) {
            e.printStackTrace();
            return "";
        }
    }

    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void remove() {
        throw new UnsupportedOperationException("Removing of fetched sites is not supported");
    }

    /** Moves to the first page of the following month and updates {@link #hasNext}. */
    private void advanceToNextMonth() {
        this.currentStart = 0;
        if (this.currentMonth == 12) {
            this.currentMonth = 1;
            this.currentYear++;
        } else {
            this.currentMonth++;
        }
        this.hasNext = isWithinRange();
    }

    /**
     * Checks whether the current year/month is not after the configured end year/month.
     * Year and month are compared chronologically, so a range that crosses a
     * year boundary (e.g. 11/2000 to 03/2002) is handled correctly.
     */
    private boolean isWithinRange() {
        int endYear = this.settings.getEndYear();
        if (this.currentYear != endYear) {
            return this.currentYear < endYear;
        }
        return this.currentMonth <= this.settings.getEndMonth();
    }

    /**
     * Fetches one result page, throttling first if the previous fetch was too recent.
     *
     * @param str the search query, inserted into the URL as-is (no URL encoding is applied here)
     * @param i   the year to search
     * @param i2  the month to search
     * @param i3  the index of the first result to request
     * @return the string form of whatever the web page operator read
     * @throws OperatorException if the web page operator fails
     */
    private String fetchPage(String str, int i, int i2, int i3) throws OperatorException {
        waitBeforeFetch();
        String randomUserAgent = UserAgent.getRandomUserAgent();
        this.get.setParameter("url", buildUrl(str, i, i2, i3));
        this.get.setParameter("user_agent", randomUserAgent);
        String document = this.get.m357read().toString();
        this.lastFetchTime = System.currentTimeMillis();
        return document;
    }

    /**
     * Sleeps if less than the configured minimum waiting time has passed since
     * the last fetch. The sleep lasts for the remainder of the minimum waiting
     * time plus a random jitter below the configured maximum waiting time.
     */
    private void waitBeforeFetch() {
        long elapsed = System.currentTimeMillis() - this.lastFetchTime;
        long remaining = this.settings.getMinWaiting() - elapsed;
        if (remaining <= 0) {
            return;
        }
        int maxWaiting = this.settings.getMaxWaiting();
        // Random.nextInt(bound) rejects non-positive bounds.
        int jitter = maxWaiting > 0 ? this.random.nextInt(maxWaiting) : 0;
        try {
            Thread.sleep(remaining + jitter);
        } catch (InterruptedException e) {
            // Keep the interrupt visible to the caller instead of dropping it.
            Thread.currentThread().interrupt();
        }
    }

    /** Fills the placeholders of {@link #URL_TEMPLATE} with the given values and the configured language. */
    private String buildUrl(String str, int i, int i2, int i3) {
        return URL_TEMPLATE
                .replace(QUERY_REPLACEMENT_KEY, str)
                .replace(START_REPLACEMENT_KEY, Integer.toString(i3))
                .replace(YEAR_REPLACEMENT_KEY, Integer.toString(i))
                .replace(MONTH_REPLACEMENT_KEY, Integer.toString(i2))
                .replace(LANGUAGE_REPLACEMENT_KEY, getSettings().getLanguage());
    }

    /** @return {@code true} if the page contains the "Next" navigation marker */
    private boolean hasMorePages(String str) {
        return str.contains(NEXT_PAGE_MARKER);
    }

    /**
     * @return a new, independent fetcher built from the same settings
     */
    @Override
    public Iterator<String> iterator() {
        return new GoogleArchiveFetcher(getSettings());
    }

    /**
     * @return the settings this fetcher was created with
     */
    public GoogleArchiveFetcherSettings getSettings() {
        return this.settings;
    }
}
