package com.rapidminer.operator.web.services.google;

import com.healthmarketscience.jackcess.impl.JetFormat;
import com.rapidminer.MacroHandler;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.GrowingExampleTable;
import com.rapidminer.example.utils.ExampleSets;
import com.rapidminer.operator.IOObjectCollection;
import com.rapidminer.operator.OperatorChain;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ValueDouble;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.ports.metadata.SubprocessTransformRule;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.ParameterTypeString;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

/* loaded from: input_file:com/rapidminer/operator/web/services/google/GoogleArchiveSearch.class */
/**
 * Operator chain that fetches pages through a {@link GoogleArchiveFetcher}, parses each page with a
 * {@link GoogleArchiveParser} and collects the resulting {@link GoogleNews} items.
 *
 * <p>For every parsed item the operator
 * <ol>
 *   <li>appends the item's data row to a growing example table,</li>
 *   <li>adds the item to the output collection,</li>
 *   <li>stores the zero-based index of the item in the macro named by
 *       {@link #PARAMETER_CURRENT_EXAMPLE_INDEX_MACRO}, and</li>
 *   <li>executes the inner subprocess if at least one of the two inner source ports is connected.</li>
 * </ol>
 * After all pages are processed the example set and the collection are delivered at the outer
 * output ports.
 */
public class GoogleArchiveSearch extends OperatorChain {
    public static final String PARAMETER_CURRENT_EXAMPLE_INDEX_MACRO = "current_example_index_macro";
    public static final String DEFAULT_ITERATION_MACRO_NAME = "example";
    public static final String PARAMETER_QUERY = "query";
    public static final String PARAMETER_LANGUAGE = "language";
    public static final String PARAMETER_MAX_RANDOM_TIME = "max_random_waiting_time";
    public static final String PARAMETER_MIN_RANDOM_TIME = "min_random_waiting_time";
    public static final String PARAMETER_YEAR_START = "year_start";
    public static final String PARAMETER_YEAR_END = "year_end";
    public static final String PARAMETER_MONTH_START = "month_start";
    public static final String PARAMETER_MONTH_END = "month_end";

    /**
     * Smallest year accepted by the year parameters. Spelled out here instead of borrowing an
     * unrelated database-format constant that merely happens to have the same value.
     */
    private static final int MIN_YEAR = 1900;

    /** Largest year accepted by the year parameters. */
    private static final int MAX_YEAR = 2100;

    /** Smallest and largest month number accepted by the month parameters. */
    private static final int MIN_MONTH = 1;
    private static final int MAX_MONTH = 12;

    private final OutputPort exampleSetSourceInnerSource;
    private final OutputPort googleNewsSourceInnerSource;
    private final OutputPort exampleSetOutput;
    private final OutputPort googleNewsOutput;

    /** Running item counter; reset at the start of {@link #doWork()} and exposed as the "iteration" value. */
    int currentIndex;

    /**
     * Creates the operator with a single subprocess, two inner source ports, two outer output
     * ports, the meta data rules for those ports and the "iteration" logging value.
     *
     * @param operatorDescription the description handed to the {@link OperatorChain} constructor
     */
    public GoogleArchiveSearch(OperatorDescription operatorDescription) {
        super(operatorDescription, new String[]{"Google News Crawling Process"});
        // Port creation order is kept as-is; it determines the order of the ports on the operator.
        this.exampleSetSourceInnerSource = getSubprocess(0).getInnerSources().createPort("example set");
        this.googleNewsSourceInnerSource = getSubprocess(0).getInnerSources().createPort("google news");
        this.exampleSetOutput = getOutputPorts().createPort("example set");
        this.googleNewsOutput = getOutputPorts().createPort("google news collection");
        this.currentIndex = 0;

        getTransformer().addGenerationRule(this.exampleSetSourceInnerSource, ExampleSet.class);
        getTransformer().addGenerationRule(this.googleNewsSourceInnerSource, GoogleNews.class);
        getTransformer().addGenerationRule(this.exampleSetOutput, ExampleSet.class);
        getTransformer().addGenerationRule(this.googleNewsOutput, IOObjectCollection.class);
        getTransformer().addRule(new SubprocessTransformRule(getSubprocess(0)));

        addValue(new ValueDouble("iteration", "The number of the current iteration / loop / example / Google news article.") {
            public double getDoubleValue() {
                return GoogleArchiveSearch.this.currentIndex;
            }
        });
    }

    /**
     * Fetches, parses and collects all news items, running the inner subprocess once per item when
     * an inner source port is connected, and finally delivers the collected results.
     *
     * @throws OperatorException if a parameter cannot be read or the subprocess fails
     */
    public void doWork() throws OperatorException {
        String indexMacroName = getParameterAsString(PARAMETER_CURRENT_EXAMPLE_INDEX_MACRO);
        GoogleArchiveFetcherSettings settings = readFetcherSettings();

        GrowingExampleTable table = ExampleSets.createTableFrom(Arrays.asList(
                GoogleNews.DATE_ATTRIBUTE,
                GoogleNews.LINK_ATTRIBUTE,
                GoogleNews.SOURCE_ATTRIBUTE,
                GoogleNews.TITLE_ATTRIBUTE,
                GoogleNews.ABSTRACT_ATTRIBUTE));
        // The example set is created once, before any row is added, and the same instance is
        // delivered in every iteration as well as at the end.
        ExampleSet exampleSet = table.createExampleSet();
        IOObjectCollection<GoogleNews> collectedNews = new IOObjectCollection<>();

        this.currentIndex = 0;
        Iterator<String> pages = new GoogleArchiveFetcher(settings).iterator();
        while (pages.hasNext()) {
            Iterator<GoogleNews> items = new GoogleArchiveParser(pages.next()).parse().iterator();
            while (items.hasNext()) {
                GoogleNews news = items.next();
                table.addDataRow(news.createDataRow());
                collectedNews.add(news);

                // The macro receives the index of the current item; the counter is advanced afterwards.
                MacroHandler macroHandler = getProcess().getMacroHandler();
                int index = this.currentIndex;
                this.currentIndex = index + 1;
                macroHandler.addMacro(indexMacroName, String.valueOf(index));

                boolean exampleSetWanted = this.exampleSetSourceInnerSource.isConnected();
                if (exampleSetWanted) {
                    this.exampleSetSourceInnerSource.deliver(exampleSet);
                }
                boolean newsWanted = this.googleNewsSourceInnerSource.isConnected();
                if (newsWanted) {
                    this.googleNewsSourceInnerSource.deliver(news);
                }
                // The subprocess is only run when it can actually receive something.
                if (exampleSetWanted || newsWanted) {
                    getSubprocess(0).execute();
                    inApplyLoop();
                }
            }
        }

        this.exampleSetOutput.deliver(exampleSet);
        this.googleNewsOutput.deliver(collectedNews);
    }

    /**
     * Builds the fetcher settings from the operator parameters.
     *
     * @return settings populated with query, language, waiting times and the year/month range
     * @throws OperatorException if one of the parameters cannot be read
     */
    private GoogleArchiveFetcherSettings readFetcherSettings() throws OperatorException {
        GoogleArchiveFetcherSettings settings = new GoogleArchiveFetcherSettings();
        settings.setQuery(getParameterAsString(PARAMETER_QUERY));
        settings.setLanguage(getParameterAsString(PARAMETER_LANGUAGE));
        settings.setMaxWaiting(getParameterAsInt(PARAMETER_MAX_RANDOM_TIME));
        settings.setMinWaiting(getParameterAsInt(PARAMETER_MIN_RANDOM_TIME));
        settings.setStartYear(getParameterAsInt(PARAMETER_YEAR_START));
        settings.setEndYear(getParameterAsInt(PARAMETER_YEAR_END));
        settings.setStartMonth(getParameterAsInt(PARAMETER_MONTH_START));
        settings.setEndMonth(getParameterAsInt(PARAMETER_MONTH_END));
        return settings;
    }

    /**
     * Returns the parameter types of the super class followed by this operator's own parameters:
     * the index macro name, query, language, waiting times and the year/month range.
     *
     * @return the list obtained from the super class with this operator's parameters appended
     */
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> parameterTypes = super.getParameterTypes();
        parameterTypes.add(new ParameterTypeString(PARAMETER_CURRENT_EXAMPLE_INDEX_MACRO, "The name of the macro which holds the index of the current example in each crawling iteration.", DEFAULT_ITERATION_MACRO_NAME, false));
        parameterTypes.add(new ParameterTypeString(PARAMETER_QUERY, "The query.", false));
        parameterTypes.add(new ParameterTypeString(PARAMETER_LANGUAGE, "The language.", GoogleArchiveFetcherSettings.LANG_DE, false));
        parameterTypes.add(new ParameterTypeInt(PARAMETER_MAX_RANDOM_TIME, "The maximal time to wait between two page fetches in ms", 0, 10000, 1000));
        parameterTypes.add(new ParameterTypeInt(PARAMETER_MIN_RANDOM_TIME, "The minimal time to wait between two page fetches in ms", 0, 10000, 500));
        parameterTypes.add(new ParameterTypeInt(PARAMETER_YEAR_START, "The year to start the crawling", MIN_YEAR, MAX_YEAR, false));
        parameterTypes.add(new ParameterTypeInt(PARAMETER_YEAR_END, "The year to stop the crawling", MIN_YEAR, MAX_YEAR, false));
        parameterTypes.add(new ParameterTypeInt(PARAMETER_MONTH_START, "The month to start the crawling", MIN_MONTH, MAX_MONTH, false));
        parameterTypes.add(new ParameterTypeInt(PARAMETER_MONTH_END, "The month to stop the crawling", MIN_MONTH, MAX_MONTH, false));
        return parameterTypes;
    }
}
