package com.rapidminer.extension.datasearch.operator;

import com.rapidminer.RapidMiner;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DoubleArrayDataRow;
import com.rapidminer.example.utils.ExampleSetBuilder;
import com.rapidminer.example.utils.ExampleSets;
import com.rapidminer.extension.datasearch.metadata.MetaDataCachingRule;
import com.rapidminer.operator.Annotations;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.tools.LogService;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLEncoder;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

/* loaded from: input_file:com/rapidminer/extension/datasearch/operator/GoogleTableSearchOperator.class */
/**
 * Operator that sends a keyword query to the Google table search page
 * ({@code https://research.google.com/tables}) and delivers the result links
 * found on the returned HTML page as an {@link ExampleSet} with a single
 * attribute named {@code "link"}.
 *
 * <p>If the page cannot be fetched, a warning is logged and {@code null} is
 * delivered to the output port.
 */
public class GoogleTableSearchOperator extends Operator {
    public static final String PARAMETER_TABULAR_CORPUS = "tabular corpus";
    public static final int CORPUS_TYPE_DEFAULT = 0;
    public static final String PARAMETER_KEYWORDS = "keywords";
    public static final String PARAMETER_PAGINATION_START = "pagination start";
    public static final String PARAMETER_PAGINATION_END = "pagination end";
    private static final String DEFAULT_USER_AGENT = "Mozilla";
    /** Base address of the search page; corpus and query are appended as query parameters. */
    private static final String SEARCH_BASE_URL = "https://research.google.com/tables?corpus=";
    /** Marker that precedes the target link inside a result anchor's href. */
    private static final String URL_MARKER = "&url=";
    /** Marker that follows the target link inside a result anchor's href (when present). */
    private static final String SOURCE_MARKER = "&source";
    /** Value type passed to AttributeFactory for the link column; presumably Ontology.STRING -- TODO confirm. */
    private static final int LINK_VALUE_TYPE = 5;
    private final OutputPort outputPort;
    private final MetaDataCachingRule cachingRule;
    public static final String[] CORPUS_TYPES = {"Google Web Tables", "Google Fusion Tables"};
    private static final Logger LOGGER = LogService.getRoot();

    /**
     * Creates the operator, its single output port and the meta data rules.
     *
     * @param operatorDescription description handed to the {@link Operator} super constructor
     */
    public GoogleTableSearchOperator(OperatorDescription operatorDescription) {
        super(operatorDescription);
        this.outputPort = getOutputPorts().createPort("output");
        this.cachingRule = new MetaDataCachingRule(this);
        getTransformer().addGenerationRule(this.outputPort, ExampleSet.class);
        // The caching rule is only registered when RapidMiner is not running headless.
        if (!RapidMiner.getExecutionMode().isHeadless()) {
            getTransformer().addRule(this.cachingRule);
        }
    }

    /**
     * Builds the search URL from the operator parameters, fetches the page and
     * delivers the extracted links.
     *
     * @throws OperatorException if a parameter cannot be read
     */
    public void doWork() throws OperatorException {
        String corpus = getParameter(PARAMETER_TABULAR_CORPUS);
        if (CORPUS_TYPES[0].equals(corpus)) {
            corpus = "web";
        } else if (CORPUS_TYPES[1].equals(corpus)) {
            corpus = "fusion";
        }
        String keywords = getParameterAsString(PARAMETER_KEYWORDS);
        int paginationStart = parsePositiveInt(getParameter(PARAMETER_PAGINATION_START));
        int paginationEnd = parsePositiveInt(getParameter(PARAMETER_PAGINATION_END));

        ExampleSet exampleSet = null;
        try {
            StringBuilder query = new StringBuilder(SEARCH_BASE_URL)
                    .append(corpus)
                    .append("&hl=en&q=")
                    .append(encodeKeywords(keywords));
            if (paginationStart > 0) {
                query.append("&start=").append(paginationStart);
            }
            if (paginationEnd > 0) {
                query.append("&num=").append(paginationEnd);
            }
            String searchUrl = query.toString();

            // Constructed only for validation: a malformed address raises
            // MalformedURLException (an IOException) before any request is sent.
            new URL(searchUrl);

            Document document = Jsoup.connect(searchUrl).userAgent(DEFAULT_USER_AGENT).get();
            if (document != null) {
                exampleSet = extractAnchorTags(document, searchUrl, document.title());
            } else {
                LOGGER.log(Level.INFO, "Search Page could not be parsed");
            }
        } catch (IOException e) {
            // Best effort: the failure is logged and null is delivered below.
            LOGGER.log(Level.WARNING, "Error accessing the given url " + e.getMessage(), e);
        }
        this.outputPort.deliver(exampleSet);
    }

    /**
     * Normalizes and URL-encodes the keyword string for use as the {@code q} parameter.
     * Surrounding whitespace is removed, runs of spaces are collapsed and every
     * remaining space becomes {@code +}; reserved characters are percent-encoded.
     *
     * @param rawKeywords keyword string as entered by the user
     * @return encoded query value
     * @throws IOException if UTF-8 is reported as unsupported by the runtime
     */
    private static String encodeKeywords(String rawKeywords) throws IOException {
        String collapsed = rawKeywords.trim().replaceAll("( )+", StringUtils.SPACE);
        return URLEncoder.encode(collapsed, "UTF-8");
    }

    /**
     * Parses a pagination parameter.
     *
     * @param value raw parameter value, may be {@code null} or empty
     * @return the parsed value if it is a positive integer, otherwise {@code -1}
     */
    private static int parsePositiveInt(String value) {
        if (value == null) {
            return -1;
        }
        String trimmed = value.trim();
        if (trimmed.isEmpty()) {
            return -1;
        }
        try {
            int parsed = Integer.parseInt(trimmed);
            return parsed > 0 ? parsed : -1;
        } catch (NumberFormatException e) {
            LOGGER.log(Level.WARNING, "Ignoring non-numeric pagination value: " + value);
            return -1;
        }
    }

    // NOTE(review): not called anywhere in this class and the name refers to a
    // "subject id attribute" although it reads the tabular corpus parameter;
    // looks like a leftover from another operator -- confirm before removing.
    private int getIndexOfSelectedSubjectIdAttribute() {
        int i = 0;
        try {
            String parameterAsString = getParameterAsString(PARAMETER_TABULAR_CORPUS);
            Iterator it = getParameterType(PARAMETER_TABULAR_CORPUS).getAttributeNames().iterator();
            while (it.hasNext() && !((String) it.next()).equals(parameterAsString)) {
                i++;
            }
        } catch (UndefinedParameterError e) {
            LOGGER.log(Level.WARNING, "UndefinedParameterError = " + e.getMessage());
        }
        return i;
    }

    /**
     * Declares the operator parameters: corpus selection, keywords and the two
     * optional pagination values.
     *
     * @return the parameter types of the super class followed by this operator's own
     */
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> parameterTypes = super.getParameterTypes();
        parameterTypes.add(new ParameterTypeCategory(PARAMETER_TABULAR_CORPUS, "Choose a public tabular corpus", CORPUS_TYPES, 0, false));
        parameterTypes.add(new ParameterTypeString(PARAMETER_KEYWORDS, "Keyword(s) to search for", false, false));
        parameterTypes.add(new ParameterTypeInt(PARAMETER_PAGINATION_START, "Start of pagination for retrieving results", 1, 1000, 1, true));
        parameterTypes.add(new ParameterTypeInt(PARAMETER_PAGINATION_END, "End of pagination for retrieving results", 1, 1000, 20, true));
        return parameterTypes;
    }

    /**
     * Collects the target links of all anchors whose {@code href} contains
     * {@code &url=} into an example set with one attribute named {@code "link"}.
     * The link is the text after the last {@code &url=} up to the following
     * {@code &source}; if {@code &source} is absent the rest of the href is used.
     *
     * @param document parsed search result page
     * @param sourceUrl address the page was fetched from; stored as annotation "Source Query"
     * @param documentTitle title of the page; stored as annotation "Document Title"
     * @return example set with one row per extracted link
     */
    public ExampleSet extractAnchorTags(Document document, String sourceUrl, String documentTitle) {
        Attribute linkAttribute = AttributeFactory.createAttribute("link", LINK_VALUE_TYPE);
        List<Attribute> attributes = new LinkedList<Attribute>();
        attributes.add(linkAttribute);
        ExampleSetBuilder builder = ExampleSets.from(attributes);

        for (Element anchor : document.select("a[href]")) {
            String href = anchor.attr("href");
            int markerIndex = href.lastIndexOf(URL_MARKER);
            if (markerIndex < 0) {
                continue;
            }
            String target = href.substring(markerIndex + URL_MARKER.length());
            int sourceIndex = target.indexOf(SOURCE_MARKER);
            if (sourceIndex >= 0) {
                target = target.substring(0, sourceIndex);
            }
            // Rows store the index the attribute's mapping assigns to the string.
            double[] row = new double[attributes.size()];
            row[0] = linkAttribute.getMapping().mapString(target);
            builder.addDataRow(new DoubleArrayDataRow(row));
        }

        Annotations annotations = new Annotations();
        annotations.setAnnotation("Source Query", sourceUrl);
        annotations.setAnnotation("Document Title", documentTitle);
        ExampleSet result = builder.build();
        result.getAnnotations().addAll(annotations);
        return result;
    }
}
