package com.rapidminer.extension.datasearch.operator;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DoubleArrayDataRow;
import com.rapidminer.example.utils.ExampleSetBuilder;
import com.rapidminer.example.utils.ExampleSets;
import com.rapidminer.operator.Annotations;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.MDTransformationRule;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.tools.LogService;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/* loaded from: input_file:com/rapidminer/extension/datasearch/operator/GoogleTableSearchOperator.class */
public class GoogleTableSearchOperator extends Operator {
    public static final String PARAMETER_TABULAR_CORPUS = "tabular corpus";
    public static final int CORPUS_TYPE_DEFAULT = 0;
    public static final String PARAMETER_KEYWORDS = "keywords";
    public static final String PARAMETER_RESULT_LIMIT = "maximum results";
    private static final String PARAMETER_USER_AGENT = "user agent";
    private static final String PARAMETER_UNIQUE_RESULTS = "unique results";
    private static final String DEFAULT_USER_AGENT = "Mozilla";
    private static final String ATTRIBUTE_LINK = "link";
    private static final String ATTRIBUTE_SEARCH_COUNT = "search_count";
    private final OutputPort outputPort;
    private ExampleSet exampleSet;
    public static final String[] CORPUS_TYPES = {"Google Web Tables", "Google Fusion Tables"};
    private static Logger LOGGER = LogService.getRoot();

    public GoogleTableSearchOperator(OperatorDescription operatorDescription) {
        super(operatorDescription);
        this.outputPort = getOutputPorts().createPort("output");
        this.exampleSet = null;
        LinkedList linkedList = new LinkedList();
        linkedList.add(AttributeFactory.createAttribute(ATTRIBUTE_LINK, 5));
        this.exampleSet = ExampleSets.from(linkedList).build();
        getTransformer().addGenerationRule(this.outputPort, this.exampleSet.getClass());
        getTransformer().addRule(new MDTransformationRule() { // from class: com.rapidminer.extension.datasearch.operator.GoogleTableSearchOperator.1
            public void transformMD() {
                GoogleTableSearchOperator.this.outputPort.deliverMD(new ExampleSetMetaData(GoogleTableSearchOperator.this.exampleSet));
            }
        });
    }

    public void transformMetaData() {
        super.transformMetaData();
    }

    public void doWork() throws OperatorException {
        InputStream inputStream = null;
        String parameter = getParameter(PARAMETER_TABULAR_CORPUS);
        if (parameter.equals(CORPUS_TYPES[0])) {
            parameter = "web";
        } else if (parameter.equals(CORPUS_TYPES[1])) {
            parameter = "fusion";
        }
        String replace = getParameterAsString(PARAMETER_KEYWORDS).trim().replaceAll("( )+", StringUtils.SPACE).replace(StringUtils.SPACE, "+");
        int parameterAsInt = getParameterAsInt(PARAMETER_RESULT_LIMIT);
        String str = "https://research.google.com/tables?corpus=" + parameter + "&hl=en&q=" + replace;
        if (parameterAsInt >= 1) {
            str = str + "&num=" + parameterAsInt;
        }
        try {
            try {
                new URL(str);
                System.currentTimeMillis();
                String parameter2 = getParameter(PARAMETER_USER_AGENT);
                Document document = Jsoup.connect(str).userAgent((parameter2 == null || parameter2.trim().length() <= 0) ? DEFAULT_USER_AGENT : parameter2).get();
                String title = document.title();
                if (document != null) {
                    System.currentTimeMillis();
                    this.exampleSet = extractAnchorTags(document, str, title);
                } else {
                    LOGGER.log(Level.INFO, "Search Page could not be parsed");
                }
                if (0 != 0) {
                    try {
                        inputStream.close();
                    } catch (IOException e) {
                        LOGGER.log(Level.WARNING, "Error closing the input stream" + e.getMessage());
                        e.printStackTrace();
                    }
                }
            } catch (IOException e2) {
                LOGGER.log(Level.WARNING, "Error accessing the given url " + e2.getMessage());
                e2.printStackTrace();
                if (0 != 0) {
                    try {
                        inputStream.close();
                    } catch (IOException e3) {
                        LOGGER.log(Level.WARNING, "Error closing the input stream" + e3.getMessage());
                        e3.printStackTrace();
                    }
                }
            }
            this.outputPort.deliver(this.exampleSet);
        } catch (Throwable th) {
            if (0 != 0) {
                try {
                    inputStream.close();
                } catch (IOException e4) {
                    LOGGER.log(Level.WARNING, "Error closing the input stream" + e4.getMessage());
                    e4.printStackTrace();
                }
            }
            throw th;
        }
    }

    private int getIndexOfSelectedSubjectIdAttribute() {
        int i = 0;
        try {
            String parameterAsString = getParameterAsString(PARAMETER_TABULAR_CORPUS);
            Iterator it = getParameterType(PARAMETER_TABULAR_CORPUS).getAttributeNames().iterator();
            while (it.hasNext() && !((String) it.next()).equals(parameterAsString)) {
                i++;
            }
        } catch (UndefinedParameterError e) {
            LOGGER.log(Level.WARNING, "UndefinedParameterError = " + e.getMessage());
        }
        return i;
    }

    public List<ParameterType> getParameterTypes() {
        List<ParameterType> parameterTypes = super.getParameterTypes();
        parameterTypes.add(new ParameterTypeCategory(PARAMETER_TABULAR_CORPUS, "Choose a public tabular corpus", CORPUS_TYPES, 0, false));
        parameterTypes.add(new ParameterTypeString(PARAMETER_KEYWORDS, "Keyword(s) to search for", false, false));
        parameterTypes.add(new ParameterTypeInt(PARAMETER_RESULT_LIMIT, "Specifies the index value till where the results of the query will be fetched", 1, 500, 20, false));
        parameterTypes.add(new ParameterTypeBoolean(PARAMETER_UNIQUE_RESULTS, "Specifies if only unique values should be output", false));
        parameterTypes.add(new ParameterTypeString(PARAMETER_USER_AGENT, PARAMETER_USER_AGENT, true, true));
        return parameterTypes;
    }

    public ExampleSet extractAnchorTags(Document document, String str, String str2) {
        Elements select = document.select("a[href]");
        LOGGER.log(Level.INFO, "Total HREFs = " + select.size());
        LinkedHashMap linkedHashMap = new LinkedHashMap();
        Iterator it = select.iterator();
        while (it.hasNext()) {
            String attr = ((Element) it.next()).attr("href");
            if (attr.contains("&url=")) {
                String substring = attr.substring(attr.lastIndexOf("&url=") + 5);
                String substring2 = substring.substring(0, substring.indexOf("&source"));
                if (linkedHashMap.containsKey(substring2)) {
                    linkedHashMap.put(substring2, Integer.valueOf(((Integer) linkedHashMap.get(substring2)).intValue() + 1));
                } else {
                    linkedHashMap.put(substring2, 1);
                }
            }
        }
        Iterator it2 = linkedHashMap.keySet().iterator();
        int i = 0;
        ArrayList arrayList = new ArrayList();
        arrayList.add(this.exampleSet.getAttributes().get(ATTRIBUTE_LINK));
        if (getParameterAsBoolean(PARAMETER_UNIQUE_RESULTS)) {
            arrayList.add(AttributeFactory.createAttribute(ATTRIBUTE_SEARCH_COUNT, 3));
        }
        ExampleSetBuilder from = ExampleSets.from(arrayList);
        while (it2.hasNext()) {
            int intValue = ((Integer) linkedHashMap.get((String) it2.next())).intValue();
            if (getParameterAsBoolean(PARAMETER_UNIQUE_RESULTS)) {
                from.addDataRow(new DoubleArrayDataRow(new double[]{r0.getMapping().mapString(r0), intValue}));
            } else {
                for (int i2 = 0; i2 < intValue; i2++) {
                    from.addDataRow(new DoubleArrayDataRow(new double[]{r0.getMapping().mapString(r0)}));
                }
            }
            i++;
        }
        LOGGER.log(Level.INFO, "Total websites = " + i);
        Annotations annotations = new Annotations();
        annotations.setAnnotation("Source Query", str);
        annotations.setAnnotation("Document Title", str2);
        this.exampleSet = from.build();
        this.exampleSet.getAnnotations().addAll(annotations);
        return this.exampleSet;
    }
}
