package lod.linking;

import com.fasterxml.jackson.core.util.MinimalPrettyPrinter;
import com.mysql.jdbc.MysqlErrorNumbers;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DataRowFactory;
import com.rapidminer.example.table.MemoryExampleTable;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.ports.metadata.MetaData;
import com.rapidminer.operator.ports.metadata.SimplePrecondition;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeConfiguration;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.conditions.EqualTypeCondition;
import com.wcohen.ss.Jaccard;
import com.wcohen.ss.JaroWinkler;
import com.wcohen.ss.Levenstein;
import java.io.StringReader;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import java.util.logging.Logger;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import lod.generators.BaseGenerator;
import lod.gui.tools.utils.OntologySelectorWizardCreator;
import lod.http.WebQueryRunner;
import lod.utils.DBSelection;
import lod.utils.ValueComparator;
import org.apache.log4j.Level;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

/* loaded from: input_file:lod/linking/LookupLinker.class */
public class LookupLinker extends Operator {
    private static final String CLASS_NAME = "lookup_linker";
    private InputPort mInputPort;
    private OutputPort mOutputPort;
    private OutputPort mOutputPortAttrs;
    public static final String PARAMETER_NEW_ATTRIBUTE_NAME = "New attribute";
    public static final String PARAMETER_QUERY_CLASS = "Query Class";
    public static final String PARAMETER_DBPEDIA_ENDPOINT = "DBPedia Lookup API";
    public static final String PARAMETER_DBPEDIA_ENDPOINT_CONNECTION_TIMEOUT_MSEC = "Connection timeout msec";
    public static final String PARAMETER_POP_CONCEPT = "Attribute";
    public static final String PARAMETER_ADDITIONAL_STRING = "Additional String";
    public static final String PARAMETER_MAX_HITS = "Max Hits";
    public static final String PARAMETER_SELECTION_METHOD = "Selection Method";
    public static final String PARAMETER_ONTOLOGY_SELECTOR = "Ontology class selection";
    public static final String PARAMETER_CUSTOM_LOOKUP_API = "Custom API Endpoint";
    private static final String NEW_ATTRIBUTES = "New Attributes";
    private static final String BYPASSING_ATTRIBUTES = "Bypassing Attributes";
    private Map<String, String> cachedResults;
    public static final int MTD_FIRST = 0;
    public static final int MTD_EDIT_DISTANCE = 1;
    public static final int MTD_JARO_WINKLER = 2;
    public static final int MTD_JACCARD = 3;
    public static int CONNECTION_TIMEOUT = Level.TRACE_INT;
    public static final String[] LOOKUP_API = {"KeywordSearch", "PrefixSearch", "Custom"};
    private static final Map<Integer, String> LOOKUP_API_Selection = new HashMap<Integer, String>() { // from class: lod.linking.LookupLinker.1
        {
            put(0, "http://lookup.dbpedia.org/api/search/KeywordSearch");
            put(1, "http://lookup.dbpedia.org/api/search/KeywordSearch");
        }
    };
    public static final String[] DBPEDIA_METHODS = {DBSelection.FIRST.toString(), DBSelection.EDIT_DISTANCE.toString(), DBSelection.JARO_WINKLER.toString(), DBSelection.JACCARD.toString()};

    public LookupLinker(OperatorDescription operatorDescription) {
        super(operatorDescription);
        this.cachedResults = new HashMap();
        this.mInputPort = getInputPorts().createPort("Example Set", ExampleSet.class);
        this.mInputPort.addPrecondition(new SimplePrecondition(this.mInputPort, new MetaData(ExampleSet.class)));
        this.mOutputPort = getOutputPorts().createPort("Appended Set");
        this.mOutputPortAttrs = getOutputPorts().createPort("Attributes Appended");
        getTransformer().addPassThroughRule(this.mInputPort, this.mOutputPort);
        getTransformer().addGenerationRule(this.mOutputPortAttrs, ExampleSet.class);
    }

    public String getConceptDbPedia(String str, String str2, DBSelection dBSelection, String str3, int i, String str4) {
        if (this.cachedResults.containsKey(str.toLowerCase() + "|" + str2 + "|" + dBSelection.toString() + "|" + str3 + "|" + i + "|" + str4)) {
            return this.cachedResults.get(str.toLowerCase() + "|" + str2 + "|" + dBSelection.toString() + "|" + str3 + "|" + i + "|" + str4);
        }
        String str5 = null;
        if (str4 == null) {
            str4 = "";
        }
        try {
            String makeGetInterruptable = new WebQueryRunner(str3 + "?MaxHits=" + i + "&QueryClass=" + str4 + "&QueryString=" + URLEncoder.encode(str), CONNECTION_TIMEOUT).makeGetInterruptable();
            if (!makeGetInterruptable.equals("")) {
                try {
                    str5 = selectDBConcept(DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new InputSource(new StringReader(makeGetInterruptable))), dBSelection, str, str2);
                } catch (Exception e) {
                    e.printStackTrace();
                }
                if (str5 != null) {
                    if (str5.equals("")) {
                        str5 = null;
                    }
                }
            }
        } catch (Exception e2) {
            e2.printStackTrace();
        }
        this.cachedResults.put(str.toLowerCase() + "|" + str2 + "|" + dBSelection.toString() + "|" + str3 + "|" + i + "|" + str4, str5);
        return str5;
    }

    public static String selectDBConcept(Document document, DBSelection dBSelection, String str, String str2) throws XPathExpressionException {
        if (dBSelection == DBSelection.FIRST) {
            return (String) XPathFactory.newInstance().newXPath().compile("/ArrayOfResult/Result/URI").evaluate(document, XPathConstants.STRING);
        }
        XPath newXPath = XPathFactory.newInstance().newXPath();
        NodeList nodeList = (NodeList) newXPath.compile("/ArrayOfResult//Result").evaluate(document, XPathConstants.NODESET);
        HashMap hashMap = new HashMap();
        HashMap hashMap2 = new HashMap();
        for (int i = 0; i < nodeList.getLength(); i++) {
            String str3 = (String) newXPath.compile("Label").evaluate(nodeList.item(i), XPathConstants.STRING);
            String str4 = (String) newXPath.compile("URI").evaluate(nodeList.item(i), XPathConstants.STRING);
            hashMap.put(str4, Double.valueOf(getScore(dBSelection, str, str3)));
            hashMap2.put(str4, Double.valueOf(getScore(dBSelection, str + MinimalPrettyPrinter.DEFAULT_ROOT_VALUE_SEPARATOR + str2, str3)));
        }
        TreeMap treeMap = new TreeMap(new ValueComparator(hashMap));
        TreeMap treeMap2 = new TreeMap(new ValueComparator(hashMap2));
        treeMap.putAll(hashMap);
        treeMap2.putAll(hashMap2);
        if (treeMap.size() <= 0) {
            return null;
        }
        String str5 = (String) treeMap.firstKey();
        if (((Double) treeMap2.firstEntry().getValue()).doubleValue() > ((Double) treeMap.firstEntry().getValue()).doubleValue()) {
            str5 = (String) treeMap2.firstKey();
        }
        return str5;
    }

    private static double getScore(DBSelection dBSelection, String str, String str2) {
        double d = -1.0d;
        if (dBSelection == DBSelection.EDIT_DISTANCE) {
            d = new Levenstein().score(str, str2);
        } else if (dBSelection == DBSelection.JARO_WINKLER) {
            d = new JaroWinkler().score(str, str2);
        } else if (dBSelection == DBSelection.JACCARD) {
            d = new Jaccard().score(str, str2);
        }
        return d;
    }

    private DBSelection getDbSelection(String str) {
        DBSelection dBSelection = DBSelection.FIRST;
        if (str.equals("EDIT_DISTANCE")) {
            dBSelection = DBSelection.EDIT_DISTANCE;
        }
        if (str.equals("JARO_WINKLER")) {
            dBSelection = DBSelection.JARO_WINKLER;
        }
        if (str.equals("JACCARD")) {
            dBSelection = DBSelection.JACCARD;
        }
        return dBSelection;
    }

    public void doWork() throws OperatorException {
        ExampleSet<Example> cloneExampleSet = BaseGenerator.cloneExampleSet(this.mInputPort.getData(ExampleSet.class));
        Attributes attributes = cloneExampleSet.getAttributes();
        int parameterAsInt = getParameterAsInt(PARAMETER_DBPEDIA_ENDPOINT);
        String parameterAsString = parameterAsInt < LOOKUP_API_Selection.size() ? LOOKUP_API_Selection.get(Integer.valueOf(parameterAsInt)) : getParameterAsString(PARAMETER_CUSTOM_LOOKUP_API);
        String parameterAsString2 = getParameterAsString("Attribute");
        String parameterAsString3 = getParameterAsString(PARAMETER_ADDITIONAL_STRING);
        String parameterAsString4 = getParameterAsString(PARAMETER_QUERY_CLASS);
        DBSelection dbSelection = getDbSelection(getParameterAsString(PARAMETER_SELECTION_METHOD));
        int parameterAsInt2 = getParameterAsInt(PARAMETER_MAX_HITS);
        CONNECTION_TIMEOUT = getParameterAsInt(PARAMETER_DBPEDIA_ENDPOINT_CONNECTION_TIMEOUT_MSEC);
        ArrayList arrayList = new ArrayList();
        ArrayList arrayList2 = new ArrayList();
        Attribute attribute = attributes.get(parameterAsString2);
        if (attribute == null) {
            throw new OperatorException("Problem in LookupLinker: No attributes with the name '" + getParameterAsString("Attribute") + "' found.");
        }
        String parameterAsString5 = getParameterAsString("New attribute");
        if (parameterAsString5.equals("")) {
            throw new UserError(this, MysqlErrorNumbers.ER_CANT_CREATE_TABLE, new Object[]{CLASS_NAME});
        }
        Attribute createAttribute = AttributeFactory.createAttribute(parameterAsString5, 5);
        createAttribute.setTableIndex(cloneExampleSet.getAttributes().allSize());
        cloneExampleSet.getExampleTable().addAttribute(createAttribute);
        cloneExampleSet.getAttributes().addRegular(createAttribute);
        arrayList.add(createAttribute.getName());
        arrayList2.add(createAttribute.getName());
        for (Example example : cloneExampleSet) {
            String str = "";
            String conceptDbPedia = getConceptDbPedia(example.getValueAsString(attribute), parameterAsString3, dbSelection, parameterAsString, parameterAsInt2, parameterAsString4);
            if (conceptDbPedia == null) {
                str = "?";
            } else if (!conceptDbPedia.equals("")) {
                str = conceptDbPedia;
            }
            example.setValue(createAttribute, str);
        }
        Attribute[] attributeArr = {AttributeFactory.createAttribute("New Attributes", 5), AttributeFactory.createAttribute("Bypassing Attributes", 5)};
        MemoryExampleTable memoryExampleTable = new MemoryExampleTable(attributeArr);
        DataRowFactory dataRowFactory = new DataRowFactory(14);
        for (int i = 0; i < arrayList2.size(); i++) {
            memoryExampleTable.addDataRow(dataRowFactory.create(new String[]{(String) arrayList.get(i), (String) arrayList2.get(i)}, attributeArr));
        }
        ExampleSet createExampleSet = memoryExampleTable.createExampleSet();
        this.mOutputPort.deliver(cloneExampleSet);
        this.mOutputPortAttrs.deliver(createExampleSet);
        super.doWork();
    }

    public List<ParameterType> getParameterTypes() {
        List<ParameterType> parameterTypes = super.getParameterTypes();
        parameterTypes.add(new ParameterTypeString("New attribute", "This parameter defines the name of the new attribute", "New_Link", false));
        parameterTypes.add(new ParameterTypeString("Attribute", "Name of the attribute for lookup from table", false, false));
        parameterTypes.add(new ParameterTypeString(PARAMETER_QUERY_CLASS, "Name of the class of the object queried", true, true));
        ParameterTypeConfiguration parameterTypeConfiguration = new ParameterTypeConfiguration(OntologySelectorWizardCreator.class, this);
        parameterTypeConfiguration.setExpert(false);
        parameterTypes.add(parameterTypeConfiguration);
        parameterTypes.add(new ParameterTypeCategory(PARAMETER_DBPEDIA_ENDPOINT, "DBPedia endpoint address", LOOKUP_API, 0, false));
        ParameterTypeString parameterTypeString = new ParameterTypeString(PARAMETER_CUSTOM_LOOKUP_API, "Custom DBpedia Lookup endpoint", "", false);
        parameterTypeString.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_DBPEDIA_ENDPOINT, LOOKUP_API, true, new int[]{2}));
        parameterTypes.add(parameterTypeString);
        parameterTypes.add(new ParameterTypeInt(PARAMETER_DBPEDIA_ENDPOINT_CONNECTION_TIMEOUT_MSEC, "Connection timeout", 10, 100000, CONNECTION_TIMEOUT, true));
        parameterTypes.add(new ParameterTypeString(PARAMETER_ADDITIONAL_STRING, "Additional string to search for", true, true));
        parameterTypes.add(new ParameterTypeInt(PARAMETER_MAX_HITS, "Maximum number of hits", 1, 99999, 5, true));
        parameterTypes.add(new ParameterTypeCategory(PARAMETER_SELECTION_METHOD, "DBPedia selection method", DBPEDIA_METHODS, 1, true));
        return parameterTypes;
    }
}
