package lod.linking;

import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DataRowFactory;
import com.rapidminer.example.table.MemoryExampleTable;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.ports.metadata.MetaData;
import com.rapidminer.operator.ports.metadata.SimplePrecondition;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.tools.Ontology;
import com.rapidminer.tools.config.ParameterTypeConfigurable;
import com.wcohen.ss.Jaccard;
import com.wcohen.ss.Levenstein;
import com.wcohen.ss.tokens.NGramTokenizer;
import com.wcohen.ss.tokens.SimpleTokenizer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import lod.generators.BaseGenerator;
import lod.sparql.SPARQLEndpointQueryRunner;
import lod.utils.NGram;
import lod.utils.ValueComparator;

/* loaded from: input_file:lod/linking/SPARQLbasedLinker.class */
/**
 * Operator that adds a link attribute to an example set by looking up each value of a
 * chosen attribute as an {@code rdfs:label} on a SPARQL endpoint.
 *
 * <p>For every example the operator queries the endpoint for resources whose label matches
 * the cell value (optionally also every n-gram of it), scores the returned labels against the
 * cell value and stores the top-ranked resource in a new attribute named
 * {@code <attribute>_link_to_<endpoint alias>}. Optionally a single class is detected for the
 * whole column first and used to restrict the linking queries.
 *
 * <p>Cell values and the language tag are escaped before being embedded in a query, so they
 * are always treated as literal text rather than as regular expressions or query syntax.
 */
public class SPARQLbasedLinker extends Operator {
    private static final String CLASS_NAME = "sparql_based_linker";
    public static final String PARAMETER_ATTRIBUTE_TO_MERGE = "Attribute to merge";
    public static final String PARAMETER_USE_NGRAMS = "Search by N-Grams";
    public static final String PARAMETER_SPARQL_MANAGER = "SPARQL connection";
    public static final String PARAMETER_LANGUAGE_TAG = "Language tag for labels";
    private static final String NEW_ATTRIBUTES = "New Attributes";
    private static final String BYPASSING_ATTRIBUTES = "Bypassing Attributes";
    public static final String PARAMETER_DETECT_CLASS = "Detect column class type";

    /** Infix between the source attribute name and the endpoint alias in the new attribute's name. */
    private static final String LINK_SUFFIX = "_link_to_";

    /** Only types from this namespace are considered during column class detection. */
    private static final String DBPEDIA_ONTOLOGY_PREFIX = "http://dbpedia.org/ontology/";

    /** Characters that carry a special meaning in a SPARQL (XPath-style) regular expression. */
    private static final String REGEX_META_CHARACTERS = "\\|.?*+(){}[]^$";

    private final InputPort mInputPort;
    private final OutputPort mOutputPort;
    private final OutputPort mOutputPortAttrs;
    private SPARQLEndpointQueryRunner queryRunner;

    /**
     * Creates the operator with one example set input, a pass-through output for the extended
     * example set and a second output describing the appended attribute.
     *
     * @param operatorDescription the description handed in by the framework
     */
    public SPARQLbasedLinker(OperatorDescription operatorDescription) {
        super(operatorDescription);
        this.mInputPort = getInputPorts().createPort("Example Set", ExampleSet.class);
        this.mInputPort.addPrecondition(new SimplePrecondition(this.mInputPort, new MetaData(ExampleSet.class)));
        this.mOutputPort = getOutputPorts().createPort("Appended Set");
        this.mOutputPortAttrs = getOutputPorts().createPort("Attributes Appended");
        getTransformer().addPassThroughRule(this.mInputPort, this.mOutputPort);
        getTransformer().addGenerationRule(this.mOutputPortAttrs, ExampleSet.class);
    }

    /**
     * Links every example of the input set and delivers the extended set plus a one-row
     * report (endpoint alias, name of the new attribute).
     *
     * <p>If no attribute name is configured, the cloned input is delivered unchanged and the
     * report port receives {@code null}. Examples whose source value is missing are not
     * queried; their link stays missing.
     *
     * @throws OperatorException always as a {@link UserError} with code 2001 wrapping whatever
     *         failed (including an unknown attribute name)
     */
    @Override
    public void doWork() throws OperatorException {
        try {
            this.queryRunner = SPARQLEndpointQueryRunner.initRunner(this, this.queryRunner);
            ExampleSet exampleSet = BaseGenerator.cloneExampleSet(this.mInputPort.getData(ExampleSet.class));
            String attributeName = getParameterAsString(PARAMETER_ATTRIBUTE_TO_MERGE);
            boolean useNGrams = getParameterAsBoolean(PARAMETER_USE_NGRAMS);
            boolean detectClass = getParameterAsBoolean(PARAMETER_DETECT_CLASS);

            if (attributeName == null || attributeName.length() == 0) {
                this.mOutputPort.deliver(exampleSet);
                this.mOutputPortAttrs.deliver((IOObject) null);
            } else {
                Attribute sourceAttribute = exampleSet.getAttributes().get(attributeName);
                if (sourceAttribute == null) {
                    throw new OperatorException("Problem in SPARQL Based Linker: No attribute named '" + attributeName + "'");
                }
                String alias = this.queryRunner.getAlias();
                String linkAttributeName = sourceAttribute.getName() + LINK_SUFFIX + alias;
                Attribute linkAttribute = appendLinkAttribute(exampleSet, sourceAttribute, linkAttributeName);

                String classUri = detectClass ? detectColumnClass(exampleSet, sourceAttribute, useNGrams) : "";

                for (Example example : exampleSet) {
                    if (isMissing(example, sourceAttribute)) {
                        // Nothing to look up: keep the link missing instead of querying a placeholder.
                        example.setValue(linkAttribute, Double.NaN);
                    } else {
                        String cellValue = example.getValueAsString(sourceAttribute);
                        example.setValue(linkAttribute, getLinkedEntityWithSPARQL(cellValue, useNGrams, classUri));
                    }
                }

                ExampleSet report = buildAttributeReport(alias, linkAttributeName);
                this.mOutputPort.deliver(exampleSet);
                this.mOutputPortAttrs.deliver(report);
            }
            super.doWork();
        } catch (Exception e) {
            e.printStackTrace();
            // Pass the original exception along as cause so the stack trace is not lost.
            throw new UserError(this, e, 2001, new Object[]{CLASS_NAME, e.getMessage()});
        }
    }

    /**
     * Declares the operator parameters: endpoint connection, attribute to link, n-gram search,
     * column class detection and label language tag.
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> parameterTypes = super.getParameterTypes();
        parameterTypes.add(new ParameterTypeConfigurable(PARAMETER_SPARQL_MANAGER, "Choose SPARQL endpoint connection", "sparqlconfig"));
        parameterTypes.add(new ParameterTypeString(PARAMETER_ATTRIBUTE_TO_MERGE, "This parameter defines the attribute name whose contents will be used for creating links.", "City", false));
        parameterTypes.add(new ParameterTypeBoolean(PARAMETER_USE_NGRAMS, "Use this to also include n-grams in the search. If not set, the linker will search only for the whole string, such as 'United States of America', whereas n-gram search would also search for 'United States' etc.", false, false));
        parameterTypes.add(new ParameterTypeBoolean(PARAMETER_DETECT_CLASS, "Use this to assign only one class type to all instances from the column", false, false));
        parameterTypes.add(new ParameterTypeString(PARAMETER_LANGUAGE_TAG, "This parameter restricts the search to labels with the given language tag, e.g., \"en\". Leave empty for searching in all languages. If the data source you are using serves literals without language tags, you will also have to leave this empty.", "", true));
        return parameterTypes;
    }

    /**
     * Queries the endpoint for resources labelled like {@code str} and adds their DBpedia
     * ontology types to the two vote counters.
     *
     * <p>Candidates are ranked by the Levenstein score between their label and {@code str}
     * (order defined by {@code ValueComparator}). The types of the first ranked candidate that
     * has any types are counted in {@code map2}; every distinct type seen on any candidate is
     * counted once in {@code map}.
     *
     * @param str  the cell value to look up
     * @param z    whether to also query every n-gram of {@code str}
     * @param i    not used by the current implementation
     * @param map  vote counter over the types of all candidates (modified in place)
     * @param map2 vote counter over the types of the top-ranked typed candidate (modified in place)
     * @throws OperatorException if building or running a query fails
     */
    public void getClassesForEntity(String str, boolean z, int i, Map<String, Integer> map, Map<String, Integer> map2) throws OperatorException {
        Levenstein levenstein = new Levenstein();
        HashMap<String, Double> scoreByEntity = new HashMap<String, Double>();
        Map<String, List<String>> typesByEntity = new HashMap<String, List<String>>();

        for (String searchTerm : searchTermsFor(str, z)) {
            ResultSet results = this.queryRunner.runSelectQueryInterruptable(getSPARQLQueryForClasses(searchTerm));
            if (!this.queryRunner.mUIThreadRunning) {
                break;
            }
            if (results == null) {
                continue;
            }
            while (results.hasNext()) {
                QuerySolution solution = results.next();
                String entity = solution.get("s").toString();
                // Lexical form only: a language tag or datatype suffix must not distort the score.
                String label = solution.getLiteral("t").getLexicalForm();
                String type = solution.get("type").toString();
                if (type.startsWith(DBPEDIA_ONTOLOGY_PREFIX)) {
                    List<String> types = typesByEntity.get(entity);
                    if (types == null) {
                        types = new ArrayList<String>();
                        typesByEntity.put(entity, types);
                    }
                    if (!types.contains(type)) {
                        types.add(type);
                    }
                }
                scoreByEntity.put(entity, Double.valueOf(levenstein.score(label, str)));
            }
        }

        List<String> alreadyCounted = new ArrayList<String>();
        boolean bestCandidateCounted = false;
        for (String entity : rankByScore(scoreByEntity).keySet()) {
            List<String> types = typesByEntity.get(entity);
            if (types == null) {
                continue;
            }
            if (!bestCandidateCounted) {
                bestCandidateCounted = true;
                for (String type : types) {
                    increment(map2, type);
                }
            }
            for (String type : types) {
                if (!alreadyCounted.contains(type)) {
                    increment(map, type);
                    alreadyCounted.add(type);
                }
            }
        }
    }

    /**
     * Looks up {@code str} on the endpoint and returns the top-ranked matching resource.
     *
     * <p>Each returned label is scored against {@code str} with an n-gram based Jaccard
     * measure; the result is the first key of the candidates ordered by
     * {@code ValueComparator} (presumably the best score first).
     *
     * @param str  the cell value to look up
     * @param z    whether to also query every n-gram of {@code str}
     * @param str2 class URI the candidates must be typed with, or an empty string for no restriction
     * @return the URI of the top-ranked candidate, or {@code null} if no candidate was found
     * @throws OperatorException if building or running a query fails
     */
    public String getLinkedEntityWithSPARQL(String str, boolean z, String str2) throws OperatorException {
        Jaccard jaccard = new Jaccard(new NGramTokenizer(2, 4, true, new SimpleTokenizer(true, true)));
        HashMap<String, Double> scoreByEntity = new HashMap<String, Double>();

        for (String searchTerm : searchTermsFor(str, z)) {
            ResultSet results = this.queryRunner.runSelectQueryInterruptable(getSPARQLQuery(searchTerm, str2));
            if (!this.queryRunner.mUIThreadRunning) {
                break;
            }
            if (results == null) {
                continue;
            }
            while (results.hasNext()) {
                QuerySolution solution = results.next();
                // Lexical form instead of stripping "@..." from toString(): labels may contain '@'.
                String label = solution.getLiteral("t").getLexicalForm();
                scoreByEntity.put(solution.get("s").toString(), Double.valueOf(jaccard.score(label, str)));
            }
        }

        TreeMap<String, Double> ranked = rankByScore(scoreByEntity);
        return ranked.isEmpty() ? null : ranked.firstKey();
    }

    /** Builds the class detection query: labelled owl:Thing resources with their most specific types. */
    private String getSPARQLQueryForClasses(String str) throws UndefinedParameterError {
        return "SELECT DISTINCT * WHERE {?s a <http://www.w3.org/2002/07/owl#Thing>.?s <http://www.w3.org/2000/01/rdf-schema#label> ?t .?s <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ?type. filter not exists {?subtype ^a ?s ; <http://www.w3.org/2000/01/rdf-schema#subClassOf> ?type .}. "
                + labelRegexFilter(str) + languageFilter() + "}";
    }

    /**
     * Builds the linking query. With a class URI the candidates are restricted to that class
     * and limited to 30 rows; without one the query is unrestricted.
     */
    private String getSPARQLQuery(String str, String str2) throws UndefinedParameterError {
        String filters = labelRegexFilter(str) + languageFilter();
        if (str2 == null || str2.length() == 0) {
            return "SELECT DISTINCT * WHERE {?s <http://www.w3.org/2000/01/rdf-schema#label> ?t . " + filters + "}";
        }
        return "SELECT DISTINCT * WHERE {?s <http://www.w3.org/2000/01/rdf-schema#label> ?t . ?s <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <" + str2 + ">. " + filters + "} LIMIT 30";
    }

    /**
     * Returns every word n-gram of {@code str}, from single words up to the number of tokens
     * obtained by splitting on space, comma, '!', apostrophe, '?', '-', '_' and tab.
     *
     * @param str the text to expand
     * @return all n-grams produced by {@code NGram} for sizes 1 to the token count
     */
    public static List<String> getAllNgramsInBound(String str) {
        int tokenCount = str.split(" |,|!|'|\\?|-|_|\\t").length;
        List<String> nGrams = new ArrayList<String>();
        for (int size = 1; size <= tokenCount; size++) {
            nGrams.addAll(new NGram(str, size).list());
        }
        return nGrams;
    }

    /** Signals the query runner to stop and releases its worker thread when the process ends. */
    @Override
    public void processFinished() throws OperatorException {
        System.out.println("ProcessFinished pressed...");
        if (this.queryRunner != null) {
            this.queryRunner.mUIThreadRunning = false;
            this.queryRunner.finalizeAsyncThread();
        }
        super.processFinished();
    }

    /**
     * Creates the link attribute and registers it with the example table and as a regular
     * attribute. It reuses the source's value type when that is nominal, otherwise string.
     */
    private static Attribute appendLinkAttribute(ExampleSet exampleSet, Attribute sourceAttribute, String linkAttributeName) {
        int sourceType = sourceAttribute.getValueType();
        int linkType = Ontology.ATTRIBUTE_VALUE_TYPE.isA(sourceType, Ontology.NOMINAL) ? sourceType : Ontology.STRING;
        Attribute linkAttribute = AttributeFactory.createAttribute(linkAttributeName, linkType);
        linkAttribute.setTableIndex(sourceAttribute.getTableIndex() + 1);
        exampleSet.getExampleTable().addAttribute(linkAttribute);
        exampleSet.getAttributes().addRegular(linkAttribute);
        return linkAttribute;
    }

    /**
     * Votes over all non-missing values of the column and returns the single most frequent
     * class, first among all candidates' types, then among the best candidates' types.
     *
     * @return the winning class URI, or an empty string if both votes end in a tie or are empty
     */
    private String detectColumnClass(ExampleSet exampleSet, Attribute sourceAttribute, boolean useNGrams) throws OperatorException {
        Map<String, Integer> allCandidateVotes = new HashMap<String, Integer>();
        Map<String, Integer> bestCandidateVotes = new HashMap<String, Integer>();
        for (Example example : exampleSet) {
            if (!isMissing(example, sourceAttribute)) {
                getClassesForEntity(example.getValueAsString(sourceAttribute), useNGrams, 3, allCandidateVotes, bestCandidateVotes);
            }
        }
        String winner = uniqueMostFrequent(allCandidateVotes);
        if (winner.equals("")) {
            winner = uniqueMostFrequent(bestCandidateVotes);
        }
        return winner;
    }

    /** Returns the key with the strictly highest count, or an empty string on a tie or empty map. */
    private static String uniqueMostFrequent(Map<String, Integer> votes) {
        String leader = "";
        int highest = 0;
        boolean unique = true;
        for (Map.Entry<String, Integer> vote : votes.entrySet()) {
            int count = vote.getValue().intValue();
            if (count > highest) {
                unique = true;
                leader = vote.getKey();
                highest = count;
            } else if (count == highest) {
                unique = false;
            }
        }
        return unique ? leader : "";
    }

    /** Builds the one-row report table: endpoint alias and name of the appended attribute. */
    private static ExampleSet buildAttributeReport(String alias, String linkAttributeName) {
        Attribute[] columns = {
            AttributeFactory.createAttribute(NEW_ATTRIBUTES, Ontology.STRING),
            AttributeFactory.createAttribute(BYPASSING_ATTRIBUTES, Ontology.STRING)
        };
        MemoryExampleTable table = new MemoryExampleTable(columns);
        // Row storage type code carried over unchanged from the previous implementation.
        DataRowFactory rowFactory = new DataRowFactory(14);
        table.addDataRow(rowFactory.create(new String[]{alias, linkAttributeName}, columns));
        return table.createExampleSet();
    }

    /** Tells whether the example has no value (NaN) for the given attribute. */
    private static boolean isMissing(Example example, Attribute attribute) {
        return Double.isNaN(example.getValue(attribute));
    }

    /** Returns the terms to query for: all n-grams of the text, or just the text itself. */
    private static List<String> searchTermsFor(String text, boolean useNGrams) {
        if (useNGrams) {
            return getAllNgramsInBound(text);
        }
        List<String> single = new ArrayList<String>(1);
        single.add(text);
        return single;
    }

    /** Orders the candidates with {@code ValueComparator} over their scores. */
    @SuppressWarnings("unchecked") // ValueComparator's generic signature is declared outside this file.
    private static TreeMap<String, Double> rankByScore(HashMap<String, Double> scoreByEntity) {
        TreeMap<String, Double> ranked = new TreeMap<String, Double>(new ValueComparator(scoreByEntity));
        ranked.putAll(scoreByEntity);
        return ranked;
    }

    /** Adds one to the counter stored under {@code key}, starting at one for a new key. */
    private static void increment(Map<String, Integer> counts, String key) {
        Integer current = counts.get(key);
        counts.put(key, Integer.valueOf(current == null ? 1 : current.intValue() + 1));
    }

    /** Builds the case-insensitive label filter that matches {@code text} literally. */
    private static String labelRegexFilter(String text) {
        return "FILTER regex(?t, \"" + escapeSparqlString(escapeRegex(text)) + "\", \"i\") . ";
    }

    /** Builds the language filter from the operator parameter, or an empty string if none is set. */
    private String languageFilter() throws UndefinedParameterError {
        String languageTag = getParameterAsString(PARAMETER_LANGUAGE_TAG);
        if (languageTag == null || languageTag.length() == 0) {
            return "";
        }
        return "FILTER(LANGMATCHES(LANG(?t), \"" + escapeSparqlString(languageTag) + "\")). ";
    }

    /** Backslash-escapes regex metacharacters so the text is matched literally. */
    private static String escapeRegex(String text) {
        StringBuilder escaped = new StringBuilder(text.length() + 8);
        for (int index = 0; index < text.length(); index++) {
            char c = text.charAt(index);
            if (REGEX_META_CHARACTERS.indexOf(c) >= 0) {
                escaped.append('\\');
            }
            escaped.append(c);
        }
        return escaped.toString();
    }

    /** Escapes backslash, double quote and line breaks for use inside a double-quoted SPARQL string. */
    private static String escapeSparqlString(String text) {
        StringBuilder escaped = new StringBuilder(text.length() + 8);
        for (int index = 0; index < text.length(); index++) {
            char c = text.charAt(index);
            switch (c) {
                case '\\':
                    escaped.append("\\\\");
                    break;
                case '"':
                    escaped.append("\\\"");
                    break;
                case '\n':
                    escaped.append("\\n");
                    break;
                case '\r':
                    escaped.append("\\r");
                    break;
                default:
                    escaped.append(c);
                    break;
            }
        }
        return escaped.toString();
    }
}
