package com.rapidminer.extension.operator;

import com.google.common.collect.Sets;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DoubleSparseArrayDataRow;
import com.rapidminer.example.table.MemoryExampleTable;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.PortUserError;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.ports.metadata.MetaData;
import com.rapidminer.operator.ports.metadata.SimplePrecondition;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.tools.LogService;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.TreeSet;
import java.util.logging.Level;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.analysis.shingle.ShingleFilter;

/* loaded from: input_file:com/rapidminer/extension/operator/fuzzy_matching.class */
public class fuzzy_matching extends Operator {
    private static final String PARAMETER_THRESHOLD_PERCENTAGE = "Threshold Percentage";
    private InputPort exampleSetInput;
    private InputPort inputdocument;
    private OutputPort exampleSetOutput;

    public fuzzy_matching(OperatorDescription operatorDescription) {
        super(operatorDescription);
        this.exampleSetInput = getInputPorts().createPort("example set");
        this.inputdocument = getInputPorts().createPort("document");
        this.exampleSetOutput = getOutputPorts().createPort("example set");
        getTransformer().addGenerationRule(this.exampleSetOutput, ExampleSet.class);
        this.exampleSetInput.addPrecondition(new SimplePrecondition(this.exampleSetInput, new MetaData(ExampleSet.class)));
        this.inputdocument.addPrecondition(new SimplePrecondition(this.inputdocument, new MetaData(IOObject.class)));
    }

    public static int getRatio(String str, String str2, boolean z) {
        if (str.length() >= str2.length()) {
            str2 = str;
            str = str2;
        }
        String escapeString = escapeString(str);
        String escapeString2 = escapeString(str2);
        String lowerCase = escapeString.toLowerCase();
        String lowerCase2 = escapeString2.toLowerCase();
        HashSet hashSet = new HashSet();
        HashSet hashSet2 = new HashSet();
        StringTokenizer stringTokenizer = new StringTokenizer(lowerCase);
        while (stringTokenizer.hasMoreTokens()) {
            hashSet.add(stringTokenizer.nextToken());
        }
        StringTokenizer stringTokenizer2 = new StringTokenizer(lowerCase2);
        while (stringTokenizer2.hasMoreTokens()) {
            hashSet2.add(stringTokenizer2.nextToken());
        }
        Sets.SetView intersection = Sets.intersection(hashSet, hashSet2);
        TreeSet newTreeSet = Sets.newTreeSet(intersection);
        if (z) {
            System.out.print("Sorted intersection --> ");
            Iterator it = newTreeSet.iterator();
            while (it.hasNext()) {
                System.out.print(((String) it.next()) + ShingleFilter.TOKEN_SEPARATOR);
            }
        }
        TreeSet newTreeSet2 = Sets.newTreeSet(Sets.symmetricDifference(hashSet, intersection));
        TreeSet newTreeSet3 = Sets.newTreeSet(Sets.symmetricDifference(hashSet2, intersection));
        if (z) {
            System.out.print("\nSorted rest of 1 --> ");
            Iterator it2 = newTreeSet2.iterator();
            while (it2.hasNext()) {
                System.out.print(((String) it2.next()) + ShingleFilter.TOKEN_SEPARATOR);
            }
            System.out.print("\nSorted rest of 2 -->");
            Iterator it3 = newTreeSet3.iterator();
            while (it3.hasNext()) {
                System.out.print(((String) it3.next()) + ShingleFilter.TOKEN_SEPARATOR);
            }
        }
        String str3 = "";
        String str4 = "";
        String str5 = "";
        Iterator it4 = newTreeSet.iterator();
        while (it4.hasNext()) {
            str3 = str3 + ShingleFilter.TOKEN_SEPARATOR + ((String) it4.next());
        }
        String trim = str3.trim();
        Iterator<E> it5 = Sets.union(newTreeSet, newTreeSet2).iterator();
        while (it5.hasNext()) {
            str4 = str4 + ShingleFilter.TOKEN_SEPARATOR + ((String) it5.next());
        }
        String trim2 = str4.trim();
        Iterator<E> it6 = Sets.union(intersection, newTreeSet3).iterator();
        while (it6.hasNext()) {
            str5 = str5 + ShingleFilter.TOKEN_SEPARATOR + ((String) it6.next());
        }
        String trim3 = str5.trim();
        int calculateLevensteinDistance = calculateLevensteinDistance(trim, trim2);
        int calculateLevensteinDistance2 = calculateLevensteinDistance(trim, trim3);
        int calculateLevensteinDistance3 = calculateLevensteinDistance(trim2, trim3);
        if (z) {
            System.out.println();
            System.out.println("t0 = " + trim + " --> " + calculateLevensteinDistance);
            System.out.println("t1 = " + trim2 + " --> " + calculateLevensteinDistance2);
            System.out.println("t2 = " + trim3 + " --> " + calculateLevensteinDistance3);
            System.out.println();
        }
        return Math.max(Math.max(calculateLevensteinDistance, calculateLevensteinDistance2), calculateLevensteinDistance3);
    }

    public static int calculateLevensteinDistance(String str, String str2) {
        return 100 - new Double((StringUtils.getLevenshteinDistance(str, str2) / Math.max(str.length(), str2.length())) * 100.0d).intValue();
    }

    public static String escapeString(String str) {
        return Pattern.compile("[^\\w+]", 256).matcher(str).replaceAll(ShingleFilter.TOKEN_SEPARATOR);
    }

    public void doWork() throws OperatorException {
        ExampleSet<Example> data = this.exampleSetInput.getData(ExampleSet.class);
        if (this.inputdocument.getAnyDataOrNull().getClass().getName() != "com.rapidminer.operator.text.Document") {
            LogService.getRoot().log(Level.INFO, "wrong Data in second input port");
            LogService.getRoot().log(Level.INFO, "Expected com.rapidminer.operator.text.Document class but got " + this.inputdocument.getAnyDataOrNull().getClass().getName() + " class");
            throw new PortUserError(this.inputdocument, 150, new Object[]{this.inputdocument.getAnyDataOrNull().getClass().getName().substring(this.inputdocument.getAnyDataOrNull().getClass().getName().lastIndexOf(".") + 1), "Fuzzy Matching:Document"});
        }
        String obj = this.inputdocument.getAnyDataOrNull().toString();
        ArrayList<String> arrayList = new ArrayList<String>() { // from class: com.rapidminer.extension.operator.fuzzy_matching.1
            {
                add(":");
                add("\f");
                add("");
            }
        };
        Attribute[] attributeArr = {AttributeFactory.createAttribute("String", 5), AttributeFactory.createAttribute("Percentage Match", 3)};
        MemoryExampleTable memoryExampleTable = new MemoryExampleTable(attributeArr);
        for (Example example : data) {
            int ratio = getRatio(obj, example.toString(), false);
            if (ratio >= getParameterAsInt(PARAMETER_THRESHOLD_PERCENTAGE) && !arrayList.contains(example.toString())) {
                DoubleSparseArrayDataRow doubleSparseArrayDataRow = new DoubleSparseArrayDataRow();
                doubleSparseArrayDataRow.set(attributeArr[0], attributeArr[0].getMapping().mapString(example.toString()));
                doubleSparseArrayDataRow.set(attributeArr[1], ratio);
                memoryExampleTable.addDataRow(doubleSparseArrayDataRow);
            }
        }
        this.exampleSetOutput.deliver(memoryExampleTable.createExampleSet());
    }

    public List<ParameterType> getParameterTypes() {
        List<ParameterType> parameterTypes = super.getParameterTypes();
        parameterTypes.add(new ParameterTypeInt(PARAMETER_THRESHOLD_PERCENTAGE, "Example string matching more than Threshold Percentage is added to result example set", 0, 100, false));
        return parameterTypes;
    }
}
