package com.rapidminer.extension.datasearch.operator;

import com.google.common.collect.Lists;
import com.medallia.word2vec.Searcher;
import com.medallia.word2vec.Word2VecModel;
import com.medallia.word2vec.Word2VecTrainerBuilder;
import com.medallia.word2vec.neuralnetwork.NeuralNetworkType;
import com.medallia.word2vec.util.Format;
import com.rapidminer.Process;
import com.rapidminer.ProcessStateListener;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.AttributeRole;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DoubleArrayDataRow;
import com.rapidminer.example.table.GrowingExampleTable;
import com.rapidminer.example.utils.ExampleSetBuilder;
import com.rapidminer.example.utils.ExampleSets;
import com.rapidminer.extension.datasearch.collection.DataSearchOperatorCollection;
import com.rapidminer.extension.datasearch.exampleset.DataSearchExampleSet;
import com.rapidminer.extension.datasearch.json.MetaDataTable;
import com.rapidminer.extension.datasearch.substrings.AutomatonMatcher;
import com.rapidminer.extension.datasearch.substrings.BoyerMooreWiki;
import com.rapidminer.extension.datasearch.substrings.KMP;
import com.rapidminer.extension.datasearch.substrings.NGram;
import com.rapidminer.extension.datasearch.substrings.RabinKarp;
import com.rapidminer.extension.datasearch.substrings.ZMatcher;
import com.rapidminer.gui.RapidMinerGUI;
import com.rapidminer.operator.IOObjectCollection;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.error.AttributeNotFoundError;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.preprocessing.GuessValueTypes;
import com.rapidminer.operator.preprocessing.MaterializeDataInMemory;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeAttribute;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.parameter.conditions.BooleanParameterCondition;
import com.rapidminer.parameter.conditions.EqualTypeCondition;
import com.rapidminer.tools.LogService;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.language.Soundex;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.similarity.CosineDistance;
import org.apache.commons.text.similarity.FuzzyScore;
import org.apache.commons.text.similarity.JaccardSimilarity;
import org.apache.commons.text.similarity.JaroWinklerDistance;
import org.apache.commons.text.similarity.LevenshteinDistance;
import org.apache.commons.text.similarity.LongestCommonSubsequenceDistance;
import org.apache.log4j.Priority;
import org.apache.log4j.spi.LocationInfo;
import org.hsqldb.Tokens;

/* loaded from: input_file:com/rapidminer/extension/datasearch/operator/CreateCorrespondencesOperator.class */
public class CreateCorrespondencesOperator extends Operator {
    // ---- Working state -------------------------------------------------------------------
    // All of these are reset to null in the constructor. validate() (directly or through its
    // helpers) assigns the input collection, the query table, the two name strings and the
    // two growing tables; doWork() delivers the two correspondence example sets.
    private IOObjectCollection<ExampleSet> inputCollection;
    private ExampleSet queryTable;
    private ExampleSet correspondencesSchemaLevel;
    private ExampleSet correspondencesInstanceLevel;
    private GrowingExampleTable correspondencesSchemaLevelGrowingExampleTable;
    private GrowingExampleTable correspondencesInstanceLevelGrowingExampleTable;
    private String subjectIdAttributeNameOfQueryTable;
    private String extendedAttributeNameSuffixed;

    // ---- Ports (created in the constructor) ----------------------------------------------
    private InputPort queryTableInputPort;
    private InputPort collectionInputPort;
    private OutputPort exampleSetCollection;
    private OutputPort exampleSetSchemaCorrespondences;
    private OutputPort exampleSetInstancesCorrespondences;

    // ---- Parameter keys and annotation names ---------------------------------------------
    // Several of these strings are passed to getParameter*/getParameterAs* calls in this
    // class, either through the constant or as an identical literal.
    public static final String PARAMETER_EXTENDED_ATTRIBUTE = "extended attribute";
    public static final String PARAMETER_SUBJECT_ID = "subject id";
    public static final String PARAMETER_TABLE_DENSITY = "minimum density";
    public static final String PARAMETER_TABLE_RATIO = "minimum ratio";
    public static final String PARAMETER_TABLE_COVERAGE = "minimum coverage";
    public static final String PARAMETER_APPLY_CORRELATION_FILTER = "apply correlation";
    public static final String PARAMETER_CORRELATION_ATTRIBUTE = "correlation attribute";
    public static final String PARAMETER_CORRELATION_ATTRIBUTE_WEIGHT = "minimum correlation weight";
    public static final String PARAMETER_DECIMAL_POINT_DELIMITER = "decimal point character for type guess";
    public static final String ANNOTATION_TABLE = "Table or Page Title";
    public static final String ANNOTATION_DOCUMENT = "Document Title";
    public static final String PARAMETER_ATTRIBUTE_MATCHING_METHOD = "attribute matching method";
    public static final String PARAMETER_SUBJECT_ID_MINIMUM_UNIQUENESS = "minimum uniqueness";
    public static final String PARAMETER_SHOW_MATCHED_SUBJECT_IDS = "show detected subject identifier(s)";
    public static final String PARAMETER_APPLY_SCHEMA_MATCHINGS = "compute schema matchings";
    public static final String PARAMETER_SCHEMA_MATCHING_METHOD = "schema matching method";
    // NOTE(review): this value ends with a trailing space - confirm whether that is intentional.
    public static final String PARAMETER_CONFLICT_RESOLUTION_METHOD = "conflict resolution ";

    // Values line up positionally with the entries of CONFLICT_RESOLUTION_METHODS.
    public static final int RESOLUTION_METHOD_JACCARD = 0;
    public static final int RESOLUTION_METHOD_LEVENSHTEIN = 1;
    public static final int RESOLUTION_METHOD_JAROWINKLER = 2;

    // Values line up positionally with the entries of SUBJECT_ID_MATCHING_METHODS.
    public static final int METHOD_TYPE_EQUALS = 0;
    public static final int METHOD_TYPE_CONTAINS = 1;
    public static final int METHOD_TYPE_CONTAINS_ANY = 2;
    public static final int METHOD_TYPE_REGEX_PATTERN = 3;
    public static final int METHOD_TYPE_FUZZY_SCORE = 4;
    public static final int METHOD_TYPE_LEVENSHTEIN_DISTANCE = 5;
    public static final int METHOD_TYPE_NGRAM_DISTANCE = 6;
    public static final int METHOD_TYPE_COSINE_DISTANCE = 7;
    public static final int METHOD_TYPE_JARO_WINKLER_DISTANCE = 8;
    public static final int METHOD_TYPE_JACCARD_SIMILARITY = 9;
    public static final int METHOD_TYPE_LONGEST_COMMON_SUBSEQUENCE_DISTANCE = 10;
    public static final int METHOD_TYPE_PHONETIC_ENCODING_WITH_LEVENSHTEIN_DISTANCE = 11;
    public static final int METHOD_TYPE_KMP_ALGORITHM = 12;
    public static final int METHOD_TYPE_FINITE_AUTOMATON_MATCH = 13;
    public static final int METHOD_TYPE_RABIN_KARP_ALGORITHM = 14;
    public static final int METHOD_TYPE_Z_ALGORITHM = 15;
    public static final int METHOD_TYPE_BOYER_MOORE_ALGORITHM = 16;

    // ---- Threshold parameter keys for subject-id matching --------------------------------
    public static final String SUBJECT_ID_FUZZY_SCORE_MINIMUM = "minimum fuzzy score";
    public static final String SUBJECT_ID_LEVENSHTEIN_SIMILARITY_MINIMUM = "minimum levenshtein similarity";
    public static final String SUBJECT_ID_NGRAM_SIMILARITY_MINIMUM = "minimum n-gram similarity";
    public static final String SUBJECT_ID_NGRAM_N_VALUE = "n value for n-gram";
    public static final String SUBJECT_ID_COSINE_SIMILARITY_MINIMUM = "minimum cosine similarity";
    public static final String SUBJECT_ID_JARO_WINKLER_SIMILARITY_MINIMUM = "minimum jaro winkler similarity";
    public static final String SUBJECT_ID_JACCARD_SIMILARITY_MINIMUM = "minimum jaccard similarity";
    public static final String SUBJECT_ID_LONGEST_COMMON_SUBSEQUENCE_SIMILARITY_MINIMUM = "minimum subsequence similarity";
    public static final String SUBJECT_ID_PHONETIC_ENCODING_WITH_LEVENSHTEIN_SIMILARITY_MINIMUM = "minimum phonetic levenshtein similarity";

    // ---- Schema matching parameter keys (including Word2Vec settings) --------------------
    public static final String SCHEMA_MATCH_EQUALS_ABOVE = "minimum equals score";
    public static final String SCHEMA_MATCH_CONTAINS_ABOVE = "minimum contains score";
    public static final String SCHEMA_MATCH_CONTAINS_ANY_ABOVE = "minimum contains any score";
    public static final String SCHEMA_MATCH_JACCARD_SIMILARITY_ABOVE = "minimum jaccard similarity score";
    public static final String SCHEMA_MATCH_WORD2VEC_NEURAL_NET_CHOICE = "type of neural network";
    // Values line up positionally with the entries of SCHEMA_MATCH_WORD2VEC_NEURAL_NET_TYPES.
    public static final int SCHEMA_MATCH_WORD2VEC_NEURAL_NET_TYPE_CBOW = 0;
    public static final int SCHEMA_MATCH_WORD2VEC_NEURAL_NET_TYPE_SKIPGRAM = 1;
    public static final String SCHEMA_MATCH_WORD2VEC_COSINE_SIMILARITY_ABOVE = "minimum table cosine similarity";
    public static final String SCHEMA_MATCH_WORD2VEC_MINVOCABFREQUENCY = "minimal vocabulary frequency";
    public static final String SCHEMA_MATCH_WORD2VEC_LAYERSIZE = "size of the generated vector";
    public static final String SCHEMA_MATCH_WORD2VEC_WINDOWSIZE = "size of window";
    public static final String SCHEMA_MATCH_WORD2VEC_USE_NEGATIVE_SAMPLES = "number of negative samples";
    public static final String SCHEMA_MATCH_WORD2VEC_DOWN_SAMPLING_RATE = "down sampling rate for high occurrence words";
    public static final String SCHEMA_MATCH_WORD2VEC_ITERATIONS = "number of iterations";
    public static final String SCHEMA_MATCH_WORD2VEC_INITIAL_LEARNING_RATE = "initial learning rate";
    // Set to 1 in the constructor.
    private int processors;
    // Values line up positionally with the last two entries of SCHEMA_MATCHING_METHODS.
    public static final int SCHEMA_METHOD_TYPE_JACCARD_SIMILARITY = 3;
    public static final int SCHEMA_METHOD_TYPE_WORD2VEC_SIMILARITY = 4;
    public static final String ROLE_ADDITIONAL_ATTRIBUTE_MATCH = "Extension_Attribute_Match";
    // Same text as the literal read by setBreakPointsAndListenersForManualRefinements().
    public static final String PARAMETER_CONTROL = "apply manual refinements";
    public static final int MAX_LENGTH_OF_TABLE_NAME_IN_TREE = 30;
    public static final String PARAMETER_BALANCE_COVERAGE_AND_PRECISION = "balance coverage and precision";
    // Shared logger obtained from RapidMiner's LogService root.
    private static final Logger LOGGER = LogService.getRoot();

    // ---- Display names; array positions correspond to the int constants above ------------
    public static final String[] CONFLICT_RESOLUTION_METHODS = {"Maximum Jaccard Similarity", "Maximum Levenshtein Similarity", "Maximum JaroWinkler Similarity"};
    public static final String[] SUBJECT_ID_MATCHING_METHODS = {"Equals", "Contains", "Contains Any", "Regex Pattern", "Fuzzy Score", "Levenshtein Distance", "N-Gram Distance", "Cosine Distance", "Jaro Winkler Similarity", "Jaccard Similarity", "Longest Common Subsequence", "English Phonetic Encoding with Levenshtein", "Knuth-Morris-Pratt Algorithm", "Finite Automaton Match", "Rabin-Karp Algorithm", "Z Algorithm (Linear Time Pattern Search)", "Boyer-Moore Algorithm"};
    public static final String[] SCHEMA_MATCHING_METHODS = {"Equals", "Contains", "Contains Any", "Jaccard Similarity", "Word2Vec"};
    public static final String[] SCHEMA_MATCH_WORD2VEC_NEURAL_NET_TYPES = {"CBOW", "Skip-Gram"};

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:com/rapidminer/extension/datasearch/operator/CreateCorrespondencesOperator$CoverageAndRatio.class */
    /**
     * Mutable pair of two {@code double} measurements, coverage and ratio.
     * Both start at {@code 0.0} and are changed through the setters.
     */
    public class CoverageAndRatio {
        private double ratio;
        private double coverage;

        /** Creates a pair with both values left at their default of {@code 0.0}. */
        protected CoverageAndRatio() {
        }

        /** @return the stored ratio */
        public double getRatio() {
            return ratio;
        }

        /** @param newRatio value to store as the ratio */
        public void setRatio(double newRatio) {
            ratio = newRatio;
        }

        /** @return the stored coverage */
        public double getCoverage() {
            return coverage;
        }

        /** @param newCoverage value to store as the coverage */
        public void setCoverage(double newCoverage) {
            coverage = newCoverage;
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:com/rapidminer/extension/datasearch/operator/CreateCorrespondencesOperator$PropertiesOfMatchedTable.class */
    /**
     * Mutable record of five measurements: coverage, ratio and density as {@code double},
     * plus counts of empty and non-empty values stored as {@code int}.
     *
     * <p>The two count getters return {@code double} even though the counts are stored as
     * {@code int}; the value is widened on return.
     */
    public class PropertiesOfMatchedTable {
        private int nonEmptyValues;
        private int emptyValues;
        private double density;
        private double ratio;
        private double coverage;

        /** Creates an instance with every measurement at zero. */
        public PropertiesOfMatchedTable() {
        }

        /**
         * Creates an instance with all five measurements supplied.
         *
         * @param coverageValue initial coverage
         * @param ratioValue    initial ratio
         * @param densityValue  initial density
         * @param emptyCount    initial number of empty values
         * @param nonEmptyCount initial number of non-empty values
         */
        public PropertiesOfMatchedTable(double coverageValue, double ratioValue, double densityValue, int emptyCount, int nonEmptyCount) {
            coverage = coverageValue;
            ratio = ratioValue;
            density = densityValue;
            emptyValues = emptyCount;
            nonEmptyValues = nonEmptyCount;
        }

        /** @return the stored coverage */
        public double getCoverage() {
            return coverage;
        }

        /** @param coverageValue value to store as the coverage */
        public void setCoverage(double coverageValue) {
            coverage = coverageValue;
        }

        /** @return the stored ratio */
        public double getRatio() {
            return ratio;
        }

        /** @param ratioValue value to store as the ratio */
        public void setRatio(double ratioValue) {
            ratio = ratioValue;
        }

        /** @return the stored density */
        public double getDensity() {
            return density;
        }

        /** @param densityValue value to store as the density */
        public void setDensity(double densityValue) {
            density = densityValue;
        }

        /** @return the empty-value count, widened from {@code int} to {@code double} */
        public double getEmptyValues() {
            return emptyValues;
        }

        /** @param emptyCount number of empty values to store */
        public void setEmptyValues(int emptyCount) {
            emptyValues = emptyCount;
        }

        /** @return the non-empty-value count, widened from {@code int} to {@code double} */
        public double getNonEmptyValues() {
            return nonEmptyValues;
        }

        /** @param nonEmptyCount number of non-empty values to store */
        public void setNonEmptyValues(int nonEmptyCount) {
            nonEmptyValues = nonEmptyCount;
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:com/rapidminer/extension/datasearch/operator/CreateCorrespondencesOperator$SetOfCollectionAndList.class */
    /**
     * Mutable holder bundling five references: a collection of example sets, a list of
     * subject-id attribute names, a list of {@link PropertiesOfMatchedTable}, a fused example
     * set, and a map from attribute name to collection index. The holder performs no copying
     * or validation; it hands back exactly the references it was given.
     */
    public class SetOfCollectionAndList {
        private Map<String, Integer> attributeNameToCollectionIndex;
        private ExampleSet fusedTable;
        private List<PropertiesOfMatchedTable> propertiesOfMatchedTables;
        private List<String> nameOfMatchedSubjectIdOfTable;
        private IOObjectCollection<ExampleSet> collection;

        /** Creates a holder with every reference left {@code null}. */
        public SetOfCollectionAndList() {
        }

        /**
         * Creates a fully populated holder.
         *
         * @param tables          collection of example sets
         * @param subjectIdNames  names of the matched subject-id attributes
         * @param fused           fused example set
         * @param nameToIndex     attribute name to collection index
         * @param tableProperties properties of the matched tables
         */
        public SetOfCollectionAndList(IOObjectCollection<ExampleSet> tables, List<String> subjectIdNames, ExampleSet fused, Map<String, Integer> nameToIndex, List<PropertiesOfMatchedTable> tableProperties) {
            collection = tables;
            nameOfMatchedSubjectIdOfTable = subjectIdNames;
            fusedTable = fused;
            attributeNameToCollectionIndex = nameToIndex;
            propertiesOfMatchedTables = tableProperties;
        }

        /** @return the stored collection of example sets (may be {@code null}) */
        public IOObjectCollection<ExampleSet> getCollection() {
            return collection;
        }

        /** @param tables collection of example sets to store */
        public void setCollection(IOObjectCollection<ExampleSet> tables) {
            collection = tables;
        }

        /** @return the stored list of matched subject-id attribute names (may be {@code null}) */
        public List<String> getNameOfMatchedSubjectIdAttribute() {
            return nameOfMatchedSubjectIdOfTable;
        }

        /** @param subjectIdNames list of matched subject-id attribute names to store */
        public void setNameOfMatchedSubjectIdAttribute(List<String> subjectIdNames) {
            nameOfMatchedSubjectIdOfTable = subjectIdNames;
        }

        /** @return the stored fused example set (may be {@code null}) */
        public ExampleSet getFusedTable() {
            return fusedTable;
        }

        /** @param fused fused example set to store */
        public void setFusedTable(ExampleSet fused) {
            fusedTable = fused;
        }

        /** @return the stored attribute-name to collection-index map (may be {@code null}) */
        public Map<String, Integer> getAttributeNameToCollectionIndex() {
            return attributeNameToCollectionIndex;
        }

        /** @param nameToIndex attribute-name to collection-index map to store */
        public void setAttributeNameToCollectionIndex(Map<String, Integer> nameToIndex) {
            attributeNameToCollectionIndex = nameToIndex;
        }

        /** @return the stored list of matched-table properties (may be {@code null}) */
        public List<PropertiesOfMatchedTable> getPropertiesOfMatchedTables() {
            return propertiesOfMatchedTables;
        }

        /** @param tableProperties list of matched-table properties to store */
        public void setPropertiesOfMatchedTables(List<PropertiesOfMatchedTable> tableProperties) {
            propertiesOfMatchedTables = tableProperties;
        }
    }

    /**
     * Builds the operator: clears all working state, creates the two input ports and three
     * output ports, and registers one generation rule per output port.
     *
     * @param operatorDescription description forwarded to the {@code Operator} super constructor
     */
    public CreateCorrespondencesOperator(OperatorDescription operatorDescription) {
        super(operatorDescription);
        processors = 1;

        // Working state is explicitly cleared.
        queryTable = null;
        inputCollection = null;
        subjectIdAttributeNameOfQueryTable = null;
        extendedAttributeNameSuffixed = null;
        correspondencesSchemaLevel = null;
        correspondencesSchemaLevelGrowingExampleTable = null;
        correspondencesInstanceLevel = null;
        correspondencesInstanceLevelGrowingExampleTable = null;

        // Ports are created in a fixed order: query table first, then the collection,
        // followed by the three outputs.
        queryTableInputPort = getInputPorts().createPort("example set", ExampleSet.class);
        collectionInputPort = getInputPorts().createPort("collection of example sets", IOObjectCollection.class);
        exampleSetCollection = getOutputPorts().createPort("collection of data search example sets");
        exampleSetSchemaCorrespondences = getOutputPorts().createPort("schema level correspondences");
        exampleSetInstancesCorrespondences = getOutputPorts().createPort("instance level correspondences");

        // One generation rule per output port, naming the class it is registered with.
        getTransformer().addGenerationRule(exampleSetCollection, DataSearchOperatorCollection.class);
        getTransformer().addGenerationRule(exampleSetSchemaCorrespondences, ExampleSet.class);
        getTransformer().addGenerationRule(exampleSetInstancesCorrespondences, ExampleSet.class);
    }

    /**
     * Runs the operator: validates the inputs, installs the breakpoint and process listener,
     * filters the input collection, and delivers the resulting collection together with the
     * schema-level and instance-level correspondence example sets.
     *
     * @throws OperatorException if validation or any downstream step fails
     */
    @Override
    public void doWork() throws OperatorException {
        validate();
        setBreakPointsAndListenersForManualRefinements();

        // May be null when the input collection is missing or empty.
        SetOfCollectionAndList matchedTables = filterCollectionByUserCriteria();

        exampleSetCollection.deliver(createDataSearchOutputCollection(matchedTables));
        exampleSetSchemaCorrespondences.deliver(correspondencesSchemaLevel);
        exampleSetInstancesCorrespondences.deliver(correspondencesInstanceLevel);
    }

    /**
     * Arms breakpoint position 1 on this operator when {@link #PARAMETER_CONTROL} is enabled
     * and registers a process state listener that, once the process stops, disarms that
     * breakpoint again and saves the root process.
     *
     * <p>NOTE(review): a new listener is registered on every call and never removed, so
     * repeated executions accumulate listeners - confirm whether the process clears them.
     */
    private void setBreakPointsAndListenersForManualRefinements() {
        if (getParameterAsBoolean(PARAMETER_CONTROL)) {
            // Position 1 is presumably the "after" breakpoint - confirm against BreakpointListener.
            setBreakpoint(1, true);
        }
        getProcess().addProcessStateListener(new ProcessStateListener() {
            @Override
            public void stopped(Process process) {
                if (CreateCorrespondencesOperator.this.getParameterAsBoolean(PARAMETER_CONTROL)) {
                    CreateCorrespondencesOperator.this.setBreakpoint(1, false);
                }
                try {
                    CreateCorrespondencesOperator.this.getRoot().getProcess().save();
                    // Guard against a missing main frame (e.g. no GUI running); calling
                    // through an absent frame would throw a NullPointerException here.
                    if (RapidMinerGUI.getMainFrame() != null) {
                        RapidMinerGUI.getMainFrame().processHasBeenSaved();
                    }
                } catch (IOException e) {
                    // Keep the cause so the reason for the failed save is visible in the log.
                    CreateCorrespondencesOperator.LOGGER.log(Level.WARNING, "Problem saving process", e);
                }
            }

            @Override
            public void resumed(Process process) {
                // nothing to do
            }

            @Override
            public void paused(Process process) {
                // nothing to do
            }

            @Override
            public void started(Process process) {
                // nothing to do
            }
        });
    }

    /**
     * Reads both inputs and prepares the state needed for a run.
     *
     * <p>Steps, in order: fetch the input collection and pass it through
     * {@link #setUUIDasTableSourceIfEmpty}; fetch the query table; look up the attribute named
     * by {@link #PARAMETER_SUBJECT_ID} and mark it with the subject role; read
     * {@link #PARAMETER_EXTENDED_ATTRIBUTE} and derive the suffixed extended attribute name;
     * finally create both correspondence tables.
     *
     * @throws UserError if the query table is missing, a required parameter is undefined, or
     *                   the subject id attribute does not exist in the query table
     */
    private void validate() throws UserError {
        this.inputCollection = this.collectionInputPort.getDataOrNull(IOObjectCollection.class);
        this.inputCollection = setUUIDasTableSourceIfEmpty(this.inputCollection);

        // getData reports a missing input as a UserError; the query table is dereferenced
        // right below, so a null from getDataOrNull would surface as a NullPointerException.
        this.queryTable = this.queryTableInputPort.getData(ExampleSet.class);

        this.subjectIdAttributeNameOfQueryTable = getParameter(PARAMETER_SUBJECT_ID);
        Attribute subjectIdAttribute = this.queryTable.getAttributes().get(this.subjectIdAttributeNameOfQueryTable);
        if (subjectIdAttribute == null) {
            throw new AttributeNotFoundError(this, PARAMETER_SUBJECT_ID, this.subjectIdAttributeNameOfQueryTable);
        }
        this.queryTable.getAttributes().setSpecialAttribute(subjectIdAttribute, DataSearchOperator.ROLE_SUBJECTL_ATTRIBUTE);

        String extendedAttributeName = getParameter(PARAMETER_EXTENDED_ATTRIBUTE);
        if (extendedAttributeName == null) {
            throw new AttributeNotFoundError(this, PARAMETER_EXTENDED_ATTRIBUTE, extendedAttributeName);
        }
        addExtendedAttributeToQueryTable(extendedAttributeName);

        initializeSchemaCorrespondenceGrowingExampleTable();
        initilalizeInstanceCorrespondenceGrowingExampleTable();
    }

    /**
     * Creates the empty schema-level correspondence table with the columns
     * {@code table}, {@code attribute} and {@code targetSchema}, in that order, and stores it
     * in {@code correspondencesSchemaLevelGrowingExampleTable}.
     */
    private void initializeSchemaCorrespondenceGrowingExampleTable() {
        List<Attribute> columns = new LinkedList<>();
        // Every column uses value type 1 (presumably Ontology.NOMINAL - confirm).
        for (String columnName : new String[]{"table", "attribute", "targetSchema"}) {
            columns.add(AttributeFactory.createAttribute(columnName, 1));
        }
        this.correspondencesSchemaLevelGrowingExampleTable = ExampleSets.from(columns).build().getExampleTable();
    }

    /**
     * Creates the empty instance-level correspondence table with the columns
     * {@code table}, {@code instance}, {@code content} and {@code instanceFromTargetSchema},
     * in that order, and stores it in {@code correspondencesInstanceLevelGrowingExampleTable}.
     */
    private void initilalizeInstanceCorrespondenceGrowingExampleTable() {
        String[] columnNames = {"table", "instance", "content", "instanceFromTargetSchema"};
        // Value type per column; 1 and 5 are presumably Ontology.NOMINAL and Ontology.STRING - confirm.
        int[] valueTypes = {1, 1, 5, 1};

        List<Attribute> columns = new LinkedList<>();
        for (int c = 0; c < columnNames.length; c++) {
            columns.add(AttributeFactory.createAttribute(columnNames[c], valueTypes[c]));
        }
        this.correspondencesInstanceLevelGrowingExampleTable = ExampleSets.from(columns).build().getExampleTable();
    }

    /**
     * Returns a new collection holding a materialized copy of every example set in the given
     * collection, making sure each copy ends up with a non-blank source.
     *
     * <p>When a copy has no source (null or blank), the text after the last separator of the
     * original set's {@code "Source"} annotation is used. If that annotation is absent, has no
     * separator, or leaves nothing usable after the separator (for instance because it ends
     * with the separator), a random UUID is used instead.
     *
     * @param iOObjectCollection input collection; may be {@code null} or empty
     * @return a new collection, empty when the input is {@code null} or empty
     */
    private IOObjectCollection<ExampleSet> setUUIDasTableSourceIfEmpty(IOObjectCollection<ExampleSet> iOObjectCollection) {
        IOObjectCollection<ExampleSet> result = new IOObjectCollection<>();
        if (iOObjectCollection == null || iOObjectCollection.size() <= 0) {
            return result;
        }
        for (ExampleSet exampleSet : iOObjectCollection.getObjects()) {
            // The annotation is read from the original set, before materializing.
            // Tokens.T_DIVIDE is presumably "/" (constant substituted by the decompiler) - confirm.
            String annotatedSource = exampleSet.getAnnotations().get("Source");
            String nameFromAnnotation = null;
            if (annotatedSource != null && annotatedSource.contains(Tokens.T_DIVIDE)) {
                nameFromAnnotation = annotatedSource.substring(annotatedSource.lastIndexOf(Tokens.T_DIVIDE) + 1);
            }

            ExampleSet materialized = MaterializeDataInMemory.materializeExampleSet(exampleSet);
            String currentSource = materialized.getSource();
            if (currentSource == null || currentSource.trim().isEmpty()) {
                // A blank derived name would leave the table without a usable source,
                // so it is treated the same as a missing annotation.
                if (nameFromAnnotation == null || nameFromAnnotation.trim().isEmpty()) {
                    String generatedId = UUID.randomUUID().toString();
                    LOGGER.log(Level.INFO, "UUID generated =" + generatedId);
                    materialized.setSource(generatedId);
                } else {
                    LOGGER.log(Level.INFO, "Using  Source annotation as Table.Source =" + nameFromAnnotation);
                    materialized.setSource(nameFromAnnotation);
                }
            }
            result.add(materialized);
        }
        return result;
    }

    /**
     * Chooses a name for the extended attribute that does not collide with any attribute of
     * the query table and stores it in {@code extendedAttributeNameSuffixed}.
     *
     * <p>The requested name is used as-is when it is free. Otherwise {@code _1}, {@code _2},
     * ... is appended to the <em>requested</em> name until an unused name is found, so the
     * result is {@code name_3} rather than a cumulative {@code name_1_2_3}.
     *
     * <p>Despite its name, this method only computes the name; it does not modify the query
     * table.
     *
     * @param str requested name of the extended attribute
     */
    private void addExtendedAttributeToQueryTable(String str) {
        // Collect existing names once; a set keeps the collision check cheap.
        Set<String> existingNames = new HashSet<>();
        Iterator<Attribute> allAttributes = this.queryTable.getAttributes().allAttributes();
        while (allAttributes.hasNext()) {
            existingNames.add(allAttributes.next().getName());
        }

        String candidate = str;
        int suffix = 0;
        while (existingNames.contains(candidate)) {
            suffix++;
            // Always derive from the requested name so suffixes do not pile up.
            candidate = str + "_" + suffix;
        }
        this.extendedAttributeNameSuffixed = candidate;
        LOGGER.log(Level.INFO, "*+* Finalized Extended-Attribute suffixed name = " + this.extendedAttributeNameSuffixed);
    }

    /**
     * Filters the input collection when there is something to filter.
     *
     * @return the result of {@link #findTablesWithKeywordAndSubjectIdAndAcceptableThresholds()},
     *         or {@code null} when the input collection is {@code null} or empty
     * @throws UserError propagated from the filtering step
     */
    private SetOfCollectionAndList filterCollectionByUserCriteria() throws UserError {
        boolean nothingToFilter = this.inputCollection == null || this.inputCollection.size() <= 0;
        if (nothingToFilter) {
            return null;
        }
        return findTablesWithKeywordAndSubjectIdAndAcceptableThresholds();
    }

    private SetOfCollectionAndList findTablesWithKeywordAndSubjectIdAndAcceptableThresholds() throws UserError {
        SetOfCollectionAndList setOfCollectionAndList = new SetOfCollectionAndList();
        IOObjectCollection<ExampleSet> iOObjectCollection = new IOObjectCollection<>();
        ArrayList arrayList = new ArrayList();
        ArrayList arrayList2 = new ArrayList();
        List<String> listOfSubjectIdsInQueryTable = getListOfSubjectIdsInQueryTable();
        try {
            String parameterAsString = getParameterAsString(PARAMETER_EXTENDED_ATTRIBUTE);
            String parameterAsString2 = getParameterAsString("subject id");
            String lowerCase = parameterAsString2.trim().toLowerCase();
            int parameterAsInt = getParameterAsInt(PARAMETER_ATTRIBUTE_MATCHING_METHOD);
            double parameterAsDouble = getParameterAsDouble("minimum density");
            double parameterAsDouble2 = getParameterAsDouble("minimum ratio");
            double parameterAsDouble3 = getParameterAsDouble("minimum coverage");
            double parameterAsDouble4 = getParameterAsDouble("minimum uniqueness");
            boolean parameterAsBoolean = getParameterAsBoolean(PARAMETER_BALANCE_COVERAGE_AND_PRECISION);
            Iterator it = findExtendedKeyword(parameterAsString, parameterAsInt).getCollection().getObjects().iterator();
            if (!it.hasNext()) {
                throw new UserError(this, "create_correspondences.101", new Object[]{parameterAsString});
            }
            boolean z = true;
            while (it.hasNext()) {
                ExampleSet<Example> exampleSet = (ExampleSet) it.next();
                Iterator it2 = exampleSet.getAttributes().iterator();
                while (true) {
                    if (it2.hasNext()) {
                        Attribute attribute = (Attribute) it2.next();
                        String name = attribute.getName();
                        if (lowerCase != null && name != null && subjectIdMatchConfirmed(lowerCase, name.trim().toLowerCase(), parameterAsInt) && isPrimaryIdentifier(attribute, exampleSet, parameterAsDouble4)) {
                            z = false;
                            int i = 0;
                            boolean z2 = false;
                            HashMap hashMap = new HashMap();
                            ArrayList arrayList3 = new ArrayList();
                            if (!parameterAsBoolean) {
                                z2 = false;
                                int i2 = 0;
                                new ArrayList();
                                for (String str : listOfSubjectIdsInQueryTable) {
                                    HashMap hashMap2 = new HashMap();
                                    ArrayList<Integer> arrayList4 = new ArrayList<>();
                                    int i3 = 0;
                                    Iterator it3 = exampleSet.iterator();
                                    while (it3.hasNext()) {
                                        String valueAsString = ((Example) it3.next()).getValueAsString(attribute);
                                        if (valueAsString != null && valueAsString.trim().length() > 0 && str.trim().length() > 0 && !valueAsString.equalsIgnoreCase(LocationInfo.NA) && !str.equalsIgnoreCase(LocationInfo.NA) && subjectIdMatchConfirmed(str.trim().toLowerCase(), valueAsString.trim().toLowerCase(), parameterAsInt)) {
                                            arrayList4.add(Integer.valueOf(i3));
                                            if (hashMap2.containsKey(str.trim().toLowerCase())) {
                                                hashMap2.get(str.trim().toLowerCase()).add(valueAsString.trim().toLowerCase());
                                            } else {
                                                ArrayList<String> arrayList5 = new ArrayList<>();
                                                arrayList5.add(valueAsString.trim().toLowerCase());
                                                hashMap2.put(str.trim().toLowerCase(), arrayList5);
                                            }
                                        }
                                        i3++;
                                    }
                                    hashMap.put(Integer.valueOf(i2), resolveRowLevelConflictFromIsQueryTable(str, hashMap2, arrayList4));
                                    if (!hashMap2.containsKey(str.trim().toLowerCase())) {
                                    }
                                    i2++;
                                    arrayList3.add(hashMap2);
                                }
                            } else if (parameterAsBoolean) {
                                if (listOfSubjectIdsInQueryTable.size() <= exampleSet.size()) {
                                    z2 = false;
                                    int i4 = 0;
                                    new ArrayList();
                                    for (String str2 : listOfSubjectIdsInQueryTable) {
                                        HashMap hashMap3 = new HashMap();
                                        ArrayList<Integer> arrayList6 = new ArrayList<>();
                                        int i5 = 0;
                                        Iterator it4 = exampleSet.iterator();
                                        while (it4.hasNext()) {
                                            String valueAsString2 = ((Example) it4.next()).getValueAsString(attribute);
                                            if (valueAsString2 != null && valueAsString2.trim().length() > 0 && str2.trim().length() > 0 && !valueAsString2.equalsIgnoreCase(LocationInfo.NA) && !str2.equalsIgnoreCase(LocationInfo.NA) && subjectIdMatchConfirmed(str2.trim().toLowerCase(), valueAsString2.trim().toLowerCase(), parameterAsInt)) {
                                                arrayList6.add(Integer.valueOf(i5));
                                                if (hashMap3.containsKey(str2.trim().toLowerCase())) {
                                                    hashMap3.get(str2.trim().toLowerCase()).add(valueAsString2.trim().toLowerCase());
                                                } else {
                                                    ArrayList<String> arrayList7 = new ArrayList<>();
                                                    arrayList7.add(valueAsString2.trim().toLowerCase());
                                                    hashMap3.put(str2.trim().toLowerCase(), arrayList7);
                                                }
                                            }
                                            i5++;
                                        }
                                        hashMap.put(Integer.valueOf(i4), resolveRowLevelConflictFromIsQueryTable(str2, hashMap3, arrayList6));
                                        if (!hashMap3.containsKey(str2.trim().toLowerCase())) {
                                        }
                                        i4++;
                                        arrayList3.add(hashMap3);
                                    }
                                } else if (exampleSet.size() < listOfSubjectIdsInQueryTable.size()) {
                                    z2 = true;
                                    int i6 = 0;
                                    new ArrayList();
                                    for (Example example : exampleSet) {
                                        HashMap hashMap4 = new HashMap();
                                        ArrayList<Integer> arrayList8 = new ArrayList<>();
                                        String valueAsString3 = example.getValueAsString(attribute);
                                        for (int i7 = 0; i7 < listOfSubjectIdsInQueryTable.size(); i7++) {
                                            String str3 = listOfSubjectIdsInQueryTable.get(i7);
                                            if (valueAsString3 != null && valueAsString3.trim().length() > 0 && str3.trim().length() > 0 && !valueAsString3.equalsIgnoreCase(LocationInfo.NA) && !str3.equalsIgnoreCase(LocationInfo.NA) && subjectIdMatchConfirmed(str3.trim().toLowerCase(), valueAsString3.trim().toLowerCase(), parameterAsInt)) {
                                                arrayList8.add(Integer.valueOf(i7));
                                                if (hashMap4.containsKey(valueAsString3.trim().toLowerCase())) {
                                                    hashMap4.get(valueAsString3.trim().toLowerCase()).add(str3.trim().toLowerCase());
                                                } else {
                                                    ArrayList<String> arrayList9 = new ArrayList<>();
                                                    arrayList9.add(str3.trim().toLowerCase());
                                                    hashMap4.put(valueAsString3.trim().toLowerCase(), arrayList9);
                                                }
                                            }
                                        }
                                        hashMap.put(Integer.valueOf(i6), dontResolveRowLevelConflictFromIsCandidateTable(valueAsString3, hashMap4, arrayList8));
                                        if (!hashMap4.containsKey(valueAsString3.trim().toLowerCase())) {
                                        }
                                        i6++;
                                        arrayList3.add(hashMap4);
                                    }
                                }
                            }
                            Iterator it5 = exampleSet.iterator();
                            while (it5.hasNext()) {
                                String valueAsString4 = ((Example) it5.next()).getValueAsString(attribute);
                                if (valueAsString4.length() > 0 && !valueAsString4.equalsIgnoreCase(LocationInfo.NA)) {
                                    i++;
                                }
                            }
                            int size = exampleSet.size();
                            double d = i / size;
                            if (d >= parameterAsDouble) {
                                CoverageAndRatio computeCoverageAndRatio = computeCoverageAndRatio(exampleSet.getSource(), z2, hashMap, arrayList3, this.queryTable.size(), size);
                                double ratio = computeCoverageAndRatio.getRatio();
                                if (ratio >= parameterAsDouble2) {
                                    double coverage = computeCoverageAndRatio.getCoverage();
                                    if (coverage >= parameterAsDouble3) {
                                        iOObjectCollection.add(exampleSet);
                                        arrayList.add(name);
                                        LOGGER.log(Level.INFO, "Table/Attribute ACCEPTED: Subject-Id Found with Density = " + d + ", Ratio = " + ratio + " and Coverage = " + coverage + " are FINE!");
                                        updateInstanceCorrespondencesTable(exampleSet.getSource(), z2, hashMap, arrayList3);
                                        PropertiesOfMatchedTable propertiesOfMatchedTable = new PropertiesOfMatchedTable();
                                        propertiesOfMatchedTable.setCoverage(coverage);
                                        propertiesOfMatchedTable.setRatio(ratio);
                                        propertiesOfMatchedTable.setDensity(d);
                                        propertiesOfMatchedTable.setNonEmptyValues(i);
                                        propertiesOfMatchedTable.setEmptyValues(size - i);
                                        arrayList2.add(propertiesOfMatchedTable);
                                        break;
                                    }
                                    LOGGER.log(Level.INFO, "Attribute REJECTED: Subject-Id Found but Coverage < than desired. Looping over further attributes. Density = " + d + ", Ratio = " + ratio + " and Coverage = " + coverage);
                                } else {
                                    LOGGER.log(Level.INFO, "Attribute REJECTED: Subject-Id Found but Ratio < than desired. Looping over further attributes. Density = " + d + ", Ratio = " + ratio);
                                }
                            } else {
                                LOGGER.log(Level.INFO, "Attribute REJECTED: Subject-Id Found but Density < than desired. Looping over further attributes. Density = " + d);
                            }
                        }
                    }
                }
            }
            if (z) {
                throw new UserError(this, "create_correspondences.102", new Object[]{parameterAsString2});
            }
            setOfCollectionAndList.setCollection(iOObjectCollection);
            setOfCollectionAndList.setNameOfMatchedSubjectIdAttribute(arrayList);
            setOfCollectionAndList.setPropertiesOfMatchedTables(arrayList2);
            return filterCollectionBySchemaMatchings(setOfCollectionAndList, parameterAsString2);
        } catch (UndefinedParameterError e) {
            e.printStackTrace();
            throw new UserError(this, "create_correspondences.100");
        }
    }

    /**
     * Passes the matched row indices through without any similarity-based conflict resolution.
     *
     * <p>When exactly one index matched, a fresh single-element list holding that index is
     * returned. In every other case (no list, no match, or several matches) the given list
     * itself is handed back unchanged.
     *
     * @param str       value that was matched; not used by this method
     * @param map       matched values keyed by the value they were matched against; not used
     * @param arrayList indices of the matched rows, may be {@code null}
     * @return a new one-element list for a unique match, otherwise {@code arrayList} itself
     */
    public ArrayList<Integer> dontResolveRowLevelConflictFromIsCandidateTable(String str, Map<String, ArrayList<String>> map, ArrayList<Integer> arrayList) {
        boolean uniqueMatch = arrayList != null && arrayList.size() == 1;
        if (uniqueMatch) {
            ArrayList<Integer> onlyIndex = new ArrayList<>();
            onlyIndex.add(arrayList.get(0));
            return onlyIndex;
        }
        return arrayList;
    }

    /**
     * Reduces several matched row indices to the single best one by string similarity.
     *
     * <p>If exactly one index matched it is returned as-is. Otherwise the matched values stored
     * under the first key of {@code map} are scored against {@code str} with the similarity
     * selected by {@code PARAMETER_CONFLICT_RESOLUTION_METHOD} (0 = Jaccard, 1 = Levenshtein,
     * 2 = Jaro-Winkler), and the index at the position of the highest strictly positive score
     * is returned. Ties keep the earliest candidate.
     *
     * <p>The matched values and {@code arrayList} are treated as parallel lists; only positions
     * present in both are considered.
     *
     * @param str       value to compare the matched values against
     * @param map       matched values; only the list under the first key is inspected
     * @param arrayList indices of the matched rows, parallel to the matched values
     * @return a list with at most one index; empty when nothing matched, no candidate scored
     *         above zero, or the conflict-resolution parameter is undefined or unknown
     */
    public ArrayList<Integer> resolveRowLevelConflictFromIsQueryTable(String str, Map<String, ArrayList<String>> map, ArrayList<Integer> arrayList) {
        ArrayList<Integer> resolved = new ArrayList<>();
        if (arrayList != null && arrayList.size() == 1) {
            resolved.add(arrayList.get(0));
            return resolved;
        }
        // Without indices or matched values there is nothing to choose from.
        if (arrayList == null || map == null || map.isEmpty()) {
            return resolved;
        }
        ArrayList<String> candidates = map.get(map.keySet().iterator().next());
        if (candidates == null) {
            return resolved;
        }

        // The parameter does not change while scoring, so read it once. -1 selects no measure,
        // which leaves every score at zero and therefore the result empty.
        int method = -1;
        try {
            method = getParameterAsInt(PARAMETER_CONFLICT_RESOLUTION_METHOD);
        } catch (UndefinedParameterError e) {
            e.printStackTrace();
        }

        double bestScore = 0.0d;
        // Guard against the two parallel lists having different lengths.
        int comparable = Math.min(candidates.size(), arrayList.size());
        for (int position = 0; position < comparable; position++) {
            String candidate = candidates.get(position);
            double score;
            switch (method) {
                case 0:
                    score = getJaccardSimilarityOfTwoStrings(str, candidate);
                    break;
                case 1:
                    score = getLevenshteinSimilarityOfTwoStrings(str, candidate);
                    break;
                case 2:
                    score = getJaroWinklerSimilarityOfTwoStrings(str, candidate);
                    break;
                default:
                    score = 0.0d;
                    break;
            }
            if (score > bestScore) {
                bestScore = score;
                resolved.clear();
                resolved.add(arrayList.get(position));
            }
        }
        return resolved;
    }

    /**
     * Finds the position of the candidate that is most similar to the given string.
     *
     * <p>The similarity measure is chosen by {@code PARAMETER_CONFLICT_RESOLUTION_METHOD}
     * (0 = Jaccard, 1 = Levenshtein, 2 = Jaro-Winkler). Only a strictly higher score replaces
     * the current best, so ties keep the earliest candidate.
     *
     * @param str       value to compare every candidate against
     * @param arrayList candidate strings
     * @return index of the best-scoring candidate, or 0 when the list is empty, no candidate
     *         scores above zero, or the parameter is undefined or unknown
     */
    public int resolveConflictAndReturnIndex(String str, ArrayList<String> arrayList) {
        // Loop-invariant: read the parameter once instead of up to three times per candidate.
        // -1 selects no measure, which leaves every score at zero.
        int method = -1;
        try {
            method = getParameterAsInt(PARAMETER_CONFLICT_RESOLUTION_METHOD);
        } catch (UndefinedParameterError e) {
            e.printStackTrace();
        }

        int bestIndex = 0;
        double bestScore = 0.0d;
        for (int position = 0; position < arrayList.size(); position++) {
            String candidate = arrayList.get(position);
            double score;
            switch (method) {
                case 0:
                    score = getJaccardSimilarityOfTwoStrings(str, candidate);
                    break;
                case 1:
                    score = getLevenshteinSimilarityOfTwoStrings(str, candidate);
                    break;
                case 2:
                    score = getJaroWinklerSimilarityOfTwoStrings(str, candidate);
                    break;
                default:
                    score = 0.0d;
                    break;
            }
            if (score > bestScore) {
                bestScore = score;
                bestIndex = position;
            }
        }
        return bestIndex;
    }

    /**
     * Rebuilds {@code correspondencesSchemaLevel} and {@code correspondencesInstanceLevel} from
     * the two growing correspondence tables, keeping only rows whose "table" value belongs to a
     * table that has at least one schema-level row with the given target schema.
     *
     * <p>If no table qualifies, both results are built without any rows.
     *
     * <p>NOTE(review): the identifier {@code r0} used below is not declared anywhere in this
     * method. It looks like a decompiler placeholder; presumably each call was meant to use the
     * nominal mapping of the attribute in the same column of the newly created attribute list.
     * Confirm against the original source before relying on this method.
     *
     * @param str target-schema value that a table's schema-level row must equal for the table
     *            to be kept
     */
    private void removeRedundantEntriesFromCorrespondenceTables(String str) {
        // Names of the tables that have a schema-level row whose "targetSchema" equals str.
        ArrayList arrayList = new ArrayList();
        ExampleSet<Example> createExampleSet = this.correspondencesSchemaLevelGrowingExampleTable.createExampleSet();
        Attribute attribute = createExampleSet.getAttributes().get("table");
        Attribute attribute2 = createExampleSet.getAttributes().get("attribute");
        Attribute attribute3 = createExampleSet.getAttributes().get("targetSchema");
        for (Example example : createExampleSet) {
            String valueAsString = example.getValueAsString(attribute3);
            if (valueAsString != null && valueAsString.trim().length() > 0 && str.equals(valueAsString)) {
                arrayList.add(example.getValueAsString(attribute));
            }
        }
        // Fresh schema-level result with columns table / attribute / targetSchema.
        // The value-type code 1 is presumably Ontology.NOMINAL -- TODO confirm.
        LinkedList linkedList = new LinkedList();
        linkedList.add(AttributeFactory.createAttribute("table", 1));
        linkedList.add(AttributeFactory.createAttribute("attribute", 1));
        linkedList.add(AttributeFactory.createAttribute("targetSchema", 1));
        ExampleSetBuilder from = ExampleSets.from(linkedList);
        if (arrayList.size() > 0) {
            // Copy every schema-level row that belongs to one of the retained tables.
            for (Example example2 : createExampleSet) {
                String valueAsString2 = example2.getValueAsString(attribute);
                String valueAsString3 = example2.getValueAsString(attribute2);
                String valueAsString4 = example2.getValueAsString(attribute3);
                if (arrayList.contains(valueAsString2)) {
                    double[] dArr = new double[linkedList.size()];
                    // NOTE(review): r0 is undeclared here (see method comment).
                    dArr[0] = r0.getMapping().mapString(valueAsString2);
                    dArr[1] = r0.getMapping().mapString(valueAsString3);
                    dArr[2] = r0.getMapping().mapString(valueAsString4);
                    from.addDataRow(new DoubleArrayDataRow(dArr));
                }
            }
        }
        // Fresh instance-level result with columns table / instance / content /
        // instanceFromTargetSchema. The value-type code 5 is presumably Ontology.STRING -- TODO confirm.
        LinkedList linkedList2 = new LinkedList();
        linkedList2.add(AttributeFactory.createAttribute("table", 1));
        linkedList2.add(AttributeFactory.createAttribute("instance", 1));
        linkedList2.add(AttributeFactory.createAttribute("content", 5));
        linkedList2.add(AttributeFactory.createAttribute("instanceFromTargetSchema", 1));
        ExampleSetBuilder from2 = ExampleSets.from(linkedList2);
        ExampleSet<Example> createExampleSet2 = this.correspondencesInstanceLevelGrowingExampleTable.createExampleSet();
        Attribute attribute4 = createExampleSet2.getAttributes().get("table");
        Attribute attribute5 = createExampleSet2.getAttributes().get("instance");
        Attribute attribute6 = createExampleSet2.getAttributes().get("content");
        Attribute attribute7 = createExampleSet2.getAttributes().get("instanceFromTargetSchema");
        if (arrayList.size() > 0) {
            // Copy every instance-level row that belongs to one of the retained tables.
            for (Example example3 : createExampleSet2) {
                String valueAsString5 = example3.getValueAsString(attribute4);
                String valueAsString6 = example3.getValueAsString(attribute5);
                String valueAsString7 = example3.getValueAsString(attribute6);
                String valueAsString8 = example3.getValueAsString(attribute7);
                if (arrayList.contains(valueAsString5)) {
                    // The row width comes from the growing table's attribute count, not from
                    // linkedList2; indices 0..3 are written below.
                    double[] dArr2 = new double[this.correspondencesInstanceLevelGrowingExampleTable.getAttributes().length];
                    // NOTE(review): r0 is undeclared here (see method comment).
                    dArr2[0] = r0.getMapping().mapString(valueAsString5);
                    dArr2[1] = r0.getMapping().mapString(valueAsString6);
                    dArr2[2] = r0.getMapping().mapString(valueAsString7);
                    dArr2[3] = r0.getMapping().mapString(valueAsString8);
                    from2.addDataRow(new DoubleArrayDataRow(dArr2));
                }
            }
        }
        // Publish the filtered results; these replace whatever the fields held before.
        this.correspondencesSchemaLevel = from.build();
        this.correspondencesInstanceLevel = from2.build();
    }

    /**
     * Decides whether an attribute is distinct enough to act as a primary identifier.
     *
     * <p>The uniqueness ratio is the number of distinct trimmed values (ignoring missing,
     * blank and {@code LocationInfo.NA} values) divided by the total number of rows. The
     * division is done in floating point; integer division would truncate every ratio
     * below 1 to 0.
     *
     * @param attribute  attribute whose values are inspected
     * @param exampleSet table containing the attribute
     * @param d          minimum uniqueness ratio required to pass
     * @return {@code true} if the ratio is at least {@code d}; {@code false} otherwise,
     *         including for a table without rows
     */
    private boolean isPrimaryIdentifier(Attribute attribute, ExampleSet exampleSet, double d) {
        int totalRows = exampleSet.size();
        if (totalRows == 0) {
            // Nothing to measure, and it avoids dividing by zero.
            return false;
        }

        Set<String> distinctValues = new HashSet<>();
        Iterator it = exampleSet.iterator();
        while (it.hasNext()) {
            String raw = ((Example) it.next()).getValueAsString(attribute);
            if (raw == null) {
                continue;
            }
            String value = raw.trim();
            if (value.length() > 0 && !value.equalsIgnoreCase(LocationInfo.NA)) {
                distinctValues.add(value);
            }
        }

        double uniqueRatio = (double) distinctValues.size() / totalRows;
        if (uniqueRatio < d) {
            return false;
        }

        // The annotations are only used to make the log line identifiable.
        String tableTitle = null;
        String documentTitle = null;
        if (exampleSet.getAnnotations() != null && exampleSet.getAnnotations().size() > 0) {
            tableTitle = exampleSet.getAnnotations().getAnnotation("Table or Page Title");
            documentTitle = exampleSet.getAnnotations().getAnnotation("Document Title");
        }
        LOGGER.log(Level.INFO, "PASSED: Primary Id check for Table [Annotations: " + tableTitle + " -AND- " + documentTitle + "] with unique values = " + uniqueRatio);
        return true;
    }

    /**
     * Records schema correspondences for the matched tables and, if schema matching is enabled,
     * keeps only the tables whose attribute names match the query table well enough.
     *
     * <p>With "compute schema matchings" switched off, every table gets a schema correspondence
     * entry and the input object is returned unchanged. Otherwise each table is checked with
     * the method chosen by "schema matching method" (4 = Word2Vec, anything else is delegated to
     * {@code schemaMatchAcceptable}; 1 is used when the parameter is undefined), and only
     * accepted tables and their subject-id attribute names are put into a new result object.
     * In both cases the correspondence tables are pruned afterwards.
     *
     * @param setOfCollectionAndList matched tables and, in parallel, the names of their matched
     *                               subject-id attributes
     * @param str                    target-schema value passed on to the correspondence tables
     * @return the input itself when schema matching is disabled, otherwise a new object holding
     *         only the accepted tables and their subject-id attribute names
     */
    private SetOfCollectionAndList filterCollectionBySchemaMatchings(SetOfCollectionAndList setOfCollectionAndList, String str) {
        SetOfCollectionAndList filtered = new SetOfCollectionAndList();
        IOObjectCollection<ExampleSet> acceptedTables = new IOObjectCollection<>();
        ArrayList<String> acceptedSubjectIdNames = new ArrayList<>();
        IOObjectCollection<ExampleSet> matchedCollection = setOfCollectionAndList.getCollection();
        List<String> subjectIdNames = setOfCollectionAndList.getNameOfMatchedSubjectIdAttribute();
        List<ExampleSet> tables = matchedCollection.getObjects();

        if (!getParameterAsBoolean("compute schema matchings")) {
            int position = 0;
            for (ExampleSet table : tables) {
                updateSchemaCorrespondencesTable(table.getSource(), subjectIdNames.get(position), str);
                position++;
            }
            removeRedundantEntriesFromCorrespondenceTables(str);
            return setOfCollectionAndList;
        }

        int matchingMethod = 1;
        try {
            matchingMethod = getParameterAsInt("schema matching method");
        } catch (UndefinedParameterError e) {
            e.printStackTrace();
        }

        List<String> queryAttributeNames = getListOfAttributeNames(Lists.newArrayList(this.queryTable.getAttributes().allAttributes()));
        boolean useWord2Vec = matchingMethod == 4;
        // The model is trained once, up front, and only for the Word2Vec method.
        Word2VecModel model = null;
        if (useWord2Vec) {
            model = trainWord2VecModel(queryAttributeNames, tables);
        }

        int index = 0;
        for (ExampleSet table : tables) {
            List<String> tableAttributeNames = getListOfAttributeNames(Lists.newArrayList(table.getAttributes().allAttributes()));
            boolean accepted;
            if (useWord2Vec) {
                accepted = schemaMatchAcceptablePerWord2Vec(queryAttributeNames, tableAttributeNames, model);
            } else {
                accepted = schemaMatchAcceptable(queryAttributeNames, tableAttributeNames, matchingMethod);
            }
            if (accepted) {
                acceptedTables.add(table);
                acceptedSubjectIdNames.add(subjectIdNames.get(index));
                updateSchemaCorrespondencesTable(table.getSource(), subjectIdNames.get(index), str);
            }
            index++;
        }

        removeRedundantEntriesFromCorrespondenceTables(str);
        LOGGER.log(Level.INFO, "*** Total Tables with Acceptable Schema Matches = " + acceptedTables.size() + "\n");
        filtered.setCollection(acceptedTables);
        filtered.setNameOfMatchedSubjectIdAttribute(acceptedSubjectIdNames);
        return filtered;
    }

    /**
     * Checks whether two lists of attribute names match well enough under a simple
     * string-based method.
     *
     * <p>Every name of {@code list} is paired with every name of {@code list2} (both
     * lower-cased). Each pair contributes a score and the average over all pairs is compared
     * with the threshold parameter of the chosen method:
     * <ul>
     *   <li>0 - pair scores 1 if the names are equal ignoring case
     *       ("minimum equals score")</li>
     *   <li>1 - pair scores 1 if either name contains the other
     *       ("minimum contains score")</li>
     *   <li>2 - pair scores 1 if either name contains any whitespace-separated token of the
     *       other ("minimum contains any score")</li>
     *   <li>3 - pair scores its Jaccard similarity
     *       ("minimum jaccard similarity score")</li>
     * </ul>
     *
     * @param list  attribute names of the first table
     * @param list2 attribute names of the second table
     * @param i     matching method, 0 to 3
     * @return {@code true} if the average pair score reaches the threshold; {@code false}
     *         for an unknown method, when there are no pairs, or when the threshold cannot be
     *         read or scoring fails
     */
    private boolean schemaMatchAcceptable(List<String> list, List<String> list2, int i) {
        final String thresholdParameter;
        switch (i) {
            case 0:
                thresholdParameter = "minimum equals score";
                break;
            case 1:
                thresholdParameter = "minimum contains score";
                break;
            case 2:
                thresholdParameter = "minimum contains any score";
                break;
            case 3:
                thresholdParameter = "minimum jaccard similarity score";
                break;
            default:
                return false;
        }

        try {
            double threshold = getParameterAsDouble(thresholdParameter);
            JaccardSimilarity jaccardSimilarity = (i == 3) ? new JaccardSimilarity() : null;
            double scoreSum = 0.0d;
            int pairCount = 0;
            for (String firstName : list) {
                String left = firstName.toLowerCase();
                for (String secondName : list2) {
                    String right = secondName.toLowerCase();
                    switch (i) {
                        case 0:
                            if (left.equalsIgnoreCase(right)) {
                                scoreSum += 1.0d;
                            }
                            break;
                        case 1:
                            if (left.contains(right) || right.contains(left)) {
                                scoreSum += 1.0d;
                            }
                            break;
                        case 2:
                            if (StringUtils.containsAny(left, right.trim().split("\\s+")) || StringUtils.containsAny(right, left.trim().split("\\s+"))) {
                                scoreSum += 1.0d;
                            }
                            break;
                        default:
                            // Only method 3 can reach this point.
                            scoreSum += jaccardSimilarity.apply(left, right).doubleValue();
                            break;
                    }
                    pairCount++;
                }
            }
            // With no pairs this is 0.0 / 0 = NaN, which never reaches the threshold.
            return scoreSum / pairCount >= threshold;
        } catch (Exception e) {
            // Best effort: an undefined threshold or a failure while scoring (for example a
            // null attribute name) counts as "no acceptable match" rather than aborting.
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Checks whether two lists of attribute names match well enough according to a trained
     * Word2Vec model.
     *
     * <p>For every pair of lower-cased names that are both known to the model, the value
     * returned by {@code Searcher.cosineDistance} is added up. The sum is divided by the number
     * of all pairs (known or not) and compared with "minimum table cosine similarity"; a
     * threshold of 0 is used when that parameter is undefined.
     *
     * @param list          attribute names of the first table
     * @param list2         attribute names of the second table
     * @param word2VecModel trained model; may be {@code null} if training failed
     * @return {@code true} if the averaged value reaches the threshold; {@code false} when no
     *         model is available or there are no pairs
     */
    private boolean schemaMatchAcceptablePerWord2Vec(List<String> list, List<String> list2, Word2VecModel word2VecModel) {
        double threshold = 0.0d;
        try {
            threshold = getParameterAsDouble("minimum table cosine similarity");
        } catch (UndefinedParameterError e) {
            e.printStackTrace();
        }

        if (word2VecModel == null) {
            // trainWord2VecModel returns null when training failed; reject instead of crashing.
            LOGGER.log(Level.INFO, "Table REJECTED: no Word2Vec model available for schema matching.");
            return false;
        }
        double pairCount = (double) list.size() * list2.size();
        if (pairCount == 0.0d) {
            return false;
        }

        double similaritySum = 0.0d;
        Searcher searcher = word2VecModel.forSearch();
        for (String firstName : list) {
            String left = firstName.toLowerCase();
            if (!searcher.contains(left)) {
                // No pair involving an unknown word can contribute.
                continue;
            }
            for (String secondName : list2) {
                String right = secondName.toLowerCase();
                if (!searcher.contains(right)) {
                    continue;
                }
                try {
                    similaritySum += searcher.cosineDistance(left, right);
                } catch (Searcher.UnknownWordException e) {
                    e.printStackTrace();
                }
            }
        }
        return similaritySum / pairCount >= threshold;
    }

    /**
     * Collects the names of the given attributes, in the same order.
     *
     * @param list attributes to read the names from
     * @return a new list with one name per attribute
     */
    private List<String> getListOfAttributeNames(List<Attribute> list) {
        List<String> names = new ArrayList<>();
        for (Attribute attribute : list) {
            names.add(attribute.getName());
        }
        return names;
    }

    /**
     * Reads the trimmed value of the "subject id" attribute from every row of the query table.
     *
     * @return the subject ids in row order; empty if the "subject id" parameter is undefined
     */
    public List<String> getListOfSubjectIdsInQueryTable() {
        List<String> subjectIds = new ArrayList<>();
        try {
            Attributes queryAttributes = this.queryTable.getAttributes();
            String subjectIdName = getParameterAsString("subject id");
            Attribute subjectIdAttribute = queryAttributes.get(subjectIdName);
            for (Object row : this.queryTable) {
                String subjectId = ((Example) row).getValueAsString(subjectIdAttribute);
                subjectIds.add(subjectId.trim());
            }
        } catch (UndefinedParameterError e) {
            e.printStackTrace();
        }
        return subjectIds;
    }

    /**
     * Trains a Word2Vec model on the attribute names of the query table and of all candidate
     * tables.
     *
     * <p>Each table contributes one "sentence": its attribute names, lower-cased and split into
     * space-separated tokens. The training settings are read from the operator parameters.
     *
     * @param list  attribute names of the query table
     * @param list2 candidate tables whose attribute names are added to the training data
     * @return the trained model, or {@code null} if a parameter is undefined or training was
     *         interrupted
     */
    private Word2VecModel trainWord2VecModel(List<String> list, List<ExampleSet> list2) {
        List<List<String>> sentences = new ArrayList<>();
        List<String> queryTokens = tokenizeAttributeNamesForWord2Vec(list);
        int size = queryTokens.size();
        sentences.add(queryTokens);
        for (ExampleSet candidate : list2) {
            List<String> candidateNames = getListOfAttributeNames(Lists.newArrayList(candidate.getAttributes().allAttributes()));
            List<String> candidateTokens = tokenizeAttributeNamesForWord2Vec(candidateNames);
            size += candidateTokens.size();
            sentences.add(candidateTokens);
        }

        Word2VecModel word2VecModel = null;
        try {
            int networkType = getParameterAsInt("type of neural network");
            int minVocabFrequency = getParameterAsInt("minimal vocabulary frequency");
            int layerSize = getParameterAsInt("size of the generated vector");
            int windowSize = getParameterAsInt("size of window");
            int negativeSamples = getParameterAsInt("number of negative samples");
            double downSamplingRate = getParameterAsDouble("down sampling rate for high occurrence words");
            int iterations = getParameterAsInt("number of iterations");
            double initialLearningRate = getParameterAsDouble("initial learning rate");
            LOGGER.log(Level.INFO, "Model will be trained on a vocaulary of size = " + size + " with Neural Network type = " + (networkType == 0 ? "CBOW" : "SKIP_GRAM"));
            word2VecModel = Word2VecModel.trainer().setMinVocabFrequency(minVocabFrequency).useNumThreads(this.processors).setWindowSize(windowSize).type(networkType == 0 ? NeuralNetworkType.CBOW : NeuralNetworkType.SKIP_GRAM).setLayerSize(layerSize).useNegativeSamples(negativeSamples).setDownSamplingRate(downSamplingRate).setInitialLearningRate(initialLearningRate).setNumIterations(iterations).setListener(new Word2VecTrainerBuilder.TrainingProgressListener() {
                @Override
                public void update(Word2VecTrainerBuilder.TrainingProgressListener.Stage stage, double d) {
                    CreateCorrespondencesOperator.LOGGER.log(Level.INFO, String.format("%s is %.2f%% complete", Format.formatEnum(stage), Double.valueOf(d * 100.0d)));
                }
            }).train(sentences);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can still react to it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (UndefinedParameterError e2) {
            e2.printStackTrace();
        }
        if (word2VecModel != null) {
            // Only report success when a model was actually produced.
            LOGGER.log(Level.INFO, "Model trained!");
        }
        return word2VecModel;
    }

    /**
     * Joins the names with single spaces, lower-cases the result and splits it back into its
     * non-empty space-separated tokens. The separator keeps the last token of one name from
     * fusing with the first token of the next.
     */
    private List<String> tokenizeAttributeNamesForWord2Vec(List<String> names) {
        StringBuilder joined = new StringBuilder();
        for (String name : names) {
            joined.append(name).append(StringUtils.SPACE);
        }
        return removeEmptyElements(Arrays.asList(StringUtils.splitByWholeSeparatorPreserveAllTokens(joined.toString().toLowerCase(), StringUtils.SPACE)));
    }

    /**
     * Copies the given strings into a new list, leaving out every empty string.
     *
     * @param list strings to filter; the list itself is not modified
     * @return a new list with the non-empty strings in their original order
     */
    private List<String> removeEmptyElements(List<String> list) {
        List<String> nonEmpty = new ArrayList<>();
        Iterator<String> cursor = list.iterator();
        while (cursor.hasNext()) {
            String element = cursor.next();
            if (element.isEmpty()) {
                continue;
            }
            nonEmpty.add(element);
        }
        return nonEmpty;
    }

    /**
     * Decides whether two subject-id strings match under the selected matching method.
     *
     * <p>Methods, by value of {@code i}:
     * <ul>
     *   <li>0 - equal ignoring case</li>
     *   <li>1 - either string contains the other</li>
     *   <li>2 - either string contains any whitespace-separated token of the other</li>
     *   <li>3 - either string, taken literally, is found inside the other via a regular
     *       expression</li>
     *   <li>4 - share of the first-versus-second fuzzy score in the sum of both directions,
     *       compared with "minimum fuzzy score"</li>
     *   <li>5 to 11 - a similarity measure compared with its own threshold parameter
     *       (Levenshtein, n-gram, cosine, Jaro-Winkler, Jaccard, longest common subsequence,
     *       phonetic Levenshtein)</li>
     *   <li>12 to 16 - substring search in either direction using KMP, automaton, Rabin-Karp,
     *       Z and Boyer-Moore matchers respectively</li>
     * </ul>
     *
     * @param str  first string
     * @param str2 second string
     * @param i    matching method, 0 to 16
     * @return {@code true} if the strings match; {@code false} otherwise, for an unknown
     *         method, or when the required threshold parameter is undefined
     */
    private boolean subjectIdMatchConfirmed(String str, String str2, int i) {
        switch (i) {
            case 0:
                return str.equalsIgnoreCase(str2);
            case 1:
                return str.contains(str2) || str2.contains(str);
            case 2:
                return StringUtils.containsAny(str, str2.trim().split("\\s+")) || StringUtils.containsAny(str2, str.trim().split("\\s+"));
            case 3:
                return Pattern.compile("(.*)" + Pattern.quote(str) + "(.*)", Pattern.UNICODE_CASE).matcher(str2).find()
                        || Pattern.compile("(.*)" + Pattern.quote(str2) + "(.*)", Pattern.UNICODE_CASE).matcher(str).find();
            case 4: {
                FuzzyScore fuzzyScore = new FuzzyScore(Locale.ENGLISH);
                int forward = fuzzyScore.fuzzyScore(str, str2).intValue();
                int backward = fuzzyScore.fuzzyScore(str2, str).intValue();
                double share = 0.0d;
                if (forward > 0 && backward > 0) {
                    // Floating-point division: with integer division this was always 0.
                    share = (double) forward / (forward + backward);
                }
                return reachesParameterThreshold(share, "minimum fuzzy score");
            }
            case 5:
                return reachesParameterThreshold(getLevenshteinSimilarityOfTwoStrings(str, str2), "minimum levenshtein similarity");
            case 6:
                return reachesParameterThreshold(getNGramSimilarityOfTwoStrings(str, str2), "minimum n-gram similarity");
            case 7:
                return reachesParameterThreshold(getCosineSimilarityOfTwoStrings(str, str2), "minimum cosine similarity");
            case 8:
                return reachesParameterThreshold(getJaroWinklerSimilarityOfTwoStrings(str, str2), "minimum jaro winkler similarity");
            case 9:
                return reachesParameterThreshold(getJaccardSimilarityOfTwoStrings(str, str2), "minimum jaccard similarity");
            case 10:
                return reachesParameterThreshold(getLongestCommonSubsequenceSimilarityOfTwoStrings(str, str2), "minimum subsequence similarity");
            case 11:
                return reachesParameterThreshold(getLevenshteinSimilarityOfSameSoundexPhoneticEncodedStrings(str, str2), "minimum phonetic levenshtein similarity");
            case 12: {
                // A result below the searched text's length is treated as a hit.
                int firstInSecond = new KMP(str).search(str2);
                int secondInFirst = new KMP(str2).search(str);
                return firstInSecond < str2.length() || secondInFirst < str.length();
            }
            case 13: {
                AutomatonMatcher automatonMatcher = new AutomatonMatcher();
                int forward = automatonMatcher.match(str, str2, 0);
                int backward = automatonMatcher.match(str2, str, 0);
                return forward > -1 || backward > -1;
            }
            case 14: {
                // Same hit convention as for KMP above.
                int firstInSecond = new RabinKarp(str).search(str2);
                int secondInFirst = new RabinKarp(str2).search(str);
                return firstInSecond < str2.length() || secondInFirst < str.length();
            }
            case 15: {
                ZMatcher zMatcher = new ZMatcher();
                int forward = zMatcher.match(str, str2, 0);
                int backward = zMatcher.match(str2, str, 0);
                return forward > -1 || backward > -1;
            }
            case 16: {
                BoyerMooreWiki boyerMooreWiki = new BoyerMooreWiki();
                int forward = boyerMooreWiki.indexOf(str.toCharArray(), str2.toCharArray());
                int backward = boyerMooreWiki.indexOf(str2.toCharArray(), str.toCharArray());
                return forward > -1 || backward > -1;
            }
            default:
                return false;
        }
    }

    /**
     * Returns whether the score is at least the value of the given double parameter; an
     * undefined parameter counts as "not reached".
     */
    private boolean reachesParameterThreshold(double score, String parameterKey) {
        try {
            return score >= getParameterAsDouble(parameterKey);
        } catch (UndefinedParameterError e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Computes the Jaccard similarity of two strings after trimming and lower-casing both.
     *
     * @param str  first string; must not be {@code null}
     * @param str2 second string; must not be {@code null}
     * @return the value produced by {@code JaccardSimilarity.apply} for the normalised inputs
     */
    private double getJaccardSimilarityOfTwoStrings(String str, String str2) {
        CharSequence left = str.trim().toLowerCase();
        CharSequence right = str2.trim().toLowerCase();
        JaccardSimilarity measure = new JaccardSimilarity();
        return measure.apply(left, right).doubleValue();
    }

    /**
     * Computes a normalised Levenshtein similarity: {@code 1 - distance / longerLength}, evaluated on
     * trimmed, lower-cased copies of both strings.
     *
     * @param str  first string; must not be {@code null}
     * @param str2 second string; must not be {@code null}
     * @return {@code 1.0} for identical normalised strings (including two blank strings), decreasing
     *         towards {@code 0.0} as the edit distance approaches the length of the longer string
     */
    private double getLevenshteinSimilarityOfTwoStrings(String str, String str2) {
        String left = str.trim().toLowerCase();
        String right = str2.trim().toLowerCase();
        // Normalise by the strings that are actually compared so the quotient cannot exceed 1.
        int longest = Math.max(left.length(), right.length());
        if (longest == 0) {
            // Two blank strings are identical; dividing 0 by 0 would otherwise yield NaN.
            return 1.0d;
        }
        double distance = new LevenshteinDistance().apply(left, right).doubleValue();
        return 1.0d - (distance / longest);
    }

    /**
     * Applies {@code JaroWinklerDistance} to trimmed, lower-cased copies of both strings.
     *
     * NOTE(review): whether {@code JaroWinklerDistance.apply} yields a similarity or a distance depends
     * on the library version in use; confirm it matches the "similarity" contract implied by the name.
     *
     * @param str  first string; must not be {@code null}
     * @param str2 second string; must not be {@code null}
     * @return the value returned by {@code JaroWinklerDistance.apply} for the normalised inputs
     */
    private double getJaroWinklerSimilarityOfTwoStrings(String str, String str2) {
        CharSequence left = str.trim().toLowerCase();
        CharSequence right = str2.trim().toLowerCase();
        JaroWinklerDistance measure = new JaroWinklerDistance();
        return measure.apply(left, right).doubleValue();
    }

    /**
     * Computes an n-gram similarity as {@code 1 - NGram(n).distance(str, str2)}.
     *
     * The gram size is read from the "n value for n-gram" parameter; if that parameter is undefined the
     * stack trace is printed and a size of 2 is used. Unlike the sibling similarity helpers, the inputs
     * are passed on without trimming or lower-casing.
     *
     * @param str  first string
     * @param str2 second string
     * @return one minus the n-gram distance of the two strings
     */
    private double getNGramSimilarityOfTwoStrings(String str, String str2) {
        int gramSize;
        try {
            gramSize = getParameterAsInt("n value for n-gram");
        } catch (UndefinedParameterError e) {
            e.printStackTrace();
            gramSize = 2;
        }
        NGram measure = new NGram(gramSize);
        return 1.0d - measure.distance(str, str2);
    }

    /**
     * Computes the cosine similarity of two strings after trimming and lower-casing both.
     *
     * {@code CosineDistance} reports a distance (one minus the cosine similarity), so the result is
     * converted back to a similarity here, in line with the n-gram and Levenshtein helpers of this
     * class which also return {@code 1 - distance}.
     *
     * @param str  first string; must not be {@code null}
     * @param str2 second string; must not be {@code null}
     * @return {@code 1 - CosineDistance.apply(...)} for the normalised inputs
     */
    private double getCosineSimilarityOfTwoStrings(String str, String str2) {
        CharSequence left = str.trim().toLowerCase();
        CharSequence right = str2.trim().toLowerCase();
        double distance = new CosineDistance().apply(left, right).doubleValue();
        return 1.0d - distance;
    }

    /**
     * Computes a similarity from the longest-common-subsequence distance of two strings:
     * {@code 1 - distance / (str.length() + str2.length())}.
     *
     * The division is carried out in floating point; an integer division would collapse the quotient
     * to 0 or 1 and make the similarity useless as a threshold.
     *
     * @param str  first string; must not be {@code null}
     * @param str2 second string; must not be {@code null}
     * @return the similarity, or {@code 1.0} when both strings are empty
     */
    private double getLongestCommonSubsequenceSimilarityOfTwoStrings(String str, String str2) {
        int combinedLength = str.length() + str2.length();
        if (combinedLength == 0) {
            // Two empty strings are identical; also avoids a division by zero.
            return 1.0d;
        }
        int distance = new LongestCommonSubsequenceDistance().apply(str, str2).intValue();
        return 1.0d - ((double) distance / combinedLength);
    }

    /**
     * Returns the Levenshtein similarity of two strings, but only if both have the same non-empty
     * Soundex code (compared case-insensitively); otherwise returns 0.
     *
     * An {@code EncoderException} raised while encoding is printed and treated as "no code available".
     *
     * @param str  first string
     * @param str2 second string
     * @return the Levenshtein similarity of the original strings when their Soundex codes agree,
     *         {@code 0.0} otherwise
     */
    private double getLevenshteinSimilarityOfSameSoundexPhoneticEncodedStrings(String str, String str2) {
        Soundex encoder = new Soundex();
        String firstCode = null;
        String secondCode = null;
        try {
            firstCode = encoder.encode(str);
            secondCode = encoder.encode(str2);
        } catch (EncoderException e) {
            e.printStackTrace();
        }
        if (firstCode == null || secondCode == null) {
            return 0.0d;
        }
        if (firstCode.isEmpty() || secondCode.isEmpty()) {
            return 0.0d;
        }
        if (!firstCode.equalsIgnoreCase(secondCode)) {
            return 0.0d;
        }
        return getLevenshteinSimilarityOfTwoStrings(str, str2);
    }

    /**
     * Scans every table of the input collection for attributes whose name matches the given keyword.
     *
     * For each table with at least one matching attribute name, the best match is chosen via
     * {@code resolveConflictAmongMatchedAttributes}, that attribute is given the role
     * {@code ROLE_ADDITIONAL_ATTRIBUTE_MATCH}, the table is added to the result collection and a
     * schema-level correspondence is recorded. Tables without a match are only logged.
     *
     * @param str the keyword (extended attribute name) to look for; compared trimmed and lower-cased
     * @param i   the matching method index handed to {@code subjectIdMatchConfirmed}
     * @return the matched tables together with the name of the matched attribute of each table,
     *         in the same order
     */
    private SetOfCollectionAndList findExtendedKeyword(String str, int i) {
        String keyword = str.trim().toLowerCase();
        IOObjectCollection<ExampleSet> matchedTables = new IOObjectCollection<>();
        ArrayList<String> matchedAttributeNames = new ArrayList<>();
        for (ExampleSet exampleSet : this.inputCollection.getObjects()) {
            ArrayList<String> candidates = new ArrayList<>();
            for (Attribute attribute : exampleSet.getAttributes()) {
                String name = attribute.getName();
                if (name == null) {
                    continue;
                }
                if (subjectIdMatchConfirmed(keyword, name.trim().toLowerCase(), i)) {
                    candidates.add(name);
                }
            }
            if (candidates.isEmpty()) {
                LOGGER.log(Level.INFO, "\n--> NO Keyword Match found with Table: " + exampleSet.getSource());
                continue;
            }
            // Several attribute names may match; keep the one closest to the keyword as given.
            String bestName = resolveConflictAmongMatchedAttributes(str, candidates);
            Attribute bestAttribute = exampleSet.getAttributes().get(bestName);
            exampleSet.getAttributes().setSpecialAttribute(bestAttribute, ROLE_ADDITIONAL_ATTRIBUTE_MATCH);
            matchedAttributeNames.add(bestName);
            matchedTables.add(exampleSet);
            updateSchemaCorrespondencesTable(exampleSet.getSource(), bestName, this.extendedAttributeNameSuffixed);
        }
        SetOfCollectionAndList result = new SetOfCollectionAndList();
        result.setCollection(matchedTables);
        result.setNameOfMatchedSubjectIdAttribute(matchedAttributeNames);
        return result;
    }

    /**
     * Derives coverage and ratio figures from the instance matches found for one candidate table.
     *
     * The method counts the correspondences that {@code updateInstanceCorrespondencesTable} would
     * record for the same arguments (one per map key with a non-empty match list, or one per distinct
     * resolved match when {@code z} is set) and the number of distinct values that would land in the
     * "instance" column of those correspondences.
     *
     * <ul>
     *   <li>coverage = number of correspondences / {@code i}</li>
     *   <li>ratio = number of distinct "instance" values / {@code i2}</li>
     * </ul>
     *
     * Both quotients are computed in floating point; a non-positive denominator yields 0 instead of an
     * arithmetic error. Only counts are needed, so no intermediate example set is built.
     *
     * NOTE(review): with {@code z} set, the "instance" value is the map key and the resolved match is
     * the "instanceFromTargetSchema" value, whereas without {@code z} it is the other way round. This
     * mirrors the existing behaviour of {@code updateInstanceCorrespondencesTable}; confirm it is
     * intended.
     *
     * @param str  name of the candidate table; not needed for the counts, kept for interface stability
     * @param z    whether conflicting matches are resolved so that every matched index is used once
     * @param map  match lists keyed by row index
     * @param list per map entry (in iteration order) a single-entry map from the compared value to the
     *             values of its matches; only consulted when {@code z} is set
     * @param i    denominator of the coverage
     * @param i2   denominator of the ratio
     * @return the computed coverage and ratio
     */
    private CoverageAndRatio computeCoverageAndRatio(String str, boolean z, Map<Integer, ArrayList<Integer>> map, List<Map<String, ArrayList<String>>> list, int i, int i2) {
        int correspondenceCount = 0;
        Set<Integer> distinctInstances = new HashSet<>();
        if (z) {
            // matched index -> value that currently wins that index / map key that owns it
            Map<Integer, String> winningValueByMatch = new HashMap<>();
            Map<Integer, Integer> winningKeyByMatch = new HashMap<>();
            int position = 0;
            for (Map.Entry<Integer, ArrayList<Integer>> entry : map.entrySet()) {
                ArrayList<Integer> matches = entry.getValue();
                // list is positional: its n-th element belongs to the n-th map entry.
                Map<String, ArrayList<String>> valuesByComparedValue =
                        (list != null && position < list.size()) ? list.get(position) : null;
                position++;
                if (matches == null || matches.isEmpty() || valuesByComparedValue == null || valuesByComparedValue.isEmpty()) {
                    continue;
                }
                String comparedValue = valuesByComparedValue.keySet().iterator().next();
                ArrayList<String> matchValues = valuesByComparedValue.get(comparedValue);
                int chosen = resolveConflictAndReturnIndex(comparedValue, matchValues);
                int matchedIndex = matches.get(chosen);
                String matchedValue = matchValues.get(chosen);
                if (!winningValueByMatch.containsKey(matchedIndex)) {
                    winningValueByMatch.put(matchedIndex, matchedValue);
                    winningKeyByMatch.put(matchedIndex, entry.getKey());
                    continue;
                }
                // The matched index is already taken: keep whichever value is closer to comparedValue.
                ArrayList<String> contenders = new ArrayList<>();
                contenders.add(winningValueByMatch.get(matchedIndex));
                contenders.add(matchedValue);
                String winner = resolveConflictAmongMatchedAttributes(comparedValue, contenders);
                // Comparing from matchedValue keeps this safe when no winner could be determined.
                if (matchedValue != null && matchedValue.equalsIgnoreCase(winner)) {
                    winningValueByMatch.put(matchedIndex, winner);
                    winningKeyByMatch.put(matchedIndex, entry.getKey());
                }
            }
            correspondenceCount = winningKeyByMatch.size();
            distinctInstances.addAll(winningKeyByMatch.values());
        } else {
            for (ArrayList<Integer> matches : map.values()) {
                if (matches != null && !matches.isEmpty()) {
                    correspondenceCount++;
                    distinctInstances.add(matches.get(0));
                }
            }
        }
        CoverageAndRatio coverageAndRatio = new CoverageAndRatio();
        coverageAndRatio.setCoverage(i > 0 ? (double) correspondenceCount / i : 0.0d);
        coverageAndRatio.setRatio(i2 > 0 ? (double) distinctInstances.size() / i2 : 0.0d);
        return coverageAndRatio;
    }

    /**
     * Appends instance-level correspondences for one candidate table to
     * {@code correspondencesInstanceLevelGrowingExampleTable}.
     *
     * Each appended row holds the table name, an "instance" value, an empty "content" value and an
     * "instanceFromTargetSchema" value (cells 0 to 3). Indices are written 1-based.
     *
     * <ul>
     *   <li>Without {@code z}: one row per map key with a non-empty match list, using the first match
     *       as "instance" and the key as "instanceFromTargetSchema".</li>
     *   <li>With {@code z}: for every map entry the best match is picked via
     *       {@code resolveConflictAndReturnIndex}; if two entries claim the same match,
     *       {@code resolveConflictAmongMatchedAttributes} decides which one keeps it. One row is
     *       written per surviving match, using the owning key as "instance" and the match as
     *       "instanceFromTargetSchema".</li>
     * </ul>
     *
     * NOTE(review): the two modes assign key and match to opposite columns; this is existing behaviour
     * and is preserved here. Confirm it is intended.
     *
     * If an attribute lookup fails, the stack trace is printed and the subsequent row construction
     * fails on the missing attribute, as before.
     *
     * @param str  name of the candidate table
     * @param z    whether conflicting matches are resolved so that every match is used once
     * @param map  match lists keyed by row index
     * @param list per map entry (in iteration order) a single-entry map from the compared value to the
     *             values of its matches; only consulted when {@code z} is set
     */
    private void updateInstanceCorrespondencesTable(String str, boolean z, Map<Integer, ArrayList<Integer>> map, List<Map<String, ArrayList<String>>> list) {
        Attribute tableColumn = null;
        Attribute instanceColumn = null;
        Attribute contentColumn = null;
        Attribute targetInstanceColumn = null;
        try {
            tableColumn = this.correspondencesInstanceLevelGrowingExampleTable.findAttribute("table");
            instanceColumn = this.correspondencesInstanceLevelGrowingExampleTable.findAttribute("instance");
            contentColumn = this.correspondencesInstanceLevelGrowingExampleTable.findAttribute("content");
            targetInstanceColumn = this.correspondencesInstanceLevelGrowingExampleTable.findAttribute("instanceFromTargetSchema");
        } catch (OperatorException e) {
            e.printStackTrace();
        }

        // Each element is {instance, instanceFromTargetSchema}, already converted to 1-based indices.
        List<int[]> correspondences = new ArrayList<>();
        if (!z) {
            for (Map.Entry<Integer, ArrayList<Integer>> entry : map.entrySet()) {
                ArrayList<Integer> matches = entry.getValue();
                if (matches != null && !matches.isEmpty()) {
                    correspondences.add(new int[]{matches.get(0) + 1, entry.getKey() + 1});
                }
            }
        } else {
            // matched index -> value that currently wins that index / map key that owns it
            Map<Integer, String> winningValueByMatch = new HashMap<>();
            Map<Integer, Integer> winningKeyByMatch = new HashMap<>();
            int position = 0;
            for (Map.Entry<Integer, ArrayList<Integer>> entry : map.entrySet()) {
                ArrayList<Integer> matches = entry.getValue();
                // list is positional: its n-th element belongs to the n-th map entry.
                Map<String, ArrayList<String>> valuesByComparedValue =
                        (list != null && position < list.size()) ? list.get(position) : null;
                position++;
                if (matches == null || matches.isEmpty() || valuesByComparedValue == null || valuesByComparedValue.isEmpty()) {
                    continue;
                }
                String comparedValue = valuesByComparedValue.keySet().iterator().next();
                ArrayList<String> matchValues = valuesByComparedValue.get(comparedValue);
                int chosen = resolveConflictAndReturnIndex(comparedValue, matchValues);
                int matchedIndex = matches.get(chosen);
                String matchedValue = matchValues.get(chosen);
                if (!winningValueByMatch.containsKey(matchedIndex)) {
                    winningValueByMatch.put(matchedIndex, matchedValue);
                    winningKeyByMatch.put(matchedIndex, entry.getKey());
                    continue;
                }
                // The matched index is already taken: keep whichever value is closer to comparedValue.
                ArrayList<String> contenders = new ArrayList<>();
                contenders.add(winningValueByMatch.get(matchedIndex));
                contenders.add(matchedValue);
                String winner = resolveConflictAmongMatchedAttributes(comparedValue, contenders);
                // Comparing from matchedValue keeps this safe when no winner could be determined.
                if (matchedValue != null && matchedValue.equalsIgnoreCase(winner)) {
                    winningValueByMatch.put(matchedIndex, winner);
                    winningKeyByMatch.put(matchedIndex, entry.getKey());
                }
            }
            for (Map.Entry<Integer, Integer> resolved : winningKeyByMatch.entrySet()) {
                correspondences.add(new int[]{resolved.getValue() + 1, resolved.getKey() + 1});
            }
        }

        for (int[] correspondence : correspondences) {
            double[] row = new double[this.correspondencesInstanceLevelGrowingExampleTable.getAttributes().length];
            row[0] = tableColumn.getMapping().mapString(str);
            row[1] = instanceColumn.getMapping().mapString(correspondence[0] + "");
            // The empty content must be encoded with the content attribute's own nominal mapping;
            // an index taken from the "instance" mapping would be meaningless in this column.
            row[2] = contentColumn.getMapping().mapString("");
            row[3] = targetInstanceColumn.getMapping().mapString(correspondence[1] + "");
            this.correspondencesInstanceLevelGrowingExampleTable.addDataRow(new DoubleArrayDataRow(row));
        }
    }

    /**
     * Appends one schema-level correspondence to {@code correspondencesSchemaLevelGrowingExampleTable}.
     *
     * The row stores the three strings in cells 0 to 2, each encoded through the nominal mapping of the
     * "table", "attribute" and "targetSchema" attribute respectively. If an attribute lookup fails, the
     * stack trace is printed and the row construction then fails on the missing attribute.
     *
     * @param str  value for the "table" column
     * @param str2 value for the "attribute" column
     * @param str3 value for the "targetSchema" column
     */
    private void updateSchemaCorrespondencesTable(String str, String str2, String str3) {
        Attribute tableColumn = null;
        Attribute attributeColumn = null;
        Attribute targetSchemaColumn = null;
        try {
            tableColumn = this.correspondencesSchemaLevelGrowingExampleTable.findAttribute("table");
            attributeColumn = this.correspondencesSchemaLevelGrowingExampleTable.findAttribute("attribute");
            targetSchemaColumn = this.correspondencesSchemaLevelGrowingExampleTable.findAttribute("targetSchema");
        } catch (OperatorException e) {
            e.printStackTrace();
        }
        int width = this.correspondencesSchemaLevelGrowingExampleTable.getAttributes().length;
        double[] row = new double[width];
        row[0] = tableColumn.getMapping().mapString(str);
        row[1] = attributeColumn.getMapping().mapString(str2);
        row[2] = targetSchemaColumn.getMapping().mapString(str3);
        DoubleArrayDataRow dataRow = new DoubleArrayDataRow(row);
        this.correspondencesSchemaLevelGrowingExampleTable.addDataRow(dataRow);
    }

    /**
     * Builds the operator's output collection from the matched tables.
     *
     * For every matched table a {@code DataSearchExampleSet} is created on the same example table, the
     * special attribute roles are copied over, the matched subject-id attribute (if it exists in the
     * table) gets the subject-id role, and coverage / ratio / empty-value figures are attached as
     * metadata when available. Afterwards the query table is extended by a new, all-missing nominal
     * attribute named {@code extendedAttributeNameSuffixed}, re-typed via {@code guessTypes}, and
     * installed as target schema together with the correspondence tables.
     *
     * @param setOfCollectionAndList the matched tables plus, per table and in the same order, the name
     *                               of the matched subject-id attribute and optional table properties
     * @return the populated output collection
     */
    private DataSearchOperatorCollection createDataSearchOutputCollection(SetOfCollectionAndList setOfCollectionAndList) {
        DataSearchOperatorCollection dataSearchOperatorCollection = new DataSearchOperatorCollection();
        IOObjectCollection<ExampleSet> collection = setOfCollectionAndList.getCollection();
        List<String> nameOfMatchedSubjectIdAttribute = setOfCollectionAndList.getNameOfMatchedSubjectIdAttribute();
        List<PropertiesOfMatchedTable> propertiesOfMatchedTables = setOfCollectionAndList.getPropertiesOfMatchedTables();
        int index = 0;
        for (ExampleSet exampleSet : collection.getObjects()) {
            DataSearchExampleSet dataSearchExampleSet = new DataSearchExampleSet(exampleSet.getExampleTable());
            Iterator<AttributeRole> specialAttributes = exampleSet.getAttributes().specialAttributes();
            while (specialAttributes.hasNext()) {
                AttributeRole attributeRole = specialAttributes.next();
                dataSearchExampleSet.getAttributes().setSpecialAttribute(attributeRole.getAttribute(), attributeRole.getSpecialName());
            }

            // The per-table lists are positional; tolerate lists that are shorter than the collection.
            String subjectIdName = null;
            if (nameOfMatchedSubjectIdAttribute != null && index < nameOfMatchedSubjectIdAttribute.size()) {
                subjectIdName = nameOfMatchedSubjectIdAttribute.get(index);
            }
            Attribute subjectId = subjectIdName == null ? null : exampleSet.getAttributes().get(subjectIdName);
            if (subjectId != null) {
                dataSearchExampleSet.getAttributes().setSpecialAttribute(subjectId, EnrichTableByDataFusionOperator.SPECIAL_ATTRIBUTE_ROLE_NAME_SUBJECT_ID);
            }

            MetaDataTable metaDataTable = new MetaDataTable();
            if (propertiesOfMatchedTables != null && index < propertiesOfMatchedTables.size()) {
                PropertiesOfMatchedTable propertiesOfMatchedTable = propertiesOfMatchedTables.get(index);
                metaDataTable.setCoverage(propertiesOfMatchedTable.getCoverage());
                metaDataTable.setRatio(propertiesOfMatchedTable.getRatio());
                metaDataTable.setEmptyValues(propertiesOfMatchedTable.getEmptyValues());
            }

            String source = exampleSet.getSource();
            metaDataTable.setTitle(source);
            dataSearchExampleSet.setMetaData(metaDataTable);
            dataSearchExampleSet.setUserData("table-name", source);
            // A null or empty source is passed through unchanged; substring(0, -1) would throw.
            // NOTE(review): the existing truncation keeps min(length, 30) - 1 characters, i.e. it also
            // drops the last character of short names. Preserved as-is; confirm whether this is intended.
            String shortSource = source;
            if (source != null && !source.isEmpty()) {
                shortSource = source.substring(0, Math.min(source.length(), 30) - 1);
            }
            dataSearchExampleSet.setSource(shortSource);
            dataSearchExampleSet.setSchemaCorrespondending(true);
            dataSearchExampleSet.setInstanceCorrespondending(true);
            dataSearchExampleSet.setBothCorrespondending(true);
            dataSearchExampleSet.getAnnotations().addAll(exampleSet.getAnnotations());
            dataSearchOperatorCollection.add(dataSearchExampleSet);
            dataSearchOperatorCollection.addTableStatistics(source, metaDataTable.getCoverage(), metaDataTable.getRatio(), metaDataTable.getTrust(), metaDataTable.getEmptyValues(), metaDataTable.getTableScore());
            index++;
        }

        // Extend the query table by the (still empty) attribute that later steps will fill.
        Attributes attributes = this.queryTable.getAttributes();
        Attribute extendedAttribute = AttributeFactory.createAttribute(this.extendedAttributeNameSuffixed, 1);
        this.queryTable.getExampleTable().addAttribute(extendedAttribute);
        attributes.setSpecialAttribute(extendedAttribute, DataSearchOperator.ROLE_ADDITIONAL_ATTRIBUTE);
        for (Example example : this.queryTable) {
            example.setValue(extendedAttribute, (String) null);
        }
        List<String> extensionAttributes = new LinkedList<>();
        extensionAttributes.add(this.extendedAttributeNameSuffixed);
        this.queryTable.setUserData("extension-attributes", extensionAttributes);
        this.queryTable = guessTypes(this.queryTable);

        dataSearchOperatorCollection.setTargetSchema(this.queryTable);
        dataSearchOperatorCollection.setCorrespondencesSchemaLevel(this.correspondencesSchemaLevel);
        dataSearchOperatorCollection.setCorrespondencesInstanceLevel(this.correspondencesInstanceLevel);
        dataSearchOperatorCollection.setRelevanceStrengthExampleSet(dataSearchOperatorCollection.createOrUpdateRelevanceStrength());
        dataSearchOperatorCollection.getListOfExtendedAttributes().add(this.extendedAttributeNameSuffixed);
        dataSearchOperatorCollection.setSource("Data Search Collection");
        dataSearchOperatorCollection.setProcess(getProcess());
        dataSearchOperatorCollection.calculateCollectionStatistics();
        return dataSearchOperatorCollection;
    }

    /**
     * Picks the candidate that is most similar to the reference string.
     *
     * The similarity measure is selected by {@code PARAMETER_CONFLICT_RESOLUTION_METHOD}
     * (0 = Jaccard, 1 = Levenshtein, 2 = Jaro-Winkler). On ties the earlier candidate wins. If no
     * candidate achieves a positive score, or the parameter is undefined or has another value, the
     * first candidate is returned so that callers always receive one of the offered candidates.
     *
     * @param str  the reference string the candidates are compared against
     * @param list the candidates
     * @return the best candidate; the sole element for a single-element list; {@code null} if the list
     *         is {@code null} or empty
     */
    private String resolveConflictAmongMatchedAttributes(String str, List<String> list) {
        if (list == null || list.isEmpty()) {
            return null;
        }
        if (list.size() == 1) {
            return list.get(0);
        }
        // The parameter cannot change while the candidates are scored, so read it once.
        int method = -1;
        try {
            method = getParameterAsInt(PARAMETER_CONFLICT_RESOLUTION_METHOD);
        } catch (UndefinedParameterError e) {
            e.printStackTrace();
        }
        String best = list.get(0);
        double bestScore = 0.0d;
        for (String candidate : list) {
            double score;
            switch (method) {
                case 0:
                    score = getJaccardSimilarityOfTwoStrings(str, candidate);
                    break;
                case 1:
                    score = getLevenshteinSimilarityOfTwoStrings(str, candidate);
                    break;
                case 2:
                    score = getJaroWinklerSimilarityOfTwoStrings(str, candidate);
                    break;
                default:
                    score = 0.0d;
                    break;
            }
            // Strictly greater: the first candidate with the top score is kept.
            if (score > bestScore) {
                bestScore = score;
                best = candidate;
            }
        }
        return best;
    }

    /**
     * Declares the parameters of this operator on top of those contributed by the superclass.
     *
     * Threshold parameters of the attribute-name matching are only shown for the matching method they
     * belong to (indices into {@code SUBJECT_ID_MATCHING_METHODS}); schema-matching parameters are only
     * shown for the corresponding entry of {@code SCHEMA_MATCHING_METHODS}, where index 4 additionally
     * exposes the word-vector training settings.
     *
     * @return the parameter types of the superclass followed by the ones declared here
     */
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> parameterTypes = super.getParameterTypes();
        parameterTypes.add(new ParameterTypeString(PARAMETER_EXTENDED_ATTRIBUTE, "Specify the name of extended attribute to search in corpus and for creating correspondences", false, false));
        parameterTypes.add(new ParameterTypeAttribute("subject id", "Select the subject identifier attribute of the query table. This attribute will be assigned the role of id internally. The tables in the provided collection would also be evaluated to automatically detect a subject id that most closely matches with this subject id. The values of subject id may be empty but non-empty values must be unique.", this.queryTableInputPort, false));
        parameterTypes.add(new ParameterTypeCategory(PARAMETER_ATTRIBUTE_MATCHING_METHOD, "Select the method to be used for automatically detecting an attribute that matches extended attribute name and another attribute that matches the subject identifier attribute. This method performs a substring match on attribute names. This examination is performed on each table in the corpus.", SUBJECT_ID_MATCHING_METHODS, 2, false));

        // --- thresholds that depend on the selected attribute matching method ---
        ParameterTypeDouble minFuzzyScore = new ParameterTypeDouble("minimum fuzzy score", "Similarity score should be above this threshold.", 0.0d, 1.0d, 0.5d);
        minFuzzyScore.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_ATTRIBUTE_MATCHING_METHOD, SUBJECT_ID_MATCHING_METHODS, false, new int[]{4}));
        parameterTypes.add(minFuzzyScore);
        ParameterTypeDouble minLevenshtein = new ParameterTypeDouble("minimum levenshtein similarity", "Similarity score should be above this threshold.", 0.0d, 1.0d, 0.5d);
        minLevenshtein.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_ATTRIBUTE_MATCHING_METHOD, SUBJECT_ID_MATCHING_METHODS, false, new int[]{5}));
        parameterTypes.add(minLevenshtein);
        ParameterTypeDouble minCosine = new ParameterTypeDouble("minimum cosine similarity", "Similarity score should be above this threshold.", 0.0d, 1.0d, 0.5d);
        minCosine.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_ATTRIBUTE_MATCHING_METHOD, SUBJECT_ID_MATCHING_METHODS, false, new int[]{7}));
        parameterTypes.add(minCosine);
        ParameterTypeDouble minNGram = new ParameterTypeDouble("minimum n-gram similarity", "Similarity score should be above this threshold.", 0.0d, 1.0d, 0.5d);
        minNGram.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_ATTRIBUTE_MATCHING_METHOD, SUBJECT_ID_MATCHING_METHODS, false, new int[]{6}));
        parameterTypes.add(minNGram);
        // An n-gram needs at least one character, so the lower bound is 1 rather than Integer.MIN_VALUE.
        ParameterTypeInt nGramSize = new ParameterTypeInt("n value for n-gram", "The value of n for N-Gram based distance similarity.", 1, Integer.MAX_VALUE, 2);
        nGramSize.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_ATTRIBUTE_MATCHING_METHOD, SUBJECT_ID_MATCHING_METHODS, false, new int[]{6}));
        parameterTypes.add(nGramSize);
        ParameterTypeDouble minJaroWinkler = new ParameterTypeDouble("minimum jaro winkler similarity", "Similarity score should be above this threshold.", 0.0d, 1.0d, 0.5d);
        minJaroWinkler.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_ATTRIBUTE_MATCHING_METHOD, SUBJECT_ID_MATCHING_METHODS, false, new int[]{8}));
        parameterTypes.add(minJaroWinkler);
        ParameterTypeDouble minJaccard = new ParameterTypeDouble("minimum jaccard similarity", "Similarity score should be above this threshold.", 0.0d, 1.0d, 0.5d);
        minJaccard.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_ATTRIBUTE_MATCHING_METHOD, SUBJECT_ID_MATCHING_METHODS, false, new int[]{9}));
        parameterTypes.add(minJaccard);
        ParameterTypeDouble minSubsequence = new ParameterTypeDouble("minimum subsequence similarity", "Similarity score should be above this threshold.", 0.0d, 1.0d, 0.5d);
        minSubsequence.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_ATTRIBUTE_MATCHING_METHOD, SUBJECT_ID_MATCHING_METHODS, false, new int[]{10}));
        parameterTypes.add(minSubsequence);
        ParameterTypeDouble minPhoneticLevenshtein = new ParameterTypeDouble("minimum phonetic levenshtein similarity", "This threshold applies to English language only data. It represents Levenshtein similarity of two strings whose phonetic encoding is the same. Similarity score should be above this threshold.", 0.0d, 1.0d, 0.5d);
        minPhoneticLevenshtein.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_ATTRIBUTE_MATCHING_METHOD, SUBJECT_ID_MATCHING_METHODS, false, new int[]{11}));
        parameterTypes.add(minPhoneticLevenshtein);

        // --- table qualification thresholds ---
        parameterTypes.add(new ParameterTypeDouble("minimum uniqueness", "Specify how unique an attribute must be in order to be considered as subject identifier by the detection method. Uniquness is checked after the substring match on attribute name succeeds.", 0.0d, 1.0d, 0.99d));
        parameterTypes.add(new ParameterTypeDouble("minimum density", "Minimum density of a table's subject identifier attribute, in order to consider the table as a candidate for further processing.", 0.0d, 1.0d, 0.5d, false));
        parameterTypes.add(new ParameterTypeDouble("minimum ratio", "Ratio of matched and total instances (Examples) in the candidate table.", 0.0d, 1.0d, 0.5d, false));
        parameterTypes.add(new ParameterTypeDouble("minimum coverage", "Coverage is the ratio of matched and total instances (Examples) in the query table.", 0.0d, 1.0d, 0.5d, false));

        // --- optional schema matching ---
        parameterTypes.add(new ParameterTypeBoolean("compute schema matchings", "If this parameter is set to true, the schema of tables in the provided collection is matched with that of query table.", false, false));
        ParameterTypeCategory schemaMatchingMethod = new ParameterTypeCategory("schema matching method", "Select the method for substring (symmetric) matching between the column names of query table and candidate tables. The mean of each pair-wise comparison is computed using the selected method to determine contextually-relevant tables.", SCHEMA_MATCHING_METHODS, 1, false);
        schemaMatchingMethod.registerDependencyCondition(new BooleanParameterCondition(this, "compute schema matchings", false, true));
        parameterTypes.add(schemaMatchingMethod);
        ParameterTypeDouble minEqualsScore = new ParameterTypeDouble("minimum equals score", "Similarity score should be above this threshold.", 0.0d, 1.0d, 0.5d);
        minEqualsScore.registerDependencyCondition(new EqualTypeCondition(this, "schema matching method", SCHEMA_MATCHING_METHODS, false, new int[]{0}));
        parameterTypes.add(minEqualsScore);
        ParameterTypeDouble minContainsScore = new ParameterTypeDouble("minimum contains score", "Similarity score should be above this threshold.", 0.0d, 1.0d, 0.5d);
        minContainsScore.registerDependencyCondition(new EqualTypeCondition(this, "schema matching method", SCHEMA_MATCHING_METHODS, false, new int[]{1}));
        parameterTypes.add(minContainsScore);
        ParameterTypeDouble minContainsAnyScore = new ParameterTypeDouble("minimum contains any score", "Similarity score should be above this threshold.", 0.0d, 1.0d, 0.5d);
        minContainsAnyScore.registerDependencyCondition(new EqualTypeCondition(this, "schema matching method", SCHEMA_MATCHING_METHODS, false, new int[]{2}));
        parameterTypes.add(minContainsAnyScore);
        ParameterTypeDouble minJaccardScore = new ParameterTypeDouble("minimum jaccard similarity score", "Similarity score should be above this threshold.", 0.0d, 1.0d, 0.5d);
        minJaccardScore.registerDependencyCondition(new EqualTypeCondition(this, "schema matching method", SCHEMA_MATCHING_METHODS, false, new int[]{3}));
        parameterTypes.add(minJaccardScore);

        // --- schema matching method 4: word-vector based matching and its training settings ---
        ParameterTypeDouble minTableCosine = new ParameterTypeDouble("minimum table cosine similarity", "The mean of cosine similarity between attribute names of query table and candidate table. Specify the mean to be above this threshold.", 0.0d, 1.0d, 0.5d);
        minTableCosine.registerDependencyCondition(new EqualTypeCondition(this, "schema matching method", SCHEMA_MATCHING_METHODS, false, new int[]{4}));
        parameterTypes.add(minTableCosine);
        ParameterTypeCategory neuralNetworkType = new ParameterTypeCategory("type of neural network", "Select the type of neural network to train.", SCHEMA_MATCH_WORD2VEC_NEURAL_NET_TYPES, 0, false);
        neuralNetworkType.registerDependencyCondition(new EqualTypeCondition(this, "schema matching method", SCHEMA_MATCHING_METHODS, false, new int[]{4}));
        parameterTypes.add(neuralNetworkType);
        ParameterTypeInt minVocabularyFrequency = new ParameterTypeInt("minimal vocabulary frequency", "Specify the minimum frequency for a valid token to be considered part of the vocabulary.", 1, Integer.MAX_VALUE, 1, false);
        minVocabularyFrequency.registerDependencyCondition(new EqualTypeCondition(this, "schema matching method", SCHEMA_MATCHING_METHODS, false, new int[]{4}));
        parameterTypes.add(minVocabularyFrequency);
        ParameterTypeInt vectorSize = new ParameterTypeInt("size of the generated vector", "Size of the word vector generated by the model.", 1, Integer.MAX_VALUE, 100, false);
        vectorSize.registerDependencyCondition(new EqualTypeCondition(this, "schema matching method", SCHEMA_MATCHING_METHODS, false, new int[]{4}));
        parameterTypes.add(vectorSize);
        ParameterTypeInt windowSize = new ParameterTypeInt("size of window", "Text is split into windows during model generation. Specify the size of the window to consider.", 1, Integer.MAX_VALUE, 7, false);
        windowSize.registerDependencyCondition(new EqualTypeCondition(this, "schema matching method", SCHEMA_MATCHING_METHODS, false, new int[]{4}));
        parameterTypes.add(windowSize);
        // The default of 0 must lie inside the permitted range, hence a lower bound of 0.
        ParameterTypeInt negativeSamples = new ParameterTypeInt("number of negative samples", "Number of negative samples to use.", 0, Integer.MAX_VALUE, 0, false);
        negativeSamples.registerDependencyCondition(new EqualTypeCondition(this, "schema matching method", SCHEMA_MATCHING_METHODS, false, new int[]{4}));
        parameterTypes.add(negativeSamples);
        ParameterTypeInt iterations = new ParameterTypeInt("number of iterations", "Set the number of iteration for training.", 1, Integer.MAX_VALUE, 5, false);
        iterations.registerDependencyCondition(new EqualTypeCondition(this, "schema matching method", SCHEMA_MATCHING_METHODS, false, new int[]{4}));
        parameterTypes.add(iterations);
        // Double.MIN_VALUE is the smallest positive double, i.e. these two rates must be greater than zero.
        ParameterTypeDouble downSamplingRate = new ParameterTypeDouble("down sampling rate for high occurrence words", "Set threshold for occurrence of words. Words that appear with higher frequency in the training data, e.g. stopwords, will be randomly removed.", Double.MIN_VALUE, Double.MAX_VALUE, 1.0E-4d);
        downSamplingRate.registerDependencyCondition(new EqualTypeCondition(this, "schema matching method", SCHEMA_MATCHING_METHODS, false, new int[]{4}));
        parameterTypes.add(downSamplingRate);
        ParameterTypeDouble initialLearningRate = new ParameterTypeDouble("initial learning rate", "Set the starting learning rate.", Double.MIN_VALUE, Double.MAX_VALUE, 0.05d);
        initialLearningRate.registerDependencyCondition(new EqualTypeCondition(this, "schema matching method", SCHEMA_MATCHING_METHODS, false, new int[]{4}));
        parameterTypes.add(initialLearningRate);

        // --- conflict resolution and post-processing ---
        parameterTypes.add(new ParameterTypeCategory(PARAMETER_CONFLICT_RESOLUTION_METHOD, "Select the method to be used for conflict resolution which may be needed in two cases. First, when discovering the best among possibly multiple attributes that matched the extended attribute keyword. Second, when discovering the best instance match between the subject id attribute of query table and that of the candidate table. This method is used to decide on the instance (example) value for new attributes being added to the query table from a candidate table that qualifies all given criteria. Conflicts i.e. multiple values may be discovered because in a candidate table, more than one subject id instance may match against one subject id instance of the query table or vice versa.", CONFLICT_RESOLUTION_METHODS, 0, false));
        parameterTypes.add(new ParameterTypeString("decimal point character for type guess", "Character that is used as decimal point in real values. Automated data type guessing has the ability to type extended (fused) table's values as integer or real even if these were represented as nominals in candidate (originating) table.", ".", false));
        parameterTypes.add(new ParameterTypeBoolean(PARAMETER_BALANCE_COVERAGE_AND_PRECISION, "Select to boost precision of matched extended attribute value by resolving conflicts of candidate value that matched potentially with multiple query table values. This option also ensures that in case of multiple best matches, the best of the best match is selected.", false, false));
        parameterTypes.add(new ParameterTypeBoolean("apply manual refinements", "Select to apply manual refinements on the output of the operator during process execution", false));
        return parameterTypes;
    }

    /**
     * Hands the given collection to {@code exampleSetCollection} by calling its
     * {@code deliver} method.
     *
     * @param dataSearchOperatorCollection the collection to deliver
     */
    public void deliverProcessedExampleSetCollection(DataSearchOperatorCollection dataSearchOperatorCollection) {
        exampleSetCollection.deliver(dataSearchOperatorCollection);
    }

    /**
     * Hands the given example set to {@code exampleSetSchemaCorrespondences} by
     * calling its {@code deliver} method.
     *
     * @param exampleSet the example set to deliver
     */
    public void deliverProcessedSchemaCorrespondences(ExampleSet exampleSet) {
        exampleSetSchemaCorrespondences.deliver(exampleSet);
    }

    /**
     * Hands the given example set to {@code exampleSetInstancesCorrespondences} by
     * calling its {@code deliver} method.
     *
     * @param exampleSet the example set to deliver
     */
    public void deliverProcessedInstanceCorrespondences(ExampleSet exampleSet) {
        exampleSetInstancesCorrespondences.deliver(exampleSet);
    }

    /**
     * Applies a {@link GuessValueTypes} operator to the given example set.
     *
     * <p>The helper operator is created from this operator's description and its
     * {@code decimal_point_character} parameter is set from this operator's
     * "decimal point character for type guess" parameter.
     *
     * <p>This is a best-effort step: if configuring or applying the helper throws an
     * {@link OperatorException}, the failure is logged as a warning (with the cause)
     * and the input example set is returned unchanged.
     *
     * @param exampleSet the example set to run type guessing on
     * @return the result of {@code GuessValueTypes.apply}, or {@code exampleSet}
     *         itself when type guessing fails
     */
    private ExampleSet guessTypes(ExampleSet exampleSet) {
        GuessValueTypes guessValueTypes = new GuessValueTypes(getOperatorDescription());
        try {
            guessValueTypes.setParameter("decimal_point_character", getParameterAsString("decimal point character for type guess"));
            return guessValueTypes.apply(exampleSet);
        } catch (OperatorException e) {
            // Log through java.util.logging instead of printStackTrace() so the failure carries
            // context and reaches the configured log handlers. Fully qualified names are used
            // so no change to the file's import block is needed.
            java.util.logging.Logger.getLogger(getClass().getName()).log(
                    java.util.logging.Level.WARNING,
                    "Value type guessing failed; returning the example set with its original value types.",
                    e);
            return exampleSet;
        }
    }
}
