package com.rapidminer.operator.preprocessing.ie.features.struct;

import cern.colt.matrix.impl.AbstractFormatter;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.Partition;
import com.rapidminer.example.set.SplittedExampleSet;
import com.rapidminer.example.table.AttributeObjectFactory;
import com.rapidminer.example.table.ObjectAttribute;
import com.rapidminer.example.table.struct.AbstractStructureCreation;
import com.rapidminer.example.table.struct.Structures;
import com.rapidminer.example.table.struct.tree.TreeStructures;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.learner.treekernel.kernel.tools.KernelStructureFactory;
import com.rapidminer.operator.preprocessing.ie.features.tools.Path;
import com.rapidminer.operator.preprocessing.ie.features.tools.PreprocessOperatorImpl;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeAttribute;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeFile;
import com.rapidminer.parameter.ParameterTypeList;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.parameter.conditions.BooleanParameterCondition;
import com.rapidminer.parameter.conditions.EqualTypeCondition;
import com.rapidminer.tools.LogService;
import com.rapidminer.tools.OperatorService;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.trees.LabeledScoredTreeFactory;
import edu.stanford.nlp.trees.Tree;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.StringTokenizer;

/* loaded from: input_file:com/rapidminer/operator/preprocessing/ie/features/struct/ParseTreePreProcessing.class */
public class ParseTreePreProcessing extends AbstractStructureCreation {
    /** Parser instance; created on first use in {@link #newValueToInsert(String)} and reused afterwards. */
    private LexicalizedParser lp;
    /** Parameter key: file handed to the {@code LexicalizedParser} constructor. */
    private final String PARAMETER_MODEL_FILE = "modelfile";
    /** Parameter key: whether the source value has to be run through the parser first. */
    private final String PARAMETER_NEED_PARSING = "needParsing";
    /** Parameter key: list whose second column holds the tags that {@code flatten} must keep. */
    private final String PARAMETER_POS_LIST = "poslist";
    // Tree type codes. Each value is the index of the matching entry in TREE_TYPES and is the
    // value that prune(int, Tree) switches on.
    public static final int TREE_MCT = 0;
    public static final int TREE_MiCT = 1;
    public static final int TREE_PT = 2;
    public static final int TREE_CPT = 3;
    public static final int TREE_FPT = 4;
    public static final int TREE_FCPT = 5;
    /** Tree type code for "none": the tree is cloned but not pruned. */
    public static final int TREE_ORIGINAL = 6;
    /** Set to {@code true} by the constructor; not read anywhere else in this class. */
    private boolean FTK;
    /** Parameter key: selected tree type (category index into {@link #TREE_TYPES}). */
    public static final String PARAMETER_TREE_TYPE = "parseTreeType";
    /** Display names of the tree types, ordered to match the {@code TREE_*} codes. */
    public static final String[] TREE_TYPES = {"MCT", "MiCT", "PT", "CPT", "FPT", "FCPT", "none"};
    /** Parameter key: name of the attribute that holds the sentence / tree string. */
    public static final String PARAMETER_SOURCE_ATTRIBUTE = "valueAttribute";
    /** Parameter key; not referenced by any method of this class. */
    public static final String PARAMETER_CREATE_TREE_STRING = "createTreeString";
    /** Parameter key: boolean that selects the kernel structure index in {@code doWork}. */
    public static final String PARAMETER_FTK = "FTK";
    /** Attribute the tree strings are read from; assigned in {@code doWork}. */
    Attribute sourceAttribute;

    /**
     * Creates the operator and switches the {@code FTK} flag on.
     *
     * @param operatorDescription description passed through to the superclass constructor
     * @throws OperatorException declared for the superclass constructor
     */
    public ParseTreePreProcessing(OperatorDescription operatorDescription) throws OperatorException {
        super(operatorDescription);
        // The PARAMETER_* key fields are final and already initialised at their declaration;
        // assigning them again here does not compile, so only the mutable flag is set.
        this.FTK = true;
    }

    /**
     * Converts the tree string of every example into a pruned tree structure.
     * <p>
     * For each example the value of the attribute named by the {@code valueAttribute} parameter is
     * read (and, when {@code needParsing} is set, first passed through
     * {@link #newValueToInsert(String)}), turned into a tree with {@code Tree.valueOf}, pruned
     * according to {@code parseTreeType} and stored in the {@code Structures.ID_ATTRIBUTE}
     * attribute. The string form of the pruned tree is written to the attribute returned by
     * {@code createIdAttribute}. Examples with an empty string, or whose processing throws, get
     * {@code NaN} as structure value; exceptions are also written to a temporary
     * {@code parse-errors-*.log} file.
     * <p>
     * If at least one example failed with an exception, the delivered set is restricted to
     * partition 0, i.e. to the examples that were converted successfully.
     *
     * @throws OperatorException if the ID attribute or the error log file cannot be created, or a
     *                           parameter lookup fails
     */
    public void doWork() throws OperatorException {
        ExampleSet<Example> exampleSet = (ExampleSet) this.exampleSetInput.getData();
        Attribute idAttribute = createIdAttribute(exampleSet);
        if (idAttribute == null) {
            throw new OperatorException("Unable to create an ID attribute!");
        }
        Attribute structAttribute = (ObjectAttribute) exampleSet.getAttributes().get(Structures.ID_ATTRIBUTE);
        if (structAttribute == null) {
            LogService.getGlobal().log("Creating structID attribute...", 8);
            structAttribute = (ObjectAttribute) AttributeObjectFactory.createAttribute(Structures.ID_ATTRIBUTE, 12);
            exampleSet.getExampleTable().addAttribute(structAttribute);
            exampleSet.getAttributes().addRegular(structAttribute);
        }
        // try-with-resources: the error log stream used to stay open after the operator finished.
        try (PrintStream errorLog = new PrintStream(new FileOutputStream(File.createTempFile("parse-errors-", ".log")))) {
            this.sourceAttribute = exampleSet.getAttributes().get(getParameterAsString(PARAMETER_SOURCE_ATTRIBUTE));
            TreeStructures treeStructures = new TreeStructures();
            // One entry per example: 0 = converted, 1 = empty or failed.
            int[] partition = new int[exampleSet.size()];
            int index = 0;
            int failures = 0;
            int processed = 0;
            int missingE1 = 0;
            int missingE2 = 0;
            List<String> missingE1Trees = new ArrayList<>();
            List<String> missingE2Trees = new ArrayList<>();
            if (getParameterAsBoolean(PARAMETER_FTK)) {
                KernelStructureFactory.setKERNEL_STRUCTURE_INDEX(0);
            } else {
                KernelStructureFactory.setKERNEL_STRUCTURE_INDEX(1);
            }
            // The parameters cannot change while the loop runs, so read them once.
            boolean needParsing = getParameterAsBoolean(this.PARAMETER_NEED_PARSING);
            int treeType = getParameterAsInt(PARAMETER_TREE_TYPE);

            for (Example example : exampleSet) {
                if (processed % 1000 == 0) {
                    System.out.println(processed + " / " + exampleSet.size() + " preprocessed!");
                }
                String treeString = null;
                Double exampleId = null;
                try {
                    exampleId = Double.valueOf(example.getId());
                    treeString = example.getValueAsString(this.sourceAttribute);
                    if (needParsing) {
                        treeString = newValueToInsert(treeString);
                    }
                    if (treeString == null || treeString.length() == 0) {
                        System.out.println("treeStirng is null");
                        example.setValue(structAttribute, Double.NaN);
                        partition[index] = 1;
                    } else {
                        Tree original = Tree.valueOf(treeString);
                        Tree pruned = prune(treeType, original);
                        boolean prunedLacksE1 = getEntityBranch(pruned, new Path(), "E1-") == null;
                        boolean prunedLacksE2 = getEntityBranch(pruned, new Path(), "E2-") == null;
                        if (prunedLacksE1 || prunedLacksE2) {
                            boolean originalLacksE1 = getEntityBranch(original, new Path(), "E1-") == null;
                            boolean originalLacksE2 = getEntityBranch(original, new Path(), "E2-") == null;
                            if (prunedLacksE1 && originalLacksE1) {
                                missingE1++;
                                missingE1Trees.add(treeString);
                            } else if (prunedLacksE2 && originalLacksE2) {
                                missingE2++;
                                missingE2Trees.add(treeString);
                            } else if (originalLacksE1 || originalLacksE2) {
                                System.out.println("Unknown error!");
                            } else {
                                System.out.println("Original tree does not contain E1 AND E2!");
                            }
                        }
                        example.setValue(structAttribute, structAttribute.m48getMapping().mapString(KernelStructureFactory.instanceOf(pruned)));
                        partition[index] = 0;
                        // Only a successfully built tree has a string form to store.
                        example.setValue(idAttribute, pruned.toString());
                    }
                } catch (Exception e) {
                    e.printStackTrace();
                    System.out.println(">>>" + treeString);
                    errorLog.println("Parsing of example " + exampleId + " failed.");
                    errorLog.println("Error parsing: " + treeString);
                    e.printStackTrace(errorLog);
                    errorLog.println("---------------------------------------------------");
                    failures++;
                    example.setValue(structAttribute, Double.NaN);
                    partition[index] = 1;
                }
                // Advance exactly once per example so that partition stays aligned with the set.
                index++;
                processed++;
            }

            ExampleSet result = exampleSet;
            if (failures > 0) {
                // The split view used to be built and thrown away; deliver it so that the
                // failed examples are actually left out.
                SplittedExampleSet convertedOnly = new SplittedExampleSet(exampleSet, new Partition(partition, 2));
                convertedOnly.selectSingleSubset(0);
                result = convertedOnly;
            }
            System.out.println("Tree is null (" + (treeStructures == null) + ")");
            System.out.println(missingE1 + "(E1) and " + missingE2 + "(E2) of " + processed + " already corrupt in original data!");
            System.out.println("1.___________________________________");
            for (String corrupt : missingE1Trees) {
                System.out.println(corrupt);
            }
            System.out.println("");
            System.out.println("2.___________________________________");
            for (String corrupt : missingE2Trees) {
                System.out.println(corrupt);
            }
            this.exampleSetOutput.deliver(result);
        } catch (IOException e) {
            // Keep the cause so the reason for the failure is not lost.
            throw new OperatorException("Cannot create the parse-error log-file!", e);
        }
    }

    /**
     * Prunes a copy of the given tree according to the selected tree type.
     *
     * @param i    one of the {@code TREE_*} codes; unknown codes are treated like {@code TREE_MCT}
     * @param tree tree to prune; it is cloned first and not modified itself
     * @return the pruned clone, or the plain clone for {@code TREE_ORIGINAL}
     */
    private Tree prune(int i, Tree tree) {
        Tree copy = TreeCloner.clone(tree);
        switch (i) {
            case TREE_MiCT:
                return prune_MiCT(copy);
            case TREE_PT:
                return prune_PT(copy);
            case TREE_CPT:
                return prune_CPT(copy);
            case TREE_FPT:
                return prune_FPT(copy);
            case TREE_FCPT:
                return prune_FCPT(copy);
            case TREE_ORIGINAL:
                return copy;
            case TREE_MCT:
            default:
                return prune_MCT(copy);
        }
    }

    /**
     * Depth-first search for the first node whose label starts with the given prefix.
     *
     * @param tree node to start at
     * @param path path that leads to {@code tree}; it is cloned before being extended
     * @param str  label prefix to look for, e.g. {@code "E1-"}
     * @return the path to the first matching node, or {@code null} if there is none; pre-terminal
     *         nodes are not descended into
     */
    private Path getEntityBranch(Tree tree, Path path, String str) {
        if (tree.value().startsWith(str)) {
            return path;
        }
        if (tree.isPreTerminal()) {
            return null;
        }
        int childIndex = 0;
        for (Tree child : tree.getChildrenAsList()) {
            Path extended = path.m60clone();
            extended.addPoint(childIndex, child.value());
            Path match = getEntityBranch(child, extended, str);
            if (match != null) {
                return match;
            }
            childIndex++;
        }
        return null;
    }

    /**
     * Returns the subtree rooted at the deepest node that lies on both paths.
     *
     * @param tree  root the paths start from
     * @param path  path to the first entity, or {@code null}
     * @param path2 path to the second entity, or {@code null}
     * @return {@code tree} unchanged if a path is missing; the node where the common prefix ends
     *         if one path is fully contained in the other; otherwise a new node with that node's
     *         label and children
     */
    private Tree getMCT(Tree tree, Path path, Path path2) {
        if (path == null || path2 == null) {
            return tree;
        }
        int depth = 0;
        // The path entries are boxed; compare them with equals(), because == compares references
        // and is only reliable for values inside the Integer cache.
        while (depth < path.getPath().size() && depth < path2.getPath().size()
                && path.getPath().get(depth).equals(path2.getPath().get(depth))) {
            tree = tree.getChild(path.getPath().get(depth).intValue());
            depth++;
        }
        if (path2.getPath().size() <= depth || path.getPath().size() <= depth) {
            return tree;
        }
        return new LabeledScoredTreeFactory().newTreeNode(tree.value(), tree.getChildrenAsList());
    }

    /**
     * Builds a node from the lowest common ancestor of both paths that keeps only the children
     * from the branch of one entity up to the branch of the other (both inclusive).
     *
     * @param tree  root the paths start from
     * @param path  path to the first entity, or {@code null}
     * @param path2 path to the second entity, or {@code null}
     * @return {@code tree} unchanged if a path is missing; the node where the common prefix ends
     *         if one path is fully contained in the other; otherwise the new, narrowed node
     */
    private Tree getMiCT(Tree tree, Path path, Path path2) {
        if (path == null || path2 == null) {
            return tree;
        }
        int depth = 0;
        // Boxed path entries must be compared with equals(), not ==.
        while (depth < path.getPath().size() && depth < path2.getPath().size()
                && path.getPath().get(depth).equals(path2.getPath().get(depth))) {
            tree = tree.getChild(path.getPath().get(depth).intValue());
            depth++;
        }
        if (path2.getPath().size() <= depth || path.getPath().size() <= depth) {
            return tree;
        }
        int branchA = path2.getPath().get(depth).intValue();
        int branchB = path.getPath().get(depth).intValue();
        // min/max replaces a swap that overwrote one index before reading it, which left both
        // bounds equal and kept a single child only.
        int first = Math.min(branchA, branchB);
        int last = Math.max(branchA, branchB);
        List<Tree> kept = new ArrayList<>();
        for (int child = first; child <= last; child++) {
            kept.add(tree.getChild(child));
        }
        return new LabeledScoredTreeFactory().newTreeNode(tree.value(), kept);
    }

    /**
     * Builds the part of the tree that is enclosed by the two paths.
     * <p>
     * Starting at the lowest common ancestor, the left branch keeps the path node and everything
     * to its right, the right branch keeps the path node and everything to its left, and the
     * children between both branches are copied completely.
     *
     * @param tree  root the paths start from
     * @param path  path to the first entity, or {@code null}
     * @param path2 path to the second entity, or {@code null}
     * @return {@code tree} unchanged if a path is missing; the node where the common prefix ends
     *         if one path is fully contained in the other; otherwise the new enclosed tree
     */
    private Tree getPT(Tree tree, Path path, Path path2) {
        if (path == null || path2 == null) {
            return tree;
        }
        int depth = 0;
        // Boxed path entries must be compared with equals(), not ==.
        while (depth < path.getPath().size() && depth < path2.getPath().size()
                && path.getPath().get(depth).equals(path2.getPath().get(depth))) {
            tree = tree.getChild(path.getPath().get(depth).intValue());
            depth++;
        }
        if (path2.getPath().size() <= depth || path.getPath().size() <= depth) {
            return tree;
        }
        Path leftPath = path2;
        Path rightPath = path;
        int leftIndex = leftPath.getPath().get(depth).intValue();
        int rightIndex = rightPath.getPath().get(depth).intValue();
        if (leftIndex > rightIndex) {
            // Swap through temporaries. The previous code overwrote the values before reading
            // them, so both sides ended up following the same path and one entity was lost.
            int swappedIndex = leftIndex;
            leftIndex = rightIndex;
            rightIndex = swappedIndex;
            Path swappedPath = leftPath;
            leftPath = rightPath;
            rightPath = swappedPath;
        }
        List<Tree> children = new ArrayList<>();
        Tree leftBranch = null;
        try {
            leftBranch = recursivePathWay(TreeCloner.clone(tree.getChild(leftIndex)), leftPath, depth + 1, true);
        } catch (Exception e) {
            e.printStackTrace();
        }
        // As before, a failed branch is added as null.
        children.add(leftBranch);
        for (int between = leftIndex + 1; between < rightIndex; between++) {
            children.add(TreeCloner.clone(tree.getChild(between)));
        }
        Tree rightBranch = null;
        try {
            rightBranch = recursivePathWay(TreeCloner.clone(tree.getChild(rightIndex)), rightPath, depth + 1, false);
        } catch (Exception e) {
            e.printStackTrace();
        }
        children.add(rightBranch);
        return new LabeledScoredTreeFactory().newTreeNode(tree.value(), children);
    }

    /**
     * Copies the tree along a path while dropping the siblings on one side of that path.
     *
     * @param tree node to copy
     * @param path path that is followed downwards
     * @param i    index into {@code path} of the step to take from {@code tree}
     * @param z    {@code true}: keep the path child and its right siblings;
     *             {@code false}: keep the left siblings and the path child
     * @return a plain clone of {@code tree} once the path is used up, the node is pre-terminal or
     *         it has no child list; otherwise a new node with the kept children
     * @throws Exception any failure, wrapped in a new {@code Exception} at every recursion level
     */
    private Tree recursivePathWay(Tree tree, Path path, int i, boolean z) throws Exception {
        try {
            boolean stopHere = tree.isPreTerminal() || i >= path.getPath().size() || tree.getChildrenAsList() == null;
            if (stopHere) {
                return TreeCloner.clone(tree);
            }
            Tree copy = new LabeledScoredTreeFactory().newTreeNode(tree.value(), (List) null);
            int onPath = path.getPath().get(i).intValue();
            if (!z) {
                // Left siblings first, then the path child.
                int left = 0;
                while (left < onPath) {
                    copy.addChild(TreeCloner.clone(tree.getChild(left)));
                    left++;
                }
                copy.addChild(recursivePathWay(TreeCloner.clone(tree.getChild(onPath)), path, i + 1, z));
                return copy;
            }
            // Path child first, then its right siblings.
            copy.addChild(recursivePathWay(TreeCloner.clone(tree.getChild(onPath)), path, i + 1, z));
            int right = onPath + 1;
            while (right < tree.getChildrenAsList().size()) {
                copy.addChild(TreeCloner.clone(tree.getChild(right)));
                right++;
            }
            return copy;
        } catch (Exception e) {
            throw new Exception(e);
        }
    }

    /**
     * Like {@link #getPT(Tree, Path, Path)}, but both paths are first widened by
     * {@link #contextPath(Path, boolean, Tree)}: the first path is shifted to a left neighbour,
     * the second one to a right neighbour, where such a neighbour exists.
     *
     * @param tree  root the paths start from
     * @param path  path to the first entity, or {@code null}
     * @param path2 path to the second entity, or {@code null}
     * @return {@code tree} unchanged if a path is missing, otherwise the enclosed tree for the
     *         widened paths
     */
    private Tree getCPT(Tree tree, Path path, Path path2) {
        // contextPath dereferences its path argument; return early like the other tree builders
        // do when an entity was not found, instead of failing with a NullPointerException.
        if (path == null || path2 == null) {
            return tree;
        }
        Path leftContext = contextPath(path, true, TreeCloner.clone(tree));
        Path rightContext = contextPath(path2, false, TreeCloner.clone(tree));
        return getPT(tree, leftContext, rightContext);
    }

    /**
     * Shifts a path by one sibling at the deepest level where that is possible.
     * <p>
     * With {@code z == true} the deepest index greater than 0 is decreased by one; with
     * {@code z == false} the deepest index that is not the last child is increased by one. All
     * other indices are kept as they are.
     *
     * @param path path to shift
     * @param z    {@code true} to shift towards the left sibling, {@code false} towards the right
     * @param tree root of the tree the path belongs to; used to count the children on each level
     * @return {@code path} itself if no index could be shifted, otherwise a new path whose points
     *         all carry the empty string as label
     */
    private Path contextPath(Path path, boolean z, Tree tree) {
        int depth = path.getPath().size();

        // Number of children of the node on every level of the path, root first.
        int[] childCounts = new int[depth];
        for (int level = 0; level < depth; level++) {
            childCounts[level] = tree.getChildrenAsList().size();
            tree = tree.getChild(path.getPath().get(level).intValue());
        }

        int[] shifted = new int[depth];
        boolean moved = false;
        for (int level = depth - 1; level >= 0; level--) {
            int index = path.getPath().get(level).intValue();
            if (!moved && z && index > 0) {
                shifted[level] = index - 1;
                moved = true;
            } else if (!moved && !z && index < childCounts[level] - 1) {
                shifted[level] = index + 1;
                moved = true;
            } else {
                shifted[level] = index;
            }
        }
        if (!moved) {
            return path;
        }

        Path result = new Path();
        for (int index : shifted) {
            result.addPoint(index, "");
        }
        return result;
    }

    /**
     * Returns a copy of the tree in which nodes with exactly one child are replaced by that child,
     * unless their label is protected.
     *
     * @param tree    tree to flatten
     * @param hashSet labels of single-child nodes that must be kept
     * @return a clone for leaves and pre-terminals, otherwise the flattened copy
     */
    private Tree flatten(Tree tree, HashSet<String> hashSet) {
        if (tree.isPreTerminal() || tree.isLeaf()) {
            return TreeCloner.clone(tree);
        }
        List<Tree> children = tree.getChildrenAsList();
        boolean skipNode = children.size() == 1 && !hashSet.contains(tree.value());
        if (skipNode) {
            return flatten(TreeCloner.clone(tree.getChild(0)), hashSet);
        }
        Tree flattened = new LabeledScoredTreeFactory().newTreeNode(tree.value(), (List) null);
        for (Tree child : children) {
            flattened.addChild(flatten(TreeCloner.clone(child), hashSet));
        }
        return flattened;
    }

    /**
     * Builds the tree of {@link #getPT(Tree, Path, Path)} and flattens it.
     * <p>
     * Every tag in the second column of the {@code poslist} parameter is protected from
     * flattening, both as it is and with an {@code E1-} / {@code E2-} prefix. If the parameter is
     * undefined the stack trace is printed and nothing is protected.
     *
     * @param tree  root the paths start from
     * @param path  path to the first entity, or {@code null}
     * @param path2 path to the second entity, or {@code null}
     * @return the flattened tree
     */
    private Tree getFPT(Tree tree, Path path, Path path2) {
        HashSet<String> keptLabels = new HashSet<>();
        try {
            for (String[] entry : getParameterList("poslist")) {
                String tag = entry[1];
                keptLabels.addAll(Arrays.asList(tag, "E1-" + tag, "E2-" + tag));
            }
        } catch (UndefinedParameterError e) {
            e.printStackTrace();
        }
        Tree enclosed = getPT(tree, path, path2);
        return flatten(enclosed, keptLabels);
    }

    /**
     * Builds the tree of {@link #getCPT(Tree, Path, Path)} and flattens it.
     * <p>
     * The tags in the second column of the {@code poslist} parameter are protected from
     * flattening. If the parameter is undefined the stack trace is printed and nothing is
     * protected.
     * <p>
     * NOTE(review): unlike {@code getFPT}, the {@code E1-} / {@code E2-} prefixed variants of the
     * tags are not protected here; confirm whether that difference is intended.
     *
     * @param tree  root the paths start from
     * @param path  path to the first entity
     * @param path2 path to the second entity
     * @return the flattened tree
     */
    private Tree getFCPT(Tree tree, Path path, Path path2) {
        HashSet<String> keptLabels = new HashSet<>();
        try {
            for (Object entry : getParameterList("poslist")) {
                String[] columns = (String[]) entry;
                keptLabels.add(columns[1]);
            }
        } catch (UndefinedParameterError e) {
            e.printStackTrace();
        }
        Tree enclosed = getCPT(tree, path, path2);
        return flatten(enclosed, keptLabels);
    }

    // Each prune_* method locates the first "E1-" and "E2-" node of the tree and hands both
    // paths (null when no such node exists) to the tree builder of the same name.

    /** Prunes with {@link #getMCT(Tree, Path, Path)}. */
    private Tree prune_MCT(Tree tree) {
        Path firstEntity = getEntityBranch(tree, new Path(), "E1-");
        Path secondEntity = getEntityBranch(tree, new Path(), "E2-");
        return getMCT(tree, firstEntity, secondEntity);
    }

    /** Prunes with {@link #getMiCT(Tree, Path, Path)}. */
    private Tree prune_MiCT(Tree tree) {
        Path firstEntity = getEntityBranch(tree, new Path(), "E1-");
        Path secondEntity = getEntityBranch(tree, new Path(), "E2-");
        return getMiCT(tree, firstEntity, secondEntity);
    }

    /** Prunes with {@link #getPT(Tree, Path, Path)}. */
    private Tree prune_PT(Tree tree) {
        Path firstEntity = getEntityBranch(tree, new Path(), "E1-");
        Path secondEntity = getEntityBranch(tree, new Path(), "E2-");
        return getPT(tree, firstEntity, secondEntity);
    }

    /** Prunes with {@link #getCPT(Tree, Path, Path)}. */
    private Tree prune_CPT(Tree tree) {
        Path firstEntity = getEntityBranch(tree, new Path(), "E1-");
        Path secondEntity = getEntityBranch(tree, new Path(), "E2-");
        return getCPT(tree, firstEntity, secondEntity);
    }

    /** Prunes with {@link #getFPT(Tree, Path, Path)}. */
    private Tree prune_FPT(Tree tree) {
        Path firstEntity = getEntityBranch(tree, new Path(), "E1-");
        Path secondEntity = getEntityBranch(tree, new Path(), "E2-");
        return getFPT(tree, firstEntity, secondEntity);
    }

    /** Prunes with {@link #getFCPT(Tree, Path, Path)}. */
    private Tree prune_FCPT(Tree tree) {
        Path firstEntity = getEntityBranch(tree, new Path(), "E1-");
        Path secondEntity = getEntityBranch(tree, new Path(), "E2-");
        return getFCPT(tree, firstEntity, secondEntity);
    }

    /**
     * Parses a sentence and returns the string form of the resulting parse tree.
     * <p>
     * The sentence is split at {@code AbstractFormatter.DEFAULT_COLUMN_SEPARATOR}; every token is
     * echoed to standard output. The parser is created from the {@code modelfile} parameter on
     * first use and reused afterwards.
     *
     * @param str sentence to parse; may be {@code null}
     * @return the parse tree as string, or the empty string if {@code str} is {@code null} or has
     *         400 or more characters, or if the parser cannot be created or fails
     */
    protected String newValueToInsert(String str) {
        // A missing value is handled like an over-long one instead of raising a NullPointerException.
        if (str == null || str.length() >= 400) {
            return "";
        }
        StringTokenizer tokenizer = new StringTokenizer(str, AbstractFormatter.DEFAULT_COLUMN_SEPARATOR);
        String[] tokens = new String[tokenizer.countTokens()];
        for (int position = 0; tokenizer.hasMoreTokens(); position++) {
            tokens[position] = tokenizer.nextToken();
            System.out.print(AbstractFormatter.DEFAULT_COLUMN_SEPARATOR + tokens[position]);
        }
        if (this.lp == null) {
            try {
                this.lp = new LexicalizedParser(getParameterAsString("modelfile"));
            } catch (Exception e) {
                e.printStackTrace();
                return "";
            }
        }
        this.lp.reset();
        try {
            return this.lp.apply((Object) Arrays.asList(tokens)).toString();
        } catch (Exception e) {
            // Report the failure like the parser-creation branch does; it used to be dropped
            // silently, which made empty results impossible to diagnose.
            e.printStackTrace();
            return "";
        }
    }

    /**
     * Adds this operator's parameters to the ones of the superclass: source attribute, parsing
     * switch, parser model file (depends on the parsing switch), tree type, list of tags that are
     * not flattened (depends on tree type FPT or FCPT) and the FTK switch.
     *
     * @return the parameter list of the superclass, extended by the parameters above
     */
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();

        types.add(new ParameterTypeAttribute(PARAMETER_SOURCE_ATTRIBUTE, "The name of attribute from which the parser should create the parse tree.", this.exampleSetInput));
        types.add(new ParameterTypeBoolean(this.PARAMETER_NEED_PARSING, "Need not to be selected, if sentences are already parsed.", false));

        ParameterTypeFile modelFile = new ParameterTypeFile(this.PARAMETER_MODEL_FILE, "The model used by the stanford parser", "*", true);
        modelFile.registerDependencyCondition(new BooleanParameterCondition(this, this.PARAMETER_NEED_PARSING, false, true));
        types.add(modelFile);

        types.add(new ParameterTypeCategory(PARAMETER_TREE_TYPE, "The tree type to prune the original trees to", TREE_TYPES, TREE_MCT));

        ParameterTypeString aliasColumn = new ParameterTypeString("name", "An alias name for this POS-tag.", false, false);
        ParameterTypeString tagColumn = new ParameterTypeString("pos", "The POS tag which are not flattened.", false, false);
        ParameterTypeList keptTags = new ParameterTypeList(this.PARAMETER_POS_LIST, "Which nodes should not be flattened?", aliasColumn, tagColumn);
        int[] flatteningTypes = {TREE_FPT, TREE_FCPT};
        keptTags.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_TREE_TYPE, TREE_TYPES, false, flatteningTypes));
        types.add(keptTags);

        ParameterTypeBoolean ftk = new ParameterTypeBoolean(PARAMETER_FTK, "Should the trees be converted into the FTK-format?", false);
        ftk.setExpert(false);
        types.add(ftk);

        return types;
    }

    /**
     * Asks {@code OperatorService} for the operator registered under the key
     * {@code "ParseTreePreprocessing_"}.
     *
     * @return the operator created by {@code OperatorService}
     * @throws Exception whatever {@code OperatorService.createOperator} throws
     */
    public PreprocessOperatorImpl create() throws Exception {
        return OperatorService.createOperator("ParseTreePreprocessing_");
    }

    /**
     * @return a new one-element array that contains {@code ExampleSet.class}
     */
    public Class<?>[] getInputClasses() {
        Class<?>[] consumed = new Class[1];
        consumed[0] = ExampleSet.class;
        return consumed;
    }

    /**
     * @return a new one-element array that contains {@code ExampleSet.class}
     */
    public Class<?>[] getOutputClasses() {
        Class<?>[] produced = new Class[1];
        produced[0] = ExampleSet.class;
        return produced;
    }
}
