package com.rapidminer.extension.hanminer.operator.processing.filtering;

import com.rapidminer.extension.hanminer.document.SimpleDocumentSet;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeString;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/* loaded from: input_file:com/rapidminer/extension/hanminer/operator/processing/filtering/FilterTokens.class */
/**
 * Operator that cleans and filters whitespace-separated tokens in every document of a
 * {@link SimpleDocumentSet}.
 *
 * <p>Processing happens in two stages, both implemented by
 * {@link #filterTokens(String, boolean, boolean, boolean, int, boolean, String, boolean)}:
 * <ol>
 *   <li>optional character-level clean-up (punctuation, ASCII digits, non-Chinese characters);</li>
 *   <li>optional token-level filtering against a user supplied expression, which is only
 *       applied when that expression is non-empty.</li>
 * </ol>
 */
public class FilterTokens extends Operator {
    private static final String PARAMETER_REMOVE_PUNCTUATION = "remove_punctuations";
    private static final String PARAMETER_REMOVE_NUMBER = "remove_numbers";
    private static final String PARAMETER_REMOVE_NON_CHINESE_CHAR = "remove_non_chinese_characters";
    private static final String PARAMETER_REMOVE_CONDITION = "remove_condition";
    private static final String PARAMETER_USE_REGEX = "use_regular_expression";
    private static final String PARAMETER_EXPR = "expression";
    private static final String PARAMETER_INVERSE_CONDITION = "inverse_condition";

    /** Category labels; the array index of each label equals the matching CONDITION_* constant. */
    private static final String[] CONDITIONS = {"matches", "contains"};

    /** The whole token must equal (or fully match) the expression. */
    public static final int CONDITION_MATCHES = 0;

    /** The token must contain the expression (or a substring matching it). */
    public static final int CONDITION_CONTAINS = 1;

    // Fixed patterns are compiled once instead of on every document.
    private static final Pattern PUNCTUATION = Pattern.compile("\\p{P}");
    /** ASCII digits only; full-width or other Unicode digits are left in place. */
    private static final Pattern ASCII_DIGIT = Pattern.compile("[0-9]");
    /**
     * Anything that is neither whitespace nor in the range U+4E00 to U+9FA5.
     * The previous pattern contained a '|' inside the character class, which is a literal
     * pipe there (not alternation) and caused '|' characters to survive this filter.
     */
    private static final Pattern NON_CHINESE = Pattern.compile("[^\\u4e00-\\u9fa5\\s]");
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    private final InputPort documentSetInput;
    private final OutputPort documentSetOutput;

    /**
     * Creates the operator with one input port and one output port, both named "document set".
     *
     * @param operatorDescription description handed to the {@link Operator} constructor
     */
    public FilterTokens(OperatorDescription operatorDescription) {
        super(operatorDescription);
        this.documentSetInput = getInputPorts().createPort("document set");
        this.documentSetOutput = getOutputPorts().createPort("document set");
    }

    /**
     * Extends the parameter list of the superclass with the clean-up switches, the removal
     * condition, the inversion and regex switches, and the expression itself.
     *
     * @return the superclass parameter list with this operator's parameters appended
     */
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> parameterTypes = super.getParameterTypes();
        parameterTypes.add(new ParameterTypeBoolean(PARAMETER_REMOVE_PUNCTUATION, "If set to true, filter all punctuation characters", true, false));
        parameterTypes.add(new ParameterTypeBoolean(PARAMETER_REMOVE_NUMBER, "If set to true, filter all numbers", false, false));
        parameterTypes.add(new ParameterTypeBoolean(PARAMETER_REMOVE_NON_CHINESE_CHAR, "If set to true, filter all non-Chinese characters", false, false));
        parameterTypes.add(new ParameterTypeCategory(PARAMETER_REMOVE_CONDITION, "The condition to filter tokens.", CONDITIONS, CONDITION_MATCHES, false));
        parameterTypes.add(new ParameterTypeBoolean(PARAMETER_INVERSE_CONDITION, "If set to true, inverse condition.", false, false));
        parameterTypes.add(new ParameterTypeBoolean(PARAMETER_USE_REGEX, "If set to true, match regular expression. Otherwise, match words.", false, false));
        parameterTypes.add(new ParameterTypeString(PARAMETER_EXPR, "The expression to be compared to. Only work if not empty.", "", false));
        return parameterTypes;
    }

    /**
     * Cleans a single document and, if an expression is given, removes tokens by condition.
     *
     * <p>When {@code expression} is null or empty the cleaned text is returned as is (whitespace
     * untouched). Otherwise the cleaned text is split on runs of whitespace, tokens are filtered,
     * and the survivors are joined with a single space.
     *
     * <p>A token is kept when {@code (token satisfies the condition) == inverseCondition}: with
     * {@code inverseCondition == false} the tokens satisfying the condition are removed, with
     * {@code true} only those tokens are kept.
     *
     * @param text              the document text
     * @param removePunctuation strip every character matching {@code \p{P}}
     * @param removeNumbers     strip ASCII digits {@code 0-9}
     * @param removeNonChinese  strip everything except whitespace and U+4E00 to U+9FA5
     * @param condition         {@link #CONDITION_MATCHES} or {@link #CONDITION_CONTAINS}; any
     *                          other value applies no token filter
     * @param inverseCondition  see above
     * @param expression        literal word or regular expression to compare tokens against
     * @param useRegex          interpret {@code expression} as a regular expression
     * @return the processed document text
     * @throws java.util.regex.PatternSyntaxException if {@code useRegex} is set and
     *                          {@code expression} is not a valid regular expression
     */
    public String filterTokens(String text, boolean removePunctuation, boolean removeNumbers, boolean removeNonChinese,
            int condition, boolean inverseCondition, String expression, boolean useRegex) {
        String cleaned = text;
        if (removePunctuation) {
            cleaned = PUNCTUATION.matcher(cleaned).replaceAll("");
        }
        if (removeNumbers) {
            cleaned = ASCII_DIGIT.matcher(cleaned).replaceAll("");
        }
        if (removeNonChinese) {
            cleaned = NON_CHINESE.matcher(cleaned).replaceAll("");
        }
        // A missing expression means "no token filtering"; null is treated like empty.
        if (expression == null || expression.isEmpty()) {
            return cleaned;
        }

        Stream<String> tokens = Arrays.stream(WHITESPACE.split(cleaned));
        switch (condition) {
            case CONDITION_MATCHES:
                if (useRegex) {
                    // Compile once per call rather than once per token.
                    Pattern fullMatch = Pattern.compile(expression);
                    tokens = tokens.filter(token -> fullMatch.matcher(token).matches() == inverseCondition);
                } else {
                    tokens = tokens.filter(token -> token.equals(expression) == inverseCondition);
                }
                break;
            case CONDITION_CONTAINS:
                if (useRegex) {
                    Pattern partialMatch = Pattern.compile(expression);
                    tokens = tokens.filter(token -> partialMatch.matcher(token).find() == inverseCondition);
                } else {
                    tokens = tokens.filter(token -> token.contains(expression) == inverseCondition);
                }
                break;
            default:
                // Unknown condition: tokens pass through unfiltered.
                break;
        }
        return tokens.collect(Collectors.joining(" "));
    }

    /**
     * Reads the document set from the input port, runs every document through
     * {@code filterTokens} with the current parameter values, and delivers a new
     * {@link SimpleDocumentSet} holding the results to the output port.
     *
     * @throws OperatorException declared by this method's signature; presumably raised by the
     *                           port or parameter look-ups, which are not visible in this file
     */
    public void doWork() throws OperatorException {
        SimpleDocumentSet input = this.documentSetInput.getData(SimpleDocumentSet.class);
        boolean removePunctuation = getParameterAsBoolean(PARAMETER_REMOVE_PUNCTUATION);
        boolean removeNumbers = getParameterAsBoolean(PARAMETER_REMOVE_NUMBER);
        boolean removeNonChinese = getParameterAsBoolean(PARAMETER_REMOVE_NON_CHINESE_CHAR);
        int condition = getParameterAsInt(PARAMETER_REMOVE_CONDITION);
        boolean inverseCondition = getParameterAsBoolean(PARAMETER_INVERSE_CONDITION);
        boolean useRegex = getParameterAsBoolean(PARAMETER_USE_REGEX);
        String expression = getParameterAsString(PARAMETER_EXPR);

        // NOTE(review): kept as ArrayList because SimpleDocumentSet's constructor signature
        // is not visible here; relax to List<String> once confirmed.
        ArrayList<String> filtered = new ArrayList<>();
        for (String document : input.getDocuments()) {
            filtered.add(filterTokens(document, removePunctuation, removeNumbers, removeNonChinese,
                    condition, inverseCondition, expression, useRegex));
        }
        this.documentSetOutput.deliver(new SimpleDocumentSet(filtered));
    }
}
