package com.rapidminer.extension.hanminer.operator.processing.filtering;

import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary;
import com.hankcs.hanlp.seg.common.Term;
import com.rapidminer.extension.hanminer.document.SimpleDocumentSet;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeFile;
import com.rapidminer.parameter.conditions.BooleanParameterCondition;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/* loaded from: input_file:com/rapidminer/extension/hanminer/operator/processing/filtering/FilterStopwords.class */
public class FilterStopwords extends Operator {
    private static final String PARAMETER_LOAD_STOPWORDS_FROM_FILE = "load_stopwords_from_file";
    private static final String PARAMETER_STOPWORDS_FILE = "stopwords_file";
    private InputPort documentSetInput;
    private OutputPort documentSetOutput;

    public FilterStopwords(OperatorDescription operatorDescription) {
        super(operatorDescription);
        this.documentSetInput = getInputPorts().createPort("document set");
        this.documentSetOutput = getOutputPorts().createPort("document set");
    }

    public List<ParameterType> getParameterTypes() {
        List<ParameterType> parameterTypes = super.getParameterTypes();
        parameterTypes.add(new ParameterTypeBoolean(PARAMETER_LOAD_STOPWORDS_FROM_FILE, "If set to true, use custom stopwords dictionary from file. Otherwise, use default stopwords", false, false));
        ParameterTypeFile parameterTypeFile = new ParameterTypeFile(PARAMETER_STOPWORDS_FILE, "Path to the stopwords file", (String) null, true, false);
        parameterTypeFile.registerDependencyCondition(new BooleanParameterCondition(this, PARAMETER_LOAD_STOPWORDS_FROM_FILE, true, true));
        parameterTypes.add(parameterTypeFile);
        return parameterTypes;
    }

    public String filterStopwords(String str) {
        List list = (List) Arrays.asList(str.split("\\s+")).stream().map(str2 -> {
            return new Term(str2, null);
        }).collect(Collectors.toList());
        CoreStopWordDictionary.apply(list);
        return (String) list.stream().map(term -> {
            return term.word;
        }).collect(Collectors.joining(" "));
    }

    public String filterStopwords(String str, File file) {
        List asList = Arrays.asList(str.split("\\s+"));
        HashSet hashSet = new HashSet();
        try {
            BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
            while (true) {
                String readLine = bufferedReader.readLine();
                if (readLine == null) {
                    break;
                }
                hashSet.addAll(Arrays.asList(readLine.split("\\s+")));
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return String.join(" ", (List) asList.stream().filter(str2 -> {
            return !hashSet.contains(str2);
        }).collect(Collectors.toList()));
    }

    public void doWork() throws OperatorException {
        SimpleDocumentSet data = this.documentSetInput.getData(SimpleDocumentSet.class);
        boolean parameterAsBoolean = getParameterAsBoolean(PARAMETER_LOAD_STOPWORDS_FROM_FILE);
        File parameterAsFile = getParameterAsFile(PARAMETER_STOPWORDS_FILE);
        ArrayList arrayList = new ArrayList();
        for (String str : data.getDocuments()) {
            if (parameterAsBoolean) {
                arrayList.add(filterStopwords(str, parameterAsFile));
            } else {
                arrayList.add(filterStopwords(str));
            }
        }
        this.documentSetOutput.deliver(new SimpleDocumentSet(arrayList));
    }
}
