package com.rapidminer.operator.text.io.wordfilter;

import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.text.Document;
import com.rapidminer.operator.text.Token;
import com.rapidminer.operator.text.io.AbstractTokenProcessor;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.tools.Tools;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;

/* loaded from: input_file:com/rapidminer/operator/text/io/wordfilter/TokenPOSFilter.class */
public class TokenPOSFilter extends AbstractTokenProcessor {
    public static final String[] LANGUAGES = {"English", "German"};
    public static final int ENGLISH = 0;
    public static final int GERMAN = 1;
    public static final String PARAMETER_LANGUAGE = "language";
    public static final String PARAMETER_EXPRESSION = "expression";
    public static final String PARAMETER_INVERT = "invert_filter";

    public TokenPOSFilter(OperatorDescription operatorDescription) {
        super(operatorDescription);
    }

    @Override // com.rapidminer.operator.text.io.AbstractTokenProcessor
    protected Document doWork(Document document) throws UserError {
        String str = getParameterAsInt("language") == 0 ? "en" : "de";
        try {
            SentenceDetectorME sentenceDetectorME = new SentenceDetectorME(new SentenceModel(Tools.getResource("pos/" + str + "-sent.bin").openStream()));
            TokenizerME tokenizerME = new TokenizerME(new TokenizerModel(Tools.getResource("pos/" + str + "-token.bin").openStream()));
            POSTaggerME pOSTaggerME = new POSTaggerME(new POSModel(Tools.getResource("pos/" + str + "-pos-maxent.bin").openStream()));
            Pattern compile = Pattern.compile(getParameterAsString("expression"));
            boolean parameterAsBoolean = getParameterAsBoolean(PARAMETER_INVERT);
            ArrayList arrayList = new ArrayList(document.getTokenSequence().size());
            for (Token token : document.getTokenSequence()) {
                for (String str2 : sentenceDetectorME.sentDetect(token.getToken())) {
                    String[] strArr = tokenizerME.tokenize(str2);
                    String[] tag = pOSTaggerME.tag(strArr);
                    for (int i = 0; i < strArr.length; i++) {
                        Matcher matcher = compile.matcher(tag[i]);
                        if (parameterAsBoolean) {
                            if (!matcher.matches()) {
                                arrayList.add(new Token(strArr[i], token));
                            }
                        } else if (matcher.matches()) {
                            arrayList.add(new Token(strArr[i], token));
                        }
                    }
                }
            }
            document.setTokenSequence(arrayList);
            return document;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    public List<ParameterType> getParameterTypes() {
        List<ParameterType> parameterTypes = super.getParameterTypes();
        ParameterTypeCategory parameterTypeCategory = new ParameterTypeCategory("language", "The language for the used part of speech (POS) tagger.", LANGUAGES, 0);
        parameterTypeCategory.setExpert(false);
        parameterTypes.add(parameterTypeCategory);
        parameterTypes.add(new ParameterTypeString("expression", "The regular expression used for matching the POS tags (STTS for German and PENN for English).", false));
        ParameterTypeBoolean parameterTypeBoolean = new ParameterTypeBoolean(PARAMETER_INVERT, "Indicates if the filter should be inverted.", false);
        parameterTypeBoolean.setExpert(false);
        parameterTypes.add(parameterTypeBoolean);
        return parameterTypes;
    }
}
