package com.rapidminer.operator.text.io.wordfilter;

import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.text.Document;
import com.rapidminer.operator.text.Token;
import com.rapidminer.operator.text.io.AbstractTokenProcessor;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.conditions.EqualTypeCondition;
import com.rapidminer.tools.Ontology;
import com.rapidminer.tools.Tools;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;

/* loaded from: input_file:com/rapidminer/operator/text/io/wordfilter/TokenPOSRatioFilter.class */
/**
 * Token processor that keeps only those tokens of a document whose share of
 * adjectives, nouns and verbs reaches the configured minimum ratios.
 *
 * <p>The text of every token is split into sentences, tokenized and tagged with
 * OpenNLP models loaded from the {@code pos/} resources ({@code en-*} or
 * {@code de-*}). The language is either chosen by the user or read from a
 * meta data entry of the document holding the code {@code "en"} or {@code "de"}.
 */
public class TokenPOSRatioFilter extends AbstractTokenProcessor {
    /** Index in {@link #LANGUAGE_SOURCES}: the language is taken from the {@code language} parameter. */
    public static final int LANGUAGE_SOURCE_USER = 0;
    /** Index in {@link #LANGUAGE_SOURCES}: the language is read from the document meta data. */
    public static final int LANGUAGE_SOURCE_DOCUMENT = 1;
    /** Language code used when no supported language could be determined. */
    public static final int UNKNOWN = -1;
    /** Index in {@link #LANGUAGES} for English. */
    public static final int ENGLISH = 0;
    /** Index in {@link #LANGUAGES} for German. */
    public static final int GERMAN = 1;
    public static final String PARAMETER_LANGUAGE_SOURCE = "language_source";
    public static final String PARAMETER_LANGUAGE = "language";
    public static final String PARAMETER_LANGUAGE_ATTRIBUTE = "language_attribute";
    public static final String PARAMETER_MIN_ADJECTIVES = "min_ratio_adjectives";
    public static final String PARAMETER_MIN_NOUNS = "min_ratio_nouns";
    public static final String PARAMETER_MIN_VERBS = "min_ratio_verbs";
    public static final String[] LANGUAGE_SOURCES = {"user", "document"};
    public static final String[] LANGUAGES = {"English", "German"};

    public TokenPOSRatioFilter(OperatorDescription operatorDescription) {
        super(operatorDescription);
    }

    /**
     * Filters the token sequence of the given document by part-of-speech ratios.
     *
     * <p>If no supported language can be determined, a warning is logged and the
     * document is returned unchanged. A token that produces no POS tags at all is
     * treated as having a ratio of 0 for every category, so it is kept only when
     * all three minimum ratios are 0 or less.
     *
     * @param document the document whose token sequence is filtered in place
     * @return the same document instance with its token sequence replaced, or
     *         {@code null} if loading the models or tagging failed (the failure
     *         is logged)
     * @throws UserError if a parameter cannot be read
     */
    @Override // com.rapidminer.operator.text.io.AbstractTokenProcessor
    protected Document doWork(Document document) throws UserError {
        int language = resolveLanguage(document);
        if (language != ENGLISH && language != GERMAN) {
            getLogger().log(Level.WARNING, "Language unspecified, skipping token filtering by POS ratios.");
            return document;
        }

        // Read the thresholds outside the try block so that a parameter error
        // reaches the caller as a UserError instead of being swallowed below.
        double minAdjectives = getParameterAsDouble(PARAMETER_MIN_ADJECTIVES);
        double minNouns = getParameterAsDouble(PARAMETER_MIN_NOUNS);
        double minVerbs = getParameterAsDouble(PARAMETER_MIN_VERBS);

        boolean english = language == ENGLISH;
        String languageCode = english ? "en" : "de";
        // Tag prefixes identifying each word class in the tag set of the chosen model.
        String adjectivePrefix = english ? "JJ" : "ADJ";
        String nounPrefix = english ? "NN" : "N";
        String verbPrefix = english ? "VB" : "V";

        try {
            // The model constructors read the stream eagerly, so each stream
            // can be closed as soon as its model has been built.
            SentenceDetectorME sentenceDetector;
            try (InputStream in = Tools.getResource("pos/" + languageCode + "-sent.bin").openStream()) {
                sentenceDetector = new SentenceDetectorME(new SentenceModel(in));
            }
            TokenizerME tokenizer;
            try (InputStream in = Tools.getResource("pos/" + languageCode + "-token.bin").openStream()) {
                tokenizer = new TokenizerME(new TokenizerModel(in));
            }
            POSTaggerME tagger;
            try (InputStream in = Tools.getResource("pos/" + languageCode + "-pos-maxent.bin").openStream()) {
                tagger = new POSTaggerME(new POSModel(in));
            }

            List<Token> kept = new ArrayList<Token>(document.getTokenSequence().size());
            for (Token token : document.getTokenSequence()) {
                int adjectives = 0;
                int nouns = 0;
                int verbs = 0;
                int total = 0;
                for (String sentence : sentenceDetector.sentDetect(token.getToken())) {
                    for (String tag : tagger.tag(tokenizer.tokenize(sentence))) {
                        total++;
                        if (tag.startsWith(adjectivePrefix)) {
                            adjectives++;
                        }
                        if (tag.startsWith(nounPrefix)) {
                            nouns++;
                        }
                        if (tag.startsWith(verbPrefix)) {
                            verbs++;
                        }
                    }
                }
                if (reachesRatio(adjectives, total, minAdjectives)
                        && reachesRatio(nouns, total, minNouns)
                        && reachesRatio(verbs, total, minVerbs)) {
                    kept.add(token);
                }
            }
            document.setTokenSequence(kept);
            return document;
        } catch (Exception e) {
            // Returning null on failure is kept from the previous behavior;
            // the cause is now reported through the operator's logger.
            getLogger().log(Level.SEVERE, "Filtering tokens by POS ratios failed.", e);
            return null;
        }
    }

    /**
     * Determines the language to tag with, either from the {@code language}
     * parameter or from the document meta data entry named by the
     * {@code language_attribute} parameter.
     *
     * @return {@link #ENGLISH}, {@link #GERMAN} or the raw parameter index when
     *         the user selects the language; {@link #UNKNOWN} when the meta data
     *         entry is missing, has an unexpected type or holds an unsupported code
     */
    private int resolveLanguage(Document document) throws UserError {
        if (getParameterAsInt(PARAMETER_LANGUAGE_SOURCE) == LANGUAGE_SOURCE_USER) {
            return getParameterAsInt(PARAMETER_LANGUAGE);
        }
        String key = getParameterAsString(PARAMETER_LANGUAGE_ATTRIBUTE);
        // NOTE(review): value type 1 is presumably the nominal type of Ontology - confirm.
        if (document.getMetaDataKeys().contains(key) && Ontology.ATTRIBUTE_VALUE_TYPE.isA(document.getMetaDataType(key), 1)) {
            String code = (String) document.getMetaDataValue(key);
            // Constant-first comparison tolerates a null meta data value.
            if ("en".equals(code)) {
                return ENGLISH;
            }
            if ("de".equals(code)) {
                return GERMAN;
            }
        }
        return UNKNOWN;
    }

    /**
     * Checks whether {@code count / total} reaches {@code minRatio}, using
     * floating point division; an empty total counts as a ratio of 0.
     */
    private static boolean reachesRatio(int count, int total, double minRatio) {
        double ratio = total == 0 ? 0.0d : (double) count / total;
        return ratio >= minRatio;
    }

    /**
     * Extends the inherited parameter list with the language selection
     * parameters and the three minimum ratio parameters.
     *
     * @return the list obtained from the superclass with this operator's parameters appended
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();

        ParameterTypeCategory languageSource = new ParameterTypeCategory(PARAMETER_LANGUAGE_SOURCE, "Specifies whether the language is set explicitely by the user or specified as a meta data attribute in the document.", LANGUAGE_SOURCES, LANGUAGE_SOURCE_USER);
        languageSource.setExpert(false);
        types.add(languageSource);

        // Registered with a condition on the language source being "user".
        ParameterTypeCategory language = new ParameterTypeCategory(PARAMETER_LANGUAGE, "The language for the used part of speech (POS) tagger.", LANGUAGES, ENGLISH);
        language.setExpert(false);
        language.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_LANGUAGE_SOURCE, LANGUAGE_SOURCES, true, new int[]{LANGUAGE_SOURCE_USER}));
        types.add(language);

        // Registered with a condition on the language source being "document".
        ParameterTypeString languageAttribute = new ParameterTypeString(PARAMETER_LANGUAGE_ATTRIBUTE, "The meta data attribute key that contains the iso language code of the document.", "language");
        languageAttribute.setExpert(false);
        languageAttribute.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_LANGUAGE_SOURCE, LANGUAGE_SOURCES, true, new int[]{LANGUAGE_SOURCE_DOCUMENT}));
        types.add(languageAttribute);

        ParameterTypeDouble minAdjectives = new ParameterTypeDouble(PARAMETER_MIN_ADJECTIVES, "The minimum ratio of adjectives for each token to be kept", 0.0d, 1.0d, 0.0d);
        minAdjectives.setExpert(false);
        types.add(minAdjectives);

        ParameterTypeDouble minNouns = new ParameterTypeDouble(PARAMETER_MIN_NOUNS, "The minimum ratio of nouns for each token to be kept", 0.0d, 1.0d, 0.0d);
        minNouns.setExpert(false);
        types.add(minNouns);

        ParameterTypeDouble minVerbs = new ParameterTypeDouble(PARAMETER_MIN_VERBS, "The minimum ratio of verbs for each token to be kept", 0.0d, 1.0d, 0.0d);
        minVerbs.setExpert(false);
        types.add(minVerbs);

        return types;
    }
}
