package com.rapidminer.extension.operator_toolbox.operator.text_processing;

import com.github.pemistahl.lingua.api.Language;
import com.github.pemistahl.lingua.api.LanguageDetector;
import com.github.pemistahl.lingua.api.LanguageDetectorBuilder;
import com.rapidminer.adaption.belt.IOTable;
import com.rapidminer.belt.buffer.Buffers;
import com.rapidminer.belt.buffer.NominalBuffer;
import com.rapidminer.belt.buffer.NumericBuffer;
import com.rapidminer.belt.column.ColumnType;
import com.rapidminer.belt.execution.SequentialContext;
import com.rapidminer.belt.reader.ObjectReader;
import com.rapidminer.belt.reader.Readers;
import com.rapidminer.belt.table.Builders;
import com.rapidminer.belt.table.Table;
import com.rapidminer.belt.util.ColumnRole;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.ProcessStoppedException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ports.IncompatibleMDClassException;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.ports.metadata.SetRelation;
import com.rapidminer.operator.ports.metadata.table.ColumnInfoBuilder;
import com.rapidminer.operator.ports.metadata.table.TableMetaData;
import com.rapidminer.operator.ports.metadata.table.TableMetaDataBuilder;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeAttribute;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeEnumeration;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.parameter.conditions.EqualStringCondition;
import com.rapidminer.tools.math.container.Range;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.logging.Level;

/* loaded from: input_file:com/rapidminer/extension/operator_toolbox/operator/text_processing/DetectLanguageOperator.class */
/**
 * Operator that detects the language of a text column with the Lingua library.
 *
 * <p>The table received at the {@code exa} input port is extended by two columns:
 * {@value #PREDICTION_NAME} (nominal, holding the ISO 639-3 code of the detected language, role
 * {@link ColumnRole#PREDICTION}) and {@value #CONFIDENCE_NAME} (real, role
 * {@link ColumnRole#SCORE}). The extended table is delivered at the {@code exa} output port and
 * the unchanged input table at the {@code ori} output port.
 *
 * <p>The set of candidate languages is chosen via the {@code languages} parameter, either as one
 * of the predefined groups or as a custom list.
 */
public class DetectLanguageOperator extends Operator {
    private InputPort exaInput;
    private OutputPort exaOutput;
    private OutputPort oriOutput;

    /** Key of the parameter naming the column that holds the texts. */
    public static final String PARAMETER_ATTRIBUTE = "text_attribute";
    /** Key of the enumeration parameter holding the custom language list. */
    public static final String PARAMETER_LANGUAGE_SELECTION_LIST = "language_selection";
    /** Key of the single entry inside the custom language enumeration. */
    public static final String PARAMETER_LANGUAGE_SELECTION_NAME = "language code";
    /** Key of the flag deciding whether a failing row aborts the operator. */
    public static final String PARAMETER_FAIL_ON_ERROR = "fail_on_error";
    /** Key of the category parameter selecting the group of candidate languages. */
    public static final String PARAMETER_LANGUAGE_SELECTION_MODE = "languages";

    private static final String SELECTION_ALL = "all";
    private static final String SELECTION_MOST_SPOKEN = "most_spoken";
    private static final String SELECTION_MOST_SPOKEN_EUROPEAN = "most_spoken_european";
    private static final String SELECTION_CUSTOM = "custom";
    private static final String SELECTION_LATIN_SCRIPT = "latin_script";
    private static final String SELECTION_ARABIC_SCRIPT = "arabic_script";
    private static final String SELECTION_CYRILLIC_SCRIPT = "cyrillic_script";

    /** Name of the generated column holding the predicted language code. */
    public static final String PREDICTION_NAME = "prediction(language)";
    /** Name of the generated column holding the confidence of the prediction. */
    public static final String CONFIDENCE_NAME = "confidence(language)";

    private static final Language[] allLanguages = Language.all().toArray(new Language[0]);
    private static final String[] allLanguageNames = languageToName(allLanguages);
    private static final Language[] mostSpokenLanguages = {
        Language.ENGLISH, Language.CHINESE, Language.HINDI, Language.SPANISH,
        Language.FRENCH, Language.ARABIC, Language.BENGALI, Language.RUSSIAN
    };
    private static final Language[] mostSpokenEuropeanLanguages = {
        Language.ENGLISH, Language.SPANISH, Language.FRENCH, Language.RUSSIAN,
        Language.GERMAN, Language.ITALIAN, Language.PORTUGUESE
    };
    private static final Language[] latinScriptLanguages = Language.allWithLatinScript().toArray(new Language[0]);
    private static final Language[] arabicScriptLanguages = Language.allWithArabicScript().toArray(new Language[0]);
    private static final Language[] cyrillicScriptLanguages = Language.allWithCyrillicScript().toArray(new Language[0]);

    /**
     * Creates the operator, its ports and the meta data rules.
     *
     * <p>The meta data rule for the {@code exa} output copies the input table meta data and adds
     * the confidence column (real, range 0 to 1) and the prediction column (nominal, dictionary
     * built from the ISO 639-3 codes of the currently chosen languages). The {@code ori} output
     * passes the input meta data through.
     *
     * @param operatorDescription the description handed to the {@link Operator} super constructor
     */
    public DetectLanguageOperator(OperatorDescription operatorDescription) {
        super(operatorDescription);
        this.exaInput = getInputPorts().createPort("exa");
        this.exaOutput = getOutputPorts().createPort("exa");
        this.oriOutput = getOutputPorts().createPort("ori");
        getTransformer().addRule(() -> {
            try {
                TableMetaDataBuilder metaDataBuilder =
                        new TableMetaDataBuilder(this.exaInput.getMetaData(TableMetaData.class));
                metaDataBuilder.add(CONFIDENCE_NAME,
                        new ColumnInfoBuilder(ColumnType.REAL)
                                .setNumericRange(new Range(0.0d, 1.0d), SetRelation.EQUAL)
                                .build());
                metaDataBuilder.add(PREDICTION_NAME,
                        new ColumnInfoBuilder(ColumnType.NOMINAL)
                                .addDictionaryValues(Arrays.asList(languagesToISO639_3Code(getChosenLanguages())))
                                .build());
                metaDataBuilder.addColumnMetaData(PREDICTION_NAME, ColumnRole.PREDICTION);
                metaDataBuilder.addColumnMetaData(CONFIDENCE_NAME, ColumnRole.SCORE);
                this.exaOutput.deliverMD(metaDataBuilder.build());
            } catch (IncompatibleMDClassException | UndefinedParameterError e) {
                // Meta data is best effort: log (including the cause) and deliver nothing.
                getLogger().log(Level.SEVERE, "Cannot create output meta data: " + e.getMessage(), e);
            }
        });
        getTransformer().addPassThroughRule(this.exaInput, this.oriOutput);
    }

    /**
     * Runs the language detection row by row over the configured text column.
     *
     * <p>For each row the detector's confidence values are summed up; the first key of the
     * returned sorted map is used as prediction and its value divided by the sum is stored as
     * confidence ({@code NaN} if the sum is zero). If anything fails for a row, the failure is
     * logged and, unless {@code fail_on_error} is set, the row's buffer entries are left untouched
     * (the parameter description states that this yields missing values).
     *
     * @throws UserError if the input cannot be retrieved, a parameter is undefined, or a row fails
     *         while {@code fail_on_error} is enabled
     * @throws ProcessStoppedException declared for the surrounding process; this method does not
     *         throw it explicitly
     */
    @Override
    public void doWork() throws UserError, ProcessStoppedException {
        Table table = this.exaInput.getData(IOTable.class).getTable();
        ObjectReader<String> textReader =
                Readers.objectReader(table.column(getParameterAsString(PARAMETER_ATTRIBUTE)), String.class);
        LanguageDetector detector = LanguageDetectorBuilder.fromLanguages(getChosenLanguages())
                .withMinimumRelativeDistance(0.1d)
                .build();
        // Read once up front; the setting cannot change while the loop is running.
        boolean failOnError = getParameterAsBoolean(PARAMETER_FAIL_ON_ERROR);

        NominalBuffer predictionBuffer = Buffers.nominalBuffer(table.height());
        NumericBuffer confidenceBuffer = Buffers.realBuffer(table.height());
        getProgress().setTotal(table.height());

        int row = 0;
        while (textReader.hasRemaining()) {
            String text = textReader.read();
            try {
                SortedMap<Language, Double> confidences = detector.computeLanguageConfidenceValues(text);
                double total = 0.0d;
                for (double value : confidences.values()) {
                    total += value;
                }
                // firstKey() throws on an empty map; that case is handled by the catch below.
                // NOTE(review): presumably the map is ordered by descending confidence - confirm
                // against the Lingua documentation.
                Language best = confidences.firstKey();
                String isoCode = best.getIsoCode639_3().toString();
                double confidence = Double.NaN;
                if (total != 0.0d) {
                    confidence = confidences.get(best).doubleValue() / total;
                }
                predictionBuffer.set(row, isoCode);
                confidenceBuffer.set(row, confidence);
            } catch (Exception e) {
                // Deliberately broad: any per-row failure is either skipped or reported as UserError.
                getLogger().log(Level.INFO, "Cannot detect language for: " + text);
                getLogger().log(Level.INFO, "Received error message: " + e.getMessage(), e);
                if (failOnError) {
                    throw new UserError(this, "operator_toolbox.text.language_cannot_be_parsed",
                            new Object[]{text, Integer.toString(row)});
                }
            }
            getProgress().step();
            row++;
        }

        Table result = Builders.newTableBuilder(table)
                .add(PREDICTION_NAME, predictionBuffer.toColumn())
                .add(CONFIDENCE_NAME, confidenceBuffer.toColumn())
                .addMetaData(PREDICTION_NAME, ColumnRole.PREDICTION)
                .addMetaData(CONFIDENCE_NAME, ColumnRole.SCORE)
                .build(new SequentialContext());
        this.exaOutput.deliver(new IOTable(result));
        this.oriOutput.deliver(new IOTable(table));
    }

    /**
     * Resolves the {@code languages} parameter to the corresponding array of candidate languages.
     *
     * @return the predefined language group for the selected mode, or the parsed custom list
     * @throws UndefinedParameterError if a required parameter is not defined
     * @throws IllegalStateException if the selected mode is none of the known selections
     */
    private Language[] getChosenLanguages() throws UndefinedParameterError {
        String mode = getParameterAsString(PARAMETER_LANGUAGE_SELECTION_MODE);
        switch (mode) {
            case SELECTION_ALL:
                return allLanguages;
            case SELECTION_MOST_SPOKEN:
                return mostSpokenLanguages;
            case SELECTION_MOST_SPOKEN_EUROPEAN:
                return mostSpokenEuropeanLanguages;
            case SELECTION_LATIN_SCRIPT:
                return latinScriptLanguages;
            case SELECTION_ARABIC_SCRIPT:
                return arabicScriptLanguages;
            case SELECTION_CYRILLIC_SCRIPT:
                return cyrillicScriptLanguages;
            case SELECTION_CUSTOM:
                return parseLanguagesFromParameters();
            default:
                // Report the offending mode value itself, not a parameter looked up under that name.
                throw new IllegalStateException("Unexpected value: " + mode);
        }
    }

    /**
     * Maps each language to its enum constant name.
     *
     * @param languageArr the languages to convert
     * @return an array of the same length holding {@code name()} of each language
     */
    private static String[] languageToName(Language[] languageArr) {
        String[] names = new String[languageArr.length];
        for (int i = 0; i < languageArr.length; i++) {
            names[i] = languageArr[i].name();
        }
        return names;
    }

    /**
     * Maps each language to the string form of its ISO 639-3 code.
     *
     * @param languageArr the languages to convert
     * @return an array of the same length holding the ISO 639-3 code of each language
     */
    private static String[] languagesToISO639_3Code(Language[] languageArr) {
        String[] codes = new String[languageArr.length];
        for (int i = 0; i < languageArr.length; i++) {
            codes[i] = languageArr[i].getIsoCode639_3().toString();
        }
        return codes;
    }

    /**
     * Parses the custom language list parameter into {@link Language} constants.
     *
     * @return one language per entry of the {@code language_selection} enumeration
     * @throws UndefinedParameterError if the parameter is not defined
     * @throws IllegalArgumentException if an entry is not the name of a {@link Language} constant
     */
    private Language[] parseLanguagesFromParameters() throws UndefinedParameterError {
        String[] selectedNames = ParameterTypeEnumeration.transformString2Enumeration(
                getParameterAsString(PARAMETER_LANGUAGE_SELECTION_LIST));
        Language[] languages = new Language[selectedNames.length];
        for (int i = 0; i < selectedNames.length; i++) {
            languages[i] = Language.valueOf(selectedNames[i]);
        }
        return languages;
    }

    /**
     * Declares the operator parameters: the text column, the language selection mode, the custom
     * language list (only shown when the mode is {@code custom}) and the {@code fail_on_error}
     * flag.
     *
     * @return the list of parameter types in display order
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = new ArrayList<>();
        types.add(new ParameterTypeAttribute(PARAMETER_ATTRIBUTE, "Text Attribute", this.exaInput));

        String[] selectionModes = {
            SELECTION_ALL, SELECTION_MOST_SPOKEN, SELECTION_MOST_SPOKEN_EUROPEAN,
            SELECTION_ARABIC_SCRIPT, SELECTION_CYRILLIC_SCRIPT, SELECTION_LATIN_SCRIPT, SELECTION_CUSTOM
        };
        types.add(new ParameterTypeCategory(PARAMETER_LANGUAGE_SELECTION_MODE, "The potential languages", selectionModes, 0));

        ParameterTypeEnumeration customLanguages = new ParameterTypeEnumeration(
                PARAMETER_LANGUAGE_SELECTION_LIST,
                "Advanced parameters that can be set.",
                new ParameterTypeCategory(PARAMETER_LANGUAGE_SELECTION_NAME, "language name", allLanguageNames, 0));
        customLanguages.registerDependencyCondition(
                new EqualStringCondition(this, PARAMETER_LANGUAGE_SELECTION_MODE, true, new String[]{SELECTION_CUSTOM}));
        types.add(customLanguages);

        types.add(new ParameterTypeBoolean(PARAMETER_FAIL_ON_ERROR, "If true the operator fails if there is an error. If set to false you will get missing predictions and confidences for the rows it fails on.", false));
        return types;
    }
}
