package com.rapidminer.extension.hanminer.operator.featureExtraction;

import com.hankcs.hanlp.HanLP;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DoubleArrayDataRow;
import com.rapidminer.example.table.MemoryExampleTable;
import com.rapidminer.extension.hanminer.document.SimpleDocumentSet;
import com.rapidminer.extension.hanminer.operator.featureExtraction.vectorizer.TfIdfVectorizer;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.tools.Ontology;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

/* loaded from: input_file:com/rapidminer/extension/hanminer/operator/featureExtraction/KeywordExtraction.class */
/**
 * Operator that extracts keywords from every document of a {@link SimpleDocumentSet}
 * and delivers an example set with two string columns: {@code "document"} (the
 * document text) and {@code "keywords"} (the selected keywords joined by a single
 * space).
 *
 * <p>Keywords are selected either with HanLP's keyword extraction ("TextRank") or by
 * taking the terms with the highest TF-IDF weight ("TF-IDF").
 */
public class KeywordExtraction extends Operator {
    private static final String PARAMETER_KEYWORD_NUMBER = "keyword number";
    private static final String PARAMETER_CRITERION = "criterion";
    private static final String[] CRITERIA = {"TextRank", "TF-IDF"};

    /** Index of the "TextRank" entry in the criterion category parameter. */
    public static final int CRITERION_TEXTRANK = 0;

    /** Index of the "TF-IDF" entry in the criterion category parameter. */
    public static final int CRITERION_TFIDF = 1;

    private final InputPort documentSetInput;
    private final OutputPort exampleSetOutput;

    /**
     * Creates the operator and registers its "document set" input port and
     * "example set" output port.
     *
     * @param operatorDescription description handed over by the framework
     */
    public KeywordExtraction(OperatorDescription operatorDescription) {
        super(operatorDescription);
        this.documentSetInput = getInputPorts().createPort("document set");
        this.exampleSetOutput = getOutputPorts().createPort("example set");
    }

    /**
     * Adds the operator's own parameters to the inherited ones: the number of
     * keywords to extract and the selection criterion (TF-IDF by default).
     *
     * @return the inherited parameter types followed by the two parameters above
     */
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> parameterTypes = super.getParameterTypes();
        parameterTypes.add(new ParameterTypeInt(PARAMETER_KEYWORD_NUMBER, "The number of keywords to extract.", 1, 10, 5, false));
        parameterTypes.add(new ParameterTypeCategory(PARAMETER_CRITERION, "The criteria to select keywords", CRITERIA, CRITERION_TFIDF, false));
        return parameterTypes;
    }

    /**
     * Selects, for every entry of the TF-IDF result computed for the document set,
     * the terms with the highest weight.
     *
     * @param simpleDocumentSet documents to compute TF-IDF weights for
     * @param i maximum number of keywords per document; a non-positive value yields
     *          empty keyword strings
     * @return one string per TF-IDF entry, in the iteration order of the TF-IDF
     *         result, holding up to {@code i} terms in descending weight order
     *         separated by a single space
     */
    public static List<String> getTfIdfKeywords(SimpleDocumentSet simpleDocumentSet, int i) {
        Map<Object, Map<String, Double>> tfIdfByDocument = TfIdfVectorizer.computeTfIDF(simpleDocumentSet).compute();
        List<String> keywordsPerDocument = new ArrayList<>(tfIdfByDocument.size());
        for (Map<String, Double> termWeights : tfIdfByDocument.values()) {
            List<Map.Entry<String, Double>> ranked = new ArrayList<>(termWeights.entrySet());
            // Highest weight first; List.sort is stable, so ties keep the map's iteration order.
            ranked.sort((left, right) -> right.getValue().compareTo(left.getValue()));
            // Clamp to [0, size] so neither a short document nor a non-positive count breaks subList.
            int limit = Math.max(0, Math.min(i, ranked.size()));
            keywordsPerDocument.add(ranked.subList(0, limit).stream()
                    .map(Map.Entry::getKey)
                    .collect(Collectors.joining(" ")));
        }
        return keywordsPerDocument;
    }

    /**
     * Reads the document set from the input port, extracts keywords for every
     * document according to the configured criterion and delivers the result as an
     * example set with the string attributes "document" and "keywords".
     *
     * @throws OperatorException if the input data or a parameter cannot be obtained
     */
    public void doWork() throws OperatorException {
        SimpleDocumentSet documentSet = this.documentSetInput.getData(SimpleDocumentSet.class);
        int keywordNumber = getParameterAsInt(PARAMETER_KEYWORD_NUMBER);

        MemoryExampleTable table = new MemoryExampleTable(new ArrayList<>());
        int documentColumn = table.addAttribute(AttributeFactory.createAttribute("document", Ontology.STRING));
        int keywordsColumn = table.addAttribute(AttributeFactory.createAttribute("keywords", Ontology.STRING));

        List<String> keywords = extractKeywords(documentSet, keywordNumber);
        // NOTE(review): row i pairs getDocument(i) with the i-th keyword string. For the
        // TF-IDF criterion this assumes the TF-IDF result iterates in document order - confirm.
        for (int i = 0; i < keywords.size(); i++) {
            // String values are stored as the index assigned by the attribute's mapping.
            double[] row = new double[2];
            row[documentColumn] = table.getAttribute(documentColumn).getMapping().mapString(documentSet.getDocument(i));
            row[keywordsColumn] = table.getAttribute(keywordsColumn).getMapping().mapString(keywords.get(i));
            table.addDataRow(new DoubleArrayDataRow(row));
        }
        this.exampleSetOutput.deliver(table.createExampleSet());
    }

    /** Builds one space-separated keyword string per document using the configured criterion. */
    private List<String> extractKeywords(SimpleDocumentSet documentSet, int keywordNumber) throws OperatorException {
        switch (getParameterAsInt(PARAMETER_CRITERION)) {
            case CRITERION_TEXTRANK:
                List<String> keywords = new ArrayList<>();
                for (String document : documentSet.getDocuments()) {
                    keywords.add(String.join(" ", HanLP.extractKeyword(document, keywordNumber)));
                }
                return keywords;
            case CRITERION_TFIDF:
                return getTfIdfKeywords(documentSet, keywordNumber);
            default:
                // Unknown criterion index: no rows are produced.
                return new ArrayList<>();
        }
    }
}
