package com.rapidminer.extension.hanminer.operator.featureExtraction;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DoubleArrayDataRow;
import com.rapidminer.example.table.MemoryExampleTable;
import com.rapidminer.extension.hanminer.document.DocumentSet;
import com.rapidminer.extension.hanminer.document.SimpleDocumentSet;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.tools.Ontology;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
import java.util.TreeSet;

/* loaded from: input_file:com/rapidminer/extension/hanminer/operator/featureExtraction/WordCount.class */
public class WordCount extends Operator {
    private InputPort documentSetInput;
    private OutputPort exampleSetOutput;

    public WordCount(OperatorDescription operatorDescription) {
        super(operatorDescription);
        this.documentSetInput = getInputPorts().createPort("document set");
        this.exampleSetOutput = getOutputPorts().createPort("example set");
    }

    public static Map<String, Integer> wordCount(DocumentSet documentSet) {
        List<String> asList = Arrays.asList(documentSet.toString().split("\\s+"));
        HashMap hashMap = new HashMap();
        for (String str : asList) {
            hashMap.put(str, Integer.valueOf(((Integer) hashMap.getOrDefault(str, 0)).intValue() + 1));
        }
        return hashMap;
    }

    public void doWork() throws OperatorException {
        SortedSet<Map.Entry> entriesSortedByValues = entriesSortedByValues(wordCount(this.documentSetInput.getData(SimpleDocumentSet.class)));
        LinkedList linkedList = new LinkedList();
        Ontology ontology = Ontology.ATTRIBUTE_VALUE_TYPE;
        linkedList.add(AttributeFactory.createAttribute("Word", 5));
        Ontology ontology2 = Ontology.ATTRIBUTE_VALUE_TYPE;
        linkedList.add(AttributeFactory.createAttribute("Count", 2));
        MemoryExampleTable memoryExampleTable = new MemoryExampleTable(linkedList);
        for (Map.Entry entry : entriesSortedByValues) {
            double[] dArr = new double[linkedList.size()];
            dArr[0] = r0.getMapping().mapString((String) entry.getKey());
            dArr[1] = ((Integer) entry.getValue()).intValue();
            memoryExampleTable.addDataRow(new DoubleArrayDataRow(dArr));
        }
        this.exampleSetOutput.deliver(memoryExampleTable.createExampleSet());
    }

    public static <K, V extends Comparable<? super V>> SortedSet<Map.Entry<K, V>> entriesSortedByValues(Map<K, V> map) {
        TreeSet treeSet = new TreeSet(new Comparator<Map.Entry<K, V>>() { // from class: com.rapidminer.extension.hanminer.operator.featureExtraction.WordCount.1
            @Override // java.util.Comparator
            public int compare(Map.Entry<K, V> entry, Map.Entry<K, V> entry2) {
                int compareTo = ((Comparable) entry2.getValue()).compareTo(entry.getValue());
                if (compareTo != 0) {
                    return compareTo;
                }
                return 1;
            }
        });
        treeSet.addAll(map.entrySet());
        return treeSet;
    }
}
