package com.rapidminer.extension.operator.text_processing.modelling.mallet;

import cc.mallet.pipe.CharSequence2TokenSequence;
import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.SerialPipes;
import cc.mallet.pipe.TokenSequence2FeatureSequence;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.operator.IOObjectCollection;
import com.rapidminer.operator.text.Document;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.regex.Pattern;

/* loaded from: input_file:com/rapidminer/extension/operator/text_processing/modelling/mallet/MalletHelper.class */
/**
 * Helper routines that convert RapidMiner text data (document collections and
 * example sets) into Mallet {@link InstanceList}s, and that collect the
 * meta-data attributes found on a collection of documents.
 */
public class MalletHelper {
    /**
     * Tokenisation pattern handed to {@link CharSequence2TokenSequence}: a token
     * starts with a letter, ends with a letter, and has one or more letters or
     * punctuation characters in between (so a token is at least three characters
     * long). Compiled once because {@link Pattern} is immutable and can be shared
     * by every pipe that is created.
     */
    private static final Pattern TOKEN_PATTERN = Pattern.compile("\\p{L}[\\p{L}\\p{P}]+\\p{L}");

    /**
     * Builds a Mallet instance list from a collection of documents.
     *
     * <p>Every document is pushed through a freshly created default pipe. The
     * document's display text is passed both as the instance data and as the
     * instance source, the target is the empty string, and the instance name is
     * the zero-based position of the document in the collection.
     *
     * @param iOObjectCollection the documents to convert
     * @return an instance list with one instance per document, in iteration order
     */
    public static InstanceList convertDocsToInstances(IOObjectCollection<Document> iOObjectCollection) {
        InstanceList instances = new InstanceList(new SerialPipes(getDefaultPipeList()));
        int index = 0;
        for (Document document : iOObjectCollection.getObjects()) {
            // Instance arguments: data, target, name, source.
            instances.addThruPipe(
                    new Instance(document.getDisplayText(), "", Integer.valueOf(index), document.getDisplayText()));
            index++;
        }
        return instances;
    }

    /**
     * Builds a Mallet instance list from the values of one nominal attribute.
     *
     * <p>For every example the nominal value of {@code attribute} becomes the
     * instance data; target and name are empty strings and the source is the
     * constant {@code "RM"}. Each instance is pushed through a freshly created
     * default pipe.
     *
     * <p>This method is intentionally left as an instance method so that
     * existing callers that invoke it on a {@code MalletHelper} object keep
     * working.
     *
     * @param exampleSet the examples to convert
     * @param attribute the attribute whose nominal value is read from each example
     * @return an instance list with one instance per example, in iteration order
     */
    public InstanceList convertToInstanceList(ExampleSet exampleSet, Attribute attribute) {
        InstanceList instances = new InstanceList(new SerialPipes(getDefaultPipeList()));
        // Typed for-each instead of a raw Iterator plus an unchecked cast.
        for (Example example : exampleSet) {
            instances.addThruPipe(new Instance(example.getNominalValue(attribute), "", "", "RM"));
        }
        return instances;
    }

    /**
     * Collects one attribute per distinct meta-data key found on the documents.
     *
     * <p>When the same key occurs on several documents, the attribute is created
     * from the first document (in iteration order) that carries the key; the
     * meta-data type reported by later documents for that key is ignored.
     *
     * @param iOObjectCollection the documents whose meta-data keys are inspected
     * @return a map from meta-data key to the attribute created for that key
     */
    public static HashMap<String, Attribute> getMetaDataHashMap(IOObjectCollection<Document> iOObjectCollection) {
        HashMap<String, Attribute> attributesByKey = new HashMap<>();
        for (Document document : iOObjectCollection.getObjects()) {
            for (String key : document.getMetaDataKeys()) {
                // First occurrence wins: never overwrite an attribute that already exists.
                if (!attributesByKey.containsKey(key)) {
                    attributesByKey.put(key, AttributeFactory.createAttribute(key, document.getMetaDataType(key)));
                }
            }
        }
        return attributesByKey;
    }

    /**
     * Creates the default pipe sequence: tokenise the character data with
     * {@link #TOKEN_PATTERN}, then convert the token sequence into a feature
     * sequence.
     *
     * <p>New pipe objects are created on every call, so each instance list built
     * by this class receives its own pipes; only the compiled pattern is shared.
     *
     * @return a new, mutable list holding the two default pipes in order
     */
    private static ArrayList<Pipe> getDefaultPipeList() {
        ArrayList<Pipe> pipes = new ArrayList<>();
        pipes.add(new CharSequence2TokenSequence(TOKEN_PATTERN));
        pipes.add(new TokenSequence2FeatureSequence());
        return pipes;
    }
}
