package edu.pitt.dbmi.edda.operator.ldaop;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.ports.metadata.AttributeMetaData;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.GenerateNewMDRule;
import com.rapidminer.operator.ports.metadata.MetaData;
import com.rapidminer.operator.ports.metadata.PassThroughOrGenerateRule;
import com.rapidminer.operator.ports.metadata.SetRelation;
import com.rapidminer.operator.ports.metadata.SimplePrecondition;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDirectory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.ParameterTypeList;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.UndefinedParameterError;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import salvo.jesus.graph.xml.XGMML;

/* loaded from: input_file:edu/pitt/dbmi/edda/operator/ldaop/TopicModelOperator.class */
/**
 * Operator that configures a {@link TopicModelWorker} from its parameters and optional inputs,
 * runs it, and delivers the resulting example set and topic model on its output ports.
 *
 * <p>Ports: an optional "example set" input, an optional "topic model" input, and the
 * "example set" / "topic model" outputs.
 */
public class TopicModelOperator extends Operator {
    /** Key of the list parameter holding (class name, directory) pairs. */
    public static final String PARAMETER_TEXTS = "text_directories";
    /** Key of the directory column inside {@link #PARAMETER_TEXTS}. */
    public static final String PARAMETER_TEXT_DIR = "directory";
    public static final String PARAMETER_NUMBER_OF_TOPICS = "number_of_topics";
    public static final String PARAMETER_NUMBER_OF_ITERATIONS = "number_of_iterations";
    public static final String PARAMETER_BURN_IN_PERIOD = "burn_in_period";
    public static final String PARAMETER_DIRICHLET_ALPHA = "dirichlet_alpha";
    public static final String PARAMETER_DIRICHLET_BETA = "dirichlet_beta";
    public static final String PARAMETER_RANDOM_SEED = "random_seed";
    public static final String PARAMETER_OPTIMIZE_INTERVAL = "optimize_interval";
    public static final String PARAMETER_SYMMETRIC_ALPHA = "symmetric_alpha";
    // NOTE(review): this key contains a space, unlike every other key in this class. It is left
    // untouched because changing a parameter key would break previously saved processes.
    public static final String PARAMETER_TEMPERING_INTERVAL = "tempering interval";
    // NOTE(review): the three inferencer parameters below are declared in getParameterTypes()
    // but are never read in doWork(); confirm whether the worker is supposed to receive them.
    public static final String PARAMETER_INFERENCER_ITERATIONS = "inferencer_iterations";
    public static final String PARAMETER_INFERENCER_THINNING = "inferencer_thinning";
    public static final String PARAMETER_INFERENCER_BURN_IN_PERIOD = "inferencer_burn_in_period";
    public static final String PARAMETER_GENERATE_DIAGNOSTICS = "generate_diagnostics";
    public static final String PARAMETER_NUM_MOST_PROB_TO_DISPLAY = "num_most_prob_to_display";
    public static final String PARAMETER_EXAMPLE_SET_FEATURES = "example_set_features";
    /** Display names of the output feature choices; indices match the {@code ES_FTRS_*} constants. */
    public static final String[] EXAMPLE_SET_FEATURES = {"Document Thetas", "KL Divergence (From Medians)", "Both"};
    public static final int ES_FTRS_DOC_THETAS = 0;
    public static final int ES_FTRS_KL_DIVERGE = 1;
    public static final int ES_FTRS_BOTH = 2;
    /** Name of the attribute that is announced in the meta data and marked special on output. */
    public static final String METADATA_PATH = "metadata_path";

    private static final Logger LOGGER = Logger.getLogger(TopicModelOperator.class.getName());

    /** Description shared by the expert parameters that carry no description of their own. */
    private static final String SEE_MALLET = "see Mallet\n";

    private final InputPort exampleSetInput;
    private final InputPort topicModelInput;
    private final OutputPort exampleSetOutput;
    private final OutputPort topicModelOutput;

    /**
     * Creates the ports, registers non-mandatory preconditions on both inputs and installs the
     * meta data rules for both outputs.
     *
     * @param operatorDescription description handed on to the {@link Operator} constructor
     */
    public TopicModelOperator(OperatorDescription operatorDescription) {
        super(operatorDescription);
        this.exampleSetInput = getInputPorts().createPort("example set");
        this.topicModelInput = getInputPorts().createPort("topic model");
        this.exampleSetOutput = getOutputPorts().createPort("example set");
        this.topicModelOutput = getOutputPorts().createPort("topic model");
        this.exampleSetInput.addPrecondition(
                new SimplePrecondition(this.exampleSetInput, new MetaData(ExampleSet.class), false));
        this.topicModelInput.addPrecondition(
                new SimplePrecondition(this.topicModelInput, new MetaData(TopicModelAdapter.class), false));
        getTransformer().addRule(new PassThroughOrGenerateRule(
                this.topicModelInput, this.topicModelOutput, new MetaData(TopicModelAdapter.class)));
        getTransformer().addRule(new GenerateNewMDRule(this.exampleSetOutput, new ExampleSetMetaData()) {
            public MetaData modifyMetaData(MetaData metaData) {
                return TopicModelOperator.this.addMetaDataAttributes((ExampleSetMetaData) metaData);
            }
        });
    }

    /**
     * Reads all parameters, configures a {@link TopicModelWorker}, runs it and delivers its
     * example set and topic model adapter to the output ports.
     *
     * @throws OperatorException if a parameter cannot be read, or (as a {@link UserError} with
     *         key {@code edda.corrupt_es_input}) if the input example set fails verification
     */
    public void doWork() throws OperatorException {
        List<String[]> labelDirectoryPairs = getParameterList(PARAMETER_TEXTS);
        TopicModelWorker worker = new TopicModelWorker();

        // The example set port is optional, so this may be null.
        ExampleSet incoming = (ExampleSet) this.exampleSetInput.getDataOrNull();
        worker.setInputExampleSet(incoming);
        if (!new ExampleSetVerifier(incoming).isVerified()) {
            throw new UserError(this, "edda.corrupt_es_input");
        }

        worker.setLabelDirectoryPairs(labelDirectoryPairs);
        worker.setNumTopics(getParameterAsInt(PARAMETER_NUMBER_OF_TOPICS));
        worker.setNumberOfIterations(getParameterAsInt(PARAMETER_NUMBER_OF_ITERATIONS));
        worker.setBurnInPeriod(getParameterAsInt(PARAMETER_BURN_IN_PERIOD));
        worker.setAlpha(getParameterAsDouble(PARAMETER_DIRICHLET_ALPHA));
        worker.setBeta(getParameterAsDouble(PARAMETER_DIRICHLET_BETA));
        worker.setRandomSeed(getParameterAsInt(PARAMETER_RANDOM_SEED));
        worker.setOptimizeInterval(getParameterAsInt(PARAMETER_OPTIMIZE_INTERVAL));
        worker.setSymmetricAlpha(getParameterAsBoolean(PARAMETER_SYMMETRIC_ALPHA));
        worker.setTemperingInterval(getParameterAsInt(PARAMETER_TEMPERING_INTERVAL));
        worker.setNumberMostProbableWordsForDisplay(getParameterAsInt(PARAMETER_NUM_MOST_PROB_TO_DISPLAY));
        worker.setGeneratingDiagnostics(getParameterAsBoolean(PARAMETER_GENERATE_DIAGNOSTICS));
        specifyOutputExampleSet(worker);

        // A negative alpha means "derive it": alpha becomes 50 / number of topics.
        // NOTE(review): number_of_topics is allowed to be 0, which makes this divisor zero.
        if (worker.getAlpha() < 0.0d) {
            worker.setAlpha(50.0d / worker.getNumTopics());
        }

        // Reuse an incoming topic model when one is connected.
        TopicModelAdapter incomingModel = (TopicModelAdapter) this.topicModelInput.getDataOrNull();
        if (incomingModel != null) {
            worker.setTopicModelAdapter(incomingModel);
        }

        worker.process();

        ExampleSet outgoing = worker.getOutgoingExampleSet();
        markMetaDataAttributesSpecial(outgoing);
        this.exampleSetOutput.deliver(outgoing);
        this.topicModelOutput.deliver(worker.getTopicModelAdapter());
    }

    /**
     * Turns every attribute named {@link #METADATA_PATH} into a special attribute whose
     * special name equals the attribute name.
     *
     * @param exampleSet example set whose attributes are updated in place
     */
    private void markMetaDataAttributesSpecial(ExampleSet exampleSet) {
        // Collect first, change afterwards, so that roles are not modified while iterating.
        List<Attribute> toMark = new ArrayList<Attribute>();
        for (Attribute attribute : exampleSet.getAttributes()) {
            if (METADATA_PATH.equals(attribute.getName())) {
                toMark.add(attribute);
            }
        }
        for (Attribute attribute : toMark) {
            exampleSet.getAttributes().setSpecialAttribute(attribute, attribute.getName());
        }
    }

    /**
     * Translates the {@link #PARAMETER_EXAMPLE_SET_FEATURES} choice into the worker's two
     * output flags. Any value other than "thetas only" or "KL only" enables both outputs.
     *
     * @param topicModelWorker worker to configure
     * @throws UndefinedParameterError if the parameter cannot be read
     */
    private void specifyOutputExampleSet(TopicModelWorker topicModelWorker) throws UndefinedParameterError {
        switch (getParameterAsInt(PARAMETER_EXAMPLE_SET_FEATURES)) {
            case ES_FTRS_DOC_THETAS:
                topicModelWorker.setOutputingDocumentThetas(true);
                topicModelWorker.setOutputingKulbachLeiblerDivergences(false);
                break;
            case ES_FTRS_KL_DIVERGE:
                topicModelWorker.setOutputingDocumentThetas(false);
                topicModelWorker.setOutputingKulbachLeiblerDivergences(true);
                break;
            default:
                // ES_FTRS_BOTH and any unexpected value.
                topicModelWorker.setOutputingDocumentThetas(true);
                topicModelWorker.setOutputingKulbachLeiblerDivergences(true);
                break;
        }
    }

    /**
     * Builds the parameter definitions of this operator, in display order.
     *
     * @return a new, mutable list of parameter types
     */
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = new LinkedList<ParameterType>();

        ParameterTypeList textDirectories = new ParameterTypeList(
                PARAMETER_TEXTS,
                "Specifies class and directory path pairs for corpus input.\n"
                        + "This parameter is ignored if an input example set is given.",
                new ParameterTypeString("class_name", "Class name"),
                new ParameterTypeDirectory(PARAMETER_TEXT_DIR, "Directory of files", false));
        textDirectories.setOptional(true);
        registerParameterType(types, textDirectories, false);

        // Non-expert parameters.
        registerParameterType(types, new ParameterTypeInt(PARAMETER_NUMBER_OF_TOPICS,
                "The number of topics for this model.\n", 0, Integer.MAX_VALUE, 10), false);
        // The lower bound of -1 lets the user request the derived alpha (see doWork()).
        registerParameterType(types, new ParameterTypeDouble(PARAMETER_DIRICHLET_ALPHA,
                "Alpha parameter of the Dirichlet prior.\n", -1.0d, Double.MAX_VALUE, 1.0d), false);
        // Double.MIN_VALUE is the smallest positive double, so beta must be strictly positive.
        registerParameterType(types, new ParameterTypeDouble(PARAMETER_DIRICHLET_BETA,
                "Beta parameter of the Dirichlet prior.\n", Double.MIN_VALUE, Double.MAX_VALUE, 0.01d), false);
        registerParameterType(types, new ParameterTypeInt(PARAMETER_NUMBER_OF_ITERATIONS,
                "Number of iterations of the Gibbs MCMC algorithm.\n", 0, Integer.MAX_VALUE, 1000), false);
        registerParameterType(types, new ParameterTypeInt(PARAMETER_BURN_IN_PERIOD,
                "Iterations before theta and phi begin to influence zeta assignments.\n",
                0, Integer.MAX_VALUE, 50), false);
        registerParameterType(types, new ParameterTypeCategory(PARAMETER_EXAMPLE_SET_FEATURES,
                "Thetas, KL divergences, or both.\n", EXAMPLE_SET_FEATURES, ES_FTRS_DOC_THETAS), false);
        registerParameterType(types, new ParameterTypeInt(PARAMETER_NUM_MOST_PROB_TO_DISPLAY,
                "Shows N most probable words (tokens) per topic in a visualization screen. \n",
                0, Integer.MAX_VALUE, 10), false);

        // Expert parameters.
        registerParameterType(types, new ParameterTypeInt(PARAMETER_RANDOM_SEED,
                "For repeatable results over runs, users should set this parameter.\n"
                        + "It will cause the pseudo random number generator to repeat the same sequences,\n"
                        + "thus driving the algorithm to the same conclusion.\n",
                0, Integer.MAX_VALUE, 2013), true);
        registerParameterType(types,
                new ParameterTypeBoolean(PARAMETER_SYMMETRIC_ALPHA, SEE_MALLET, true), true);
        registerParameterType(types, new ParameterTypeInt(PARAMETER_OPTIMIZE_INTERVAL,
                SEE_MALLET, 0, Integer.MAX_VALUE, 10), true);
        registerParameterType(types, new ParameterTypeInt(PARAMETER_TEMPERING_INTERVAL,
                SEE_MALLET, 0, Integer.MAX_VALUE, 0), true);
        registerParameterType(types, new ParameterTypeInt(PARAMETER_INFERENCER_ITERATIONS,
                SEE_MALLET, 0, Integer.MAX_VALUE, 0), true);
        registerParameterType(types, new ParameterTypeInt(PARAMETER_INFERENCER_THINNING,
                SEE_MALLET, 0, Integer.MAX_VALUE, 0), true);
        registerParameterType(types, new ParameterTypeInt(PARAMETER_INFERENCER_BURN_IN_PERIOD,
                SEE_MALLET, 0, Integer.MAX_VALUE, 0), true);
        registerParameterType(types, new ParameterTypeBoolean(PARAMETER_GENERATE_DIAGNOSTICS,
                "Displays diagnostic messages to standard output.\n", false), true);

        return types;
    }

    /** Sets the expert flag on {@code type} and appends it to {@code types}. */
    private static void registerParameterType(List<ParameterType> types, ParameterType type, boolean expert) {
        type.setExpert(expert);
        types.add(type);
    }

    /**
     * Adds the label attribute (with the configured class names as its value set) and the
     * {@link #METADATA_PATH} attribute to the given meta data. This is best effort: if anything
     * fails, the meta data is returned with whatever had been added up to that point.
     *
     * @param exampleSetMetaData meta data to extend in place
     * @return the same {@code exampleSetMetaData} instance
     */
    protected ExampleSetMetaData addMetaDataAttributes(ExampleSetMetaData exampleSetMetaData) {
        try {
            // NOTE(review): the label name/role comes from a constant of an unrelated graph
            // library (presumably the string "label") — confirm and consider a local constant.
            AttributeMetaData labelMetaData = new AttributeMetaData(
                    XGMML.LABEL_ATTRIBUTE_LITERAL, getProvidedLabelType(), XGMML.LABEL_ATTRIBUTE_LITERAL);
            labelMetaData.setValueSet(new HashSet<String>(getLabelValues()), SetRelation.EQUAL);
            exampleSetMetaData.addAttribute(labelMetaData);
            // NOTE(review): 7 is an unnamed value-type code — confirm against the value-type ontology.
            exampleSetMetaData.addAttribute(new AttributeMetaData(METADATA_PATH, 7, METADATA_PATH));
        } catch (Exception e) {
            // Meta data propagation must not fail because of an incomplete configuration,
            // so the problem is only recorded for debugging.
            LOGGER.log(Level.FINE, "Could not derive output meta data attributes", e);
        }
        return exampleSetMetaData;
    }

    /**
     * Chooses the value type announced for the label attribute: code 6 when exactly two
     * (class name, directory) pairs are configured, code 7 otherwise.
     *
     * <p>NOTE(review): 6 and 7 are unnamed value-type codes (presumably the two-class and
     * multi-class nominal types) — confirm against the value-type ontology.
     *
     * @return the value-type code for the label attribute
     * @throws UndefinedParameterError if {@link #PARAMETER_TEXTS} cannot be read
     */
    protected int getProvidedLabelType() throws UndefinedParameterError {
        return getParameterList(PARAMETER_TEXTS).size() == 2 ? 6 : 7;
    }

    /**
     * Collects the class names, i.e. the first entry of every pair in {@link #PARAMETER_TEXTS}.
     *
     * @return a new list of class names in parameter order; empty if the parameter cannot be read
     */
    protected List<String> getLabelValues() {
        List<String> labels = new LinkedList<String>();
        try {
            List<String[]> labelDirectoryPairs = getParameterList(PARAMETER_TEXTS);
            for (String[] pair : labelDirectoryPairs) {
                labels.add(pair[0]);
            }
        } catch (UndefinedParameterError e) {
            // Best effort: an unreadable parameter simply yields no label values.
            LOGGER.log(Level.FINE, "Parameter " + PARAMETER_TEXTS + " is undefined; no label values available", e);
        }
        return labels;
    }
}
