package com.rapidminer.extension.hanminer.operator.featureExtraction.vectorizer;

import com.hankcs.hanlp.mining.word2vec.DocVectorModel;
import com.hankcs.hanlp.mining.word2vec.Vector;
import com.hankcs.hanlp.mining.word2vec.Word2VecTrainer;
import com.hankcs.hanlp.mining.word2vec.WordVectorModel;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.FloatArrayDataRow;
import com.rapidminer.example.table.MemoryExampleTable;
import com.rapidminer.extension.hanminer.document.DocumentSet;
import com.rapidminer.extension.hanminer.document.SimpleDocumentSet;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.ports.metadata.MetaData;
import com.rapidminer.operator.ports.metadata.SimplePrecondition;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeDirectory;
import com.rapidminer.parameter.ParameterTypeFile;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.conditions.BooleanParameterCondition;
import com.rapidminer.tools.Ontology;
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.logging.Logger;

/* loaded from: input_file:com/rapidminer/extension/hanminer/operator/featureExtraction/vectorizer/Doc2vec.class */
public class Doc2vec extends Operator {
    private static Logger logger = Logger.getLogger("Doc2vec");
    private static final String PARAMETER_LOAD_MODEL_FROM_FILE = "load_model_from_file";
    private static final String PARAMETER_MODEL_FILE = "model_file";
    private static final String PARAMETER_USE_DEFAULT_MODEL = "use_default_model";
    private static final String PARAMETER_EMBEDDING_SIZE = "embedding_size";
    private static final String PARAMETER_CORPUS_FILE = "corpus_file";
    private static final String DEFAULT_MODEL_FILE = "data/model/word2vec/word2vec_100";
    private static final String PARAMETER_SAVE_MODEL_TO = "save_model_to";
    private InputPort documentSetInput;
    private OutputPort exampleSetOutput;

    public Doc2vec(OperatorDescription operatorDescription) {
        super(operatorDescription);
        this.documentSetInput = getInputPorts().createPort("document set");
        this.exampleSetOutput = getOutputPorts().createPort("example set");
        this.documentSetInput.addPrecondition(new SimplePrecondition(this.documentSetInput, new MetaData(DocumentSet.class)) { // from class: com.rapidminer.extension.hanminer.operator.featureExtraction.vectorizer.Doc2vec.1
            protected boolean isMandatory() {
                return false;
            }
        });
    }

    public List<ParameterType> getParameterTypes() {
        List<ParameterType> parameterTypes = super.getParameterTypes();
        parameterTypes.add(new ParameterTypeBoolean(PARAMETER_LOAD_MODEL_FROM_FILE, "If set to true, load a pre-trained word2vec model from file.", false, false));
        ParameterTypeFile parameterTypeFile = new ParameterTypeFile(PARAMETER_MODEL_FILE, "Path to the model", (String) null, true, false);
        parameterTypeFile.registerDependencyCondition(new BooleanParameterCondition(this, PARAMETER_LOAD_MODEL_FROM_FILE, true, true));
        parameterTypes.add(parameterTypeFile);
        ParameterTypeBoolean parameterTypeBoolean = new ParameterTypeBoolean(PARAMETER_USE_DEFAULT_MODEL, "If set to true, use the default model", true, false);
        parameterTypeBoolean.registerDependencyCondition(new BooleanParameterCondition(this, PARAMETER_LOAD_MODEL_FROM_FILE, true, false));
        parameterTypes.add(parameterTypeBoolean);
        ParameterTypeInt parameterTypeInt = new ParameterTypeInt(PARAMETER_EMBEDDING_SIZE, "Number of dimensions for output vectors", 10, 500, 100, false);
        parameterTypeInt.registerDependencyCondition(new BooleanParameterCondition(this, PARAMETER_LOAD_MODEL_FROM_FILE, false, false));
        parameterTypeInt.registerDependencyCondition(new BooleanParameterCondition(this, PARAMETER_USE_DEFAULT_MODEL, true, false));
        parameterTypes.add(parameterTypeInt);
        ParameterTypeDirectory parameterTypeDirectory = new ParameterTypeDirectory(PARAMETER_CORPUS_FILE, "Folder that contains the corpus to train a new model.", true);
        parameterTypeDirectory.setExpert(false);
        parameterTypeDirectory.registerDependencyCondition(new BooleanParameterCondition(this, PARAMETER_LOAD_MODEL_FROM_FILE, false, false));
        parameterTypeDirectory.registerDependencyCondition(new BooleanParameterCondition(this, PARAMETER_USE_DEFAULT_MODEL, true, false));
        parameterTypes.add(parameterTypeDirectory);
        ParameterTypeFile parameterTypeFile2 = new ParameterTypeFile(PARAMETER_SAVE_MODEL_TO, "Path to save the new word2vec model", "ser", false, false);
        parameterTypeFile2.registerDependencyCondition(new BooleanParameterCondition(this, PARAMETER_LOAD_MODEL_FROM_FILE, false, false));
        parameterTypeFile2.registerDependencyCondition(new BooleanParameterCondition(this, PARAMETER_USE_DEFAULT_MODEL, true, false));
        parameterTypes.add(parameterTypeFile2);
        return parameterTypes;
    }

    private WordVectorModel loadWord2VecModel() throws OperatorException {
        if (getParameterAsBoolean(PARAMETER_LOAD_MODEL_FROM_FILE)) {
            try {
                return new WordVectorModel(getParameterAsFile(PARAMETER_MODEL_FILE).getAbsolutePath());
            } catch (IOException e) {
                e.printStackTrace();
            }
        } else if (!getParameterAsBoolean(PARAMETER_USE_DEFAULT_MODEL)) {
            int parameterAsInt = getParameterAsInt(PARAMETER_EMBEDDING_SIZE);
            String parameterAsString = getParameterAsString(PARAMETER_CORPUS_FILE);
            String parameterAsString2 = getParameterAsString(PARAMETER_SAVE_MODEL_TO);
            Word2VecTrainer word2VecTrainer = new Word2VecTrainer();
            word2VecTrainer.setLayerSize(parameterAsInt);
            return word2VecTrainer.train(parameterAsString, parameterAsString2);
        }
        try {
            return new WordVectorModel(DEFAULT_MODEL_FILE);
        } catch (IOException e2) {
            logger.warning(String.format("fail to load default word2vec model from %s", DEFAULT_MODEL_FILE));
            e2.printStackTrace();
            return null;
        }
    }

    public void doWork() throws OperatorException {
        DocumentSet data = this.documentSetInput.getData(SimpleDocumentSet.class);
        DocVectorModel docVectorModel = new DocVectorModel(loadWord2VecModel());
        LinkedList linkedList = new LinkedList();
        for (int i = 0; i < docVectorModel.dimension(); i++) {
            Ontology ontology = Ontology.ATTRIBUTE_VALUE_TYPE;
            linkedList.add(AttributeFactory.createAttribute("Feature_" + i, 4));
        }
        MemoryExampleTable memoryExampleTable = new MemoryExampleTable(linkedList);
        Iterator<String> it = data.getDocuments().iterator();
        while (it.hasNext()) {
            memoryExampleTable.addDataRow(new FloatArrayDataRow(docVectorModel.query(it.next()).getElementArray()));
        }
        this.exampleSetOutput.deliver(memoryExampleTable.createExampleSet());
    }
}
