package edu.pitt.dbmi.edda.operator.regexop;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.ports.metadata.AttributeMetaData;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.GenerateNewMDRule;
import com.rapidminer.operator.ports.metadata.MetaData;
import com.rapidminer.operator.ports.metadata.PassThroughOrGenerateRule;
import com.rapidminer.operator.ports.metadata.SetRelation;
import com.rapidminer.operator.ports.metadata.SimplePrecondition;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDirectory;
import com.rapidminer.parameter.ParameterTypeFile;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.ParameterTypeList;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.UndefinedParameterError;
import edu.pitt.dbmi.edda.operator.regexop.document.DocumentFetcher;
import edu.pitt.dbmi.edda.operator.regexop.rank.RankerBinary;
import edu.pitt.dbmi.edda.operator.regexop.rank.RankerFrequency;
import edu.pitt.dbmi.edda.operator.regexop.rank.RankerOccurrence;
import edu.pitt.dbmi.edda.operator.regexop.rank.RankerRegularExpression;
import edu.pitt.dbmi.edda.operator.regexop.rank.RankerTfIdf;
import edu.pitt.dbmi.edda.operator.regexop.regex.RegExReader;
import edu.pitt.dbmi.edda.operator.regexop.regex.RegularExpressionList;
import java.io.File;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import salvo.jesus.graph.xml.XGMML;

/* loaded from: input_file:edu/pitt/dbmi/edda/operator/regexop/RegExOperator.class */
public class RegExOperator extends Operator {
    public static final String PARAMETER_REGEX_FILE = "regex_file";
    public static final String PARAMETER_TEXTS = "text_directories";
    public static final String PARAMETER_TEXT_DIR = "directory";
    public static final String PARAMETER_CREATE_WORD_VECTOR = "create_word_vector";
    public static final String PARAMETER_VECTOR_CREATION = "vector_creation";
    public static final String PARAMETER_NO_OVERLAPS = "no_overlaps";
    public static final String PARAMETER_MIN_NON_ZEROS = "prune_below_absolute";
    public static final String PARAMETER_ABSTRACTS_ONLY = "abstracts_only";
    public static final String PARAMETER_GENERATE_DIAGNOSTICS = "generate_diagnostics";
    public static final String METADATA_PATH = "metadata_path";
    public static final String[] VECTOR_CREATOR_NAMES = {"Term Frequency", "Binary Term Occurrences", "Term Occurrences", "TF-IDF"};
    public static final Class<?>[] VECTOR_CREATOR_CLASSES = {RankerFrequency.class, RankerBinary.class, RankerOccurrence.class, RankerTfIdf.class};
    private final InputPort regularExpressionListInput;
    private final OutputPort exampleSetOutput;
    private final OutputPort regularExpressionListOutput;

    public RegExOperator(OperatorDescription operatorDescription) {
        super(operatorDescription);
        this.regularExpressionListInput = getInputPorts().createPort("regex list");
        this.exampleSetOutput = getOutputPorts().createPort("example set");
        this.regularExpressionListOutput = getOutputPorts().createPort("regex list");
        this.regularExpressionListInput.addPrecondition(new SimplePrecondition(this.regularExpressionListInput, new MetaData(RegularExpressionList.class), false));
        getTransformer().addRule(new PassThroughOrGenerateRule(this.regularExpressionListInput, this.regularExpressionListOutput, new MetaData(RegularExpressionList.class)));
        getTransformer().addRule(new GenerateNewMDRule(this.exampleSetOutput, new ExampleSetMetaData()) { // from class: edu.pitt.dbmi.edda.operator.regexop.RegExOperator.1
            public MetaData modifyMetaData(MetaData metaData) {
                return RegExOperator.this.addMetaDataAttributes((ExampleSetMetaData) metaData);
            }
        });
    }

    public void doWork() throws OperatorException {
        DocumentFetcher documentFetcher = new DocumentFetcher(getParameterList("text_directories"));
        documentFetcher.setUsingAbstractsOnly(getParameterAsBoolean(PARAMETER_ABSTRACTS_ONLY));
        documentFetcher.establishLabeledDocuments();
        RankerRegularExpression establishRanker = establishRanker();
        establishRanker.setDocumentFetcher(documentFetcher);
        establishRanker.setGeneratingDiagnostics(getParameterAsBoolean("generate_diagnostics"));
        RegExOpWorker regExOpWorker = new RegExOpWorker();
        regExOpWorker.setGeneratingDiagnostics(getParameterAsBoolean("generate_diagnostics"));
        RegularExpressionList regularExpressionList = (RegularExpressionList) this.regularExpressionListInput.getDataOrNull();
        if (regularExpressionList != null) {
            establishRanker.setRequiredNumberOfNonZeroRanksPerRegEx(0);
            regExOpWorker.setRankerRegularExpression(establishRanker);
            regExOpWorker.setRegularExpressions(regularExpressionList);
            regExOpWorker.process();
            ExampleSet outgoingExampleSet = regExOpWorker.getOutgoingExampleSet();
            markMetaDataAttributesSpecial(outgoingExampleSet);
            this.exampleSetOutput.deliver(outgoingExampleSet);
            this.regularExpressionListOutput.deliver(regularExpressionList);
            return;
        }
        RegularExpressionList readRegularExpressionFromFile = new RegExReader().readRegularExpressionFromFile(getParameterAsFile(PARAMETER_REGEX_FILE).getAbsolutePath());
        regExOpWorker.setRegularExpressions(readRegularExpressionFromFile);
        regExOpWorker.setRankerRegularExpression(establishRanker);
        regExOpWorker.process();
        ExampleSet outgoingExampleSet2 = regExOpWorker.getOutgoingExampleSet();
        markMetaDataAttributesSpecial(outgoingExampleSet2);
        this.exampleSetOutput.deliver(outgoingExampleSet2);
        this.regularExpressionListOutput.deliver(readRegularExpressionFromFile);
    }

    private void markMetaDataAttributesSpecial(ExampleSet exampleSet) {
        ArrayList arrayList = new ArrayList();
        for (Attribute attribute : exampleSet.getAttributes()) {
            if (false | attribute.getName().equals("metadata_path")) {
                arrayList.add(attribute);
            }
        }
        Iterator it = arrayList.iterator();
        while (it.hasNext()) {
            Attribute attribute2 = (Attribute) it.next();
            exampleSet.getAttributes().setSpecialAttribute(attribute2, attribute2.getName());
        }
    }

    protected RankerRegularExpression establishRanker() throws OperatorException {
        try {
            RankerRegularExpression rankerRegularExpression = (RankerRegularExpression) VECTOR_CREATOR_CLASSES[getParameterAsInt(PARAMETER_VECTOR_CREATION)].newInstance();
            rankerRegularExpression.setEliminatingSubsumedAnnotations(getParameterAsBoolean(PARAMETER_NO_OVERLAPS));
            rankerRegularExpression.setRequiredNumberOfNonZeroRanksPerRegEx(Integer.valueOf(getParameterAsInt(PARAMETER_MIN_NON_ZEROS)));
            return rankerRegularExpression;
        } catch (Exception e) {
            throw new OperatorException(e.getMessage());
        }
    }

    public List<ParameterType> getParameterTypes() {
        LinkedList linkedList = new LinkedList();
        StringBuffer stringBuffer = new StringBuffer();
        stringBuffer.append("XML input file of regular expressions.");
        stringBuffer.append("\n");
        ParameterTypeFile parameterTypeFile = new ParameterTypeFile(PARAMETER_REGEX_FILE, stringBuffer.toString(), (String) null, true);
        parameterTypeFile.setExpert(false);
        linkedList.add(parameterTypeFile);
        StringBuffer stringBuffer2 = new StringBuffer();
        stringBuffer2.append("Specifies class and directory path pairs for corpus input.");
        stringBuffer2.append("\n");
        ParameterTypeList parameterTypeList = new ParameterTypeList("text_directories", stringBuffer2.toString(), new ParameterTypeString("class_name", "Class"), new ParameterTypeDirectory("directory", "Directory", false));
        parameterTypeList.setOptional(true);
        parameterTypeList.setExpert(false);
        linkedList.add(parameterTypeList);
        StringBuffer stringBuffer3 = new StringBuffer();
        stringBuffer3.append(" Defines the attribute calculation. May be one of the following:\n");
        stringBuffer3.append("       1. Match Frequency\n");
        stringBuffer3.append("       2. Binary Match Occurrence\n");
        stringBuffer3.append("       3. Cumulative Match Occurrences\n");
        stringBuffer3.append("       4. MF-IDF Match Frequency Inverse Document Frequency\n");
        stringBuffer3.append("\n");
        ParameterTypeCategory parameterTypeCategory = new ParameterTypeCategory(PARAMETER_VECTOR_CREATION, stringBuffer3.toString(), VECTOR_CREATOR_NAMES, 0);
        parameterTypeCategory.setOptional(false);
        parameterTypeCategory.setExpert(false);
        linkedList.add(parameterTypeCategory);
        StringBuffer stringBuffer4 = new StringBuffer();
        stringBuffer4.append("Defines behavior across all regular expressions. \n");
        stringBuffer4.append("If turned on, the following is true: \n");
        stringBuffer4.append("When a regular expression matches a region of text, \n");
        stringBuffer4.append("the region is 'off limits' to subsequent regular expression matching. \n");
        stringBuffer4.append("The original XML file order determines precedence.\n");
        stringBuffer4.append("\n");
        ParameterTypeBoolean parameterTypeBoolean = new ParameterTypeBoolean(PARAMETER_NO_OVERLAPS, stringBuffer4.toString(), false);
        parameterTypeBoolean.setOptional(true);
        parameterTypeBoolean.setExpert(true);
        linkedList.add(parameterTypeBoolean);
        StringBuffer stringBuffer5 = new StringBuffer();
        stringBuffer5.append(" A regular expression must match at least the number of times \n");
        stringBuffer5.append(" specified before the expression is added to the output example set.\n");
        stringBuffer5.append("\n");
        ParameterTypeInt parameterTypeInt = new ParameterTypeInt(PARAMETER_MIN_NON_ZEROS, stringBuffer5.toString(), 0, Integer.MAX_VALUE, 0);
        parameterTypeInt.setOptional(true);
        parameterTypeInt.setExpert(true);
        linkedList.add(parameterTypeInt);
        StringBuffer stringBuffer6 = new StringBuffer();
        stringBuffer6.append("Displays diagnostic messages to standard output.");
        stringBuffer6.append("\n");
        ParameterTypeBoolean parameterTypeBoolean2 = new ParameterTypeBoolean("generate_diagnostics", stringBuffer6.toString(), false);
        parameterTypeBoolean2.setOptional(true);
        parameterTypeBoolean2.setExpert(true);
        linkedList.add(parameterTypeBoolean2);
        return linkedList;
    }

    protected ExampleSetMetaData addMetaDataAttributes(ExampleSetMetaData exampleSetMetaData) {
        try {
            AttributeMetaData attributeMetaData = new AttributeMetaData(XGMML.LABEL_ATTRIBUTE_LITERAL, getProvidedLabelType(), XGMML.LABEL_ATTRIBUTE_LITERAL);
            attributeMetaData.setValueSet(new HashSet(getLabelValues()), SetRelation.EQUAL);
            exampleSetMetaData.addAttribute(attributeMetaData);
            exampleSetMetaData.addAttribute(new AttributeMetaData("metadata_path", 7, "metadata_path"));
        } catch (Exception e) {
        }
        return exampleSetMetaData;
    }

    protected int getProvidedLabelType() throws UndefinedParameterError {
        return getParameterList("text_directories").size() == 2 ? 6 : 7;
    }

    protected List<String> getLabelValues() {
        LinkedList linkedList = new LinkedList();
        try {
            Iterator it = getParameterList("text_directories").iterator();
            while (it.hasNext()) {
                linkedList.add(((String[]) it.next())[0]);
            }
        } catch (UndefinedParameterError e) {
        }
        return linkedList;
    }
}
