package eu.radoop.operator.spark;

import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.OperatorVersion;
import com.rapidminer.operator.ProcessSetupError;
import com.rapidminer.operator.ProcessStoppedException;
import com.rapidminer.operator.SimpleProcessSetupError;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.ports.metadata.AttributeMetaData;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.MDTransformationRule;
import com.rapidminer.operator.ports.metadata.MetaData;
import com.rapidminer.operator.ports.metadata.MetaDataError;
import com.rapidminer.operator.ports.metadata.SetRelation;
import com.rapidminer.operator.ports.metadata.SimpleMetaDataError;
import com.rapidminer.operator.ports.quickfix.ParameterSettingQuickFix;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.parameter.conditions.AboveOperatorVersionCondition;
import com.rapidminer.tools.Observable;
import com.rapidminer.tools.Observer;
import com.rapidminer.tools.math.container.Range;
import eu.radoop.RadoopConf;
import eu.radoop.RadoopOperator;
import eu.radoop.RadoopTools;
import eu.radoop.datahandler.HadoopExampleSet;
import eu.radoop.datahandler.HadoopExampleSetFactory;
import eu.radoop.datahandler.hdfs.TempHDFSDirectory;
import eu.radoop.datahandler.hive.FileFormatHive;
import eu.radoop.exception.OperationKilledException;
import eu.radoop.manipulation.HiveWindowing;
import eu.radoop.operator.spark.SparkTools;
import eu.radoop.spark.GenerateDataFunction;
import eu.radoop.spark.SparkOperation;
import eu.radoop.spark.SparkVersion;
import eu.radoop.transfer.parameter.CommonParameter;
import eu.radoop.transfer.parameter.GenerateDataParameter;
import eu.radoop.transfer.parameter.ParameterTransferObject;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.stream.Stream;

/* loaded from: input_file:eu/radoop/operator/spark/DataGenerator.class */
public class DataGenerator extends RadoopOperator {
    private final OutputPort exampleSetOutput;
    public static final String PARAMETER_TARGET_FUNCTION = "target_function";
    public static final String PARAMETER_NUMBER_EXAMPLES = "number_examples";
    public static final String PARAMETER_NUMBER_OF_ATTRIBUTES = "number_of_attributes";
    private static final String PARAMETER_ATTRIBUTES_LOWER_BOUND = "attributes_lower_bound";
    private static final String PARAMETER_ATTRIBUTES_UPPER_BOUND = "attributes_upper_bound";
    private static final String PARAMETER_NUMBER_OF_PARTITIONS = "number_of_partitions";
    private static final String[] FUNCTION_NAMES = (String[]) Stream.of((Object[]) GenerateDataFunction.values()).map((v0) -> {
        return v0.toString();
    }).toArray(i -> {
        return new String[i];
    });
    public static final OperatorVersion VERSION_NO_PARTITIONING = new OperatorVersion(7, 3, 1);
    private boolean cacheDirty;
    private MetaData cachedMetaData;
    private MetaDataError cachedError;

    public DataGenerator(OperatorDescription operatorDescription) {
        super(operatorDescription);
        this.exampleSetOutput = getOutputPorts().createPort("example set output");
        this.cacheDirty = true;
        getTransformer().addRule(new MDTransformationRule() { // from class: eu.radoop.operator.spark.DataGenerator.1
            public void transformMD() {
                if (DataGenerator.this.cacheDirty) {
                    try {
                        DataGenerator.this.cachedMetaData = getGeneratedMetaData();
                        DataGenerator.this.cachedError = null;
                    } catch (OperatorException e) {
                        DataGenerator.this.cachedMetaData = new MetaData(HadoopExampleSet.class);
                        String message = e.getMessage();
                        if (message == null || message.length() == 0) {
                            message = e.toString();
                        }
                        DataGenerator.this.cachedError = new SimpleMetaDataError(ProcessSetupError.Severity.WARNING, DataGenerator.this.exampleSetOutput, "cannot_create_exampleset_metadata", new Object[]{message});
                    }
                    if (DataGenerator.this.cachedMetaData != null) {
                        DataGenerator.this.cachedMetaData.addToHistory(DataGenerator.this.exampleSetOutput);
                    }
                    DataGenerator.this.cacheDirty = false;
                }
                DataGenerator.this.exampleSetOutput.deliverMD(DataGenerator.this.cachedMetaData);
                if (DataGenerator.this.cachedError != null) {
                    DataGenerator.this.exampleSetOutput.addError(DataGenerator.this.cachedError);
                }
            }

            private MetaData getGeneratedMetaData() throws UndefinedParameterError {
                double parameterAsDouble = DataGenerator.this.getParameterAsDouble(DataGenerator.PARAMETER_ATTRIBUTES_LOWER_BOUND);
                double parameterAsDouble2 = DataGenerator.this.getParameterAsDouble(DataGenerator.PARAMETER_ATTRIBUTES_UPPER_BOUND);
                if (parameterAsDouble2 < parameterAsDouble) {
                    DataGenerator.this.exampleSetOutput.addError(new SimpleMetaDataError(ProcessSetupError.Severity.WARNING, DataGenerator.this.exampleSetOutput, "upper_smaller_than_lower", new Object[]{Double.valueOf(parameterAsDouble), Double.valueOf(parameterAsDouble2)}));
                }
                ExampleSetMetaData exampleSetMetaData = new ExampleSetMetaData();
                AttributeMetaData attributeMetaData = new AttributeMetaData(HiveWindowing.ROLE_LABEL, 4, HiveWindowing.ROLE_LABEL);
                attributeMetaData.setValueRange(new Range(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY), SetRelation.SUBSET);
                exampleSetMetaData.addAttribute(attributeMetaData);
                for (int i = 0; i < DataGenerator.this.getParameterAsInt(DataGenerator.PARAMETER_NUMBER_OF_ATTRIBUTES); i++) {
                    AttributeMetaData attributeMetaData2 = new AttributeMetaData("att" + (i + 1), 4);
                    attributeMetaData2.setValueRange(new Range(parameterAsDouble, parameterAsDouble2), SetRelation.EQUAL);
                    exampleSetMetaData.addAttribute(attributeMetaData2);
                }
                exampleSetMetaData.setNumberOfExamples(DataGenerator.this.getParameterAsInt(DataGenerator.PARAMETER_NUMBER_EXAMPLES));
                return RadoopOperator.castToHesMD(exampleSetMetaData);
            }
        });
        observeParameters();
    }

    private void observeParameters() {
        getParameters().addObserverAsFirst(new Observer<String>() { // from class: eu.radoop.operator.spark.DataGenerator.2
            public void update(Observable<String> observable, String str) {
                DataGenerator.this.cacheDirty = true;
            }

            public /* bridge */ /* synthetic */ void update(Observable observable, Object obj) {
                update((Observable<String>) observable, (String) obj);
            }
        }, false);
    }

    @Override // eu.radoop.RadoopOperator
    public boolean producesOutput(Class<? extends IOObject> cls) {
        return RadoopOperator.isHadoopExampleSet(cls);
    }

    @Override // eu.radoop.RadoopOperator
    public void performAdditionalChecks() {
        RadoopOperator.checkNestUsage(this);
        SparkTools.checkSparkVersionDesignTime(this, SparkVersion.get30AndAbove());
        ArrayList arrayList = new ArrayList();
        try {
            GenerateDataFunction parameterAsFunction = getParameterAsFunction(PARAMETER_TARGET_FUNCTION);
            int parameterAsInt = getParameterAsInt(PARAMETER_NUMBER_OF_ATTRIBUTES);
            if (GenerateDataFunction.func2Args.contains(parameterAsFunction) && parameterAsInt < 2) {
                arrayList.add(new ParameterSettingQuickFix(this, PARAMETER_NUMBER_OF_ATTRIBUTES, String.valueOf(2)));
                addError(new SimpleProcessSetupError(ProcessSetupError.Severity.ERROR, getPortOwner(), arrayList, "example_set_generator.too_few_attributes", new Object[]{parameterAsFunction, 2}));
            }
            if (GenerateDataFunction.func3Args.contains(parameterAsFunction) && parameterAsInt < 3) {
                arrayList.add(new ParameterSettingQuickFix(this, PARAMETER_NUMBER_OF_ATTRIBUTES, String.valueOf(3)));
                addError(new SimpleProcessSetupError(ProcessSetupError.Severity.ERROR, getPortOwner(), arrayList, "example_set_generator.too_few_attributes", new Object[]{parameterAsFunction, 3}));
            }
        } catch (UndefinedParameterError e) {
        }
    }

    public void doWork() throws OperatorException {
        SparkTools.checkSparkVersionRuntime(this, SparkVersion.get30AndAbove());
        TempHDFSDirectory tempHdfsDirectory = getTempHdfsDirectory();
        TempHDFSDirectory tempHdfsDirectory2 = getTempHdfsDirectory();
        ParameterTransferObject parameterTransferObject = new ParameterTransferObject();
        CommonParameter.FileFormat fileFormat = CommonParameter.FileFormat.values()[getParameterAsInt(SparkTools.PARAMETER_DATAFORMAT)];
        int parameterAsInt = getParameterAsInt(PARAMETER_NUMBER_EXAMPLES);
        int parameterAsInt2 = getParameterAsInt(PARAMETER_NUMBER_OF_ATTRIBUTES);
        GenerateDataFunction parameterAsFunction = getParameterAsFunction(PARAMETER_TARGET_FUNCTION);
        double parameterAsDouble = getParameterAsDouble(PARAMETER_ATTRIBUTES_LOWER_BOUND);
        double parameterAsDouble2 = getParameterAsDouble(PARAMETER_ATTRIBUTES_UPPER_BOUND);
        if (parameterAsDouble2 < parameterAsDouble) {
            throw new UserError(this, 226, new Object[]{Double.valueOf(parameterAsDouble), Double.valueOf(parameterAsDouble2)});
        }
        double d = parameterAsDouble2 - parameterAsDouble;
        if (GenerateDataFunction.func2Args.contains(parameterAsFunction) && parameterAsInt2 < 2) {
            throw new UserError(this, 918, new Object[]{parameterAsFunction, "needs at least 2 attributes!"});
        }
        if (GenerateDataFunction.func3Args.contains(parameterAsFunction) && parameterAsInt2 < 3) {
            throw new UserError(this, 918, new Object[]{parameterAsFunction, "needs at least 3 attributes!"});
        }
        parameterTransferObject.setParameter(GenerateDataParameter.OUTPUT_FORMAT, fileFormat.name());
        parameterTransferObject.setParameter(GenerateDataParameter.OUTPUT_DIR, tempHdfsDirectory.getFullPath());
        parameterTransferObject.setParameter(GenerateDataParameter.EX_DIR, tempHdfsDirectory2.getFullPath());
        parameterTransferObject.setParameter(GenerateDataParameter.NR_OF_EXAMPLES, Integer.valueOf(parameterAsInt));
        parameterTransferObject.setParameter(GenerateDataParameter.NR_OF_ATTRS, Integer.valueOf(parameterAsInt2));
        parameterTransferObject.setParameter(GenerateDataParameter.LABELGEN_FUNCTION, parameterAsFunction.name());
        parameterTransferObject.setParameter(GenerateDataParameter.RANGE, Double.valueOf(d));
        parameterTransferObject.setParameter(GenerateDataParameter.OFFSET, Double.valueOf(parameterAsDouble));
        parameterTransferObject.setParameter(GenerateDataParameter.NUMBER_OF_PARTITIONS, Integer.valueOf(getCompatibilityLevel().isAtMost(VERSION_NO_PARTITIONING) ? -1 : getParameterAsInt("number_of_partitions")));
        ConcurrentLinkedQueue concurrentLinkedQueue = new ConcurrentLinkedQueue();
        ConcurrentLinkedQueue concurrentLinkedQueue2 = new ConcurrentLinkedQueue();
        try {
            SparkTools.startMonitoringThread(this, concurrentLinkedQueue, concurrentLinkedQueue2, true);
            checkForStop();
            SparkTools.SparkFinalState sparkFinalState = SparkTools.SparkFinalState.FAILED;
            try {
                HashMap hashMap = new HashMap();
                hashMap.put(RadoopConf.SPARK_VALIDATE_OUTPUT_SPECS, "false");
                SparkTools.SparkFinalState finalState = getSparkHandler().runSpark(this, this, SparkOperation.GenerateData, concurrentLinkedQueue, concurrentLinkedQueue2, hashMap, parameterTransferObject, null).getFinalState();
                LinkedHashMap linkedHashMap = new LinkedHashMap();
                if (!finalState.equals(SparkTools.SparkFinalState.SUCCEEDED)) {
                    try {
                        SparkTools.handleSparkFailure(this, finalState, tempHdfsDirectory2);
                        checkForOperationStop();
                    } catch (IOException e) {
                        if (e.getMessage() == null) {
                            throw new UserError(this, 1503);
                        }
                        throw new UserError(this, 1504, new Object[]{RadoopTools.formatOperatorExceptionMessage(null, e)});
                    }
                } else {
                    if (!getMapReduceHDFSHandler().exists(tempHdfsDirectory.getFullPath())) {
                        throw new UserError(this, "spark.datagenerator.output_missing");
                    }
                    for (int i = 1; i <= parameterAsInt2; i++) {
                        linkedHashMap.put(AttributeFactory.createAttribute("att" + i, 4), null);
                    }
                    linkedHashMap.put(AttributeFactory.createAttribute(HiveWindowing.ROLE_LABEL, 4), HiveWindowing.ROLE_LABEL);
                }
                if (this.exampleSetOutput.isConnected()) {
                    String tempTableName = getTempTableName();
                    HadoopExampleSetFactory.createHiveTable(getHiveHandler(), tempTableName, linkedHashMap, false, "STORED AS " + (fileFormat.equals(CommonParameter.FileFormat.TEXTFILE) ? FileFormatHive.TEXTFILE.toString() : FileFormatHive.PARQUET.toString()));
                    tempHdfsDirectory.loadDataIntoHive(getHiveHandler(), tempTableName, true, true);
                    createExampleSet(this.exampleSetOutput, tempTableName, null, true, null, null, null, null, null, new HadoopExampleSet[0]);
                }
            } catch (IOException e2) {
                throw RadoopTools.formattedOperatorException("The Spark job could not be submitted.", e2);
            }
        } catch (OperationKilledException e3) {
            throw new ProcessStoppedException(this);
        }
    }

    public List<ParameterType> getParameterTypes() {
        List<ParameterType> parameterTypes = super.getParameterTypes();
        parameterTypes.add(new ParameterTypeCategory(SparkTools.PARAMETER_DATAFORMAT, "The storage format of the generated HadoopExampleSet.", SparkTools.DATAFORMATS, getCompatibilityLevel().isAtMost(VERSION_NO_PARTITIONING) ? CommonParameter.FileFormat.PARQUET.ordinal() : CommonParameter.FileFormat.TEXTFILE.ordinal(), true));
        ParameterTypeCategory parameterTypeCategory = new ParameterTypeCategory(PARAMETER_TARGET_FUNCTION, "Specifies the target function of this example set.", FUNCTION_NAMES, 0);
        parameterTypeCategory.setExpert(false);
        parameterTypes.add(parameterTypeCategory);
        ParameterTypeInt parameterTypeInt = new ParameterTypeInt(PARAMETER_NUMBER_EXAMPLES, "The number of generated examples.", 1, Integer.MAX_VALUE, 100);
        parameterTypeInt.setExpert(false);
        parameterTypes.add(parameterTypeInt);
        ParameterTypeInt parameterTypeInt2 = new ParameterTypeInt(PARAMETER_NUMBER_OF_ATTRIBUTES, "The number of attributes.", 1, Integer.MAX_VALUE, 5);
        parameterTypeInt2.setExpert(false);
        parameterTypes.add(parameterTypeInt2);
        ParameterTypeDouble parameterTypeDouble = new ParameterTypeDouble(PARAMETER_ATTRIBUTES_LOWER_BOUND, "The minimum value for the attributes.", Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, -10.0d);
        parameterTypeDouble.setExpert(false);
        parameterTypes.add(parameterTypeDouble);
        ParameterTypeDouble parameterTypeDouble2 = new ParameterTypeDouble(PARAMETER_ATTRIBUTES_UPPER_BOUND, "The maximum value for the attributes.", Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 10.0d);
        parameterTypeDouble2.setExpert(false);
        parameterTypes.add(parameterTypeDouble2);
        ParameterTypeInt parameterTypeInt3 = new ParameterTypeInt("number_of_partitions", "The number of partitions in the output HadoopExampleSet. If it is set to 0 (default value), then the number of partitions is (number of executors)*(number of executor cores), which results in the best performance for this operator. You can disable this and use Spark's default number of partitions by setting the parameter to -1.", -1, Integer.MAX_VALUE, 0, true);
        parameterTypeInt3.registerDependencyCondition(new AboveOperatorVersionCondition(this, VERSION_NO_PARTITIONING));
        parameterTypes.add(parameterTypeInt3);
        return parameterTypes;
    }

    @Override // eu.radoop.RadoopOperator
    public int getCost() {
        return 1;
    }

    private GenerateDataFunction getParameterAsFunction(String str) throws UndefinedParameterError {
        return GenerateDataFunction.valueOf(getParameterAsString(str).toUpperCase());
    }

    public OperatorVersion[] getIncompatibleVersionChanges() {
        return new OperatorVersion[]{VERSION_NO_PARTITIONING};
    }
}
