package eu.radoop.operator.meta;

import com.rapidminer.example.Attribute;
import com.rapidminer.operator.IOObjectCollection;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.OperatorVersion;
import com.rapidminer.operator.ProcessSetupError;
import com.rapidminer.operator.SimpleProcessSetupError;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.PortPairExtender;
import com.rapidminer.operator.ports.metadata.AttributeMetaData;
import com.rapidminer.operator.ports.metadata.AttributeSetPrecondition;
import com.rapidminer.operator.ports.metadata.CollectionMetaData;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.MDTransformationRule;
import com.rapidminer.operator.ports.metadata.MetaData;
import com.rapidminer.operator.ports.metadata.ParameterConditionedPrecondition;
import com.rapidminer.operator.ports.quickfix.ParameterSettingQuickFix;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeAttribute;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.parameter.conditions.AboveOperatorVersionCondition;
import com.rapidminer.parameter.conditions.AndParameterCondition;
import com.rapidminer.parameter.conditions.BelowOrEqualOperatorVersionCondition;
import com.rapidminer.parameter.conditions.BooleanParameterCondition;
import com.rapidminer.parameter.conditions.EqualStringCondition;
import com.rapidminer.parameter.conditions.EqualTypeCondition;
import com.rapidminer.parameter.conditions.NonEqualTypeCondition;
import com.rapidminer.parameter.conditions.OrParameterCondition;
import com.rapidminer.parameter.conditions.ParameterCondition;
import com.rapidminer.tools.LogService;
import eu.radoop.KillableOperationWrapper;
import eu.radoop.RadoopConf;
import eu.radoop.RadoopTools;
import eu.radoop.RapidMinerSubprocessInsideTheNest;
import eu.radoop.connections.SparkResourceAllocationPolicy;
import eu.radoop.datahandler.HadoopExampleSet;
import eu.radoop.datahandler.HadoopExampleSetFactory;
import eu.radoop.datahandler.hdfs.TempHDFSDirectory;
import eu.radoop.datahandler.hive.FileFormatHive;
import eu.radoop.datahandler.hive.JdbcConnectionTools;
import eu.radoop.datahandler.mapreducehdfs.FileReport;
import eu.radoop.exception.HiveTableException;
import eu.radoop.io.importers.HiveImport;
import eu.radoop.manipulation.HiveWindowing;
import eu.radoop.operator.ports.RadoopPortPairExtender;
import eu.radoop.operator.ports.metadata.HadoopExampleSetMetaData;
import eu.radoop.operator.ports.metadata.RadoopAttributeSetPrecondition;
import eu.radoop.spark.SparkOperation;
import eu.radoop.tools.CommonUtils;
import eu.radoop.transfer.ProcessOutputTO;
import eu.radoop.transfer.PushdownOutputAttributeMetaData;
import eu.radoop.transfer.parameter.MissingAttributesHandlingMode;
import eu.radoop.transfer.parameter.PartitionSizing;
import eu.radoop.transfer.parameter.PartitioningMode;
import eu.radoop.transfer.parameter.ProcessPushdownParameter;
import eu.radoop.transfer.parameter.SchemaConflictResolutionMode;
import java.io.IOException;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.hadoop.mapreduce.MRJobConfig;

@RapidMinerSubprocessInsideTheNest
/* loaded from: input_file:eu/radoop/operator/meta/MultiNodePushdown.class */
public class MultiNodePushdown extends PushdownOperator {
    // ---- Parameter keys: bootstrapping and input partitioning ----
    public static final String PARAMETER_NUMBER_OF_BOOTSTRAPS = "number_of_bootstraps";
    public static final String PARAMETER_PARTITIONING_MODE = "partitioning_mode";
    public static final String PARAMETER_PARTITION_SIZING = "partition_sizing";
    public static final String PARAMETER_NUMBER_OF_PARTITIONS = "number_of_partitions";
    public static final String PARAMETER_PARTITION_SIZE = "partition_size";
    public static final String PARAMETER_PARTITIONING_ATTRIBUTE = "partitioning_attribute";
    // ---- Parameter keys: partition index attribute and partition macro ----
    public static final String PARAMETER_ADD_PARTITION_INDEX = "add_partition_index_attribute";
    public static final String PARAMETER_SET_MACRO = "set_partition_macro";
    public static final String PARAMETER_MACRO_NAME = "macro_name";
    public static final String PARAMETER_MACRO_START_VALUE = "macro_start_value";
    // ---- Parameter keys: resource allocation ----
    // These come in legacy/current pairs. The legacy name is used for compatibility levels up to
    // and including VERSION_OLD_MEMORY_PARAM_NAMES, the current name for levels above it:
    //   force_input_preprocessing -> force_resource_calculation
    //   cluster_resources_limit_% -> cluster_resources_%
    //   max_node_memory_%         -> executor_memory_%
    public static final String PARAMETER_FORCE_INPUT_PREPROCESSING = "force_input_preprocessing";
    public static final String PARAMETER_FORCE_RESOURCE_CALCULATION = "force_resource_calculation";
    public static final String PARAMETER_CLUSTER_RESOURCES_LIMIT = "cluster_resources_limit_%";
    public static final String PARAMETER_CLUSTER_RESOURCES = "cluster_resources_%";
    public static final String PARAMETER_CLUSTER_MAX_NODE_MEMORY_PERCENTAGE = "max_node_memory_%";
    public static final String PARAMETER_CLUSTER_EXECUTOR_MEMORY_PERCENTAGE = "executor_memory_%";
    // ---- Parameter keys: output handling ----
    public static final String PARAMETER_MERGE_OUTPUT = "merge_output";
    public static final String PARAMETER_RESOLVE_SCHEMA_CONFLICTS = "resolve_schema_conflicts";
    public static final String PARAMETER_SCHEMA_CONFLICT_RESOLUTION_MODE = "schema_conflict_resolution_mode";
    public static final String PARAMETER_HANDLE_MISSING_ATTRIBUTES = "handle_missing_attributes";
    public static final String PARAMETER_MISSING_ATTRIBUTES_VALUE = "missing_attributes_value";
    public static final String PARAMETER_MAX_COLLECTION_SIZE = "max_collection_size";
    public static final String PARAMETER_DISTRIBUTE_NOMINAL_MAPPINGS = "distribute_nominal_mappings";
    // 10 * 1024 * 1024 bytes. NOTE(review): presumably the input size up to which statistics
    // gathering is skipped (the same literal is compared against the file report's total size
    // in prepareSparkJobSubmission) -- confirm.
    private static final long SKIP_PREPROCESSING_FILESIZE_LIMIT = 10485760;
    // Last compatibility level that still uses the legacy resource parameter names listed above.
    public static final OperatorVersion VERSION_OLD_MEMORY_PARAM_NAMES = new OperatorVersion("7.4.1");

    public MultiNodePushdown(OperatorDescription operatorDescription) {
        super(operatorDescription, "Multi Process Pushdown");
        this.exampleSetInput.addPrecondition(new ParameterConditionedPrecondition(this.exampleSetInput, new ParameterConditionedPrecondition(this.exampleSetInput, new RadoopAttributeSetPrecondition(this.exampleSetInput, AttributeSetPrecondition.getAttributesByParameter(this, new String[]{PARAMETER_PARTITIONING_ATTRIBUTE}), new String[0]), this, PARAMETER_PARTITIONING_MODE, PartitioningMode.ATTRIBUTE.getName()), this, "bootstrap_data", "false"));
    }

    /**
     * Creates the meta data rule that derives the meta data of the example set output from the
     * meta data arriving at the inner example set sink.
     *
     * <p>The rule only acts when the inner sink carries {@link ExampleSetMetaData}. That meta data
     * is converted via {@code HiveImport.convertToHadoopExampleSetMetaData}; if
     * {@code add_partition_index_attribute} is enabled, an extra attribute with a unique name
     * derived from {@code RadoopConf.PARTITION_ID} is appended; if {@code merge_output} is
     * disabled, the result is wrapped into {@link CollectionMetaData}.
     *
     * <p>If the conversion yields {@code null}, {@code null} is delivered unchanged. (Previously
     * this case caused a {@link NullPointerException} whenever the partition index attribute was
     * requested, although the surrounding code already treated {@code null} as a valid result.)
     *
     * @return the transformation rule for the example set output
     */
    @Override // eu.radoop.operator.meta.PushdownOperator
    protected MDTransformationRule createOutputExampleSetRule() {
        return new MDTransformationRule() { // from class: eu.radoop.operator.meta.MultiNodePushdown.1
            @Override
            public void transformMD() {
                final MetaData sinkMetaData = MultiNodePushdown.this.exampleSetInnerSink.getMetaData();
                if (!(sinkMetaData instanceof ExampleSetMetaData)) {
                    // Nothing (or something that is not an example set) arrives: deliver nothing.
                    return;
                }

                final HadoopExampleSetMetaData converted = (HadoopExampleSetMetaData) HiveImport.convertToHadoopExampleSetMetaData((ExampleSetMetaData) sinkMetaData, true);

                MetaData delivered = converted;
                if (converted != null) {
                    converted.addToHistory(MultiNodePushdown.this.exampleSetOutput);

                    if (MultiNodePushdown.this.getParameterAsBoolean(MultiNodePushdown.PARAMETER_ADD_PARTITION_INDEX)) {
                        // Pick a name that does not clash with any attribute already present.
                        final List<String> existingNames = converted.getAllAttributes().stream()
                                .map(AttributeMetaData::getName)
                                .collect(Collectors.toList());
                        final String indexAttributeName = CommonUtils.newUniqueAlias(RadoopConf.PARTITION_ID, existingNames);
                        // NOTE(review): 7 is the raw value type id emitted by the decompiler,
                        // presumably Ontology.POLYNOMINAL -- confirm against the Ontology constants.
                        converted.addAttribute(new AttributeMetaData(indexAttributeName, 7, RadoopConf.PARTITION_ID));
                    }

                    if (!MultiNodePushdown.this.getParameterAsBoolean(MultiNodePushdown.PARAMETER_MERGE_OUTPUT)) {
                        // Without merging, the port delivers a collection of example sets.
                        delivered = new CollectionMetaData(converted);
                    }
                }
                MultiNodePushdown.this.exampleSetOutput.deliverMD(delivered);
            }
        };
    }

    /**
     * Configures the Spark execution method and resource allocation policy for this operator and,
     * when the current connection uses the {@code STATIC_HEURISTIC} policy, collects the inputs
     * for the multi-node pushdown heuristic.
     *
     * <p>Two figures are derived from the input: the expected number of partitions and the ratio
     * of the average partition size to the largest partition size. Depending on the parameters
     * they come from
     * <ul>
     *   <li>the number of bootstraps (bootstrapping; ratio fixed to 1),</li>
     *   <li>a {@code GROUP BY} count on the partitioning attribute (ATTRIBUTE mode),</li>
     *   <li>the fixed partition count or the row count divided by the partition size (RANDOM
     *       mode; ratio fixed to 1),</li>
     *   <li>the block statistics of the input's file report (any other mode).</li>
     * </ul>
     * Without input, or for inputs of at most 10 MiB when statistics are not forced, a single
     * partition with ratio 1 is assumed.
     *
     * <p>All ratios and the row-count based partition count are computed in floating point;
     * integer division would truncate the ratio and make {@code Math.ceil} a no-op.
     *
     * @param hadoopExampleSet the input example set, may be {@code null}
     * @throws UserError (an {@link OperatorException}) if {@code merge_output} is disabled and the
     *         number of partitions or bootstraps exceeds {@code max_collection_size}
     * @throws OperatorException if the file report, the Hive statistics or the cluster report
     *         cannot be obtained
     * @throws UndefinedParameterError if a required parameter is not set
     */
    @Override // eu.radoop.operator.meta.PushdownOperator
    public void prepareSparkJobSubmission(HadoopExampleSet hadoopExampleSet) throws OperatorException, UndefinedParameterError {
        this.sparkExecutionMethod = SparkOperation.MultiNodePushdown;
        this.sparkResourceAllocationPolicy = SparkResourceAllocationPolicy.STATIC_HEURISTIC_MULTI_NODE_PUSHDOWN;
        this.sparkResourceAllocationPolicyParams = new HashMap<>();
        if (getRadoopNest().getCurrentConnection().getSparkResourceAllocationPolicy() != SparkResourceAllocationPolicy.STATIC_HEURISTIC) {
            // The heuristic inputs below are only gathered for the static heuristic policy.
            return;
        }

        int partitionCount = 0;
        double avgPerMaxPartitionRatio = 0.0d;
        final boolean bootstrapping = getParameterAsBoolean("bootstrap_data");
        final int bootstrapCount = getParameterAsInt(PARAMETER_NUMBER_OF_BOOTSTRAPS);

        if (hadoopExampleSet == null) {
            partitionCount = 1;
            avgPerMaxPartitionRatio = 1.0d;
        } else if (!bootstrapping) {
            // The "force statistics" switch was renamed after VERSION_OLD_MEMORY_PARAM_NAMES.
            final boolean forceStatistics = getCompatibilityLevel().isAtMost(VERSION_OLD_MEMORY_PARAM_NAMES)
                    ? getParameterAsBoolean(PARAMETER_FORCE_INPUT_PREPROCESSING)
                    : getParameterAsBoolean("force_resource_calculation");
            try {
                final FileReport fileReport = getMapReduceHDFSHandler().getFileReport(hadoopExampleSet.getDirectory());
                // 10485760 bytes = 10 MiB, the same value as SKIP_PREPROCESSING_FILESIZE_LIMIT.
                if (!forceStatistics && fileReport.totalSize <= 10485760) {
                    partitionCount = 1;
                    avgPerMaxPartitionRatio = 1.0d;
                } else {
                    final String partitioningMode = getParameterAsString(PARAMETER_PARTITIONING_MODE);
                    if (PartitioningMode.ATTRIBUTE.getName().equals(partitioningMode)) {
                        // One partition per distinct attribute value: count the rows of every group.
                        int groupCount = 0;
                        long totalRows = 0;
                        long largestGroup = 0;
                        ResultSet groupSizes = null;
                        try {
                            // NOTE(review): the attribute name is concatenated into the statement
                            // without identifier quoting -- confirm that names containing special
                            // characters are handled upstream.
                            groupSizes = getHiveHandler().runQueryKillable(hadoopExampleSet.getUdfDependencies(), null, KillableOperationWrapper.create(this), "SELECT COUNT(*) cnt FROM ? GROUP BY " + getParameterAsString(PARAMETER_PARTITIONING_ATTRIBUTE), HadoopExampleSet.getTableName(hadoopExampleSet));
                            while (groupSizes.next()) {
                                groupCount++;
                                // Read as long so that very large group counts are not narrowed to int.
                                final long groupRows = groupSizes.getLong(1);
                                largestGroup = Math.max(largestGroup, groupRows);
                                totalRows += groupRows;
                            }
                            JdbcConnectionTools.closeRes(groupSizes);
                        } catch (SQLException e) {
                            JdbcConnectionTools.forceCloseResAndConnection(groupSizes);
                            throw new OperatorException(RadoopTools.formatOperatorExceptionMessage("Error during gathering attribute statistics on input.", e), e);
                        }
                        if (totalRows == 0) {
                            partitionCount = 1;
                            avgPerMaxPartitionRatio = 1.0d;
                        } else {
                            partitionCount = groupCount;
                            // Floating-point division: average group size relative to the largest group.
                            avgPerMaxPartitionRatio = ((double) totalRows / groupCount) / largestGroup;
                        }
                    } else if (PartitioningMode.RANDOM.getName().equals(partitioningMode)) {
                        if (PartitionSizing.FIXED.getName().equals(getParameterAsString(PARAMETER_PARTITION_SIZING))) {
                            partitionCount = getParameterAsInt("number_of_partitions");
                        } else {
                            long rowCount = 0;
                            ResultSet countResult = null;
                            try {
                                countResult = getHiveHandler().runQueryKillable(hadoopExampleSet.getUdfDependencies(), null, KillableOperationWrapper.create(this), "SELECT COUNT(*) FROM ?", HadoopExampleSet.getTableName(hadoopExampleSet));
                                while (countResult.next()) {
                                    rowCount = countResult.getLong(1);
                                }
                                JdbcConnectionTools.closeRes(countResult);
                            } catch (SQLException e) {
                                JdbcConnectionTools.forceCloseResAndConnection(countResult);
                                throw new OperatorException(RadoopTools.formatOperatorExceptionMessage("Error during counting input rows.", e), e);
                            }
                            // Round up: a partially filled last partition still counts as a partition.
                            partitionCount = rowCount == 0
                                    ? 1
                                    : (int) Math.ceil((double) rowCount / getParameterAsInt(PARAMETER_PARTITION_SIZE));
                        }
                        avgPerMaxPartitionRatio = 1.0d;
                    } else if (fileReport.totalSize == 0 || fileReport.blockCount <= 0 || fileReport.maxBlockSize <= 0) {
                        // Empty input or degenerate block statistics: assume a single partition
                        // instead of dividing by zero.
                        partitionCount = 1;
                        avgPerMaxPartitionRatio = 1.0d;
                    } else {
                        partitionCount = fileReport.blockCount;
                        // Floating-point division: average block size relative to the largest block.
                        avgPerMaxPartitionRatio = ((double) fileReport.totalSize / fileReport.blockCount) / fileReport.maxBlockSize;
                    }
                }
                if (!getParameterAsBoolean(PARAMETER_MERGE_OUTPUT) && partitionCount > getParameterAsInt(PARAMETER_MAX_COLLECTION_SIZE)) {
                    throw new UserError(this, "spark.pushdown.too_many_partitions", new Object[]{Integer.valueOf(partitionCount), Integer.valueOf(getParameterAsInt(PARAMETER_MAX_COLLECTION_SIZE))});
                }
            } catch (IOException e) {
                throw new OperatorException(RadoopTools.formatOperatorExceptionMessage("Error during getting input file report.", e), e);
            }
        } else if (!getParameterAsBoolean(PARAMETER_MERGE_OUTPUT) && bootstrapCount > getParameterAsInt(PARAMETER_MAX_COLLECTION_SIZE)) {
            throw new UserError(this, "spark.pushdown.too_many_bootstraps", new Object[]{Integer.valueOf(bootstrapCount), Integer.valueOf(getParameterAsInt(PARAMETER_MAX_COLLECTION_SIZE))});
        }

        try {
            final boolean legacyParameterNames = getCompatibilityLevel().isAtMost(VERSION_OLD_MEMORY_PARAM_NAMES);
            this.sparkResourceAllocationPolicyParams.put(SparkResourceAllocationPolicy.STATIC_HEURISTIC_MULTI_NODE_PUSHDOWN_PARM_CLUSTER_REPORT, getMapReduceHDFSHandler().getClusterResources());
            // When bootstrapping, every bootstrap is one equally sized partition.
            this.sparkResourceAllocationPolicyParams.put(SparkResourceAllocationPolicy.STATIC_HEURISTIC_MULTI_NODE_PUSHDOWN_PARM_INPUT_PARTITIONS, Integer.valueOf(bootstrapping ? bootstrapCount : partitionCount));
            this.sparkResourceAllocationPolicyParams.put(SparkResourceAllocationPolicy.STATIC_HEURISTIC_MULTI_NODE_PUSHDOWN_PARM_INPUT_AVG_PER_MAX_PARTITION_RATIO, Double.valueOf(bootstrapping ? 1.0d : avgPerMaxPartitionRatio));
            this.sparkResourceAllocationPolicyParams.put(SparkResourceAllocationPolicy.STATIC_HEURISTIC_MULTI_NODE_PUSHDOWN_PARM_CLUSTER_RESOURCES_LIMIT, Integer.valueOf(legacyParameterNames ? getParameterAsInt("cluster_resources_limit_%") : getParameterAsInt("cluster_resources_%")));
            this.sparkResourceAllocationPolicyParams.put(SparkResourceAllocationPolicy.STATIC_HEURISTIC_MULTI_NODE_PUSHDOWN_PARM_MAX_NODE_MEMORY_PERCENTAGE, Integer.valueOf(legacyParameterNames ? getParameterAsInt("max_node_memory_%") : getParameterAsInt("executor_memory_%")));
            this.sparkResourceAllocationPolicyParams.put(SparkResourceAllocationPolicy.STATIC_HEURISTIC_MULTI_NODE_PUSHDOWN_PARM_PARALLELIZATION, 4);
            this.sparkResourceAllocationPolicyParams.put(SparkResourceAllocationPolicy.STATIC_HEURISTIC_SINGLE_NODE_PUSHDOWN_PARM_DRIVER_MEMORY, Integer.valueOf(getParameterAsInt("driver_memory_(MB)")));
        } catch (IOException e) {
            throw new OperatorException("Error during getting cluster report: ", e);
        }
    }

    /**
     * Declares the parameters specific to the multi-node pushdown operator, in display order.
     *
     * <p>The parameters fall into four groups: bootstrapping/partitioning of the input, handling
     * of the per-partition outputs, partition index/macro options, and resource allocation. The
     * resource allocation parameters exist under a legacy and a current name; which of the two is
     * active depends on the operator's compatibility level relative to
     * {@link #VERSION_OLD_MEMORY_PARAM_NAMES}.
     *
     * @return the newly created list of parameter types
     */
    @Override // eu.radoop.operator.meta.PushdownOperator
    public List<ParameterType> getSpecificParameterTypes() {
        final List<ParameterType> types = new ArrayList<>();

        // ---- Bootstrapping and partitioning of the input ----
        final ParameterTypeInt numberOfBootstraps = new ParameterTypeInt(PARAMETER_NUMBER_OF_BOOTSTRAPS, "The desired number of bootstrapping executions.", 1, Integer.MAX_VALUE, 5, false);
        numberOfBootstraps.registerDependencyCondition(new BooleanParameterCondition(this, "bootstrap_data", true, true));
        types.add(numberOfBootstraps);

        final ParameterTypeCategory partitioningMode = new ParameterTypeCategory(PARAMETER_PARTITIONING_MODE, "Selects the partitioning mode.", PartitioningMode.getNames(), PartitioningMode.getDefault().ordinal(), false);
        partitioningMode.registerDependencyCondition(new BooleanParameterCondition(this, "bootstrap_data", true, false));
        types.add(partitioningMode);

        final ParameterTypeCategory partitionSizing = new ParameterTypeCategory(PARAMETER_PARTITION_SIZING, "Selects method of partition sizing.", PartitionSizing.getNames(), PartitionSizing.getDefault().ordinal(), false);
        partitionSizing.registerDependencyCondition(new BooleanParameterCondition(this, "bootstrap_data", true, false));
        partitionSizing.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_PARTITIONING_MODE, PartitioningMode.getNames(), true, new int[]{PartitioningMode.RANDOM.ordinal()}));
        types.add(partitionSizing);

        final ParameterTypeInt numberOfPartitions = new ParameterTypeInt("number_of_partitions", "The desired number of partitions.", 1, Integer.MAX_VALUE, 1, false);
        numberOfPartitions.registerDependencyCondition(new BooleanParameterCondition(this, "bootstrap_data", true, false));
        numberOfPartitions.registerDependencyCondition(new AndParameterCondition(this, true, new ParameterCondition[]{
                new EqualTypeCondition(this, PARAMETER_PARTITIONING_MODE, PartitioningMode.getNames(), true, new int[]{PartitioningMode.RANDOM.ordinal()}),
                new EqualTypeCondition(this, PARAMETER_PARTITION_SIZING, PartitionSizing.getNames(), true, new int[]{PartitionSizing.FIXED.ordinal()})}));
        types.add(numberOfPartitions);

        final ParameterTypeInt partitionSize = new ParameterTypeInt(PARAMETER_PARTITION_SIZE, "The desired number of records assigned to a partition.", 1, Integer.MAX_VALUE, 1000000, false);
        partitionSize.registerDependencyCondition(new BooleanParameterCondition(this, "bootstrap_data", true, false));
        partitionSize.registerDependencyCondition(new AndParameterCondition(this, true, new ParameterCondition[]{
                new EqualTypeCondition(this, PARAMETER_PARTITIONING_MODE, PartitioningMode.getNames(), true, new int[]{PartitioningMode.RANDOM.ordinal()}),
                new EqualTypeCondition(this, PARAMETER_PARTITION_SIZING, PartitionSizing.getNames(), true, new int[]{PartitionSizing.ABSOLUTE.ordinal()})}));
        types.add(partitionSize);

        final ParameterTypeAttribute partitioningAttribute = new ParameterTypeAttribute(PARAMETER_PARTITIONING_ATTRIBUTE, "Partition the input ExampleSet by the values of this attribute.", this.exampleSetInput, true, false);
        partitioningAttribute.registerDependencyCondition(new BooleanParameterCondition(this, "bootstrap_data", true, false));
        partitioningAttribute.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_PARTITIONING_MODE, PartitioningMode.getNames(), true, new int[]{PartitioningMode.ATTRIBUTE.ordinal()}));
        types.add(partitioningAttribute);

        // ---- Handling of the per-partition outputs ----
        types.add(new ParameterTypeBoolean(PARAMETER_MERGE_OUTPUT, "If selected, the output of each partition is merged into a single output table (first output port). In this case if the output schemas are different, the operator will fail. If unchecked, the operator delivers a collection of example sets on the output port, one table for each partition.", true, false));

        // Sentence breaks in this description were missing their spaces; fixed here.
        final ParameterTypeBoolean resolveSchemaConflicts = new ParameterTypeBoolean(PARAMETER_RESOLVE_SCHEMA_CONFLICTS, "If selected, the schema conflicts on the output of the partitions are resolved. Consider using this if the resulting data sets of multiple partitions may have a different schema (e.g. when doing text mining). In this case the output contains the superset of each partition's attributes. Missing attributes in one data set will be filled up as specified by \"handle missing attributes\" parameter.", false, false);
        resolveSchemaConflicts.registerDependencyCondition(new BooleanParameterCondition(this, PARAMETER_MERGE_OUTPUT, false, true));
        types.add(resolveSchemaConflicts);

        final ParameterTypeCategory conflictResolutionMode = new ParameterTypeCategory(PARAMETER_SCHEMA_CONFLICT_RESOLUTION_MODE, "Selects the schema conflict resolution mode on the partitions of the output.", SchemaConflictResolutionMode.getNames(), SchemaConflictResolutionMode.getDefault().ordinal(), false);
        conflictResolutionMode.registerDependencyCondition(new AndParameterCondition(this, false, new ParameterCondition[]{
                new BooleanParameterCondition(this, PARAMETER_MERGE_OUTPUT, true, true),
                new BooleanParameterCondition(this, PARAMETER_RESOLVE_SCHEMA_CONFLICTS, true, true)}));
        conflictResolutionMode.setExpert(true);
        types.add(conflictResolutionMode);

        final ParameterTypeCategory handleMissingAttributes = new ParameterTypeCategory(PARAMETER_HANDLE_MISSING_ATTRIBUTES, "Defines how the values of missing attributes are handled after schema resolution. If \"Use a specified value\" is selected, all occurrences will be set to the specified value, otherwise they will be considered as missing values.", MissingAttributesHandlingMode.getNames(), MissingAttributesHandlingMode.getDefault().ordinal());
        handleMissingAttributes.registerDependencyCondition(new AndParameterCondition(this, false, new ParameterCondition[]{
                new BooleanParameterCondition(this, PARAMETER_MERGE_OUTPUT, true, true),
                new BooleanParameterCondition(this, PARAMETER_RESOLVE_SCHEMA_CONFLICTS, true, true)}));
        handleMissingAttributes.setExpert(true);
        types.add(handleMissingAttributes);

        final ParameterTypeString missingAttributesValue = new ParameterTypeString("missing_attributes_value", "Values of missing attributes after schema resolution.");
        missingAttributesValue.registerDependencyCondition(new AndParameterCondition(this, false, new ParameterCondition[]{
                new BooleanParameterCondition(this, PARAMETER_MERGE_OUTPUT, true, true),
                new BooleanParameterCondition(this, PARAMETER_RESOLVE_SCHEMA_CONFLICTS, true, true),
                new EqualStringCondition(this, PARAMETER_HANDLE_MISSING_ATTRIBUTES, true, new String[]{MissingAttributesHandlingMode.VALUE.getName()})}));
        types.add(missingAttributesValue);

        final ParameterTypeInt maxCollectionSize = new ParameterTypeInt(PARAMETER_MAX_COLLECTION_SIZE, "The maximum number of output example sets (Hive tables) to generate when merge_output is set to false. This threshold is needed because the Hive server is usually not able to handle calls to hundreds of tables in parallel. The operator will fail if more partitions would be used than this number.", 1, Integer.MAX_VALUE, 20, false);
        maxCollectionSize.registerDependencyCondition(new BooleanParameterCondition(this, PARAMETER_MERGE_OUTPUT, false, false));
        types.add(maxCollectionSize);

        types.add(new ParameterTypeBoolean(PARAMETER_DISTRIBUTE_NOMINAL_MAPPINGS, "If enabled then all nominal attributes values are discovered on the input data and distributed to all partitions and thus enabling consistent model builds.", false, false));

        // ---- Partition index attribute and partition macro ----
        types.add(new ParameterTypeBoolean(PARAMETER_ADD_PARTITION_INDEX, "Add a new attribute to the output example set, indicating the index of the partition that processed the example.", false, true));
        types.add(new ParameterTypeBoolean("set_partition_macro", "Selects if for each partition a macro with the current partition index is set.", false, true));

        final ParameterTypeString macroName = new ParameterTypeString("macro_name", "The name of the partition macro.", HiveWindowing.ROLE_PARTITION, true);
        macroName.registerDependencyCondition(new BooleanParameterCondition(this, "set_partition_macro", true, true));
        types.add(macroName);

        final ParameterTypeInt macroStartValue = new ParameterTypeInt("macro_start_value", "The number which is set for the macro in the first partition.", Integer.MIN_VALUE, Integer.MAX_VALUE, 1, true);
        macroStartValue.registerDependencyCondition(new BooleanParameterCondition(this, "set_partition_macro", true, true));
        macroStartValue.registerDependencyCondition(new OrParameterCondition(this, true, new ParameterCondition[]{
                new BooleanParameterCondition(this, "bootstrap_data", true, true),
                new NonEqualTypeCondition(this, PARAMETER_PARTITIONING_MODE, PartitioningMode.getNames(), true, new int[]{PartitioningMode.ATTRIBUTE.ordinal()})}));
        types.add(macroStartValue);

        // ---- Resource allocation (legacy name up to VERSION_OLD_MEMORY_PARAM_NAMES, current name above) ----
        final String clusterResourcesDescription = "The maximum cluster utilization that this operator may use.";
        final ParameterTypeInt clusterResourcesLegacy = new ParameterTypeInt("cluster_resources_limit_%", clusterResourcesDescription, 10, 100, 70, true);
        clusterResourcesLegacy.registerDependencyCondition(new BelowOrEqualOperatorVersionCondition(this, VERSION_OLD_MEMORY_PARAM_NAMES));
        types.add(clusterResourcesLegacy);
        final ParameterTypeInt clusterResources = new ParameterTypeInt("cluster_resources_%", clusterResourcesDescription, 10, 100, 70, true);
        clusterResources.registerDependencyCondition(new AboveOperatorVersionCondition(this, VERSION_OLD_MEMORY_PARAM_NAMES));
        types.add(clusterResources);

        types.add(new ParameterTypeInt("driver_memory_(MB)", "Amount of memory to use for the driver process in MB.", 512, Integer.MAX_VALUE, 2048, true));

        final String executorMemoryDescription = "Percentage of the memory of the largest node of the cluster that should be used at least by each executor. Try increasing this parameter in case the process has memory issues but please also note that higher setting may reduce the number of executors allocated on heterogeneous clusters and so increase running time.";
        final ParameterTypeInt executorMemoryLegacy = new ParameterTypeInt("max_node_memory_%", executorMemoryDescription, 10, 100, 50, true);
        executorMemoryLegacy.registerDependencyCondition(new BelowOrEqualOperatorVersionCondition(this, VERSION_OLD_MEMORY_PARAM_NAMES));
        types.add(executorMemoryLegacy);
        final ParameterTypeInt executorMemory = new ParameterTypeInt("executor_memory_%", executorMemoryDescription, 10, 100, 50, true);
        executorMemory.registerDependencyCondition(new AboveOperatorVersionCondition(this, VERSION_OLD_MEMORY_PARAM_NAMES));
        types.add(executorMemory);

        final String forceStatisticsDescription = "Forces gathering of statistics on the input data set to boost resource allocation heuristics. By default, detailed statistics (e.g. expected partition size) are not calculated on data sets that do not exceed 10 MB, because this overhead is not expected to gain enough improvement in resource allocation in those cases. With this setting, however, you can force the calculation of these statistics on small input data sets as well. This parameter is only considered if Spark Resource Allocation Policy is set to Static, Heuristic Configuration in the current Radoop connection.";
        final ParameterTypeBoolean forceStatisticsLegacy = new ParameterTypeBoolean(PARAMETER_FORCE_INPUT_PREPROCESSING, forceStatisticsDescription, false, true);
        forceStatisticsLegacy.registerDependencyCondition(new BelowOrEqualOperatorVersionCondition(this, VERSION_OLD_MEMORY_PARAM_NAMES));
        types.add(forceStatisticsLegacy);
        final ParameterTypeBoolean forceStatistics = new ParameterTypeBoolean("force_resource_calculation", forceStatisticsDescription, false, true);
        forceStatistics.registerDependencyCondition(new AboveOperatorVersionCondition(this, VERSION_OLD_MEMORY_PARAM_NAMES));
        types.add(forceStatistics);

        return types;
    }

    /**
     * Validates the parameter combination before execution, after running the checks of the
     * super class.
     *
     * <ul>
     *   <li>With {@code merge_output} disabled, RANDOM partitioning and FIXED sizing, the
     *       requested {@code number_of_partitions} must not exceed {@code max_collection_size}.</li>
     *   <li>With ATTRIBUTE partitioning, an input example set must be present and must contain
     *       the attribute named by {@code partitioning_attribute}.</li>
     * </ul>
     *
     * @param hadoopExampleSet the input example set, may be {@code null}
     * @throws UserError if one of the checks above fails
     */
    /* JADX INFO: Access modifiers changed from: protected */
    @Override // eu.radoop.operator.meta.PushdownOperator
    public void checkParameters(HadoopExampleSet hadoopExampleSet) throws UserError {
        super.checkParameters(hadoopExampleSet);

        // Collection output: a fixed partition count may not exceed the collection size limit.
        if (!getParameterAsBoolean(PARAMETER_MERGE_OUTPUT)) {
            final boolean randomPartitioning = PartitioningMode.RANDOM.equals(PartitioningMode.getByName(getParameterAsString(PARAMETER_PARTITIONING_MODE)));
            if (randomPartitioning && PartitionSizing.FIXED.equals(PartitionSizing.getByName(getParameterAsString(PARAMETER_PARTITION_SIZING)))) {
                final int requestedPartitions = getParameterAsInt("number_of_partitions");
                final int collectionLimit = getParameterAsInt(PARAMETER_MAX_COLLECTION_SIZE);
                if (requestedPartitions > collectionLimit) {
                    throw new UserError(this, "spark.pushdown.too_many_partitions", new Object[]{Integer.valueOf(requestedPartitions), Integer.valueOf(collectionLimit)});
                }
            }
        }

        // The remaining checks only concern partitioning by attribute.
        if (!PartitioningMode.ATTRIBUTE.equals(PartitioningMode.getByName(getParameterAsString(PARAMETER_PARTITIONING_MODE)))) {
            return;
        }
        final String attributeName = getParameterAsString(PARAMETER_PARTITIONING_ATTRIBUTE);
        if (hadoopExampleSet == null) {
            throw new UserError(this, "spark.pushdown.missing_input_exa");
        }
        if (hadoopExampleSet.attributes.findRoleByName(attributeName) == null) {
            throw new UserError(this, 160, new Object[]{attributeName});
        }
    }

    /**
     * Copies the operator-specific parameter values into {@code pushdownParams}, one
     * {@link ProcessPushdownParameter} entry per operator parameter.
     *
     * <p>Each value is read and stored before the next one is read, so a failing parameter
     * lookup leaves the entries written so far in place.
     *
     * @throws UserError if a parameter value cannot be obtained
     */
    @Override // eu.radoop.operator.meta.PushdownOperator
    protected void setupSpecParams() throws UserError {
        // Bootstrapping and partitioning of the input.
        final int bootstraps = getParameterAsInt(PARAMETER_NUMBER_OF_BOOTSTRAPS);
        pushdownParams.setParameter(ProcessPushdownParameter.NUMBER_OF_BOOTSTRAPS, Integer.valueOf(bootstraps));
        final String partitioningMode = getParameterAsString(PARAMETER_PARTITIONING_MODE);
        pushdownParams.setParameter(ProcessPushdownParameter.PARTITIONING_MODE, partitioningMode);
        final String partitionSizing = getParameterAsString(PARAMETER_PARTITION_SIZING);
        pushdownParams.setParameter(ProcessPushdownParameter.PARTITION_SIZING, partitionSizing);
        final int partitions = getParameterAsInt("number_of_partitions");
        pushdownParams.setParameter(ProcessPushdownParameter.NUMBER_OF_PARTITIONS, Integer.valueOf(partitions));
        final int partitionSize = getParameterAsInt(PARAMETER_PARTITION_SIZE);
        pushdownParams.setParameter(ProcessPushdownParameter.PARTITION_SIZE, Integer.valueOf(partitionSize));
        final String partitioningAttribute = getParameterAsString(PARAMETER_PARTITIONING_ATTRIBUTE);
        pushdownParams.setParameter(ProcessPushdownParameter.PARTITIONING_ATTRIBUTE, partitioningAttribute);

        // Partition index attribute and partition macro.
        final boolean addPartitionIndex = getParameterAsBoolean(PARAMETER_ADD_PARTITION_INDEX);
        pushdownParams.setParameter(ProcessPushdownParameter.ADD_PARTITION_INDEX, Boolean.valueOf(addPartitionIndex));
        final boolean setPartitionMacro = getParameterAsBoolean("set_partition_macro");
        pushdownParams.setParameter(ProcessPushdownParameter.SET_PARTITION_MACRO, Boolean.valueOf(setPartitionMacro));
        final String macroName = getParameterAsString("macro_name");
        pushdownParams.setParameter(ProcessPushdownParameter.PARTITION_MACRO_NAME, macroName);
        final int macroStartValue = getParameterAsInt("macro_start_value");
        pushdownParams.setParameter(ProcessPushdownParameter.PARTITION_MACRO_START_VALUE, Integer.valueOf(macroStartValue));

        // Handling of the per-partition outputs.
        final boolean mergeOutput = getParameterAsBoolean(PARAMETER_MERGE_OUTPUT);
        pushdownParams.setParameter(ProcessPushdownParameter.MERGE_OUTPUT, Boolean.valueOf(mergeOutput));
        final boolean resolveSchemaConflicts = getParameterAsBoolean(PARAMETER_RESOLVE_SCHEMA_CONFLICTS);
        pushdownParams.setParameter(ProcessPushdownParameter.RESOLVE_SCHEMA_CONFLICTS, Boolean.valueOf(resolveSchemaConflicts));
        final String conflictResolutionMode = getParameterAsString(PARAMETER_SCHEMA_CONFLICT_RESOLUTION_MODE);
        pushdownParams.setParameter(ProcessPushdownParameter.SCHEMA_CONFLICT_RESOLUTION_MODE, conflictResolutionMode);
        final String missingAttributesHandling = getParameterAsString(PARAMETER_HANDLE_MISSING_ATTRIBUTES);
        pushdownParams.setParameter(ProcessPushdownParameter.HANDLE_MISSING_ATTRIBUTES, missingAttributesHandling);
        final String missingAttributesValue = getParameterAsString("missing_attributes_value");
        pushdownParams.setParameter(ProcessPushdownParameter.MISSING_ATTRIBUTES_VALUE, missingAttributesValue);
        final int maxCollectionSize = getParameterAsInt(PARAMETER_MAX_COLLECTION_SIZE);
        pushdownParams.setParameter(ProcessPushdownParameter.MAX_COLLECTION_SIZE, Integer.valueOf(maxCollectionSize));
        final boolean distributeNominalMappings = getParameterAsBoolean(PARAMETER_DISTRIBUTE_NOMINAL_MAPPINGS);
        pushdownParams.setParameter(ProcessPushdownParameter.DISTRIBUTE_NOMINAL_MAPPINGS, Boolean.valueOf(distributeNominalMappings));
    }

    /**
     * Creates the port pair extender that links the inner sinks of subprocess 0 to this
     * operator's output ports.
     *
     * <p>The returned extender customizes {@code transformMetaData}: when the incoming meta data
     * is an {@link ExampleSetMetaData}, a {@code type_not_recommended} warning is added to this
     * operator and a clone of the superclass result is wrapped in a {@link CollectionMetaData}.
     * Any other meta data is returned exactly as the superclass transformed it.
     *
     * @return the extender for the output port pairs
     */
    @Override // eu.radoop.operator.meta.PushdownOperator
    protected PortPairExtender createOutputExtender() {
        return new RadoopPortPairExtender(MRJobConfig.OUTPUT, getSubprocess(0).getInnerSinks(), getOutputPorts(), false, true) { // from class: eu.radoop.operator.meta.MultiNodePushdown.2
            /* JADX INFO: Access modifiers changed from: protected */
            @Override // eu.radoop.operator.ports.RadoopPortPairExtender
            public MetaData transformMetaData(MetaData metaData) {
                final MetaData transformed = super.transformMetaData(metaData);
                if (metaData instanceof ExampleSetMetaData) {
                    // An example set arriving here is flagged with a warning and exposed as a collection.
                    MultiNodePushdown.this.addError(
                            new SimpleProcessSetupError(
                                    ProcessSetupError.Severity.WARNING,
                                    MultiNodePushdown.this.getPortOwner(),
                                    "type_not_recommended",
                                    new Object[]{"large ExampleSet", "first output"}));
                    return new CollectionMetaData(transformed.clone());
                }
                return transformed;
            }
        };
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Delivers the example set output of the pushed-down process.
     *
     * <p>When {@code PARAMETER_MERGE_OUTPUT} is enabled, delivery is delegated to the superclass.
     * Otherwise, if the inner example set sink is connected, one temporary Hive table is created
     * for every entry of {@code processOutputTO.getPartitionToAMD()}. The part file
     * {@code part-NNNNN} (zero-padded running index in iteration order) below
     * {@code tempHDFSDirectory} is loaded into that table, the table is annotated with its
     * partition key under {@code "Partition"}, and all tables are finally delivered together as
     * one {@link IOObjectCollection}. If the inner sink is not connected, nothing is delivered.
     *
     * @param processOutputTO   result descriptor providing the per-partition attribute meta data
     * @param tempHDFSDirectory directory whose part files are loaded into the temporary tables
     * @throws OperatorException a {@link UserError} with code 1006 if obtaining or preparing a
     *                           table raises a {@code HiveTableException}, a {@link UserError}
     *                           with code 1008 if exploring a table fails, or whatever the
     *                           called helpers propagate
     */
    @Override // eu.radoop.operator.meta.PushdownOperator
    public void deliverExaOutput(ProcessOutputTO processOutputTO, TempHDFSDirectory tempHDFSDirectory) throws OperatorException {
        if (getParameterAsBoolean(PARAMETER_MERGE_OUTPUT)) {
            super.deliverExaOutput(processOutputTO, tempHDFSDirectory);
            return;
        }
        if (!this.exampleSetInnerSink.isConnected()) {
            return;
        }

        ArrayList<HadoopExampleSet> partitionTables = new ArrayList<HadoopExampleSet>();
        int partIndex = 0;
        for (Map.Entry<String, ArrayList<PushdownOutputAttributeMetaData>> entry : processOutputTO.getPartitionToAMD().entrySet()) {
            String partitionKey = entry.getKey();
            Map<Attribute, String> attributesWithRole = getAttributesWithRole(entry.getValue());
            tempHDFSDirectory.cleanNonDataFiles();
            try {
                tempHDFSDirectory.setPermission();
            } catch (IOException e) {
                // Best effort: a failure to widen the permissions is only logged, processing continues.
                LogService.getRoot().fine("Could not set permissions (rwxrwxrwx) for directory: " + tempHDFSDirectory.getFullPath());
            }
            String tempTableName = getTempTableName();
            HadoopExampleSetFactory.createHiveTable(getHiveHandler(), tempTableName, attributesWithRole, false, "ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\001' escaped by '\\\\' STORED AS " + FileFormatHive.TEXTFILE.toString());
            // Locale.ROOT keeps the digits ASCII; the default locale may substitute its own
            // digit characters, which would no longer match the part file name.
            String partFileName = String.format(java.util.Locale.ROOT, "part-%05d", Integer.valueOf(partIndex));
            partIndex++;
            getHiveHandler().runFastScript(null, false, "LOAD DATA INPATH '?' INTO TABLE ?", tempHDFSDirectory.getFullPath() + partFileName, tempTableName);
            try {
                HadoopExampleSet hiveTable = HadoopExampleSetFactory.getHiveTable(getHiveHandler(), tempTableName, true, null, null);
                hiveTable.resetUdfDependencies();
                hiveTable.resetOperatorCost();
                // Explore the table when this operator has breakpoint 1 set ...
                if (isRunning() && hasBreakpoint(1)) {
                    explorePartitionTable(hiveTable);
                }
                // ... and when the operator owning the connected destination port has breakpoint 0 set.
                if (isRunning() && this.exampleSetOutput.isConnected()) {
                    InputPort destination = this.exampleSetOutput.getDestination();
                    for (Operator operator : getProcess().getAllOperators()) {
                        for (InputPort candidate : operator.getInputPorts().getAllPorts()) {
                            if (destination == candidate && operator.hasBreakpoint(0)) {
                                explorePartitionTable(hiveTable);
                            }
                        }
                    }
                }
                hiveTable.getAnnotations().put("Partition", partitionKey);
                partitionTables.add(hiveTable);
            } catch (HiveTableException e) {
                // Keep the original exception as cause so the root problem stays diagnosable.
                throw new UserError(this, e, 1006, new Object[]{tempTableName});
            }
        }
        this.exampleSetOutput.deliver(new IOObjectCollection<HadoopExampleSet>(partitionTables));
    }

    /**
     * Calls {@code explore(this)} on the given table and converts a failure into a user error.
     *
     * @param hiveTable the partition table to explore
     * @throws UserError with code 1008 (and the original exception as cause) if exploring fails
     */
    private void explorePartitionTable(HadoopExampleSet hiveTable) throws UserError {
        try {
            hiveTable.explore(this);
        } catch (HiveTableException e) {
            LogService.getRoot().fine("Could not explore " + HadoopExampleSet.getTableName(hiveTable));
            throw new UserError(this, e, 1008, new Object[]{HadoopExampleSet.getTableName(hiveTable)});
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Runs the superclass checks and then adds a {@code too_many_partitions} setup error when
     * the output is not merged, partitioning is random with fixed sizing, and
     * {@code number_of_partitions} exceeds {@code PARAMETER_MAX_COLLECTION_SIZE}.
     *
     * <p>The error carries two quick fixes, one for each of the two conflicting parameters.
     */
    @Override // eu.radoop.operator.meta.PushdownOperator, eu.radoop.RadoopOperatorChain
    public void performAdditionalChecks() {
        super.performAdditionalChecks();
        try {
            if (getParameterAsBoolean(PARAMETER_MERGE_OUTPUT)) {
                return;
            }
            if (!PartitioningMode.RANDOM.equals(PartitioningMode.getByName(getParameterAsString(PARAMETER_PARTITIONING_MODE)))) {
                return;
            }
            if (!PartitionSizing.FIXED.equals(PartitionSizing.getByName(getParameterAsString(PARAMETER_PARTITION_SIZING)))) {
                return;
            }
            if (getParameterAsInt("number_of_partitions") <= getParameterAsInt(PARAMETER_MAX_COLLECTION_SIZE)) {
                return;
            }

            ArrayList fixes = new ArrayList();
            fixes.add(new ParameterSettingQuickFix(this, "number_of_partitions", (String) null, "set_optional_parameter", new Object[]{"number_of_partitions"}));
            fixes.add(new ParameterSettingQuickFix(this, PARAMETER_MAX_COLLECTION_SIZE, (String) null, "set_optional_parameter", new Object[]{PARAMETER_MAX_COLLECTION_SIZE}));
            addError(
                    new SimpleProcessSetupError(
                            ProcessSetupError.Severity.ERROR,
                            getPortOwner(),
                            fixes,
                            "too_many_partitions",
                            new Object[]{
                                Integer.valueOf(getParameterAsInt("number_of_partitions")),
                                Integer.valueOf(getParameterAsInt(PARAMETER_MAX_COLLECTION_SIZE))
                            }));
        } catch (UndefinedParameterError ignored) {
            // Intentionally ignored: without the parameter values this check cannot be evaluated.
            // NOTE(review): presumably undefined parameters are reported elsewhere - confirm.
        }
    }

    /**
     * Reports whether the inner example set sink of this operator is connected.
     *
     * @return {@code true} if {@code exampleSetInnerSink} is connected
     */
    @Override // eu.radoop.operator.meta.PushdownOperator
    protected boolean isExaOutputConnected() {
        final boolean innerSinkConnected = exampleSetInnerSink.isConnected();
        return innerSinkConnected;
    }

    /**
     * Always answers {@code false} for this operator.
     *
     * @return {@code false}, unconditionally
     */
    @Override // eu.radoop.operator.meta.PushdownOperator
    protected boolean deliverIfPossible() {
        return Boolean.FALSE.booleanValue();
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Reacts to an unknown runner error.
     *
     * <p>Throws the {@code spark.pushdown.listener_bus_error} user error when the
     * {@code use_memory_monitor} parameter is enabled and either the partitioning mode is
     * {@code ATTRIBUTE} or schema conflicts are resolved with the {@code CACHE_ON_DISK} mode.
     * In every other case the superclass handling is used.
     *
     * @throws UserError the listener bus error described above, or whatever the parameter
     *                   getters or the superclass raise
     */
    @Override // eu.radoop.operator.meta.PushdownOperator
    public void handleUnknownRunnerError() throws UserError {
        // Evaluated step by step so each parameter is read only when the earlier checks require it.
        boolean affectedConfiguration = PartitioningMode.ATTRIBUTE.equals(PartitioningMode.getByName(getParameterAsString(PARAMETER_PARTITIONING_MODE)));
        if (!affectedConfiguration && getParameterAsBoolean(PARAMETER_RESOLVE_SCHEMA_CONFLICTS)) {
            affectedConfiguration = SchemaConflictResolutionMode.CACHE_ON_DISK.equals(SchemaConflictResolutionMode.getByName(getParameterAsString(PARAMETER_SCHEMA_CONFLICT_RESOLUTION_MODE)));
        }
        if (affectedConfiguration && getParameterAsBoolean("use_memory_monitor")) {
            throw new UserError(this, "spark.pushdown.listener_bus_error");
        }
        super.handleUnknownRunnerError();
    }

    /**
     * Returns the incompatible version changes of the superclass with
     * {@code VERSION_OLD_MEMORY_PARAM_NAMES} appended.
     *
     * @return the inherited versions followed by {@code VERSION_OLD_MEMORY_PARAM_NAMES}
     */
    @Override // eu.radoop.operator.meta.PushdownOperator
    public OperatorVersion[] getIncompatibleVersionChanges() {
        final OperatorVersion[] inherited = super.getIncompatibleVersionChanges();
        return (OperatorVersion[]) ArrayUtils.add(inherited, VERSION_OLD_MEMORY_PARAM_NAMES);
    }
}
