package eu.radoop.operator.spark;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.AttributeWeights;
import com.rapidminer.example.table.NominalMapping;
import com.rapidminer.operator.OperatorCapability;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ProcessSetupError;
import com.rapidminer.operator.SimpleProcessSetupError;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.learner.functions.LogisticRegressionModel;
import com.rapidminer.operator.ports.quickfix.ParameterSettingQuickFix;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.parameter.conditions.NonEqualTypeCondition;
import eu.radoop.datahandler.HadoopExampleSet;
import eu.radoop.spark.SparkOperation;
import eu.radoop.transfer.model.SupportVectorMachineMTO;
import eu.radoop.transfer.parameter.ParameterTransferObject;
import eu.radoop.transfer.parameter.SparkSupportVectorMachineParameter;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

/* loaded from: input_file:eu/radoop/operator/spark/SparkSupportVectorMachine.class */
/**
 * Learner operator that trains a linear Support Vector Machine through the
 * {@link SparkOperation#SupportVectorMachine} Spark operation.
 *
 * <p>The weights transferred back from Spark are wrapped into a
 * {@link LogisticRegressionModel}, and the per-attribute coefficients are
 * additionally delivered on the {@code "weights"} output port.
 */
public class SparkSupportVectorMachine extends AbstractSparkPredictionLearner<SparkSupportVectorMachineParameter, SupportVectorMachineMTO, LogisticRegressionModel> {
    public static final String PARAMETER_NUM_ITERATIONS = "number_of_iterations";
    public static final String PARAMETER_STEP_SIZE = "step_size";
    public static final String PARAMETER_MINIBATCH_FRACTION = "minibatch_fractions";
    public static final String PARAMETER_REGULARIZATION = "regularization_parameter";
    public static final String PARAMETER_ADD_INTERCEPT = "add_intercept";
    public static final String PARAMETER_USE_FEATURE_SCALING = "use_feature_scaling";
    public static final String PARAMETER_UPDATER = "updater";
    public static final String PARAMETER_CONVERGENCE_TO_L = "convergence_to_L";
    public static final double DEFAULT_CONVERGENCE_TO_L = 1.0E-4d;
    /** Display names of the updaters; the selected index is mapped onto {@code SparkSupportVectorMachineParameter.Updater.values()}. */
    public static final String[] UPDATERS = {"Simple Updater", "L1 Updater", "Squared L2 Updater"};

    /**
     * Creates the operator bound to the Spark SVM operation.
     *
     * @param operatorDescription the operator description handed to the super class
     */
    public SparkSupportVectorMachine(OperatorDescription operatorDescription) {
        super(operatorDescription, SparkOperation.SupportVectorMachine);
    }

    /**
     * Appends the SVM specific parameters to the parameters of the super class.
     *
     * <p>The regularization parameter is registered with a dependency condition
     * on the updater parameter that references the simple updater's index.
     *
     * @return the list returned by the super class, extended by this operator's parameters
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> parameterTypes = super.getParameterTypes();
        parameterTypes.add(new ParameterTypeInt(PARAMETER_NUM_ITERATIONS, "Number of iterations of gradient descent to run.", 0, Integer.MAX_VALUE, 100, false));
        parameterTypes.add(new ParameterTypeDouble(PARAMETER_STEP_SIZE, "The initial step size of SGD for the first step. Default 1.0. In subsequent steps, the step size will decrease with stepSize/sqrt(current_iteration_number).", 0.0d, Double.MAX_VALUE, 1.0d, false));
        parameterTypes.add(new ParameterTypeDouble(PARAMETER_MINIBATCH_FRACTION, "Fraction of the input data set that should be used for one iteration of SGD. Default 1.0 (corresponding to deterministic/classical gradient descent)", 0.0d, 1.0d, 1.0d, false));
        parameterTypes.add(new ParameterTypeDouble(PARAMETER_CONVERGENCE_TO_L, "Set the convergence tolerance of iterations. Default 1E-4. Smaller value will lead to higher accuracy with the cost of more iterations. This parameter is only available in Spark 1.5 or later. For earlier Spark versions it is skipped.", 0.0d, 1.0d, DEFAULT_CONVERGENCE_TO_L, false));
        parameterTypes.add(new ParameterTypeBoolean(PARAMETER_ADD_INTERCEPT, "Set if the algorithm should add an intercept.", true, true));
        parameterTypes.add(new ParameterTypeBoolean(PARAMETER_USE_FEATURE_SCALING, "Scaling columns to unit variance as a heuristic to reduce the condition number: During the optimization process, the convergence (rate) depends on the condition number of the training dataset. Scaling the variables often reduces this condition number heuristically, thus improving the convergence rate. Without reducing the condition number, some training datasets mixing the columns with different scales may not be able to converge. Here, if useFeatureScaling is enabled, Spark will standardize the training features by dividing the variance of each column (without subtracting the mean), and train the model in the scaled space. ", true, true));
        parameterTypes.add(new ParameterTypeCategory(PARAMETER_UPDATER, "Set the updater function to actually perform a gradient step in a given direction. The updater is responsible to perform the update from the regularization term as well, and therefore determines what kind or regularization is used, if any.", UPDATERS, SparkSupportVectorMachineParameter.Updater.SIMPLE_UPDATER.ordinal(), true));

        ParameterTypeDouble regularization = new ParameterTypeDouble(PARAMETER_REGULARIZATION, "The regularization parameter.", 0.0d, Double.MAX_VALUE, 0.0d, true);
        regularization.registerDependencyCondition(new NonEqualTypeCondition(this, PARAMETER_UPDATER, UPDATERS, false, new int[]{SparkSupportVectorMachineParameter.Updater.SIMPLE_UPDATER.ordinal()}));
        parameterTypes.add(regularization);
        return parameterTypes;
    }

    /**
     * Reports which capabilities this learner supports: numerical attributes
     * and a binominal label.
     *
     * @param operatorCapability the capability to test
     * @return {@code true} only for {@code NUMERICAL_ATTRIBUTES} and {@code BINOMINAL_LABEL}
     */
    public boolean supportsCapability(OperatorCapability operatorCapability) {
        // Identity comparison is the idiomatic enum check and does not fail on a null argument.
        return operatorCapability == OperatorCapability.NUMERICAL_ATTRIBUTES
                || operatorCapability == OperatorCapability.BINOMINAL_LABEL;
    }

    /**
     * @return always {@code true}; the weights are delivered by
     *         {@link #postProcessModel(LogisticRegressionModel, HadoopExampleSet)}
     */
    @Override
    public boolean canCalculateWeights() {
        return true;
    }

    /**
     * Registers a setup error (with a quick fix) when the minibatch fraction is
     * exactly zero, then runs the checks of the super class.
     */
    @Override
    public void performAdditionalChecks() {
        try {
            if (getParameterAsDouble(PARAMETER_MINIBATCH_FRACTION) == 0.0d) {
                addError(new SimpleProcessSetupError(
                        ProcessSetupError.Severity.ERROR,
                        getPortOwner(),
                        Collections.singletonList(new ParameterSettingQuickFix(this, PARAMETER_MINIBATCH_FRACTION, "change_parameter", new Object[]{PARAMETER_MINIBATCH_FRACTION})),
                        "parameter_zero_value",
                        new Object[]{PARAMETER_MINIBATCH_FRACTION}));
            }
        } catch (UndefinedParameterError ignored) {
            // Deliberately best-effort: if the parameter cannot be read at setup time,
            // this extra check is skipped and the remaining checks still run.
        }
        super.performAdditionalChecks();
    }

    /**
     * Builds a {@link LogisticRegressionModel} from the weights transferred back from Spark.
     *
     * <p>The coefficient array holds one entry per weight in the transferred
     * weights map, filled in the iteration order of the example set's
     * attributes, followed by the intercept when the transfer object reports
     * that one was added. The second array passed to the model is filled with
     * {@code NaN}.
     *
     * @param supportVectorMachineMTO the model transfer object produced by the Spark job
     * @param hadoopExampleSet        the example set the model was trained on
     * @return the model wrapping the transferred weights
     * @throws OperatorException if an attribute has no weight in the transferred model, or the
     *                           example set iterates over more attributes than there are weights
     */
    @Override
    public LogisticRegressionModel convertModelFromMTO(SupportVectorMachineMTO supportVectorMachineMTO, HadoopExampleSet hadoopExampleSet) throws OperatorException {
        Map<String, Double> weightsMap = supportVectorMachineMTO.getWeightsMap();
        boolean shouldAddIntercept = supportVectorMachineMTO.shouldAddIntercept();
        int weightCount = weightsMap.size();

        double[] coefficients = new double[shouldAddIntercept ? weightCount + 1 : weightCount];
        if (shouldAddIntercept) {
            // The intercept occupies the slot right after the attribute weights.
            coefficients[weightCount] = supportVectorMachineMTO.getIntercept();
        }

        int index = 0;
        for (Attribute attribute : hadoopExampleSet.attributes) {
            String attributeName = attribute.getName();
            if (index >= weightCount) {
                // Without this guard the intercept slot would be overwritten or the array overrun.
                throw new OperatorException("The example set contains more attributes than the " + weightCount + " weights returned by Spark (first surplus attribute: " + attributeName + ").");
            }
            Double weight = weightsMap.get(attributeName);
            if (weight == null) {
                throw new OperatorException("The model returned by Spark contains no weight for the attribute " + attributeName + ".");
            }
            coefficients[index++] = weight.doubleValue();
        }

        double[] unknownValues = new double[coefficients.length];
        Arrays.fill(unknownValues, Double.NaN);
        return new LogisticRegressionModel(hadoopExampleSet, coefficients, unknownValues, shouldAddIntercept);
    }

    /**
     * Reads the operator parameters, validates the label and packs everything
     * into the transfer object handed to the Spark job.
     *
     * @param hadoopExampleSet the training data; its label is validated here
     * @return the parameter transfer object for the Spark SVM job
     * @throws OperatorException if a parameter cannot be read, the minibatch fraction is zero
     *                           ({@link UserError} 116), the example set has no label, or the
     *                           label mapping does not describe exactly two values
     */
    @Override
    protected ParameterTransferObject<SparkSupportVectorMachineParameter> setupAlgorithmParams(HadoopExampleSet hadoopExampleSet) throws OperatorException {
        int iterations = getParameterAsInt(PARAMETER_NUM_ITERATIONS);
        double regularization = getParameterAsDouble(PARAMETER_REGULARIZATION);
        boolean addIntercept = getParameterAsBoolean(PARAMETER_ADD_INTERCEPT);
        boolean useFeatureScaling = getParameterAsBoolean(PARAMETER_USE_FEATURE_SCALING);
        SparkSupportVectorMachineParameter.Updater updater = SparkSupportVectorMachineParameter.Updater.values()[getParameterAsInt(PARAMETER_UPDATER)];
        double stepSize = getParameterAsDouble(PARAMETER_STEP_SIZE);
        double miniBatchFraction = getParameterAsDouble(PARAMETER_MINIBATCH_FRACTION);
        if (miniBatchFraction == 0.0d) {
            throw new UserError(this, 116, new Object[]{PARAMETER_MINIBATCH_FRACTION, Double.valueOf(miniBatchFraction)});
        }
        double convergenceTolerance = getParameterAsDouble(PARAMETER_CONVERGENCE_TO_L);

        validateLabel(hadoopExampleSet);

        ParameterTransferObject<SparkSupportVectorMachineParameter> parameterTransferObject = new ParameterTransferObject<>();
        parameterTransferObject.setParameter(SparkSupportVectorMachineParameter.ITERATIONS, Integer.valueOf(iterations));
        parameterTransferObject.setParameter(SparkSupportVectorMachineParameter.REG_PARAM, Double.valueOf(regularization));
        parameterTransferObject.setParameter(SparkSupportVectorMachineParameter.ADD_INTERCEPT, Boolean.valueOf(addIntercept));
        parameterTransferObject.setParameter(SparkSupportVectorMachineParameter.USE_FEATURE_SCALING, Boolean.valueOf(useFeatureScaling));
        parameterTransferObject.setParameter(SparkSupportVectorMachineParameter.MINI_BATCH_FRACTION, Double.valueOf(miniBatchFraction));
        parameterTransferObject.setParameter(SparkSupportVectorMachineParameter.CONVERGENCE_TOL, Double.valueOf(convergenceTolerance));
        parameterTransferObject.setParameter(SparkSupportVectorMachineParameter.STEP_SIZE, Double.valueOf(stepSize));
        parameterTransferObject.setParameter(SparkSupportVectorMachineParameter.UPDATER, updater.name());
        return parameterTransferObject;
    }

    /**
     * Checks that the label of the example set is usable for binary classification.
     *
     * <p>If neither the positive nor the negative value is known, only a warning
     * is logged. If exactly one of them is known, or the mapping holds more than
     * two values, the label is rejected.
     *
     * @param hadoopExampleSet the example set whose label is checked
     * @throws OperatorException if the label is missing or does not have exactly two values
     */
    private void validateLabel(HadoopExampleSet hadoopExampleSet) throws OperatorException {
        Attribute label = hadoopExampleSet.getAttributes().getLabel();
        if (label == null) {
            // Fail with a readable message instead of a NullPointerException below.
            throw new OperatorException("The input example set has no label attribute.");
        }
        NominalMapping mapping = label.getMapping();
        String positiveString = mapping.getPositiveString();
        String negativeString = mapping.getNegativeString();
        if (positiveString == null && negativeString == null) {
            logWarning("The positive and negative values for the attribute " + label.getName() + " is unknown. To avoid exploring them here, please use the Remap Binominals operator.");
            return;
        }
        if (positiveString == null || negativeString == null) {
            throw new OperatorException("The label attribute " + label.getName() + " has less then 2 different values!");
        }
        if (mapping.size() > 2) {
            throw new OperatorException("The label attribute " + label.getName() + " has more then 2 different values!");
        }
    }

    /**
     * Delivers the model coefficients as attribute weights on the
     * {@code "weights"} output port, one weight per attribute in the iteration
     * order of the example set's attributes.
     *
     * @param logisticRegressionModel the model whose coefficients are published
     * @param hadoopExampleSet        the example set providing the attribute names
     */
    @Override
    public void postProcessModel(LogisticRegressionModel logisticRegressionModel, HadoopExampleSet hadoopExampleSet) {
        AttributeWeights attributeWeights = new AttributeWeights();
        // Fetch the coefficients once instead of on every loop iteration.
        double[] coefficients = logisticRegressionModel.getCoefficients();
        int index = 0;
        for (Attribute attribute : hadoopExampleSet.attributes) {
            attributeWeights.setWeight(attribute.getName(), coefficients[index++]);
        }
        getOutputPorts().getPortByName("weights").deliver(attributeWeights);
    }
}
