package eu.radoop.spark;

import eu.radoop.transfer.parameter.ParameterTransferObject;
import eu.radoop.transfer.parameter.SparkIsolationForestParameter;
import org.apache.spark.ml.Pipeline;
import org.apache.spark.ml.PipelineModel;
import org.apache.spark.ml.PipelineStage;
import org.apache.spark.ml.feature.VectorAssembler;
import org.apache.spark.ml.iforest.IForest;
import org.apache.spark.ml.iforest.IForestModel;
import org.apache.spark.sql.Dataset;

/* loaded from: input_file:lib/radoop-spark3.jar:eu/radoop/spark/SparkIsolationForestHelper.class */
/**
 * Trains an isolation forest on a Spark dataset and writes the resulting
 * predictions out as delimited text.
 */
public class SparkIsolationForestHelper {
    /** Field delimiter used for the written output: the single control character U+0001 (Ctrl-A). */
    private static final String HIVE_DEFAULT_SEPARATOR = "\u0001";

    /** Name of the assembled vector column; it is dropped again before the predictions are written. */
    private static final String FEATURES_COLUMN_NAME = "features";

    /**
     * Fits a two-stage pipeline (vector assembler followed by an isolation forest) on the
     * given dataset and writes the predictions from the fitted model's training summary
     * to {@code str}.
     *
     * @param parameterTransferObject source of the forest settings; {@code NUM_TREES},
     *        {@code MAX_SAMPLES}, {@code CONTAMINATION}, {@code MAX_DEPTH} and
     *        {@code RANDOM_SEED} are read from it
     * @param obj the training data; must be a Spark {@link Dataset}
     * @param str output location handed to the CSV writer
     * @param strArr names of the input columns assembled into the feature vector
     * @throws ClassCastException if {@code obj} is not a {@link Dataset}
     */
    public void trainModelAndDetectOutliers(ParameterTransferObject<SparkIsolationForestParameter> parameterTransferObject, Object obj, String str, String[] strArr) {
        IForest isolationForest = new IForest()
                .setNumTrees(parameterTransferObject.getParameterAsInteger(SparkIsolationForestParameter.NUM_TREES).intValue())
                .setMaxSamples(parameterTransferObject.getParameterAsInteger(SparkIsolationForestParameter.MAX_SAMPLES).intValue())
                .setContamination(parameterTransferObject.getParameterAsDouble(SparkIsolationForestParameter.CONTAMINATION).doubleValue())
                .setMaxDepth(parameterTransferObject.getParameterAsInteger(SparkIsolationForestParameter.MAX_DEPTH).intValue())
                .setSeed(parameterTransferObject.getParameterAsInteger(SparkIsolationForestParameter.RANDOM_SEED).intValue());

        // Declared with its concrete type: the column setters are not available on PipelineStage.
        // NOTE(review): the forest's features column is never set explicitly, so this presumably
        // relies on its default matching FEATURES_COLUMN_NAME - confirm against the IForest API.
        VectorAssembler vectorAssembler = new VectorAssembler()
                .setInputCols(strArr)
                .setOutputCol(FEATURES_COLUMN_NAME);

        PipelineModel fit = new Pipeline()
                .setStages(new PipelineStage[]{vectorAssembler, isolationForest})
                .fit((Dataset<?>) obj);

        // The forest is the last pipeline stage, so the last fitted stage is its model; the cast
        // is required because stages() only exposes the generic transformer type.
        IForestModel forestModel = (IForestModel) fit.stages()[fit.stages().length - 1];

        forestModel.summary()
                .predictions()
                .drop(FEATURES_COLUMN_NAME)
                .write()
                .option("sep", HIVE_DEFAULT_SEPARATOR)
                .csv(str);
    }
}
