package com.rapidminer.extension.anomalydetection.operator.clustering;

import com.rapidminer.adaption.belt.IOTable;
import com.rapidminer.belt.table.BeltConverter;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.extension.anomalydetection.anomaly_models.clustering.CBLOFModel;
import com.rapidminer.extension.anomalydetection.anomaly_models.clustering.CMGOSModel;
import com.rapidminer.extension.anomalydetection.anomaly_models.clustering.ClusterBasedAnomalyDetectionModel;
import com.rapidminer.extension.anomalydetection.anomaly_models.clustering.LDCOFModel;
import com.rapidminer.extension.anomalydetection.operator.AbstractAnomalyOperator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.clustering.ClusterModel;
import com.rapidminer.operator.learner.CapabilityCheck;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.conditions.BooleanParameterCondition;
import com.rapidminer.parameter.conditions.EqualStringCondition;
import com.rapidminer.parameter.conditions.EqualTypeCondition;
import com.rapidminer.studio.concurrency.internal.SequentialConcurrencyContext;
import com.rapidminer.tools.ParameterService;
import com.rapidminer.tools.RandomGenerator;
import com.rapidminer.tools.Tools;
import com.rapidminer.tools.math.similarity.DistanceMeasure;
import com.rapidminer.tools.math.similarity.DistanceMeasureHelper;
import com.rapidminer.tools.math.similarity.DistanceMeasures;
import com.rapidminer.tools.math.similarity.numerical.EuclideanDistance;
import de.dfki.madm.anomalydetection.operator.kernel_based.AnomalyDetectionLibSVMOperator;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

/* loaded from: input_file:com/rapidminer/extension/anomalydetection/operator/clustering/ClusterBasedAnomalyDetectionOperator.class */
/**
 * Operator that scores anomalies on top of an existing clustering.
 *
 * <p>It consumes an {@link IOTable} (via the inherited example input port) together with a
 * {@link ClusterModel} on the {@code clu} port, trains one of the cluster based anomaly models
 * selected by the {@value #PARAMETER_ALGORITHM} parameter (CBLOF, CMGOS or LDCOF), applies the
 * trained model to the input table and delivers the scored table, the model and the unchanged
 * cluster model.
 */
public class ClusterBasedAnomalyDetectionOperator extends AbstractAnomalyOperator {
    public static final String PARAMETER_ALGORITHM = "algorithm";

    /** Indices into {@link #COV}; they are the values of the "covariance estimation" category parameter. */
    public static final int METHOD_COV_REDUCTION = 0;
    public static final int METHOD_COV_REGULARIZE = 1;
    public static final int METHOD_COV_MCD = 2;

    public static final String PARAMETER_LAMBDA = "lambda";
    public static final String PARAMETER_LAMBDA_DESCRIPTION = "Lambda for regularization (see Friedmann). A lambda of 0.0 menas QDA (each cluster has its own covariance) and a lambda of 1.0 means LDA (a global covariance matrix).";
    public static final String PARAMETER_COVARIANCE = "covariance estimation";
    public static final String PARAMETER_COVARIANCE_DESCRIPTION = "The algorithm to estimate the covariance matrics. Reduction is the simplest method whereas the other two are more complex. Details can be found in the papers (see Operator description).";
    public static final String PARAMETER_H = "h (non-outlier instances)";
    public static final String PARAMETER_H_DESCRIPTION = "This parameter specifies the number of samples for fastMCD/MCD to be used for a computation (non-outliers). If set to -1 it is automatically computed according to the 'probability for normal class'. Friedmann et al recommend to use 75% of the examples as a good estimate. The upper bound is the numer of examples and the lower bound is (number of examples * dimensions +1)/2. Values exceeding these limits will be replaced by the limit. ";
    public static final String PARAMETER_NUMBER_OF_SUBSETS = "number of subsets";
    public static final String PARAMETER_POINTS_SUBSET_DESCRIPTION = "Defines the number of subsets used in fastMCD. Friedmann recommends to have at most 5 subsets.";
    public static final String PARAMETER_FMCD = "threshold for fastMCD";
    public static final String PARAMETER_FMCD_DESCRIPTION = "If the number of examples in the dataset exceeds the threshold, fastMCD will be applied instead of MCD (complete search). Not recommended to be higher than 600 due to computational issues.";
    public static final String PARAMETER_RUN = "iterations";
    public static final String PARAMETER_RUN_DESCRIPTION = "Number of iterations for computing the MCD. 100-500 might be a good choice.";
    public static final String PARAMETER_NUMBER_OF_THREADS = "number of threads";
    public static final String PARAMETER_NUMBER_OF_THREADS_DESCRIPTION = "The number of threads for the computation";
    public static final String PARAMETER_NUMBER_OF_REMOVE = "times to remove outlier";
    public static final String PARAMETER_NUMBER_OF_REMOVE_DESCRIPTION = "The number of times outlier should be removed for minimum covariance determinant";
    public static final String PARAMETER_OUTLIER_PROBABILITY = "probability for normal class";
    public static final String PARAMETER_OUTLIER_PROBABILITY_DESCRIPTION = "This is the expected probability of normal data instances. Usually it should be between 0.95 and 1.0 to make sense.";
    public static final String PARAMETER_LIMIT_COVARIANCE_POINTS = "limit computations";
    public static final String PARAMETER_LIMIT_COVARIANCE_POINTS_DESCRIPTION = "Limit the number of instances to calculate the covariance matrix. Should be used for very large clusters. The sampling of the instances is a random choice.";
    public static final String PARAMETER_NUMBER_COVARIANCE_POINTS = "maximum";
    public static final String PARAMETER_NUMBER_COVARIANCE_POINTS_DESCRIPTION = "Maximum number of instances for covariance matrix calculation";
    public static final String PARAMETER_PARALLELIZE_EVALUATION_PROCESS = "parallelize evaluation process";
    public static final String PARAMETER_PARALLELIZE_EVALUATION_PROCESS_DESCRIPTION = "Specifies that evaluation process should be performed in parallel";

    /**
     * Second "limit computations" / "maximum" pair. It is shown for the Reduction covariance
     * method, whereas the un-suffixed pair is shown for Regularisation.
     */
    private static final String PARAMETER_LIMIT_COVARIANCE_POINTS_REDUCTION = "limit computations_";
    private static final String PARAMETER_NUMBER_COVARIANCE_POINTS_REDUCTION = "maximum_";

    /** Input port that receives the clustering the anomaly score is based on. */
    protected InputPort clusterInput;
    /** Output port that passes the received cluster model through unchanged. */
    protected OutputPort clusterThroughput;
    private final DistanceMeasureHelper measureHelper;

    private static final String CBLOF = "CBLOF";
    private static final String CMGOS = "CMGOS";
    private static final String LDCOF = "LDCOF";
    public static final String[] AVAILABLE_ALGORITHMS = {CBLOF, CMGOS, LDCOF};

    // NOTE(review): the following keys are public and non-final in the original; they are left
    // that way so that any external code assigning them keeps compiling.
    public static String PARAMETER_ALPHA = "alpha";
    public static String PARAMETER_BETA = AnomalyDetectionLibSVMOperator.PARAMETER_BETA;
    public static String PARAMETER_WEIGHTING = "use cluster size as weighting factor";
    public static String PARAMETER_LIKE_CBLOF = "divide clusters like cblof";
    public static String PARAMETER_GAMMA_LDCOF = "gamma_(ldcof)";
    public static final String[] COV = {"Reduction", "Regularisation", "MCD"};
    public static String PARAMETER_GAMMA = "gamma";

    /**
     * Creates the operator, its {@code clu} input/output ports and the pass-through rule
     * connecting them.
     *
     * @param operatorDescription description handed to the superclass constructor
     */
    public ClusterBasedAnomalyDetectionOperator(OperatorDescription operatorDescription) {
        super(operatorDescription);
        this.clusterInput = getInputPorts().createPort("clu", ClusterModel.class);
        this.clusterThroughput = getOutputPorts().createPort("clu");
        this.measureHelper = new DistanceMeasureHelper(this);
        getTransformer().addPassThroughRule(this.clusterInput, this.clusterThroughput);
    }

    /**
     * Trains the selected anomaly model, applies it to the input table and delivers the scored
     * table, the model and the incoming cluster model.
     *
     * @throws OperatorException if the configured algorithm name is not one of
     *     {@link #AVAILABLE_ALGORITHMS}, or if any of the called RapidMiner services fails
     */
    public void doWork() throws OperatorException {
        IOTable table = (IOTable) this.exaInput.getData(IOTable.class);
        // The models are trained on the legacy ExampleSet view of the Belt table.
        ExampleSet exampleSet = BeltConverter.convert(table, new SequentialConcurrencyContext());

        boolean warnOnly = Tools.booleanValue(
                ParameterService.getParameterValue("rapidminer.general.capabilities.warn"), true);
        new CapabilityCheck(this, warnOnly).checkLearnerCapabilities(this, exampleSet);

        String algorithm = getParameterAsString(PARAMETER_ALGORITHM);
        DistanceMeasure measure = this.measureHelper.getInitializedMeasure(exampleSet);

        final ClusterBasedAnomalyDetectionModel model;
        switch (algorithm) {
            case CBLOF:
                model = buildCBLOF(table, measure, exampleSet);
                break;
            case LDCOF:
                model = buildLDCOF(table, measure, exampleSet);
                break;
            case CMGOS:
                model = buildCMGOS(table, measure, exampleSet);
                break;
            default:
                throw new OperatorException("Unknown algorithm " + algorithm);
        }

        this.exaOutput.deliver(model.apply(table, this));
        this.modOutput.deliver(model);
        this.clusterThroughput.deliver(this.clusterInput.getData(ClusterModel.class));
    }

    /**
     * Builds and trains a {@link CBLOFModel} using the alpha, beta and weighting parameters.
     *
     * @param iOTable input table handed to the model constructor
     * @param distanceMeasure initialized distance measure handed to the model constructor
     * @param exampleSet example set the model is trained on
     * @return the trained model
     * @throws OperatorException if reading the cluster model or a parameter, or training, fails
     */
    protected ClusterBasedAnomalyDetectionModel buildCBLOF(IOTable iOTable, DistanceMeasure distanceMeasure, ExampleSet exampleSet) throws OperatorException {
        ClusterModel clusterModel = this.clusterInput.getData(ClusterModel.class);
        CBLOFModel model = new CBLOFModel(iOTable, clusterModel, distanceMeasure);
        model.setUseClusterWeights(getParameterAsBoolean(PARAMETER_WEIGHTING));
        model.train(exampleSet, getParameterAsDouble(PARAMETER_ALPHA), getParameterAsDouble(PARAMETER_BETA));
        return model;
    }

    /**
     * Builds and trains an {@link LDCOFModel}.
     *
     * <p>When {@link #PARAMETER_LIKE_CBLOF} is set, the model uses the supplied distance measure
     * and is trained with the LDCOF gamma parameter; otherwise it uses a fresh
     * {@link EuclideanDistance} and is trained with alpha and beta.
     *
     * @param iOTable input table handed to the model constructor
     * @param distanceMeasure initialized distance measure (only used in the gamma branch)
     * @param exampleSet example set the model is trained on
     * @return the trained model
     * @throws OperatorException if reading the cluster model or a parameter, or training, fails
     */
    protected ClusterBasedAnomalyDetectionModel buildLDCOF(IOTable iOTable, DistanceMeasure distanceMeasure, ExampleSet exampleSet) throws OperatorException {
        ClusterModel clusterModel = this.clusterInput.getData(ClusterModel.class);
        // NOTE(review): the gamma parameter is only *shown* when "divide clusters like cblof" is
        // false (see getParameterTypes), yet it is *read* here when the flag is true, and the
        // alpha/beta branch ignores the configured distance measure. This may be inverted -
        // confirm against LDCOFModel.train before changing; behavior is kept as found.
        if (getParameterAsBoolean(PARAMETER_LIKE_CBLOF)) {
            LDCOFModel gammaModel = new LDCOFModel(iOTable, clusterModel, distanceMeasure);
            gammaModel.train(exampleSet, getParameterAsDouble(PARAMETER_GAMMA_LDCOF));
            return gammaModel;
        }
        LDCOFModel alphaBetaModel = new LDCOFModel(iOTable, clusterModel, new EuclideanDistance());
        alphaBetaModel.train(exampleSet, getParameterAsDouble(PARAMETER_ALPHA), getParameterAsDouble(PARAMETER_BETA));
        return alphaBetaModel;
    }

    /**
     * Builds, configures and trains a {@link CMGOSModel} from the CMGOS specific parameters.
     *
     * @param iOTable input table handed to the model constructor
     * @param distanceMeasure initialized distance measure handed to the model constructor
     * @param exampleSet example set the model is trained on
     * @return the trained model
     * @throws OperatorException if reading the cluster model or a parameter, or training, fails
     */
    protected ClusterBasedAnomalyDetectionModel buildCMGOS(IOTable iOTable, DistanceMeasure distanceMeasure, ExampleSet exampleSet) throws OperatorException {
        ClusterModel clusterModel = this.clusterInput.getData(ClusterModel.class);
        CMGOSModel model = new CMGOSModel(iOTable, clusterModel, distanceMeasure);

        // NOTE(review): the "parallelize evaluation process" / "number of threads" parameters are
        // declared in getCMGOSParameters() but never read; the thread count is fixed to 1 here.
        model.setThreads(1);
        model.setRemoveRuns(getParameterAsInt(PARAMETER_NUMBER_OF_REMOVE));
        model.setProbability(getParameterAsDouble(PARAMETER_OUTLIER_PROBABILITY));

        // -1 is passed when neither limit flag is set. The "_"-suffixed pair is evaluated last
        // and therefore wins when both flags are set.
        int covarianceSampling = -1;
        if (getParameterAsBoolean(PARAMETER_LIMIT_COVARIANCE_POINTS)) {
            covarianceSampling = getParameterAsInt(PARAMETER_NUMBER_COVARIANCE_POINTS);
        }
        if (getParameterAsBoolean(PARAMETER_LIMIT_COVARIANCE_POINTS_REDUCTION)) {
            covarianceSampling = getParameterAsInt(PARAMETER_NUMBER_COVARIANCE_POINTS_REDUCTION);
        }
        model.setCov_sampling(covarianceSampling);

        model.setPercentage(getParameterAsDouble(PARAMETER_GAMMA));
        model.setLambda(getParameterAsDouble(PARAMETER_LAMBDA));
        model.setCov(getParameterAsInt(PARAMETER_COVARIANCE));
        model.setH(getParameterAsInt(PARAMETER_H));
        model.setNumberOfSubsets(getParameterAsInt(PARAMETER_NUMBER_OF_SUBSETS));
        model.setFastMCDPoints(getParameterAsInt(PARAMETER_FMCD));
        model.setInititeration(getParameterAsInt(PARAMETER_RUN));
        model.setRandomGenerator(RandomGenerator.getRandomGenerator(this));
        model.train(exampleSet);
        return model;
    }

    /**
     * Returns the superclass parameters followed by the algorithm selector, the distance measure
     * parameters, the CBLOF/LDCOF parameters and the CMGOS parameters. Each algorithm specific
     * parameter carries a dependency condition on {@link #PARAMETER_ALGORITHM}.
     *
     * @return the parameter list obtained from the superclass with this operator's parameters appended
     */
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        types.add(new ParameterTypeCategory(PARAMETER_ALGORITHM, "which algorithm to choose", AVAILABLE_ALGORITHMS, 0, false));
        types.addAll(DistanceMeasures.getParameterTypes(this));

        EqualStringCondition cblofOrLdcof = new EqualStringCondition(this, PARAMETER_ALGORITHM, true, new String[]{CBLOF, LDCOF});
        EqualStringCondition ldcofOnly = new EqualStringCondition(this, PARAMETER_ALGORITHM, true, new String[]{LDCOF});

        ParameterTypeBoolean likeCblof = new ParameterTypeBoolean(PARAMETER_LIKE_CBLOF, "The division into large and small clusters will be implemented in a manner similar to CBLOF.", false, false);
        likeCblof.registerDependencyCondition(ldcofOnly);
        types.add(likeCblof);

        ParameterTypeDouble alpha = new ParameterTypeDouble(PARAMETER_ALPHA, "This parameter specifies the percentage of the data set that is expected to be normal", 0.0d, 100.0d, 90.0d, false);
        alpha.registerDependencyCondition(cblofOrLdcof);
        types.add(alpha);

        ParameterTypeDouble beta = new ParameterTypeDouble(PARAMETER_BETA, "This parameter specifies the minimum ratio between the size of a large cluster and a small cluster", 1.0d, Integer.MAX_VALUE, 5.0d, false);
        beta.registerDependencyCondition(cblofOrLdcof);
        types.add(beta);

        ParameterTypeDouble gammaLdcof = new ParameterTypeDouble(PARAMETER_GAMMA_LDCOF, "ratio between the maximum size of small clusters and the average cluster size", 0.0d, 1.0d, 0.1d);
        gammaLdcof.registerDependencyCondition(new BooleanParameterCondition(this, PARAMETER_LIKE_CBLOF, true, false));
        gammaLdcof.registerDependencyCondition(ldcofOnly);
        types.add(gammaLdcof);

        ParameterTypeBoolean weighting = new ParameterTypeBoolean(PARAMETER_WEIGHTING, "Uses the cluster size as a weight factor as proposed by the original publication.", true);
        weighting.registerDependencyCondition(new EqualStringCondition(this, PARAMETER_ALGORITHM, true, new String[]{CBLOF}));
        types.add(weighting);

        List<ParameterType> cmgosTypes = getCMGOSParameters();
        for (ParameterType cmgosType : cmgosTypes) {
            // A separate condition instance per parameter, as in the original code.
            cmgosType.registerDependencyCondition(new EqualStringCondition(this, PARAMETER_ALGORITHM, true, new String[]{CMGOS}));
        }
        types.addAll(cmgosTypes);
        return types;
    }

    /**
     * Creates the CMGOS specific parameters, including the random generator parameters. The
     * dependency on the algorithm selector is added by {@link #getParameterTypes()}.
     *
     * @return a new mutable list of parameter types
     */
    private List<ParameterType> getCMGOSParameters() {
        List<ParameterType> types = new LinkedList<>();

        types.add(new ParameterTypeDouble(PARAMETER_OUTLIER_PROBABILITY, PARAMETER_OUTLIER_PROBABILITY_DESCRIPTION, 0.0d, 1.0d, 0.975d, false));
        types.add(new ParameterTypeDouble(PARAMETER_GAMMA, "Ratio between the maximum size of small clusters and the average cluster size. Smallclusters are removed.", 0.0d, 1.0d, 0.1d));
        types.add(new ParameterTypeCategory(PARAMETER_COVARIANCE, PARAMETER_COVARIANCE_DESCRIPTION, COV, 0, false));

        ParameterTypeInt removeRuns = new ParameterTypeInt(PARAMETER_NUMBER_OF_REMOVE, PARAMETER_NUMBER_OF_REMOVE_DESCRIPTION, 0, Integer.MAX_VALUE, 1, false);
        removeRuns.registerDependencyCondition(covarianceMethodIs(METHOD_COV_REDUCTION, METHOD_COV_REGULARIZE));
        types.add(removeRuns);

        ParameterTypeBoolean limitRegularize = new ParameterTypeBoolean(PARAMETER_LIMIT_COVARIANCE_POINTS, PARAMETER_LIMIT_COVARIANCE_POINTS_DESCRIPTION, false, false);
        limitRegularize.registerDependencyCondition(covarianceMethodIs(METHOD_COV_REGULARIZE));
        types.add(limitRegularize);

        ParameterTypeInt maximumRegularize = new ParameterTypeInt(PARAMETER_NUMBER_COVARIANCE_POINTS, PARAMETER_NUMBER_COVARIANCE_POINTS_DESCRIPTION, 1, Integer.MAX_VALUE, 1000, false);
        maximumRegularize.registerDependencyCondition(new BooleanParameterCondition(this, PARAMETER_LIMIT_COVARIANCE_POINTS, true, true));
        types.add(maximumRegularize);

        ParameterTypeBoolean limitReduction = new ParameterTypeBoolean(PARAMETER_LIMIT_COVARIANCE_POINTS_REDUCTION, PARAMETER_LIMIT_COVARIANCE_POINTS_DESCRIPTION, false, false);
        limitReduction.registerDependencyCondition(covarianceMethodIs(METHOD_COV_REDUCTION));
        types.add(limitReduction);

        ParameterTypeInt maximumReduction = new ParameterTypeInt(PARAMETER_NUMBER_COVARIANCE_POINTS_REDUCTION, PARAMETER_NUMBER_COVARIANCE_POINTS_DESCRIPTION, 1, Integer.MAX_VALUE, 1000, false);
        maximumReduction.registerDependencyCondition(new BooleanParameterCondition(this, PARAMETER_LIMIT_COVARIANCE_POINTS_REDUCTION, true, true));
        types.add(maximumReduction);

        // Lower bound is -1 (was 0): the default is -1 and the description documents -1 as the
        // "compute automatically" setting, so it has to lie inside the permitted range.
        ParameterTypeInt h = new ParameterTypeInt(PARAMETER_H, PARAMETER_H_DESCRIPTION, -1, Integer.MAX_VALUE, -1, false);
        h.registerDependencyCondition(covarianceMethodIs(METHOD_COV_MCD));
        types.add(h);

        ParameterTypeInt iterations = new ParameterTypeInt(PARAMETER_RUN, PARAMETER_RUN_DESCRIPTION, 0, Integer.MAX_VALUE, 500, false);
        iterations.registerDependencyCondition(covarianceMethodIs(METHOD_COV_MCD));
        types.add(iterations);

        ParameterTypeInt fastMcdThreshold = new ParameterTypeInt(PARAMETER_FMCD, PARAMETER_FMCD_DESCRIPTION, 0, Integer.MAX_VALUE, 600, false);
        fastMcdThreshold.registerDependencyCondition(covarianceMethodIs(METHOD_COV_MCD));
        types.add(fastMcdThreshold);

        ParameterTypeInt subsets = new ParameterTypeInt(PARAMETER_NUMBER_OF_SUBSETS, PARAMETER_POINTS_SUBSET_DESCRIPTION, 0, Integer.MAX_VALUE, 5, false);
        subsets.registerDependencyCondition(covarianceMethodIs(METHOD_COV_MCD));
        types.add(subsets);

        ParameterTypeDouble lambda = new ParameterTypeDouble(PARAMETER_LAMBDA, PARAMETER_LAMBDA_DESCRIPTION, 0.0d, 1.0d, 0.1d, false);
        lambda.registerDependencyCondition(covarianceMethodIs(METHOD_COV_REGULARIZE));
        types.add(lambda);

        types.add(new ParameterTypeBoolean(PARAMETER_PARALLELIZE_EVALUATION_PROCESS, PARAMETER_PARALLELIZE_EVALUATION_PROCESS_DESCRIPTION, false, false));

        ParameterTypeInt threads = new ParameterTypeInt(PARAMETER_NUMBER_OF_THREADS, PARAMETER_NUMBER_OF_THREADS_DESCRIPTION, 1, Integer.MAX_VALUE, Runtime.getRuntime().availableProcessors(), false);
        threads.registerDependencyCondition(new BooleanParameterCondition(this, PARAMETER_PARALLELIZE_EVALUATION_PROCESS, true, true));
        types.add(threads);

        types.addAll(RandomGenerator.getRandomGeneratorParameters(this));
        return types;
    }

    /** Creates a condition on the "covariance estimation" parameter for the given {@link #COV} indices. */
    private EqualTypeCondition covarianceMethodIs(int... methods) {
        return new EqualTypeCondition(getParameterHandler(), PARAMETER_COVARIANCE, COV, false, methods);
    }
}
