package eu.radoop.manipulation;

import com.microsoft.azure.storage.Constants;
import com.rapidminer.example.Attribute;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.parameter.ParameterHandler;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeList;
import com.rapidminer.parameter.ParameterTypeLong;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.conditions.BooleanParameterCondition;
import com.rapidminer.parameter.conditions.EqualTypeCondition;
import eu.radoop.KillableOperation;
import eu.radoop.KillableOperationWrapper;
import eu.radoop.RadoopNest;
import eu.radoop.RadoopOperator;
import eu.radoop.RadoopTools;
import eu.radoop.datahandler.HadoopExampleSet;
import eu.radoop.datahandler.hive.JdbcConnectionTools;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hdfs.tools.offlineImageViewer.PBImageXmlWriter;

/* loaded from: input_file:eu/radoop/manipulation/HiveSample.class */
public class HiveSample extends RadoopAbstractManipulation {
    /** Key of the category parameter that selects the sampling mode (one of {@link #SAMPLE_MODES}). */
    public static final String PARAMETER_SAMPLE = "sample";
    /** Display names of the sampling modes; the array index is the mode value. */
    public static final String[] SAMPLE_MODES = {"absolute", "probability"};
    /** Index of the "absolute" mode in {@link #SAMPLE_MODES}. */
    public static final int SAMPLE_ABSOLUTE = 0;
    /** Index of the "probability" mode in {@link #SAMPLE_MODES}. */
    public static final int SAMPLE_PROBABILITY = 1;
    /** Key of the boolean parameter that switches to per-class (balanced) sampling. */
    public static final String PARAMETER_BALANCE_DATA = "balance_data";
    /** Key of the long parameter holding the estimated overall sample size (absolute mode, not balanced). */
    public static final String PARAMETER_SAMPLE_SIZE = "sample_size";
    /** Key of the double parameter holding the overall sample probability (probability mode, not balanced). */
    public static final String PARAMETER_SAMPLE_PROBABILITY = "sample_probability";
    /** Key of the list parameter holding the estimated sample size per class (absolute mode, balanced). */
    public static final String PARAMETER_SAMPLE_SIZE_LIST = "sample_size_per_class";
    /** Key of the list parameter holding the sample probability per class (probability mode, balanced). */
    public static final String PARAMETER_SAMPLE_PROBABILITY_LIST = "sample_probability_per_class";
    /** Key of the boolean parameter that controls whether class names are matched case sensitively. */
    public static final String PARAMETER_CASE_SENSITIVE = "case_sensitive";
    /** Query template; {@code <table_name>} is replaced with the input table name. */
    public static final String SAMPLE_QUERY = "SELECT * FROM <table_name>";
    /** WHERE clause template; {@code <random_limit>} is replaced with the probability expression. */
    public static final String SAMPLE_QUERY_WHERE = " WHERE RAND() <= <random_limit>";

    /* loaded from: input_file:eu/radoop/manipulation/HiveSample$SampleStatistics.class */
    /**
     * Keys of the optional statistics map that
     * {@link HiveSample#buildSampleQuery(Operator, HadoopExampleSet, EnumMap)} fills in.
     */
    public enum SampleStatistics {
        /** Total number of rows counted in the input table (only written in absolute mode). */
        CountAll
    }

    /**
     * Creates the operator by passing the description on to the superclass constructor.
     *
     * @param operatorDescription the operator description handed to the superclass
     */
    public HiveSample(OperatorDescription operatorDescription) {
        super(operatorDescription);
    }

    /**
     * Creates the parameter types that configure sampling: the mode selector, the balance switch,
     * the global size/probability, the per-class size/probability lists and the case-sensitivity flag.
     *
     * <p>Each value parameter registers dependency conditions on the sampling mode and on the
     * balance switch, so only the parameter matching the current combination is active.
     *
     * @param parameterHandler the handler whose parameter values the dependency conditions observe
     * @return a new list containing the sampling parameter types, in display order
     */
    public static List<ParameterType> getSampleParameterTypes(ParameterHandler parameterHandler) {
        List<ParameterType> types = new ArrayList<ParameterType>();

        ParameterTypeCategory sampleMode = new ParameterTypeCategory(PARAMETER_SAMPLE, "Determines how the amount of data is specified.", SAMPLE_MODES, SAMPLE_ABSOLUTE);
        sampleMode.setExpert(false);
        types.add(sampleMode);

        types.add(new ParameterTypeBoolean(PARAMETER_BALANCE_DATA, "If you need to sample differently for examples of a certain class, you might check this.", false, true));

        // NOTE(review): the default value references an unrelated Azure constant, most likely a
        // decompiler substitution for a numeric literal - confirm the value and replace it with a literal.
        ParameterTypeLong sampleSize = new ParameterTypeLong(PARAMETER_SAMPLE_SIZE, "The estimated number of examples which should be sampled. A sample probabilty for each example is calculated based on this value.", 1L, Long.MAX_VALUE, Constants.MAX_BLOCK_NUMBER);
        sampleSize.registerDependencyCondition(new EqualTypeCondition(parameterHandler, PARAMETER_SAMPLE, SAMPLE_MODES, true, new int[]{SAMPLE_ABSOLUTE}));
        sampleSize.registerDependencyCondition(new BooleanParameterCondition(parameterHandler, PARAMETER_BALANCE_DATA, true, false));
        sampleSize.setExpert(false);
        types.add(sampleSize);

        ParameterTypeDouble sampleProbability = new ParameterTypeDouble(PARAMETER_SAMPLE_PROBABILITY, "The sample probability for each example.", 0.0d, 1.0d, 0.05d);
        sampleProbability.registerDependencyCondition(new EqualTypeCondition(parameterHandler, PARAMETER_SAMPLE, SAMPLE_MODES, true, new int[]{SAMPLE_PROBABILITY}));
        sampleProbability.registerDependencyCondition(new BooleanParameterCondition(parameterHandler, PARAMETER_BALANCE_DATA, true, false));
        sampleProbability.setExpert(false);
        types.add(sampleProbability);

        // NOTE(review): the column keys below reference unrelated JDO/Hadoop constants, presumably
        // decompiler substitutions for short string literals - confirm and replace with literals.
        ParameterTypeList sizePerClass = new ParameterTypeList(PARAMETER_SAMPLE_SIZE_LIST, "The estimated sample size per class.", new ParameterTypeString(javax.jdo.Constants.PMF_ATTRIBUTE_CLASS, "The class name this sample size applies to."), new ParameterTypeLong(PBImageXmlWriter.SNAPSHOT_DIFF_SECTION_SIZE, "The estimated number of sampled examples of this class.", 1L, Long.MAX_VALUE));
        sizePerClass.registerDependencyCondition(new EqualTypeCondition(parameterHandler, PARAMETER_SAMPLE, SAMPLE_MODES, true, new int[]{SAMPLE_ABSOLUTE}));
        sizePerClass.registerDependencyCondition(new BooleanParameterCondition(parameterHandler, PARAMETER_BALANCE_DATA, true, true));
        sizePerClass.setExpert(false);
        types.add(sizePerClass);

        ParameterTypeList probabilityPerClass = new ParameterTypeList(PARAMETER_SAMPLE_PROBABILITY_LIST, "The fraction per class.", new ParameterTypeString(javax.jdo.Constants.PMF_ATTRIBUTE_CLASS, "The class name this sample size applies to."), new ParameterTypeDouble("probability", "The probability of examples of this class to belong to the sample.", 0.0d, 1.0d));
        probabilityPerClass.registerDependencyCondition(new EqualTypeCondition(parameterHandler, PARAMETER_SAMPLE, SAMPLE_MODES, true, new int[]{SAMPLE_PROBABILITY}));
        probabilityPerClass.registerDependencyCondition(new BooleanParameterCondition(parameterHandler, PARAMETER_BALANCE_DATA, true, true));
        probabilityPerClass.setExpert(false);
        types.add(probabilityPerClass);

        ParameterTypeBoolean caseSensitive = new ParameterTypeBoolean(PARAMETER_CASE_SENSITIVE, "Indicates whether the specified class names should be considered case sensitive or not.", true, true);
        caseSensitive.registerDependencyCondition(new BooleanParameterCondition(parameterHandler, PARAMETER_BALANCE_DATA, true, true));
        types.add(caseSensitive);

        return types;
    }

    /**
     * Convenience overload that builds the sample query without collecting any statistics.
     *
     * @param operator         the operator whose sampling parameters are read
     * @param hadoopExampleSet the input data set to sample from
     * @return the SELECT statement that samples the input table
     * @throws OperatorException if the three-argument overload fails
     */
    public static String buildSampleQuery(Operator operator, HadoopExampleSet hadoopExampleSet) throws OperatorException {
        // No statistics requested: hand a null map to the full overload.
        final EnumMap<SampleStatistics, Long> noStatistics = null;
        return buildSampleQuery(operator, hadoopExampleSet, noStatistics);
    }

    /**
     * Builds the SELECT statement that draws a random sample from the table of the given example set.
     *
     * <p>The statement has the form {@code SELECT * FROM <table> WHERE RAND() <= <limit>}, where the
     * limit is either a single probability or, when {@link #PARAMETER_BALANCE_DATA} is set, a
     * {@code CASE} expression on the label attribute with one probability per class
     * ({@code ELSE -1.0}, so rows of unlisted classes are never selected).
     *
     * <p>In absolute mode the row counts (overall or per class) are first fetched with a
     * {@code COUNT(*)} query and the requested sizes are converted to probabilities
     * {@code requested / available}, capped at 1.0 with a warning when fewer rows exist.
     * If the resulting global probability is not below 1.0, no WHERE clause is appended.
     *
     * @param operator         the operator whose sampling parameters are read and that receives log output
     * @param hadoopExampleSet the input data set to sample from
     * @param enumMap          optional sink for statistics; when non-null and absolute mode is used,
     *                         {@link SampleStatistics#CountAll} is set to the total row count
     * @return the sampling SELECT statement
     * @throws UserError         if balancing is requested without a nominal label, if a per-class
     *                           list is empty or contains an unparsable number, or if none of the
     *                           listed classes occurs in the data
     * @throws OperatorException if the counting query fails
     */
    public static String buildSampleQuery(Operator operator, HadoopExampleSet hadoopExampleSet, EnumMap<SampleStatistics, Long> enumMap) throws OperatorException {
        RadoopNest radoopNest = RadoopOperator.checkRadoopNest(operator);
        KillableOperation killableOperation = KillableOperationWrapper.create(operator);
        boolean balanceData = operator.getParameterAsBoolean(PARAMETER_BALANCE_DATA);
        boolean caseSensitive = operator.getParameterAsBoolean(PARAMETER_CASE_SENSITIVE);
        boolean absolute = operator.getParameterAsInt(PARAMETER_SAMPLE) == SAMPLE_ABSOLUTE;

        Attribute label = null;
        List<String[]> classProbabilities = null;
        Map<String, Long> requestedSizes = null;
        // Stays negative in balanced mode, which forces the WHERE clause to be appended below.
        double probability = -1.0d;

        if (balanceData) {
            label = hadoopExampleSet.getAttributes().getLabel();
            if (label == null) {
                throw new UserError(operator, 105);
            }
            if (!label.isNominal()) {
                throw new UserError(operator, 101, new Object[]{operator.getName(), label.getName()});
            }
            if (absolute) {
                requestedSizes = parseRequestedClassSizes(operator);
            } else {
                classProbabilities = parseClassProbabilities(operator);
            }
        }

        if (absolute) {
            operator.logNote("Counting rows for absolute size sampling...");
            String countQuery;
            Map<String, Long> classCounts = null;
            if (balanceData) {
                String labelName = label.getName();
                countQuery = "SELECT " + labelName + ", COUNT(*) cnt FROM ? GROUP BY " + labelName;
                classCounts = new HashMap<String, Long>();
            } else {
                countQuery = "SELECT COUNT(*) cnt FROM ?";
            }

            long totalRows = 0;
            ResultSet resultSet = null;
            try {
                resultSet = radoopNest.getHiveHandler().runQueryKillable(hadoopExampleSet.getUdfDependencies(), null, killableOperation, countQuery, HadoopExampleSet.getTableName(hadoopExampleSet));
                while (resultSet.next()) {
                    long count;
                    if (balanceData) {
                        String className = resultSet.getString(1);
                        count = resultSet.getLong(2);
                        // A NULL label value must not be upper-cased.
                        String key = (caseSensitive || className == null) ? className : className.toUpperCase();
                        // Classes that differ only by case share one key in case-insensitive mode,
                        // so their counts are summed instead of overwriting each other.
                        Long previous = classCounts.get(key);
                        classCounts.put(key, Long.valueOf(previous == null ? count : previous.longValue() + count));
                    } else {
                        count = resultSet.getLong(1);
                    }
                    totalRows += count;
                }
                operator.logNote("Number of rows: " + RadoopTools.formatOutputInteger(totalRows));
            } catch (SQLException e) {
                JdbcConnectionTools.forceCloseResAndConnection(resultSet);
                throw new OperatorException("Error during calculating data set statistics: ", e);
            } finally {
                JdbcConnectionTools.closeRes(resultSet);
            }

            if (enumMap != null) {
                enumMap.put(SampleStatistics.CountAll, Long.valueOf(totalRows));
            }

            if (balanceData) {
                classProbabilities = new ArrayList<String[]>();
                for (Map.Entry<String, Long> requested : requestedSizes.entrySet()) {
                    String className = requested.getKey();
                    long requestedSize = requested.getValue().longValue();
                    Long available = classCounts.get(caseSensitive ? className : className.toUpperCase());
                    if (available == null) {
                        operator.logWarning("Class '" + className + "' does not exist in the data set. The specified sample size for this class will be ignored.");
                        continue;
                    }
                    double classProbability;
                    if (requestedSize <= available.longValue()) {
                        // Cast first: long / long would truncate the ratio to 0 or 1.
                        classProbability = (double) requestedSize / available.longValue();
                    } else {
                        operator.logWarning("Input ExampleSet does not contain the specified number (" + requestedSize + ") of rows (only " + available + ") for the class: '" + className + "'.");
                        classProbability = 1.0d;
                    }
                    classProbabilities.add(new String[]{className, RadoopTools.formatHiveDouble(classProbability)});
                }
                if (classProbabilities.isEmpty()) {
                    throw new UserError(operator, 217, new Object[]{PARAMETER_SAMPLE_SIZE_LIST, operator.getName()});
                }
            } else {
                long requestedSize = operator.getParameterAsLong(PARAMETER_SAMPLE_SIZE);
                if (requestedSize <= totalRows) {
                    // Cast first: long / long would truncate the ratio to 0 or 1.
                    probability = (double) requestedSize / totalRows;
                } else {
                    operator.logWarning("Input ExampleSet does not contain the specified number (" + requestedSize + ") of rows (only " + totalRows + ").");
                    probability = 1.0d;
                }
            }
        } else if (!balanceData) {
            probability = operator.getParameterAsDouble(PARAMETER_SAMPLE_PROBABILITY);
        }

        StringBuilder randomLimit = new StringBuilder();
        if (!balanceData) {
            randomLimit.append(RadoopTools.formatHiveDouble(probability));
        } else if (classProbabilities != null && !classProbabilities.isEmpty()) {
            String labelName = label.getName();
            randomLimit.append("CASE ").append(caseSensitive ? labelName : "UPPER(" + labelName + ")");
            for (String[] classProbability : classProbabilities) {
                // NOTE(review): the class name is embedded verbatim; a name containing a single
                // quote would break the statement - consider escaping once the expected input
                // format is confirmed.
                String literal = "'" + classProbability[0] + "'";
                randomLimit.append(" WHEN ").append(caseSensitive ? literal : "UPPER(" + literal + ")").append(" THEN ").append(classProbability[1]);
            }
            randomLimit.append(" ELSE -1.0 END");
        }

        String query = SAMPLE_QUERY.replace("<table_name>", HadoopExampleSet.getTableName(hadoopExampleSet));
        if (probability < 1.0d) {
            query = (query + SAMPLE_QUERY_WHERE).replace("<random_limit>", randomLimit);
        }
        return query;
    }

    /** Reads the per-class sample size list and parses it into a class-name to size map. */
    private static Map<String, Long> parseRequestedClassSizes(Operator operator) throws OperatorException {
        List<String[]> entries = operator.getParameterList(PARAMETER_SAMPLE_SIZE_LIST);
        if (entries.isEmpty()) {
            throw new UserError(operator, 217, new Object[]{PARAMETER_SAMPLE_SIZE_LIST, operator.getName(), ""});
        }
        Map<String, Long> sizes = new HashMap<String, Long>();
        for (String[] entry : entries) {
            try {
                sizes.put(entry[0], Long.valueOf(entry[1]));
            } catch (NumberFormatException e) {
                throw new UserError(operator, 211, new Object[]{PBImageXmlWriter.SNAPSHOT_DIFF_SECTION_SIZE, entry[1]});
            }
        }
        return sizes;
    }

    /** Reads the per-class probability list and returns {name, Hive-formatted probability} pairs. */
    private static List<String[]> parseClassProbabilities(Operator operator) throws OperatorException {
        List<String[]> entries = operator.getParameterList(PARAMETER_SAMPLE_PROBABILITY_LIST);
        if (entries.isEmpty()) {
            throw new UserError(operator, 217, new Object[]{PARAMETER_SAMPLE_PROBABILITY_LIST, operator.getName()});
        }
        List<String[]> probabilities = new ArrayList<String[]>(entries.size());
        for (String[] entry : entries) {
            try {
                probabilities.add(new String[]{entry[0], RadoopTools.formatHiveDouble(Double.parseDouble(entry[1]))});
            } catch (NumberFormatException e) {
                throw new UserError(operator, 211, new Object[]{"probability", entry[1]});
            }
        }
        return probabilities;
    }

    /**
     * Returns the superclass parameter types followed by the sampling parameter types
     * created by {@link #getSampleParameterTypes(ParameterHandler)}.
     *
     * @return the list obtained from the superclass, extended with the sampling parameters
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> parameterTypes = super.getParameterTypes();
        parameterTypes.addAll(getSampleParameterTypes(this));
        return parameterTypes;
    }

    /**
     * Creates a Hive view over the sampling query built from this operator's parameters and
     * publishes it as the resulting example set.
     *
     * @throws OperatorException if building the query or one of the called helpers fails
     */
    public void doWork() throws OperatorException {
        final HadoopExampleSet input = getInputHes();
        final String viewName = getTempTableName();
        // The query is built inline as an argument so the evaluation order of the calls stays as it was.
        getHiveHandler().runFastScript(
                input.getUdfDependencies(),
                false,
                "CREATE VIEW ? AS " + buildSampleQuery(this, input),
                viewName);
        createExampleSet(viewName, input, false, null, null);
    }

    /**
     * Returns the constant cost value 1 for this operator.
     * NOTE(review): the unit and meaning of the cost are defined by RadoopOperator, which is not visible here.
     *
     * @return always 1
     */
    @Override // eu.radoop.RadoopOperator
    public int getCost() {
        return 1;
    }
}
