package eu.radoop.operator.discretization;

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.rapidminer.example.Attribute;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ProcessSetupError;
import com.rapidminer.operator.SimpleProcessSetupError;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ports.metadata.AttributeMetaData;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.MetaDataInfo;
import com.rapidminer.operator.ports.quickfix.ParameterSettingQuickFix;
import com.rapidminer.operator.preprocessing.PreprocessingModel;
import com.rapidminer.operator.preprocessing.discretization.DiscretizationModel;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.ParameterTypeList;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.parameter.conditions.EqualStringCondition;
import com.rapidminer.tools.LogService;
import com.rapidminer.tools.container.Tupel;
import eu.radoop.RadoopNest;
import eu.radoop.RadoopTools;
import eu.radoop.datahandler.HadoopExampleSet;
import eu.radoop.datahandler.hive.JdbcConnectionTools;
import eu.radoop.manipulation.AbstractPreprocessing;
import eu.radoop.operator.RadoopAttributeSubsetSelector;
import eu.radoop.operator.discretization.common.AttributeRangeStruct;
import eu.radoop.operator.discretization.common.Range;
import eu.radoop.operator.discretization.common.RangeLabel;
import eu.radoop.operator.discretization.common.RangesSQLFormatter;
import eu.radoop.operator.ports.metadata.HadoopExampleSetMetaData;
import eu.radoop.operator.ports.metadata.RadoopPrecondition;
import java.sql.ResultSet;
import java.text.DecimalFormatSymbols;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.function.Function;
import java.util.logging.Level;
import java.util.stream.Collectors;
import org.apache.commons.lang3.time.StopWatch;

/* loaded from: input_file:eu/radoop/operator/discretization/Discretization.class */
public abstract class Discretization extends AbstractPreprocessing {
    /** Parameter key holding the postfix appended to a source attribute name to form the bin attribute name. */
    private static final String OUTPUT_ATTRIBUTE_POSTFIX = "output_attribute_postfix";
    /** Parameter key of the category parameter that selects how range labels are written. */
    private static final String BINNING_RANGE_LABEL = "range_name_type";
    /** Parameter key of the label type used for ranges that have no custom name. */
    private static final String CUSTOM_BINNING_NAME_DEFAULT = "custom_range_name_default";
    /** Parameter key of the list parameter holding (range index, range label) pairs. */
    private static final String CUSTOM_BINNING_NAME_MATRIX = "custom_range_name_matrix";
    /** Default value of the {@link #OUTPUT_ATTRIBUTE_POSTFIX} parameter. */
    protected static final String DEFAULT_OUTPUT_POSTFIX = "_bin";
    /** Selector for the attributes to discretize; created in the constructor and contributes its own parameters. */
    protected final RadoopAttributeSubsetSelector attributeSubsetSelector;
    /** Format used to join the source attribute name and the postfix. */
    private static final String OUTPUT_BIN_FORMAT = "%s%s";
    // The three bin-count constants are not referenced in this class; presumably used by subclasses - confirm.
    protected static final Integer BIN_COUNT_MIN_VALUE = 2;
    protected static final Integer DEFAULT_BIN_COUNT = 5;
    protected static final Integer BIN_COUNT_MAX_VALUE = Integer.MAX_VALUE;
    /** Character class accepted in postfix values; also passed as an argument of the "invalid_character" setup error. */
    protected static final String SQL_VALID_CHARS = "[a-zA-Z_0-9]";
    /** Regex built from {@link #SQL_VALID_CHARS}; applied with {@code String.matches}, so the whole value must match. */
    protected static final String SQL_VALID_CHAR_PATTERN = String.format("^%s+", SQL_VALID_CHARS);

    /* loaded from: input_file:eu/radoop/operator/discretization/Discretization$CustomLabelFunction.class */
    /**
     * Builds the label for the custom range-name type: user supplied names for selected
     * range indices, backed by a non-custom default label type for every other range.
     */
    public static class CustomLabelFunction implements Function<Range[], RangeLabel.DiscretizationLabel> {
        private final Discretization ds;

        /**
         * @param discretization operator whose custom-name parameters are read when the function is applied
         */
        public CustomLabelFunction(Discretization discretization) {
            this.ds = discretization;
        }

        /**
         * Creates the custom label for the given ranges. If building it fails with an
         * {@link OperatorException}, the failure is logged at FINE level and a
         * {@code RangeLabel.LongLabel} is returned instead.
         */
        @Override // java.util.function.Function
        public RangeLabel.DiscretizationLabel apply(Range[] rangeArr) {
            RangeLabel.DiscretizationLabel label;
            try {
                label = generateCustomLabel(rangeArr);
            } catch (OperatorException e) {
                LogService.getRoot().log(Level.FINE, "Issue generating custom Label, reverting to using Long Labels", e);
                label = new RangeLabel.LongLabel(Range.findMaxIndex(rangeArr));
            }
            return label;
        }

        /**
         * Combines the configured default label with the per-index names from the custom name matrix.
         *
         * @throws IllegalArgumentException if the configured default type is itself CUSTOM
         * @throws OperatorException if a parameter is undefined or a matrix row cannot be processed
         */
        private RangeLabel.DiscretizationLabel generateCustomLabel(Range[] rangeArr) throws OperatorException {
            final RangeLabel.TYPE fallbackType = this.ds.getCustomBinnigNameDefault();
            if (fallbackType == RangeLabel.TYPE.CUSTOM) {
                // A custom default would hand control straight back to this function.
                throw new IllegalArgumentException("Invalid label provided : " + fallbackType);
            }
            final RangeLabel.DiscretizationLabel fallbackLabel =
                    this.ds.createDiscretizationLabel(fallbackType, rangeArr, new CustomLabelFunction(this.ds));
            final List<String[]> rows = this.ds.getCustomBinningNameMatrix();
            final HashMap<Range, String> namesByRange = new HashMap<>();
            for (String[] row : rows) {
                try {
                    final int index = Integer.parseInt(row[0]);
                    // Rows pointing outside the available ranges are skipped.
                    if (index >= 0 && index < rangeArr.length) {
                        namesByRange.put(rangeArr[index], String.format("'%s'", row[1]));
                    }
                } catch (Exception e) {
                    throw new OperatorException("Error matching labels to index of ranges", e);
                }
            }
            return new RangeLabel.CustomLabel(namesByRange, fallbackLabel);
        }
    }

    /* loaded from: input_file:eu/radoop/operator/discretization/Discretization$ResultSetCloser.class */
    /**
     * Adapter that lets a {@link ResultSet} be used in a try-with-resources statement;
     * closing is delegated to {@code JdbcConnectionTools.closeRes}.
     */
    public static class ResultSetCloser implements AutoCloseable {
        private ResultSet resultSet;

        /**
         * @param resultSet the result set to hand to {@code JdbcConnectionTools.closeRes} on close
         */
        public ResultSetCloser(ResultSet resultSet) {
            this.resultSet = resultSet;
        }

        /** Closes the wrapped result set and then drops the reference to it. */
        @Override // java.lang.AutoCloseable
        public void close() {
            JdbcConnectionTools.closeRes(resultSet);
            resultSet = null;
        }

        /**
         * @return the wrapped result set, or {@code null} once {@link #close()} has run
         */
        public ResultSet getResultSet() {
            return resultSet;
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Creates the operator, registers a {@link RadoopPrecondition} on the example set
     * input port and sets up the attribute subset selector for that port.
     *
     * @param operatorDescription description passed on to the superclass constructor
     */
    public Discretization(OperatorDescription operatorDescription) {
        super(operatorDescription);
        getExampleSetInputPort().addPrecondition(new RadoopPrecondition(getExampleSetInputPort()));
        // NOTE(review): the literal 2 is the same value getFilterValueTypes() returns;
        // presumably the numerical value-type constant - confirm.
        this.attributeSubsetSelector = new RadoopAttributeSubsetSelector(getParameterHandler(), getExampleSetInputPort(), 2);
    }

    /**
     * Derives the output meta data by adding one bin attribute per selected attribute and postfix.
     * If a required parameter is undefined, a warning is logged and the input meta data is
     * returned unchanged.
     */
    @Override // eu.radoop.manipulation.RadoopAbstractManipulation
    public HadoopExampleSetMetaData modifyExampleSetOutputMetaData(HadoopExampleSetMetaData hadoopExampleSetMetaData) {
        final String[] postfixes;
        final RangeLabel.TYPE labelType;
        try {
            postfixes = postFixLabels();
            labelType = getRangeLabel();
        } catch (UndefinedParameterError e) {
            LogService.getRoot().log(Level.WARNING, "Error with developing Metadata, check parameter settings", e);
            return hadoopExampleSetMetaData;
        }
        return modifyExampleSetOutputMetaData(hadoopExampleSetMetaData, postfixes, labelType);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Runs the superclass checks and then validates the values of the output postfix
     * parameter and of every key returned by {@link #getPostfixKeys()}.
     */
    @Override // eu.radoop.RadoopOperator
    public void performAdditionalChecks() {
        super.performAdditionalChecks();
        final Set<String> parameterKeys = new HashSet<>();
        parameterKeys.add(OUTPUT_ATTRIBUTE_POSTFIX);
        parameterKeys.addAll(getPostfixKeys());
        checkValidPostFix(parameterKeys);
    }

    /**
     * Validates the values of the given parameters against the SQL-safe character
     * pattern and registers a setup error, with a quick fix pointing at the parameter,
     * for every value that does not match.
     *
     * <p>This method no longer calls {@code super.performAdditionalChecks()}:
     * {@link #performAdditionalChecks()} already runs the superclass checks before
     * delegating here, so repeating the call ran them twice per validation.
     *
     * @param set keys of the parameters whose values are checked
     * @throws IllegalStateException if a parameter value cannot be read
     */
    protected void checkValidPostFix(Set<String> set) {
        try {
            for (String str : set) {
                if (!checkValidCharForSql(getParameterAsString(str))) {
                    addError(new SimpleProcessSetupError(ProcessSetupError.Severity.ERROR, getPortOwner(), Lists.newArrayList(new ParameterSettingQuickFix(this, str)), "invalid_character", new Object[]{str, SQL_VALID_CHARS}));
                }
            }
        } catch (OperatorException e) {
            throw new IllegalStateException(String.format("Error with performing checks of postfix keys %s", set), e);
        }
    }

    /**
     * @return a new mutable set with the keys of the parameters holding output postfixes;
     *         here only the output attribute postfix key
     */
    protected Set<String> getPostfixKeys() {
        final HashSet<String> keys = new HashSet<>();
        keys.add(OUTPUT_ATTRIBUTE_POSTFIX);
        return keys;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Tells whether a value consists only of the characters allowed by {@link #SQL_VALID_CHARS}.
     *
     * @param str value to test, may be {@code null}
     * @return {@code false} for {@code null} and for values that do not match the pattern
     *         as a whole (including the empty string); {@code true} otherwise
     */
    public static boolean checkValidCharForSql(String str) {
        if (str == null) {
            return false;
        }
        return str.matches(SQL_VALID_CHAR_PATTERN);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * @return the postfixes for which bin attributes are generated; here a single-element
     *         array holding the configured output postfix
     * @throws UndefinedParameterError if the postfix parameter cannot be read
     */
    public String[] postFixLabels() throws UndefinedParameterError {
        final String postfix = getPostfix();
        return new String[] {postfix};
    }

    /**
     * Builds the output meta data: a copy of the input attributes plus, for every attribute
     * in the selector's meta data subset and every postfix, one new attribute of the label
     * type's ontology type. Generated names are made unique against the output built so far.
     *
     * @param hadoopExampleSetMetaData input meta data; when {@code null} an empty meta data object is returned
     * @param strArr postfixes to generate bin attributes for
     * @param type label type whose {@code ontologyType} becomes the value type of the new attributes
     */
    private HadoopExampleSetMetaData modifyExampleSetOutputMetaData(HadoopExampleSetMetaData hadoopExampleSetMetaData, String[] strArr, RangeLabel.TYPE type) {
        final HadoopExampleSetMetaData output = new HadoopExampleSetMetaData();
        if (hadoopExampleSetMetaData == null) {
            return output;
        }
        RadoopTools.copyGenerationHistory(output, hadoopExampleSetMetaData);
        output.addAllAttributes(hadoopExampleSetMetaData.getAllAttributes());
        for (AttributeMetaData source : this.attributeSubsetSelector.getMetaDataSubset(hadoopExampleSetMetaData, false).getAllAttributes()) {
            for (String postfix : strArr) {
                final String binName = generateOutputBin(source.getName(), postfix, output);
                final AttributeMetaData bin = new AttributeMetaData(binName, type.ontologyType);
                // The bin attribute takes over the missing-value count of its source attribute.
                bin.setNumberOfMissingValues(source.getNumberOfMissingValues());
                output.addAttribute(bin);
            }
        }
        return output;
    }

    /**
     * Creates the label object for the given ranges by delegating to the label type.
     *
     * @param type label type that builds the label
     * @param rangeArr ranges to label
     * @param function function handed on to the label type's factory method
     * @return the label created by {@code type}
     */
    public RangeLabel.DiscretizationLabel createDiscretizationLabel(RangeLabel.TYPE type, Range[] rangeArr, Function<Range[], RangeLabel.DiscretizationLabel> function) {
        final RangeLabel.DiscretizationLabel label = type.createDiscretizationLabel(rangeArr, function);
        return label;
    }

    /**
     * Declares the operator's parameters in this order: the superclass parameters, the
     * attribute subset selector parameters, the output postfix, whatever the subclass adds
     * in {@link #addAdditionalParameters(List)}, the range label type, and finally the
     * custom name matrix and custom default type. The last two are registered with a
     * dependency condition on the CUSTOM label type.
     *
     * @return the parameter list obtained from the superclass, extended in place
     */
    @Override // eu.radoop.manipulation.AbstractPreprocessing
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> parameterTypes = super.getParameterTypes();
        parameterTypes.addAll(this.attributeSubsetSelector.getParameterTypes());
        parameterTypes.add(new ParameterTypeString(OUTPUT_ATTRIBUTE_POSTFIX, "Ending to add to generate binning attribute", DEFAULT_OUTPUT_POSTFIX));
        addAdditionalParameters(parameterTypes);

        // Labels of every label type, in enum declaration order.
        String[] allLabels = Arrays.stream(RangeLabel.TYPE.values()).map(type -> type.label).toArray(String[]::new);
        parameterTypes.add(new ParameterTypeCategory(BINNING_RANGE_LABEL, "How label will be written", allLabels, 0, false));

        // The default for unnamed ranges must not be CUSTOM itself. The labels are copied into
        // an explicitly mutable list because Collectors.toList() does not guarantee mutability.
        List<String> defaultLabels = Lists.newArrayList(allLabels);
        defaultLabels.remove(RangeLabel.TYPE.CUSTOM.label);

        EqualStringCondition customSelected = new EqualStringCondition(getParameterHandler(), BINNING_RANGE_LABEL, false, new String[]{RangeLabel.TYPE.CUSTOM.label});

        ParameterTypeList nameMatrix = new ParameterTypeList(CUSTOM_BINNING_NAME_MATRIX, "Range Index/Name Pair", new ParameterTypeInt("key", "Range Index", 0, Integer.MAX_VALUE), new ParameterTypeString("value", "Range Label"));
        nameMatrix.registerDependencyCondition(customSelected);
        parameterTypes.add(nameMatrix);

        ParameterTypeCategory defaultType = new ParameterTypeCategory(CUSTOM_BINNING_NAME_DEFAULT, "Default to utilize for undefined range", defaultLabels.toArray(new String[0]), 0, false);
        defaultType.registerDependencyCondition(customSelected);
        parameterTypes.add(defaultType);
        return parameterTypes;
    }

    /**
     * Hook for subclasses to append their own parameters. It is called from
     * {@link #getParameterTypes()} after the output postfix parameter has been added and
     * before the range label parameters are added.
     *
     * @param list the parameter list being assembled; implementations add to it in place
     */
    public abstract void addAdditionalParameters(List<ParameterType> list);

    /**
     * @return always 0 for this operator
     */
    @Override // eu.radoop.RadoopOperator
    public int getCost() {
        return 0;
    }

    /**
     * @return the numerical attributes among those the attribute subset selector picks
     *         from the input example set
     * @throws OperatorException if the input example set or the attribute subset cannot be obtained
     */
    public List<Attribute> getDiscretizationAttributes() throws OperatorException {
        return this.attributeSubsetSelector.getAttributeSubset(getInputHes(), false, true)
                .stream()
                .filter(Attribute::isNumerical)
                .collect(Collectors.toList());
    }

    /**
     * @return the value of the output attribute postfix parameter
     * @throws UndefinedParameterError if the parameter cannot be read
     */
    public String getPostfix() throws UndefinedParameterError {
        final String postfix = getParameter(OUTPUT_ATTRIBUTE_POSTFIX);
        return postfix;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Generates the bin attribute name {@code str + str2}. When meta data is given and it
     * reports that the name definitely exists already, {@code _1}, {@code _2}, ... are
     * tried in turn until a name is found that is not reported as existing.
     *
     * @param str source attribute name
     * @param str2 postfix
     * @param exampleSetMetaData meta data to check for name clashes, or {@code null} to skip the check
     * @return the generated attribute name
     */
    public static String generateOutputBin(String str, String str2, ExampleSetMetaData exampleSetMetaData) {
        String candidate = generateOutputBin(str, str2, 0);
        if (exampleSetMetaData == null) {
            return candidate;
        }
        for (int suffix = 1; exampleSetMetaData.containsAttributeName(candidate) == MetaDataInfo.YES; suffix++) {
            candidate = generateOutputBin(str, str2, suffix);
        }
        return candidate;
    }

    /**
     * Formats a bin attribute name from attribute name and postfix, appending {@code _i}
     * when {@code i} is greater than zero.
     */
    private static String generateOutputBin(String str, String str2, int i) {
        final String base = String.format(OUTPUT_BIN_FORMAT, str, str2);
        return i > 0 ? String.format("%s_%s", base, Integer.valueOf(i)) : base;
    }

    /**
     * Generates the bin attribute name for an attribute, using the operator's configured
     * postfix and checking for clashes against the meta data of the operator's input port.
     *
     * @throws UndefinedParameterError if the postfix parameter cannot be read
     */
    public static String generateOutputBin(Attribute attribute, Discretization discretization) throws UndefinedParameterError {
        final String attributeName = attribute.getName();
        final String postfix = discretization.getPostfix();
        final ExampleSetMetaData inputMetaData = discretization.getHesMDFromInputPort(discretization.getExampleSetInputPort());
        return generateOutputBin(attributeName, postfix, inputMetaData);
    }

    /**
     * Generates the bin attribute name for an attribute's meta data, using the operator's
     * configured postfix and checking for clashes against the meta data of the operator's
     * input port.
     *
     * @throws UndefinedParameterError if the postfix parameter cannot be read
     */
    public static String generateOutputBin(AttributeMetaData attributeMetaData, Discretization discretization) throws UndefinedParameterError {
        final String attributeName = attributeMetaData.getName();
        final String postfix = discretization.getPostfix();
        final ExampleSetMetaData inputMetaData = discretization.getHesMDFromInputPort(discretization.getExampleSetInputPort());
        return generateOutputBin(attributeName, postfix, inputMetaData);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * @return the label type configured as default for ranges without a custom name
     * @throws UndefinedParameterError if the parameter cannot be read
     */
    public RangeLabel.TYPE getCustomBinnigNameDefault() throws UndefinedParameterError {
        final String configured = getParameter(CUSTOM_BINNING_NAME_DEFAULT);
        return RangeLabel.TYPE.getType(configured);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * @return the custom name matrix parameter converted to a list of string arrays;
     *         per the parameter definition each row is a (range index, range label) pair
     * @throws UndefinedParameterError if the parameter cannot be read
     */
    public List<String[]> getCustomBinningNameMatrix() throws UndefinedParameterError {
        final String encoded = getParameter(CUSTOM_BINNING_NAME_MATRIX);
        return ParameterTypeList.transformString2List(encoded);
    }

    /**
     * @return the label type selected by the range label parameter
     * @throws UndefinedParameterError if the parameter cannot be read
     */
    public RangeLabel.TYPE getRangeLabel() throws UndefinedParameterError {
        final String configured = getParameter(BINNING_RANGE_LABEL);
        return RangeLabel.TYPE.getType(configured);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * @return {@code true} only if an input example set is present, at least one attribute
     *         is selected for discretization and every postfix is valid; the conditions are
     *         evaluated in that order and evaluation stops at the first one that fails
     * @throws OperatorException if the input or a parameter cannot be read
     */
    public boolean canDoWork() throws OperatorException {
        return getInputHes() != null
                && !getDiscretizationAttributes().isEmpty()
                && checkPostfixs();
    }

    /**
     * @return {@code true} if every postfix returned by {@link #postFixLabels()} passes
     *         {@link #checkValidCharForSql(String)}
     * @throws UndefinedParameterError if a postfix parameter cannot be read
     */
    public boolean checkPostfixs() throws UndefinedParameterError {
        return Arrays.stream(postFixLabels()).allMatch(Discretization::checkValidCharForSql);
    }

    /**
     * Runs the discretization: obtains the ranges from the subclass, generates the SQL for
     * a temporary table, registers that table with the Radoop nest, executes the script,
     * creates the output example set and, if the model output is connected, delivers the
     * preprocessing model. When no ranges are provided only a warning is logged. The total
     * run time is logged at INFO level in every case.
     *
     * @throws UserError (code 1003) if {@link #canDoWork()} returns {@code false}
     * @throws OperatorException if any step of the processing fails
     */
    public void doWork() throws OperatorException {
        final StopWatch timer = new StopWatch();
        timer.start();
        try {
            if (!canDoWork()) {
                LogService.getRoot().log(Level.WARNING, "No Input ExampleSet provided, attributes set to work from, or postfixes are invalid");
                throw new UserError(this, 1003, new Object[]{getInputHes()});
            }
            final HadoopExampleSet input = getInputHes();
            final RangeLabel.TYPE labelType = getRangeLabel();
            checkForStop();
            final List<AttributeRangeStruct> ranges = provideRanges();
            checkForStop();
            if (ranges.isEmpty()) {
                LogService.getRoot().log(Level.WARNING, "No Ranges provided to work with");
                return;
            }
            final String tempTable = getTempTableName();
            final String sourceTable = getDiscretizationTable();
            final String sql = generateSql(tempTable, sourceTable, ranges, labelType);
            checkForStop();
            final RadoopNest nest = getRadoopNest();
            final String operatorId = getName() + " " + hashCode();
            final String nestId = nest.getName() + " " + nest.hashCode();
            LogService.getRoot().log(Level.FINE, String.format("Registering temp table %s from operator %s to nest %s", tempTable, operatorId, nestId));
            // The table is registered with the nest before the script that creates it is run.
            nest.addTemporaryTableToList(tempTable);
            getHiveHandler().runFastScript(input.getUdfDependencies(), false, sql, new Object[0]);
            createExampleSet(tempTable, input, false, null, null);
            if (this.modelOutput != null && this.modelOutput.isConnected()) {
                this.modelOutput.deliver(generatePreProcessModel(input, ranges));
            }
        } finally {
            timer.stop();
            LogService.getRoot().log(Level.INFO, () -> String.format("Discretization finished total time took %s", timer));
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Builds the discretization model for the given ranges: for every attribute, a sorted
     * set of (upper bound of the range, cleaned-up range label) tuples, keyed by attribute name.
     *
     * @param hadoopExampleSet example set the model is created for
     * @param list ranges per attribute
     * @return a model with the ranges set
     * @throws OperatorException if the range label parameter cannot be read
     */
    public DiscretizationModel generatePreProcessModel(HadoopExampleSet hadoopExampleSet, List<AttributeRangeStruct> list) throws OperatorException {
        // Raw collection types are kept: the declared signature of setRanges is not visible here.
        final HashMap rangesByAttribute = new HashMap();
        for (AttributeRangeStruct struct : list) {
            final TreeSet limits = new TreeSet();
            final String attributeName = struct.getAttribute().getName();
            for (Range range : struct.getRanges()) {
                final Double upperBound = Double.valueOf(range.getHigh().doubleValue());
                final String label = labelSqlCleanup(struct.labelForRange(range, getRangeLabel()));
                limits.add(new Tupel(upperBound, label));
            }
            rangesByAttribute.put(attributeName, limits);
        }
        final DiscretizationModel model = new DiscretizationModel(hadoopExampleSet);
        model.setRanges(rangesByAttribute);
        return model;
    }

    /**
     * Turns a label expression into plain text: removes {@code CONCAT}, commas, parentheses
     * and quotes, replaces the {@code decodeunhexE2889EUTF-8} residue with the locale's
     * infinity symbol, moves the space after {@code [} in front of it, drops the space
     * before {@code ]}, collapses runs of whitespace and trims the result.
     *
     * @param str label expression, may be {@code null}
     * @return the cleaned label, or {@code null} if {@code str} is {@code null}
     */
    protected static String labelSqlCleanup(String str) {
        if (str == null) {
            return null;
        }
        final String infinity = DecimalFormatSymbols.getInstance().getInfinity();
        String cleaned = str.replaceAll("CONCAT", "");
        cleaned = cleaned.replaceAll("[,\\(\\)'\"]", "");
        cleaned = cleaned.replace("-decodeunhexE2889EUTF-8", "-" + infinity);
        cleaned = cleaned.replace("decodeunhexE2889EUTF-8", infinity);
        cleaned = cleaned.replace("[ ", " [");
        cleaned = cleaned.replace(" ]", "]");
        cleaned = cleaned.replaceAll("\\s{2,}", " ");
        return cleaned.trim();
    }

    /**
     * Creates the SQL statement for the discretized table by converting every attribute's
     * ranges into a SQL formatter and handing them to
     * {@code RangesSQLFormatter.createSQLViewStatement}.
     *
     * @param str first name argument passed to {@code createSQLViewStatement} ({@link #doWork()} passes the temporary table name)
     * @param str2 second name argument passed to {@code createSQLViewStatement} ({@link #doWork()} passes the discretization table name)
     * @param list ranges per attribute
     * @param type label type used when formatting the ranges
     * @return the generated SQL
     */
    public static String generateSql(String str, String str2, List<AttributeRangeStruct> list, RangeLabel.TYPE type) {
        return RangesSQLFormatter.createSQLViewStatement(
                str,
                str2,
                list.stream()
                        .map(struct -> struct.generateRangeSQLFormatter(type))
                        .collect(Collectors.toList()));
    }

    /**
     * @return the table name of the input example set, as reported by {@code HadoopExampleSet.getTableName}
     * @throws OperatorException if the input example set cannot be obtained
     */
    protected String getDiscretizationTable() throws OperatorException {
        final HadoopExampleSet input = getInputHes();
        return HadoopExampleSet.getTableName(input);
    }

    /**
     * Supplies the ranges to discretize with, one structure per attribute. Called once by
     * {@link #doWork()}; an empty list makes {@code doWork()} log a warning and skip the
     * table generation.
     *
     * @return the ranges per attribute
     * @throws OperatorException if the ranges cannot be determined
     */
    public abstract List<AttributeRangeStruct> provideRanges() throws OperatorException;

    /**
     * @return a single-element array containing 2
     */
    // NOTE(review): 2 is the same literal the constructor passes to the attribute subset
    // selector; presumably the numerical value-type constant - confirm.
    @Override // eu.radoop.manipulation.AbstractPreprocessing
    protected int[] getFilterValueTypes() {
        return new int[]{2};
    }

    /**
     * @return {@link DiscretizationModel}, the class of the model built by
     *         {@link #generatePreProcessModel(HadoopExampleSet, List)}
     */
    @Override // eu.radoop.manipulation.AbstractPreprocessing
    protected Class<? extends PreprocessingModel> getPreprocessingModelClass() {
        return DiscretizationModel.class;
    }
}
