package eu.radoop.operator.spark;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.operator.AbstractModel;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.OperatorVersion;
import com.rapidminer.operator.ProcessStoppedException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.ports.metadata.AttributeMetaData;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import eu.radoop.RadoopTools;
import eu.radoop.datahandler.HadoopExampleSet;
import eu.radoop.datahandler.HadoopExampleSetFactory;
import eu.radoop.datahandler.hdfs.TempHDFSDirectory;
import eu.radoop.datahandler.hive.FileFormatHive;
import eu.radoop.datahandler.mapreducehdfs.MapReduceHDFSHandler;
import eu.radoop.exception.HiveTableException;
import eu.radoop.exception.OperationKilledException;
import eu.radoop.modeling.prediction.RadoopAbstractLearner;
import eu.radoop.operator.ports.metadata.HadoopExampleSetMetaData;
import eu.radoop.operator.ports.metadata.HivePassThroughRule;
import eu.radoop.operator.spark.SparkTools;
import eu.radoop.spark.SparkOperation;
import eu.radoop.spark.SparkVersion;
import eu.radoop.transfer.MTOConverter;
import eu.radoop.transfer.TransferObject;
import eu.radoop.transfer.model.ModelTransferObject;
import eu.radoop.transfer.parameter.CommonParameter;
import eu.radoop.transfer.parameter.ParameterTransferObject;
import eu.radoop.transfer.parameter.ParameterTransferObject.ParameterKey;
import java.io.IOException;
import java.lang.Enum;
import java.lang.reflect.ParameterizedType;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedQueue;

/* loaded from: input_file:eu/radoop/operator/spark/AbstractSparkLearner.class */
public abstract class AbstractSparkLearner<P extends Enum<P> & ParameterTransferObject.ParameterKey, T extends ModelTransferObject, M extends AbstractModel> extends RadoopAbstractLearner<M> {
    /** Output port created in the constructor; its name comes from {@link #getCustomOutputExampleSetPortName()}. */
    protected OutputPort exampleSetOutput;
    /** Spark operation received by the constructor and handed to the Spark handler when the job is run. */
    private final SparkOperation sparkOperation;
    /**
     * Operator version used to choose the default of the data format parameter and reported by
     * {@link #getIncompatibleVersionChanges()}.
     */
    public static final OperatorVersion DEFAULT_INPUTFORMAT_CHANGED = new OperatorVersion(7, 3, 1);

    /**
     * Creates the learner and its example set output port.
     *
     * <p>The port name is taken from {@link #getCustomOutputExampleSetPortName()} and the second
     * argument of {@code createPort} is the negation of {@code keepOriginalExampleSet()}. Unless the
     * original example set is kept, a metadata rule is registered that passes the input metadata to
     * the output port with the attributes from {@link #defineNewAttributes()} added.
     *
     * <p>Decompiler note: the access modifier was originally {@code protected}.
     *
     * @param operatorDescription description forwarded to the superclass constructor
     * @param sparkOperation      Spark operation that is run when the model is learned
     */
    public AbstractSparkLearner(OperatorDescription operatorDescription, SparkOperation sparkOperation) {
        super(operatorDescription);
        this.exampleSetOutput = getOutputPorts().createPort(getCustomOutputExampleSetPortName(), !keepOriginalExampleSet());
        this.sparkOperation = sparkOperation;
        if (!keepOriginalExampleSet()) {
            // The output differs from the input only when the original example set is not kept,
            // so the extra attributes are announced in the metadata only in that case.
            getTransformer().addRule(new HivePassThroughRule(this.exampleSetInput, this.exampleSetOutput, false) {
                @Override
                public HadoopExampleSetMetaData modifyHesMD(HadoopExampleSetMetaData passedThroughMetaData) {
                    passedThroughMetaData.addAllAttributes(AbstractSparkLearner.this.defineNewAttributes());
                    return passedThroughMetaData;
                }
            });
        }
    }

    /**
     * Runs the superclass checks and then the design-time Spark version check, passing the
     * version set returned by {@code SparkVersion.get30AndAbove()}.
     *
     * <p>Decompiler note: the access modifier was originally {@code protected}.
     */
    @Override // eu.radoop.RadoopOperator
    public void performAdditionalChecks() {
        super.performAdditionalChecks();
        SparkTools.checkSparkVersionDesignTime(this, SparkVersion.get30AndAbove());
    }

    /**
     * Runs the Spark job for this learner and converts its result into a model.
     *
     * <p>Steps: runtime Spark version check, stop check, creation of two temporary HDFS
     * directories (model output and example set output), parameter setup, start of the monitoring
     * thread, job execution, optional creation of the output example set, logging of the messages
     * left in the first queue, and finally model retrieval and post-processing.
     *
     * <p>{@link #getModel} may return {@code null}; {@link #postProcessModel} is still invoked
     * with that value.
     *
     * @param hadoopExampleSet the training data
     * @return the model produced by {@link #getModel}, possibly {@code null}
     * @throws OperatorException if the job cannot be run or the model cannot be converted
     */
    @Override // eu.radoop.modeling.prediction.RadoopAbstractLearner
    /* renamed from: learn */
    public M mo1317learn(HadoopExampleSet hadoopExampleSet) throws OperatorException {
        SparkTools.checkSparkVersionRuntime(this, SparkVersion.get30AndAbove());
        checkForStop();
        // The first directory receives the model, the second the example set written by the job
        // (see the parameters set in setupCommonParams).
        TempHDFSDirectory modelOutputDir = getTempHdfsDirectory();
        TempHDFSDirectory exampleSetOutputDir = getTempHdfsDirectory();
        ParameterTransferObject<CommonParameter> commonParams = setupCommonParams(hadoopExampleSet, modelOutputDir, exampleSetOutputDir);
        ParameterTransferObject<P> algorithmParams = setupAlgorithmParams(hadoopExampleSet);
        // Typed queues: the elements are logged as strings below, so no raw type or cast is needed.
        ConcurrentLinkedQueue<String> pendingLogMessages = new ConcurrentLinkedQueue<>();
        // The role of the second queue is defined by SparkTools / the Spark handler and is not visible here.
        Queue<String> secondaryQueue = new ConcurrentLinkedQueue<>();
        startMonitoringThread(pendingLogMessages, secondaryQueue);
        try {
            SparkTools.SparkFinalState finalState = getSparkHandler()
                    .runSpark(this, this, this.sparkOperation, pendingLogMessages, secondaryQueue,
                            SparkTools.getSpecialSparkPropsForHiveVersion(getHadoopContext().getHiveConfiguration().getHiveVersion()),
                            commonParams, algorithmParams)
                    .getFinalState();
            if (finalState == SparkTools.SparkFinalState.SUCCEEDED && !keepOriginalExampleSet()) {
                createOutputExampleSetFromSparkResult(hadoopExampleSet, exampleSetOutputDir);
            }
            // poll() instead of isEmpty()/remove(): if another consumer empties the queue between
            // the two calls, remove() would throw NoSuchElementException.
            String message;
            while ((message = pendingLogMessages.poll()) != null) {
                logSparkMessage(this, message);
            }
            try {
                M model = getModel(hadoopExampleSet, modelOutputDir, finalState);
                postProcessModel(model, hadoopExampleSet);
                return model;
            } catch (IOException e) {
                throw new OperatorException("Could not convert Spark model to RapidMiner model!", e);
            }
        } catch (IOException e2) {
            throw RadoopTools.formattedOperatorException("Unable to run the Spark job.", e2);
        }
    }

    /**
     * Turns the files written by the Spark job into the example set delivered at
     * {@link #exampleSetOutput}.
     *
     * <p>A Hive table with a temporary name is created for the input attributes plus the new
     * attributes, the content of the given directory is loaded into it, and an example set is
     * created on top of that table.
     *
     * @param hadoopExampleSet  the input example set of the learner
     * @param tempHDFSDirectory directory holding the example set output of the Spark job
     * @throws OperatorException if one of the called operations fails
     */
    protected void createOutputExampleSetFromSparkResult(HadoopExampleSet hadoopExampleSet, TempHDFSDirectory tempHDFSDirectory) throws OperatorException {
        final String resultTableName = getTempTableName();
        HadoopExampleSetFactory.createHiveTable(
                getHiveHandler(),
                resultTableName,
                collectSparkJobOutputExampleSetAttributesWithRoles(hadoopExampleSet),
                false,
                "STORED AS " + CommonParameter.FileFormat.TEXTFILE);
        tempHDFSDirectory.loadDataIntoHive(getHiveHandler(), resultTableName, false, true);
        createExampleSet(
                this.exampleSetOutput,
                resultTableName,
                hadoopExampleSet,
                true,
                null,
                null,
                collectNewAttributeTypesByName(),
                null,
                null,
                new HadoopExampleSet[0]);
    }

    /**
     * Maps the name of every attribute from {@link #defineNewAttributes()} to its value type name.
     *
     * @return a new map in the iteration order of {@link #defineNewAttributes()}
     */
    private Map<String, String> collectNewAttributeTypesByName() {
        // Parameterized instead of raw so that key and value types are checked by the compiler.
        Map<String, String> typeNameByAttributeName = new LinkedHashMap<>();
        for (AttributeMetaData newAttribute : defineNewAttributes()) {
            typeNameByAttributeName.put(newAttribute.getName(), newAttribute.getValueTypeName());
        }
        return typeNameByAttributeName;
    }

    /**
     * Collects the attributes of the Spark job's output example set together with their roles.
     *
     * <p>The result starts with all input attributes (see {@code collectInputAttributeRoles}) and
     * continues with one freshly created attribute per entry of {@link #defineNewAttributes()}.
     *
     * @param hadoopExampleSet the input example set of the learner
     * @return a new insertion-ordered map from attribute to role
     */
    protected Map<Attribute, String> collectSparkJobOutputExampleSetAttributesWithRoles(HadoopExampleSet hadoopExampleSet) {
        // The copy constructor keeps the iteration order of the source map, like putAll on an empty map.
        Map<Attribute, String> roleByAttribute = new LinkedHashMap<>(collectInputAttributeRoles(hadoopExampleSet));
        for (AttributeMetaData newAttribute : defineNewAttributes()) {
            Attribute created = AttributeFactory.createAttribute(newAttribute.getName(), newAttribute.getValueType());
            roleByAttribute.put(created, newAttribute.getRole());
        }
        return roleByAttribute;
    }

    /**
     * Maps every attribute returned by {@code allAttributes()} of the given example set to the
     * special name of its role.
     *
     * @param hadoopExampleSet the example set whose attributes are inspected
     * @return a new map in the iteration order of {@code allAttributes()}
     */
    private Map<Attribute, String> collectInputAttributeRoles(HadoopExampleSet hadoopExampleSet) {
        Attributes attributes = hadoopExampleSet.getAttributes();
        Map<Attribute, String> roleByAttribute = new LinkedHashMap<>();
        // Typed iterator: removes the raw Iterator and the element cast.
        Iterator<Attribute> remaining = attributes.allAttributes();
        while (remaining.hasNext()) {
            Attribute attribute = remaining.next();
            roleByAttribute.put(attribute, attributes.getRole(attribute).getSpecialName());
        }
        return roleByAttribute;
    }

    /**
     * Describes the attributes that are added to the input attributes in the output example set.
     *
     * <p>The result is used for the output metadata rule registered in the constructor, for the
     * Hive table created from the Spark result, and for the value type names handed to
     * {@code createExampleSet}. It is called from the constructor's metadata rule as well as at
     * run time.
     *
     * @return metadata of the new attributes
     */
    protected abstract List<AttributeMetaData> defineNewAttributes();

    /**
     * Hook invoked by the learn method right after {@link #getModel} returned.
     *
     * @param m                the value returned by {@link #getModel}; that method has code paths
     *                         returning {@code null}, so implementations may receive {@code null}
     * @param hadoopExampleSet the training example set
     */
    protected abstract void postProcessModel(M m, HadoopExampleSet hadoopExampleSet);

    /**
     * Returns operator specific properties; this base implementation has none.
     *
     * @return a new, empty, mutable map
     */
    protected Map<String, String> getOperatorSpecificProperties() {
        // Diamond instead of a raw HashMap to avoid an unchecked conversion.
        return new HashMap<>();
    }

    /**
     * Builds the algorithm specific parameters of the Spark job.
     *
     * <p>Called by the learn method after {@link #setupCommonParams}; the result is passed to the
     * Spark handler together with the common parameters.
     *
     * @param hadoopExampleSet the training example set
     * @return the algorithm specific parameter transfer object
     * @throws OperatorException if the parameters cannot be set up
     */
    protected abstract ParameterTransferObject<P> setupAlgorithmParams(HadoopExampleSet hadoopExampleSet) throws OperatorException;

    /**
     * Returns the name of the example set output port.
     *
     * <p>Called from the constructor of this class, i.e. before the constructor body and field
     * initializers of the implementing subclass have run.
     *
     * @return the port name passed to {@code createPort}
     */
    protected abstract String getCustomOutputExampleSetPortName();

    /**
     * Builds the parameters shared by all Spark learners.
     *
     * <p>Rejects example sets without attributes, runs {@link #checkPreconditions}, determines the
     * input file format via {@link #materializeSparkInputTable}, and then fills in input location,
     * output directories, format, separators, Spark version and per-column information (name,
     * feature flag, nominal flag, negative/positive value strings, label index).
     *
     * @param hadoopExampleSet   the training example set
     * @param tempHDFSDirectory  directory set as model output directory
     * @param tempHDFSDirectory2 directory set as example set output directory
     * @return the populated parameter transfer object
     * @throws OperatorException if the example set has no attributes (user error 106), a
     *                           precondition fails, or the Hive table metadata cannot be read
     */
    protected ParameterTransferObject<CommonParameter> setupCommonParams(HadoopExampleSet hadoopExampleSet, TempHDFSDirectory tempHDFSDirectory, TempHDFSDirectory tempHDFSDirectory2) throws OperatorException {
        if (hadoopExampleSet.getAttributes().size() == 0) {
            throw new UserError(this, 106);
        }
        checkPreconditions(hadoopExampleSet);
        CommonParameter.FileFormat inputFormat = materializeSparkInputTable(hadoopExampleSet);
        try {
            String fieldSeparator = getHiveHandler().getTableExtendedMetaData(HadoopExampleSet.getTableName(hadoopExampleSet), true).getFieldSeparator();
            String nullString = getHiveHandler().getTableExtendedMetaData(HadoopExampleSet.getTableName(hadoopExampleSet), true).getNullString();
            ParameterTransferObject<CommonParameter> params = new ParameterTransferObject<>();
            params.setParameter(CommonParameter.INPUT_DIR, hadoopExampleSet.getSparkInputPathSpecification());
            params.setParameter(CommonParameter.MODEL_OUTPUT_DIR, tempHDFSDirectory.getFullPath());
            params.setParameter(CommonParameter.EXAMPLE_SET_OUTPUT_DIR, tempHDFSDirectory2.getFullPath());
            params.setParameter(CommonParameter.INPUT_FORMAT, inputFormat.name());
            params.setParameter(CommonParameter.FIELD_SEPARATOR, fieldSeparator);
            params.setParameter(CommonParameter.SPARK_VERSION, getHiveHandler().getConnectionEntry().getSparkVersion().name());
            params.setParameter(CommonParameter.NULL_STRING, nullString);
            try {
                Map<String, String> columnTypes = getHiveHandler().getColumnTypes(HadoopExampleSet.getTableName(hadoopExampleSet));
                int columnCount = columnTypes.size();
                Boolean[] isNominal = new Boolean[columnCount];
                String[] columnNames = new String[columnCount];
                // Entries stay null unless the column's value type matches the check below;
                // new arrays are null-initialized, so no explicit fill is needed.
                String[] negativeValues = new String[columnCount];
                String[] positiveValues = new String[columnCount];
                Attribute label = hadoopExampleSet.getAttributes().getLabel();
                String labelName = label == null ? null : label.getName();
                Boolean[] isFeature = new Boolean[columnCount];
                Arrays.fill(isFeature, Boolean.FALSE);
                int columnIndex = 0;
                for (String columnName : columnTypes.keySet()) {
                    Attribute attribute = hadoopExampleSet.getAttributes().get(columnName);
                    isFeature[columnIndex] = Boolean.valueOf(filterFeatureAttribute(hadoopExampleSet.getAttributes(), attribute));
                    columnNames[columnIndex] = columnName;
                    if (columnName.equals(labelName)) {
                        // The label index is only set when the label is one of the Hive columns.
                        params.setParameter(CommonParameter.LABEL_INDEX, Integer.valueOf(columnIndex));
                    }
                    isNominal[columnIndex] = attribute.isNominal();
                    // NOTE(review): 6 is presumably the binominal value type constant, as only
                    // for it a negative/positive string is read — confirm against the value type
                    // constants of RapidMiner.
                    if (attribute.getValueType() == 6) {
                        negativeValues[columnIndex] = attribute.getMapping().getNegativeString();
                        positiveValues[columnIndex] = attribute.getMapping().getPositiveString();
                    }
                    columnIndex++;
                }
                params.setParameter(CommonParameter.IS_FEATURE, isFeature);
                params.setParameter(CommonParameter.COLUMN_NAMES, columnNames);
                params.setParameter(CommonParameter.IS_NOMINAL, isNominal);
                params.setParameter(CommonParameter.NEGATIVE_VALUES, negativeValues);
                params.setParameter(CommonParameter.POSITIVE_VALUES, positiveValues);
                return params;
            } catch (HiveTableException e) {
                // Keep the original exception as cause so the stack trace is not lost.
                throw new OperatorException("Could not get Hive table metadata: " + e.toString(), e);
            }
        } catch (HiveTableException e2) {
            // Keep the original exception as cause so the stack trace is not lost.
            throw new OperatorException("Can't get Hive table metadata!", e2);
        }
    }

    /**
     * Validates the training example set before the Spark parameters are set up.
     *
     * <p>Called by {@link #setupCommonParams} after the check for an empty attribute set and
     * before the input table is materialized.
     *
     * @param hadoopExampleSet the training example set
     * @throws UserError if the example set does not satisfy the learner's requirements
     */
    protected abstract void checkPreconditions(HadoopExampleSet hadoopExampleSet) throws UserError;

    /**
     * Decides whether the given attribute is passed to the Spark job as a feature.
     *
     * @param attributes the attribute set used to look up the role by the attribute's name
     * @param attribute  the attribute to classify
     * @return {@code true} if the role found for the attribute's name is not special
     */
    protected boolean filterFeatureAttribute(Attributes attributes, Attribute attribute) {
        final String attributeName = attribute.getName();
        final boolean hasSpecialRole = attributes.findRoleByName(attributeName).isSpecial();
        return !hasSpecialRole;
    }

    /**
     * Determines the file format in which the Spark job reads the input and materializes the
     * input when required.
     *
     * <p>The example set is asked whether it should be materialized for the text format and for
     * the Parquet format. Only if both answers are yes is it materialized, in the format selected
     * by the data format parameter. Otherwise nothing is materialized and the returned format is
     * Parquet when materialization was requested for text only, and text in all other cases.
     *
     * @param hadoopExampleSet the training example set
     * @return the file format of the input handed to Spark
     * @throws OperatorException if the parameter cannot be read or materialization fails
     */
    protected CommonParameter.FileFormat materializeSparkInputTable(HadoopExampleSet hadoopExampleSet) throws OperatorException {
        final CommonParameter.FileFormat requestedFormat = CommonParameter.FileFormat.values()[getParameterAsInt(SparkTools.PARAMETER_DATAFORMAT)];
        final boolean materializeForText = hadoopExampleSet.shouldMaterialize(this, null, null, FileFormatHive.TEXTFILE);
        final boolean materializeForParquet = hadoopExampleSet.shouldMaterialize(this, null, null, FileFormatHive.PARQUET);

        if (!materializeForText || !materializeForParquet) {
            // No materialization in this case; the format follows from the text-format answer alone.
            if (materializeForText) {
                return CommonParameter.FileFormat.PARQUET;
            }
            return CommonParameter.FileFormat.TEXTFILE;
        }

        final CommonParameter.FileFormat materializedFormat;
        if (requestedFormat == CommonParameter.FileFormat.PARQUET) {
            logNote("Materializing input HadoopExampleSet as Parquet.");
            hadoopExampleSet.materializeInFileFormat(this, getTempHdfsDirectory(), FileFormatHive.PARQUET);
            materializedFormat = CommonParameter.FileFormat.PARQUET;
        } else {
            logNote("Materializing input HadoopExampleSet as Text.");
            hadoopExampleSet.materializeAsText(this, getTempHdfsDirectory());
            materializedFormat = CommonParameter.FileFormat.TEXTFILE;
        }
        logNote("Materialized.");
        return materializedFormat;
    }

    /**
     * Returns the superclass parameters plus the data format category parameter.
     *
     * <p>The default category is Parquet when the compatibility level is at most
     * {@link #DEFAULT_INPUTFORMAT_CHANGED}, and text otherwise.
     *
     * @return the parameter list obtained from the superclass with the data format parameter appended
     */
    public List<ParameterType> getParameterTypes() {
        final List<ParameterType> types = super.getParameterTypes();

        final CommonParameter.FileFormat defaultFormat;
        if (getCompatibilityLevel().isAtMost(DEFAULT_INPUTFORMAT_CHANGED)) {
            defaultFormat = CommonParameter.FileFormat.PARQUET;
        } else {
            defaultFormat = CommonParameter.FileFormat.TEXTFILE;
        }

        final String description = "The input ExampleSet will be materialized in the specified format. "
                + "This setting is ignored if the input is already a table in Text or in Parquet format - in this case no materalization is performed. "
                + "Please note that you can force materializing in Text/Parquet by using the Store operator or by setting the File Format parameter of the Radoop Nest. "
                + "Materializing in Parquet format requires less storage but requires a later Hive version.";

        types.add(new ParameterTypeCategory(SparkTools.PARAMETER_DATAFORMAT, description, SparkTools.DATAFORMATS, defaultFormat.ordinal(), true));
        return types;
    }

    /**
     * Logs a Spark message by delegating to {@code SparkTools.logSparkMessage}.
     *
     * @param operator the operator passed on to {@code SparkTools}
     * @param str      the message to log
     */
    protected static void logSparkMessage(Operator operator, String str) {
        SparkTools.logSparkMessage(operator, str);
    }

    /**
     * Starts the monitoring thread by delegating to {@code SparkTools.startMonitoringThread}
     * with this operator, the two queues and the flag {@code true}.
     *
     * @param queue  first queue handed to {@code SparkTools}; the learn method logs its remaining
     *               entries after the Spark job
     * @param queue2 second queue handed to {@code SparkTools}
     * @throws ProcessStoppedException if {@code SparkTools} reports an {@link OperationKilledException}
     */
    protected void startMonitoringThread(Queue<String> queue, Queue<String> queue2) throws ProcessStoppedException {
        try {
            SparkTools.startMonitoringThread(this, queue, queue2, true);
        } catch (OperationKilledException e) {
            // The kill is reported as a stopped process; the original exception is not attached as cause.
            throw new ProcessStoppedException(this);
        }
    }

    /**
     * Reads the model written by the Spark job and converts it into a RapidMiner model.
     *
     * <p>After a stop check and removal of the non-data files from the directory, a non-succeeded
     * final state is handed to {@code SparkTools.handleSparkFailure}. Otherwise, if the operator
     * produces model output, all lines of the directory are concatenated, parsed as JSON into the
     * model transfer object, enriched with nominal mappings and converted via
     * {@link #convertModelFromMTO}.
     *
     * @param hadoopExampleSet  the training example set
     * @param tempHDFSDirectory the model output directory of the Spark job
     * @param sparkFinalState   final state of the Spark job
     * @return the converted model, or {@code null} if the job did not succeed (and the failure
     *         handler returned normally) or the operator produces no model output
     * @throws IOException       if an I/O error occurs outside the guarded read section
     * @throws OperatorException user error 1503 if no content could be read, user error 1504 if
     *                           reading failed with a message, or any error raised by the conversion
     */
    protected M getModel(HadoopExampleSet hadoopExampleSet, TempHDFSDirectory tempHDFSDirectory, SparkTools.SparkFinalState sparkFinalState) throws IOException, OperatorException {
        checkForStop();
        tempHDFSDirectory.cleanNonDataFiles();
        MapReduceHDFSHandler mapReduceHDFSHandler = getMapReduceHDFSHandler();
        Objects.requireNonNull(mapReduceHDFSHandler);
        // NOTE(review): the reader is never closed here; whether it holds resources that need
        // closing cannot be seen from this file — confirm against HDFSDirectoryReader.
        MapReduceHDFSHandler.HDFSDirectoryReader directoryReader = new MapReduceHDFSHandler.HDFSDirectoryReader(tempHDFSDirectory.getSubDir());
        try {
            if (!sparkFinalState.equals(SparkTools.SparkFinalState.SUCCEEDED)) {
                SparkTools.handleSparkFailure(this, sparkFinalState, tempHDFSDirectory);
                return null;
            }
            if (!producesModelOutput()) {
                return null;
            }
            // StringBuilder: the buffer is confined to this method, so no synchronization is needed.
            StringBuilder json = new StringBuilder();
            String line;
            while ((line = directoryReader.readLineFromDirectory()) != null) {
                json.append(line);
            }
            if (json.length() <= 0) {
                throw new UserError(this, 1503);
            }
            // Typed as T so that convertModelFromMTO receives the declared transfer object type.
            @SuppressWarnings("unchecked")
            T transferObject = (T) TransferObject.fromJson(json.toString(), getMTOClass());
            MTOConverter.setNominalMappings(transferObject, hadoopExampleSet);
            return convertModelFromMTO(transferObject, hadoopExampleSet);
        } catch (IOException e) {
            if (e.getMessage() != null) {
                throw new UserError(this, 1504, new Object[]{RadoopTools.formatOperatorExceptionMessage(null, e)});
            }
            throw new UserError(this, 1503);
        }
    }

    /**
     * Converts the model transfer object read from the Spark job output into a RapidMiner model.
     *
     * <p>Called by {@link #getModel} after the nominal mappings have been set on the transfer object.
     *
     * @param t                the parsed model transfer object
     * @param hadoopExampleSet the training example set
     * @return the converted model
     * @throws OperatorException if the conversion fails
     */
    protected abstract M convertModelFromMTO(T t, HadoopExampleSet hadoopExampleSet) throws OperatorException;

    /**
     * Resolves the class bound to the type parameter {@code P} via reflection.
     *
     * <p>Reads the first type argument of the generic superclass of the runtime class. This
     * only works if that generic superclass is parameterized and the argument is a plain class;
     * otherwise a {@link ClassCastException} is thrown.
     *
     * @return the class object of the first type argument
     */
    @SuppressWarnings("unchecked") // index 0 of the type arguments corresponds to P in the class declaration
    private Class<P> getParameterKeyClass() {
        ParameterizedType genericSuperclass = (ParameterizedType) getClass().getGenericSuperclass();
        return (Class<P>) genericSuperclass.getActualTypeArguments()[0];
    }

    /**
     * Resolves the class bound to the type parameter {@code T} via reflection.
     *
     * <p>Reads the second type argument of the generic superclass of the runtime class. This
     * only works if that generic superclass is parameterized and the argument is a plain class;
     * otherwise a {@link ClassCastException} is thrown.
     *
     * @return the class object of the second type argument
     */
    @SuppressWarnings("unchecked") // index 1 of the type arguments corresponds to T in the class declaration
    private Class<T> getMTOClass() {
        ParameterizedType genericSuperclass = (ParameterizedType) getClass().getGenericSuperclass();
        return (Class<T>) genericSuperclass.getActualTypeArguments()[1];
    }

    /**
     * Resolves the class bound to the type parameter {@code M} via reflection.
     *
     * <p>Reads the third type argument of the generic superclass of the runtime class. This
     * only works if that generic superclass is parameterized and the argument is a plain class;
     * otherwise a {@link ClassCastException} is thrown.
     *
     * @return the class object of the third type argument
     */
    @SuppressWarnings("unchecked") // index 2 of the type arguments corresponds to M in the class declaration
    private Class<M> getRMModelClass() {
        ParameterizedType genericSuperclass = (ParameterizedType) getClass().getGenericSuperclass();
        return (Class<M>) genericSuperclass.getActualTypeArguments()[2];
    }

    /**
     * Returns the model class of this learner, as resolved by {@code getRMModelClass()}.
     *
     * @return the class object of the model type
     */
    @Override // eu.radoop.modeling.prediction.RadoopAbstractLearner
    public Class<M> getModelClass() {
        return getRMModelClass();
    }

    /**
     * Returns the operator versions at which the behavior of this operator changed.
     *
     * <p>NOTE(review): the array contains only {@link #DEFAULT_INPUTFORMAT_CHANGED}; versions
     * possibly reported by a superclass are not included — confirm that this is intended.
     *
     * @return a new array containing {@link #DEFAULT_INPUTFORMAT_CHANGED}
     */
    public OperatorVersion[] getIncompatibleVersionChanges() {
        return new OperatorVersion[]{DEFAULT_INPUTFORMAT_CHANGED};
    }
}
