package eu.radoop.modeling;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.AttributeRole;
import com.rapidminer.example.table.NominalMapping;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ProcessStoppedException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.clustering.ClusterModel;
import com.rapidminer.operator.learner.PredictionModel;
import com.rapidminer.operator.ports.metadata.MetaData;
import com.rapidminer.tools.LogService;
import com.rapidminer.tools.Ontology;
import eu.radoop.RadoopConf;
import eu.radoop.RadoopOperator;
import eu.radoop.RadoopPluginPool;
import eu.radoop.RadoopTools;
import eu.radoop.datahandler.HadoopContext;
import eu.radoop.datahandler.HadoopExampleSet;
import eu.radoop.datahandler.HadoopExampleSetFactory;
import eu.radoop.datahandler.hdfs.TempHDFSDirectory;
import eu.radoop.datahandler.hdfs.TempHDFSFile;
import eu.radoop.datahandler.hive.FileFormatHive;
import eu.radoop.datahandler.hive.JdbcConnectionTools;
import eu.radoop.datahandler.hive.RadoopFileFormat;
import eu.radoop.exception.OperationKilledException;
import eu.radoop.hive.HiveStaticUtils;
import eu.radoop.manipulation.HiveWindowing;
import eu.radoop.modeling.prediction.ParameterTypeModelParam;
import eu.radoop.operator.meta.PushdownOperator;
import eu.radoop.operator.ports.metadata.HadoopExampleSetMetaData;
import eu.radoop.operator.spark.SparkTools;
import eu.radoop.spark.SparkJobResult;
import eu.radoop.spark.SparkOperation;
import eu.radoop.spark.SparkVersion;
import eu.radoop.tools.LogCollectionMethod;
import eu.radoop.tools.LogCollectionTools;
import eu.radoop.transfer.parameter.ParameterTransferObject;
import eu.radoop.transfer.parameter.SparkModelApplyParameter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import jodd.util.StringPool;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.hadoop.fs.shell.Display;
import org.apache.hadoop.yarn.client.cli.ClusterCLI;

/* loaded from: input_file:eu/radoop/modeling/SparkSqlModelApplier.class */
/**
 * Model applier that scores a {@link HadoopExampleSet} by submitting a Spark job
 * ({@link SparkOperation#ModelApplier}).
 *
 * <p>The serialized model is pushed to a temporary HDFS file and a SQL statement built from
 * {@link #PREDICTION_QUERY_TEMPLATE} is handed to the job as the
 * {@link SparkModelApplyParameter#QUERY} parameter. Only {@link PredictionModel} and
 * {@link ClusterModel} subclasses are supported (see {@link #supports(Class)}).
 */
class SparkSqlModelApplier extends AbstractModelApplier {
    private static final Logger log = LogService.getRoot();

    /** Query skeleton; the {@code <...>} placeholders are filled in by {@code generateApplyStatements}. */
    private static final String PREDICTION_QUERY_TEMPLATE = "SELECT <attribute_list>, <model_output_columns> FROM spark_apply_model_input t LATERAL VIEW spark_apply_model ('<param_file>', array(<quoted_attribute_list>), <class_list>, <attribute_list>)  tmp as <model_output_columns>";

    /** Matches a {@code -D<truststore property>=<value>} JVM option; group 1 is the value. Compiled once. */
    private static final Pattern JAVAX_TRUSTSTORE_OPTION = Pattern.compile("-D" + Pattern.quote(RadoopConf.JAVAX_NET_SSL_TRUSTSTORE) + "=([^\\s]+)");

    /**
     * Creates the applier, registering {@link MetaData} and {@link Model} with the superclass.
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public SparkSqlModelApplier() {
        super(MetaData.class, Model.class);
    }

    /**
     * Returns the cost of using this applier.
     *
     * @param modelUsageType ignored by this implementation
     * @return always {@code 1}
     */
    @Override // eu.radoop.modeling.ModelApplier
    public int getCost(ParameterTypeModelParam.ModelUsageType modelUsageType) {
        return 1;
    }

    /**
     * Derives the output meta data; delegates to {@link ModelAppliers#modifyMetaDataForModel}.
     *
     * @param metaData                 meta data of the model
     * @param hadoopExampleSetMetaData meta data of the input example set
     * @return whatever the delegate returns
     */
    @Override // eu.radoop.modeling.ModelApplier
    public HadoopExampleSetMetaData modifyMetaData(MetaData metaData, HadoopExampleSetMetaData hadoopExampleSetMetaData) {
        return ModelAppliers.modifyMetaDataForModel(metaData, hadoopExampleSetMetaData);
    }

    /**
     * Returns the generated roles; delegates to {@link ModelAppliers#getGeneratedRolesForModel}.
     *
     * @param radoopOperator operator on whose behalf the model is applied
     * @param model          model to be applied
     * @return whatever the delegate returns
     */
    @Override // eu.radoop.modeling.AbstractModelApplier, eu.radoop.modeling.ModelApplier
    public Map<String, String> getGeneratedRoles(RadoopOperator radoopOperator, Model model) {
        return ModelAppliers.getGeneratedRolesForModel(radoopOperator, model);
    }

    /**
     * Tells whether this applier can handle the given model class.
     *
     * @param cls model class to check
     * @return {@code true} if the superclass supports {@code cls} and it is a
     *         {@link PredictionModel} or a {@link ClusterModel}
     */
    @Override // eu.radoop.modeling.AbstractModelApplier, eu.radoop.modeling.ModelApplier
    public boolean supports(Class<? extends Model> cls) {
        return super.supports(cls) && (PredictionModel.class.isAssignableFrom(cls) || ClusterModel.class.isAssignableFrom(cls));
    }

    /**
     * Applies {@code model} to {@code hadoopExampleSet} through a Spark job.
     *
     * <p>The input is remapped to the model's training header, the model is pushed to a
     * temporary HDFS file, the apply query and the Spark properties are assembled, and the
     * job is submitted. The temporary model file is closed exactly once when this method
     * exits, whether normally or with an exception.
     *
     * @param radoopOperator   operator on whose behalf the model is applied
     * @param model            model to apply
     * @param hadoopExampleSet input data
     * @param map              not used by this implementation
     * @return name of the temporary table that was passed to the job as its output table
     * @throws OperatorException if a compatibility check fails, the job cannot be submitted,
     *                           the job fails, or the process is stopped
     */
    @Override // eu.radoop.modeling.ModelApplier
    public String apply(RadoopOperator radoopOperator, Model model, HadoopExampleSet hadoopExampleSet, Map<String, String> map) throws OperatorException {
        SparkTools.checkSparkVersionRuntime(radoopOperator, SparkVersion.get30AndAbove());
        HadoopContext hadoopContext = radoopOperator.getHadoopContext();
        TempHDFSFile modelFile = null;
        try {
            HadoopExampleSet remappedInput = HadoopExampleSetFactory.remapHadoopExampleSet(hadoopExampleSet, model.getTrainingHeader(), true);
            checkCompatibility(remappedInput, model);
            modelFile = hadoopContext.getMapReduceHDFSHandler().pushObjectToTempFile(radoopOperator, model);
            log.fine("Model file: " + modelFile.getFullPathWithSpec());
            TempHDFSDirectory exceptionDirectory = radoopOperator.getTempHdfsDirectory();
            String resultTable = radoopOperator.getTempTableName();
            log.fine("Result table: " + resultTable);
            List<String> statements = generateApplyStatements(radoopOperator, model, modelFile, remappedInput, resultTable);
            ParameterTransferObject jobParameters = buildJobParameters(hadoopContext, remappedInput, resultTable, exceptionDirectory, statements.get(0));
            HashMap sparkProperties = buildSparkProperties(radoopOperator, hadoopContext);
            log.fine("SpecialProps: " + sparkProperties);
            radoopOperator.checkForStop();
            runApplyJob(radoopOperator, hadoopContext, sparkProperties, jobParameters, exceptionDirectory);
            return resultTable;
        } finally {
            // A single finally replaces the former "close on success + close in catch(Throwable)"
            // pair, which closed the file twice when the first close() itself failed.
            if (modelFile != null) {
                modelFile.close();
            }
        }
    }

    /**
     * Assembles the parameters handed over to the Spark model-apply job.
     */
    private ParameterTransferObject buildJobParameters(HadoopContext hadoopContext, HadoopExampleSet remappedInput, String resultTable, TempHDFSDirectory exceptionDirectory, String query) throws OperatorException {
        ParameterTransferObject jobParameters = new ParameterTransferObject();
        jobParameters.setParameter(SparkModelApplyParameter.DEFAULT_DB, hadoopContext.getHiveConfiguration().getHiveDB());
        jobParameters.setParameter(SparkModelApplyParameter.INPUT_TABLE, HadoopExampleSet.getTableName(remappedInput));
        jobParameters.setParameter(SparkModelApplyParameter.OUTPUT_TABLE, resultTable);
        RadoopFileFormat fileFormat = hadoopContext.getFileFormat();
        if (fileFormat.isDefault()) {
            // Resolve "default" to the concrete format reported by the Hive handler.
            fileFormat = FileFormatHive.getFromString(hadoopContext.getHiveHandler().getFormattedDefaultFileFormat(hadoopContext.getMapReduceHDFSHandler()));
        }
        jobParameters.setParameter(SparkModelApplyParameter.OUTPUT_TABLE_FORMAT, toSparkFormat(fileFormat.name()));
        jobParameters.setParameter(SparkModelApplyParameter.EXCEPTION_DIRECTORY, exceptionDirectory.getFullPath());
        jobParameters.setParameter(SparkModelApplyParameter.QUERY, query);
        return jobParameters;
    }

    /**
     * Assembles the extra Spark properties (jars, Hive JDBC/metastore settings, Kryo registrator)
     * for the model-apply job.
     */
    // NOTE(review): the map stays raw because the value types returned by the settings getters
    // and the map type expected by runSpark are not visible from this class - confirm and add
    // type arguments.
    @SuppressWarnings({"rawtypes", "unchecked"})
    private HashMap buildSparkProperties(RadoopOperator radoopOperator, HadoopContext hadoopContext) throws OperatorException {
        HashMap sparkProperties = new HashMap();
        sparkProperties.put(RadoopConf.SPARK_VALIDATE_OUTPUT_SPECS, "false");
        // Copy before adding: the list is owned by the UDF handler and may be shared or unmodifiable.
        List<String> jarPaths = new ArrayList<>(hadoopContext.getUDFHandler().ensurePushdownDependenciesOnHDFS(radoopOperator, radoopOperator.getLogger()));
        jarPaths.add(PushdownOperator.copyExtension(hadoopContext.getConnectionEntry().getHdfsSharedDir(), RadoopPluginPool.findH2OPlugin(), hadoopContext.getUploadService(), radoopOperator));
        jarPaths.add(hadoopContext.getUDFHandler().ensureHiveJarOnHDFS(radoopOperator.getLogger()));
        String fileSystemSpec = hadoopContext.getMapReduceHDFSHandler().getFileSystemSpec();
        sparkProperties.put(RadoopConf.SPARK_JARS, jarPaths.stream().map(path -> fileSystemSpec + path).collect(Collectors.joining(",")));
        sparkProperties.put(RadoopConf.SPARK_SQL_HIVE_HIVESERVER2_JDBC_URL, JdbcConnectionTools.getConnectionStringForHwc(hadoopContext.getHiveConfiguration(), getTrustStore(hadoopContext)));
        sparkProperties.put(RadoopConf.SPARK_SQL_HIVE_HIVESERVER2_JDBC_URL_PRINCIPAL, hadoopContext.getConnectionEntry().getAdvancedHiveSettings().getValue(RadoopConf.HIVE_SERVER_PRINCIPAL));
        if (hadoopContext.getHiveConfiguration().isHiveHighAvailability()) {
            sparkProperties.put(RadoopConf.SPARK_HADOOP_HIVE_ZOOKEEPER_QUORUM, hadoopContext.getHiveConfiguration().getZookeeperQuorum());
        }
        sparkProperties.put(RadoopConf.SPARK_HADOOP_HIVE_METASTORE_URIS, hadoopContext.getAdditionalHadoopInfo(RadoopConf.HIVE_METASTORE_URIS));
        sparkProperties.put(RadoopConf.SPARK_KRYO_REGISTRATOR, RadoopConf.SPARK_RADOOP_KRYO_REGISTRATOR_CLASSNAME_HIVE_ACID);
        return sparkProperties;
    }

    /**
     * Starts the monitoring thread, submits the model-apply job and handles a non-successful
     * final state.
     */
    // NOTE(review): the two queues stay raw because their element types are defined by
    // SparkTools.startMonitoringThread / runSpark, which are not visible here.
    @SuppressWarnings({"rawtypes", "unchecked"})
    private void runApplyJob(RadoopOperator radoopOperator, HadoopContext hadoopContext, HashMap sparkProperties, ParameterTransferObject jobParameters, TempHDFSDirectory exceptionDirectory) throws OperatorException {
        ConcurrentLinkedQueue firstMonitorQueue = new ConcurrentLinkedQueue();
        ConcurrentLinkedQueue secondMonitorQueue = new ConcurrentLinkedQueue();
        try {
            SparkTools.startMonitoringThread(radoopOperator, firstMonitorQueue, secondMonitorQueue, true);
            try {
                log.fine("Submitting Model Apply Job");
                SparkJobResult jobResult = hadoopContext.getSparkHandler().runSpark(radoopOperator, radoopOperator, SparkOperation.ModelApplier, firstMonitorQueue, secondMonitorQueue, sparkProperties, null, jobParameters);
                SparkTools.SparkFinalState finalState = jobResult.getFinalState();
                String applicationId = jobResult.getApplicationId();
                // Constant-first comparison: a missing final state is treated as a failure
                // instead of raising a NullPointerException.
                if (SparkTools.SparkFinalState.SUCCEEDED.equals(finalState)) {
                    log.fine("Model Apply Job succeeded");
                    return;
                }
                log.fine("Model Apply Job failed");
                try {
                    LogCollectionTools.logStdOut(applicationId, LogCollectionMethod.ALL_CONTAINERS, hadoopContext);
                    // NOTE(review): presumably handleSparkFailure throws for every failed state;
                    // if it can return normally the caller still gets the result table name - confirm.
                    SparkTools.handleSparkFailure(radoopOperator, finalState, exceptionDirectory);
                    radoopOperator.checkForOperationStop();
                } catch (IOException e) {
                    if (e.getMessage() != null) {
                        throw new UserError(radoopOperator, 1504, new Object[]{RadoopTools.formatOperatorExceptionMessage(null, e)});
                    }
                    throw new UserError(radoopOperator, 1503);
                }
            } catch (IOException e2) {
                throw RadoopTools.formattedOperatorException("The Spark job could not be submitted.", e2);
            }
        } catch (OperationKilledException e3) {
            throw new ProcessStoppedException(radoopOperator);
        }
    }

    /**
     * Builds the SQL statement(s) that apply {@code model} to {@code hadoopExampleSet}.
     *
     * <p>Every regular attribute of the training header must pass one of the two value-type
     * checks below, otherwise user error 1221 is raised. For a cluster model one output column
     * is produced and the class list is {@code -1}; for other models the output is a
     * {@code prediction_<label>} column plus, for a nominal label, one confidence column per
     * label value, and the class list is the number of values followed by the quoted values
     * ({@code 0} for a non-nominal label).
     *
     * @param radoopOperator   operator used for error reporting and confidence column naming
     * @param model            model to apply
     * @param tempHDFSFile     HDFS file holding the serialized model
     * @param hadoopExampleSet input data (already remapped by the caller)
     * @param str              result table name; only used for the {@code <result_name>}
     *                         placeholder, which the current template does not contain
     * @return a list with exactly one statement
     * @throws OperatorException on unsupported attribute types or failed capability checks
     */
    private static List<String> generateApplyStatements(RadoopOperator radoopOperator, Model model, TempHDFSFile tempHDFSFile, HadoopExampleSet hadoopExampleSet, String str) throws OperatorException {
        if (model instanceof ClusterModel) {
            ((ClusterModel) model).checkCapabilities(hadoopExampleSet);
        }
        Iterator<?> regularRoles = model.getTrainingHeader().getAttributes().regularAttributes();
        while (regularRoles.hasNext()) {
            Attribute attribute = ((AttributeRole) regularRoles.next()).getAttribute();
            int valueType = attribute.getValueType();
            // NOTE(review): 2 and 1 are presumably Ontology.NUMERICAL and Ontology.NOMINAL - confirm
            // and switch to the named constants.
            boolean supported = Ontology.ATTRIBUTE_VALUE_TYPE.isA(valueType, 2) || Ontology.ATTRIBUTE_VALUE_TYPE.isA(valueType, 1);
            if (!supported) {
                throw new UserError(radoopOperator, 1221, new Object[]{Ontology.VALUE_TYPE_NAMES[valueType], attribute.getName()});
            }
        }

        StringBuilder outputColumns = new StringBuilder();
        StringBuilder classList = new StringBuilder();
        if (model instanceof ClusterModel) {
            String roleName = ((ClusterModel) model).isAddingLabel() ? HiveWindowing.ROLE_LABEL : ClusterCLI.CMD;
            outputColumns.append(HiveStaticUtils.getCanonicalAttributeName(roleName));
            classList.append("-1");
        } else {
            Attribute label = model.getTrainingHeader().getAttributes().getLabel();
            outputColumns.append("prediction_").append(HiveStaticUtils.getCanonicalAttributeName(label.getName()));
            if (label.isNominal()) {
                NominalMapping mapping = label.getMapping();
                classList.append(mapping.size());
                for (String labelValue : mapping.getValues()) {
                    // The value ends up inside a single-quoted SQL literal, so it must be escaped;
                    // the confidence column name is still derived from the raw value.
                    classList.append(",'").append(escapeSqlStringLiteral(labelValue)).append(StringPool.SINGLE_QUOTE);
                    outputColumns.append(",").append(HiveModelApplier.getCanonicalConfidenceAttributeName(radoopOperator, labelValue));
                }
            } else {
                classList.append("0");
            }
        }

        // NOTE(review): <regular_list>, <source_name> and <result_name> do not occur in
        // PREDICTION_QUERY_TEMPLATE, so those three replacements are currently no-ops.
        String statement = PREDICTION_QUERY_TEMPLATE
                .replace("<attribute_list>", RadoopTools.getAttributeList(hadoopExampleSet.attributes))
                .replace("<quoted_attribute_list>", RadoopTools.getAttributeList(hadoopExampleSet.attributes, StringPool.SINGLE_QUOTE, false))
                .replace("<regular_list>", RadoopTools.getAttributeList(hadoopExampleSet.attributes, true))
                .replace("<source_name>", HadoopExampleSet.getTableName(hadoopExampleSet))
                .replace("<result_name>", str)
                .replace("<param_file>", tempHDFSFile.getFullPathWithSpec())
                .replace("<model_output_columns>", outputColumns)
                .replace("<class_list>", classList);
        List<String> statements = new ArrayList<>();
        statements.add(statement);
        return statements;
    }

    /**
     * Escapes a value for use inside a single-quoted SQL string literal by prefixing
     * backslashes and single quotes with a backslash.
     *
     * <p>Assumes the query parser treats backslash as the escape character in string literals
     * (the Spark SQL default).
     */
    private static String escapeSqlStringLiteral(String value) {
        // String.valueOf keeps the previous rendering of a null value ("null").
        return String.valueOf(value).replace("\\", "\\\\").replace("'", "\\'");
    }

    /**
     * Maps a file format name to the lower-case name put into the job's output-format parameter.
     *
     * @param str file format name; {@code "textfile"} (any case) maps to
     *            {@code Display.Text.NAME}, everything else is lower-cased
     * @return the mapped format name
     */
    private String toSparkFormat(String str) {
        if (str.equalsIgnoreCase("textfile")) {
            return Display.Text.NAME;
        }
        // Locale.ROOT keeps the result independent of the JVM default locale
        // (e.g. the Turkish dotless i would otherwise corrupt format names containing 'I').
        return str.toLowerCase(Locale.ROOT);
    }

    /**
     * Extracts the value of the {@code -D<RadoopConf.JAVAX_NET_SSL_TRUSTSTORE>=...} option from
     * a JVM options string.
     *
     * @param str JVM options, may be {@code null}
     * @return the first matching option value (up to the next whitespace), or {@code null}
     *         if {@code str} is {@code null} or contains no such option
     */
    private static String getJavaxSslTrustStoreProp(String str) {
        if (str == null) {
            return null;
        }
        Matcher matcher = JAVAX_TRUSTSTORE_OPTION.matcher(str);
        return matcher.find() ? matcher.group(1) : null;
    }

    /**
     * Determines the trust store to use for the Hive JDBC connection string.
     *
     * <p>Precedence: trust store from the driver's extra Java options, then from the executor's
     * extra Java options, then the SSL client trust store location setting. A warning is logged
     * when driver and executor disagree, and when nothing could be determined.
     *
     * @param hadoopContext context providing the advanced Spark settings
     * @return the trust store value, or {@code null} if none is configured
     */
    private String getTrustStore(HadoopContext hadoopContext) {
        String configuredLocation = hadoopContext.getConnectionEntry().getAdvancedSparkSettings().getEnabledValueOrNull(RadoopConf.SSL_CLIENT_TRUSTSTORE_LOCATION);
        String driverOptions = hadoopContext.getConnectionEntry().getAdvancedSparkSettings().getEnabledValueOrNull(RadoopConf.DRIVER_EXTRA_JAVA_OPTIONS);
        String executorOptions = hadoopContext.getConnectionEntry().getAdvancedSparkSettings().getEnabledValueOrNull(RadoopConf.EXECUTOR_EXTRA_JAVA_OPTIONS);
        String driverTrustStore = getJavaxSslTrustStoreProp(driverOptions);
        String executorTrustStore = getJavaxSslTrustStoreProp(executorOptions);
        // Objects.equals(null, null) is true, so no separate "at least one is set" check is needed.
        if (!Objects.equals(driverTrustStore, executorTrustStore)) {
            log.warning("Executor and driver truststore is different.");
        }
        String trustStore = ObjectUtils.firstNonNull(driverTrustStore, executorTrustStore, configuredLocation);
        if (trustStore == null) {
            log.warning("Could not determine trust store for Spark->Hive Jdbc connection, Spark job might fail.");
        } else {
            log.fine("Using trust store " + trustStore);
        }
        return trustStore;
    }
}
