package com.rapidminer.extension.webtableextraction.operator;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DoubleArrayDataRow;
import com.rapidminer.example.utils.ExampleSetBuilder;
import com.rapidminer.example.utils.ExampleSets;
import com.rapidminer.operator.Annotations;
import com.rapidminer.operator.IOObjectCollection;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.preprocessing.GuessValueTypes;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeFile;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.parameter.conditions.EqualTypeCondition;
import com.rapidminer.tools.LogService;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.webdatacommons.webtables.extraction.ExtendedBasicExtractionAlgorithm;
import org.webdatacommons.webtables.extraction.model.DocumentMetadata;
import org.webdatacommons.webtables.extraction.stats.HashMapStatsData;
import org.webdatacommons.webtables.tools.data.Dataset;
import weka.gui.beans.xml.XMLBeans;

/* loaded from: input_file:com/rapidminer/extension/webtableextraction/operator/HTMLToExampleSetOperator.class */
public class HTMLToExampleSetOperator extends Operator {
    /** Parameter key holding the path of the HTML file to open. */
    public static final String PARAMETER_FILENAME = "filename";
    /** Parameter key holding the URL to open. */
    public static final String PARAMETER_URL = "url";
    /** Parameter key of the category parameter that selects between file and URL input. */
    public static final String PARAMETER_SOURCE_TYPE = "resource_type";
    /** Index within {@link #SOURCE_TYPES} that selects file input. */
    public static final int SOURCE_TYPE_FILE = 0;
    /** Index within {@link #SOURCE_TYPES} that selects URL input. */
    public static final int SOURCE_TYPE_URL = 1;
    /** Extractor used by {@code doWork} to pull data tables out of a parsed HTML document. */
    private ExtendedBasicExtractionAlgorithm extendedBasicExtractionAlgorithm;
    /** Output port on which the collection of extracted tables is delivered. */
    private OutputPort exampleSetCollectionOutput;
    /** Category labels offered for {@link #PARAMETER_SOURCE_TYPE}; positions match the SOURCE_TYPE_* constants. */
    public static final String[] SOURCE_TYPES = {XMLBeans.VAL_FILE, "URL"};
    /** Logger obtained from {@code LogService.getRoot()}; shared by all instances. */
    private static Logger LOGGER = LogService.getRoot();
    /** Number of extraction passes performed by {@code doWork}; also the divisor of the logged processing time. */
    private static int NUM_RUNS = 1;

    public HTMLToExampleSetOperator(OperatorDescription operatorDescription) {
        super(operatorDescription);
        this.exampleSetCollectionOutput = getOutputPorts().createPort("collection of html data tables as example sets");
        getTransformer().addGenerationRule(this.exampleSetCollectionOutput, IOObjectCollection.class);
        this.extendedBasicExtractionAlgorithm = new ExtendedBasicExtractionAlgorithm(new HashMapStatsData(), true, null);
    }

    /**
     * Extends the inherited parameter list with the source-type selector and the two
     * source-specific parameters (file name and URL).
     *
     * <p>The file name parameter depends on the selector being set to the file entry, the URL
     * parameter on it being set to the URL entry.
     *
     * @return the inherited parameter list with the three parameters appended
     */
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();

        // Selector: file or URL, defaulting to the file entry.
        ParameterTypeCategory sourceType = new ParameterTypeCategory(
                PARAMETER_SOURCE_TYPE, "Choose whether to open a file or a URL.", SOURCE_TYPES, SOURCE_TYPE_FILE, true);
        sourceType.setExpert(false);
        types.add(sourceType);

        // File name, tied to the file entry of the selector.
        ParameterTypeFile fileName = new ParameterTypeFile(PARAMETER_FILENAME, "File to open", (String) null, true, false);
        EqualTypeCondition fileSelected =
                new EqualTypeCondition(this, PARAMETER_SOURCE_TYPE, SOURCE_TYPES, true, new int[]{SOURCE_TYPE_FILE});
        fileName.registerDependencyCondition(fileSelected);
        types.add(fileName);

        // URL, tied to the URL entry of the selector.
        ParameterTypeString url = new ParameterTypeString(PARAMETER_URL, "URL to open", true, false);
        EqualTypeCondition urlSelected =
                new EqualTypeCondition(this, PARAMETER_SOURCE_TYPE, SOURCE_TYPES, true, new int[]{SOURCE_TYPE_URL});
        url.registerDependencyCondition(urlSelected);
        types.add(url);

        return types;
    }

    /**
     * Validates the configured source before execution.
     *
     * <p>For file input the file must exist and be readable; for URL input the string must form
     * a syntactically valid URL. Any other source type value is accepted without checks.
     *
     * @throws UserError with id "301" if the file does not exist, "302" if it cannot be read,
     *         or "313" if the URL is malformed
     */
    protected void checkMetaData() throws UserError {
        try {
            int sourceType = getParameterAsInt(PARAMETER_SOURCE_TYPE);
            if (sourceType == SOURCE_TYPE_FILE) {
                File source = getParameterAsFile(PARAMETER_FILENAME);
                if (!source.exists()) {
                    throw new UserError(this, "301", new Object[]{source});
                }
                if (!source.canRead()) {
                    throw new UserError(this, "302", new Object[]{source, StringUtils.EMPTY});
                }
            } else if (sourceType == SOURCE_TYPE_URL) {
                try {
                    // Constructed only to validate the syntax; the instance is discarded.
                    new URL(getParameterAsString(PARAMETER_URL));
                } catch (MalformedURLException malformed) {
                    throw new UserError(this, malformed, "313", new Object[]{getParameterAsString(PARAMETER_URL)});
                }
            }
        } catch (UndefinedParameterError ignored) {
            // Deliberately not propagated: an undefined parameter ends this check silently.
        }
    }

    /**
     * Reads the configured HTML source, extracts its data tables and delivers them as a
     * collection of example sets on the output port.
     *
     * <p>Failures while reading or extracting are logged as warnings and result in an empty
     * (or partially filled) collection being delivered; they are not propagated to the caller.
     * Each extracted table is passed through {@link GuessValueTypes}; if that step fails the
     * unconverted table is delivered instead.
     */
    public void doWork() {
        IOObjectCollection<ExampleSet> tables = new IOObjectCollection<>();
        long startNanos = System.nanoTime();

        for (int run = 0; run < NUM_RUNS; run++) {
            List<Dataset> datasets = extractTablesFromSource();
            if (datasets == null) {
                LOGGER.log(Level.WARNING, "Could not retrieve results form given url");
                continue;
            }
            for (Dataset dataset : datasets) {
                ExampleSet table = createTable(dataset);
                try {
                    table = new GuessValueTypes(getOperatorDescription()).apply(table);
                } catch (OperatorException e) {
                    // Best effort: keep the table with its original value types.
                    LOGGER.log(Level.WARNING, "Could not guess value types for an extracted table " + e.getMessage(), e);
                }
                tables.add(table);
            }
        }

        float elapsedSeconds = (((float) (System.nanoTime() - startNanos)) / NUM_RUNS) / 1.0E9f;
        LOGGER.log(Level.INFO, "Total processing time (sec): " + elapsedSeconds);
        this.exampleSetCollectionOutput.deliver(tables);
    }

    /**
     * Parses the configured source and runs the extraction algorithm on it.
     *
     * @return the extracted datasets, or {@code null} if the source could not be read or the
     *         extraction failed (the cause is logged)
     */
    private List<Dataset> extractTablesFromSource() {
        try {
            Document document = parseSourceDocument();
            if (document == null) {
                // Nothing to extract from; never hand a null document to the extractor.
                return null;
            }
            DocumentMetadata metadata =
                    new DocumentMetadata(0L, 0L, StringUtils.EMPTY, StringUtils.EMPTY, StringUtils.EMPTY);
            return this.extendedBasicExtractionAlgorithm.extract(document, metadata);
        } catch (UndefinedParameterError e) {
            LOGGER.log(Level.WARNING, "Source parameter is not defined " + e.getMessage(), e);
        } catch (MalformedURLException e) {
            // Must precede IOException, of which it is a subclass.
            LOGGER.log(Level.WARNING, "Error accessing the given url. Please check internet connection." + e.getMessage(), e);
        } catch (IOException e) {
            LOGGER.log(Level.WARNING, "Error accessing the given url " + e.getMessage(), e);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can react to it.
            Thread.currentThread().interrupt();
            LOGGER.log(Level.WARNING, "Error extracting data from the given url " + e.getMessage(), e);
        }
        return null;
    }

    /**
     * Loads and parses the HTML document selected by the source-type parameter.
     *
     * @return the parsed document, or {@code null} if no file name is set or the source type is
     *         neither file nor URL
     * @throws UndefinedParameterError if a required parameter value is missing
     * @throws IOException if the file or URL cannot be opened or read
     */
    private Document parseSourceDocument() throws UndefinedParameterError, IOException {
        int sourceType = getParameterAsInt(PARAMETER_SOURCE_TYPE);

        if (sourceType == SOURCE_TYPE_FILE) {
            String path = getParameterAsString(PARAMETER_FILENAME);
            if (path == null) {
                // new File(null) would throw a NullPointerException.
                LOGGER.log(Level.WARNING, "No file name given");
                return null;
            }
            LOGGER.log(Level.INFO, " Got Path = " + path);
            return Jsoup.parse(new File(path), (String) null, StringUtils.EMPTY);
        }

        if (sourceType == SOURCE_TYPE_URL) {
            // try-with-resources closes the stream on every path, including parse failures.
            try (InputStream stream = new URL(getParameterAsString(PARAMETER_URL)).openStream()) {
                return Jsoup.parse(stream, (String) null, StringUtils.EMPTY);
            }
        }

        return null;
    }

    /**
     * Converts an extracted dataset into an example set.
     *
     * <p>{@code dataset.relation} is read column-major: the outer index selects the column, the
     * inner index the row. Every column becomes one attribute created with value type code
     * {@code 5}. Attribute names come from the header row when the dataset has one; columns
     * with a missing or empty header cell, and all columns of header-less datasets, are named
     * {@code "Attribute <position>"}. Repeated names get a {@code "-<n>"} suffix so that every
     * attribute name is unique. The table title (or, if that is absent or empty, the page
     * title) is attached as the "Table or Page Title" annotation.
     *
     * <p>The dataset's key column information is not applied to the resulting example set.
     *
     * @param dataset extracted table; an empty or {@code null} relation yields an example set
     *                without attributes and rows
     * @return the example set built from the dataset
     */
    private ExampleSet createTable(Dataset dataset) {
        String[][] columns = dataset.relation;
        int columnCount = columns == null ? 0 : columns.length;
        int rowCount = columnCount == 0 ? 0 : columns[0].length;
        boolean hasHeader = Boolean.TRUE.equals(dataset.getHasHeader());
        int headerRowIndex = hasHeader ? dataset.getHeaderRowIndex() : 0;

        // ArrayList: attributes are accessed by index for every cell below.
        List<Attribute> attributes = new ArrayList<>(columnCount);
        Map<String, Integer> duplicateCounts = new HashMap<>();
        Set<String> usedNames = new HashSet<>();
        for (int col = 0; col < columnCount; col++) {
            String header = null;
            if (hasHeader && headerRowIndex < columns[col].length) {
                header = columns[col][headerRowIndex];
            }
            String baseName = (header == null || header.isEmpty()) ? "Attribute " + (col + 1) : header;

            // Append "-1", "-2", ... until the name collides with no earlier attribute.
            String name = baseName;
            while (!usedNames.add(name)) {
                Integer seen = duplicateCounts.get(baseName);
                int next = seen == null ? 1 : seen + 1;
                duplicateCounts.put(baseName, next);
                name = baseName + "-" + next;
            }
            attributes.add(AttributeFactory.createAttribute(name, 5));
        }

        ExampleSetBuilder builder = ExampleSets.from(attributes);
        int firstDataRow = hasHeader ? headerRowIndex + 1 : 0;
        for (int row = firstDataRow; row < rowCount; row++) {
            double[] values = new double[columnCount];
            for (int col = 0; col < columnCount; col++) {
                values[col] = attributes.get(col).getMapping().mapString(columns[col][row]);
            }
            builder.addDataRow(new DoubleArrayDataRow(values));
        }

        String title = dataset.getTitle();
        if (title == null || title.isEmpty()) {
            title = dataset.getPageTitle();
        }
        Annotations annotations = new Annotations();
        annotations.setAnnotation("Table or Page Title", title);

        ExampleSet result = builder.build();
        result.getAnnotations().addAll(annotations);
        return result;
    }

    /**
     * Returns the value type code to use for a cell value.
     *
     * @param str the cell value; currently not inspected
     * @return always {@code 5}
     */
    private int determineDataType(String str) {
        final int fixedTypeCode = 5;
        return fixedTypeCode;
    }

    /**
     * Converts a string into the narrowest matching value object.
     *
     * <p>Integral types are tried first, from narrowest to widest ({@code Byte}, {@code Short},
     * {@code Integer}, {@code Long}, {@code BigInteger}), so whole numbers are never degraded
     * to a lossy floating-point value. Only then are {@code Float}, {@code Double} and
     * {@code BigDecimal} attempted. Strings that are not numeric are returned as a
     * {@code Character} when they consist of exactly one char, otherwise unchanged.
     *
     * @param str the text to convert; may be {@code null}
     * @return the converted value, or {@code null} if {@code str} is {@code null}
     */
    private Object getPrimitive(String str) {
        if (str == null) {
            // Float.valueOf(null) would throw a NullPointerException instead of falling through.
            return null;
        }
        try {
            return Byte.valueOf(str);
        } catch (NumberFormatException ignored) {
            // not a byte; try the next wider type
        }
        try {
            return Short.valueOf(str);
        } catch (NumberFormatException ignored) {
            // not a short; try the next wider type
        }
        try {
            return Integer.valueOf(str);
        } catch (NumberFormatException ignored) {
            // not an int; try the next wider type
        }
        try {
            return Long.valueOf(str);
        } catch (NumberFormatException ignored) {
            // not a long; try arbitrary-precision integers
        }
        try {
            return new BigInteger(str);
        } catch (NumberFormatException ignored) {
            // not integral at all; try floating-point types
        }
        try {
            return Float.valueOf(str);
        } catch (NumberFormatException ignored) {
            // not a float literal; the remaining numeric parsers act as fallbacks
        }
        try {
            return Double.valueOf(str);
        } catch (NumberFormatException ignored) {
            // not a double literal
        }
        try {
            return new BigDecimal(str);
        } catch (NumberFormatException ignored) {
            // not numeric
        }
        return str.length() == 1 ? (Object) Character.valueOf(str.charAt(0)) : str;
    }
}
