package com.rapidminer.extension.webtableextraction.operator;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DoubleArrayDataRow;
import com.rapidminer.example.utils.ExampleSetBuilder;
import com.rapidminer.example.utils.ExampleSets;
import com.rapidminer.operator.Annotations;
import com.rapidminer.operator.IOObjectCollection;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.preprocessing.GuessValueTypes;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeFile;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.parameter.conditions.EqualTypeCondition;
import com.rapidminer.tools.LogService;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.webdatacommons.webtables.extraction.ExtendedBasicExtractionAlgorithm;
import org.webdatacommons.webtables.extraction.model.DocumentMetadata;
import org.webdatacommons.webtables.extraction.stats.HashMapStatsData;
import org.webdatacommons.webtables.tools.data.Dataset;
import weka.gui.beans.xml.XMLBeans;

/* loaded from: input_file:com/rapidminer/extension/webtableextraction/operator/HTMLToExampleSetOperator.class */
public class HTMLToExampleSetOperator extends Operator {
    public static final String PARAMETER_FILENAME = "file name";
    public static final String PARAMETER_URL = "url";
    public static final String PARAMETER_SOURCE_TYPE = "resource_type";
    public static final int SOURCE_TYPE_FILE = 0;
    public static final int SOURCE_TYPE_URL = 1;
    private ExtendedBasicExtractionAlgorithm extendedBasicExtractionAlgorithm;
    private static final String DEFAULT_USER_AGENT = "Mozilla";
    public static final int JSOUP_CONNECTION_READ_TIMEOUT = 300000;
    private OutputPort exampleSetCollectionOutput;
    public static final String[] SOURCE_TYPES = {XMLBeans.VAL_FILE, "url"};
    private static Logger LOGGER = LogService.getRoot();
    private static int NUM_RUNS = 1;

    public HTMLToExampleSetOperator(OperatorDescription operatorDescription) {
        super(operatorDescription);
        this.exampleSetCollectionOutput = getOutputPorts().createPort("collection of html data tables as example sets");
        getTransformer().addGenerationRule(this.exampleSetCollectionOutput, IOObjectCollection.class);
        this.extendedBasicExtractionAlgorithm = new ExtendedBasicExtractionAlgorithm(new HashMapStatsData(), true, null);
    }

    public List<ParameterType> getParameterTypes() {
        List<ParameterType> parameterTypes = super.getParameterTypes();
        ParameterTypeCategory parameterTypeCategory = new ParameterTypeCategory("resource_type", "Choose whether to open a file or a URL.", SOURCE_TYPES, 0, true);
        parameterTypeCategory.setExpert(false);
        parameterTypes.add(parameterTypeCategory);
        ParameterTypeFile parameterTypeFile = new ParameterTypeFile("file name", "File to open", (String) null, true, false);
        parameterTypeFile.registerDependencyCondition(new EqualTypeCondition(this, "resource_type", SOURCE_TYPES, true, new int[]{0}));
        parameterTypes.add(parameterTypeFile);
        ParameterTypeString parameterTypeString = new ParameterTypeString("url", "URL to open", true, false);
        parameterTypeString.registerDependencyCondition(new EqualTypeCondition(this, "resource_type", SOURCE_TYPES, true, new int[]{1}));
        parameterTypes.add(parameterTypeString);
        return parameterTypes;
    }

    /* JADX WARN: Can't fix incorrect switch cases order, some code will duplicate */
    /* JADX WARN: Failed to find 'out' block for switch in B:3:0x0006. Please report as an issue. */
    protected void checkMetaData() throws UserError {
        try {
            switch (getParameterAsInt("resource_type")) {
                case 0:
                    File parameterAsFile = getParameterAsFile("file name");
                    if (!parameterAsFile.exists()) {
                        throw new UserError(this, "301", new Object[]{parameterAsFile});
                    }
                    if (!parameterAsFile.canRead()) {
                        throw new UserError(this, "302", new Object[]{parameterAsFile, StringUtils.EMPTY});
                    }
                    return;
                case 1:
                    try {
                        new URL(getParameterAsString("url"));
                        return;
                    } catch (MalformedURLException e) {
                        throw new UserError(this, e, "313", new Object[]{getParameterAsString("url")});
                    }
                default:
                    return;
            }
        } catch (UndefinedParameterError e2) {
            LOGGER.log(Level.WARNING, "Error: " + e2.getMessage());
        }
    }

    public void doWork() throws OperatorException {
        IOObjectCollection<ExampleSet> iOObjectCollection = null;
        long nanoTime = System.nanoTime();
        try {
            if (getParameterAsInt("resource_type") == 0) {
                String parameterAsString = getParameterAsString("file name");
                File file = new File(parameterAsString);
                if (!file.exists()) {
                    throw new UserError(this, "301", new Object[]{file});
                }
                if (!file.canRead()) {
                    throw new UserError(this, "302", new Object[]{file, StringUtils.EMPTY});
                }
                iOObjectCollection = createCollectionFromFileOrUrl(null, parameterAsString, file);
            } else if (getParameterAsInt("resource_type") == 1) {
                String parameterAsString2 = getParameterAsString("url");
                iOObjectCollection = createCollectionFromFileOrUrl(new URL(parameterAsString2), parameterAsString2, null);
            }
        } catch (MalformedURLException e) {
            LOGGER.log(Level.WARNING, "Error accessing the given url. Please check internet connection." + e.getMessage());
        } catch (UndefinedParameterError e2) {
            LOGGER.log(Level.WARNING, "UndefinedParameterError : " + e2.getMessage());
        } catch (IOException e3) {
            LOGGER.log(Level.WARNING, "Error connecting to the given url " + e3.getMessage());
        }
        LOGGER.log(Level.INFO, "Total processing time (sec): " + ((((float) (System.nanoTime() - nanoTime)) / NUM_RUNS) / 1.0E9f));
        this.exampleSetCollectionOutput.deliver(iOObjectCollection);
    }

    private IOObjectCollection<ExampleSet> createCollectionFromFileOrUrl(URL url, String str, File file) {
        IOObjectCollection<ExampleSet> iOObjectCollection = new IOObjectCollection<>();
        String str2 = null;
        InputStream inputStream = null;
        for (int i = 0; i < NUM_RUNS; i++) {
            Document document = null;
            DocumentMetadata documentMetadata = new DocumentMetadata(0L, 0L, StringUtils.EMPTY, StringUtils.EMPTY, StringUtils.EMPTY);
            List<Dataset> list = null;
            try {
                try {
                    try {
                        long currentTimeMillis = System.currentTimeMillis();
                        if (url != null) {
                            try {
                                document = Jsoup.connect(str).userAgent(DEFAULT_USER_AGENT).get();
                                str2 = document.title();
                                if (str2 == null || str2.length() == 0) {
                                    document = Jsoup.connect(str).timeout(300000).get();
                                    str2 = document.title();
                                }
                            } catch (IOException e) {
                                LOGGER.log(Level.INFO, "Exception Opening Connectoin: " + e.getMessage());
                                document = Jsoup.connect(str).timeout(300000).userAgent(DEFAULT_USER_AGENT).get();
                                str2 = document.title();
                                LOGGER.log(Level.INFO, "Document Retrieved Successfully using User Agent");
                            }
                        } else if (file != null) {
                            document = Jsoup.parse(file, (String) null, StringUtils.EMPTY);
                            str2 = document.title();
                        }
                        LOGGER.log(Level.INFO, "Page parsed  in :" + ((System.currentTimeMillis() - currentTimeMillis) / 1000) + " seconds");
                        list = this.extendedBasicExtractionAlgorithm.extract(document, documentMetadata);
                        if (inputStream != null) {
                            try {
                                inputStream.close();
                            } catch (IOException e2) {
                                LOGGER.log(Level.WARNING, "Error closing the input stream" + e2.getMessage());
                                inputStream = null;
                            }
                        } else if (file != null) {
                            file = null;
                        }
                    } catch (InterruptedException e3) {
                        LOGGER.log(Level.WARNING, "Error extracting data from the given url " + e3.getMessage());
                        if (inputStream != null) {
                            try {
                                inputStream.close();
                            } catch (IOException e4) {
                                LOGGER.log(Level.WARNING, "Error closing the input stream" + e4.getMessage());
                                inputStream = null;
                            }
                        } else if (file != null) {
                            file = null;
                        }
                    }
                } catch (Throwable th) {
                    if (inputStream != null) {
                        try {
                            inputStream.close();
                        } catch (IOException e5) {
                            LOGGER.log(Level.WARNING, "Error closing the input stream" + e5.getMessage());
                        }
                    } else if (file != null) {
                    }
                    throw th;
                }
            } catch (IOException e6) {
                LOGGER.log(Level.WARNING, "Error accessing the given url " + e6.getMessage());
                if (inputStream != null) {
                    try {
                        inputStream.close();
                    } catch (IOException e7) {
                        LOGGER.log(Level.WARNING, "Error closing the input stream" + e7.getMessage());
                        inputStream = null;
                    }
                } else if (file != null) {
                    file = null;
                }
            }
            if (list != null) {
                Iterator<Dataset> it = list.iterator();
                while (it.hasNext()) {
                    ExampleSet createTable = createTable(it.next(), str2);
                    try {
                        createTable = new GuessValueTypes(getOperatorDescription()).apply(createTable);
                    } catch (OperatorException e8) {
                        LOGGER.log(Level.WARNING, "Error: " + e8.getMessage());
                    }
                    iOObjectCollection.add(createTable);
                }
            } else {
                LOGGER.log(Level.WARNING, "Could not retrieve results from given url");
            }
        }
        return iOObjectCollection;
    }

    private ExampleSet createTable(Dataset dataset, String str) {
        String[][] strArr = dataset.relation;
        int length = strArr[0].length;
        int length2 = strArr.length;
        boolean booleanValue = dataset.getHasHeader().booleanValue();
        int headerRowIndex = booleanValue ? dataset.getHeaderRowIndex() : 0;
        String str2 = dataset.getKeyColumnIndex() + StringUtils.EMPTY;
        LinkedList linkedList = new LinkedList();
        int i = 0;
        HashMap hashMap = new HashMap();
        for (String[] strArr2 : strArr) {
            String str3 = strArr2[headerRowIndex];
            if (hashMap.containsKey(str3)) {
                int intValue = ((Integer) hashMap.get(str3)).intValue() + 1;
                hashMap.put(str3, new Integer(intValue));
                str3 = str3 + "-" + intValue;
            } else if (!str3.isEmpty()) {
                hashMap.put(str3, new Integer(0));
            }
            if (!booleanValue || str3 == null || str3.isEmpty()) {
                Attribute createAttribute = AttributeFactory.createAttribute("Attribute " + (i + 1), 5);
                if (dataset.getHasKeyColumn() && str2.equals(str3)) {
                    str2 = "Attribute " + (i + 1);
                }
                linkedList.add(createAttribute);
            } else {
                linkedList.add(AttributeFactory.createAttribute(str3, 5));
            }
            i++;
        }
        ExampleSetBuilder from = ExampleSets.from(linkedList);
        for (int i2 = booleanValue ? headerRowIndex + 1 : 0; i2 < length; i2++) {
            double[] dArr = new double[linkedList.size()];
            for (int i3 = 0; i3 < length2; i3++) {
                dArr[i3] = ((Attribute) linkedList.get(i3)).getMapping().mapString(strArr[i3][i2]);
            }
            from.addDataRow(new DoubleArrayDataRow(dArr));
        }
        Annotations annotations = new Annotations();
        annotations.setAnnotation("Table or Page Title", dataset.getTitle().length() > 0 ? dataset.getTitle() : dataset.getPageTitle());
        annotations.setAnnotation("Document Title", str);
        ExampleSet build = from.build();
        build.getAnnotations().addAll(annotations);
        return build;
    }

    private int determineDataType(String str) {
        return 5;
    }

    private Object getPrimitive(String str) {
        try {
            return Byte.valueOf(str);
        } catch (NumberFormatException e) {
            try {
                return Short.valueOf(str);
            } catch (NumberFormatException e2) {
                try {
                    return Integer.valueOf(str);
                } catch (NumberFormatException e3) {
                    try {
                        return Float.valueOf(str);
                    } catch (NumberFormatException e4) {
                        try {
                            return Double.valueOf(str);
                        } catch (NumberFormatException e5) {
                            try {
                                return Long.valueOf(str);
                            } catch (NumberFormatException e6) {
                                try {
                                    return new BigInteger(str);
                                } catch (NumberFormatException e7) {
                                    try {
                                        return new BigDecimal(str);
                                    } catch (NumberFormatException e8) {
                                        return str.length() == 1 ? new Character(str.charAt(0)) : str;
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}
