package com.rapidminer.extension.webtableextraction.microdataparser;

import ch.qos.logback.classic.Level;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DoubleArrayDataRow;
import com.rapidminer.example.utils.ExampleSetBuilder;
import com.rapidminer.example.utils.ExampleSets;
import com.rapidminer.extension.webtableextraction.operator.StructuredDataExtractionOperator;
import com.rapidminer.operator.Annotations;
import com.rapidminer.tools.LogService;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Logger;
import org.apache.http.HttpHost;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.jsoup.select.NodeTraversor;

/* loaded from: input_file:com/rapidminer/extension/webtableextraction/microdataparser/StructuredDataExtractor.class */
/**
 * Downloads an HTML page with jsoup and turns the elements matching a given
 * attribute/value pair (for example {@code itemtype="http://schema.org/Product"})
 * into a RapidMiner {@link ExampleSet}, one row per matching element.
 *
 * <p>Instances hold the most recently used URL, connection and document, and a
 * running map of link counts; no synchronization is performed.
 */
public class StructuredDataExtractor {
    public static final String PRODUCT_ITEM_TYPE = "http://schema.org/Product";
    public static final String AGGREGATE_RATING_ITEM_TYPE = "http://schema.org/AggregateRating";
    public static final String OFFER_RATING_ITEM_TYPE = "http://schema.org/Offer";
    public static final String BRAND_RATING_ITEM_TYPE = "http://schema.org/Brand";
    public static final String IN_STOCK_ITEM_TYPE = "http://schema.org/InStock";
    public static final String PRICE_SPECIFICATION_ITEM_TYPE = "http://schema.org/PriceSpecification";
    public static final String OFFER_ITEM_CONDITION_ITEM_TYPE = "http://schema.org/OfferItemCondition";
    public static final String ORGANIZATION_ITEM_TYPE = "http://schema.org/Organization";
    public static final String ITEM_TITLE = "name";
    public static final String ITEM_PRICE = "price";
    public static final String PRODUCT_NAME = "productName";
    public static final String PRODUCT_PRICE = "productPrice";
    public static final String PRODUCT_ORGANIZATION = "organization";

    /**
     * Value type handed to {@link AttributeFactory#createAttribute} for every column.
     * NOTE(review): presumably the RapidMiner ontology code for "string" - confirm.
     */
    private static final int ATTRIBUTE_VALUE_TYPE = 5;

    /** Read timeout, in milliseconds, used by the {@link #main} smoke test. */
    private static final int MAIN_TIMEOUT_MILLIS = 5000;

    /** Prefix a link must start with to be counted; also covers {@code https}. */
    private static final String HTTP_PREFIX = "http";

    private static final Logger LOGGER = LogService.getRoot();

    /** Never assigned inside this class; {@link #getMapOfAttributeValues()} returns it as-is. */
    private Map<String, String> mapOfAttributeValues;
    private String targetAttributeValue = null;
    private Document document = null;
    public int jSoupConnectionReadTimeout = 300000;
    private String urlString = "https://www.digikey.de/products/de/rf-if-and-rfid/rf-amplifiers/860";
    private Connection connection = null;
    private String userAgent = StructuredDataExtractionOperator.DEFAULT_USER_AGENT;
    private LinkedHashMap<String, Integer> mapOfSearchCounts = new LinkedHashMap<>();

    /**
     * Manual smoke test: fetches a fixed search page and prints the body text and
     * the text of the first anchor, or {@code NOTHING} when the page has no anchor.
     *
     * @param strArr ignored
     */
    public static void main(String[] strArr) {
        String url = "https://www.amazon.de/s/ref=nb_sb_noss_1?__mk_de_DE=%C3%85M%C3%85%C5%BD%C3%95%C3%91&url=search-alias%3Daps&field-keywords=chairs";
        try {
            Document page = Jsoup.connect(url).timeout(MAIN_TIMEOUT_MILLIS).get();
            Element firstAnchor = page.select("a").first();
            if (firstAnchor == null) {
                System.out.println("NOTHING");
                return;
            }
            System.out.println(page.body().text());
            System.out.println(firstAnchor.text());
        } catch (IOException e) {
            // MalformedURLException is an IOException, so one handler covers both.
            LOGGER.log(java.util.logging.Level.WARNING, "Could not fetch " + url, e);
        }
    }

    /**
     * Opens a connection to the currently configured URL and fetches the page.
     * A failed fetch is logged, not thrown; the stored document is then
     * {@code null} so callers cannot mistake an earlier page for the current one.
     *
     * @return the connection that was used for the fetch
     */
    public Connection createConnection() {
        try {
            fetch();
        } catch (IOException e) {
            LOGGER.log(java.util.logging.Level.WARNING, "Could not retrieve " + this.urlString, e);
        }
        return this.connection;
    }

    /**
     * Stores {@code str} as the current URL, opens a connection to it and fetches the page.
     *
     * @param str URL to fetch
     * @return the parsed document
     * @throws IOException if the page cannot be retrieved
     */
    public Document createConnection(String str) throws IOException {
        this.urlString = str;
        return fetch();
    }

    /** Connects to the current URL with the configured timeout and user agent and parses the response. */
    private Document fetch() throws IOException {
        // Clear first so a failure never leaves the previous page behind.
        this.document = null;
        this.connection = Jsoup.connect(this.urlString)
                .timeout(this.jSoupConnectionReadTimeout)
                .userAgent(this.userAgent);
        this.document = this.connection.get();
        return this.document;
    }

    /**
     * Builds an example set with one row per element of {@code document} whose
     * attribute {@code str} equals {@code str2}. Every row carries the columns
     * {@link #PRODUCT_NAME}, {@link #PRODUCT_PRICE} and {@link #PRODUCT_ORGANIZATION}
     * plus whatever the node visitor extracts; columns appear in first-seen order.
     * A cell whose column was not found for a row is a missing value (NaN).
     *
     * @param document page to scan; when {@code null} nothing is built
     * @param z        {@code true} to use the corner-case visitor, {@code false} for the default one
     * @param str      name of the HTML attribute to match
     * @param str2     value the attribute must have
     * @return the example set annotated with document metadata, or {@code null} when {@code document} is {@code null}
     */
    public ExampleSet getExampleSetFromSchemaOfInterest(Document document, boolean z, String str, String str2) {
        if (document == null) {
            LOGGER.info("Could not retrieve HTML document!");
            return null;
        }

        // Insertion-ordered so the column layout is deterministic.
        Map<String, Attribute> attributesByName = new LinkedHashMap<>();
        List<Map<String, String>> rows = new ArrayList<>();
        for (Element item : document.getElementsByAttributeValue(str, str2)) {
            rows.add(extractRow(item, z, attributesByName, rows.size()));
        }
        LOGGER.info("Total Products retrieved from Page = " + rows.size());

        Annotations annotations = new Annotations();
        annotations.setAnnotation("Document Title", document.title());
        annotations.setAnnotation("Document Location", document.location());
        annotations.setAnnotation("Document Id", document.id());
        annotations.setAnnotation("Document Character Set", document.charset().displayName());
        annotations.setAnnotation("Document Source", this.urlString);

        ExampleSet exampleSet = buildExampleSet(attributesByName, rows);
        exampleSet.getAnnotations().addAll(annotations);
        return exampleSet;
    }

    /** Extracts the cell values of one matched element and registers any column not seen before. */
    private Map<String, String> extractRow(Element item, boolean cornerCase, Map<String, Attribute> attributesByName, int index) {
        String name = item.getElementsByAttributeValue(BaseNodeVisitor.ITEM_PROP_ATTRIBUTE, "name").text();
        LOGGER.info("Product Name = \n" + name);
        String price = item.getElementsByAttributeValue(BaseNodeVisitor.ITEM_PROP_ATTRIBUTE, ITEM_PRICE).text();
        LOGGER.info("Product Price = \n" + price);
        String organization = item.getElementsByAttributeValueContaining(BaseNodeVisitor.ITEM_TYPE_ATTRIBUTE, ORGANIZATION_ITEM_TYPE).text();
        LOGGER.info("Organization = \n" + organization);

        registerAttribute(attributesByName, PRODUCT_NAME);
        registerAttribute(attributesByName, PRODUCT_PRICE);
        registerAttribute(attributesByName, PRODUCT_ORGANIZATION);

        Map<String, String> row = new HashMap<>();
        row.put(PRODUCT_NAME, name);
        row.put(PRODUCT_PRICE, price);
        row.put(PRODUCT_ORGANIZATION, organization);

        LOGGER.fine("******** Starting Product Extraction ******** Attributes Before [" + index + "] =" + attributesByName.size());
        if (cornerCase) {
            extractAttributesByIterativeNodeParsingCornerCase1(item, attributesByName, row);
        } else {
            extractAttributesByIterativeNodeParsing(item, attributesByName, row);
        }
        if (LOGGER.isLoggable(java.util.logging.Level.FINE)) {
            for (Map.Entry<String, Attribute> header : attributesByName.entrySet()) {
                LOGGER.fine("Header: Attribute[" + header.getKey() + "] = " + header.getValue().getName());
            }
            for (Map.Entry<String, String> cell : row.entrySet()) {
                LOGGER.fine("Row: Attribute[" + cell.getKey() + "] = " + cell.getValue());
            }
        }
        LOGGER.fine("******* Ending Product Extraction ****** Attributes After:" + attributesByName.size() + " Row Values = " + row.size());
        return row;
    }

    /** Converts the collected rows into data rows laid out in the iteration order of {@code attributesByName}. */
    private ExampleSet buildExampleSet(Map<String, Attribute> attributesByName, List<Map<String, String>> rows) {
        List<Attribute> attributes = new ArrayList<>(attributesByName.values());
        Map<String, Integer> columnOf = new HashMap<>();
        for (String name : attributesByName.keySet()) {
            columnOf.put(name, columnOf.size());
        }

        ExampleSetBuilder builder = ExampleSets.from(attributes);
        for (Map<String, String> row : rows) {
            double[] values = new double[attributes.size()];
            // Default of 0.0 would read back as the first mapped string; NaN marks the cell as missing.
            Arrays.fill(values, Double.NaN);
            for (Map.Entry<String, String> cell : row.entrySet()) {
                Attribute attribute = attributesByName.get(cell.getKey());
                if (attribute == null || cell.getValue() == null) {
                    continue;
                }
                values[columnOf.get(cell.getKey())] = attribute.getMapping().mapString(cell.getValue());
            }
            builder.addDataRow(new DoubleArrayDataRow(values));
        }
        return builder.build();
    }

    /** Creates the attribute for {@code name} unless the header already has one. */
    private static void registerAttribute(Map<String, Attribute> attributesByName, String name) {
        if (!attributesByName.containsKey(name)) {
            attributesByName.put(name, AttributeFactory.createAttribute(name, ATTRIBUTE_VALUE_TYPE));
        }
    }

    /**
     * Walks the element tree below {@code element} depth-first and logs the node
     * name of every descendant element.
     *
     * @param element root of the subtree to walk; {@code null} is treated as a leaf
     */
    public void getTupleByNestedKeywordSearch(Element element) {
        if (element == null || element.childNodeSize() == 0) {
            LOGGER.info("Reached End of Graph - Returning");
            return;
        }
        // Direct children only: getAllElements() also yields the element itself,
        // which made the recursion call itself with the same argument forever.
        for (Element child : element.children()) {
            LOGGER.info("Nested Element Added = \n" + child.nodeName());
            getTupleByNestedKeywordSearch(child);
        }
    }

    /**
     * Fetches the current URL and, for every element typed as
     * {@link #PRODUCT_ITEM_TYPE}, increments the count stored under the element's
     * base URI in the link-count map.
     */
    public void parseUsingAnchorTag() {
        Document page = fetchQuietly();
        if (page == null) {
            LOGGER.info("Could not retrieve HTML document!");
            return;
        }
        for (Element product : page.getElementsByAttributeValue(BaseNodeVisitor.ITEM_TYPE_ATTRIBUTE, PRODUCT_ITEM_TYPE)) {
            String baseUri = product.baseUri();
            LOGGER.info("HREF URI = " + baseUri);
            LOGGER.info("HREF = " + product.attr(BaseNodeVisitor.VALUE_EXTRACTION_ATTRIBUTE_HREF));
            LOGGER.info("HREF Text = " + product.text());
            this.mapOfSearchCounts.merge(baseUri.trim(), 1, Integer::sum);
        }
        LOGGER.info("Unique URL links found = " + this.mapOfSearchCounts.size());
    }

    /**
     * Fetches the current URL and records every {@code a[href]} target that starts
     * with {@code http} in the link-count map with a count of 1.
     */
    public void parseUsingSelection() {
        Document page = fetchQuietly();
        if (page == null) {
            LOGGER.info("Could not retrieve HTML document!");
            return;
        }
        LOGGER.info("Document Title = " + page.title());
        for (Element anchor : page.select("a[href]")) {
            String href = anchor.attr(BaseNodeVisitor.VALUE_EXTRACTION_ATTRIBUTE_HREF);
            // "https..." also starts with "http", so one prefix test is enough.
            if (href.startsWith(HTTP_PREFIX)) {
                LOGGER.info("Found HREF = " + href);
                this.mapOfSearchCounts.put(href, 1);
            }
        }
        LOGGER.info("Total Count of URL links found = " + this.mapOfSearchCounts.size());
    }

    /** Fetches the current URL once via {@link #createConnection()} and returns the page, or {@code null} on failure. */
    private Document fetchQuietly() {
        createConnection();
        return this.document;
    }

    /**
     * Traverses {@code element} with a {@code StructuredDataDefaultNodeVisitor} and
     * merges what it collects into the header and the row.
     *
     * @param element root node to traverse; when {@code null} nothing is extracted
     * @param map     header: attribute name to attribute; receives any new column
     * @param map2    row: attribute name to value; receives every extracted value
     */
    public void extractAttributesByIterativeNodeParsing(Element element, Map<String, Attribute> map, Map<String, String> map2) {
        HashMap<String, String> extracted = new HashMap<>();
        NodeTraversor traversor = new NodeTraversor(new StructuredDataDefaultNodeVisitor(extracted));
        traverseAndMerge(element, traversor, extracted, map, map2);
    }

    /**
     * Same as {@link #extractAttributesByIterativeNodeParsing} but traverses with a
     * {@code StructuredDataCornerCaseNodeVisitor} configured with
     * {@link #getTargetAttributeValue()}.
     *
     * @param element root node to traverse; when {@code null} nothing is extracted
     * @param map     header: attribute name to attribute; receives any new column
     * @param map2    row: attribute name to value; receives every extracted value
     */
    public void extractAttributesByIterativeNodeParsingCornerCase1(Element element, Map<String, Attribute> map, Map<String, String> map2) {
        HashMap<String, String> extracted = new HashMap<>();
        NodeTraversor traversor = new NodeTraversor(new StructuredDataCornerCaseNodeVisitor(getTargetAttributeValue(), extracted));
        traverseAndMerge(element, traversor, extracted, map, map2);
    }

    /** Runs the traversal and copies each value the visitor stored in {@code extracted} into the header and row. */
    private void traverseAndMerge(Element element, NodeTraversor traversor, Map<String, String> extracted,
            Map<String, Attribute> header, Map<String, String> row) {
        if (element == null) {
            LOGGER.warning("xxx Unable to find Product node. xxx");
            return;
        }
        traversor.traverse(element);
        for (Map.Entry<String, String> found : extracted.entrySet()) {
            LOGGER.fine("Attribute: " + found.getKey() + " = " + found.getValue());
            registerAttribute(header, found.getKey());
            row.put(found.getKey(), found.getValue());
        }
    }

    /** @return the URL used by the next no-argument fetch */
    public String getUrlString() {
        return this.urlString;
    }

    /** @param str URL to use for the next no-argument fetch */
    public void setUrlString(String str) {
        this.urlString = str;
    }

    /** @return the user-agent string sent with every request */
    public String getUserAgent() {
        return this.userAgent;
    }

    /** @param str user-agent string to send with every request */
    public void setUserAgent(String str) {
        this.userAgent = str;
    }

    /** @return the attribute-value map; {@code null} because nothing in this class assigns it */
    public Map<String, String> getMapOfAttributeValues() {
        return this.mapOfAttributeValues;
    }

    /** @return the jsoup timeout in milliseconds */
    public int getjSoupConnectionReadTimeout() {
        return this.jSoupConnectionReadTimeout;
    }

    /** @param i jsoup timeout in milliseconds */
    public void setjSoupConnectionReadTimeout(int i) {
        this.jSoupConnectionReadTimeout = i;
    }

    /** @return the value handed to the corner-case visitor */
    public String getTargetAttributeValue() {
        return this.targetAttributeValue;
    }

    /** @param str value to hand to the corner-case visitor */
    public void setTargetAttributeValue(String str) {
        this.targetAttributeValue = str;
    }
}
