package org.webdatacommons.webtables.extraction.model;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.jsoup.nodes.Element;
import org.webdatacommons.webtables.extraction.stats.TableStats;
import org.webdatacommons.webtables.extraction.util.CellTools;
import org.webdatacommons.webtables.extraction.util.Tools;
import weka.classifiers.lazy.kstar.KStarConstants;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;

/* loaded from: input_file:org/webdatacommons/webtables/extraction/model/FeaturesP1.class */
public class FeaturesP1 {
    // Names of the features exposed as attributes, separated by ", ".
    // getFeatureNames() splits this string; the constructor creates one Attribute per name, in this order.
    private static String featureWhiteList = "LOCAL_RATIO_IS_NUMBER_COL_1, LOCAL_RATIO_ANCHOR_ROW_1, RATIO_IMG, LOCAL_RATIO_ANCHOR_COL_1, LOCAL_LENGTH_VARIANCE_COL_1, LOCAL_RATIO_IMAGE_COL_1, LOCAL_RATIO_IMAGE_COL_0, LOCAL_SPAN_RATIO_COL_2, LOCAL_SPAN_RATIO_COL_1, LOCAL_AVG_LENGTH_ROW_2, LOCAL_RATIO_HEADER_ROW_0, RATIO_DIGIT, LOCAL_RATIO_IMAGE_ROW_0, RATIO_ALPHABETICAL, LOCAL_RATIO_IMAGE_ROW_1, LOCAL_RATIO_INPUT_COL_1, LOCAL_RATIO_INPUT_COL_0, LOCAL_RATIO_CONTAINS_NUMBER_ROW_2, LOCAL_AVG_LENGTH_COL_0, RATIO_EMPTY, AVG_ROWS, LOCAL_RATIO_INPUT_ROW_1, LOCAL_RATIO_CONTAINS_NUMBER_COL_2, LOCAL_RATIO_HEADER_COL_1, LOCAL_RATIO_INPUT_ROW_0, AVG_COLS";
    // Listeners that are fed every cell of the table; rebuilt by initializeFeatures().
    private ArrayList<AbstractTableListener> globalListeners;
    // Listeners that are fed one selected row or column at a time; rebuilt by initializeFeatures().
    private ArrayList<AbstractTableListener> localListeners;
    // One Attribute per whitelisted feature name, filled by the constructor (class attribute not included).
    private ArrayList<Attribute> attributeList = new ArrayList<>();
    // Same feature attributes as attributeList, followed by the class attribute (appended last by the constructor).
    private FastVector attributeVector = new FastVector();
    // Class label values: LAYOUT, RELATION, ENTITY, MATRIX, NONE (set in the constructor).
    private FastVector classAttrVector;
    // The "CLASS" attribute built from classAttrVector.
    private Attribute classAttr;

    /**
     * Base class for feature computations that are driven cell by cell.
     *
     * <p>The public entry points {@link #start}, {@link #computeCell} and {@link #end}
     * simply delegate to the hooks {@link #initialize}, {@link #onCell} and
     * {@link #finalize} that subclasses implement; the computed values are then read
     * through {@link #getResults()}.
     *
     * <p>NOTE(review): the {@code finalize()} hook has the same signature as
     * {@link Object#finalize()} and therefore overrides it. It is kept under this name
     * because subclasses implement it; renaming would break them.
     */
    public abstract class AbstractTableListener {
        /** Name reported by {@link #getFeatureName()}; subclasses overwrite it in their constructor. */
        protected String featureName = "ABSTRACT_TABLE_LISTENER";

        public AbstractTableListener() {
        }

        // ---- hooks implemented by concrete listeners ----

        /** Resets the listener's state before a pass over cells begins. */
        protected abstract void initialize(TableStats tableStats);

        /** Consumes one cell of the current pass. */
        protected abstract void onCell(Element element, TableStats tableStats);

        /** Derives the final feature value(s) once every cell of the pass has been seen. */
        protected abstract void finalize();

        /** @return the computed values keyed by feature name */
        public abstract HashMap<String, Double> getResults();

        // ---- entry points used by the feature driver ----

        /** Begins a pass; delegates to {@link #initialize}. */
        public void start(TableStats tableStats) {
            initialize(tableStats);
        }

        /** Feeds one cell to the listener; delegates to {@link #onCell}. */
        public void computeCell(Element element, TableStats tableStats) {
            onCell(element, tableStats);
        }

        /** Ends a pass; delegates to {@link #finalize}. */
        public void end() {
            finalize();
        }

        /** @return the current value of {@link #featureName} */
        public String getFeatureName() {
            return this.featureName;
        }
    }

    /**
     * Global feature {@code AVG_COLS}: the number of non-null cells seen during the
     * pass divided by the table height reported by {@link TableStats#getTableHeight()}.
     */
    public class AvgCols extends AbstractTableListener {
        private int cellCount;
        private double avgCols;
        private int tableHeight;

        public AvgCols() {
            super();
            this.featureName = "AVG_COLS";
        }

        /** Resets the cell counter and captures the table height for this pass. */
        @Override
        public void initialize(TableStats tableStats) {
            this.cellCount = 0;
            this.tableHeight = tableStats.getTableHeight();
        }

        /** Counts the cell unless it is {@code null}. */
        @Override
        public void onCell(Element element, TableStats tableStats) {
            if (element != null) {
                this.cellCount++;
            }
        }

        /**
         * Computes the average. The division is done in floating point: dividing the
         * two ints directly would truncate the fractional part before the result is
         * widened to double, and would throw ArithmeticException for a zero height.
         * A zero height yields {@code KStarConstants.FLOOR}, the same fallback the
         * ratio listeners in this file use for an empty pass.
         */
        @Override
        public void finalize() {
            this.avgCols = this.tableHeight > 0
                    ? (double) this.cellCount / this.tableHeight
                    : KStarConstants.FLOOR;
        }

        /** @return a single-entry map: feature name to the computed average */
        @Override
        public HashMap<String, Double> getResults() {
            HashMap<String, Double> results = new HashMap<>();
            results.put(this.featureName, Double.valueOf(this.avgCols));
            return results;
        }
    }

    /**
     * Global feature {@code AVG_ROWS}: the number of non-null cells seen during the
     * pass divided by the table width reported by {@link TableStats#getTableWidth()}.
     */
    public class AvgRows extends AbstractTableListener {
        private int cellCount;
        private double avgRows;
        private int tableWidth;

        public AvgRows() {
            super();
            this.featureName = "AVG_ROWS";
        }

        /** Resets the cell counter and captures the table width for this pass. */
        @Override
        public void initialize(TableStats tableStats) {
            this.cellCount = 0;
            this.tableWidth = tableStats.getTableWidth();
        }

        /** Counts the cell unless it is {@code null}. */
        @Override
        public void onCell(Element element, TableStats tableStats) {
            if (element != null) {
                this.cellCount++;
            }
        }

        /**
         * Computes the average. The division is done in floating point: dividing the
         * two ints directly would truncate the fractional part before the result is
         * widened to double, and would throw ArithmeticException for a zero width.
         * A zero width yields {@code KStarConstants.FLOOR}, the same fallback the
         * ratio listeners in this file use for an empty pass.
         */
        @Override
        public void finalize() {
            this.avgRows = this.tableWidth > 0
                    ? (double) this.cellCount / this.tableWidth
                    : KStarConstants.FLOOR;
        }

        /** @return a single-entry map: feature name to the computed average */
        @Override
        public HashMap<String, Double> getResults() {
            HashMap<String, Double> results = new HashMap<>();
            results.put(this.featureName, Double.valueOf(this.avgRows));
            return results;
        }
    }

    /**
     * Listener that ignores every cell and always reports
     * {@code KStarConstants.FLOOR} under the name {@code BLANK_FEATURE}.
     */
    public class BlankTableListener extends AbstractTableListener {
        public BlankTableListener() {
            super();
            this.featureName = "BLANK_FEATURE";
        }

        /** Nothing to reset. */
        @Override
        public void initialize(TableStats tableStats) {
            // intentionally empty
        }

        /** Cells are ignored. */
        @Override
        public void onCell(Element element, TableStats tableStats) {
            // intentionally empty
        }

        /** Nothing to compute. */
        @Override
        public void finalize() {
            // intentionally empty
        }

        /** @return a single-entry map: feature name to {@code KStarConstants.FLOOR} */
        @Override
        public HashMap<String, Double> getResults() {
            HashMap<String, Double> results = new HashMap<>();
            Double constantValue = Double.valueOf(KStarConstants.FLOOR);
            results.put(this.featureName, constantValue);
            return results;
        }
    }

    /**
     * Global content-type ratios. Every non-null cell is classified with
     * {@code CellTools.getContentType}; the listener reports, relative to the number
     * of non-null cells, the share classified as IMAGE, ALPHABETICAL, DIGIT and EMPTY
     * under the keys {@code RATIO_IMG}, {@code RATIO_ALPHABETICAL},
     * {@code RATIO_DIGIT} and {@code RATIO_EMPTY}.
     */
    public class ContentRatios extends AbstractTableListener {
        private int cellCount;
        private int images;
        private int alphabetical;
        private int digits;
        private int empty;
        private double image_ratio;
        private double alphabetical_ratio;
        private double digit_ratio;
        private double empty_ratio;

        public ContentRatios() {
            super();
            this.featureName = "GROUP_GLOBAL_CONTENT_RATIOS";
        }

        /** Resets all counters for a new pass. */
        @Override
        public void initialize(TableStats tableStats) {
            this.cellCount = 0;
            this.images = 0;
            this.alphabetical = 0;
            this.digits = 0;
            this.empty = 0;
        }

        /** Counts the cell and bumps the counter of its content type; null cells are skipped. */
        @Override
        public void onCell(Element element, TableStats tableStats) {
            if (element == null) {
                return;
            }
            this.cellCount++;
            switch (CellTools.getContentType(element)) {
                case IMAGE:
                    this.images++;
                    break;
                case ALPHABETICAL:
                    this.alphabetical++;
                    break;
                case DIGIT:
                    this.digits++;
                    break;
                case EMPTY:
                    this.empty++;
                    break;
                default:
                    // any other content type only contributes to cellCount
                    break;
            }
        }

        /** Turns the counters into ratios. */
        @Override
        public void finalize() {
            this.image_ratio = ratioOf(this.images);
            this.alphabetical_ratio = ratioOf(this.alphabetical);
            this.digit_ratio = ratioOf(this.digits);
            this.empty_ratio = ratioOf(this.empty);
        }

        /**
         * Returns {@code count / cellCount} as a real number, or
         * {@code KStarConstants.FLOOR} when no cell was counted. The cast is required:
         * int / int would truncate every ratio below 1 down to 0.
         */
        private double ratioOf(int count) {
            return this.cellCount > 0 ? (double) count / this.cellCount : KStarConstants.FLOOR;
        }

        /** @return the four ratios keyed by their feature names */
        @Override
        public HashMap<String, Double> getResults() {
            HashMap<String, Double> results = new HashMap<>();
            results.put("RATIO_IMG", Double.valueOf(this.image_ratio));
            results.put("RATIO_ALPHABETICAL", Double.valueOf(this.alphabetical_ratio));
            results.put("RATIO_DIGIT", Double.valueOf(this.digit_ratio));
            results.put("RATIO_EMPTY", Double.valueOf(this.empty_ratio));
            return results;
        }
    }

    /**
     * Local feature {@code LOCAL_AVG_LENGTH}: arithmetic mean of
     * {@code CellTools.getCellLength} over the non-null cells of one pass.
     */
    public class LocalAvgLength extends AbstractTableListener {
        private ArrayList<Integer> cellLengths;
        private double average;

        public LocalAvgLength() {
            super();
            this.featureName = "LOCAL_AVG_LENGTH";
        }

        /** Starts a new pass with an empty list of lengths. */
        @Override
        public void initialize(TableStats tableStats) {
            this.cellLengths = new ArrayList<>();
        }

        /** Records the length of the cell; null cells are skipped. */
        @Override
        public void onCell(Element element, TableStats tableStats) {
            if (element != null) {
                this.cellLengths.add(Integer.valueOf(CellTools.getCellLength(element)));
            }
        }

        /**
         * Computes the mean of the recorded lengths, or {@code KStarConstants.FLOOR}
         * when nothing was recorded.
         */
        @Override
        public void finalize() {
            // Iterate the list once. The previous loop asked a freshly created iterator
            // for hasNext() on every round and read from an undeclared variable, so it
            // could neither compile nor terminate.
            double total = 0.0d;
            for (Integer length : this.cellLengths) {
                total += length.intValue();
            }
            double size = this.cellLengths.size();
            this.average = size > KStarConstants.FLOOR ? total / size : KStarConstants.FLOOR;
        }

        /** @return a single-entry map: feature name to the mean length */
        @Override
        public HashMap<String, Double> getResults() {
            HashMap<String, Double> results = new HashMap<>();
            results.put(this.featureName, Double.valueOf(this.average));
            return results;
        }
    }

    /**
     * Local content ratios for one pass (one row or one column). For the non-null
     * cells it reports the share that contain a {@code th}, {@code a}, {@code img} or
     * {@code input} element, whose cleaned text contains a digit, and whose cleaned
     * text is numeric only according to {@code CellTools.isNumericOnly}.
     */
    public class LocalContentRatios extends AbstractTableListener {
        private int cellCount;
        private int count_th;
        private int count_anchor;
        private int count_img;
        private int count_input;
        private int count_contains_number;
        private int count_is_number;
        private double ratio_th;
        private double ratio_anchor;
        private double ratio_img;
        private double ratio_input;
        private double ratio_contains_number;
        private double ratio_is_number;

        public LocalContentRatios() {
            super();
            this.featureName = "GROUP_LOCAL_CONTENT_RATIOS";
        }

        /** Resets all counters for a new pass. */
        @Override
        public void initialize(TableStats tableStats) {
            this.cellCount = 0;
            this.count_th = 0;
            this.count_anchor = 0;
            this.count_img = 0;
            this.count_input = 0;
            this.count_contains_number = 0;
            this.count_is_number = 0;
        }

        /** Updates the tag and text counters for one cell; null cells are skipped entirely. */
        @Override
        public void onCell(Element element, TableStats tableStats) {
            if (element == null) {
                return;
            }
            if (element.getElementsByTag("th").size() > 0) {
                this.count_th++;
            }
            if (element.getElementsByTag("a").size() > 0) {
                this.count_anchor++;
            }
            if (element.getElementsByTag("img").size() > 0) {
                this.count_img++;
            }
            if (element.getElementsByTag("input").size() > 0) {
                this.count_input++;
            }
            String cleanCell = CellTools.cleanCell(element.text());
            if (cleanCell.matches(".*\\d.*")) {
                this.count_contains_number++;
            }
            if (CellTools.isNumericOnly(cleanCell)) {
                this.count_is_number++;
            }
            this.cellCount++;
        }

        /** Turns the counters into ratios. */
        @Override
        public void finalize() {
            this.ratio_th = ratioOf(this.count_th);
            this.ratio_anchor = ratioOf(this.count_anchor);
            this.ratio_img = ratioOf(this.count_img);
            this.ratio_input = ratioOf(this.count_input);
            this.ratio_contains_number = ratioOf(this.count_contains_number);
            this.ratio_is_number = ratioOf(this.count_is_number);
        }

        /**
         * Returns {@code count / cellCount} as a real number, or
         * {@code KStarConstants.FLOOR} when no cell was counted. The cast is required:
         * int / int would truncate every ratio below 1 down to 0.
         */
        private double ratioOf(int count) {
            return this.cellCount > 0 ? (double) count / this.cellCount : KStarConstants.FLOOR;
        }

        /** @return the six ratios keyed by their (unsuffixed) feature names */
        @Override
        public HashMap<String, Double> getResults() {
            HashMap<String, Double> results = new HashMap<>();
            results.put("LOCAL_RATIO_HEADER", Double.valueOf(this.ratio_th));
            results.put("LOCAL_RATIO_ANCHOR", Double.valueOf(this.ratio_anchor));
            results.put("LOCAL_RATIO_IMAGE", Double.valueOf(this.ratio_img));
            results.put("LOCAL_RATIO_INPUT", Double.valueOf(this.ratio_input));
            results.put("LOCAL_RATIO_CONTAINS_NUMBER", Double.valueOf(this.ratio_contains_number));
            results.put("LOCAL_RATIO_IS_NUMBER", Double.valueOf(this.ratio_is_number));
            return results;
        }
    }

    /**
     * Local feature {@code LOCAL_LENGTH_VARIANCE}: population variance (sum of squared
     * deviations from the mean, divided by the number of values) of
     * {@code CellTools.getCellLength} over the non-null cells of one pass.
     */
    public class LocalLengthVariance extends AbstractTableListener {
        private ArrayList<Integer> cellLengths;
        private double average;
        private double variance;

        public LocalLengthVariance() {
            super();
            this.featureName = "LOCAL_LENGTH_VARIANCE";
        }

        /** Starts a new pass with an empty list of lengths. */
        @Override
        public void initialize(TableStats tableStats) {
            this.cellLengths = new ArrayList<>();
        }

        /** Records the length of the cell; null cells are skipped. */
        @Override
        public void onCell(Element element, TableStats tableStats) {
            if (element != null) {
                this.cellLengths.add(Integer.valueOf(CellTools.getCellLength(element)));
            }
        }

        /**
         * Computes the mean and then the variance of the recorded lengths; both are
         * {@code KStarConstants.FLOOR} when nothing was recorded.
         */
        @Override
        public void finalize() {
            // Iterate the list once. The previous summing loop asked a freshly created
            // iterator for hasNext() on every round and read from an undeclared
            // variable, so it could neither compile nor terminate.
            double total = 0.0d;
            for (Integer length : this.cellLengths) {
                total += length.intValue();
            }
            double size = this.cellLengths.size();
            this.average = size > KStarConstants.FLOOR ? total / size : KStarConstants.FLOOR;

            double squaredDeviations = 0.0d;
            for (Integer length : this.cellLengths) {
                squaredDeviations += Math.pow(length.intValue() - this.average, 2.0d);
            }
            this.variance = size > KStarConstants.FLOOR ? squaredDeviations / size : KStarConstants.FLOOR;
        }

        /** @return a single-entry map: feature name to the variance */
        @Override
        public HashMap<String, Double> getResults() {
            HashMap<String, Double> results = new HashMap<>();
            results.put(this.featureName, Double.valueOf(this.variance));
            return results;
        }
    }

    /**
     * Local feature {@code LOCAL_SPAN_RATIO}: share of {@code null} cells among all
     * cells (null or not) visited in one pass.
     * NOTE(review): presumably a null cell marks a position covered by a row/column
     * span - confirm against the code that builds the cell grid.
     */
    public class LocalRatioSpan extends AbstractTableListener {
        private int nullCells;
        private int totalCells;
        private double ratio;

        public LocalRatioSpan() {
            super();
            this.featureName = "LOCAL_SPAN_RATIO";
        }

        /** Resets both counters for a new pass. */
        @Override
        public void initialize(TableStats tableStats) {
            this.nullCells = 0;
            this.totalCells = 0;
        }

        /** Counts every visited cell and, separately, the null ones. */
        @Override
        public void onCell(Element element, TableStats tableStats) {
            if (element == null) {
                this.nullCells++;
            }
            this.totalCells++;
        }

        /**
         * Computes the ratio, or {@code KStarConstants.FLOOR} when no cell was visited.
         * The cast is required: int / int would truncate every ratio below 1 down to 0.
         */
        @Override
        public void finalize() {
            this.ratio = this.totalCells > 0
                    ? (double) this.nullCells / this.totalCells
                    : KStarConstants.FLOOR;
        }

        /** @return a single-entry map: feature name to the ratio */
        @Override
        public HashMap<String, Double> getResults() {
            HashMap<String, Double> results = new HashMap<>();
            results.put(this.featureName, Double.valueOf(this.ratio));
            return results;
        }
    }

    /**
     * Returns the whitelisted feature names in the order they appear in
     * {@code featureWhiteList}.
     *
     * @return a fixed-size list backed by the array produced by splitting the
     *         whitelist on {@code ", "}
     */
    public static List<String> getFeatureNames() {
        String[] names = featureWhiteList.split(", ");
        return Arrays.asList(names);
    }

    /**
     * Returns the attribute vector built by the constructor: the feature attributes
     * followed by the class attribute. The internal instance is returned, not a copy.
     */
    public FastVector getAttrVector() {
        return this.attributeVector;
    }

    /**
     * Returns the vector of class label values (LAYOUT, RELATION, ENTITY, MATRIX,
     * NONE) built by the constructor. The internal instance is returned, not a copy.
     */
    public FastVector getClassVector() {
        return this.classAttrVector;
    }

    /**
     * Builds the attribute schema: one attribute per name returned by
     * {@link #getFeatureNames()}, added to both the attribute list and the attribute
     * vector, followed by the nominal {@code CLASS} attribute (values LAYOUT,
     * RELATION, ENTITY, MATRIX, NONE), which is appended to the attribute vector only.
     */
    public FeaturesP1() {
        for (String featureName : getFeatureNames()) {
            Attribute featureAttr = new Attribute(featureName);
            this.attributeList.add(featureAttr);
            this.attributeVector.addElement(featureAttr);
        }

        String[] classLabels = {"LAYOUT", "RELATION", "ENTITY", "MATRIX", "NONE"};
        this.classAttrVector = new FastVector(5);
        for (String label : classLabels) {
            this.classAttrVector.addElement(label);
        }

        this.classAttr = new Attribute("CLASS", this.classAttrVector);
        this.attributeVector.addElement(this.classAttr);
    }

    /**
     * Returns the list of feature attributes built by the constructor (the class
     * attribute is not part of it). The internal instance is returned, not a copy.
     */
    public ArrayList<Attribute> getAttrList() {
        return this.attributeList;
    }

    /**
     * Replaces both listener lists with freshly created listeners.
     * Global listeners, in order: AvgRows, AvgCols, ContentRatios.
     * Local listeners, in order: LocalAvgLength, LocalRatioSpan, LocalContentRatios,
     * LocalLengthVariance.
     */
    public void initializeFeatures() {
        ArrayList<AbstractTableListener> global = new ArrayList<>();
        global.add(new AvgRows());
        global.add(new AvgCols());
        global.add(new ContentRatios());
        this.globalListeners = global;

        ArrayList<AbstractTableListener> local = new ArrayList<>();
        local.add(new LocalAvgLength());
        local.add(new LocalRatioSpan());
        local.add(new LocalContentRatios());
        local.add(new LocalLengthVariance());
        this.localListeners = local;
    }

    /**
     * Computes all features for a table given as a grid of cells and packs them into
     * an {@link Instance} via {@code Tools.createInstanceFromData}.
     *
     * <p>Steps:
     * <ol>
     *   <li>Listeners are recreated with {@link #initializeFeatures()}.</li>
     *   <li>Global pass: every cell, row by row, is fed to the global listeners; their
     *       results are stored under their own keys.</li>
     *   <li>Row passes: rows 0, 1 and the last row are each fed to the local listeners;
     *       results are stored as {@code <key>_ROW_<slot>} with slot 0, 1, 2 (slot 2 is
     *       the last row, whatever its index).</li>
     *   <li>Column passes: the same for columns 0, 1 and the last column, stored as
     *       {@code <key>_COL_<slot>}.</li>
     * </ol>
     *
     * <p>{@code tableStats.rowIndex} / {@code tableStats.colIndex} are used as the loop
     * counters so that listeners can observe the current position. During a row pass
     * only {@code colIndex} is updated and during a column pass only {@code rowIndex};
     * the other index keeps the value left by the preceding loop.
     *
     * @param elementArr the table as {@code [row][column]}; entries may be null. The
     *        width is taken from the first row, so every row must have at least that
     *        many entries, and the table needs at least two rows and two columns;
     *        otherwise an ArrayIndexOutOfBoundsException is thrown.
     * @return the instance created from the collected feature map
     */
    public Instance computeFeatures(Element[][] elementArr) {
        HashMap<String, Double> features = new HashMap<>();
        TableStats tableStats = new TableStats(elementArr[0].length, elementArr.length);
        initializeFeatures();

        // ---- global pass over every cell ----
        startListeners(this.globalListeners, tableStats);
        for (tableStats.rowIndex = 0; tableStats.rowIndex < tableStats.getTableHeight(); tableStats.rowIndex++) {
            for (tableStats.colIndex = 0; tableStats.colIndex < tableStats.getTableWidth(); tableStats.colIndex++) {
                for (AbstractTableListener listener : this.globalListeners) {
                    listener.computeCell(elementArr[tableStats.rowIndex][tableStats.colIndex], tableStats);
                }
            }
        }
        endListeners(this.globalListeners);
        for (AbstractTableListener listener : this.globalListeners) {
            features.putAll(listener.getResults());
        }

        // ---- local passes over the first, second and last row ----
        int[] rows = {0, 1, tableStats.getTableHeight() - 1};
        for (int slot = 0; slot < rows.length; slot++) {
            int row = rows[slot];
            startListeners(this.localListeners, tableStats);
            for (tableStats.colIndex = 0; tableStats.colIndex < tableStats.getTableWidth(); tableStats.colIndex++) {
                for (AbstractTableListener listener : this.localListeners) {
                    listener.computeCell(elementArr[row][tableStats.colIndex], tableStats);
                }
            }
            endListeners(this.localListeners);
            collectSuffixed(this.localListeners, "_ROW_" + slot, features);
        }

        // ---- local passes over the first, second and last column ----
        int[] cols = {0, 1, tableStats.getTableWidth() - 1};
        for (int slot = 0; slot < cols.length; slot++) {
            int col = cols[slot];
            startListeners(this.localListeners, tableStats);
            for (tableStats.rowIndex = 0; tableStats.rowIndex < tableStats.getTableHeight(); tableStats.rowIndex++) {
                for (AbstractTableListener listener : this.localListeners) {
                    listener.computeCell(elementArr[tableStats.rowIndex][col], tableStats);
                }
            }
            endListeners(this.localListeners);
            collectSuffixed(this.localListeners, "_COL_" + slot, features);
        }

        return Tools.createInstanceFromData(features, this.attributeList, this.attributeVector);
    }

    /** Calls {@code start(tableStats)} on every listener, in list order. */
    private void startListeners(List<AbstractTableListener> listeners, TableStats tableStats) {
        for (AbstractTableListener listener : listeners) {
            listener.start(tableStats);
        }
    }

    /** Calls {@code end()} on every listener, in list order. */
    private void endListeners(List<AbstractTableListener> listeners) {
        for (AbstractTableListener listener : listeners) {
            listener.end();
        }
    }

    /** Copies every listener result into {@code target}, appending {@code suffix} to each key. */
    private void collectSuffixed(List<AbstractTableListener> listeners, String suffix,
            HashMap<String, Double> target) {
        for (AbstractTableListener listener : listeners) {
            for (Map.Entry<String, Double> entry : listener.getResults().entrySet()) {
                target.put(entry.getKey() + suffix, entry.getValue());
            }
        }
    }
}
