package com.rapidminer.extension.pdftableextraction.operator;

import com.rapidminer.tools.LogService;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.pdfbox.pdmodel.PDDocument;
import technology.tabula.ObjectExtractor;
import technology.tabula.Page;
import technology.tabula.PageIterator;
import technology.tabula.Rectangle;
import technology.tabula.RectangularTextContainer;
import technology.tabula.Table;
import technology.tabula.extractors.BasicExtractionAlgorithm;

/* loaded from: input_file:com/rapidminer/extension/pdftableextraction/operator/PdfDataTableExtractor.class */
/**
 * Helper around tabula's table detection/extraction that loads PDF pages,
 * locates table regions with an {@link ExtendedNurminenDetectionAlgorithm} and
 * converts the extracted {@link Table}s into plain {@code String[][]} grids.
 *
 * <p>NOTE(review): this source was recovered by a decompiler; the body of
 * {@link #decideAndExtract(String, int)} could not be reconstructed and the
 * method throws {@link UnsupportedOperationException} in this form.
 */
public class PdfDataTableExtractor {
    /** Value assigned to {@code MAGIC_HEURISTIC_NUMBER} by both constructors. */
    private static final float DEFAULT_MAGIC_HEURISTIC_NUMBER = 0.65f;

    private static final Logger LOGGER = LogService.getRoot();

    /**
     * Tunable threshold exposed through getter/setter. It is not read by any
     * code visible here; presumably it is consumed by the non-decompiled
     * {@code decideAndExtract} -- TODO confirm against the original class.
     */
    private float MAGIC_HEURISTIC_NUMBER;

    /** Detection algorithm used to find candidate table rectangles on a page. */
    ExtendedNurminenDetectionAlgorithm detectionAlgorithm;

    /* loaded from: input_file:com/rapidminer/extension/pdftableextraction/operator/PdfDataTableExtractor$ExtractionMethod.class */
    /**
     * Available extraction strategies. Not referenced by the code visible in
     * this file; presumably used by the non-decompiled {@code decideAndExtract}.
     */
    private enum ExtractionMethod {
        BASIC,
        SPREADSHEET,
        DECIDE
    }

    /** Creates an extractor backed by a default-configured detection algorithm. */
    public PdfDataTableExtractor() {
        this.MAGIC_HEURISTIC_NUMBER = DEFAULT_MAGIC_HEURISTIC_NUMBER;
        this.detectionAlgorithm = new ExtendedNurminenDetectionAlgorithm();
    }

    /**
     * Creates an extractor whose detection algorithm is configured with the
     * given values. The arguments are forwarded unchanged, in order, to the
     * {@link ExtendedNurminenDetectionAlgorithm} constructor.
     *
     * <p>NOTE(review): the parameter names were lost in decompilation. The
     * types line up with the order of settings printed by {@link #toString()}
     * (grayscale threshold, horizontal edge width, ... identical-table overlap
     * ratio), but that mapping is an assumption -- verify against the
     * detection algorithm's constructor.
     */
    public PdfDataTableExtractor(int i, int i2, int i3, int i4, float f, float f2, int i5, int i6, float f3) {
        this.MAGIC_HEURISTIC_NUMBER = DEFAULT_MAGIC_HEURISTIC_NUMBER;
        this.detectionAlgorithm = new ExtendedNurminenDetectionAlgorithm(i, i2, i3, i4, f, f2, i5, i6, f3);
    }

    /**
     * NOT AVAILABLE in this decompiled source: the decompiler failed to
     * reconstruct the method body, so this stub always throws.
     *
     * @param r5 first argument of the original method (meaning not recoverable here)
     * @param r6 second argument of the original method (meaning not recoverable here)
     * @return never returns normally in this form
     * @throws UnsupportedOperationException always
     */
    /* JADX WARN: Removed duplicated region for block: B:13:0x0096 A[EXC_TOP_SPLITTER, SYNTHETIC] */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    public java.util.List<java.lang.String[][]> decideAndExtract(java.lang.String r5, int r6) throws java.lang.Exception {
        /*
            Method dump skipped, instructions count: 426
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: com.rapidminer.extension.pdftableextraction.operator.PdfDataTableExtractor.decideAndExtract(java.lang.String, int):java.util.List");
    }

    /**
     * Runs the supplied spreadsheet extraction algorithm on a page.
     *
     * @param extendedSpreadsheetExtractionAlgorithm algorithm to delegate to
     * @param page page to extract from
     * @return whatever the algorithm's {@code extract(page)} returns
     */
    public List<Table> extractTablesSpreadsheet(ExtendedSpreadsheetExtractionAlgorithm extendedSpreadsheetExtractionAlgorithm, Page page) {
        return extendedSpreadsheetExtractionAlgorithm.extract(page);
    }

    /**
     * Detects table regions on the page with {@link #detectionAlgorithm} and
     * extracts each region with a {@link BasicExtractionAlgorithm}.
     *
     * @param page page to analyse
     * @return all tables extracted from the detected regions, in detection
     *         order; empty if no region was detected
     */
    public List<Table> detectAndExtractTables(Page page) {
        BasicExtractionAlgorithm extractor = new BasicExtractionAlgorithm();
        List<Table> tables = new ArrayList<Table>();
        for (Rectangle region : this.detectionAlgorithm.detect(page)) {
            tables.addAll(extractor.extract(page.getArea(region)));
        }
        return tables;
    }

    /**
     * Loads the PDF at the given location and extracts a single page.
     *
     * @param str location of the PDF, passed to {@code PDDocument.load}
     * @param i page number, passed to {@code ObjectExtractor.extract(int)}
     * @return the extracted page
     * @throws IOException if loading, extracting or closing fails
     */
    public Page getPage(String str, int i) throws IOException {
        PDDocument document = PDDocument.load(str);
        ObjectExtractor extractor = null;
        try {
            extractor = new ObjectExtractor(document);
            return extractor.extract(i);
        } finally {
            closeExtractorOrDocument(extractor, document);
        }
    }

    /**
     * Loads the PDF at the given location and extracts every page.
     *
     * @param str location of the PDF, passed to {@code PDDocument.load}
     * @return all pages in iteration order
     * @throws IOException if loading, extracting or closing fails
     */
    public List<Page> getAllPages(String str) throws IOException {
        PDDocument document = PDDocument.load(str);
        ObjectExtractor extractor = null;
        try {
            extractor = new ObjectExtractor(document);
            PageIterator pageIterator = extractor.extract();
            List<Page> pages = new ArrayList<Page>();
            while (pageIterator.hasNext()) {
                pages.add(pageIterator.next());
            }
            return pages;
        } finally {
            closeExtractorOrDocument(extractor, document);
        }
    }

    /**
     * Releases the resources opened by the page-loading methods. If the
     * extractor was created it is closed (as the original code did); otherwise
     * the extractor's construction failed and the already-loaded document is
     * closed directly so it does not leak.
     */
    private static void closeExtractorOrDocument(ObjectExtractor extractor, PDDocument document) throws IOException {
        if (extractor != null) {
            extractor.close();
        } else {
            document.close();
        }
    }

    /**
     * Converts a table into a rectangular grid of cell texts.
     *
     * <p>The grid has one row per table row and as many columns as the widest
     * row; positions beyond the end of a shorter row stay {@code null}. A table
     * without rows yields an empty {@code String[0][0]} array.
     *
     * @param table table to convert
     * @return cell texts indexed as {@code [row][column]}
     */
    public String[][] tableToArrayOfRows(Table table) {
        List<List<RectangularTextContainer>> rows = table.getRows();
        int rowCount = rows.size();

        // Start at 0 (not a negative sentinel) so an empty table produces a
        // valid zero-width array instead of a NegativeArraySizeException.
        int columnCount = 0;
        for (List<RectangularTextContainer> row : rows) {
            columnCount = Math.max(columnCount, row.size());
        }

        String[][] cells = new String[rowCount][columnCount];
        for (int r = 0; r < rowCount; r++) {
            int width = rows.get(r).size();
            for (int c = 0; c < width; c++) {
                cells[r][c] = table.getCell(r, c).getText();
            }
        }
        return cells;
    }

    /**
     * Logs every element of the grid at INFO level, one log record per
     * element, followed by a newline record after each row.
     *
     * <p>Each row is traversed using its own length, so empty and jagged
     * arrays are handled without index errors.
     *
     * @param strArr grid to log, indexed as {@code [row][column]}
     */
    public void prettyPrintTable(String[][] strArr) {
        for (int row = 0; row < strArr.length; row++) {
            String[] cells = strArr[row];
            for (int col = 0; col < cells.length; col++) {
                LOGGER.log(Level.INFO, " Element at [" + row + "][" + col + "] = " + cells[col]);
            }
            LOGGER.log(Level.INFO, "\n");
        }
    }

    /** @return the current value of the heuristic threshold */
    public float getMAGIC_HEURISTIC_NUMBER() {
        return this.MAGIC_HEURISTIC_NUMBER;
    }

    /** @param f new value of the heuristic threshold */
    public void setMAGIC_HEURISTIC_NUMBER(float f) {
        this.MAGIC_HEURISTIC_NUMBER = f;
    }

    /**
     * Returns a dump of the detection algorithm's settings and the heuristic
     * threshold. The label/value pairs are concatenated without separators;
     * the format is kept exactly as in the original class.
     */
    @Override
    public String toString() {
        return "PDFDataTableExtractor.toString:\nGRAYSCALE_INTENSITY_THRESHOLD: " + this.detectionAlgorithm.getGRAYSCALE_INTENSITY_THRESHOLD() + "HORIZONTAL_EDGE_WIDTH_MINIMUM: " + this.detectionAlgorithm.getHORIZONTAL_EDGE_WIDTH_MINIMUM() + "VERTICAL_EDGE_HEIGHT_MINIMUM: " + this.detectionAlgorithm.getVERTICAL_EDGE_HEIGHT_MINIMUM() + "CELL_CORNER_DISTANCE_MAXIMUM: " + this.detectionAlgorithm.getCELL_CORNER_DISTANCE_MAXIMUM() + "POINT_SNAP_DISTANCE_THRESHOLD: " + this.detectionAlgorithm.getPOINT_SNAP_DISTANCE_THRESHOLD() + "TABLE_PADDING_AMOUNT: " + this.detectionAlgorithm.getTABLE_PADDING_AMOUNT() + "REQUIRED_TEXT_LINES_FOR_EDGE: " + this.detectionAlgorithm.getREQUIRED_TEXT_LINES_FOR_EDGE() + "REQUIRED_CELLS_FOR_TABLE: " + this.detectionAlgorithm.getREQUIRED_CELLS_FOR_TABLE() + "IDENTICAL_TABLE_OVERLAP_RATIO: " + this.detectionAlgorithm.getIDENTICAL_TABLE_OVERLAP_RATIO() + "MAGIC_HEURISTIC_NUMBER: " + this.MAGIC_HEURISTIC_NUMBER;
    }
}
