package technology.tabula;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.FilenameFilter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.common.PDPageLabelRange;
import technology.tabula.detectors.NurminenDetectionAlgorithm;
import technology.tabula.extractors.BasicExtractionAlgorithm;
import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
import technology.tabula.writers.CSVWriter;
import technology.tabula.writers.JSONWriter;
import technology.tabula.writers.TSVWriter;
import technology.tabula.writers.Writer;

/* loaded from: input_file:technology/tabula/CommandLineApp.class */
/**
 * Command-line front end for Tabula: parses the command line, loads one PDF (or every
 * {@code .pdf} file of a directory in batch mode), extracts the tables of the selected
 * pages and writes them as CSV, TSV or JSON.
 *
 * <p>All user-facing failures are reported as {@link ParseException} so that
 * {@link #main(String[])} can print a single {@code Error: ...} line and exit with status 1.
 */
public class CommandLineApp {
    private static final String VERSION = "0.9.2";
    private static final String VERSION_STRING = String.format("tabula %s (c) 2012-2016 Manuel Aristarán", VERSION);
    private static final String BANNER = "\nTabula helps you extract tables from PDFs\n\n";

    /** Destination used when no {@code -o/--outfile} is given. */
    private Appendable defaultOutput;
    /** Region of each page to analyze, or {@code null} for the whole page. */
    private Rectangle pageArea;
    /** Pages to process; {@code null} makes {@link #getPageIterator} iterate over every page. */
    private List<Integer> pages;
    private OutputFormat outputFormat;
    /** Password given with {@code -s/--password}, or {@code null} when the option is absent. */
    private String password;
    private TableExtractor tableExtractor;

    /**
     * Minimal conditional logger writing to standard error.
     * NOTE(review): not referenced anywhere in this class — kept for compatibility.
     */
    private static class DebugOutput {
        private boolean debugEnabled;

        public DebugOutput(boolean z) {
            this.debugEnabled = z;
        }

        /** Prints {@code str} to standard error when debugging is enabled. */
        public void debug(String str) {
            if (this.debugEnabled) {
                System.err.println(str);
            }
        }
    }

    /** Extraction strategy; {@link #DECIDE} picks one of the other two per page. */
    public enum ExtractionMethod {
        BASIC,
        SPREADSHEET,
        DECIDE
    }

    /** Supported output formats, selected with {@code -f/--format}. */
    public enum OutputFormat {
        CSV,
        TSV,
        JSON;

        /** Returns the names of all formats, in declaration order. */
        static String[] formatNames() {
            OutputFormat[] formats = values();
            String[] names = new String[formats.length];
            for (int i = 0; i < formats.length; i++) {
                names[i] = formats[i].name();
            }
            return names;
        }
    }

    /** Bundles the extraction algorithms together with the options that steer them. */
    public static class TableExtractor {
        private boolean guess = false;
        // NOTE(review): stored but never read in this class; -u/--use-line-returns currently has no effect here.
        private boolean useLineReturns = false;
        private BasicExtractionAlgorithm basicExtractor = new BasicExtractionAlgorithm();
        private SpreadsheetExtractionAlgorithm spreadsheetExtractor = new SpreadsheetExtractionAlgorithm();
        private List<Float> verticalRulingPositions = null;
        private ExtractionMethod method = ExtractionMethod.BASIC;

        public void setVerticalRulingPositions(List<Float> list) {
            this.verticalRulingPositions = list;
        }

        public void setGuess(boolean z) {
            this.guess = z;
        }

        public void setUseLineReturns(boolean z) {
            this.useLineReturns = z;
        }

        public void setMethod(ExtractionMethod extractionMethod) {
            this.method = extractionMethod;
        }

        /**
         * Extracts the tables of {@code page} with the configured method. For
         * {@link ExtractionMethod#DECIDE} the spreadsheet extractor's {@code isTabular}
         * check chooses between spreadsheet and basic extraction.
         */
        public List<Table> extractTables(Page page) {
            ExtractionMethod effective = this.method;
            if (effective == ExtractionMethod.DECIDE) {
                effective = this.spreadsheetExtractor.isTabular(page) ? ExtractionMethod.SPREADSHEET : ExtractionMethod.BASIC;
            }
            switch (effective) {
                case BASIC:
                    return extractTablesBasic(page);
                case SPREADSHEET:
                    return extractTablesSpreadsheet(page);
                default:
                    return new ArrayList<Table>();
            }
        }

        /**
         * Basic extraction. When guessing is enabled, every area reported by the detection
         * algorithm is extracted separately (the vertical ruling positions are not used on
         * that path); otherwise the whole page is extracted, using the ruling positions
         * when they were supplied.
         */
        public List<Table> extractTablesBasic(Page page) {
            if (!this.guess) {
                if (this.verticalRulingPositions != null) {
                    return this.basicExtractor.extract(page, this.verticalRulingPositions);
                }
                return this.basicExtractor.extract(page);
            }
            List<Rectangle> detectedAreas = new NurminenDetectionAlgorithm().detect(page);
            List<Table> tables = new ArrayList<Table>();
            for (Rectangle area : detectedAreas) {
                tables.addAll(this.basicExtractor.extract(page.getArea(area)));
            }
            return tables;
        }

        /** Spreadsheet extraction of the whole page. */
        public List<Table> extractTablesSpreadsheet(Page page) {
            return this.spreadsheetExtractor.extract(page);
        }
    }

    /**
     * Builds the application state from an already parsed command line.
     *
     * @param appendable  destination for table output when no output file is requested
     * @param commandLine parsed options
     * @throws ParseException if the area, pages, format or columns option is malformed
     */
    public CommandLineApp(Appendable appendable, CommandLine commandLine) throws ParseException {
        this.defaultOutput = appendable;
        this.pageArea = whichArea(commandLine);
        this.pages = whichPages(commandLine);
        this.outputFormat = whichOutputFormat(commandLine);
        this.tableExtractor = createExtractor(commandLine);
        if (commandLine.hasOption('s')) {
            this.password = commandLine.getOptionValue('s');
        }
    }

    /** Program entry point; exits with status 0 on success and 1 on a reported error. */
    public static void main(String[] strArr) {
        try {
            CommandLine parsed = new GnuParser().parse(buildOptions(), strArr);
            if (parsed.hasOption('h')) {
                printHelp();
                System.exit(0);
            }
            if (parsed.hasOption('v')) {
                System.out.println(VERSION_STRING);
                System.exit(0);
            }
            new CommandLineApp(System.out, parsed).extractTables(parsed);
        } catch (ParseException e) {
            System.err.println("Error: " + e.getMessage());
            System.exit(1);
        }
        System.exit(0);
    }

    /**
     * Runs the extraction: batch mode over a directory when {@code -b} is present,
     * otherwise over the single file named on the command line.
     *
     * @throws ParseException if the file arguments are inconsistent or extraction fails
     */
    public void extractTables(CommandLine commandLine) throws ParseException {
        if (commandLine.hasOption('b')) {
            if (commandLine.getArgs().length != 0) {
                throw new ParseException("Filename specified with batch\nTry --help for help");
            }
            File directory = new File(commandLine.getOptionValue('b'));
            if (!directory.isDirectory()) {
                throw new ParseException("Directory does not exist or is not a directory");
            }
            extractDirectoryTables(commandLine, directory);
            return;
        }
        if (commandLine.getArgs().length != 1) {
            throw new ParseException("Need one filename\nTry --help for help");
        }
        File pdfFile = new File(commandLine.getArgs()[0]);
        if (!pdfFile.exists()) {
            throw new ParseException("File does not exist");
        }
        extractFileTables(commandLine, pdfFile);
    }

    /**
     * Extracts every file of {@code file} whose name ends with {@code .pdf} into a sibling
     * file named by {@link #getOutputFilename(File)}.
     *
     * @throws ParseException if the directory cannot be listed or any extraction fails
     */
    public void extractDirectoryTables(CommandLine commandLine, File file) throws ParseException {
        File[] pdfFiles = file.listFiles(new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                return name.endsWith(".pdf");
            }
        });
        // File.listFiles returns null (not an empty array) when the directory cannot be read.
        if (pdfFiles == null) {
            throw new ParseException("Cannot list files in directory " + file);
        }
        for (File pdfFile : pdfFiles) {
            extractFileInto(pdfFile, new File(getOutputFilename(pdfFile)));
        }
    }

    /** Extracts {@code file} to the {@code -o} output file if given, else to the default output. */
    public void extractFileTables(CommandLine commandLine, File file) throws ParseException {
        if (commandLine.hasOption('o')) {
            extractFileInto(file, new File(commandLine.getOptionValue('o')));
        } else {
            extractFile(file, this.defaultOutput);
        }
    }

    /**
     * Extracts the tables of {@code file} and writes them to {@code file2}, creating or
     * truncating it.
     *
     * @throws ParseException if the output file cannot be opened or extraction fails
     */
    public void extractFileInto(File file, File file2) throws ParseException {
        BufferedWriter bufferedWriter = null;
        try {
            // Opening the FileWriter already creates the file, so no createNewFile() is needed.
            bufferedWriter = new BufferedWriter(new FileWriter(file2.getAbsoluteFile()));
            extractFile(file, bufferedWriter);
        } catch (IOException e) {
            throw parseError("Cannot create file " + file2, e);
        } finally {
            if (bufferedWriter != null) {
                try {
                    bufferedWriter.close();
                } catch (IOException e) {
                    // Diagnostics go to stderr: stdout may be carrying table output.
                    System.err.println("Error in closing the BufferedWriter" + e);
                }
            }
        }
    }

    /**
     * Loads {@code file}, extracts the tables of the selected pages (restricted to
     * {@link #pageArea} when set) and writes them to {@code appendable}.
     *
     * @throws ParseException wrapping any I/O failure while reading or writing
     */
    private void extractFile(File file, Appendable appendable) throws ParseException {
        PDDocument document = null;
        try {
            document = PDDocument.load(file);
            PageIterator pageIterator = getPageIterator(document);
            List<Table> tables = new ArrayList<Table>();
            while (pageIterator.hasNext()) {
                Page page = pageIterator.next();
                if (this.pageArea != null) {
                    page = page.getArea(this.pageArea);
                }
                tables.addAll(this.tableExtractor.extractTables(page));
            }
            writeTables(tables, appendable);
        } catch (IOException e) {
            String message = e.getMessage() != null ? e.getMessage() : e.toString();
            throw parseError(message, e);
        } finally {
            if (document != null) {
                try {
                    document.close();
                } catch (IOException e) {
                    // Diagnostics go to stderr: stdout may be carrying table output.
                    System.err.println("Error in closing pdf document" + e);
                }
            }
        }
    }

    /** Creates the page iterator, honoring the password and the page selection. */
    private PageIterator getPageIterator(PDDocument pDDocument) throws IOException {
        ObjectExtractor objectExtractor;
        if (this.password == null) {
            objectExtractor = new ObjectExtractor(pDDocument);
        } else {
            objectExtractor = new ObjectExtractor(pDDocument, this.password);
        }
        if (this.pages == null) {
            return objectExtractor.extract();
        }
        return objectExtractor.extract(this.pages);
    }

    /** Resolves {@code -f/--format}; defaults to CSV. The name must match an enum constant exactly. */
    private static OutputFormat whichOutputFormat(CommandLine commandLine) throws ParseException {
        if (!commandLine.hasOption('f')) {
            return OutputFormat.CSV;
        }
        try {
            return OutputFormat.valueOf(commandLine.getOptionValue('f'));
        } catch (IllegalArgumentException e) {
            throw parseError(String.format("format %s is illegal. Available formats: %s", commandLine.getOptionValue('f'), Utils.join(",", OutputFormat.formatNames())), e);
        }
    }

    /**
     * Resolves {@code -a/--area} given as {@code top,left,bottom,right}.
     *
     * @return the rectangle, or {@code null} when the option is absent
     */
    private static Rectangle whichArea(CommandLine commandLine) throws ParseException {
        if (!commandLine.hasOption('a')) {
            return null;
        }
        List<Float> coords = parseFloatList(commandLine.getOptionValue('a'));
        if (coords.size() != 4) {
            throw new ParseException("area parameters must be top,left,bottom,right");
        }
        float top = coords.get(0).floatValue();
        float left = coords.get(1).floatValue();
        float bottom = coords.get(2).floatValue();
        float right = coords.get(3).floatValue();
        // Arguments passed as (top, left, right - left, bottom - top).
        return new Rectangle(top, left, right - left, bottom - top);
    }

    /** Resolves {@code -p/--pages}; defaults to page 1. */
    private static List<Integer> whichPages(CommandLine commandLine) throws ParseException {
        String spec = commandLine.hasOption('p') ? commandLine.getOptionValue('p') : "1";
        return Utils.parsePagesOption(spec);
    }

    /**
     * Chooses the extraction method from the command-line flags.
     * {@code -r} (deprecated) and {@code -l} force spreadsheet/lattice extraction;
     * {@code -n} (deprecated), {@code -t}, {@code -c} and {@code -g} force basic/stream
     * extraction; otherwise the method is decided per page.
     */
    private static ExtractionMethod whichExtractionMethod(CommandLine commandLine) {
        if (commandLine.hasOption('r') || commandLine.hasOption('l')) {
            return ExtractionMethod.SPREADSHEET;
        }
        if (commandLine.hasOption('n') || commandLine.hasOption('t')
                || commandLine.hasOption('c') || commandLine.hasOption('g')) {
            return ExtractionMethod.BASIC;
        }
        return ExtractionMethod.DECIDE;
    }

    /** Builds the {@link TableExtractor} configured by the command-line options. */
    private static TableExtractor createExtractor(CommandLine commandLine) throws ParseException {
        TableExtractor extractor = new TableExtractor();
        extractor.setGuess(commandLine.hasOption('g'));
        extractor.setMethod(whichExtractionMethod(commandLine));
        extractor.setUseLineReturns(commandLine.hasOption('u'));
        if (commandLine.hasOption('c')) {
            extractor.setVerticalRulingPositions(parseFloatList(commandLine.getOptionValue('c')));
        }
        return extractor;
    }

    /**
     * Parses a comma-separated list of floating point numbers.
     *
     * @param str the list, e.g. {@code "10.1,20.2,30.3"}
     * @return the parsed values in input order
     * @throws ParseException if any element is not a valid float
     */
    public static List<Float> parseFloatList(String str) throws ParseException {
        List<Float> values = new ArrayList<Float>();
        for (String token : str.split(",")) {
            try {
                values.add(Float.valueOf(Float.parseFloat(token)));
            } catch (NumberFormatException e) {
                throw parseError("Wrong number syntax", e);
            }
        }
        return values;
    }

    private static void printHelp() {
        new HelpFormatter().printHelp("tabula", BANNER, buildOptions(), "", true);
    }

    /**
     * Declares every command-line option understood by the parser.
     * NOTE(review): {@code -d/--debug} and {@code -i/--silent} are accepted but are not
     * read anywhere in this class.
     */
    public static Options buildOptions() {
        Options options = new Options();
        options.addOption("v", "version", false, "Print version and exit.");
        options.addOption("h", "help", false, "Print this help text.");
        options.addOption("g", "guess", false, "Guess the portion of the page to analyze per page.");
        options.addOption("r", "spreadsheet", false, "[Deprecated in favor of -l/--lattice] Force PDF to be extracted using spreadsheet-style extraction (if there are ruling lines separating each cell, as in a PDF of an Excel spreadsheet)");
        options.addOption("n", "no-spreadsheet", false, "[Deprecated in favor of -t/--stream] Force PDF not to be extracted using spreadsheet-style extraction (if there are no ruling lines separating each cell)");
        options.addOption("l", "lattice", false, "Force PDF to be extracted using lattice-mode extraction (if there are ruling lines separating each cell, as in a PDF of an Excel spreadsheet)");
        options.addOption("t", "stream", false, "Force PDF to be extracted using stream-mode extraction (if there are no ruling lines separating each cell)");
        options.addOption("i", "silent", false, "Suppress all stderr output.");
        options.addOption("u", "use-line-returns", false, "Use embedded line returns in cells. (Only in spreadsheet mode.)");
        options.addOption("d", "debug", false, "Print detected table areas instead of processing.");
        addArgOption(options, "b", "batch", "DIRECTORY", "Convert all .pdfs in the provided directory.");
        addArgOption(options, "o", "outfile", "OUTFILE", "Write output to <file> instead of STDOUT. Default: -");
        addArgOption(options, "f", "format", "FORMAT", "Output format: (" + Utils.join(",", OutputFormat.formatNames()) + "). Default: CSV");
        addArgOption(options, "s", "password", "PASSWORD", "Password to decrypt document. Default is empty");
        addArgOption(options, "c", "columns", "COLUMNS", "X coordinates of column boundaries. Example --columns 10.1,20.2,30.3");
        addArgOption(options, "a", "area", "AREA", "Portion of the page to analyze (top,left,bottom,right). Example: --area 269.875,12.75,790.5,561. Default is entire page");
        addArgOption(options, "p", "pages", "PAGES", "Comma separated list of ranges, or all. Examples: --pages 1-3,5-7, --pages 3 or --pages all. Default is --pages 1");
        return options;
    }

    /** Registers an option that takes exactly one argument. */
    private static void addArgOption(Options options, String shortName, String longName, String argName, String description) {
        OptionBuilder.withLongOpt(longName);
        OptionBuilder.withDescription(description);
        OptionBuilder.hasArg();
        OptionBuilder.withArgName(argName);
        options.addOption(OptionBuilder.create(shortName));
    }

    /** Creates a {@link ParseException} that keeps {@code cause} attached for diagnostics. */
    private static ParseException parseError(String message, Throwable cause) {
        ParseException error = new ParseException(message);
        error.initCause(cause);
        return error;
    }

    /** Writes {@code list} to {@code appendable} using the writer for the selected format. */
    private void writeTables(List<Table> list, Appendable appendable) throws IOException {
        Writer writer;
        switch (this.outputFormat) {
            case CSV:
                writer = new CSVWriter();
                break;
            case JSON:
                writer = new JSONWriter();
                break;
            case TSV:
                writer = new TSVWriter();
                break;
            default:
                // Guards against a format constant being added without a matching writer.
                throw new IllegalStateException("Unsupported output format: " + this.outputFormat);
        }
        writer.write(appendable, list);
    }

    /**
     * Derives the batch-mode output path: a trailing {@code .pdf} is replaced by the
     * extension of the selected format (appended when the path has no such suffix).
     */
    private String getOutputFilename(File file) {
        String extension;
        switch (this.outputFormat) {
            case JSON:
                extension = ".json";
                break;
            case TSV:
                extension = ".tsv";
                break;
            case CSV:
            default:
                extension = ".csv";
                break;
        }
        return file.getPath().replaceFirst("(\\.pdf|)$", extension);
    }
}
