package edu.pitt.dbmi.edda.operator.regexop.document;

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.lang.StringUtils;

/* loaded from: input_file:edu/pitt/dbmi/edda/operator/regexop/document/DocumentFetcher.class */
/**
 * Builds a list of {@link LabeledDocument} entries from a set of
 * (label, directory) pairs and hands them out one at a time.
 *
 * <p>Typical usage: construct with the pairs, call
 * {@link #establishLabeledDocuments()}, then {@link #startIteration()} and
 * repeatedly call {@link #getNext()} until it returns {@code null}.
 */
public class DocumentFetcher {
    /** Each element is a two-slot array: index 0 is the label, index 1 the directory path. */
    private List<String[]> labelDirectoryPairs;

    /** Cursor over {@link #labeledDocuments}; created by {@link #startIteration()}. */
    private Iterator<LabeledDocument> labeledDocumentIterator;

    /** Every document registered so far, in registration order. */
    private final List<LabeledDocument> labeledDocuments = new ArrayList<>();

    /** Running count of registered documents; also the source of document numbers. */
    private int totalNumberOfDocuments = 0;

    /** Flag copied onto every document created by {@link #establishLabeledDocuments()}. */
    private boolean isUsingAbstractsOnly = false;

    /**
     * Creates a fetcher with no label/directory pairs. Such an instance
     * registers no documents when {@link #establishLabeledDocuments()} is called.
     */
    public DocumentFetcher() {
    }

    /**
     * Creates a fetcher over the given label/directory pairs.
     *
     * @param list pairs whose element 0 is the label and element 1 is the
     *             path of the directory holding that label's files
     */
    public DocumentFetcher(List<String[]> list) {
        this.labelDirectoryPairs = list;
    }

    /**
     * Registers one {@link LabeledDocument} per entry returned by
     * {@link File#listFiles()} for every configured directory.
     *
     * <p>Each document receives the next sequential number (starting at 0 for
     * a fresh instance), a key made of that number left-padded with zeros to
     * ten characters followed by {@code ":"}, the label of its directory, and
     * the current abstracts-only flag. Paths for which {@code listFiles()}
     * returns {@code null} are skipped. Calling this method again appends to
     * the documents already registered.
     */
    public void establishLabeledDocuments() {
        // The no-arg constructor leaves the pair list unset; treat that as "nothing to load"
        // rather than failing with a NullPointerException.
        if (this.labelDirectoryPairs == null) {
            return;
        }
        for (String[] pair : this.labelDirectoryPairs) {
            String label = pair[0];
            File[] entries = new File(pair[1]).listFiles();
            if (entries == null) {
                // listFiles() yields null when the path is not a directory or cannot be read.
                continue;
            }
            for (File entry : entries) {
                int number = this.totalNumberOfDocuments++;
                LabeledDocument document = new LabeledDocument();
                document.documentNumber = Integer.valueOf(number);
                document.key = StringUtils.leftPad(String.valueOf(number), 10, "0") + ":";
                document.label = label;
                document.file = entry;
                document.isUsingAbstractsOnly = Boolean.valueOf(isUsingAbstractsOnly());
                this.labeledDocuments.add(document);
            }
        }
    }

    /** Positions the cursor used by {@link #getNext()} at the first registered document. */
    public void startIteration() {
        this.labeledDocumentIterator = this.labeledDocuments.iterator();
    }

    /**
     * Returns the next registered document.
     *
     * <p>If {@link #startIteration()} has not been called yet, iteration is
     * started implicitly from the first document instead of failing with a
     * {@code NullPointerException}.
     *
     * @return the next document, or {@code null} when none remain
     */
    public LabeledDocument getNext() {
        if (this.labeledDocumentIterator == null) {
            startIteration();
        }
        return this.labeledDocumentIterator.hasNext() ? this.labeledDocumentIterator.next() : null;
    }

    /**
     * @return the number of documents registered so far
     */
    public int getTotalNumberOfDocuments() {
        return this.totalNumberOfDocuments;
    }

    /**
     * Counts the registered documents per label.
     *
     * @return a map from label to the number of documents carrying it
     */
    private HashMap<String, Double> tallyLabeledClasses() {
        HashMap<String, Double> tally = new HashMap<>();
        for (LabeledDocument document : this.labeledDocuments) {
            Double seen = tally.get(document.label);
            double updated = (seen == null) ? 1.0d : seen.doubleValue() + 1.0d;
            tally.put(document.label, Double.valueOf(updated));
        }
        return tally;
    }

    /**
     * @return the flag that is copied onto documents created by
     *         {@link #establishLabeledDocuments()}
     */
    public boolean isUsingAbstractsOnly() {
        return this.isUsingAbstractsOnly;
    }

    /**
     * Sets the abstracts-only flag. Only documents registered after this call
     * pick up the new value; documents already registered keep theirs.
     *
     * @param z the new flag value
     */
    public void setUsingAbstractsOnly(boolean z) {
        this.isUsingAbstractsOnly = z;
    }

    /**
     * @return a multi-line summary listing, for each label, how many
     *         documents are registered under it
     */
    @Override
    public String toString() {
        StringBuilder summary = new StringBuilder();
        summary.append("\nDocumentFetcher:\n");
        HashMap<String, Double> tally = tallyLabeledClasses();
        for (String label : tally.keySet()) {
            summary.append("\t").append(label)
                    .append(" has ").append(tally.get(label))
                    .append(" documents.\n");
        }
        return summary.toString();
    }
}
