package edu.pitt.dbmi.edda.operator.regexop.rank;

import edu.pitt.dbmi.edda.operator.regexop.Annotation;
import edu.pitt.dbmi.edda.operator.regexop.document.DocumentFetcher;
import edu.pitt.dbmi.edda.operator.regexop.document.LabeledDocument;
import edu.pitt.dbmi.edda.operator.regexop.regex.RegularExpression;
import edu.pitt.dbmi.edda.operator.regexop.regex.RegularExpressionList;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.TreeSet;
import java.util.regex.Matcher;

/* loaded from: input_file:edu/pitt/dbmi/edda/operator/regexop/rank/RankerRegularExpression.class */
/**
 * Base class for rankers that score documents against a list of regular expressions.
 *
 * <p>{@link #execute()} walks every document supplied by the configured
 * {@link DocumentFetcher}, records an {@link Annotation} for each regular-expression match,
 * optionally discards annotations that are subsumed by another annotation, and passes the
 * surviving annotations to {@link #rankDocument}, which subclasses implement. The ranks
 * returned for each document are accumulated in {@link #ranks}.
 *
 * <p>Both {@link #setDocumentFetcher} and {@link #setRegularExpressions} must be called before
 * {@link #execute()}.
 */
public abstract class RankerRegularExpression {
    /** Source of the documents processed by {@link #execute()}. */
    protected DocumentFetcher documentFetcher;

    /** Regular expressions matched against every document. */
    protected RegularExpressionList regularExpressions;

    /** When true, annotations subsumed by another annotation are dropped before ranking. */
    private boolean isEliminatingSubsumedAnnotations;

    /** When true, per-document progress and timing messages are written to standard output. */
    protected boolean isGeneratingDiagnostics = false;

    /** Ranks accumulated over all documents processed so far. */
    protected RankList ranks = RankListFactory.newRankerList();

    /**
     * Minimum number of strictly positive ranks a regular expression needs in order to be kept
     * once all documents have been processed. The default of 0 keeps every regular expression.
     */
    private Integer requiredNumberOfNonZeroRanksPerRegEx = 0;

    /**
     * Processes every document from the document fetcher and accumulates the resulting ranks.
     *
     * <p>After the last document, {@link #tallyRegExStatsOverDocumentSet()} is invoked, regular
     * expressions with too few positive ranks are removed (together with their ranks), and two
     * summary lines are printed to standard output.
     *
     * @throws IllegalStateException if the document fetcher or the regular expression list has
     *     not been set
     */
    public void execute() {
        if (this.documentFetcher == null) {
            throw new IllegalStateException("documentFetcher must be set before calling execute()");
        }
        if (this.regularExpressions == null) {
            throw new IllegalStateException("regularExpressions must be set before calling execute()");
        }

        this.documentFetcher.startIteration();
        long totalMillis = 0L;
        long documentsTimed = 0L;
        LabeledDocument document;
        while ((document = this.documentFetcher.getNext()) != null) {
            long startMillis = System.currentTimeMillis();
            if (isGeneratingDiagnostics()) {
                System.out.println("Processing document " + document.documentNumber);
            }

            TreeSet<Annotation> annotations = collectAnnotations(document);
            if (isGeneratingDiagnostics()) {
                System.out.println("Accumulated " + annotations.size() + " potentially overlapping annotations.");
            }

            if (this.isEliminatingSubsumedAnnotations) {
                eliminateSubsumedAnnotations(annotations);
                // Per-document progress output, so it is gated like the other diagnostics.
                if (isGeneratingDiagnostics()) {
                    System.out.println(annotations.size() + " remain after overlap elimination.");
                }
            }

            this.ranks.addAll(rankDocument(document, this.regularExpressions, annotations));

            if (isGeneratingDiagnostics()) {
                documentsTimed++;
                long elapsedMillis = System.currentTimeMillis() - startMillis;
                totalMillis += elapsedMillis;
                long averageMillis = totalMillis / documentsTimed;
                System.out.println("Sparse rank list size is " + this.ranks.size());
                System.out.println("Time to process document is " + elapsedMillis + " milliseconds.");
                System.out.println("Average time per document is " + averageMillis + " milliseconds.");
            }
        }

        tallyRegExStatsOverDocumentSet();
        eliminatePredominantlyZeroRankedRegExs();
        System.out.println("After eliminating predominantly zero producing regExs there are " + this.regularExpressions.size() + " regExs.");
        System.out.println("After eliminating predominantly zero producing regExs there are " + this.ranks.size() + " nonzero ranks.");
    }

    /**
     * Runs every configured regular expression over the document's content and gathers all
     * matches into one set ordered by {@link Annotation#annotationComparator}.
     */
    private TreeSet<Annotation> collectAnnotations(LabeledDocument document) {
        // The content is the same for every regular expression, so it is fetched and released
        // once per document rather than once per expression.
        // NOTE(review): presumably getContent() reloads the text on demand after clearContent();
        // confirm nothing depends on the earlier per-expression fetch/clear cycle.
        String content = document.getContent();
        document.clearContent();

        TreeSet<Annotation> annotations = new TreeSet<>(Annotation.annotationComparator);
        Iterator<RegularExpression> regExIterator = this.regularExpressions.iterator();
        while (regExIterator.hasNext()) {
            annotations.addAll(findRegExMatches(content, regExIterator.next()));
        }
        return annotations;
    }

    /**
     * Removes every regular expression that produced fewer strictly positive ranks than
     * {@link #getRequiredNumberOfNonZeroRanksPerRegEx()}, along with all ranks recorded for it.
     * Ranks are associated with a regular expression by object identity.
     */
    private void eliminatePredominantlyZeroRankedRegExs() {
        Integer required = this.requiredNumberOfNonZeroRanksPerRegEx;
        if (required == null || required.intValue() <= 0) {
            // A count can never fall below a non-positive threshold, so nothing would be removed.
            return;
        }
        int threshold = required.intValue();

        RegularExpressionList rejectedRegExs = new RegularExpressionList();
        ArrayList<Rank> rejectedRanks = new ArrayList<>();
        Iterator<RegularExpression> regExIterator = this.regularExpressions.iterator();
        while (regExIterator.hasNext()) {
            RegularExpression regEx = regExIterator.next();

            // One pass over the rank list both counts the positive ranks and remembers every
            // rank that has to go if the regular expression is rejected.
            ArrayList<Rank> ranksForRegEx = new ArrayList<>();
            int nonZeroCount = 0;
            this.ranks.iterate();
            while (this.ranks.hasNext()) {
                Rank rank = this.ranks.getNext();
                if (rank.regularExpression != regEx) {
                    continue;
                }
                ranksForRegEx.add(rank);
                if (rank.value.doubleValue() > 0.0d) {
                    nonZeroCount++;
                }
            }

            if (nonZeroCount < threshold) {
                rejectedRegExs.add(regEx);
                rejectedRanks.addAll(ranksForRegEx);
            }
        }

        // Removal is deferred until all iteration is finished.
        this.regularExpressions.removeAll(rejectedRegExs);
        this.ranks.removeAll(rejectedRanks);
    }

    /**
     * Produces the ranks for a single document.
     *
     * @param labeledDocument the document being ranked
     * @param regularExpressionList the regular expressions that were matched against the document
     * @param treeSet the annotations found in the document (after optional subsumption filtering)
     * @return the ranks to add to the accumulated rank list
     */
    protected abstract RankList rankDocument(LabeledDocument labeledDocument, RegularExpressionList regularExpressionList, TreeSet<Annotation> treeSet);

    /** Hook invoked by {@link #execute()} once after the last document has been processed. */
    protected abstract void tallyRegExStatsOverDocumentSet();

    /**
     * Selects the annotations that were produced by the given regular expression.
     *
     * @param regularExpression the regular expression whose {@code name} is compared against
     *     each annotation's {@code kind}
     * @param treeSet the annotations to filter; not modified
     * @return a new set, ordered by {@link Annotation#annotationComparator}, holding the
     *     matching annotations
     */
    public TreeSet<Annotation> filterAnnotsForRegEx(RegularExpression regularExpression, TreeSet<Annotation> treeSet) {
        TreeSet<Annotation> matching = new TreeSet<>(Annotation.annotationComparator);
        for (Annotation annotation : treeSet) {
            if (annotation.kind.equals(regularExpression.name)) {
                matching.add(annotation);
            }
        }
        return matching;
    }

    /**
     * Removes, in place, every annotation that is subsumed by an annotation kept before it.
     *
     * <p>The set is consumed in its own sort order: the first remaining annotation is kept,
     * every other remaining annotation it subsumes (per {@code Annotation.subsumes}) is
     * discarded, and the process repeats until the set is empty. The kept annotations are then
     * put back into the set.
     *
     * @param treeSet the annotations to filter; modified in place
     * @return the same set instance that was passed in
     */
    protected TreeSet<Annotation> eliminateSubsumedAnnotations(TreeSet<Annotation> treeSet) {
        ArrayList<Annotation> survivors = new ArrayList<>();
        while (!treeSet.isEmpty()) {
            Annotation keeper = treeSet.pollFirst();
            survivors.add(keeper);
            Iterator<Annotation> candidates = treeSet.iterator();
            while (candidates.hasNext()) {
                if (keeper.subsumes(candidates.next())) {
                    candidates.remove();
                }
            }
        }
        treeSet.addAll(survivors);
        return treeSet;
    }

    /**
     * Finds every match of a regular expression in a string.
     *
     * @param str the text to search
     * @param regularExpression supplies the compiled {@code pattern} and the {@code name}
     *     stored as each annotation's {@code kind}
     * @return a new set, ordered by {@link Annotation#annotationComparator}, with one annotation
     *     per match carrying the match start offset, end offset and matched text
     */
    protected TreeSet<Annotation> findRegExMatches(String str, RegularExpression regularExpression) {
        Matcher matcher = regularExpression.pattern.matcher(str);
        TreeSet<Annotation> matches = new TreeSet<>(Annotation.annotationComparator);
        while (matcher.find()) {
            Annotation annotation = new Annotation();
            annotation.kind = regularExpression.name;
            annotation.sPos = Long.valueOf(matcher.start());
            annotation.ePos = Long.valueOf(matcher.end());
            annotation.str = matcher.group();
            matches.add(annotation);
        }
        return matches;
    }

    /** Returns whether subsumed annotations are dropped before ranking. */
    public boolean isEliminatingSubsumedAnnotations() {
        return this.isEliminatingSubsumedAnnotations;
    }

    /** Sets whether subsumed annotations are dropped before ranking. */
    public void setEliminatingSubsumedAnnotations(boolean z) {
        this.isEliminatingSubsumedAnnotations = z;
    }

    /** Returns the minimum number of positive ranks a regular expression needs to be kept. */
    public Integer getRequiredNumberOfNonZeroRanksPerRegEx() {
        return this.requiredNumberOfNonZeroRanksPerRegEx;
    }

    /** Sets the minimum number of positive ranks a regular expression needs to be kept. */
    public void setRequiredNumberOfNonZeroRanksPerRegEx(Integer num) {
        this.requiredNumberOfNonZeroRanksPerRegEx = num;
    }

    /** Returns the ranks accumulated so far. */
    public RankList getRanks() {
        return this.ranks;
    }

    /** Sets the source of documents to process. */
    public void setDocumentFetcher(DocumentFetcher documentFetcher) {
        this.documentFetcher = documentFetcher;
    }

    /** Returns the source of documents to process. */
    public DocumentFetcher getDocumentFetcher() {
        return this.documentFetcher;
    }

    /** Returns the regular expressions matched against every document. */
    public RegularExpressionList getRegularExpressions() {
        return this.regularExpressions;
    }

    /** Sets the regular expressions matched against every document. */
    public void setRegularExpressions(RegularExpressionList regularExpressionList) {
        this.regularExpressions = regularExpressionList;
    }

    /** Returns whether diagnostic messages are written to standard output. */
    public boolean isGeneratingDiagnostics() {
        return this.isGeneratingDiagnostics;
    }

    /** Sets whether diagnostic messages are written to standard output. */
    public void setGeneratingDiagnostics(boolean z) {
        this.isGeneratingDiagnostics = z;
    }
}
