package edu.pitt.dbmi.edda.operator.regexop.rank;

import edu.pitt.dbmi.edda.operator.regexop.Annotation;
import edu.pitt.dbmi.edda.operator.regexop.document.LabeledDocument;
import edu.pitt.dbmi.edda.operator.regexop.regex.RegularExpression;
import edu.pitt.dbmi.edda.operator.regexop.regex.RegularExpressionList;
import java.util.Iterator;
import java.util.TreeSet;

/* loaded from: input_file:edu/pitt/dbmi/edda/operator/regexop/rank/RankerTfIdf.class */
/**
 * Ranker that weights every (document, regular expression) pair with a TF-IDF style score.
 *
 * <p>The work happens in two phases:
 * <ol>
 *   <li>{@link #rankDocument} stores, for each regular expression, the number of annotations
 *       that {@code filterAnnotsForRegEx} returns for it as the rank value (the raw term
 *       frequency {@code tf}).</li>
 *   <li>{@link #tallyRegExStatsOverDocumentSet} rewrites each stored value in place as
 *       {@code wtf * idf}, where {@code wtf = 1 + log10(tf)} when {@code tf > 0} and
 *       {@code 0} otherwise, and {@code idf = log10(N / (1 + df))}. {@code N} is
 *       {@code documentFetcher.getTotalNumberOfDocuments()} and {@code df} is the number of
 *       fetched documents whose first rank for the expression has a value above zero.</li>
 * </ol>
 *
 * <p>Because of the {@code 1 + df} denominator the idf is zero or negative whenever
 * {@code 1 + df >= N}.
 */
public class RankerTfIdf extends RankerRegularExpression {

    /**
     * Builds one rank per regular expression for the given document, using the number of
     * matching annotations as the rank value.
     *
     * <p>NOTE(review): {@code regularExpressionList} is not read; the expressions iterated are
     * those in {@code this.regularExpressions}. Presumably both refer to the same list, which
     * should be confirmed against the caller.
     *
     * @param labeledDocument       document the ranks are created for
     * @param regularExpressionList unused by this implementation
     * @param treeSet               annotations to be filtered per regular expression
     * @return a new rank list holding one rank per expression in {@code this.regularExpressions}
     */
    @Override
    protected RankList rankDocument(LabeledDocument labeledDocument, RegularExpressionList regularExpressionList, TreeSet<Annotation> treeSet) {
        RankList documentRanks = RankListFactory.newRankerList();
        for (Iterator<RegularExpression> regexes = this.regularExpressions.iterator(); regexes.hasNext();) {
            RegularExpression regex = regexes.next();
            TreeSet<Annotation> matches = filterAnnotsForRegEx(regex, treeSet);
            Rank rank = Rank.newRank(labeledDocument, regex);
            // Raw term frequency; converted to a TF-IDF weight later by
            // tallyRegExStatsOverDocumentSet().
            rank.value = Double.valueOf(matches.size());
            documentRanks.add(rank);
        }
        return documentRanks;
    }

    /**
     * For every regular expression, computes its inverse document frequency, prints it to
     * standard output, and converts the stored rank values of that expression to TF-IDF weights.
     */
    @Override
    protected void tallyRegExStatsOverDocumentSet() {
        for (Iterator<RegularExpression> regexes = this.regularExpressions.iterator(); regexes.hasNext();) {
            RegularExpression regex = regexes.next();
            RankList regexRanks = this.ranks.getRanksFor(regex);
            double idf = calculateInverseDocumentFrequency(regexRanks, regex);
            System.out.println("Inverse document freq for " + regex.name + " = \t" + idf);
            calculateTfIdf(idf, regexRanks);
        }
    }

    /**
     * Replaces, in place, the value of every rank in {@code rankList} that belongs to a fetched
     * document with its weighted term frequency multiplied by {@code idf}.
     *
     * @param idf      inverse document frequency to multiply with
     * @param rankList ranks to rewrite
     */
    private void calculateTfIdf(double idf, RankList rankList) {
        this.documentFetcher.startIteration();
        for (LabeledDocument document = this.documentFetcher.getNext();
                document != null;
                document = this.documentFetcher.getNext()) {
            RankList documentRanks = rankList.getRanksFor(document);
            documentRanks.iterate();
            while (documentRanks.hasNext()) {
                Rank rank = documentRanks.getNext();
                // Dampen the raw count first, then scale the dampened value by the idf.
                calculateWeightedTermFrequency(rank);
                rank.value = Double.valueOf(rank.value.doubleValue() * idf);
            }
        }
    }

    /**
     * Overwrites {@code rank.value} with {@code 1 + log10(value)} when the current value is
     * positive, and with {@code 0} otherwise.
     *
     * @param rank rank whose value is rewritten
     */
    private void calculateWeightedTermFrequency(Rank rank) {
        double termFrequency = rank.value.doubleValue();
        double weighted = termFrequency > 0.0d ? 1.0d + Math.log10(termFrequency) : 0.0d;
        rank.value = Double.valueOf(weighted);
    }

    /**
     * Counts the fetched documents whose first rank in {@code rankList} has a positive value,
     * prints that count to standard output, and returns {@code log10(N / (1 + count))} with
     * {@code N = documentFetcher.getTotalNumberOfDocuments()}.
     *
     * @param rankList          ranks to inspect
     * @param regularExpression expression named in the printed message
     * @return the inverse document frequency
     */
    private double calculateInverseDocumentFrequency(RankList rankList, RegularExpression regularExpression) {
        // Kept as a double (not an int) so the printed message keeps its "3.0"-style formatting.
        double documentsWithMatch = 0.0d;
        this.documentFetcher.startIteration();
        for (LabeledDocument document = this.documentFetcher.getNext();
                document != null;
                document = this.documentFetcher.getNext()) {
            RankList documentRanks = rankList.getRanksFor(document);
            documentRanks.iterate();
            // Only the first rank of the document is examined.
            if (documentRanks.hasNext() && documentRanks.getNext().value.doubleValue() > 0.0d) {
                documentsWithMatch += 1.0d;
            }
        }
        System.out.println("The regular expression " + regularExpression.name + " appears at least once in " + documentsWithMatch + " documents.");
        return Math.log10(this.documentFetcher.getTotalNumberOfDocuments() / (1.0d + documentsWithMatch));
    }
}
