package org.encogx.ml.world.learning.mdp;

import java.util.Iterator;
import org.encogx.ml.world.Action;
import org.encogx.ml.world.State;
import org.encogx.ml.world.SuccessorState;
import org.encogx.ml.world.World;

/* loaded from: input_file:org/encogx/ml/world/learning/mdp/ValueIteration.class */
/**
 * Value-iteration update rule layered on top of {@link MarkovDecisionProcess}.
 *
 * <p>Each call to {@link #iteration()} makes one pass over the states returned by the
 * world and rewrites element 0 of every state's policy-value array in place, so later
 * states in the same pass read values already updated earlier in that pass.
 */
public class ValueIteration extends MarkovDecisionProcess {
    /** Factor applied to the expected successor value of each action. */
    private double discountFactor;

    /**
     * Creates the solver.
     *
     * @param world the world passed through to the superclass constructor
     * @param d     the discount factor multiplied into every action's expected value
     */
    public ValueIteration(World world, double d) {
        super(world);
        this.discountFactor = d;
    }

    /**
     * Recomputes element 0 of the policy-value array of a single state.
     *
     * <p>For a goal state the value is simply the state's reward. For any other state,
     * the expected successor value (sum of probability times successor value) is computed
     * for every action, multiplied by the discount factor, and the largest such result is
     * added to the state's reward.
     *
     * <p>If the world returns no actions, the running maximum is never updated and the
     * stored value is negative infinity plus the reward.
     *
     * @param state the state whose value is overwritten
     */
    public void calculateValue(State state) {
        if (getWorld().isGoalState(state)) {
            // Goal states are not backed up from successors; they keep their own reward.
            state.getPolicyValue()[0] = state.getReward();
        } else {
            double bestDiscounted = Double.NEGATIVE_INFINITY;
            for (Action action : getWorld().getActions()) {
                double expectedValue = 0.0d;
                for (SuccessorState successor
                        : getWorld().getProbability().determineSuccessorStates(state, action)) {
                    double successorValue = successor.getState().getPolicyValue()[0];
                    expectedValue += successor.getProbability() * successorValue;
                }
                // Discount is applied per action, before taking the maximum.
                double discounted = expectedValue * this.discountFactor;
                bestDiscounted = Math.max(bestDiscounted, discounted);
            }
            state.getPolicyValue()[0] = bestDiscounted + state.getReward();
        }
    }

    /**
     * Runs {@link #calculateValue(State)} once for every state of the world, in the
     * order the world's state collection iterates them.
     */
    public void iteration() {
        for (State current : getWorld().getStates()) {
            calculateValue(current);
        }
    }
}
