Question

我目前正在使用一个游戏代码库，其中有一部分使用遗传算法来实现某些AI。这个概念对我来说相当陌生：虽然我已经查阅过资料并了解了它的工作原理，但我很难把“正在进化、变异的内容”与实际代码完全对应起来。

这些动作（action）基本上就是AI每一步可以选择的所有移动方式。因此，它试图针对某个状态进行进化，并找出哪个动作是最好的。有人能帮我把它理解得更清楚一些吗？

// Discount base: simulate() scales the heuristic value by GAMMA raised to the reached depth.
private static double GAMMA = 0.90;
// simulate() throws TimeoutException once the timer reports fewer remaining milliseconds than this.
private static long BREAK_MS = 35;
// Number of action indices in each individual (third dimension of genome).
private static int SIMULATION_DEPTH = 7;
// Number of individuals in each population (second dimension of genome).
private static int POPULATION_SIZE = 5;

// Per-gene probability that the tournament loser copies the winner's gene.
private static double RECPROB = 0.1;
// Per-gene probability that a loser's gene is replaced by a random action index;
// with 1/SIMULATION_DEPTH this is one mutated gene per individual on average.
private double MUT = (1.0 / SIMULATION_DEPTH);
// Number of available actions reported by the state passed to the constructor.
private final int N_ACTIONS;

// Timer of the current act() call; simulate() reads it to decide when to abort.
private ElapsedCpuTimer timer;

// Indexed as [first-action index][individual][step]; every entry is an action index
// in the range [0, N_ACTIONS).
private int genome[][][];
// Action index -> action, filled in the constructor.
private final HashMap<Integer, Types.ACTIONS> action_mapping;
// Action -> action index (inverse of action_mapping).
private final HashMap<Types.ACTIONS, Integer> r_action_mapping;
// Source of randomness for genome initialisation, tournament selection, recombination and mutation.
protected Random randomGenerator;

// Simulations completed during the current act() call; reset at the start of act().
private int numSimulations;

/**
 * Builds the agent: numbers the actions available in the given state,
 * records the number of actions and fills the genome with random action indices.
 *
 * @param stateObs     state observation of the current game; its available
 *                     actions define the action indices used by the genome.
 * @param elapsedTimer timer for the controller creation (not read by this constructor).
 */
public Agent(StateObservation stateObs, ElapsedCpuTimer elapsedTimer) {
    randomGenerator = new Random();
    action_mapping = new HashMap<Integer, Types.ACTIONS>();
    r_action_mapping = new HashMap<Types.ACTIONS, Integer>();

    // Each action gets the next free index (0, 1, 2, ...) in iteration order;
    // the forward map grows by exactly one entry per action, so its size is that index.
    for (Types.ACTIONS action : stateObs.getAvailableActions()) {
        final int index = action_mapping.size();
        action_mapping.put(index, action);
        r_action_mapping.put(action, index);
    }

    N_ACTIONS = stateObs.getAvailableActions().size();
    initGenome(stateObs);
}


/**
 * Runs one tournament of a microbial genetic algorithm on the given population.
 *
 * <p>Two distinct individuals are drawn uniformly at random from the whole
 * population and each is scored with {@code simulate}. The lower-scoring one
 * (the loser; on a tie the first drawn individual loses) is then modified in place:
 * each gene is overwritten with the winner's gene with probability {@code RECPROB},
 * and afterwards each gene is replaced by a random action index with probability
 * {@code MUT}. The winner is left untouched.
 *
 * @param actionGenome population to evolve, indexed as [individual][step]; the loser's row is mutated
 * @param stateObs     state from which both individuals are simulated
 * @param heuristic    heuristic used by {@code simulate} to score the resulting state
 * @return the higher of the two simulated scores
 * @throws TimeoutException         if {@code simulate} runs out of time
 * @throws IllegalArgumentException if the population has fewer than two individuals
 */
double microbial_tournament(int[][] actionGenome, StateObservation stateObs, StateHeuristic heuristic) throws TimeoutException {
    final int populationSize = actionGenome.length;
    if (populationSize < 2) {
        // A tournament needs two distinct contestants.
        throw new IllegalArgumentException(
                "Tournament needs at least 2 individuals, got " + populationSize);
    }

    // nextInt(bound) is uniform over [0, bound), so every individual - including the
    // last one - can be selected. Scaling nextDouble() by (size - 1) and truncating
    // could never yield the last index.
    final int a = randomGenerator.nextInt(populationSize);
    // Draw the opponent from the remaining size-1 individuals: pick from [0, size-1)
    // and skip over a, which is uniform over all indices different from a.
    int b = randomGenerator.nextInt(populationSize - 1);
    if (b >= a) {
        b++;
    }

    final double score_a = simulate(stateObs, heuristic, actionGenome[a]);
    final double score_b = simulate(stateObs, heuristic, actionGenome[b]);

    final int winner;
    final int loser;
    if (score_a > score_b) {
        winner = a;
        loser = b;
    } else {
        winner = b;
        loser = a;
    }

    final int length = actionGenome[winner].length;

    // Recombination: the loser inherits some genes from the winner.
    for (int i = 0; i < length; i++) {
        if (randomGenerator.nextDouble() < RECPROB) {
            actionGenome[loser][i] = actionGenome[winner][i];
        }
    }

    // Mutation: some of the loser's genes become a random action index.
    for (int i = 0; i < length; i++) {
        if (randomGenerator.nextDouble() < MUT) {
            actionGenome[loser][i] = randomGenerator.nextInt(N_ACTIONS);
        }
    }

    return Math.max(score_a, score_b);
}

/**
 * Allocates the genome as N_ACTIONS populations of POPULATION_SIZE individuals,
 * each SIMULATION_DEPTH genes long, and sets every gene to a random action index.
 *
 * @param stateObs current state observation (not read by this method)
 */
private void initGenome(StateObservation stateObs) {
    genome = new int[N_ACTIONS][POPULATION_SIZE][SIMULATION_DEPTH];

    // Visit every gene in row-major order and draw a random action index for it.
    for (int[][] population : genome) {
        for (int[] individual : population) {
            for (int step = 0; step < individual.length; step++) {
                individual[step] = randomGenerator.nextInt(N_ACTIONS);
            }
        }
    }
}


/**
 * Plays the given action sequence on a copy of the state and scores the outcome.
 *
 * <p>The copy is advanced one action at a time until the sequence is exhausted
 * or the game is over. The returned score is the heuristic value of the final
 * state multiplied by {@code GAMMA} raised to the reached depth (the index of
 * the step that ended the game, or the sequence length if the game did not end).
 *
 * @param stateObs  starting state; it is copied, not advanced
 * @param heuristic heuristic used to evaluate the final state
 * @param policy    action indices to play, resolved through {@code action_mapping}
 * @return the discounted heuristic value of the reached state
 * @throws TimeoutException if fewer than {@code BREAK_MS} milliseconds remain on the timer
 */
private double simulate(StateObservation stateObs, StateHeuristic heuristic, int[] policy) throws TimeoutException {
    // Refuse to start a rollout when the time budget is nearly exhausted.
    if (timer.remainingTimeMillis() < BREAK_MS) {
        throw new TimeoutException("Timeout");
    }

    final StateObservation rollout = stateObs.copy();
    int depthReached = 0;
    while (depthReached < policy.length) {
        rollout.advance(action_mapping.get(policy[depthReached]));
        if (rollout.isGameOver()) {
            // Leave depthReached at the index of the step that ended the game.
            break;
        }
        depthReached++;
    }

    numSimulations++;
    return Math.pow(GAMMA, depthReached) * heuristic.evaluateState(rollout);
}

/**
 * Chooses the next action by evolving one population per available first action.
 *
 * <p>In every iteration, and for every available action, the state is copied and
 * advanced by that action, then one tournament is run on the population that
 * belongs to the action. The best score seen per action (plus a tiny random
 * term, which breaks ties between equal scores) is remembered. The search stops
 * after {@code iterations} rounds or as soon as a tournament times out.
 *
 * @param stateObs   current state observation
 * @param maxdepth   simulation depth requested by the caller (not read by this method)
 * @param heuristic  heuristic used to score simulated states
 * @param iterations maximum number of rounds over all available actions
 * @return the action whose index has the highest recorded score according to {@code Utils.argmax}
 */
private Types.ACTIONS microbial(StateObservation stateObs, int maxdepth, StateHeuristic heuristic, int iterations) {

    final double[] bestPerAction = new double[stateObs.getAvailableActions().size()];
    for (int slot = 0; slot < bestPerAction.length; slot++) {
        bestPerAction[slot] = Double.NEGATIVE_INFINITY;
    }

    search:
    for (int round = 0; round < iterations; round++) {
        for (Types.ACTIONS firstAction : stateObs.getAvailableActions()) {
            final StateObservation afterFirst = stateObs.copy();
            afterFirst.advance(firstAction);

            final int actionIndex = this.r_action_mapping.get(firstAction);

            final double candidate;
            try {
                final double tournamentBest = microbial_tournament(genome[actionIndex], afterFirst, heuristic);
                candidate = tournamentBest + randomGenerator.nextDouble() * 0.00001;
            } catch (TimeoutException e) {
                // Out of time: stop searching and answer with what has been found so far.
                break search;
            }

            if (candidate > bestPerAction[actionIndex]) {
                bestPerAction[actionIndex] = candidate;
            }
        }
    }

    return this.action_mapping.get(Utils.argmax(bestPerAction));
}

/**
 * Chooses the action to play in the current game step.
 *
 * <p>Stores the timer for use by the simulations, resets the simulation
 * counter and runs the microbial search for at most 100 iterations with a
 * {@code WinScoreHeuristic} built from the current state.
 *
 * @param stateObs     Observation of the current state.
 * @param elapsedTimer Timer when the action returned is due.
 * @return An action for the current state
 */
public Types.ACTIONS act(StateObservation stateObs, ElapsedCpuTimer elapsedTimer) {
    numSimulations = 0;
    this.timer = elapsedTimer;

    return microbial(stateObs, SIMULATION_DEPTH, new WinScoreHeuristic(stateObs), 100);
}


/**
 * Drawing hook; this agent currently draws nothing.
 *
 * @param g graphics context to draw on (unused)
 */
@Override
public void draw(Graphics2D g)
{
    // Disabled debug overlay: would print the number of simulations run in the last act() call.
    //g.drawString("Num Simulations: " + numSimulations, 10, 20);
}

}

Answer 1

genome是解决方案的编码（基因型），由simulate转换到实际的问题空间（表型）。此外，作为评估的一部分，simulate还会返回适应度（fitness）分数。其他方法则负责初始化或扰动基因型，以获得不同的解决方案。

如果您还需要更多帮助，请提出更具体的问题，而不是贴出一大堆代码再加上一句“请解释”！

难以理解遗传算法 - Java

1 个答案: