Q学习:如何应用于“Connect 4”(四子棋)

时间:2020-03-13 14:54:39

标签: python agent q-learning reward

我最近想到要编写一个“四子棋”游戏(Connect 4,希望英语里是这么叫的)。完成之后,我觉得基于 Q-Learning 为它开发一个对手会很有意思。但我找到的所有视频都是在现成的环境中进行 Q 学习的。我现在的问题是:如何把 Q-Learning 应用到我自己的程序中。我的设想是:智能体(agent)获胜得 +100,失败得 -100,平局得 +20,并且每走一步得 -1。奖励的编程实现没有问题,但我在使用 Q 学习公式和 Q 表时遇到了困难,例如应该如何准确地输入当前状态等。如能提供帮助或可能的解决方案,我将非常感激!先谢谢了。这是代码:



from collections import namedtuple
from itertools import cycle
from termcolor import colored

# A participant: display name plus the integer written into the board cells
# that this player occupies.
Player = namedtuple("Player", ["name", "number"])


def ask_column(field, players, player_index):
    """Prompt the current player until a valid column number is entered.

    The player types a one-based column number; the function keeps asking
    until the input is a decimal number inside the board's column range.

    Args:
        field: The board as a list of rows; every row is a list of cells.
        players: Sequence of objects exposing a ``name`` attribute.
        player_index: Index into ``players`` of the player whose turn it is.

    Returns:
        The chosen column as a zero-based index.
    """
    # The number of columns is the length of a row, not the number of rows.
    column_count = len(field[0])
    while True:
        answer = input(
            f"{players[player_index].name}, du bist dran."
            f" Welche Spalte waehlst du (1-{column_count})?"
        )
        # isdecimal() only accepts characters that int() can parse, so inputs
        # such as "²" are reported as "not a number" instead of raising.
        if not answer.isdecimal():
            print(colored("Die Eingabe war keine Zahl, versuche es erneut.","red"))
            continue
        selected_column = int(answer) - 1
        if 0 <= selected_column < column_count:
            return selected_column
        print(colored(
            "Deine Eingabe war nicht in dem genannten Bereich,"
            " versuche es erneut.",
        "red"))


def place(field, selected_column, player):
    """Drop a disc into a column and print the resulting board.

    The disc lands directly above the topmost occupied cell of the column,
    or on the bottom row when the column is still empty. The board is
    modified in place.

    Args:
        field: The board as a list of rows; ``0`` marks an empty cell.
        selected_column: Zero-based index of the column to drop into.
        player: Value to store in the cell that receives the disc.

    Returns:
        A ``(column, row)`` tuple with the zero-based position of the disc.

    Raises:
        ValueError: If the top cell of the column is already occupied.
    """
    if field[0][selected_column] != 0:
        raise ValueError(colored(f"Spalte {selected_column} ist voll! ","red"))

    # Default to the bottom row; move up if an occupied cell is found first.
    target_row = len(field) - 1
    for index, cells in enumerate(field):
        if cells[selected_column] != 0:
            target_row = index - 1
            break

    field[target_row][selected_column] = player
    for cells in field:
        print(cells)
    return (selected_column, target_row)


def _has_four_in_line(field, row, column, row_step, column_step, number):
    """Return True if ``number`` fills 4 consecutive cells through (row, column).

    Walks the seven cells centred on ``(row, column)`` along the direction
    ``(row_step, column_step)`` and counts consecutive matching cells.
    """
    row_count = len(field)
    # Columns are bounded by the row length, not by the number of rows.
    column_count = len(field[0]) if field else 0
    run_length = 0
    for offset in range(-3, 4):
        current_row = row + offset * row_step
        current_column = column + offset * column_step
        if not (0 <= current_row < row_count
                and 0 <= current_column < column_count):
            # Off-board cells only occur at the ends of the window, so they
            # can be skipped without interrupting a run.
            continue
        if field[current_row][current_column] == number:
            run_length += 1
            if run_length >= 4:
                return True
        else:
            run_length = 0
    return False


def check_victory(field, latest_occupied_coordinate, players, player_index):
    """Tell whether the most recent move completed a line of four.

    Only lines passing through the latest disc are inspected: its row, its
    column and both diagonals. Works for any rectangular board size.

    Args:
        field: The board as a list of rows holding player numbers (0 = empty).
        latest_occupied_coordinate: ``(column, row)`` of the last placed
            disc, both zero-based.
        players: Sequence of objects exposing a ``number`` attribute.
        player_index: Index into ``players`` of the player who just moved.

    Returns:
        True if that player has at least four connected discs in a line
        through the given coordinate, otherwise False.
    """
    column, row = latest_occupied_coordinate
    number = players[player_index].number
    # (row_step, column_step): horizontal, vertical, and the two diagonals.
    directions = ((0, 1), (1, 0), (1, 1), (1, -1))
    return any(
        _has_four_in_line(field, row, column, row_step, column_step, number)
        for row_step, column_step in directions
    )


def main():
    """Run interactive two-player games on the console until the players quit.

    Asks for both player names once, then plays rounds on a fresh board of
    6 rows and 7 columns. The starting player alternates from round to
    round. A round ends with a win or with a completely filled board; after
    each round the players are asked whether they want to play again.
    """
    players = [
        Player(input("Spielername Spieler 1:"), 1),
        Player(input("Spielername Spieler 2:"), 2),
    ]

    # Player 1 opens the first round, player 2 the second, and so on.
    for starting_index in cycle([0, 1]):
        # TODO Use `None` and `Player` objects instead of 0, 1, and 2.
        field = [[0] * 7 for _ in range(6)]

        for player_index in cycle([starting_index, 1 - starting_index]):
            current_player = players[player_index]

            # Keep asking until the disc could actually be placed.
            latest_occupied_coordinate = None
            while latest_occupied_coordinate is None:
                try:
                    latest_occupied_coordinate = place(
                        field,
                        ask_column(field, players, player_index),
                        current_player.number,
                    )
                except ValueError:
                    print(colored(
                        "Das ist leider nicht moeglich."
                        " Probiere eine andere Spalte",
                        "red",
                    ))

            won = check_victory(
                field, latest_occupied_coordinate, players, player_index
            )
            if won:
                print(colored(f"{current_player.name} hat gewonnen!", "blue"))
                break

            board_is_full = all(cell != 0 for cells in field for cell in cells)
            if board_is_full:
                print(colored("Unentschieden! Keiner hat gewonnen.", "blue"))
                break

        answer = input("Moechtet ihr nochmal spielen? 1 = Ja, 2 = Nein")
        if answer != "1":
            break


# Start the game only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

0 个答案:

没有答案