我最近萌生了编写“四子棋”(Connect 4,希望英语里是这么叫的)游戏的想法。游戏写完之后,我觉得基于 Q-Learning 为它开发一个对手会很有意思。但我找到的所有视频讲的都是在现成的环境中进行 Q-Learning,所以我现在的问题是:如何把 Q-Learning 应用到我自己的程序中。我的设想是:智能体(agent)获胜得 +100,失败得 -100,平局得 +20,此外每走一步得 -1。对奖励进行编程没有问题,但我在使用 Q-Learning 公式和 Q 表时遇到了困难,例如不清楚应该如何准确地输入当前状态等。如能提供帮助或可能的解决方案,我将非常感激!先行谢过。以下是代码:
from collections import namedtuple
from itertools import cycle
from termcolor import colored
# Record for one participant: the display name entered at start-up and the
# number that marks this player's pieces on the board.
Player = namedtuple("Player", "name number")
def ask_column(field, players, player_index):
    """Prompt the current player until a valid column number is entered.

    Args:
        field: Board as a list of rows; the length of the first row is
            taken as the number of columns.
        players: Sequence of objects exposing a ``name`` attribute.
        player_index: Index into ``players`` of the player to ask.

    Returns:
        The chosen column as a zero-based index.
    """
    # Columns run along a row; len(field) would be the number of rows and
    # would lock the players out of the last column of a 6x7 board.
    column_count = len(field[0])
    while True:
        answer = input(
            f"{players[player_index].name}, du bist dran."
            f" Welche Spalte waehlst du (1-{column_count})?"
        )
        # isdecimal() only accepts characters that int() can parse;
        # isdigit() also lets through e.g. "²", which makes int() raise.
        if not answer.isdecimal():
            print(colored("Die Eingabe war keine Zahl, versuche es erneut.","red"))
            continue
        # Players count columns from 1, the board is indexed from 0.
        selected_column = int(answer) - 1
        if 0 <= selected_column < column_count:
            return selected_column
        print(colored(
            "Deine Eingabe war nicht in dem genannten Bereich,"
            " versuche es erneut.",
            "red"))
def place(field, selected_column, player):
    """Drop a piece into ``selected_column`` and print the updated board.

    The column is scanned from row 0 downwards. The piece lands directly
    above the first occupied cell, or in the last row when the whole
    column is still empty.

    Args:
        field: Board as a list of rows, with 0 marking an empty cell.
            Modified in place.
        selected_column: Zero-based column index.
        player: Value written into the landing cell.

    Returns:
        ``(selected_column, row)`` of the cell that was filled.

    Raises:
        ValueError: If the column does not exist or its topmost cell
            (row 0) is already occupied.
    """
    # A negative index would silently wrap around to the other side of the
    # board, so reject anything outside the real column range.
    if not 0 <= selected_column < len(field[0]):
        raise ValueError(f"Spalte {selected_column} existiert nicht!")
    if field[0][selected_column] != 0:
        raise ValueError(colored(f"Spalte {selected_column} ist voll! ","red"))

    # Default to the last row; move up if an occupied cell is found first.
    landing_row = len(field) - 1
    for row, cells in enumerate(field):
        if cells[selected_column] != 0:
            # Row 0 is known to be empty here, so row - 1 is never negative.
            landing_row = row - 1
            break

    field[landing_row][selected_column] = player
    for field_row in field:
        print(field_row)
    return (selected_column, landing_row)
def check_victory(field, latest_occupied_coordinate, players, player_index):
    """Report whether the latest move completed a line of four pieces.

    Only lines passing through the most recently filled cell are examined:
    horizontal, vertical and both diagonals, each within three cells of
    that cell.

    Args:
        field: Board as a list of equally long rows.
        latest_occupied_coordinate: ``(column, row)`` of the last move.
        players: Sequence of objects exposing a ``number`` attribute.
        player_index: Index into ``players`` of the player who just moved.

    Returns:
        ``True`` if that player has at least four adjacent pieces in any
        of the four directions, otherwise ``False``.
    """
    column, row = latest_occupied_coordinate
    token = players[player_index].number
    row_count = len(field)
    # Column bounds come from a row's length, not from the number of rows;
    # the two only line up by accident on a 6x7 board.
    column_count = len(field[0]) if field else 0

    def has_four(row_step, column_step):
        # Walk the seven cells centred on the last move along one direction
        # and track the current streak of the player's pieces.
        streak = 0
        for offset in range(-3, 4):
            r = row + offset * row_step
            c = column + offset * column_step
            if not (0 <= r < row_count and 0 <= c < column_count):
                # Off-board cells only occur at the ends of the window,
                # so skipping them cannot join two separate streaks.
                continue
            if field[r][c] == token:
                streak += 1
                if streak >= 4:
                    return True
            else:
                streak = 0
        return False

    # (row step, column step): horizontal, vertical, and the two diagonals.
    directions = ((0, 1), (1, 0), (1, 1), (1, -1))
    return any(has_four(dr, dc) for dr, dc in directions)
def main():
    """Play console rounds until the players decline a rematch.

    Asks for both player names once, then plays round after round on a
    fresh board of 6 rows and 7 columns. The starting player alternates
    between rounds, beginning with player 1. A round ends when a move wins
    or when no empty cell is left.
    """
    players = [
        Player(input("Spielername Spieler 1:"), 1),
        Player(input("Spielername Spieler 2:"), 2),
    ]
    for does_player_2_start in cycle([False, True]):
        # TODO Use `None` and `Player` objects instead of 0, 1, and 2.
        field = [[0] * 7 for _ in range(6)]

        turn_order = cycle([0, 1])
        if does_player_2_start:
            # Discard player 1's opening turn so player 2 moves first.
            next(turn_order)

        for player_index in turn_order:
            current = players[player_index]

            # Keep asking until the piece could actually be placed.
            while True:
                try:
                    latest_occupied_coordinate = place(
                        field,
                        ask_column(field, players, player_index),
                        current.number,
                    )
                    break
                except ValueError:
                    print(colored(
                        "Das ist leider nicht moeglich."
                        " Probiere eine andere Spalte"
                        ,"red"))

            if check_victory(
                field, latest_occupied_coordinate, players, player_index
            ):
                print(colored("{} hat gewonnen!".format(current.name),"blue"))
                break

            board_is_full = all(cell != 0 for cells in field for cell in cells)
            if board_is_full:
                print(colored("Unentschieden! Keiner hat gewonnen.","blue"))
                break

        continue_playing = input(
            "Moechtet ihr nochmal spielen? 1 = Ja, 2 = Nein"
        )
        # Anything other than "1" ends the session.
        if continue_playing != "1":
            break
# Start the game only when this file is run as a script, not on import.
if __name__ == "__main__":
    main()