我正在训练NN使用强化学习来玩2048。或者至少我认为我是,因为我是新手。
这就是NeuralNetwork.py的样子:
import random
import numpy as np
def nonlin(x, deriv=False):
if(deriv==True):
return x * (1-x)
return 1/(1+np.exp(-x))
np.random.seed(1)
class NeuralNetwork:
next_ID = 0
def __init__(self, HyperParams):
self.synapses = []
for synapse in range(len(HyperParams)-1):
self.synapses.append(2*np.random.random((HyperParams[synapse], HyperParams[synapse+1]))-1)
self.score = 0
# self.name = words[random.randint(0, len(words))].strip()
self.name = str(NeuralNetwork.next_ID)
NeuralNetwork.next_ID += 1
def train_batch(self, epoch, state, outcome):
for i in range(epoch):
self.layers = []
self.layers.append(state)
for j in range(len(self.synapses)):
self.layers.append(nonlin(np.dot(self.layers[-1], self.synapses[j])))
error = outcome - self.layers[-1]
if (i % 1000) == 0: print(str(np.mean(np.abs(error))))
for j in range(1,1+len(self.synapses)):
delta = error * nonlin(self.layers[-j], True)
error = delta.dot(self.synapses[-j].T)
self.synapses[-j] += self.layers[-(j+1)].T.dot(delta)
def train(self, state, outcome):
self.layers = []
self.layers.append(state)
for j in range(len(self.synapses)):
self.layers.append(nonlin(np.dot(self.layers[-1], self.synapses[j])))
error = outcome - self.layers[-1]
print("error: ", error.shape)
for j in range(1,1+len(self.synapses)):
delta = error * nonlin(self.layers[-j], True)
print("delta: ", delta.shape)
error = delta.dot(self.synapses[-j].T)
print("layer: ", self.layers[-(j+1)].shape)
print("layer.T: ", self.layers[-(j+1)].T.shape)
# this is the issue
print("dot: ", self.layers[-(j+1)].T.dot(delta).shape)
self.synapses[-j] += self.layers[-(j+1)].T.dot(delta)
def next_gen(self):
child = NeuralNetwork([1])
for synapse in self.synapses:
# add variation
child.synapses.append(synapse + 0.1*np.random.random(synapse.shape)-0.05)
# child.name += " son of " + self.name
child.name += "<-" + self.name
return child
def feed(self, state):
self.layers = []
self.layers.append(state)
for j in range(len(self.synapses)):
self.layers.append(nonlin(np.dot(self.layers[-1], self.synapses[j])))
return self.layers[-1]
这就是2048.py的样子:
import random
import os
import sys
import math
import numpy as np
from NeuralNetwork import *
# global vars, constants and setup
board = {}
row_size = 4
random.seed(1)
HP = (16,9,4)
# set up game board
for i in range(row_size): # row
for j in range(row_size): #column
board[(i,j)] = 0
# display function
def display():
for i in range(row_size):
print('\t'.join([str(board[(i,j)]) for j in range(row_size)]))
print()
# logic function
def logic(move, NN):
"""
char move is the move, one of any in "asdw"
NN is a NeuralNetwork object
"""
# print("mov", move)
score = 0
if move == 's':
for j in range(row_size): # columns
row_pointer = row_size-1
for i in reversed(range(row_size-1)): # go up the rows
if board[(i, j)] != 0:
# if there is a non-empty square above, and this is a zero #check
if board[(row_pointer, j)] == 0:
board[(row_pointer, j)] = board[(i, j)]
board[(i, j)] = 0
# row_pointer -= 1 # This is the new block to focus on
# if there is a non-empty square above, and they are not equivalent
elif board[(i, j)] != board[(row_pointer, j)]:
# while this intuitively is not a swap, without it I would need to zero board[(i,j)]
# that zero would cause problems if row_pointer-1 == i
board[(row_pointer-1, j)], board[(i, j)] = board[(i, j)], board[(row_pointer-1, j)]
row_pointer -= 1 # This is the new block to focus on
# if there is a non-empty square above, and they are the same
elif board[(i, j)] == board[(row_pointer, j)]:
board[(row_pointer, j)] += board[(i, j)]
board[(i, j)] = 0
score += board[(row_pointer, j)] + math.log(board[(row_pointer, j)], 2)
elif move == 'w':
for j in range(row_size): # columns
row_pointer = 0
for i in range(1, row_size): # go down the rows
if board[(i, j)] != 0:
# if there is a non-empty square above, and this is a zero
if board[(row_pointer, j)] == 0:
board[(row_pointer, j)] = board[(i, j)]
board[(i, j)] = 0
# if there is a non-empty square above, and they are not equivalent
elif board[(i, j)] != board[(row_pointer, j)]:
board[(row_pointer+1, j)], board[(i, j)] = board[(i, j)], board[(row_pointer+1, j)]
row_pointer += 1 # This is the new block to focus on
# if there is a non-empty square above, and they are the same
elif board[(i, j)] == board[(row_pointer, j)]:
board[(row_pointer, j)] += board[(i, j)]
board[(i, j)] = 0
score += board[(row_pointer, j)] + math.log(board[(row_pointer, j)], 2)
elif move == 'a':
for i in range(row_size): # rows
column_pointer = 0
for j in range(1, row_size): # go right through the columns
if board[(i, j)] != 0:
# if there is a non-empty square above, and this is a zero
if board[(i, column_pointer)] == 0:
board[(i, column_pointer)] = board[(i, j)]
board[(i, j)] = 0
# if there is a non-empty square above, and they are not equivalent
elif board[(i, j)] != board[(i, column_pointer)]:
board[(i, column_pointer+1)], board[(i, j)] = board[(i, j)], board[(i, column_pointer+1)]
column_pointer += 1 # This is the new block to focus on
# if there is a non-empty square above, and they are the same
elif board[(i, j)] == board[(i, column_pointer)]:
board[(i, column_pointer)] += board[(i, j)]
board[(i, j)] = 0
score += board[(i, column_pointer)] + math.log(board[(i, column_pointer)], 2)
elif move == 'd':
for i in range(row_size): # rows
column_pointer = row_size-1
for j in reversed(range(row_size-1)): # go left through the columns
if board[(i, j)] != 0:
# if there is a non-empty square above, and this is a zero
if board[(i, column_pointer)] == 0:
board[(i, column_pointer)] = board[(i, j)]
board[(i, j)] = 0
# if there is a non-empty square above, and they are not equivalent
elif board[(i, j)] != board[(i, column_pointer)]:
board[(i, column_pointer-1)], board[(i, j)] = board[(i, j)], board[(i, column_pointer-1)]
column_pointer -= 1 # This is the new block to focus on
# if there is a non-empty square above, and they are the same
elif board[(i, j)] == board[(i, column_pointer)]:
board[(i, column_pointer)] += board[(i, j)]
board[(i, j)] = 0
score += board[(i, column_pointer)] + math.log(board[(i, column_pointer)], 2)
else:
print("something is wrong")
NN.score += score
return score
# checks to see whether there are any valid moves in a full board with no 0's
def is_game_over():
# check the top-left square
for i in range(row_size-1):
for j in range(row_size-1):
if board[(i,j)] in [board[(i+1,j)], board[(i,j+1)]]: # check the one below and to the right
return False
# Check the right-most column
for j in range(row_size-1):
if board[(row_size-1,j)] == board[(row_size-1,j+1)]:
return False
# Check the bottom row
for i in range(row_size-1):
if board[(i,row_size-1)] == board[(i+1,row_size-1)]:
return False
# There is no way to combine, game over
return True
# NN controls
NN = NeuralNetwork(HP)
for step in range(10):
# set up game board
for i in range(row_size): # row
for j in range(row_size): #column
board[(i,j)] = 0
previous_board = []
quit = False
# game loop
while not quit:
# set a new empty tile to a 2
while True:
i = random.randint(0,row_size-1)
j = random.randint(0,row_size-1)
# print(i,j,board[(i,j)])
if board[(i,j)] != 0: continue
else: board[(i,j)] = 2 ; break
# View
# display()
# normalize data and make a guess with nn
state = np.array([board[(i,j)] for j in range(row_size) for i in range(row_size)])
state[state==0] = 1
state = np.log2(state)
state = state / np.max(state)
# print('\n'.join(['\t'.join([str(state[j*row_size+i]) for j in range(row_size)])for i in range(row_size)]))
move = NN.feed(state)
# move
reward = 0
previous_board = list(board.values())
while True:
if len(move[move == 0]) == 4:
if is_game_over():
# print("Game Over")
quit = True
break
reward = logic("asdw"[move.argmax()], NN)
if previous_board == list(board.values()): move[move.argmax()] = 0 ; continue
else: break
if reward:
reward = nonlin(math.log2(reward)-math.log2(2048))
move[np.argmax(move)] += reward
NN.train(state, move)
display()
print("score: " + str(NN.score))
NN.score = 0
我被告知numpy会在遇到两个点阵的1-D阵列时知道该怎么做,但这种情况并没有发生。我应该将这些数组制作为2D,其内部维度为1吗?你能帮忙吗?
这是完整的错误:
Traceback (most recent call last):
File "2048.py", line 195, in <module>
NN.train(state, move)
File "/home/jeff/Programs/grad_descent/NeuralNetwork.py", line 71, in train
print("dot: ", self.layers[-(j+1)].T.dot(delta).shape)
ValueError: shapes (9,) and (4,) not aligned: 9 (dim 0) != 4 (dim 0)
正如你所看到的,它们都是一维向量,所以numpy应该只是点它们。
答案 0 :(得分:0)
如果使用np.newaxis
提供明确的1-D列表示,它将起作用。
注意:如果您正在寻找标量输出,则两个向量需要为equal length。 OP中的错误消息表明您正在尝试使用长度为9
和长度为4
向量的点积。我假设您确实希望.dot()
返回外部产品。如果没有,内部产品不会起作用 - 在这种情况下,试着弄清楚为什么你没有得到两个相等长度的向量,你希望看到它们。
使用:
a = np.array([1,2,3])
b = np.array([2,3,4,5])
a
和b
的形状分别为(3,)
和(4,)
:
try:
print(a.shape)
print(b.shape)
print("a.b: \n{}".format(np.dot(a,b.T)))
except ValueError as e:
print("failed: {}".format(e))
输出:
(3,)
(4,)
failed: shapes (3,) and (4,) not aligned: 3 (dim 0) != 4 (dim 0)
使用newaxis
,形状变为(3,1)
和(4,1)
:
aa = a[:, np.newaxis]
bb = b[:, np.newaxis]
try:
print(aa.shape)
print(bb.shape)
print("aa.bb: \n{}".format(np.dot(aa,bb.T)))
except ValueError as e:
print("failed: {}".format(e))
输出:
(3, 1)
(4, 1)
aa.bb:
[[ 2 3 4 5]
[ 4 6 8 10]
[ 6 9 12 15]]