Question

我正在训练NN使用强化学习来玩2048。或者至少我认为我是，因为我是新手。

这就是NeuralNetwork.py的样子：

import random
import numpy as np

def nonlin(x, deriv=False):
    if(deriv==True):
        return x * (1-x)
    return 1/(1+np.exp(-x))


np.random.seed(1)


class NeuralNetwork:

    next_ID = 0

    def __init__(self, HyperParams):
        self.synapses = []
        for synapse in range(len(HyperParams)-1):
            self.synapses.append(2*np.random.random((HyperParams[synapse], HyperParams[synapse+1]))-1)
        self.score = 0
        # self.name = words[random.randint(0, len(words))].strip()
        self.name = str(NeuralNetwork.next_ID)
        NeuralNetwork.next_ID += 1


    def train_batch(self, epoch, state, outcome):
        for i in range(epoch):
            self.layers = []
            self.layers.append(state)
            for j in range(len(self.synapses)):
                self.layers.append(nonlin(np.dot(self.layers[-1], self.synapses[j])))

            error = outcome - self.layers[-1]
            if (i % 1000) == 0: print(str(np.mean(np.abs(error))))

            for j in range(1,1+len(self.synapses)):
                delta = error * nonlin(self.layers[-j], True)
                error = delta.dot(self.synapses[-j].T)
                self.synapses[-j] += self.layers[-(j+1)].T.dot(delta)


    def train(self, state, outcome):
        self.layers = []
        self.layers.append(state)
        for j in range(len(self.synapses)):
            self.layers.append(nonlin(np.dot(self.layers[-1], self.synapses[j])))

        error = outcome - self.layers[-1]
        print("error: ", error.shape)
        for j in range(1,1+len(self.synapses)):
            delta = error * nonlin(self.layers[-j], True)
            print("delta: ", delta.shape)
            error = delta.dot(self.synapses[-j].T)
            print("layer: ", self.layers[-(j+1)].shape)
            print("layer.T: ", self.layers[-(j+1)].T.shape)

            # this is the issue
            print("dot: ", self.layers[-(j+1)].T.dot(delta).shape)
            self.synapses[-j] += self.layers[-(j+1)].T.dot(delta) 


    def next_gen(self):
        child = NeuralNetwork([1])
        for synapse in self.synapses:
            # add variation
            child.synapses.append(synapse + 0.1*np.random.random(synapse.shape)-0.05)
        # child.name += " son of " + self.name
        child.name += "<-" + self.name
        return child

    def feed(self, state):
        self.layers = []
        self.layers.append(state)
        for j in range(len(self.synapses)):
            self.layers.append(nonlin(np.dot(self.layers[-1], self.synapses[j])))
        return self.layers[-1]

这就是2048.py的样子：

import random
import os
import sys
import math
import numpy as np
from NeuralNetwork import *

# global vars, constants and setup
board = {}
row_size = 4
random.seed(1)
HP = (16,9,4)

# set up game board
for i in range(row_size): # row
    for j in range(row_size): #column
        board[(i,j)] = 0


# display function
def display():
    for i in range(row_size):
        print('\t'.join([str(board[(i,j)]) for j in range(row_size)]))
    print()


# logic function
def logic(move, NN):
    """
    char move is the move, one of any in "asdw"
    NN is a NeuralNetwork object
    """
    # print("mov", move)
    score = 0
    if move == 's':
        for j in range(row_size): # columns
            row_pointer = row_size-1
            for i in reversed(range(row_size-1)): # go up the rows
                if board[(i, j)] != 0:
                    # if there is a non-empty square above, and this is a zero #check
                    if board[(row_pointer, j)] == 0:
                        board[(row_pointer, j)] = board[(i, j)]
                        board[(i, j)] = 0
                        # row_pointer -= 1 # This is the new block to focus on

                    # if there is a non-empty square above, and they are not equivalent
                    elif board[(i, j)] != board[(row_pointer, j)]:
                        # while this intuitively is not a swap, without it I would need to zero board[(i,j)]
                        # that zero would cause problems if row_pointer-1 == i
                        board[(row_pointer-1, j)], board[(i, j)] = board[(i, j)], board[(row_pointer-1, j)]
                        row_pointer -= 1 # This is the new block to focus on

                    # if there is a non-empty square above, and they are the same
                    elif board[(i, j)] == board[(row_pointer, j)]:
                        board[(row_pointer, j)] += board[(i, j)]
                        board[(i, j)] = 0
                        score += board[(row_pointer, j)] + math.log(board[(row_pointer, j)], 2)
    elif move == 'w':
        for j in range(row_size): # columns
            row_pointer = 0
            for i in range(1, row_size): # go down the rows
                if board[(i, j)] != 0:
                    # if there is a non-empty square above, and this is a zero
                    if board[(row_pointer, j)] == 0:
                        board[(row_pointer, j)] = board[(i, j)]
                        board[(i, j)] = 0

                    # if there is a non-empty square above, and they are not equivalent
                    elif board[(i, j)] != board[(row_pointer, j)]:
                        board[(row_pointer+1, j)], board[(i, j)] = board[(i, j)], board[(row_pointer+1, j)]
                        row_pointer += 1 # This is the new block to focus on

                    # if there is a non-empty square above, and they are the same
                    elif board[(i, j)] == board[(row_pointer, j)]:
                        board[(row_pointer, j)] += board[(i, j)]
                        board[(i, j)] = 0
                        score += board[(row_pointer, j)] + math.log(board[(row_pointer, j)], 2)
    elif move == 'a':
        for i in range(row_size): # rows
            column_pointer = 0
            for j in range(1, row_size): # go right through the columns
                if board[(i, j)] != 0:
                    # if there is a non-empty square above, and this is a zero
                    if board[(i, column_pointer)] == 0:
                        board[(i, column_pointer)] = board[(i, j)]
                        board[(i, j)] = 0

                    # if there is a non-empty square above, and they are not equivalent
                    elif board[(i, j)] != board[(i, column_pointer)]:
                        board[(i, column_pointer+1)], board[(i, j)] = board[(i, j)], board[(i, column_pointer+1)]
                        column_pointer += 1 # This is the new block to focus on

                    # if there is a non-empty square above, and they are the same
                    elif board[(i, j)] == board[(i, column_pointer)]:
                        board[(i, column_pointer)] += board[(i, j)]
                        board[(i, j)] = 0
                        score += board[(i, column_pointer)] + math.log(board[(i, column_pointer)], 2)
    elif move == 'd':
        for i in range(row_size): # rows
            column_pointer = row_size-1
            for j in reversed(range(row_size-1)): # go left through the columns
                if board[(i, j)] != 0:
                    # if there is a non-empty square above, and this is a zero
                    if board[(i, column_pointer)] == 0:
                        board[(i, column_pointer)] = board[(i, j)]
                        board[(i, j)] = 0

                    # if there is a non-empty square above, and they are not equivalent
                    elif board[(i, j)] != board[(i, column_pointer)]:
                        board[(i, column_pointer-1)], board[(i, j)] = board[(i, j)], board[(i, column_pointer-1)]
                        column_pointer -= 1 # This is the new block to focus on

                    # if there is a non-empty square above, and they are the same
                    elif board[(i, j)] == board[(i, column_pointer)]:
                        board[(i, column_pointer)] += board[(i, j)]
                        board[(i, j)] = 0
                        score += board[(i, column_pointer)] + math.log(board[(i, column_pointer)], 2)

    else:
        print("something is wrong")
    NN.score += score
    return score


# checks to see whether there are any valid moves in a full board with no 0's
def is_game_over():
    # check the top-left square
    for i in range(row_size-1):
        for j in range(row_size-1):
            if board[(i,j)] in [board[(i+1,j)], board[(i,j+1)]]: # check the one below and to the right
                return False
    # Check the right-most column
    for j in range(row_size-1):
        if board[(row_size-1,j)] == board[(row_size-1,j+1)]:
            return False
    # Check the bottom row
    for i in range(row_size-1):
        if board[(i,row_size-1)] == board[(i+1,row_size-1)]:
            return False
    # There is no way to combine, game over
    return True


# NN controls
NN = NeuralNetwork(HP)

for step in range(10):
    # set up game board
    for i in range(row_size): # row
        for j in range(row_size): #column
            board[(i,j)] = 0

    previous_board = []
    quit = False
    # game loop
    while not quit:
        # set a new empty tile to a 2
        while True:
            i = random.randint(0,row_size-1)
            j = random.randint(0,row_size-1)
            # print(i,j,board[(i,j)])
            if board[(i,j)] != 0: continue
            else: board[(i,j)] = 2 ; break


        # View
        # display()


        # normalize data and make a guess with nn
        state = np.array([board[(i,j)] for j in range(row_size) for i in range(row_size)])
        state[state==0] = 1
        state = np.log2(state)
        state = state / np.max(state)
        # print('\n'.join(['\t'.join([str(state[j*row_size+i]) for j in range(row_size)])for i in range(row_size)]))
        move = NN.feed(state)


        # move
        reward = 0
        previous_board = list(board.values())
        while True:
            if len(move[move == 0]) == 4:
                if is_game_over():
                    # print("Game Over")
                    quit = True
                    break
            reward = logic("asdw"[move.argmax()], NN)
            if previous_board == list(board.values()): move[move.argmax()] = 0 ; continue
            else: break

        if reward:
            reward = nonlin(math.log2(reward)-math.log2(2048))
            move[np.argmax(move)] += reward
            NN.train(state, move)
    display()


    print("score: " + str(NN.score))

    NN.score = 0

我被告知numpy会在遇到两个点阵的1-D阵列时知道该怎么做，但这种情况并没有发生。我应该将这些数组制作为2D，其内部维度为1吗？你能帮忙吗？

这是完整的错误：

Traceback (most recent call last):
  File "2048.py", line 195, in <module>
    NN.train(state, move)
  File "/home/jeff/Programs/grad_descent/NeuralNetwork.py", line 71, in train
    print("dot: ", self.layers[-(j+1)].T.dot(delta).shape)
ValueError: shapes (9,) and (4,) not aligned: 9 (dim 0) != 4 (dim 0)

正如你所看到的，它们都是一维向量，所以numpy应该只是点它们。

Answer 1

如果使用np.newaxis提供明确的1-D列表示，它将起作用。

注意：如果您正在寻找标量输出，则两个向量需要为equal length。 OP中的错误消息表明您正在尝试使用长度为9和长度为4向量的点积。我假设您确实希望.dot()返回外部产品。如果没有，内部产品不会起作用 - 在这种情况下，试着弄清楚为什么你没有得到两个相等长度的向量，你希望看到它们。

使用：

a = np.array([1,2,3])
b = np.array([2,3,4,5])

a和b的形状分别为(3,)和(4,)：

try:
    print(a.shape)
    print(b.shape)
    print("a.b: \n{}".format(np.dot(a,b.T)))
except ValueError as e:
    print("failed: {}".format(e))

输出：

(3,)
(4,)
failed: shapes (3,) and (4,) not aligned: 3 (dim 0) != 4 (dim 0)

使用newaxis，形状变为(3,1)和(4,1)：

aa = a[:, np.newaxis]
bb = b[:, np.newaxis]

try:
    print(aa.shape)
    print(bb.shape)
    print("aa.bb: \n{}".format(np.dot(aa,bb.T)))
except ValueError as e:
    print("failed: {}".format(e))

输出：

(3, 1)
(4, 1)  
aa.bb: 
[[ 2  3  4  5]
 [ 4  6  8 10]
 [ 6  9 12 15]]

ValueError：形状（9，）和（4，）未对齐

1 个答案: