Question

我正在使用卷积神经网络（tensorflow，alexnet）在游戏中驾驶汽车（极品飞车）。我只是一个想要使用机器学习的学生。我是初学者。

这是我的计划：

获取训练数据
平衡并打乱数据
训练模型
测试模型

问题在于我没有找到一种方法来记录或检测我在玩游戏时所做的KeyPress。我希望python检测我按下哪些键，并将它们与游戏的帧图像一起存储在npy数组中。

我找到了下面这段记录按键（KeyPress）的代码，但它只能记录字母键。我希望 Python 也能检测方向键、空格键等。

import win32api as wapio
import time

# Keys polled by key_check(): backspace first, then every character of the
# string below, in order.
keyList = ["\b"] + list("ABCDEFGHIJKLMNOPQRSTUVWXYZ 123456789,.'£$/\\")

def key_check():
    """Return the entries of ``keyList`` whose key state is reported as non-zero.

    Each entry is converted with ``ord()`` and passed to
    ``win32api.GetAsyncKeyState``; entries with a truthy result are returned
    in ``keyList`` order.

    Returns:
        list: the matching one-character strings (empty if none matched).
    """
    # The file-level import binds the module as ``wapio``, so the ``wapi``
    # name this function relies on was never defined (NameError on the first
    # call). Import it here under the name actually used; repeated imports
    # are served from the module cache, so this is cheap.
    import win32api as wapi

    return [key for key in keyList if wapi.GetAsyncKeyState(ord(key))]

我还想要一种模拟KeyPresses的方法，这样我的模型实际上可以开车。

我已经有了下面这段代码，它运行得很好。我只是想听听大家对这部分的建议。

import ctypes
import time

# Alias for user32's SendInput, resolved once at import time via ctypes.windll.
SendInput = ctypes.windll.user32.SendInput

# Key codes for the four driving keys. PressKey/ReleaseKey place these in the
# wScan field of the keyboard event.
W, A, S, D = 0x11, 0x1E, 0x1F, 0x20

# ctypes mirrors of the C structures that are handed to SendInput.
PUL = ctypes.POINTER(ctypes.c_ulong)


class KeyBdInput(ctypes.Structure):
    """Keyboard event record; stored in the ``ki`` member of ``Input_I``."""

    _fields_ = [
        ("wVk", ctypes.c_ushort),
        ("wScan", ctypes.c_ushort),
        ("dwFlags", ctypes.c_ulong),
        ("time", ctypes.c_ulong),
        ("dwExtraInfo", PUL),
    ]

class HardwareInput(ctypes.Structure):
    """Hardware event record; stored in the ``hi`` member of ``Input_I``."""

    # wParamL is an unsigned 16-bit WORD in the Win32 HARDWAREINPUT
    # declaration. It was declared here as a signed short, which has the same
    # size but reads values above 0x7FFF back as negative numbers.
    _fields_ = [
        ("uMsg", ctypes.c_ulong),
        ("wParamL", ctypes.c_ushort),
        ("wParamH", ctypes.c_ushort),
    ]

class MouseInput(ctypes.Structure):
    """Mouse event record; stored in the ``mi`` member of ``Input_I``."""

    _fields_ = [
        ("dx", ctypes.c_long),
        ("dy", ctypes.c_long),
        ("mouseData", ctypes.c_ulong),
        ("dwFlags", ctypes.c_ulong),
        ("time", ctypes.c_ulong),
        ("dwExtraInfo", PUL),
    ]

class Input_I(ctypes.Union):
    """Union overlaying the keyboard, mouse and hardware event records."""

    _fields_ = [
        ("ki", KeyBdInput),
        ("mi", MouseInput),
        ("hi", HardwareInput),
    ]

class Input(ctypes.Structure):
    """Top-level record passed to SendInput: a type tag plus the event union."""

    _fields_ = [
        ("type", ctypes.c_ulong),
        ("ii", Input_I),
    ]

# Actual functions

def PressKey(hexKeyCode):
    """Send a key-down keyboard event for ``hexKeyCode`` through SendInput.

    ``hexKeyCode`` goes into the ``wScan`` field with ``dwFlags`` set to
    0x0008 (KEYEVENTF_SCANCODE in the Win32 headers); ``wVk`` is left at 0.
    """
    extra_info = ctypes.c_ulong(0)
    key_event = KeyBdInput(
        wVk=0,
        wScan=hexKeyCode,
        dwFlags=0x0008,
        time=0,
        dwExtraInfo=ctypes.pointer(extra_info),
    )

    payload = Input_I()
    payload.ki = key_event

    # type 1 marks the record as a keyboard event.
    event = Input(ctypes.c_ulong(1), payload)
    SendInput(1, ctypes.pointer(event), ctypes.sizeof(event))

def ReleaseKey(hexKeyCode):
    """Send a key-up keyboard event for ``hexKeyCode`` through SendInput.

    Identical to a key-down event except that ``dwFlags`` also carries
    0x0002 (KEYEVENTF_KEYUP in the Win32 headers).
    """
    extra_info = ctypes.c_ulong(0)
    key_event = KeyBdInput(
        wVk=0,
        wScan=hexKeyCode,
        dwFlags=0x0008 | 0x0002,
        time=0,
        dwExtraInfo=ctypes.pointer(extra_info),
    )

    payload = Input_I()
    payload.ki = key_event

    # type 1 marks the record as a keyboard event.
    event = Input(ctypes.c_ulong(1), payload)
    SendInput(1, ctypes.pointer(event), ctypes.sizeof(event))


if __name__ == '__main__':
    # Manual demo: hold W (0x11) for one second, release it for one second,
    # and repeat until the process is killed.
    while True:
        for send_event in (PressKey, ReleaseKey):
            send_event(W)
            time.sleep(1)

下一步是实际收集游戏的帧（图像）。我使用cv2来捕捉我的屏幕区域，在那里我玩游戏。然后我：

将其转换为灰度
调整图片大小
将其保存为numpy文件（每捕获1000帧）

数据捕获完全正常，但是一旦我有大约50 000帧（或大约500 MB的数据），保存数据需要很长时间。有时，Python会在numpy文件保存时崩溃，导致我的所有训练数据丢失。

这是我捕获数据的代码：

import numpy as np
import cv2
import time
from grabscreen import grab_screen
from getkeys import key_check
import os


def keys_to_output(keys):
    """Encode the pressed keys as a one-hot ``[left, straight, right]`` list.

    'A' selects the first slot and takes priority over 'D', which selects the
    last slot; with neither key present the middle slot is set.
    """
    if 'A' in keys:
        return [1, 0, 0]
    if 'D' in keys:
        return [0, 0, 1]
    return [0, 1, 0]


file_name = 'training_data.npy'

# Resume from an earlier capture session when its file is already on disk.
if os.path.isfile(file_name):
    print('File exists, loading previous data!')
    # Every sample pairs an image array with a plain list, so the file holds
    # a pickled object array. np.load rejects those unless allow_pickle=True.
    # Only load files written by this script: unpickling untrusted data can
    # execute arbitrary code.
    training_data = list(np.load(file_name, allow_pickle=True))
else:
    print('File does not exist, starting fresh!')
    training_data = []


def _save_training_data(path, samples):
    """Write ``samples`` to ``path`` without ever leaving a truncated file.

    ``samples`` is a sequence of ``(image, label)`` pairs. They are packed
    into an (N, 2) object array explicitly, because the image and the label
    have different shapes and current NumPy refuses to build an array from
    such a ragged list implicitly.
    """
    packed = np.empty((len(samples), 2), dtype=object)
    for row, (image, label) in enumerate(samples):
        packed[row, 0] = image
        packed[row, 1] = label

    # Write to a sibling file first and swap it in afterwards, so a crash in
    # the middle of a long save cannot destroy the previous good copy.
    # Saving through a file handle stops np.save from appending ".npy".
    tmp_path = path + '.tmp'
    with open(tmp_path, 'wb') as handle:
        np.save(handle, packed)
    os.replace(tmp_path, path)


def main():
    """Record (frame, key-label) samples until the process is stopped.

    After a five second countdown, each iteration grabs the screen region,
    resizes it to 120x56, converts it to grayscale, and appends it with the
    current key label to ``training_data``. The full list is written to
    ``file_name`` whenever its length reaches a multiple of 1000.
    """
    for remaining in range(5, 0, -1):
        print(remaining)
        time.sleep(1)

    if os.path.isfile(file_name):
        print('Existing Training Data:' + str(len(training_data)))
        print('Capturing Data!')
    else:
        print('Capturing Data Freshly!')

    while True:
        screen = grab_screen(region=(40, 250, 860, 560))
        screen = cv2.resize(screen, (120, 56))
        screen = cv2.cvtColor(screen, cv2.COLOR_BGR2GRAY)

        keys = key_check()
        output = keys_to_output(keys)
        training_data.append([screen, output])

        if len(training_data) % 1000 == 0:
            print('New Training Data: ' + str(len(training_data)))
            print('Saving Data!')
            _save_training_data(file_name, training_data)
            print('Data saved succesfully! You can quit now.')
            print('Capturing data!')


# Only start capturing when run as a script, not when the module is imported.
if __name__ == '__main__':
    main()

有人能建议一种更好的方法来获取训练数据吗？另外，我听说过可以使用 PyTables，但我不确定如何在我的程序中使用它。

在此之后，我将数据打乱并进行平衡：

import numpy as np
import pandas as pd
from collections import Counter
from random import shuffle

# The capture file is a pickled object array (image, label) per row, which
# np.load only accepts with allow_pickle=True. Only load files produced by
# the capture script: unpickling untrusted data can execute arbitrary code.
train_data = np.load('training_data.npy', allow_pickle=True)

print('Training Data: ' + str(len(train_data)))
df = pd.DataFrame(train_data)
print(df.head())
print(Counter(df[1].apply(str)))

lefts = []
rights = []
forwards = []

# random.shuffle swaps elements with ``x[i], x[j] = x[j], x[i]``; on a 2-D
# ndarray the rows are views, so that swap overwrites one row with the other
# and silently duplicates samples. np.random.shuffle permutes rows correctly.
np.random.shuffle(train_data)

for data in train_data:
    img = data[0]
    # list() keeps the comparisons below plain list equality even if the
    # label was stored as an array.
    choice = list(data[1])

    if choice == [1, 0, 0]:
        lefts.append([img, choice])
    elif choice == [0, 1, 0]:
        forwards.append([img, choice])
    elif choice == [0, 0, 1]:
        rights.append([img, choice])
    else:
        print('no matches!!!')


# Trim every class to the size of the smallest one.
smallest = min(len(lefts), len(forwards), len(rights))
forwards = forwards[:smallest]
lefts = lefts[:smallest]
rights = rights[:smallest]

final_data = forwards + lefts + rights

shuffle(final_data)
print('Final Balanced Data: ' + str(len(final_data)))

# Pack the pairs into an (N, 2) object array explicitly: image and label have
# different shapes, and current NumPy refuses to convert such a ragged list.
balanced = np.empty((len(final_data), 2), dtype=object)
for row, (sample_img, sample_choice) in enumerate(final_data):
    balanced[row, 0] = sample_img
    balanced[row, 1] = sample_choice
np.save('training_data_balanced.npy', balanced)

模型：

import tflearn
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
from tflearn.layers.normalization import local_response_normalization

def alexnet(width, height, lr):
    """Build the AlexNet-style tflearn model used for 3-way steering.

    Args:
        width: second dimension of the input placeholder.
        height: third dimension of the input placeholder.
        lr: learning rate handed to the regression layer.

    Returns:
        A ``tflearn.DNN`` wrapping the network, checkpointing to
        'model_alexnet' and logging to 'log'.
    """
    net = input_data(shape=[None, width, height, 1], name='input')

    # Convolutional stage 1
    net = conv_2d(net, 96, 11, strides=4, activation='relu')
    net = max_pool_2d(net, 3, strides=2)
    net = local_response_normalization(net)

    # Convolutional stage 2
    net = conv_2d(net, 256, 5, activation='relu')
    net = max_pool_2d(net, 3, strides=2)
    net = local_response_normalization(net)

    # Convolutional stage 3: three stacked 3x3 convolutions
    net = conv_2d(net, 384, 3, activation='relu')
    net = conv_2d(net, 384, 3, activation='relu')
    net = conv_2d(net, 256, 3, activation='relu')
    net = max_pool_2d(net, 3, strides=2)
    net = local_response_normalization(net)

    # Classifier: two identical fully-connected + dropout pairs, then softmax
    for _ in range(2):
        net = fully_connected(net, 4096, activation='tanh')
        net = dropout(net, 0.5)
    net = fully_connected(net, 3, activation='softmax')

    net = regression(
        net,
        optimizer='momentum',
        loss='categorical_crossentropy',
        learning_rate=lr,
        name='targets',
    )

    return tflearn.DNN(
        net,
        checkpoint_path='model_alexnet',
        max_checkpoints=1,
        tensorboard_verbose=2,
        tensorboard_dir='log',
    )

训练模型：

import numpy as np
from alexnet import alexnet

# Input dimensions and training hyper-parameters; LR and EPOCHS are also
# baked into the model file name.
WIDTH = 120
HEIGHT = 56
LR = 1e-3
EPOCHS = 15
MODEL_NAME = f'nfs-car-{LR}-{EPOCHS}-epochs.model'

model = alexnet(WIDTH, HEIGHT, LR)

# The dataset does not change between epochs, so load and split it once
# instead of re-reading the file on every iteration. allow_pickle=True is
# needed because the file stores a pickled object array; only load files
# produced by the balancing script.
train_data = np.load('training_data_balanced.npy', allow_pickle=True)

# Hold out the last 20% for validation. The previous slices kept only the
# last 10 samples, although they were commented as an 80%/20% split.
split = int(len(train_data) * 0.8)
train = train_data[:split]  # 80% of balanced data
test = train_data[split:]  # 20% of balanced data

X = np.array([sample[0] for sample in train]).reshape(-1, WIDTH, HEIGHT, 1)
Y = [sample[1] for sample in train]

test_x = np.array([sample[0] for sample in test]).reshape(-1, WIDTH, HEIGHT, 1)
test_y = [sample[1] for sample in test]

# Train one epoch at a time and save after each one. If accuracy drops or
# loss increases, the script can be stopped and the model saved after the
# previous epoch is still on disk.
for epoch in range(EPOCHS):
    model.fit({'input': X}, {'targets': Y}, n_epoch=1,
              validation_set=({'input': test_x}, {'targets': test_y}),
              snapshot_step=500, show_metric=True, run_id=MODEL_NAME)

    model.save(MODEL_NAME)
    print('Saved epoch: ' + str(epoch + 1))

然后，当然，我测试模型：

import numpy as np
import cv2
import time
from grabscreen import grab_screen
from getkeys import key_check
from alexnet import alexnet
from directkeys import PressKey, ReleaseKey, W, A, D

# Must match the values used by the training script so that MODEL_NAME
# resolves to the file that script saved.
WIDTH = 120
HEIGHT = 56
LR = 1e-3
EPOCHS = 15
MODEL_NAME = f'nfs-car-{LR}-{EPOCHS}-epochs.model'


def straight():
    """Drive forward: press W and release both steering keys (A, then D)."""
    PressKey(W)
    for steering_key in (A, D):
        ReleaseKey(steering_key)


def left():
    """Steer left: press W and A, release D, then release A after 0.09 s."""
    for key in (W, A):
        PressKey(key)
    ReleaseKey(D)  # make sure the opposite steering key is not held
    time.sleep(0.09)
    ReleaseKey(A)


def right():
    """Steer right: press W and D, release A, then release D after 0.09 s."""
    for key in (W, D):
        PressKey(key)
    ReleaseKey(A)  # make sure the opposite steering key is not held
    time.sleep(0.09)
    ReleaseKey(D)


# Rebuild the network with the constants defined above, then load the state
# stored under MODEL_NAME into it.
model = alexnet(WIDTH, HEIGHT, LR)
model.load(MODEL_NAME)


def main():
    """Drive the car from model predictions until the process is stopped.

    After a five second countdown, each iteration grabs the screen region,
    preprocesses it, asks the model for a prediction and presses the matching
    keys. Pressing 'T' toggles pause; pausing releases A, W and D.
    """
    for remaining in range(5, 0, -1):
        print(remaining)
        time.sleep(1)

    # Minimum score a class needs before it is acted on.
    turn_thrush = .75
    fwd_thrush = 0.70

    paused = False

    while True:

        if not paused:
            screen = grab_screen(region=(40, 250, 860, 560))
            screen = cv2.cvtColor(screen, cv2.COLOR_BGR2GRAY)
            screen = cv2.resize(screen, (120, 56))

            prediction = model.predict([screen.reshape(WIDTH, HEIGHT, 1)])[0]
            print(prediction)

            if prediction[0] > turn_thrush:
                left()
            elif prediction[1] > fwd_thrush:
                straight()
            elif prediction[2] > turn_thrush:
                right()
            else:
                # No class is confident enough: default to driving straight.
                straight()

        # Poll the keyboard on every iteration, paused or not. This check
        # used to sit inside the ``if not paused`` branch, so once paused the
        # loop never looked at the keyboard again and could not be resumed.
        keys = key_check()

        if 'T' in keys:
            if paused:
                paused = False
                print('Unpaused!')
                time.sleep(1)
            else:
                print('Pausing!')
                paused = True
                ReleaseKey(A)
                ReleaseKey(W)
                ReleaseKey(D)
                time.sleep(1)


# Only start driving when run as a script, not when the module is imported.
if __name__ == '__main__':
    main()

所以是的，这就是我的想法。然而，由于缺乏适当的信息，我无法完全应用这一概念。

Answer 1

按键问题太具体了。只是让您知道，Udacity 课程中有开源模拟器，您也许可以在那里找到一些线索。
对于大型训练集导致的崩溃，您需要使用生成器（generator）技术，即把训练数据以较小的批次流式地送入训练过程。

请查看此完整的实现，包括该项目的模拟器，模型训练，测试数据，源代码和摘要。

https://github.com/ericq/CarND-Behavioral-Cloning-P3

使用Python中的卷积神经网络自驾车（游戏）

1 个答案: