Question

我有一个 Q 学习（Q-learning）程序，试图预测我模拟的股票市场的走势，其中股票价格按 1-2-3-1-2-3… 的顺序循环变化。

我已经调试了好几天，但始终找不到问题所在。我甚至完全从头重写了一遍，问题依然存在。如果您有空闲时间，希望能帮我多看一眼这段代码。

getStock（）函数可以模拟股价。

reducePricesToBinary（）函数把股价转换成如下形式的输入：[上一次股价是上涨还是下跌, 最近连续上涨/下跌的次数, 在此之前连续下跌/上涨的次数]。

readAI（）函数只是根据当前输入读取AI认为接下来应该发生的情况（即它的猜测）。

checkGuess（）函数检查先前的猜测并根据其是否正确来更改policyGradient。

非常感谢您！

import requests
import sys
import time

# Constants
learningRate = 0.5  # weight of the newly computed term in checkGuess()'s table update
stocksToBuy = 250  # multiplied by guess in buySellStocks() to bound shares traded per step
discountFactor = 0.5  # scales the constant future-value term (discountFactor * 1) in checkGuess()

# Variables declared:

# getStock()
currentStockPrice = 0  # latest simulated price; getStock() moves any value other than 1 or 3 to 3
pastStockPrice = 0  # price seen by the previous reducePricesToBinary() call

# reducePricesToBinary()
binaryVersionOfPrices = ""  # movement history: "1" appended on a rise, "0" on a fall

# Ai()
AI = dict()  # value table keyed by the three inputsForAI entries as a tuple

# convertBinaryToInputs()
inputsForAI = [0,0,0]  # [last movement char, length of its run, length of the run before it]

# Ai
guess = 0  # table value most recently returned by readAI()
oldGuess = 0  # previous guess rounded to an int, scored by checkGuess()
reward = 0  # +1 / -1 outcome assigned by checkGuess()
pastInputsForAI = ['0',0,0]  # inputs recorded by checkGuess() for its next update
firstTurnOver = False  # set to True by checkGuess() at the end of its first call

# Buying and Selling stocks
money = 1000000  # cash balance
shares = 0  # number of shares currently held

# Periodic saving
countToSaveEveryFifteen = 0  # calls counted by saveEveryFifteen() since the last save

# Saving anything to a file.
def save(name, data):
    """Overwrite the file called ``name`` with ``str(data)``."""
    with open(name, 'w') as target:
        serialized = str(data)
        target.write(serialized)

def saveEverything():
    """Write the AI table, movement history, cash and share count to their files."""
    snapshot = (
        ("AI", AI),
        ("binaryStockPrices", binaryVersionOfPrices),
        ("money", money),
        ("shares", shares),
    )
    for fileName, value in snapshot:
        save(fileName, value)

# Runs after an error.
def onExit():
    """Persist all state with saveEverything(), then stop via sys.exit()."""
    saveEverything()
    sys.exit()

# Prints and saves an error log if a function crashes.
def crashProgram(errorMessage):
    """Print ``errorMessage``, overwrite the "crashLogs" file with it, then call onExit()."""
    print(errorMessage)
    with open("crashLogs", 'w') as logFile:
        logEntry = "{}\n\n".format(errorMessage)
        logFile.write(logEntry)
    onExit()

# Runs a function with try catches to catch an error.
def doFunction(function):
    """Call ``function`` with no arguments; on any Exception hand off to crashProgram().

    The crash message names the failing function (``function.__name__``) and
    includes the exception text.
    """
    try:
        function()
    # "except Exception, e" is Python-2-only syntax and a SyntaxError on
    # Python 3; the "as" form is accepted by Python 2.6+ and Python 3 alike.
    except Exception as e:
        crashProgram("Fatal error running {}().\n{}".format(function.__name__, e))

# Gets the current stock value.
#def getStock():
#    global currentStockPrice
#    res = requests.get("https://markets.businessinsider.com/stocks/aapl-stock")
#    stockCostString = ""
#    for x in range (9):
#        stockCostString += res.text[res.text.find('"price": "')+10 + x]
#    currentStockPrice = float(stockCostString)
#    print(currentStockPrice)

def getStock():
    """Advance the simulated price: 3 becomes 1, 1 becomes 2, anything else becomes 3."""
    global currentStockPrice
    if currentStockPrice == 3:
        currentStockPrice = 1
    elif currentStockPrice == 1:
        currentStockPrice = 2
    else:
        currentStockPrice = 3

# Turns the prices into 0's and 1's.
def reducePricesToBinary():
    """Record the latest price movement in the history string.

    Appends "1" when the current price is above the previous one, "0" when it
    is below, and nothing when they are equal; then remembers the current
    price as the previous one.
    """
    global pastStockPrice
    global binaryVersionOfPrices
    if currentStockPrice > pastStockPrice:
        binaryVersionOfPrices += "1"
    elif currentStockPrice < pastStockPrice:
        binaryVersionOfPrices += "0"
    else:
        binaryVersionOfPrices += ""
    pastStockPrice = currentStockPrice

# Converts the binaryStockPrices to inputs for the AI.
def convertBinaryToInputs():
    """Summarise the movement history into the three AI inputs.

    Fills ``inputsForAI`` in place with:
      [0] the most recent movement character,
      [1] the length of the trailing run of that character,
      [2] the number of characters directly before that run that differ
          from the most recent character.

    Both scans stop at the start of the history. The previous version indexed
    past the start: it raised IndexError when the history was empty or made
    of a single run, and otherwise relied on negative indices wrapping round.
    With an empty history ``inputsForAI`` is left untouched.
    """
    history = binaryVersionOfPrices
    if not history:
        return
    latest = history[-1]
    beforeLatestRun = history.rstrip(latest)
    latestRunLength = len(history) - len(beforeLatestRun)
    previousRunLength = 0
    for movement in reversed(beforeLatestRun):
        if movement == latest:
            break
        previousRunLength += 1
    # Mutate the existing list rather than rebinding the global.
    inputsForAI[0] = latest
    inputsForAI[1] = latestRunLength
    inputsForAI[2] = previousRunLength


# AI functions
def readAI():
    """Look up the table value for the current inputs and store it in ``guess``.

    A state that is not yet in ``AI`` is inserted with the value 0.5.
    Prints the guess and the inputs, and returns the guess.
    """
    global guess
    state = (inputsForAI[0], inputsForAI[1], inputsForAI[2])
    # setdefault replaces the former bare "except:", which also swallowed
    # unrelated errors such as KeyboardInterrupt.
    guess = AI.setdefault(state, 0.5)
    print("GUESS: {}".format(guess))
    print("INPUTS: {}".format(inputsForAI))
    return guess

def checkGuess():
    """Score the previous guess and update the table entry of the previous state.

    On every call after the first: compares ``oldGuess`` with the latest
    movement (``inputsForAI[0]``), sets ``reward`` to 1 or -1, and blends
    ``reward + discountFactor * 1`` into ``AI[previous state]`` using
    ``learningRate``. It then remembers the current guess and inputs for the
    next call.
    """
    global firstTurnOver, oldGuess, reward, pastInputsForAI
    if firstTurnOver:
        # NOTE(review): only -1 maps to 0 here, every other value becomes 1 -- confirm intended.
        oldGuess = 0 if oldGuess == -1 else 1
        actualDirection = int(round(float(inputsForAI[0])))
        print("Old guess: " + str(oldGuess) + " Input: " + str(actualDirection))
        reward = 1 if oldGuess == actualDirection else -1
        pastState = (pastInputsForAI[0], pastInputsForAI[1], pastInputsForAI[2])
        AI[pastState] = (1 - learningRate) * AI[pastState] + learningRate * (reward + discountFactor * 1)
        oldGuess = int(round(float(guess)))
    # Store a copy: inputsForAI is mutated in place by convertBinaryToInputs(),
    # so keeping a reference made the "past" inputs always equal the current ones.
    pastInputsForAI = list(inputsForAI)
    firstTurnOver = True

def buySellStocks():
    """Buy shares on a positive guess, sell on a negative one; do nothing on zero.

    At most ``stocksToBuy * abs(guess)`` shares (rounded up by the loop) are
    traded per call. Buying also stops once ``money`` no longer exceeds the
    price, and selling stops when no shares are left.

    The previous sell bound compared shares sold with the negative product
    ``stocksToBuy * guess``, which was always true, so every share was sold.
    """
    global money
    global shares
    tradeLimit = stocksToBuy * abs(guess)
    traded = 0
    if guess > 0:
        while money > currentStockPrice and traded < tradeLimit:
            money -= currentStockPrice
            shares += 1
            traded += 1
    else:
        while shares > 0 and traded < tradeLimit:
            money += currentStockPrice
            shares -= 1
            traded += 1

# Loads the binaryVersionOfPrices from a file.
def loadBinaryPrices():
    """Replace the movement history with the full text of the "binaryStockPrices" file."""
    global binaryVersionOfPrices
    with open("binaryStockPrices", 'r') as historyFile:
        contents = historyFile.read()
    binaryVersionOfPrices = contents

def loadMoney():
    """Restore the cash balance from the "money" file, parsed with int()."""
    global money
    with open("money", 'r') as moneyFile:
        contents = moneyFile.read()
    money = int(contents)

def loadShares():
    """Restore the share count from the "shares" file, parsed with int()."""
    global shares
    with open("shares", 'r') as sharesFile:
        contents = sharesFile.read()
    shares = int(contents)

# Loads the AI from a file.
def loadAI():
    """Restore the AI table from the "AI" file.

    The file is expected to hold the ``str()`` of the table as written by
    save(): a dict literal with tuple keys and numeric values.
    """
    global AI
    import ast  # local import so this block does not depend on the file header
    with open("AI", 'r') as f:
        # SECURITY: this used to be eval(), which executes any code found in the
        # file. literal_eval only accepts Python literals, which is all that
        # save() writes for the table.
        AI = ast.literal_eval(f.read())

#Prints relative information
def printStuff():
    """Print the price, cash, shares held, total value (cash + shares * price) and guess."""
    totalValue = money + shares * currentStockPrice
    report = "Stock price: {}\nCurrent balance: {}\nCurrent shares: {}\nTotal value: {}\nGuess: {}\n"
    print(report.format(currentStockPrice, money, shares, totalValue, guess))

# Loads all variables from files.
def onProgramStart():
    """Run each loader through doFunction(): AI table, history, money, then shares."""
    for loader in (loadAI, loadBinaryPrices, loadMoney, loadShares):
        doFunction(loader)

# Saves every 15 checks
def saveEveryFifteen():
    """Count this call; when the count reaches 15, save everything and reset it to 0."""
    global countToSaveEveryFifteen
    countToSaveEveryFifteen += 1
    if countToSaveEveryFifteen != 15:
        return
    saveEverything()
    countToSaveEveryFifteen = 0

# Runs all functions.
def doAllFunctions():
    """Run one simulation step, passing each stage through doFunction() in order."""
    pipeline = (
        reducePricesToBinary,
        convertBinaryToInputs,
        readAI,
        checkGuess,
        buySellStocks,
        saveEveryFifteen,
        printStuff,
        getStock,
    )
    for stage in pipeline:
        doFunction(stage)

# Restore saved state from files before the first step.
onProgramStart()

# Run one simulation step every half second, forever.
while True:
    doAllFunctions()
    time.sleep(0.5)

Answer 1

正如我在评论中提到的，这是经过一些基本重构后的程序版本：

import sys
import time

# constants
learning_rate: float = 0.5  # weight of the newly computed term in check_guess()'s table update
stocks_to_buy: float = 250  # multiplied by guess in buy_sell_stocks() to bound shares traded per step
discount_factor: float = 0.5  # scales the constant future-value term (discount_factor * 1) in check_guess()

# variables declared:

# get_stock()
current_stock_price: int = 0  # latest simulated price; get_stock() moves any value other than 1 or 3 to 3
past_stock_price: int = 0  # price seen by the previous reduce_prices_to_binary() call

# reduce_prices_to_binary()
binary_version_of_prices: str = ''  # movement history: "1" appended on a rise, "0" on a fall

# ai()
a_i: dict = {}  # value table keyed by the three inputs_for_a_i entries as a tuple

# convert_binary_to_inputs()
inputs_for_a_i = [0, 0, 0]  # [last movement char, length of its run, length of the run before it]

# ai
guess = 0  # table value most recently returned by read_ai()
old_guess = 0  # previous guess rounded to an int, scored by check_guess()
reward = 0  # +1 / -1 outcome assigned by check_guess()
past_inputs_for_a_i = ['0', 0, 0]  # inputs recorded by check_guess() for its next update
first_turn_over: bool = False  # set to True by check_guess() at the end of its first call

# buying and selling stocks
money: int = 1000000  # cash balance
shares: int = 0  # number of shares currently held

# periodic saving
count_to_save_every_fifteen: int = 0  # calls counted by save_every_fifteen() since the last save


# saving anything to a file.
def save(name, data):
    """Overwrite the file called ``name`` with ``str(data)``."""
    with open(name, 'w') as target:
        serialized = str(data)
        target.write(serialized)


def save_everything():
    """Write the table, movement history, cash and share count to their files."""
    # NOTE(review): these names are relative to the working directory, while the
    # load_* functions read from "../resources/ai_stock_files/" -- confirm that
    # saving and loading are meant to use different locations.
    snapshot = (
        ("a_i", a_i),
        ("binary_stock_prices", binary_version_of_prices),
        ("money", money),
        ("shares", shares),
    )
    for file_name, value in snapshot:
        save(file_name, value)


# runs after an error.
def on_exit():
    """Persist all state with save_everything(), then stop via sys.exit()."""
    save_everything()
    sys.exit()


# gets the current stock value.
# def get_stock():
#    global current_stock_price
#    res = requests.get("https://markets.businessinsider.com/stocks/aapl-stock")
#    stock_cost_string = ""
#    for x in range (9):
#        stock_cost_string += res.text[res.text.find('"price": "')+10 + x]
#    current_stock_price = float(stock_cost_string)
#    print(current_stock_price)

def get_stock():
    """Advance the simulated price: 3 becomes 1, 1 becomes 2, anything else becomes 3."""
    global current_stock_price
    successor = {3: 1, 1: 2}
    current_stock_price = successor.get(current_stock_price, 3)


# turns the prices into 0's and 1's.
def reduce_prices_to_binary():
    """Record the latest price movement in the history string.

    Appends "1" when the current price is above the previous one, "0" when it
    is below, and nothing when they are equal; then remembers the current
    price as the previous one.
    """
    global past_stock_price, binary_version_of_prices
    rose = current_stock_price > past_stock_price
    fell = current_stock_price < past_stock_price
    binary_version_of_prices += "1" if rose else "0" if fell else ""
    past_stock_price = current_stock_price


# converts the binary_stock_prices to inputs for the a_i.
def convert_binary_to_inputs():
    """Summarise the movement history into the three table inputs.

    Fills ``inputs_for_a_i`` in place with:
      [0] the most recent movement character,
      [1] the length of the trailing run of that character,
      [2] the number of characters directly before that run that differ
          from the most recent character.

    Both scans stop at the start of the history. The previous version indexed
    past the start: it raised IndexError when the history was empty or made
    of a single run, and otherwise relied on negative indices wrapping round.
    With an empty history ``inputs_for_a_i`` is left untouched.
    """
    history = binary_version_of_prices
    if not history:
        return
    latest = history[-1]
    before_latest_run = history.rstrip(latest)
    latest_run_length = len(history) - len(before_latest_run)
    previous_run_length = 0
    for movement in reversed(before_latest_run):
        if movement == latest:
            break
        previous_run_length += 1
    # Mutate the existing list rather than rebinding the global.
    inputs_for_a_i[0] = latest
    inputs_for_a_i[1] = latest_run_length
    inputs_for_a_i[2] = previous_run_length


# a_i functions
def read_ai():
    """Look up the table value for the current inputs and store it in ``guess``.

    A state that is not yet in ``a_i`` is inserted with the value 0.5.
    Prints the guess and the inputs, and returns the guess.
    """
    global guess
    state = (inputs_for_a_i[0], inputs_for_a_i[1], inputs_for_a_i[2])
    # setdefault replaces the former bare "except:", which also swallowed
    # unrelated errors such as KeyboardInterrupt.
    guess = a_i.setdefault(state, 0.5)
    print(f'guess: {guess}')
    print(f'inputs: {inputs_for_a_i}')
    return guess


def check_guess():
    """Score the previous guess and update the table entry of the previous state.

    On every call after the first: compares ``old_guess`` with the latest
    movement (``inputs_for_a_i[0]``), sets ``reward`` to 1 or -1, and blends
    ``reward + discount_factor * 1`` into ``a_i[previous state]`` using
    ``learning_rate``. It then remembers the current guess and inputs for the
    next call.
    """
    global first_turn_over, old_guess, reward, past_inputs_for_a_i
    if first_turn_over:
        # NOTE(review): only -1 maps to 0 here, every other value becomes 1 -- confirm intended.
        old_guess = 0 if old_guess == -1 else 1
        actual_direction = round(float(inputs_for_a_i[0]))
        print(f'old guess: {old_guess}, input: {actual_direction}')
        reward = 1 if old_guess == actual_direction else -1
        past_state = (past_inputs_for_a_i[0], past_inputs_for_a_i[1], past_inputs_for_a_i[2])
        a_i[past_state] = ((1 - learning_rate) * a_i[past_state]
                           + learning_rate * (reward + discount_factor * 1))
        old_guess = int(round(float(guess)))
    # Store a copy: inputs_for_a_i is mutated in place by convert_binary_to_inputs(),
    # so keeping a reference made the "past" inputs always equal the current ones.
    past_inputs_for_a_i = list(inputs_for_a_i)
    first_turn_over = True


def buy_sell_stocks():
    """Buy shares on a positive guess, sell on a negative one; do nothing on zero.

    At most ``stocks_to_buy * abs(guess)`` shares (rounded up by the loop) are
    traded per call. Buying also stops once ``money`` no longer exceeds the
    price, and selling stops when no shares are left.

    The previous sell bound compared shares sold with the negative product
    ``stocks_to_buy * guess``, which was always true, so every share was sold.
    """
    global money
    global shares
    trade_limit = stocks_to_buy * abs(guess)
    traded = 0
    if guess > 0:
        while money > current_stock_price and traded < trade_limit:
            money -= current_stock_price
            shares += 1
            traded += 1
    else:
        while shares > 0 and traded < trade_limit:
            money += current_stock_price
            shares -= 1
            traded += 1


# loads the binary_version_of_prices from a file.
def load_binary_prices():
    """Replace the movement history with the full text of the saved history file."""
    global binary_version_of_prices
    with open("../resources/ai_stock_files/binary_stock_prices", 'r') as history_file:
        contents = history_file.read()
    binary_version_of_prices = contents


def load_money():
    """Restore the cash balance from the saved money file, parsed with int()."""
    global money
    with open("../resources/ai_stock_files/money") as money_file:
        contents = money_file.read()
    money = int(contents)


def load_shares():
    """Restore the share count from the saved shares file, parsed with int()."""
    global shares
    with open("../resources/ai_stock_files/shares") as shares_file:
        contents = shares_file.read()
    shares = int(contents)


# loads the _a_i from a file.
def load_a_i():
    """Restore the table from the saved "a_i" file.

    The file is expected to hold the ``str()`` of the table as written by
    save(): a dict literal with tuple keys and numeric values.
    """
    global a_i
    import ast  # local import so this block does not depend on the file header
    with open("../resources/ai_stock_files/a_i") as f:
        # SECURITY: this used to be eval(), which executes any code found in the
        # file. literal_eval only accepts Python literals, which is all that
        # save() writes for the table.
        a_i = ast.literal_eval(f.read())


# prints relative information
def print_stuff():
    """Print the price, cash, shares held, total value (cash + shares * price) and guess."""
    total_value = money + shares * current_stock_price
    report = (
        f"stock price: {current_stock_price}\n"
        f"current balance: {money}\n"
        f"current shares: {shares}\n"
        f"total value: {total_value}\n"
        f"guess: {guess}\n"
    )
    print(report)


# loads all variables from files.
def on_program_start():
    """Run every loader in order: table, movement history, money, then shares."""
    for loader in (load_a_i, load_binary_prices, load_money, load_shares):
        loader()


# saves every 15 checks
def save_every_fifteen():
    """Count this call; when the count reaches 15, save everything and reset it to 0."""
    global count_to_save_every_fifteen
    count_to_save_every_fifteen += 1
    if count_to_save_every_fifteen != 15:
        return
    save_everything()
    count_to_save_every_fifteen = 0


# runs all functions.
def do_all_functions():
    """Run one simulation step by calling each stage in order."""
    pipeline = (
        reduce_prices_to_binary,
        convert_binary_to_inputs,
        read_ai,
        check_guess,
        buy_sell_stocks,
        save_every_fifteen,
        print_stuff,
        get_stock,
    )
    for stage in pipeline:
        stage()


# Restore saved state from files before the first step; unlike the question's
# version, the loaders are called directly, so a failing load propagates here.
on_program_start()

# Run one simulation step every half second, forever.
while True:
    do_all_functions()
    time.sleep(0.5)

Answer 2

在修正策略梯度中的策略时，我使用的是一个周期之前的输入；而函数的调用顺序本身又已经让它们使用一个周期之前的输入，结果造成了过度补偿，相当于让梯度所对应的输入偏移了两个周期。由于输入以 3 为周期循环，实际偏移 2 时看起来就像是“差一（off-by-one）”错误，因此很难发现。

Q-Learning AI无法识别简单模式

2 个答案: