我有一个Q学习程序,试图预测我的股票模拟股票市场,其中股票价格在1-2-3-1-2-3之间...
我已经尝试调试了几天,但无法获取。我什至完全从头开始,问题仍然存在。如果您有多余的时间,我只需要对此多加关注。
getStock()函数可以模拟股价。
reducePricesToBinary()函数将股票放入一个[[上一次股票涨跌还是下跌,连续跌跌多少次,连续]
readAI()函数只是读取输入后应该发生的情况
checkGuess()函数检查先前的猜测并根据其是否正确来更改policyGradient。
非常感谢您!
import requests
import sys
import time
# Constants
learningRate = 0.5
stocksToBuy = 250
discountFactor = 0.5
# Variables declared:
# getStock()
currentStockPrice = 0
pastStockPrice = 0
# reducePricesToBinary()
binaryVersionOfPrices = ""
# Ai()
AI = dict()
# convertBinaryToInputs()
inputsForAI = [0,0,0]
# Ai
guess = 0
oldGuess = 0
reward = 0
pastInputsForAI = ['0',0,0]
firstTurnOver = False
# Buying and Selling stocks
money = 1000000
shares = 0
#
countToSaveEveryFifteen = 0
# Saving anything to a file.
def save(name, data):
with open(name, 'w') as f:
f.write(str(data))
def saveEverything():
save("AI", AI)
save("binaryStockPrices", binaryVersionOfPrices)
save("money", money)
save("shares", shares)
# Runs after an error.
def onExit():
saveEverything()
sys.exit()
# Prints and saves an error log if a function crashes.
def crashProgram(errorMessage):
print(errorMessage)
with open("crashLogs", 'w') as f:
f.write("{}\n\n".format(errorMessage))
onExit()
# Runs a function with try catches to catch an error.
def doFunction(function):
try:
function()
except Exception, e:
crashProgram("Fatal error running {}().\n{}".format(function.__name__, e))
# Gets the current stock value.
#def getStock():
# global currentStockPrice
# res = requests.get("https://markets.businessinsider.com/stocks/aapl-stock")
# stockCostString = ""
# for x in range (9):
# stockCostString += res.text[res.text.find('"price": "')+10 + x]
# currentStockPrice = float(stockCostString)
# print(currentStockPrice)
def getStock():
global currentStockPrice
currentStockPrice = 1 if currentStockPrice == 3 else (2 if currentStockPrice == 1 else 3)
# Turns the prices into 0's and 1's.
def reducePricesToBinary():
global pastStockPrice
global binaryVersionOfPrices
binaryString = "1" if currentStockPrice > pastStockPrice else "0" if currentStockPrice < pastStockPrice else ""
binaryVersionOfPrices += binaryString
pastStockPrice = currentStockPrice
# Converts the binaryStockPrices to inputs for the AI.
def convertBinaryToInputs():
global inputsForAI
inputsForAI[0] = binaryVersionOfPrices[len(binaryVersionOfPrices)-1]
counterOfFirstNumber = 1
counterOfSecondNumber = 1
while(binaryVersionOfPrices[len(binaryVersionOfPrices) - counterOfFirstNumber] == inputsForAI[0]):
counterOfFirstNumber+=1
counterOfFirstNumber-=1
while(binaryVersionOfPrices[len(binaryVersionOfPrices) - counterOfFirstNumber - counterOfSecondNumber]!=inputsForAI[0]):
counterOfSecondNumber += 1
counterOfSecondNumber-=1
inputsForAI[0] = binaryVersionOfPrices[len(binaryVersionOfPrices)-1]
inputsForAI[1] = counterOfFirstNumber
inputsForAI[2] = counterOfSecondNumber
# AI functions
def readAI():
global guess
try:
AIGuess = AI[inputsForAI[0], inputsForAI[1], inputsForAI[2]]
except:
AI[inputsForAI[0], inputsForAI[1], inputsForAI[2]] = 0.5
AIGuess = 0.5
guess = AIGuess
print("GUESS: {}".format(guess))
print("INPUTS: {}".format(inputsForAI))
return guess
def checkGuess():
global firstTurnOver
if(firstTurnOver):
global oldGuess
global reward
global pastInputsForAI
oldGuess = 0 if oldGuess == -1 else 1
print("Old guess: " + str(oldGuess) + " Input: " + str(int(round(float(inputsForAI[0])))))
reward = 1 if oldGuess == int(round(float(inputsForAI[0]))) else -1
AI[pastInputsForAI[0], pastInputsForAI[1], pastInputsForAI[2]] = (1-learningRate) * AI[pastInputsForAI[0], pastInputsForAI[1], pastInputsForAI[2]] + learningRate * (reward + discountFactor * 1)
oldGuess = int(round(float(guess)))
pastInputsForAI = inputsForAI
firstTurnOver = True
def buySellStocks():
global money
global shares
oldStocks = shares
if(guess > 0):
while(money > currentStockPrice and (shares - oldStocks) < stocksToBuy * guess):
money -= currentStockPrice
shares += 1
else:
while(shares > 0 and (oldStocks - shares) > stocksToBuy * guess):
money += currentStockPrice
shares -= 1
# Loads the binaryVersionOfPrices from a file.
def loadBinaryPrices():
global binaryVersionOfPrices
with open("binaryStockPrices", 'r') as f:
binaryVersionOfPrices = f.read()
def loadMoney():
global money
with open("money", 'r') as f:
money = int(f.read())
def loadShares():
global shares
with open("shares", 'r') as f:
shares = int(f.read())
# Loads the AI from a file.
def loadAI():
global AI
with open("AI", 'r') as f:
AI = eval(f.read())
#Prints relative information
def printStuff():
print("Stock price: {}\nCurrent balance: {}\nCurrent shares: {}\nTotal value: {}\nGuess: {}\n".format(currentStockPrice, money, shares, money + shares * currentStockPrice, guess))
# Loads all variables from files.
def onProgramStart():
doFunction(loadAI)
doFunction(loadBinaryPrices)
doFunction(loadMoney)
doFunction(loadShares)
# Saves every 15 checks
def saveEveryFifteen():
global countToSaveEveryFifteen
countToSaveEveryFifteen += 1
if(countToSaveEveryFifteen == 15):
saveEverything()
countToSaveEveryFifteen = 0
# Runs all functions.
def doAllFunctions():
doFunction(reducePricesToBinary)
doFunction(convertBinaryToInputs)
doFunction(readAI)
doFunction(checkGuess)
doFunction(buySellStocks)
doFunction(saveEveryFifteen)
doFunction(printStuff)
doFunction(getStock)
# Loads variables from files.
onProgramStart()
# Repeats the process.
while(1):
doAllFunctions()
time.sleep(0.5)
答案 0 :(得分:1)
正如我在评论中提到的,这是经过一些基本重构后的程序版本:
import sys
import time
# constants
learning_rate: float = 0.5
stocks_to_buy: float = 250
discount_factor: float = 0.5
# variables declared:
# get_stock()
current_stock_price: int = 0
past_stock_price: int = 0
# reduce_prices_to_binary()
binary_version_of_prices: str = ''
# ai()
a_i: dict = {}
# convert_binary_to_inputs()
inputs_for_a_i = [0, 0, 0]
# ai
guess = 0
old_guess = 0
reward = 0
past_inputs_for_a_i = ['0', 0, 0]
first_turn_over: bool = False
# buying and selling stocks
money: int = 1000000
shares: int = 0
#
count_to_save_every_fifteen: int = 0
# saving anything to a file.
def save(name, data):
with open(name, 'w') as f:
f.write(str(data))
def save_everything():
save("a_i", a_i)
save("binary_stock_prices", binary_version_of_prices)
save("money", money)
save("shares", shares)
# runs after an error.
def on_exit():
save_everything()
sys.exit()
# gets the current stock value.
# def get_stock():
# global current_stock_price
# res = requests.get("https://markets.businessinsider.com/stocks/aapl-stock")
# stock_cost_string = ""
# for x in range (9):
# stock_cost_string += res.text[res.text.find('"price": "')+10 + x]
# current_stock_price = float(stock_cost_string)
# print(current_stock_price)
def get_stock():
global current_stock_price
if current_stock_price == 3:
current_stock_price = 1
elif current_stock_price == 1:
current_stock_price = 2
else:
current_stock_price = 3
# turns the prices into 0's and 1's.
def reduce_prices_to_binary():
global past_stock_price
global binary_version_of_prices
if current_stock_price > past_stock_price:
binary_string = "1"
elif current_stock_price < past_stock_price:
binary_string = "0"
else:
binary_string = ""
binary_version_of_prices += binary_string
past_stock_price = current_stock_price
# converts the binary_stock_prices to inputs for the a_i.
def convert_binary_to_inputs():
global inputs_for_a_i
inputs_for_a_i[0] = binary_version_of_prices[len(binary_version_of_prices) - 1]
counter_of_first_number = 1
counter_of_second_number = 1
while binary_version_of_prices[len(binary_version_of_prices) - counter_of_first_number] == inputs_for_a_i[0]:
counter_of_first_number += 1
counter_of_first_number -= 1
while (binary_version_of_prices[
len(binary_version_of_prices) - counter_of_first_number - counter_of_second_number] !=
inputs_for_a_i[0]):
counter_of_second_number += 1
counter_of_second_number -= 1
inputs_for_a_i[0] = binary_version_of_prices[len(binary_version_of_prices) - 1]
inputs_for_a_i[1] = counter_of_first_number
inputs_for_a_i[2] = counter_of_second_number
# a_i functions
def read_ai():
global guess
try:
a_i_guess = a_i[inputs_for_a_i[0], inputs_for_a_i[1], inputs_for_a_i[2]]
except:
a_i[inputs_for_a_i[0], inputs_for_a_i[1], inputs_for_a_i[2]] = 0.5
a_i_guess = 0.5
guess = a_i_guess
print(f'guess: {guess}')
print(f'inputs: {inputs_for_a_i}')
return guess
def check_guess():
global first_turn_over
if first_turn_over:
global old_guess
global reward
global past_inputs_for_a_i
old_guess = 0 if old_guess == -1 else 1
print(f'old guess: {old_guess}, input: {round(float(inputs_for_a_i[0]))}')
if old_guess == round(float(inputs_for_a_i[0])):
reward = 1
else:
reward = -1
a_i[past_inputs_for_a_i[0], past_inputs_for_a_i[1], past_inputs_for_a_i[2]] = (1 - learning_rate) * a_i[
past_inputs_for_a_i[0], past_inputs_for_a_i[1], past_inputs_for_a_i[2]] + learning_rate * (
reward + discount_factor * 1)
old_guess = int(round(float(guess)))
past_inputs_for_a_i = inputs_for_a_i
first_turn_over = True
def buy_sell_stocks():
global money
global shares
old_stocks = shares
if guess > 0:
while money > current_stock_price and (shares - old_stocks) < stocks_to_buy * guess:
money -= current_stock_price
shares += 1
else:
while shares > 0 and (old_stocks - shares) > stocks_to_buy * guess:
money += current_stock_price
shares -= 1
# loads the binary_version_of_prices from a file.
def load_binary_prices():
global binary_version_of_prices
with open("../resources/ai_stock_files/binary_stock_prices", 'r') as f:
binary_version_of_prices = f.read()
def load_money():
global money
with open("../resources/ai_stock_files/money") as f:
money = int(f.read())
def load_shares():
global shares
with open("../resources/ai_stock_files/shares") as f:
shares = int(f.read())
# loads the _a_i from a file.
def load_a_i():
global a_i
with open("../resources/ai_stock_files/a_i") as f:
a_i = eval(f.read())
# prints relative information
def print_stuff():
print(f"stock price: {current_stock_price}\n"
f"current balance: {money}\n"
f"current shares: {shares}\n"
f"total value: {money + shares * current_stock_price}\n"
f"guess: {guess}\n")
# loads all variables from files.
def on_program_start():
load_a_i()
load_binary_prices()
load_money()
load_shares()
# saves every 15 checks
def save_every_fifteen():
global count_to_save_every_fifteen
count_to_save_every_fifteen += 1
if count_to_save_every_fifteen == 15:
save_everything()
count_to_save_every_fifteen = 0
# runs all functions.
def do_all_functions():
reduce_prices_to_binary()
convert_binary_to_inputs()
read_ai()
check_guess()
buy_sell_stocks()
save_every_fifteen()
print_stuff()
get_stock()
# loads variables from files.
on_program_start()
# repeats the process.
while True:
do_all_functions()
time.sleep(0.5)
答案 1 :(得分:0)
在校正策略梯度中的策略时,我使用的是周期前的输入,并且通过按函数已使用周期前的输入的顺序调用函数来进行过度补偿,从而有效地减少了两个输入的梯度。由于我将输入循环为3,所以当我实际上被2偏移时,它看起来像是“ by-by-one”错误,因此很难检测。