The randomly generated initial population runs quickly, and the first generation of offspring also runs quickly, but with every successive crossover the runs get progressively slower.
Both tf.global_variables_initializer() (line 47) and run_simulation (line 152) take more time on each run, adding roughly a second to the total runtime of every iteration. Why?
Unfortunately I need to paste all of it, because I simply don't know where the problem might be coming from.
The neural network lives in the 'policy' object.
from matplotlib import pyplot as plt
import numpy as np
import random, json, math, time
import tensorflow as tf
class DecisionPolicy:
def select_action(self, current_state, step):
pass
def update_q(self, state, action, reward, next_state):
pass
class RandomDecisionPolicy(DecisionPolicy):
def __init__(self, actions):
self.actions = actions
def select_action(self, current_state, step):
action = self.actions[random.randint(0, len(self.actions) - 1)]
return action
policy = None
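# Q-learning policy: a two-layer network (input -> h1 -> Q-values, one per action) whose weights are seeded from variaDict.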
class QLearningDecisionPolicy(DecisionPolicy):
def __init__(self, actions, input_dim, variaDict):
self.epsilon = 0.9
self.gamma = 0.001
self.actions = actions
output_dim = len(actions)
h1_dim = 200
with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
self.x = tf.placeholder(tf.float32, [None, input_dim])
self.y = tf.placeholder(tf.float32, [output_dim])
#self.W1 = tf.Variable(variaDict['W1'], name="q")
self.W1 = tf.get_variable("q", [input_dim, h1_dim])
#self.b1 = tf.Variable(variaDict['b1'], name="e")
self.b1 = tf.get_variable("e", [h1_dim])
W1_assign = self.W1.assign(variaDict['W1'])
b1_assign = self.b1.assign(variaDict['b1'])
# {'W1': tf.Variable(tf.random_normal([input_dim, h1_dim])),
# 'b1' : tf.Variable(tf.constant(0.1, shape=[h1_dim])),
# 'W2': tf.Variable(tf.random_normal([h1_dim, output_dim])),
# 'b2' : tf.Variable(tf.constant(0.1, shape=[output_dim]))}
h1 = tf.nn.relu(tf.matmul(self.x, self.W1) + self.b1)
#self.W2 = tf.Variable(variaDict['W2'], name="c")
self.W2 = tf.get_variable("c", [h1_dim, output_dim])
#self.b2 = tf.Variable(variaDict['b2'], name="R")
self.b2 = tf.get_variable("R", [output_dim])
W2_assign = self.W2.assign(variaDict['W2'])
b2_assign = self.b2.assign(variaDict['b2'])
self.q = tf.nn.relu(tf.matmul(h1, self.W2) + self.b2)
loss = tf.square(self.y - self.q)
#print(self.y, self.q, loss)  # what is y here?
self.train_op = tf.train.AdagradOptimizer(0.01).minimize(loss)
self.sess = tf.Session()
start_time = time.time()
#print(self.sess.run(tf.report_uninitialized_variables()))
self.sess.run(tf.global_variables_initializer())
#print(len(tf.global_variables()))
print("variable initialization took %s seconds" % (time.time() - start_time))
def export_variables(self):
return {'W1':self.sess.run(self.W1), 'b1':self.sess.run(self.b1), 'W2':self.sess.run(self.W2), 'b2':self.sess.run(self.b2)}
#return {'W1': self.evW1, 'b1':self.evb1, 'W2':self.evW2, 'b2':self.evb2}
def select_action(self, current_state, step):
threshold = min(self.epsilon, step / 1000.)
if random.random() < threshold:
# Exploit: pick the best known action with probability `threshold` (capped at epsilon)
action_q_vals = self.sess.run(self.q, feed_dict={self.x: current_state})
action_idx = np.argmax(action_q_vals) # TODO: replace w/ tensorflow's argmax
action = self.actions[action_idx]
else:
# Explore: otherwise pick a random action
action = self.actions[random.randint(0, len(self.actions) - 1)]
return action
def update_q(self, state, action, reward, next_state):
action_q_vals = self.sess.run(self.q, feed_dict={self.x: state})
next_action_q_vals = self.sess.run(self.q, feed_dict={self.x: next_state})
next_action_idx = np.argmax(next_action_q_vals)
action_q_vals[0, next_action_idx] = reward + self.gamma * next_action_q_vals[0, next_action_idx]
action_q_vals = np.squeeze(np.asarray(action_q_vals))
self.sess.run(self.train_op, feed_dict={self.x: state, self.y: action_q_vals})
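# Runs one pass over the price series: the policy picks Buy/Sell/Hold at each step and update_q is called on every transition.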
def run_simulation(initial_budget, initial_num_stocks, prices, hist, debug=False):
budget = initial_budget
num_stocks = initial_num_stocks
share_value = 0
transitions = list()
for i in range(len(prices) - hist - 1):
#if i % 100 == 0:
#print('progress {:.2f}%'.format(float(100*i) / (len(prices) - hist - 1)))
current_state = np.asmatrix(np.hstack((prices[i:i+hist], budget, num_stocks)))
current_portfolio = budget + num_stocks * share_value
action = policy.select_action(current_state, i)
share_value = float(prices[i + hist + 1])
if action == 'Buy' and budget >= share_value:
budget -= share_value
num_stocks += 1
#print('bought 1 @ ' +str(share_value))
elif action == 'Sell' and num_stocks > 0:
budget += share_value * 0.998001
num_stocks -= 1
#print('sold 1 @ ' +str(share_value))
else:
action = 'Hold'
new_portfolio = budget + num_stocks * share_value
reward = new_portfolio - current_portfolio
next_state = np.asmatrix(np.hstack((prices[i+1:i+hist+1], budget, num_stocks)))
transitions.append((current_state, action, reward, next_state))
policy.update_q(current_state, action, reward, next_state)
portfolio = budget + num_stocks * share_value
if debug:
print('${}\t{} shares'.format(budget, num_stocks))
return portfolio
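# Runs num_tries simulations; each one builds a fresh QLearningDecisionPolicy (from variaDict in generation 0, otherwise from a crossover of the current selection) and records its final portfolio value.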
def run_simulations(budget, num_stocks, prices, hist, num_tries, variaDict=None, selection=None):
global policy
final_portfolios = list()
for i in range(num_tries):
#print(i)
if variaDict != None:
policy = QLearningDecisionPolicy(actions, hist + 2, variaDict)
elif selection != None:
newPolicyVariasDict = crossover_with_mut(random.choice(selection), random.choice(selection))
#print(newPolicyVariasDict)
#tfDict = {'W1': tf.Variable(newPolicyVariasDict['W1']),
# 'b1' : tf.Variable(newPolicyVariasDict['b1']),
# 'W2': tf.Variable(newPolicyVariasDict['W2']),
# 'b2' : tf.Variable(newPolicyVariasDict['b2'])}
#print(tfDict['W1'].dtype)
#start_time = time.time()
policy = QLearningDecisionPolicy(actions, hist + 2, newPolicyVariasDict)
start_time = time.time()
final_portfolio = run_simulation(budget, num_stocks, prices, hist)
print("simulation took %s seconds" % (time.time() - start_time))
#print(policy.sess.run(policy.b2))
final_portfolios.append(final_portfolio)
scoresList.append(final_portfolio)
scoresDict[final_portfolio] = policy.export_variables()  # note: two identical portfolio values would overwrite each other here
policy.sess.close()
avg, std = np.mean(final_portfolios), np.std(final_portfolios)
plt.clf()
plt.title('Final Portfolio Value')
plt.xlabel('Simulation #')
plt.ylabel('Net worth')
plt.plot(final_portfolios)
#plt.show()
return avg, std, final_portfolios
def get_prices(cache_filename='stock_prices.txt'):
#stock_prices = np.load(cache_filename)
try:
with open(cache_filename, 'r') as f:
stock_prices = json.load(f)
except:
stock_prices = [4996.98, 4996.99, 4996.49, 4996.98, 4996.99, 4996.99, 4997.26, 4997.26, 4997.29, 4997.3, 4997.34, 4995.15, 4995.15, 4995.15, 4995.13, 4997.32, 4997.32, 4996.74, 4995.03, 4995.03, 4995.02, 4997.33, 4996.41, 4996.4, 4997.34, 4997.32, 4997.4, 4997.5, 4997.85, 4996.31, 4996.31, 4996.87, 4996.31, 4996.31, 4997.1, 4996.34, 4996.37, 4996.37, 4997.09, 4996.36, 4996.37, 4996.36, 4996.36, 4996.36, 4996.38, 4996.38, 4996.38, 4995.09, 4996.38, 4996.38, 4997.3, 4997.31, 4997.33, 4996.17, 4996.36, 4995.11, 4996.03, 4995.15, 4995.11, 4995.1, 4995.09, 4995.03, 4995.03, 4995.03, 4995.03, 4995.03, 4995.0, 4995.03, 4995.01, 4995.0, 4995.0, 4995.03, 4995.03, 4995.03, 4995.02, 4995.02, 4995.01, 4995.0, 4995.01, 4995.0, 4994.98, 4994.16, 4995.01, 4995.01, 4995.01, 4994.44, 4995.03, 4995.03, 4995.03, 4995.03, 4995.03, 4995.0, 4995.03, 4995.03, 4995.03, 4995.01, 4995.01, 4995.01, 4995.03, 4995.03, 4994.3, 4995.03, 4995.01, 4995.03, 4995.03, 4995.03, 4995.03, 4995.03, 4995.03, 4995.03, 4995.03, 4995.02, 4995.03, 4995.0, 4995.01, 4995.01, 4995.03, 4995.0, 4994.06, 4993.97, 4993.97, 4993.81, 4993.8, 4993.7, 4993.01, 4992.99, 4992.69, 4992.51, 4992.42, 4992.37, 4992.27, 4992.01, 4992.0, 4991.87, 4994.98, 4994.47, 4994.97, 4994.97, 4994.97, 4994.58, 4994.95, 4994.97, 4994.97, 4994.97, 4993.95, 4993.79, 4993.96, 4994.39, 4994.82, 4994.78, 4994.78, 4994.79, 4993.98, 4994.0, 4994.32, 4994.0, 4993.98, 4993.96, 4993.96, 4994.09, 4992.45, 4994.06, 4994.06, 4992.45, 4992.37, 4992.35, 4992.35, 4992.25, 4992.14, 4993.31, 4994.07, 4994.03, 4993.38, 4994.03, 4994.0, 4992.51, 4993.25, 4994.0, 4994.01, 4992.51, 4992.48, 4994.0, 4994.0, 4993.97, 4993.35, 4992.53, 4993.13, 4993.94, 4993.94, 4992.49, 4992.48, 4993.91, 4993.91, 4993.91, 4993.48, 4992.52, 4992.52, 4992.52, 4992.49, 4993.89, 4992.49, 4992.48, 4992.49, 4993.89, 4993.9, 4992.52, 4992.51, 4994.74, 4993.97, 4993.97, 4994.74, 4994.75, 4993.99, 4993.97, 4994.46, 4994.75, 4994.75, 4994.75, 4993.99, 4993.97, 4994.49, 4994.75, 4994.0, 4994.75, 4994.75, 4994.43, 4994.02, 4994.75, 4994.27, 4994.02, 4994.75, 4994.02, 4994.02, 4994.41, 4994.0, 4994.02, 4994.29, 4994.0, 4994.75, 4994.75, 4994.75, 4994.02, 4994.02, 4994.35, 4994.02, 4994.0, 4994.0, 4994.0, 4994.75, 4994.02, 4994.5, 4994.73, 4994.0, 4994.73, 4994.0, 4994.73, 4994.73, 4994.73, 4994.75, 4994.75, 4994.73, 4994.43, 4994.73, 4994.75, 4994.99, 4995.0, 4995.0, 4995.02, 4995.03, 4995.03, 4995.03, 4995.03, 4995.01, 4994.8, 4994.69, 4994.69, 4995.01, 4995.03, 4995.03, 4995.03, 4994.81, 4995.03, 4994.74, 4994.75, 4995.03, 4995.03, 4995.03, 4995.03, 4995.03, 4995.03, 4994.79, 4994.93, 4995.03, 4995.03, 4995.03, 4994.87, 4994.85, 4994.86, 4994.87, 4994.98]
return stock_prices
def plot_prices(prices):
plt.title('BTC/USDT')
plt.xlabel('tick #')
plt.ylabel('price (USDT)')
plt.plot(prices)
#plt.show()
plt.savefig('prices.png')
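# Mixes the weight dictionaries of two parents into a child; note that as written a parent's value is kept only with probability MUT_PROBABILITY, and a random value is drawn otherwise.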
def crossover_with_mut(vars1, vars2):
MUT_PROBABILITY = 0.1
child = {'W1': [], 'b1': [], 'W2': [], 'b2': []}
#W1
for rowidx in range(len(vars1['W1'])):
inner = []
for colidx in range(len(vars1['W1'][0])):
if random.random() < MUT_PROBABILITY:
if random.random() < 0.5:
inner.append(vars1['W1'][rowidx][colidx])
else:
inner.append(vars2['W1'][rowidx][colidx])
else:
inner.append(random.uniform(-2, 2))
child['W1'].append(inner)
#b1
for rowidx in range(len(vars1['b1'])):
if random.random() < MUT_PROBABILITY:
if random.random() < 0.5:
child['b1'].append(vars1['b1'][rowidx])
else:
child['b1'].append(vars2['b1'][rowidx])
else:
child['b1'].append(random.uniform(0.08, 0.12))
#W2
for rowidx in range(len(vars1['W2'])):
inner = []
for colidx in range(len(vars1['W2'][0])):
if random.random() < MUT_PROBABILITY:
if random.random() < 0.5:
inner.append(vars1['W2'][rowidx][colidx])
else:
inner.append(vars2['W2'][rowidx][colidx])
else:
inner.append(random.uniform(-2, 2))
child['W2'].append(inner)
#b2
for rowidx in range(len(vars1['b2'])):
if random.random() < MUT_PROBABILITY:
if random.random() < 0.5:
child['b2'].append(vars1['b2'][rowidx])
else:
child['b2'].append(vars2['b2'][rowidx])
else:
child['b2'].append(random.uniform(0.08, 0.12))
return child
if __name__ == '__main__':
prices = get_prices()
plot_prices(prices)
actions = ['Buy', 'Sell', 'Hold']
hist = 200
pop_size = 10
# policy = RandomDecisionPolicy(actions)
budget = 100000.0
num_stocks = 0
while len(prices) < hist:
print('just a moment...')
time.sleep(2)
maxGenerations = 100
selection = []
for gen in range(maxGenerations):
#print('gen '+str(gen))
scoresList = []
scoresDict = {}
start_time = time.time()
if gen == 0:
input_dim = hist + 2
h1_dim = 200
output_dim = len(actions)
#randoDict = {'W1': tf.Variable(tf.random_normal([input_dim, h1_dim])),
# 'b1' : tf.Variable(tf.constant(0.1, shape=[h1_dim])),
# 'W2': tf.Variable(tf.random_normal([h1_dim, output_dim])),
# 'b2' : tf.Variable(tf.constant(0.1, shape=[output_dim]))}
randoDict = {'W1': tf.random_normal([input_dim, h1_dim]),
'b1' : tf.constant(0.1, shape=[h1_dim]),
'W2': tf.random_normal([h1_dim, output_dim]),
'b2' : tf.constant(0.1, shape=[output_dim])}
avg, std, final_portfolios = run_simulations(budget, num_stocks, prices, hist, pop_size, variaDict = randoDict)
#print(avg, std)
#print(scoresDict[scoresList[-1]]['b2'])
else:
avg, std, final_portfolios = run_simulations(budget, num_stocks, prices, hist, pop_size, selection = selection)
#print(avg, std)
print('###FITTEST###: ' + str(max(final_portfolios)) + ' elapsed: ' +str(time.time()-start_time))
scoresList.sort()
best = []
bestScores = scoresList[-math.ceil(0.3*pop_size):]  # TODO: make 0.3 a named constant
for score in bestScores:
best.append(scoresDict[score])
luckers = []
for i in range(math.ceil(0.2*pop_size)):  # TODO: make 0.2 a named constant
luckers.append(scoresDict[random.choice(scoresList[:-math.ceil(0.3*pop_size)])])
selection = best + luckers
#print(selection)
#print(selection[0]['b2'])
#print(selection[1]['b2'])
#print(policy.sess.run(scoresDict[scoresList[0]]['W1']))
Each execution takes progressively more time to initialize and optimize the TensorFlow variables. What is the source of this behavior?
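For reference, here is a minimal diagnostic sketch (my own addition, not part of the program above, assuming the same TF 1.x API used throughout) that prints the number of operations in the default graph and the time taken by variable initialization; calling something like this once per generation would show whether the default graph keeps growing as new QLearningDecisionPolicy objects are constructed:

import time
import tensorflow as tf

def log_graph_size(tag):
    # Diagnostic only: report how many ops the default graph currently holds
    # and how long global variable initialization takes at this point.
    num_ops = len(tf.get_default_graph().get_operations())
    sess = tf.Session()
    start = time.time()
    sess.run(tf.global_variables_initializer())
    print('%s: %d ops in default graph, init took %.3f s' % (tag, num_ops, time.time() - start))
    sess.close()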