Question

最近我才开始接触强化学习，并了解到自然进化策略（Natural Evolution Strategies，NES）。我大致理解它的工作原理，但我对 Python 还很陌生。我找到了下面这段代码，它基本上实现了 NES 算法：

https://github.com/huseinzol05/Stock-Prediction-Models/blob/master/agent/updated-NES-google.ipynb

import numpy as np
import pandas as pd
import time
import matplotlib.pyplot as plt
import seaborn as sns
import random
sns.set()

# Load the TSLA table exported from Yahoo! Finance. The CSV is expected to
# have the columns [Date, Open, High, Low, Close, Adj Close, Volume]; the
# original note called these "predictions", but the listed columns are
# price/volume fields.
df = pd.read_csv('TSLA.csv')
# Preview the first rows. The returned frame is discarded here, so this only
# shows something when it is the last expression of a notebook/REPL cell.
df.head()


def get_state(data, t, n):
    """Return the ``n - 1`` successive differences of the window ending at ``t``.

    The window holds the ``n`` entries ``data[t - n + 1 : t + 1]``.  When it
    would start before index 0, the missing leading entries are filled with
    copies of ``data[0]``.  The differences are wrapped in one outer list
    before conversion, so scalar entries give an array of shape
    ``(1, n - 1)``.
    """
    first = t - n + 1
    if first >= 0:
        window = data[first : t + 1]
    else:
        # Left-pad with the first observation so early steps still see n entries.
        window = [data[0]] * (-first) + data[0 : t + 1]
    deltas = [window[k + 1] - window[k] for k in range(n - 1)]
    return np.array([deltas])

# Number of price differences fed to the model at every step.
window_size = 30
# Stride of the simulation loops over the price series.
skip = 1
# Closing prices as a plain Python list.
close = df["Close"].tolist()
# Last valid index of `close`; the simulation loops use it as the exclusive
# upper bound for t, so that close[t + 1] always exists.
l = len(close) - 1

class Deep_Evolution_Strategy:
    """Evolution-strategy optimiser for a list of NumPy weight arrays.

    Every epoch samples ``population_size`` Gaussian perturbations of the
    current weights, scores each perturbed copy with ``reward_function`` and
    moves the weights along the reward-weighted sum of the perturbations.
    """

    inputs = None

    def __init__(
        self, weights, reward_function, population_size, sigma, learning_rate
    ):
        """Store the optimiser configuration.

        Args:
            weights: List of NumPy arrays to optimise.  The list is updated
                in place by ``train``.
            reward_function: Callable taking a list of arrays shaped like
                ``weights`` and returning a scalar reward (higher is better).
            population_size: Number of perturbed candidates per epoch.
            sigma: Scale applied to the sampled noise.
            learning_rate: Step size of the weight update.
        """
        self.weights = weights
        self.reward_function = reward_function
        self.population_size = population_size
        self.sigma = sigma
        self.learning_rate = learning_rate

    def _get_weight_from_population(self, weights, population):
        """Return ``weights`` shifted by ``sigma`` times one member's noise."""
        return [w + self.sigma * noise for w, noise in zip(weights, population)]

    def get_weights(self):
        """Return the current list of weight arrays."""
        return self.weights

    def train(self, epoch = 100, print_every = 1):
        """Run ``epoch`` update steps on the stored weights.

        Args:
            epoch: Number of update steps.
            print_every: Accepted for interface compatibility; this
                implementation does not report progress.
        """
        step = self.learning_rate / (self.population_size * self.sigma)
        for _ in range(epoch):
            # Draw all noise first, then evaluate, so the sampling order does
            # not depend on what the reward function does.
            population = [
                [np.random.randn(*w.shape) for w in self.weights]
                for _ in range(self.population_size)
            ]
            rewards = np.zeros(self.population_size)
            for k, noise in enumerate(population):
                candidate = self._get_weight_from_population(self.weights, noise)
                rewards[k] = self.reward_function(candidate)

            spread = np.std(rewards)
            if spread == 0:
                # All candidates scored the same: there is no direction to
                # follow, and normalising would compute 0 / 0 and turn every
                # weight into NaN.
                continue
            rewards = (rewards - np.mean(rewards)) / spread

            for index, w in enumerate(self.weights):
                noise_stack = np.array([member[index] for member in population])
                self.weights[index] = w + step * np.dot(noise_stack.T, rewards).T


class Model:
    """Two-headed linear network with one hidden layer and no activation.

    ``weights`` holds, in order: the input-to-hidden matrix, the
    hidden-to-decision matrix, the hidden-to-buy matrix and the hidden bias
    row.
    """

    def __init__(self, input_size, layer_size, output_size):
        """Draw every weight array from a standard normal distribution."""
        shapes = (
            (input_size, layer_size),
            (layer_size, output_size),
            (layer_size, 1),
            (1, layer_size),
        )
        self.weights = [np.random.randn(rows, cols) for rows, cols in shapes]

    def predict(self, inputs):
        """Return ``(decision, buy)`` for ``inputs``.

        The hidden activation is ``inputs . weights[0]`` plus the last weight
        array (the bias); both outputs are linear projections of it.
        """
        hidden = np.dot(inputs, self.weights[0]) + self.weights[-1]
        decision_scores = np.dot(hidden, self.weights[1])
        buy_amount = np.dot(hidden, self.weights[2])
        return decision_scores, buy_amount

    def get_weights(self):
        """Return the list of weight arrays."""
        return self.weights

    def set_weights(self, weights):
        """Replace the list of weight arrays."""
        self.weights = weights


class Agent:
    """Trading agent whose model is trained with an evolution strategy.

    The simulations read the module-level ``close`` price list together with
    ``window_size``, ``skip`` and ``l``.  At each step the model sees the
    price differences returned by ``get_state`` and picks an action:
    ``1`` buys, ``2`` sells, and any other value holds.
    """

    POPULATION_SIZE = 15
    SIGMA = 0.1
    LEARNING_RATE = 0.03

    def __init__(self, model, money, max_buy, max_sell):
        """Bind the model and trading limits and create the optimiser.

        Args:
            model: Object exposing ``predict``, ``get_weights`` and
                ``set_weights``.
            money: Starting cash for every simulated run.
            max_buy: Largest number of units bought in one step.
            max_sell: Largest number of units sold in one step.
        """
        self.model = model
        self.initial_money = money
        self.max_buy = max_buy
        self.max_sell = max_sell
        self.es = Deep_Evolution_Strategy(
            self.model.get_weights(),
            self.get_reward,
            self.POPULATION_SIZE,
            self.SIGMA,
            self.LEARNING_RATE,
        )

    def act(self, sequence):
        """Return ``(action, units)`` chosen by the model for ``sequence``.

        ``action`` is the index of the largest score in the first row of the
        model's decision output; ``units`` is the first value of the model's
        buy output truncated to an ``int``.
        """
        decision, buy = self.model.predict(np.array(sequence))
        # Extract the scalar explicitly: calling int() on a one-element array
        # with ndim > 0 is deprecated since NumPy 1.25.
        return np.argmax(decision[0]), int(np.ravel(buy)[0])

    def _units_to_buy(self, requested):
        """Clamp the model's requested amount: negatives become 1, cap at max_buy."""
        if requested < 0:
            requested = 1
        return min(requested, self.max_buy)

    def _units_to_sell(self, quantity):
        """Return how many held units to sell in one step, capped at max_sell."""
        return min(quantity, self.max_sell)

    def get_reward(self, weights):
        """Return the percentage cash return of one run using ``weights``.

        The candidate ``weights`` are installed on the model and the whole
        ``close`` series is replayed.  Only cash is compared with the
        starting money; units still held at the end are not counted.
        """
        starting_money = self.initial_money
        cash = starting_money
        self.model.set_weights(weights)
        state = get_state(close, 0, window_size + 1)
        # Selling is only enabled once at least one buy has happened.
        has_bought = False
        quantity = 0
        for t in range(0, l, skip):
            action, buy = self.act(state)
            next_state = get_state(close, t + 1, window_size + 1)
            # NOTE(review): the affordability check covers one unit only, so
            # a multi-unit buy can push the cash balance below zero.
            if action == 1 and cash >= close[t]:
                buy_units = self._units_to_buy(buy)
                cash -= buy_units * close[t]
                has_bought = True
                quantity += buy_units
            elif action == 2 and has_bought:
                sell_units = self._units_to_sell(quantity)
                quantity -= sell_units
                cash += sell_units * close[t]
            state = next_state
        return ((cash - starting_money) / starting_money) * 100

    def fit(self, iterations, checkpoint):
        """Train for ``iterations`` epochs and load the result into the model."""
        self.es.train(iterations, print_every = checkpoint)
        # get_reward() rebinds the model to every sampled candidate, so without
        # this the model would keep the last random perturbation instead of
        # the trained weights.
        self.model.set_weights(self.es.get_weights())

    def buy(self):
        """Replay the price series with the current model and report the trades.

        Returns:
            tuple: ``(states_buy, states_sell, total_gains, invest)`` where
            the first two are lists of the time steps at which units were
            bought / sold, ``total_gains`` is the change in cash and
            ``invest`` is that change as a percentage of the starting money.
        """
        starting_money = self.initial_money
        cash = starting_money
        state = get_state(close, 0, window_size + 1)
        states_sell = []
        states_buy = []
        # One lot is opened per buy and closed per sell signal.
        open_lots = 0
        quantity = 0
        for t in range(0, l, skip):
            action, buy = self.act(state)
            next_state = get_state(close, t + 1, window_size + 1)
            if action == 1 and cash >= close[t]:
                buy_units = self._units_to_buy(buy)
                cash -= buy_units * close[t]
                open_lots += 1
                quantity += buy_units
                states_buy.append(t)
            elif action == 2 and open_lots > 0:
                open_lots -= 1
                sell_units = self._units_to_sell(quantity)
                # Nothing to sell: skip the bookkeeping but still fall through
                # to the state update below so the next step sees fresh data.
                if sell_units >= 1:
                    quantity -= sell_units
                    cash += sell_units * close[t]
                    states_sell.append(t)
            state = next_state

        total_gains = cash - starting_money
        invest = (total_gains / starting_money) * 100
        return states_buy, states_sell, total_gains, invest

# Build the model over the configured window, wrap it in an agent,
# train it, then replay the price series once with the resulting model.
model = Model(input_size=window_size, layer_size=500, output_size=3)
agent = Agent(model=model, money=10000, max_buy=5, max_sell=5)
agent.fit(iterations=500, checkpoint=10)
agent.buy()

如您所见，它被用于股票预测，并且只使用了 Close（收盘价）列，但我想尝试使用更多特征（例如 High 和 Low）。

但在修改代码以使其支持这种二维列表时，我遇到了困难。我先尝试了一个简单的改动：

close = df.loc[:,['Close','Open']].values.tolist()

这样列表的每一行就多了一个特征。但是运行代码后，在调用 agent.fit() 时开始报错：

agent.fit(iterations = 500, checkpoint = 10)

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-225-d97697984016> in <module>()
----> 1 agent.fit(iterations = 500, checkpoint = 10)

<ipython-input-223-35d9fbba5756> in fit(self, iterations, checkpoint)
     66 
     67     def fit(self, iterations, checkpoint):
---> 68         self.es.train(iterations, print_every = checkpoint)
     69 
     70     def buy(self):

<ipython-input-220-84ca345091f4> in train(self, epoch, print_every)
     33                     self.weights, population[k]
     34                 )
---> 35                 rewards[k] = self.reward_function(weights_population)
     36             rewards = (rewards - np.mean(rewards)) / np.std(rewards)
     37 

<ipython-input-223-35d9fbba5756> in get_reward(self, weights)
     36 
     37         self.model.weights = weights
---> 38         state = get_state(self.close, 0, self.window_size + 1)
     39         inventory = []
     40         quantity = 0

<ipython-input-219-0df8d8be24a9> in get_state(data, t, n)
      4     res = []
      5     for i in range(n - 1):
----> 6         res.append(block[i + 1] - block[i])
      7     return np.array([res])

TypeError: unsupported operand type(s) for -: 'list' and 'list'

我认为第一步是更新 Model 类，让它使用不同的 input_size 参数，对吗？

任何帮助将不胜感激！谢谢

多元自然进化策略

0 个答案: