我正在尝试构建一个可以对查询进行预测的深度 Q 学习代理。示例查询为 (1 6 0 20 40 40 0 0),对应的输出应为 MissStream 84766(不必得到完全准确的 MissStream)。到目前为止,在我的代码中,我将数据集中的分布列(Distribution)视为状态,并且我认为动作也就是该分布本身,因此我需要一个能用训练好的模型对查询进行预测的函数。
# Standard library
import math
import random
from collections import deque

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_datareader as data_reader
import tensorflow as tf
from tqdm import tqdm_notebook, tqdm

# Bug fix: removed `import tensorflow *`, which is a SyntaxError —
# star imports are spelled `from module import *`, and tensorflow is
# already imported as `tf` above anyway.
class MissAgent():
    """Deep Q-learning agent.

    The state is a (1, state_i) vector built from the dataset's
    'Distribution' column; the agent picks one of `action_space`
    discrete actions via an epsilon-greedy policy over a Q-network.
    """

    def __init__(self, state_i, action_space, model_name="MissAgent"):
        # state_i: length of the state vector (network input size).
        self.state_i = state_i
        # action_space: number of discrete actions (network output size).
        self.action_space = action_space
        # Bug fix: deque(2000) treats 2000 as the *iterable* and raises
        # TypeError; maxlen= is required for a capped replay buffer.
        self.memory = deque(maxlen=2000)
        # Bug fix: original read `model.name` — `model` does not exist here;
        # the constructor parameter is what was intended.
        self.model_name = model_name
        self.gamma = 0.95          # discount factor for future rewards
        self.epsilon = 1.0         # exploration rate, decays toward epsilon_final
        self.epsilon_final = 0.01
        self.epsilon_decay = 0.995
        # Bug fix: the network was never built, so self.model did not exist
        # when action()/batch_train() used it.
        self.model = self.model_builder()

    def model_builder(self):
        """Build and compile the Q-network.

        Bug fix: the original used TensorFlow.js syntax
        (`tf.layers.dense({inputShape: ...})`), which is not valid Python;
        this is the equivalent Keras Python API.
        """
        model = tf.keras.models.Sequential([
            tf.keras.layers.Dense(32, activation='relu',
                                  input_shape=(self.state_i,)),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(128, activation='relu'),
            # Linear head: one Q-value per action.
            tf.keras.layers.Dense(self.action_space, activation='linear'),
        ])
        # Bug fix: `tf.keras.optimizer` (singular) does not exist.
        model.compile(loss='mse',
                      optimizer=tf.keras.optimizers.Adam(learning_rate=0.001))
        return model

    def action(self, state):
        """Epsilon-greedy action selection: explore with probability
        epsilon, otherwise pick the action with the highest predicted
        Q-value for `state` (shape (1, state_i))."""
        if random.random() <= self.epsilon:
            return random.randrange(self.action_space)
        # Bug fix: original predicted on an undefined `actions` variable and
        # returned nothing on the greedy path.
        q_values = self.model.predict(state, verbose=0)
        return int(np.argmax(q_values[0]))

    def batch_train(self, batch_size):
        """Replay training on the most recent `batch_size` transitions.

        Bug fix: the original slice range(len-batch_size+1, len) produced
        only batch_size-1 transitions.
        """
        batch = list(self.memory)[-batch_size:]
        for state, action, reward, next_state, done in batch:
            target_reward = reward
            if not done:
                # Bellman target: r + gamma * max_a' Q(s', a').
                target_reward = reward + self.gamma * np.amax(
                    self.model.predict(next_state, verbose=0)[0])
            target = self.model.predict(state, verbose=0)
            target[0][action] = target_reward
            self.model.fit(state, target, epochs=1, verbose=0)
        # Decay exploration once per replay pass, down to epsilon_final.
        if self.epsilon > self.epsilon_final:
            self.epsilon *= self.epsilon_decay
def sigmoid(x):
    """Logistic sigmoid: squash x into the open interval (0, 1).

    Bug fix: the original body was `1 (1 + math.exp(-x))` — the division
    operator was missing, making the expression a call of the integer 1
    (a TypeError at runtime).
    """
    return 1 / (1 + math.exp(-x))
def state_creator(data):
    """Build a (1, len(data)) state vector from the 'Distribution' column.

    Each value is squashed through the logistic sigmoid so the network
    sees inputs in (0, 1). Vectorized with numpy instead of a per-row
    `math.exp` loop: it is faster and does not raise OverflowError for
    large-magnitude values (numpy saturates to 0/1 instead).
    """
    values = data['Distribution'].to_numpy(dtype=float)
    state = 1.0 / (1.0 + np.exp(-values))
    # Keep the original (1, n) batch shape expected by model.predict().
    return state.reshape(1, -1)
# ---- Hyperparameters -------------------------------------------------------
window_size = 10        # length of the state vector fed to the network
episodes = 1000
batch_size = 32

# Bug fix: MissAgent requires both state_i and action_space; the original
# call passed only one positional argument. Here the action space has the
# same size as the state (one action per distribution slot) — adjust if a
# different action set is intended.
m = MissAgent(window_size, window_size)

# Bug fix: the CSV path must be a string literal (original was unquoted,
# a SyntaxError).
dataset = pd.read_csv("/home/elbee/Desktop/dataset.csv")

# ---- Train / validation / test split ---------------------------------------
# Bug fix: pandas DataFrames have no .shuffle()/.take()/.skip() — those are
# tf.data.Dataset methods. Shuffle with .sample(frac=1) and slice with iloc.
# The original split also double-counted rows (test_dataset overlapped train).
dataset = dataset.sample(frac=1.0, random_state=42).reset_index(drop=True)
train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size   # remainder, no overlap
train_dataset = dataset.iloc[:train_size]
val_dataset = dataset.iloc[train_size:train_size + val_size]
test_dataset = dataset.iloc[train_size + val_size:]

n_steps = len(train_dataset)
for episode in range(1, episodes + 1):
    print("Episode: ", episode)
    # Bug fix: build the state from the training split, not the full dataset.
    state = state_creator(train_dataset)
    # Bug fix: range() needs an int; the original passed the DataFrame itself.
    for t in tqdm(range(n_steps)):
        action = m.action(state)
        next_state = state_creator(train_dataset)
        reward = 0
        # Bug fix: original compared t against a DataFrame (`train_dataset - 1`).
        done = (t == n_steps - 1)
        m.memory.append((state, action, reward, next_state, done))
        state = next_state
        if done:
            print("DONE.")
        # Replay-train once enough transitions have accumulated.
        if len(m.memory) > batch_size:
            m.batch_train(batch_size)
    if episode % 100 == 0:
        # Bug fix: Keras model.save() requires a file path argument.
        m.model.save(f"{m.model_name}_ep{episode}.h5")
下面是我的数据集的屏幕截图。