有没有办法用一对多维的键来存储一个值(例如存储在一个 numpy 数组中)?
下面的代码试图以两个 numpy 数组(形状分别为 (1,25) 和 (1,3))作为键对来存储奖励值。
提前非常感谢!
num_episodes = 500
# This is the table that will hold our summed rewards for
# each action in each state.
r_table = np.zeros((10000, 10000))
for g in range(num_episodes):
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
    s = np.array(state.sample(), dtype=int)
    done = False
    count = 0
    while not done:
        # NOTE: `s` is an array here, so `r_table[s, :]` is numpy fancy
        # indexing over the elements of `s`, not a lookup keyed by the whole
        # state vector -- this is the limitation the question is about.
        if np.sum(r_table[s, :]) == 0:
            # No reward recorded yet: pick an action by optimising `reward`
            # (coarse grid search, then a bounded local refinement).
            EUR_elec_sell = 0.050
            EUR_elec_buy = 0.100
            EUR_gas = 0.030
            rranges = ((0, 1250), (0, 2000), (0, 3000))
            res0 = brute(reward, rranges, finish=None)
            res1 = minimize(reward, res0,
                            bounds=[(0, 1250), (0, 2000), (0, 3000)])
            a = res1.x
            # Round every component to the nearest multiple of 10.
            a = list(map(int, a.round(decimals=-1)))
        else:
            # Select the action with the highest cumulative reward.
            a = np.argmax(r_table[s, :])
        s_t1 = model.predict([np.append(s, a)]).astype(int)
        new_s = np.append(s_t1, np.delete(s, 1))
        r = reward(a)
        count += 1
        # Cap every episode at 1000 steps.
        if count == 1000:
            done = True
        # Same caveat as above: `s` and `a` are vectors, not scalar indices.
        r_table[s, a] += r
        s = new_s
答案 0 :(得分:0)
您可以使用 `tuple(s[0]) + tuple(a)` 之类的键,但您的需求实际上更复杂一些,因为您需要查询给定 `s` 向量对应的所有值。您可以把 `r_table` 做成一个由 `dict` 组成的 `dict`,其中 `tuple(s[0])` 是第一层的键,`tuple(a)` 是第二层的键:
num_episodes = 500

# Prices and action bounds never change, so they are defined once up front
# instead of being re-assigned on every exploration step.
# NOTE(review): the EUR_* names are not read in this snippet -- presumably
# `reward` uses them as globals; confirm against its definition.
EUR_elec_sell = 0.050
EUR_elec_buy = 0.100
EUR_gas = 0.030
rranges = ((0, 1250), (0, 2000), (0, 3000))

# Nested mapping that holds our summed rewards for each action in each
# state: r_table[state_key][action_key] -> cumulative reward.
r_table = {}
for g in range(num_episodes):
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
    s = np.array(state.sample(), dtype=int)
    done = False
    count = 0
    while not done:
        # Flatten before building the key: `np.append` below returns a 1-D
        # array, so from the second step on `s[0]` would be a scalar and
        # `tuple(s[0])` would raise TypeError.
        s_key = tuple(np.ravel(s).tolist())
        actions = r_table.setdefault(s_key, {})
        if sum(actions.values()) == 0:
            # No reward recorded for this state yet: pick an action by
            # optimising `reward` (grid search, then bounded refinement).
            res0 = brute(reward, rranges, finish=None)
            res1 = minimize(reward, res0, bounds=list(rranges))
            # Round every component to the nearest multiple of 10 and keep
            # the action hashable so it can be used directly as a dict key.
            a = tuple(int(v) for v in res1.x.round(decimals=-1))
        else:
            # Select the action with the highest cumulative reward.  The
            # previous key (`-it[1]`) picked the lowest one instead.
            a = max(actions, key=actions.get)
        s_t1 = model.predict([np.append(s, a)]).astype(int)
        new_s = np.append(s_t1, np.delete(s, 1))
        r = reward(a)
        count += 1
        # Cap every episode at 1000 steps.
        if count == 1000:
            done = True
        actions[a] = actions.get(a, 0) + r
        s = new_s