I have an array named action containing 1000 observations. The first 100 of them (indices 0 to 99) are chosen at random; after that I want to compute the value of each action from a 2-column matrix named R, which depends on the action taken. I get this error:
IndexError                                Traceback (most recent call last)
<ipython-input-21-930470b1de32> in <module>()
     19 print(R)
     20 for i in range(100,999,1):
---> 21     if R[i-1,0]>R[i-1,1]:
     22         action[i]=1
     23     else:
IndexError: index 99 is out of bounds for axis 0 with size 99
Code:
import numpy as np
from numpy import random
import random

action=np.zeros((1000))

def reward(action,i,n):
    R=np.zeros((n,2))
    for i in range(i,n,1):
        if action[i]==1:
            R[i,0]=1+action[i]/2
        else:
            R[i,1]=1+action[i]/2
    return R

random.seed(771)
for i in range(0,99,1):
    action[i]=random.randint(1,2)
print(action[0:99])
R=reward(action,0,99)
print(R)
for i in range(100,999,1):
    if R[i-1,0]>R[i-1,1]:
        action[i]=1
    else:
        action[i]=2
R=reward(action,100,999)
Answer 0 (score: 1)
R=reward(action,0,99)
creates an array with 99 rows:
>>> len(R)
99
Then, when you run
for i in range(100,999,1):
    if R[i-1,0]>R[i-1,1]:
indices starting at 99 are generated, but the last valid index of R is 98 (it runs from 0 to 98, hence a length of 99).
Try: for i in range(99,1000,1):
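As a minimal sketch of that bound, assuming R keeps the 99 rows produced by the original reward(action,0,99) call (the variable names below are only illustrative), you can let the array's own length cap the loop so the index can never run past the last row:

import numpy as np

R = np.zeros((99, 2))               # same shape as reward(action, 0, 99) returns
print(R.shape[0])                   # 99 rows, so the valid row indices are 0..98
for i in range(1, R.shape[0] + 1):  # i-1 runs over 0..98, so R[i-1] stays in bounds
    chosen = 1 if R[i-1, 0] > R[i-1, 1] else 2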
Answer 1 (score: 0)
In Python, the end index of a range is not included. For example, the first 100 values cover indices 0 through 99: the 100th index is excluded, while the 0th is included. As a slice this is written action[0:100].
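A quick interactive check of that end exclusion (nothing beyond NumPy is assumed; the names are only illustrative):

>>> list(range(0, 100))[-1]    # the end value 100 itself is never produced
99
>>> import numpy as np
>>> action = np.zeros(1000)
>>> action[0:100].shape        # the slice 0:100 covers indices 0..99, i.e. exactly 100 elements
(100,)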
This should work for you:
import numpy as np
from numpy import random
import random

def reward(action,i,n):
    R=np.zeros((n,2))
    for i in range(i,n,1):
        if action[i]==1:
            R[i,0]=1+action[i]/2
        else:
            R[i,1]=1+action[i]/2
    return R

random.seed(771)
action=np.zeros((1000))
for i in range(0,100,1):
    action[i]=random.randint(1,2)
print(action[0:100])
R=reward(action,0,100)
print(R)
for i in range(100,999,1):
    if R[i-1,0]>R[i-1,1]:
        action[i]=1
    else:
        action[i]=2
R=reward(action,100,999)
Answer 2 (score: 0)
I applied the same principle to my dataset, but it does not work: there is no error, yet there is no result either.
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime as dt
from numpy import random
import pandas as pd
import random
from sklearn.model_selection import train_test_split

#import excel file
df1 = pd.read_excel('new.xlsx')
df1 = pd.DataFrame(df1)
df1.head(8)
df1['WMRT'][0]

#discretization of Walmart
wal = np.zeros(df1.shape[0])
for i in range(1,df1.shape[0],1):
    if (df1['WMRT'][i] > df1['WMRT'][i-1]):
        wal[i]=1
    else:
        wal[i]=-1
df1['wal']=wal
print(df1['wal'])

#discretization of Dow Jones
djd = np.zeros(df1.shape[0])
for i in range(1,df1.shape[0],1):
    if (df1['DJ'][i] > df1['DJ'][i-1]):
        djd[i]=1
    else:
        djd[i]=-1
df1['DDJ']=djd
print(df1['DDJ'])

#codification of the state based on the Dow Jones index and Walmart values
state = np.zeros(df1.shape[0])
for i in range(1,df1.shape[0],1):
    if (df1['DDJ'][i]== -1 and df1['wal'][i]== -1):
        state[i]=1
    elif (df1['DDJ'][i]== 1 and df1['wal'][i]== -1):
        state[i]=2
    elif (df1['DDJ'][i]== -1 and df1['wal'][i]== 1):
        state[i]=3
    else:
        state[i]=4
df1['state']=state
print(df1['state'])
#training phase (i=4616)
#generate the same sample each time
random.seed(771)
#initialize the actions randomly for 100 observations

def number_update(action,p0,n):
    N = np.zeros((n,8))
    for i in range(p0,n,1):
        N[i,:]=N[i-1,:]
        k=int(2*(state[i]-1)+action[i])
        N[i,k]=N[i,k]+1
    return N

def Rhoo(action,p0,n):
    RHO = np.zeros((n,8))
    r = np.zeros((n))
    for i in range(p0,n,1):
        RHO[i,:]=RHO[i-1,:]
        k = int(2*(state[i]-1)+action[i])
        if k in [0,2,4,6]:
            r[i]=df1['WMRT'][i]/df1['WMRT'][i-1]-1
        else:
            r[i]=df1['DJ'][i]/df1['DJ'][i-1]-1
        RHO[i,k] = RHO[i,k]+r[i]
    return RHO

#average reward for each action 0 and 1
def reward(N,RHO,p0,n):
    R = np.zeros((n,2))
    for i in range(p0,n,1):
        #j : even number
        #k : odd number
        k = 0
        for j in range(0,7,2):
            if N[i,j] != 0:
                R[i,0] += RHO[i,j]/N[i,j]
            else:
                R[i,0] = 0
            k = j+1
            if N[i,k] != 0:
                R[i,1] += RHO[i,k]/N[i,k]
            else:
                R[i,1] = 0
    #print(R)
    return R

def reward_max(action,p0,n):
    r = np.zeros((n))
    for i in range(p0,n,1):
        k = int(2*(state[i]-1)+action[i])
        if k in [0,2,4,6]:
            r[i]=df1['WMRT'][i]/df1['WMRT'][i-1]-1
        else:
            r[i]=df1['DJ'][i]/df1['DJ'][i-1]-1
    return r
#determine the action
def main():
    action=np.zeros((df1.shape[0]))
    train, test = train_test_split(df1, test_size=0.3)
    #training the model
    #generate the same sample each time
    random.seed(771)
    #take 100 actions randomly
    for i in range(0,100,1):
        action[i]=random.randint(0,1)
    print(action)
    print(type(action))
    #N is the number of times action 1 or 0 was taken from a state i
    print("N")
    N = number_update(action,1,100)
    print(N)
    #RHO is the cumulative sum of the rewards obtained every time action 0 or 1 was taken
    print("RHO")
    RHO = Rhoo(action,1,100)
    print(RHO)
    #R is the average reward for every action 0 or 1
    print("R")
    R = reward(N,RHO,1,100)
    print(R)
    for i in range(100,2265,1):
        if (R[i-1,0]>R[i-1,1]):
            action[i]=1
        else:
            action[i]=0
        N = number_update(action,100,2265)
        RHO = Rhoo(action,100,2265)
        R = reward(N,RHO,100,2265)
    print(action[100:2265])
    print(R.shape)

main()