I have an array named action containing 1000 observations. The first 100 of them (indices 0 to 99) are chosen at random; after that I want to compute the value of each action from a 2-column matrix named R, which depends on the action taken. I get this error:
IndexError                                Traceback (most recent call last)
<ipython-input-21-930470b1de32> in <module>()
     19 print(R)
     20 for i in range(100,999,1):
---> 21     if R[i-1,0]>R[i-1,1]:
     22         action[i]=1
     23     else:
IndexError: index 99 is out of bounds for axis 0 with size 99
Code:
import numpy as np
from numpy import random
import random

action=np.zeros((1000))

def reward(action,i,n):
    R=np.zeros((n,2))
    for i in range(i,n,1):
        if action[i]==1:
            R[i,0]=1+action[i]/2
        else:
            R[i,1]=1+action[i]/2
    return R

random.seed(771)
for i in range(0,99,1):
    action[i]=random.randint(1,2)
print(action[0:99])
R=reward(action,0,99)
print(R)
for i in range(100,999,1):
    if R[i-1,0]>R[i-1,1]:
        action[i]=1
    else:
        action[i]=2
R=reward(action,100,999)
Answer 0 (score: 1)
R=reward(action,0,99)
creates an array with 99 rows:
>>> len(R)
99
Then, when you run
for i in range(100,999,1):
    if R[i-1,0]>R[i-1,1]:
indices starting at 99 are generated, but the last valid index of R is 98 (it runs from 0 to 98, hence a length of 99).
Try: for i in range(99,1000,1):
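As a minimal sketch of that bound, assuming R keeps the 99 rows produced by the original reward(action,0,99) call (the variable names below are only illustrative), you can let the array's own length cap the loop so the index can never run past the last row:

import numpy as np

R = np.zeros((99, 2))               # same shape as reward(action, 0, 99) returns
print(R.shape[0])                   # 99 rows, so the valid row indices are 0..98
for i in range(1, R.shape[0] + 1):  # i-1 runs over 0..98, so R[i-1] stays in bounds
    chosen = 1 if R[i-1, 0] > R[i-1, 1] else 2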
Answer 1 (score: 0)
In Python, the end index of a range is not included. For example, the first 100 values cover indices 0 through 99: the 100th index is excluded, while the 0th is included. As a slice this is written action[0:100].
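A quick interactive check of that end exclusion (nothing beyond NumPy is assumed; the names are only illustrative):

>>> list(range(0, 100))[-1]    # the end value 100 itself is never produced
99
>>> import numpy as np
>>> action = np.zeros(1000)
>>> action[0:100].shape        # the slice 0:100 covers indices 0..99, i.e. exactly 100 elements
(100,)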
This should work for you:
import numpy as np
from numpy import random
import random

def reward(action,i,n):
    R=np.zeros((n,2))
    for i in range(i,n,1):
        if action[i]==1:
            R[i,0]=1+action[i]/2
        else:
            R[i,1]=1+action[i]/2
    return R

random.seed(771)
action=np.zeros((1000))
for i in range(0,100,1):
    action[i]=random.randint(1,2)
print(action[0:100])
R=reward(action,0,100)
print(R)
for i in range(100,999,1):
    if R[i-1,0]>R[i-1,1]:
        action[i]=1
    else:
        action[i]=2
R=reward(action,100,999)
Answer 2 (score: 0)
I applied the same principle to my dataset, but it does not work: there is no error, yet there is no result either.
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime as dt
from numpy import random
import pandas as pd
import random
from sklearn.model_selection import train_test_split

#import excel file
df1 = pd.read_excel('new.xlsx')
df1 = pd.DataFrame(df1)
df1.head(8)
df1['WMRT'][0]

#discretization of Walmart
wal = np.zeros(df1.shape[0])
for i in range(1,df1.shape[0],1):
    if (df1['WMRT'][i] > df1['WMRT'][i-1]):
        wal[i]=1
    else:
        wal[i]=-1
df1['wal']=wal
print(df1['wal'])

#discretization of Dow Jones
djd = np.zeros(df1.shape[0])
for i in range(1,df1.shape[0],1):
    if (df1['DJ'][i] > df1['DJ'][i-1]):
        djd[i]=1
    else:
        djd[i]=-1
df1['DDJ']=djd
print(df1['DDJ'])

#codification of the state based on the Dow Jones index and Walmart values
state = np.zeros(df1.shape[0])
for i in range(1,df1.shape[0],1):
    if (df1['DDJ'][i]== -1 and df1['wal'][i]== -1):
        state[i]=1
    elif (df1['DDJ'][i]== 1 and df1['wal'][i]== -1):
        state[i]=2
    elif (df1['DDJ'][i]== -1 and df1['wal'][i]== 1):
        state[i]=3
    else:
        state[i]=4
df1['state']=state
print(df1['state'])
#training phase (i=4616)
#generate the same sample each time
random.seed(771)
#initialize the actions randomly for 100 observations

def number_update(action,p0,n):
    N = np.zeros((n,8))
    for i in range(p0,n,1):
        N[i,:]=N[i-1,:]
        k=int(2*(state[i]-1)+action[i])
        N[i,k]=N[i,k]+1
    return N

def Rhoo(action,p0,n):
    RHO = np.zeros((n,8))
    r = np.zeros((n))
    for i in range(p0,n,1):
        RHO[i,:]=RHO[i-1,:]
        k = int(2*(state[i]-1)+action[i])
        if k in [0,2,4,6]:
            r[i]=df1['WMRT'][i]/df1['WMRT'][i-1]-1
        else:
            r[i]=df1['DJ'][i]/df1['DJ'][i-1]-1
        RHO[i,k] = RHO[i,k]+r[i]
    return RHO

#average reward for each action 0 and 1
def reward(N,RHO,p0,n):
    R = np.zeros((n,2))
    for i in range(p0,n,1):
        #j : even number
        #k : odd number
        k = 0
        for j in range(0,7,2):
            if N[i,j] != 0:
                R[i,0] += RHO[i,j]/N[i,j]
            else:
                R[i,0] = 0
            k = j+1
            if N[i,k] != 0:
                R[i,1] += RHO[i,k]/N[i,k]
            else:
                R[i,1] = 0
    #print(R)
    return R

def reward_max(action,p0,n):
    r = np.zeros((n))
    for i in range(p0,n,1):
        k = int(2*(state[i]-1)+action[i])
        if k in [0,2,4,6]:
            r[i]=df1['WMRT'][i]/df1['WMRT'][i-1]-1
        else:
            r[i]=df1['DJ'][i]/df1['DJ'][i-1]-1
    return r
#determine the action
def main():
    action=np.zeros((df1.shape[0]))
    train, test = train_test_split(df1, test_size=0.3)
    #training the model
    #generate the same sample each time
    random.seed(771)
    #take 100 actions randomly
    for i in range(0,100,1):
        action[i]=random.randint(0,1)
    print(action)
    print(type(action))
    #N is the number of times action 1 or 0 was taken from a state i
    print("N")
    N = number_update(action,1,100)
    print(N)
    #RHO is the cumulative sum of the rewards obtained every time action 0 or 1 was taken
    print("RHO")
    RHO = Rhoo(action,1,100)
    print(RHO)
    #R is the average reward for every action 0 or 1
    print("R")
    R = reward(N,RHO,1,100)
    print(R)
    for i in range(100,2265,1):
        if (R[i-1,0]>R[i-1,1]):
            action[i]=1
        else:
            action[i]=0
        N = number_update(action,100,2265)
        RHO = Rhoo(action,100,2265)
        R = reward(N,RHO,100,2265)
    print(action[100:2265])
    print(R.shape)

main()