我正在尝试遵循YOUTUBE教程,以使用神经网络进行比特币价格预测。
我无法再运行代码。第24行
df[col] = preprocessing.scale( df[col].values)
导致
Traceback (most recent call last):
File "C:/Users/tpottel/programming/java/eclipse projects/BBitBotCoinUsing2/pythonproject/deeplearning/video-lstm-bitcoin.py", line 109, in <module>
validation_X, validation_y = preprocess_df(main_df)
File "C:/Users/tpottel/programming/java/eclipse projects/BBitBotCoinUsing2/pythonproject/deeplearning/video-lstm-bitcoin.py", line 24, in preprocess_df
df[col] = preprocessing.scale( df[col].values)
File "C:\python37-32\lib\site-packages\sklearn\preprocessing\data.py", line 141, in scale
force_all_finite='allow-nan')
File "C:\python37-32\lib\site-packages\sklearn\utils\validation.py", line 496, in check_array
array = np.asarray(array, dtype=dtype, order=order)
File "C:\python37-32\lib\site-packages\numpy\core\_asarray.py", line 85, in asarray
return array(a, dtype, copy=False, order=order)
TypeError: float() argument must be a string or a number, not 'method'
代码:
import pandas as pd
import os
from sklearn import preprocessing
from collections import deque
import numpy as np
import random
# create a empty data frame
main_df= pd.DataFrame()
SEQ_LEN = 60 # last 60 minuts of data
FUTURE_PERIOD_PREDICT = 3 # i minute
RATIO_TO_PREDICT="LTC-USD"
def preprocess_df(df):
df=df.drop("future",1)
for col in df.columns:
if col != "":
df[col]= df[col].pct_change
df.dropna(inplace=True)
df[col] = preprocessing.scale( df[col].values)
df.dropna(inplace=True)
sequential_data= []
prev_days=deque(maxLen=SEQ_LEN)
for i in df.values:
prev_days.append([n for n in i[:-1]])
if len(prev_days) == SEQ_LEN:
sequential_data.append([np.array(prev_days), i[-1]])
# dont understand this why are we shuffling seq data
random.shuffle(sequential_data)
buys = []
sells = []
for seq, target in sequential_data:
if target==0:
sells.append({seq, target})
else:
buys.append({seq, target})
random.shuffle(buys)
random.shuffle(sells)
lower = min ( len(buys), len(sells))
buys=buys[:lower]
sells = sells[:lower]
sequential_data=buys+sells
random.shuffle(sequential_data)
X=[]
y=[]
for seq, target in sequential_data:
X.append(seq)
y.append( target)
return np.array(X), y
def classify( current, future):
if float(future) > float(future):
return 1
else:
return 0
ratios = ["BTC-USD","LTC-USD","ETH-USD","BCH-USD"]
for ratio in ratios:
dataset= f"c:\\datasets\\crypto_data\\{ratio}.csv"
df=pd.read_csv(dataset, names=["time", "low", "high", "open", "close", "volume"] )
df.rename( columns={ "close":f"{ratio}_close","volume":f"{ratio}_volume" }, inplace=True )
df.set_index("time", inplace=True)
df=df[ [f"{ratio}_close", f"{ratio}_volume" ] ]
if len(main_df)==0:
main_df = df
else:
main_df = main_df.join(df)
# adding a new col
main_df['future'] = main_df [ f"{RATIO_TO_PREDICT}_close" ].shift(-FUTURE_PERIOD_PREDICT)
main_df["target"] = list ( map( classify, main_df[f"{RATIO_TO_PREDICT}_close"],main_df['future'] ))
#get a sorted list of just the time sgtamps
# NOTE vales converts it to a numpy array wjitch has the methed sort
times=sorted( main_df.index.values)
# negativ to get t5% from the top ie
last_5pct=times[ -int(0.5*len(times))]
print(last_5pct)
#get validation data
# we do not slice it becouse the data may be out of order
# tsting data
validation_main_df=main_df[( main_df.index >= last_5pct)]
# traing data
main_df=main_df[( main_df.index < last_5pct)]
validation_X, validation_y = preprocess_df(main_df)
答案 0 :(得分:-1)
您应在函数调用后使用加括号,如下所示:
df[col] = preprocessing.scale( df[col].values())
此外,您的代码中还有另一个错误。 Deque类中的maxlen参数(代码中的此行)
prev_days=deque(maxLen=SEQ_LEN)
的写法没有大写字母L,因此应为prev_days=deque(maxlen=SEQ_LEN)