我尝试运行下面这段代码,但它抛出了属性错误(AttributeError)。
from datetime import datetime

import dask.dataframe as dd
import numpy as np
import pandas as pd
from dask.base import compute
def dfWithTripTimes(df):
    """Return a frame of trip features plus duration, pickup time and speed.

    Args:
        df: A pandas or Dask DataFrame containing the columns
            ``tpep_pickup_datetime``, ``tpep_dropoff_datetime``,
            ``passenger_count``, ``trip_distance``, ``pickup_longitude``,
            ``pickup_latitude``, ``dropoff_longitude`` and
            ``dropoff_latitude``.

    Returns:
        A new in-memory DataFrame holding the selected feature columns and
        three derived columns: ``trip_duration``, ``pickup_time`` and
        ``speed``. The input ``df`` is not modified.
    """
    startTime = datetime.now()

    def _materialize(frame):
        # Only Dask collections expose ``.compute()``; calling it on a plain
        # pandas DataFrame raises AttributeError, so call it only if present.
        return frame.compute() if hasattr(frame, "compute") else frame

    duration = _materialize(df[["tpep_pickup_datetime", "tpep_dropoff_datetime"]])

    # NOTE(review): ``timeToUnix`` is defined outside this block; presumably it
    # returns a Unix timestamp in seconds -- confirm against its definition.
    pickup_time = np.array(
        [timeToUnix(stamp) for stamp in duration["tpep_pickup_datetime"].values]
    )
    dropoff_time = np.array(
        [timeToUnix(stamp) for stamp in duration["tpep_dropoff_datetime"].values]
    )

    # Difference divided by 60: minutes, if the timestamps are in seconds.
    trip_duration = (dropoff_time - pickup_time) / 60.0

    # NOTE(review): the original spelled this column 'passanger_count'; the
    # NYC TLC trip-record schema uses 'passenger_count' -- confirm against the
    # actual data.
    feature_columns = [
        "passenger_count",
        "trip_distance",
        "pickup_longitude",
        "pickup_latitude",
        "dropoff_longitude",
        "dropoff_latitude",
    ]
    # Copy so the derived columns are added to a fresh frame rather than to a
    # view of (or directly onto) the caller's DataFrame.
    new_frame = _materialize(df[feature_columns]).copy()

    new_frame["trip_duration"] = trip_duration
    new_frame["pickup_time"] = pickup_time
    # Distance per duration unit, scaled by 60 (per hour if duration is minutes).
    new_frame["speed"] = (new_frame["trip_distance"] / new_frame["trip_duration"]) * 60

    print("Time taken for creation of dataframe is {}".format(datetime.now() - startTime))
    return new_frame
# Build the trip-feature frame from the previously loaded ``data`` DataFrame.
new_frame = dfWithTripTimes(data)
答案 0 :(得分:0)
只有 Dask DataFrame 对象才具有 .compute() 方法。
您得到的错误说明您的数据框实际上是 Pandas DataFrame,而不是 Dask DataFrame。如果您使用的是 Pandas,则无需调用 .compute()。