我无法从以下网页中抓取 class 为“couponTable”的 div 里的所有数据：https://bet.hkjc.com/football/odds/odds_inplay.aspx?lang=CH 。我哪里做错了？
# Compute a per-row net present value (NPV) from a windowed list of cash flows.
#
# NOTE: `sc` (a SparkContext) is not defined in this snippet; it is expected to
# be provided by the surrounding session (e.g. the pyspark shell).

# Loading the requisite packages
from pyspark.sql import Window
from pyspark.sql.functions import col, collect_list, lit, udf
from pyspark.sql.types import DoubleType
import numpy as np


def _npv(rate, cashflows):
    """Return the net present value of *cashflows* discounted at *rate*.

    Uses the same convention as the former ``numpy.npv``: the first cash flow
    is undiscounted (t = 0), i.e. ``sum(v_t / (1 + rate) ** t)``.

    Returns ``None`` when either input is ``None`` so that null rows stay null
    instead of failing the Spark task.
    """
    if rate is None or cashflows is None:
        return None
    values = np.asarray(cashflows, dtype=float)
    periods = np.arange(values.size)
    # Convert to a builtin float: Spark's DoubleType cannot serialize
    # numpy scalar types returned from a Python UDF.
    return float(np.sum(values / (1.0 + rate) ** periods))


# NumPy functions cannot be applied to Spark Column objects directly (and
# np.npv was removed in NumPy 1.20), so the calculation is wrapped in a UDF
# that runs once per row on the collected list.
npv_udf = udf(_npv, DoubleType())

# Creating the DataFrame
df = sc.parallelize([
    ('a', 1, 100), ('a', 2, 200), ('a', 3, 300), ('a', 4, 400),
    ('a', 5, 500), ('a', 6, 600),
    ('b', 1, 23), ('b', 2, 32), ('b', 3, 34), ('b', 4, 55),
    ('b', 5, 43),
]).toDF(['Name', 'yr', 'cash'])
df.show()

# For each row, gather the `cash` values of all rows with the same Name from
# the start of the partition (ordered by yr descending) up to the current row.
# NOTE(review): with this ordering the list starts at the highest yr, so that
# value is treated as the t = 0 cash flow below - confirm this is intended.
w = (
    Window.partitionBy('Name')
    .orderBy(col('yr').desc())
    .rangeBetween(Window.unboundedPreceding, 0)
)
df = df.withColumn('cash_list', collect_list('cash').over(w))
df.show(truncate=False)

df = df.withColumn('discount_rate', lit(0.3))

# calculate npv
df = df.withColumn('npv_value', npv_udf(col('discount_rate'), col('cash_list')))