在安装pyspark之后,我试图按如下所述运行代码,并使IndexError:列表索引超出范围。
import findspark
findspark.init("/opt/spark")
import random
from pyspark import SparkContext
sc = SparkContext(appName="EstimatePi")
def inside(p):
x, y = random.random(), random.random()
return x*x + y*y < 1
NUM_SAMPLES = 1000000
count = sc.parallelize(range(0, NUM_SAMPLES)) \
.filter(inside).count()
print("Pi is roughly %f" % (4.0 * count / NUM_SAMPLES))
sc.stop()