I'm trying to set up Spark with Jupyter Notebook. I followed an online tutorial and it mostly seems to work, except that I keep getting errors related to py4j. I've tried several different code samples, and certain lines give me errors. I thought the text file was being loaded incorrectly, but that doesn't seem to be the case (see the quick check after the snippet below).
from pyspark import SparkContext, SparkConf

conf = SparkConf().setAppName("primeNumbers").setMaster("local[*]")
sc = SparkContext(conf=conf)

# Load the tab-separated numbers file and split each line into tokens.
lines = sc.textFile("in/prime_nums.text")
numbers = lines.flatMap(lambda line: line.split("\t"))

# Drop empty tokens, convert the rest to ints, and sum them.
validNumbers = numbers.filter(lambda number: number)
intNumbers = validNumbers.map(lambda number: int(number))
print("Sum is: {}".format(intNumbers.reduce(lambda x, y: x + y)))
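To rule out the file itself, a quick sanity check like this can be run first (a minimal sketch; take is a standard RDD action that returns the first n elements, and I'm assuming the file exists at the relative path in/prime_nums.text):

# Print the first few raw lines of the input file.
# If this succeeds, the RDD loads fine and the failure is elsewhere.
print(lines.take(3))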
Here is the error, more or less, that occurs on the last line:
Py4JJavaError                             Traceback (most recent call last)
<ipython-input-1-d6e1c80db92a> in <module>()
     14 print("hi")
     15
---> 16 print("Sum is: {}".format(intNumbers.reduce(lambda x, y: x + y)))

/usr/local/opt/spark/spark-2.3.3-bin-hadoop2.7/python/pyspark/rdd.pyc in reduce(self, f)
    840             yield reduce(f, iterator, initial)
    841
--> 842         vals = self.mapPartitions(func).collect()
    843         if vals:
    844             return reduce(f, vals)

/usr/local/opt/spark/spark-2.3.3-bin-hadoop2.7/python/pyspark/rdd.pyc in collect(self)
    812         """
    813         with SCCallSiteSync(self.context) as css:
--> 814             sock_info = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd())
    815         return list(_load_from_socket(sock_info, self._jrdd_deserializer))
    816

/usr/local/lib/python2.7/site-packages/py4j/java_gateway.pyc in __call__(self, *args)
   1284         answer = self.gateway_client.send_command(command)
   1285         return_value = get_return_value(
-> 1286             answer, self.gateway_client, self.target_id, self.name)
   1287
   1288         for temp_arg in temp_args:

/usr/local/lib/python2.7/site-packages/py4j/protocol.pyc in get_return_value(answer, gateway_client, target_id, name)
    326                 raise Py4JJavaError(
    327                     "An error occurred while calling {0}{1}{2}.\n".
--> 328                     format(target_id, ".", name), value)
    329             else:
    330                 raise Py4JError(

Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.collectAndServe.
: java.lang.IllegalArgumentException
    at org.apache.xbean.asm5.ClassReader.<init>(Unknown Source)
    at org.apache.xbean.asm5.ClassReader.<init>(Unknown Source)
    at org.apache.xbean.asm5.ClassReader.<init>(Unknown Source)
    at org.apache.spark.util.ClosureCleaner$.getClassReader(ClosureCleaner.scala:46)
    at org.apache.spark.util.FieldAccessFinder$$anon$3$$anonfun$visitMethodInsn$2.apply(ClosureCleaner.scala:449)
    at org.apache.spark.util.FieldAccessFinder$$anon$3$$anonfun$visitMethodInsn$2.apply(ClosureCleaner.scala:432)
    at scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:733)
    at scala.collection.mutable.HashMap$$anon$1$$anonfun$foreach$2.apply(HashMap.scala:103)
…followed by many more lines like these.
The output should just be a single number.
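For reference, this is the behavior I expect. Here is a minimal sketch of the same pipeline on in-memory sample data (the sample values and app name below are made up, not from my actual file), which should print one sum:

from pyspark import SparkContext, SparkConf

conf = SparkConf().setAppName("sumCheck").setMaster("local[*]")
sc = SparkContext(conf=conf)

# In-memory stand-in for the tab-separated input file (values are hypothetical).
lines = sc.parallelize(["2\t3\t5", "7\t11"])
numbers = lines.flatMap(lambda line: line.split("\t"))
validNumbers = numbers.filter(lambda number: number)
intNumbers = validNumbers.map(lambda number: int(number))

# Expected output for the sample above: "Sum is: 28"
print("Sum is: {}".format(intNumbers.reduce(lambda x, y: x + y)))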