I am trying to run a function with dependencies in PySpark, but I keep getting an error (code below). My PySpark installation itself works fine and is hooked into my Python environment, and my function is correct. I am using Python 3.6 with Spark 2.2.0 in Spyder.
My code is:
import flex_time
import pathTraveltime
import pickle
import time
from datetime import datetime, timedelta
import pytz
import cgitb
cgitb.enable()
import pyspark
import os,sys
os.environ['SPARK_HOME'] = r"C:\opt\spark\spark-2.2.0-bin-hadoop2.7"
sys.path.append(r"C:\opt\spark\spark-2.2.0-bin-hadoop2.7\python")
import findspark
findspark.init(r"C:\opt\spark\spark-2.2.0-bin-hadoop2.7")
spark_home = os.environ.get("SPARK_HOME")
sys.path.insert(0, os.path.join(spark_home, "python"))
sys.path.insert(0, os.path.join(spark_home, "python", "lib", "py4j-0.10.4-src.zip"))
conf = pyspark.SparkConf()
conf.set("spark.executor.memory", '4g')
conf.set('spark.executor.cores', '16')
conf.set('spark.cores.max', '16')
conf.set("spark.driver.memory",'4g')
conf.setMaster("local[2]") # number of threads to use
sc = pyspark.SparkContext.getOrCreate(conf=conf)
sc.addPyFile('~pathTraveltime.py')
sc.addPyFile('~flex_time.py')
sc.addPyFile('~Prediction_X.py')
with open('detailed_network.pickle', 'rb') as f:
    graph = pickle.load(f, encoding='latin1')
input_json = [{"origin_lat": 38.916228, "origin_lon": -77.031576,
               "destination_lat": 38.918236, "destination_lon": -77.229942,
               "dept_time": "23:30", "before": 60, "after": 60},
              {"origin_lat": 38.916228, "origin_lon": -77.031576,
               "destination_lat": 38.918236, "destination_lon": -77.229942,
               "dept_time": "23:30", "before": 60, "after": 60}]
results =[]
# Function
def flex_func(input_json):
    import flex_time
    import pathTraveltime
    origin_lat = input_json["origin_lat"]
    origin_lon = input_json["origin_lon"]
    destination_lat = input_json["destination_lat"]
    destination_lon = input_json["destination_lon"]
    path = flex_time._ShortestPath(origin_lat, origin_lon, destination_lat,
                                   destination_lon, graph)
    return path  # return the computed path so collect() gathers the results
# PySpark code
count = sc.parallelize(input_json).map(lambda j: flex_func(j)).collect()
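My basic Spark setup does seem to work: a trivial job that does not touch my custom modules runs without problems (just a sanity-check sketch, not part of the code above):
# Sanity check: a pure-Python map with no external module dependencies
test = sc.parallelize([1, 2, 3, 4]).map(lambda x: x * 2).collect()
print(test)  # [2, 4, 6, 8]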
But as soon as the job calls flex_func, I keep getting this error message:
#Error Message
Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.collectAndServe.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 7.0 failed 1 times, most recent failure: Lost task 0.0 in stage 7.0 (TID 14, localhost, executor driver): org.apache.spark.SparkException: Python worker exited unexpectedly (crashed)
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRDD.scala:230)
at org.apache.spark.api.python.PythonRunner$$anon$1.<init>(PythonRDD.scala:234)
at org.apache.spark.api.python.PythonRunner.compute(PythonRDD.scala:152)
at org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:63)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
at java.lang.Thread.run(Unknown Source)
Caused by: java.io.EOFException
at java.io.DataInputStream.readInt(Unknown Source)
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRDD.scala:166)
... 11 more