以下是我的错误消息。当我在Datastax Spark中使用python 2.7并使用下面的代码时,它不起作用。我不知道为什么。一些建议会非常感激。感谢
vi /etc/dse/spark/spark-env.sh
export PYTHONHOME=/usr/local
export PYTHONPATH=/usr/local/lib/python2.7
export PYSPARK_PYTHON=/usr/local/bin/python2.7
错误讯息:
Error from python worker:
/usr/local/bin/python2.7: /usr/local/lib/python2.7/lib-dynload/_io.so: undefined symbol: _PyCodec_LookupTextEncoding
PYTHONPATH was:
/usr/share/dse/spark/python/lib/pyspark.zip:/usr/share/dse/spark/python/lib/py4j-0.8.2.1-src.zip:/usr/share/dse/spark/lib/spark-core_2.10-1.4.2.2.jar:/usr/local/lib/python2.7
java.io.EOFException
at java.io.DataInputStream.readInt(DataInputStream.java:392)
at org.apache.spark.api.python.PythonWorkerFactory.startDaemon(PythonWorkerFactory.scala:163)
at org.apache.spark.api.python.PythonWorkerFactory.createThroughDaemon(PythonWorkerFactory.scala:86)
at org.apache.spark.api.python.PythonWorkerFactory.create(PythonWorkerFactory.scala:62)
at org.apache.spark.SparkEnv.createPythonWorker(SparkEnv.scala:130)
at org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:73)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
at org.apache.spark.api.python.PairwiseRDD.compute(PythonRDD.scala:315)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:70)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:70)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)