I am trying to write a toy program that creates a Spark context and runs a few simple models. My code is below:
from pyspark.sql import SQLContext
from pyspark import SparkContext, SparkConf
from pyspark.sql import DataFrameNaFunctions
from pyspark.ml import Pipeline
from pyspark.ml.classification import DecisionTreeClassifier
from pyspark.ml.feature import Binarizer, VectorAssembler, StringIndexer, VectorIndexer
from pyspark.sql.types import *
from pyspark.sql.functions import udf, unix_timestamp, isnan, when, count, col, trim, lit
import pyspark.sql.functions as f
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
from pyspark.mllib.evaluation import MulticlassMetrics
import warnings
import pandas as pd
from pandas.plotting import scatter_matrix
import os.path
import re
import logging
import sys
# Note: spark.executor.heartbeatInterval should stay below spark.network.timeout
conf = SparkConf().setAppName("myname-app").setMaster('local').set("spark.network.timeout", "600s")
conf.set("spark.executor.heartbeatInterval", "500s")
conf.set("spark.driver.cores", 4)
warnings.filterwarnings('ignore')
%matplotlib inline
sc = SparkContext.getOrCreate(conf=conf)
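(For what it's worth, everything above boils down to this minimal context-creation sketch; in local mode I would expect it to come up within a few seconds. "toy-app" is just a placeholder name.)

from pyspark import SparkContext, SparkConf

# Minimal local-mode context creation -- normally returns within seconds.
minimal_conf = SparkConf().setAppName("toy-app").setMaster("local")
sc = SparkContext.getOrCreate(conf=minimal_conf)
print(sc.version)  # prints the Spark version once the JVM gateway is up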
At this point, code that was previously running fine just hangs. I have not changed anything. Here is the error message (yes, I had to interrupt it from the keyboard):
KeyboardInterrupt Traceback (most recent call last)
<ipython-input-11-f29246ef897b> in <module>()
1 sc = SparkContext.getOrCreate(conf=conf)
~/anaconda3/lib/python3.6/site-packages/pyspark/context.py in getOrCreate(cls, conf)
332 with SparkContext._lock:
333 if SparkContext._active_spark_context is None:
334 SparkContext(conf=conf or SparkConf())
335 return SparkContext._active_spark_context
336
~/anaconda3/lib/python3.6/site-packages/pyspark/context.py in __init__(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, gateway, jsc, profiler_cls)
113 """
114 self._callsite = first_spark_call() or CallSite(None, None, None)
115 SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
116 try:
117 self._do_init(master, appName, sparkHome, pyFiles, environment, batchSize, serializer,
~/anaconda3/lib/python3.6/site-packages/pyspark/context.py in _ensure_initialized(cls, instance, gateway, conf)
281 with SparkContext._lock:
282 if not SparkContext._gateway:
283 SparkContext._gateway = gateway or launch_gateway(conf)
284 SparkContext._jvm = SparkContext._gateway.jvm
285
~/anaconda3/lib/python3.6/site-packages/pyspark/java_gateway.py in launch_gateway(conf)
85 while gateway_port is None and proc.poll() is None:
86 timeout = 1 # (seconds)
87 readable, _, _ = select.select([callback_socket], [], [], timeout)
88 if callback_socket in readable:
89 gateway_connection = callback_socket.accept()[0]
KeyboardInterrupt:
In the logs, I see the following:
Exception in thread "main" java.util.NoSuchElementException: key not found: _PYSPARK_DRIVER_CONN_INFO_PATH
at scala.collection.MapLike$class.default(MapLike.scala:228)
at scala.collection.AbstractMap.default(Map.scala:59)
at scala.collection.MapLike$class.apply(MapLike.scala:141)
at scala.collection.AbstractMap.apply(Map.scala:59)
at org.apache.spark.api.python.PythonGatewayServer$.main(PythonGatewayServer.scala:64)
at org.apache.spark.api.python.PythonGatewayServer.main(PythonGatewayServer.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:849)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:167)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:195)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:924)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:933)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
I have no idea how this suddenly started happening. I am running Spark 2.4.0 with Python 3.
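One check that might be relevant (just a guess on my part): whether the pip-installed pyspark package matches the Spark installation that spark-submit actually launches, since the JVM side is looking up _PYSPARK_DRIVER_CONN_INFO_PATH, an environment variable that only a matching Python side would set. Assuming spark-submit is on the PATH, something like:

import os
import subprocess

import pyspark

print(pyspark.__version__)                     # version of the Python package
print(os.environ.get("SPARK_HOME"))            # which Spark installation is used
subprocess.run(["spark-submit", "--version"])  # version of the JVM side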