我使用 PySpark 运行模型(jar 文件),但在调用 pipeline.fit 时遇到了下面的错误。
ERROR:root:Exception while sending command.
Traceback (most recent call last):
File "/home/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py", line 1159, in send_command
raise Py4JNetworkError("Answer from Java side is empty")
py4j.protocol.Py4JNetworkError: Answer from Java side is empty
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py", line 985, in send_command
response = connection.send_command(command)
File "/home/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py", line 1164, in send_command
"Error while receiving", e, proto.ERROR_ON_RECEIVE)
py4j.protocol.Py4JNetworkError: Error while receiving
----------------------------------------
Exception happened during processing of request from ('127.0.0.1', 39328)
Traceback (most recent call last):
File "/opt/anaconda3/lib/python3.7/socketserver.py", line 316, in _handle_request_noblock
self.process_request(request, client_address)
File "/opt/anaconda3/lib/python3.7/socketserver.py", line 347, in process_request
self.finish_request(request, client_address)
File "/opt/anaconda3/lib/python3.7/socketserver.py", line 360, in finish_request
self.RequestHandlerClass(request, client_address, self)
File "/opt/anaconda3/lib/python3.7/socketserver.py", line 720, in __init__
self.handle()
File "/home/spark/python/pyspark/accumulators.py", line 265, in handle
poll(accum_updates)
File "/home/spark/python/pyspark/accumulators.py", line 238, in poll
if func():
File "/home/spark/python/pyspark/accumulators.py", line 242, in accum_updates
num_updates = read_int(self.rfile)
File "/home/spark/python/pyspark/serializers.py", line 692, in read_int
raise EOFError
EOFError
----------------------------------------
--------------------------------------------------------
Py4JError Traceback (most recent call last)
<ipython-input-5-497c78df0390> in <module>
----> 1 model = train_model(trainDf,testDf)
<ipython-input-3-2658aebab70f> in train_model(trainDF, testDF)
66 )
67 pipeline = Pipeline(stages=[xgboost])
---> 68 model = pipeline.fit(trainDF)
69
70 # model.transform(testDF)
/home/spark/python/pyspark/ml/base.py in fit(self, dataset, params)
130 return self.copy(params)._fit(dataset)
131 else:
--> 132 return self._fit(dataset)
133 else:
134 raise ValueError("Params must be either a param map or a list/tuple of param maps, "
/home/spark/python/pyspark/ml/pipeline.py in _fit(self, dataset)
107 dataset = stage.transform(dataset)
108 else: # must be an Estimator
--> 109 model = stage.fit(dataset)
110 transformers.append(model)
111 if i < indexOfLastEstimator:
/home/spark/python/pyspark/ml/base.py in fit(self, dataset, params)
130 return self.copy(params)._fit(dataset)
131 else:
--> 132 return self._fit(dataset)
133 else:
134 raise ValueError("Params must be either a param map or a list/tuple of param maps, "
/home/spark/python/pyspark/ml/wrapper.py in _fit(self, dataset)
286
287 def _fit(self, dataset):
--> 288 java_model = self._fit_java(dataset)
289 model = self._create_model(java_model)
290 return self._copyValues(model)
/home/spark/python/pyspark/ml/wrapper.py in _fit_java(self, dataset)
283 """
284 self._transfer_params_to_java()
--> 285 return self._java_obj.fit(dataset._jdf)
286
287 def _fit(self, dataset):
/home/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py in __call__(self, *args)
1255 answer = self.gateway_client.send_command(command)
1256 return_value = get_return_value(
-> 1257 answer, self.gateway_client, self.target_id, self.name)
1258
1259 for temp_arg in temp_args:
/home/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
61 def deco(*a, **kw):
62 try:
---> 63 return f(*a, **kw)
64 except py4j.protocol.Py4JJavaError as e:
65 s = e.java_exception.toString()
/home/spark/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
334 raise Py4JError(
335 "An error occurred while calling {0}{1}{2}".
--> 336 format(target_id, ".", name))
337 else:
338 type = answer[1]
Py4JError: An error occurred while calling o171.fit
我的代码如下:
def train_model(trainDF, testDF):
    """Fit a one-stage XGBoost pipeline on ``trainDF`` and return the model.

    Args:
        trainDF: Dataset passed to ``Pipeline.fit``. The classifier is
            configured to read the ``features`` and ``label`` columns.
        testDF: Dataset passed to the fitted model's ``transform``; the
            transformed output is not kept.

    Returns:
        The fitted pipeline model produced by ``Pipeline.fit``.
    """
    # Estimator settings are gathered in one place before construction.
    classifier_options = dict(
        featuresCol="features",
        labelCol="label",
        predictionCol="prediction",
        numClass=10,
        missing=0.0,
    )
    stages = [XGBoostClassifier(**classifier_options)]

    fitted = Pipeline(stages=stages).fit(trainDF)

    # The result of transform() is discarded; only the fitted model is returned.
    fitted.transform(testDF)
    return fitted