我已将 CSV 文件从 S3 存储桶读取到 pandas DataFrame 中，然后将其转换为 Spark DataFrame，因为我无法将该文件直接读取为 Spark DataFrame。当我将 Pipeline 应用于该数据集时，出现了以下错误：
AttributeError: 'NoneType' object has no attribute '_jvm'
代码:
from pyspark.ml import Pipeline
cols = data.columns
pipeline = Pipeline(stages = stages)
pipelineModel = pipeline.fit(data)
错误:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-85-7c891007896f> in <module>
3 cols = data.columns
4 pipeline = Pipeline(stages = stages)
----> 5 pipelineModel = pipeline.fit(data)
6 data = pipelineModel.transform(data)
7 selectedCols = ['features']+cols
~/Downloads/spark/python/pyspark/ml/base.py in fit(self, dataset, params)
129 return self.copy(params)._fit(dataset)
130 else:
--> 131 return self._fit(dataset)
132 else:
133 raise ValueError("Params must be either a param map or a list/tuple of param maps, "
~/Downloads/spark/python/pyspark/ml/pipeline.py in _fit(self, dataset)
107 dataset = stage.transform(dataset)
108 else: # must be an Estimator
--> 109 model = stage.fit(dataset)
110 transformers.append(model)
111 if i < indexOfLastEstimator:
~/Downloads/spark/python/pyspark/ml/base.py in fit(self, dataset, params)
129 return self.copy(params)._fit(dataset)
130 else:
--> 131 return self._fit(dataset)
132 else:
133 raise ValueError("Params must be either a param map or a list/tuple of param maps, "
~/Downloads/spark/python/pyspark/ml/wrapper.py in _fit(self, dataset)
319
320 def _fit(self, dataset):
--> 321 java_model = self._fit_java(dataset)
322 model = self._create_model(java_model)
323 return self._copyValues(model)
~/Downloads/spark/python/pyspark/ml/wrapper.py in _fit_java(self, dataset)
315 :return: fitted Java model
316 """
--> 317 self._transfer_params_to_java()
318 return self._java_obj.fit(dataset._jdf)
319
~/Downloads/spark/python/pyspark/ml/wrapper.py in _transfer_params_to_java(self)
147 if len(pair_defaults) > 0:
148 sc = SparkContext._active_spark_context
--> 149 pair_defaults_seq = sc._jvm.PythonUtils.toSeq(pair_defaults)
150 self._java_obj.setDefault(pair_defaults_seq)
151
AttributeError: 'NoneType' object has no attribute '_jvm'