这是我关于stakcoverflow的第一篇文章,因为我找不到任何解决此消息的线索"' PipelinedRDD'对象没有属性' _jdf'"当我在我的火车数据集上调用trainer.fit以在Spark下用Python创建神经网络模型时出现
这是我的代码
from pyspark import SparkContext
from pyspark.ml.classification import MultilayerPerceptronClassifier, MultilayerPerceptronClassificationModel
from pyspark.mllib.feature import StandardScaler
from pyspark.mllib.regression import LabeledPoint
from pyspark.sql import SQLContext
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
### Import data in Spark ###
RDD_RAWfileWH= sc.textFile("c:/Anaconda2/Cognet/Data_For_Cognet_ready.csv")
header = RDD_RAWfileWH.first()
# Delete header from RAWData
RDD_RAWfile1 = RDD_RAWfileWH.filter(lambda x: x != header)
# Split each line of the RDD
RDD_RAWfile = RDD_RAWfile1.map(lambda line:[float(x) for x in line.split(',')])
FinalData = RDD_RAWfile.map(lambda row: LabeledPoint(row[0],[row[1:]]))
(trainingData, testData) = FinalData.randomSplit([0.7, 0.3])
layers = [15, 2, 3]
# create the trainer and set its parameters
trainer = MultilayerPerceptronClassifier(maxIter=100, layers=layers, blockSize=128,seed=1234)
# train the model
model = trainer.fit(trainingData)

和跟踪
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-28-123dce2b085a> in <module>()
46 trainer = MultilayerPerceptronClassifier(maxIter=100, layers=layers, blockSize=128,seed=1234)
47 # train the model
---> 48 model = trainer.fit(trainingData)
49 # compute accuracy on the test set
50 # result = model.transform(test)
C:\Users\piod7321\spark-1.6.1-bin-hadoop2.6\python\pyspark\ml\pipeline.pyc in fit(self, dataset, params)
67 return self.copy(params)._fit(dataset)
68 else:
---> 69 return self._fit(dataset)
70 else:
71 raise ValueError("Params must be either a param map or a list/tuple of param maps, "
C:\Users\piod7321\spark-1.6.1-bin-hadoop2.6\python\pyspark\ml\wrapper.pyc in _fit(self, dataset)
131
132 def _fit(self, dataset):
--> 133 java_model = self._fit_java(dataset)
134 return self._create_model(java_model)
135
C:\Users\piod7321\spark-1.6.1-bin-hadoop2.6\python\pyspark\ml\wrapper.pyc in _fit_java(self, dataset)
128 """
129 self._transfer_params_to_java()
--> 130 return self._java_obj.fit(dataset._jdf)
131
132 def _fit(self, dataset):
AttributeError: 'PipelinedRDD' object has no attribute '_jdf'
&#13;
我不是Spark的专家,所以如果有人知道这个jdf属性是什么以及如何解决这个问题对我来说非常有帮助。
非常感谢