当我尝试在PySpark中安装MultilayerPerceptronClassifier时,我收到一条奇怪的错误消息。错误消息似乎与要素密集型矢量变量的结构有关,但我不明白问题是什么。我在下面创建了自己的数据集,以使基础数据透明。我将不胜感激任何人的见解。
from pyspark.ml.classification import MultilayerPerceptronClassifier
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
from pyspark.ml.feature import VectorAssembler

# Toy dataset: three flower classes described by sepal measurements and RGB values.
# df = spark.read.format("libsvm").load("C:/spark-2.3.3-bin-hadoop2.7/data/mllib/sample_libsvm_data.txt")
df = spark.createDataFrame(
    [(3.0, 69, 57, 56, 678, 345), (3.0, 67, 56, 58, 678, 345), (3.0, 67, 54, 57, 678, 345),
     (3.0, 68, 55, 58, 678, 345), (3.0, 68, 53, 52, 678, 345),
     (2.0, 11, 10, 907, 16, 458), (2.0, 12, 14, 909, 12, 456), (2.0, 11, 13, 910, 10, 459),
     (2.0, 12, 11, 905, 16, 459), (2.0, 10, 13, 902, 10, 459),
     (1.0, 30, 11, 123, 568, 891), (1.0, 32, 12, 124, 567, 890), (1.0, 34, 10, 123, 566, 895),
     (1.0, 35, 15, 121, 564, 894), (1.0, 30, 12, 124, 560, 896)],
    ['flower_type', 'sepal_len', 'sepal_width', 'R', 'G', 'B'])

input_columns = ['sepal_len', 'sepal_width', 'R', 'G']
assembler = VectorAssembler(inputCols=input_columns, outputCol='features')
output = assembler.transform(df)

# Rename the dependent variable to the column name the estimator expects.
renamed = output.withColumnRenamed('flower_type', 'label')

# FIX: MultilayerPerceptronClassifier requires class labels to be integer-valued
# doubles in [0, numClasses - 1].  The raw labels here are 1.0/2.0/3.0, so with
# an output layer of size 3 the label 3.0 is an out-of-range class index and
# fit() fails with a Py4JJavaError (ArrayIndexOutOfBoundsException inside Spark).
# Shift the labels to 0.0/1.0/2.0 so they match the 3 output nodes.
# (For non-numeric or arbitrary labels, use pyspark.ml.feature.StringIndexer instead.)
renamed = renamed.withColumn('label', renamed['label'] - 1.0)

final_data = renamed.select('features', 'label')
final_data.show()

train, test = final_data.randomSplit([0.7, 0.3])

# Network topology for the neural network:
# input layer of size 4 (features), two hidden layers of size 5 and 4,
# and an output layer of size 3 (one node per class, labels 0.0-2.0).
layers = [4, 5, 4, 3]

# Create the trainer and set its parameters.
trainer = MultilayerPerceptronClassifier(maxIter=100, layers=layers, blockSize=128, seed=1234)

# Train the model.
model = trainer.fit(train)
我收到的错误消息如下:
---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
<ipython-input-46-034e222b23a1> in <module>()
37
38 # train the model
---> 39 model = trainer.fit(train)
C:\spark-2.3.3-bin-hadoop2.7\python\pyspark\ml\base.py in fit(self, dataset, params)
130 return self.copy(params)._fit(dataset)
131 else:
--> 132 return self._fit(dataset)
133 else:
134 raise ValueError("Params must be either a param map or a list/tuple of param maps, "