我正在尝试运行KNN回归,但始终出现以下错误:
ValueError: Query data dimension must match training data dimension
在模型中,我从数据中传入了5个特征作为训练集X。训练集的形状为(348,5)。我在该网站上看到了针对类似问题的其他一些答案,并尝试操纵预测数组Y_ = knn.predict(T)的形状,因为这是我认为我的问题所在,但是没有成功。感谢任何可以帮助我理解和解决我的问题的人。提前致谢。
模型:
knn = neighbors.KNeighborsRegressor(n_neighbors=10)
#training data:
X = df[['PopSquared', 'IndicateBurglary', 'IndicateLarceny', 'IndicateGTA', ' IndicateArson']]
Y = df['Property\ncrime'].values.reshape(-1, 1)
knn.fit(X, Y)
# Set up prediction line.
T = np.arange(0, 50, 0.1)[:, np.newaxis]
Y_ = knn.predict(T)
完整的追溯错误消息:
ValueError Traceback (most recent call last)
<ipython-input-50-f35bfb134dc9> in <module>()
12
13 # Trailing underscores are a common convention for a prediction.
---> 14 Y_ = knn.predict(T)
~\Anaconda3\lib\site-packages\sklearn\neighbors\regression.py in predict(self, X)
142 X = check_array(X, accept_sparse='csr')
143
--> 144 neigh_dist, neigh_ind = self.kneighbors(X)
145
146 weights = _get_weights(neigh_dist, self.weights)
~\Anaconda3\lib\site-packages\sklearn\neighbors\base.py in kneighbors(self, X, n_neighbors, return_distance)
383 delayed(self._tree.query, check_pickle=False)(
384 X[s], n_neighbors, return_distance)
--> 385 for s in gen_even_slices(X.shape[0], n_jobs)
386 )
387 if return_distance:
~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
777 # was dispatched. In particular this covers the edge
778 # case of Parallel used with an exhausted iterator.
--> 779 while self.dispatch_one_batch(iterator):
780 self._iterating = True
781 else:
~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator)
623 return False
624 else:
--> 625 self._dispatch(tasks)
626 return True
627
~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch)
586 dispatch_timestamp = time.time()
587 cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
--> 588 job = self._backend.apply_async(batch, callback=cb)
589 self._jobs.append(job)
590
~\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self, func, callback)
109 def apply_async(self, func, callback=None):
110 """Schedule a func to be run"""
--> 111 result = ImmediateResult(func)
112 if callback:
113 callback(result)
~\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self, batch)
330 # Don't delay the application, to avoid keeping the input
331 # arguments in memory
--> 332 self.results = batch()
333
334 def get(self):
~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self)
129
130 def __call__(self):
--> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items]
132
133 def __len__(self):
~\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0)
129
130 def __call__(self):
--> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items]
132
133 def __len__(self):
sklearn/neighbors/binary_tree.pxi in sklearn.neighbors.kd_tree.BinaryTree.query()
ValueError: query data dimension must match training data dimension