我创建了一个数据框:
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsRegressor
# Build a 10x5 frame of random integers in [0, 10) with columns a..e.
data = pd.DataFrame(np.random.randint(0, 10, (10, 5)), columns=list('abcde'))
# Assign whole columns directly instead of through chained indexing
# (``data.c[:5] = 0``): chained assignment writes through an intermediate
# Series and is not guaranteed to update ``data`` itself.
data['c'] = np.repeat([0, 1], 5)      # first five rows -> 0, last five rows -> 1
data['a'] = np.tile(np.arange(5), 2)  # 0..4 repeated twice
# Index by (a, c), then pivot level 'c' into the columns: the result has one
# row per value of 'a' and a two-level column index (b/d/e on level 0, c on level 1).
data = data.set_index(['a', 'c'])
data = data.unstack('c')
然后定义度量函数:
# Custom distance: square the element-wise differences (a - b), sum them across
# columns grouped by level 0 of the column index, take the square root of each
# group sum, add the per-group roots together per row, and return the result as
# a NumPy array (`.values`).
# The `level=` keyword makes this depend on the `.sum` of the object it is
# called on; the traceback in this post shows that call rejecting `level` when
# scikit-learn invokes the metric as metric(X[i], Y[j]) (presumably on plain
# NumPy rows after check_array -- confirm).
# NOTE(review): `level=` in DataFrame.sum is believed to be deprecated/removed
# in newer pandas releases -- confirm against the pandas version in use.
myfunc = lambda a, b: ((a - b)**2).sum(axis=1, level=[0]).apply(np.sqrt).sum(axis=1).values
我想要的是:按列索引的第 0 级对数据框的各列分组求和,然后对结果取平方根(sqrt),最后再把所有列相加。像下面这样直接调用时,它可以正常工作:
# Take the last row of the frame as the reference point.
b = data.iloc[-1]
# Call the metric directly on pandas objects: distance from every row of
# `data` to `b`.
myfunc(data,b)
# Recorded output of the call above (the last entry is 0, the distance of the
# last row to itself):
#output:array([ 18.09035957, 12.62123278, 20.45561243, 14.29386508, 0. ])
但在 KNeighborsRegressor 中使用 myfunc 作为度量(metric)时,会引发错误。这是否意味着 KNeighborsRegressor 这个类不能用于数据框(DataFrame)?有人能帮帮我吗?谢谢!
# Build a 3-nearest-neighbours regressor that uses `myfunc` as its distance metric.
knn = KNeighborsRegressor(n_neighbors=3, metric=myfunc)
# NOTE(review): `a` is not defined in any of the visible snippets -- presumably
# it is the training frame `data`; confirm.
knn.fit(a, np.arange(5))
# This is the call that raises the TypeError shown in the traceback below.
knn.predict(b)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-23-33f10a670994> in <module>()
1 knn = KNeighborsRegressor(n_neighbors=3, metric=myfunc)
2 knn.fit(a, np.arange(5))
----> 3 knn.predict(b)
C:\Anaconda3\lib\site-packages\sklearn\neighbors\regression.py in predict(self, X)
142 X = check_array(X, accept_sparse='csr')
143
--> 144 neigh_dist, neigh_ind = self.kneighbors(X)
145
146 weights = _get_weights(neigh_dist, self.weights)
C:\Anaconda3\lib\site-packages\sklearn\neighbors\base.py in kneighbors(self, X, n_neighbors, return_distance)
355 dist = pairwise_distances(
356 X, self._fit_X, self.effective_metric_, n_jobs=n_jobs,
--> 357 **self.effective_metric_params_)
358
359 neigh_ind = argpartition(dist, n_neighbors - 1, axis=1)
C:\Anaconda3\lib\site-packages\sklearn\metrics\pairwise.py in pairwise_distances(X, Y, metric, n_jobs, **kwds)
1238 func = partial(distance.cdist, metric=metric, **kwds)
1239
-> 1240 return _parallel_pairwise(X, Y, func, n_jobs, **kwds)
1241
1242
C:\Anaconda3\lib\site-packages\sklearn\metrics\pairwise.py in _parallel_pairwise(X, Y, func, n_jobs, **kwds)
1081 if n_jobs == 1:
1082 # Special case to avoid picklability checks in delayed
-> 1083 return func(X, Y, **kwds)
1084
1085 # TODO: in some cases, backend='threading' may be appropriate
C:\Anaconda3\lib\site-packages\sklearn\metrics\pairwise.py in _pairwise_callable(X, Y, metric, **kwds)
1119 iterator = itertools.product(range(X.shape[0]), range(Y.shape[0]))
1120 for i, j in iterator:
-> 1121 out[i, j] = metric(X[i], Y[j], **kwds)
1122
1123 return out
<ipython-input-17-7f81015a2d21> in <lambda>(a, b)
----> 1 myfunc = lambda a, b: ((a - b)**2).sum(axis=1, level=[0]).apply(np.sqrt).sum(axis=1).values
TypeError: _sum() got an unexpected keyword argument 'level'