我的Python代码中有一个自定义类,用于处理k-means聚类。该类采用一些自定义群集的参数,但是从传递给该类的列表中减去两个值时,出现以下错误:
Traceback (most recent call last):
File "/home/dev/PycharmProjects/KMeans/KMeansApplication.py", line 22, in <module>
application()
File "/home/dev/PycharmProjects/KMeans/KMeansApplication.py", line 16, in application
opt_num_clusters = cluster_calculator.calculate_optimum_clusters()
File "/home/dev/PycharmProjects/KMeans/ClusterCalculator.py", line 19, in calculate_optimum_clusters
self.init_opt_line()
File "/home/dev/PycharmProjects/KMeans/ClusterCalculator.py", line 33, in init_opt_line
self. m = (self.sum_squared_dist[0] - self.sum_squared_dist[1]) / (1 - self.calc_border)
TypeError: unsupported operand type(s) for -: 'KMeans' and 'KMeans'
这是我的自定义类的代码:
import KMeansClusterer
from math import sqrt, fabs
from matplotlib import pyplot as plp
class ClusterCalculator:
m = 0
b = 0
sum_squared_dist = []
derivates = []
distances = []
line_coordinates = []
def __init__(self, calc_border, data):
self.calc_border = calc_border
self.data = data
def calculate_optimum_clusters(self):
self.calculate_squared_dist()
self.init_opt_line()
self.calc_distances()
self.calc_line_coordinates()
opt_clusters = self.get_optimum_clusters()
print("Evaluated", opt_clusters, "as optimum number of clusters")
return opt_clusters
def calculate_squared_dist(self):
for k in range(1, self.calc_border):
kmeans = KMeansClusterer.KMeansClusterer(k, self.data)
self.sum_squared_dist.append(kmeans.calc_custom_params(self.data, k))
def init_opt_line(self):
#here the error is thrown
self. m = (self.sum_squared_dist[0] - self.sum_squared_dist[1]) / (1 - self.calc_border)
self.b = (1 * self.sum_squared_dist[0] - self.calc_border*self.sum_squared_dist[0]) / (1 - self.calc_border)
def calc_y_value(self, x_calc):
return self.m * x_calc + self.b
def calc_line_coordinates(self):
for i in range(1, self.calc_border):
self.line_coordinates.append(self.calc_y_value(i))
def calc_distances(self):
for i in range(1, self.calc_border):
self.distances.append(sqrt(fabs(self.calc_y_value(i))))
print("For border", self.calc_border, ", calculated the following distances: \n", self.distances)
def get_optimum_clusters(self):
return self.distances.index((max(self.distances)))
def plot_results(self):
plp.plot(range(1, self.calc_border), self.sum_squared_dist, "bx-")
plp.plot(range(1, self.calc_border), self.line_coordinates, "bx-")
plp.xlabel("Number of clusters")
plp.ylabel("Sum of squared distances")
plp.show()
我还附加了KMeansClusterer,因为sum_squared_dist
在那里填充了值:
from sklearn.cluster import KMeans
from matplotlib import pyplot as plp
class KMeansClusterer:
def __init__(self, clusters, data):
self.clusters = clusters
self.data = data
def cluster(self):
kmeans = KMeans(n_clusters=self.cluster(), random_state=0).fit(self.data)
print("Clustered", len(kmeans.labels_), "GTINs")
for i, cluster_center in enumerate(kmeans.cluster_centers_):
plp.plot(cluster_center, label="Center {0}".format(i))
plp.legend(loc="best")
plp.show()
def calc_custom_params(self, data_frame, clusters):
kmeans = KMeans(n_clusters=clusters, random_state=0).fit(data_frame)
return kmeans
def cluster_without_plot(self):
return KMeans(n_clusters=self.cluster(), random_state=0).fit(self.data)
我无法想象为什么不支持'-',我试图减去两个整数类型和1的列表值和一个整数变量。
答案 0 :(得分:2)
KMeans.fit()
返回一个类实例,这意味着calc_custom_params()
返回一个类实例,因此您的列表sum_squared_dist
不包含整数,元素是sklearn.cluster.KMeans
类的对象
答案 1 :(得分:2)
Python无法自动减去类。您需要在您的类上实现__sub__
方法,以便python知道如何处理减去这些类。您可以在https://docs.python.org/3/library/operator.html