我正在尝试创建一个预测函数,在该函数中我可以返回输入矩阵最接近的簇。我不确定自己是否正确,因为每个预测都需要很长时间才能得出预测。 我在下面为我的KMeans算法创建了一个类:
import numpy as np
import tensorflow as tf
class KmeansTensorflow:
def __init__(self, input_matrix, num_clusters):
self._input = input_matrix.todense()
self._num_clusters = num_clusters
def train(self):
with tf.Session() as sess:
k = self._num_clusters
# select random points as a starting position
start_pos = tf.Variable(self._input[np.random.randint(self._input.shape[0], size=k), :],
dtype=tf.float32)
self.centroids = tf.Variable(start_pos.initialized_value(), 'S', dtype=tf.float32)
# populate points
points = tf.Variable(self._input, 'X', dtype=tf.float32)
ones_like = tf.ones((points.get_shape()[0], 1))
prev_assignments = tf.Variable(tf.zeros((points.get_shape()[0],), dtype=tf.int64))
# find the distance between all points: http://stackoverflow.com/a/43839605/1090562
p1 = tf.matmul(
tf.expand_dims(tf.reduce_sum(tf.square(points), 1), 1),
tf.ones(shape=(1, k))
)
p2 = tf.transpose(tf.matmul(
tf.reshape(tf.reduce_sum(tf.square(self.centroids), 1), shape=[-1, 1]),
ones_like,
transpose_b=True
))
distance = tf.sqrt(tf.add(p1, p2) - 2 * tf.matmul(points, self.centroids, transpose_b=True))
# assign each point to a closest centroid
point_to_centroid_assignment = tf.argmin(distance, axis=1)
# recalculate the centers
total = tf.unsorted_segment_sum(points, point_to_centroid_assignment, k)
count = tf.unsorted_segment_sum(ones_like, point_to_centroid_assignment, k)
means = total / count
# continue if there is any difference between the current and previous assignment
is_continue = tf.reduce_any(tf.not_equal(point_to_centroid_assignment, prev_assignments))
with tf.control_dependencies([is_continue]):
loop = tf.group(self.centroids.assign(means), prev_assignments.assign(point_to_centroid_assignment))
# sess = tf.Session()
sess.run(tf.global_variables_initializer())
# do many iterations. Hopefully you will stop because of has_changed is False
has_changed, cnt = True, 0
while has_changed and cnt < 1000:
cnt += 1
has_changed, _ = sess.run([is_continue, loop])
res = sess.run(point_to_centroid_assignment)
return list(res)
这是我的预测功能:
def predict(self, input_vector):
with tf.Session() as sess:
points = tf.Variable(input_vector, 'P', dtype=tf.float32)
ones_like = tf.ones((points.get_shape()[0], 1))
p1 = tf.matmul(
tf.expand_dims(tf.reduce_sum(tf.square(points), 1), 1),
tf.ones(shape=(1, self._num_clusters))
)
p2 = tf.transpose(tf.matmul(
tf.reshape(tf.reduce_sum(tf.square(self.centroids), 1), shape=[-1, 1]),
ones_like,
transpose_b=True
))
distance = tf.sqrt(tf.add(p1, p2) - 2 * tf.matmul(points, self.centroids, transpose_b=True))
point_to_centroid_assignment = tf.argmin(distance, axis=1)
sess.run(tf.global_variables_initializer())
res = sess.run(point_to_centroid_assignment)
return res
当我运行predict()时,通常需要一分钟才能得出结果。