创建张量流预测函数

时间:2018-07-23 22:59:27

标签: python tensorflow

我正在尝试创建一个预测函数,在该函数中我可以返回输入矩阵最接近的簇。我不确定自己是否正确,因为每个预测都需要很长时间才能得出预测。 我在下面为我的KMeans算法创建了一个类:

import numpy as np
import tensorflow as tf


class KmeansTensorflow:
    def __init__(self, input_matrix, num_clusters):
        self._input = input_matrix.todense()
        self._num_clusters = num_clusters

    def train(self):
        with tf.Session() as sess:
            k = self._num_clusters
            # select random points as a starting position
            start_pos = tf.Variable(self._input[np.random.randint(self._input.shape[0], size=k), :],
                                    dtype=tf.float32)
            self.centroids = tf.Variable(start_pos.initialized_value(), 'S', dtype=tf.float32)

            # populate points
            points = tf.Variable(self._input, 'X', dtype=tf.float32)
            ones_like = tf.ones((points.get_shape()[0], 1))
            prev_assignments = tf.Variable(tf.zeros((points.get_shape()[0],), dtype=tf.int64))

            # find the distance between all points: http://stackoverflow.com/a/43839605/1090562
            p1 = tf.matmul(
                tf.expand_dims(tf.reduce_sum(tf.square(points), 1), 1),
                tf.ones(shape=(1, k))
            )
            p2 = tf.transpose(tf.matmul(
                tf.reshape(tf.reduce_sum(tf.square(self.centroids), 1), shape=[-1, 1]),
                ones_like,
                transpose_b=True
            ))
            distance = tf.sqrt(tf.add(p1, p2) - 2 * tf.matmul(points, self.centroids, transpose_b=True))

            # assign each point to a closest centroid
            point_to_centroid_assignment = tf.argmin(distance, axis=1)

            # recalculate the centers
            total = tf.unsorted_segment_sum(points, point_to_centroid_assignment, k)
            count = tf.unsorted_segment_sum(ones_like, point_to_centroid_assignment, k)
            means = total / count

            # continue if there is any difference between the current and previous assignment
            is_continue = tf.reduce_any(tf.not_equal(point_to_centroid_assignment, prev_assignments))

            with tf.control_dependencies([is_continue]):
                loop = tf.group(self.centroids.assign(means), prev_assignments.assign(point_to_centroid_assignment))

            # sess = tf.Session()
            sess.run(tf.global_variables_initializer())

            # do many iterations. Hopefully you will stop because of has_changed is False
            has_changed, cnt = True, 0
            while has_changed and cnt < 1000:
                cnt += 1
                has_changed, _ = sess.run([is_continue, loop])

            res = sess.run(point_to_centroid_assignment)
            return list(res)

这是我的预测功能:

    def predict(self, input_vector):
        with tf.Session() as sess:
            points = tf.Variable(input_vector, 'P', dtype=tf.float32)
            ones_like = tf.ones((points.get_shape()[0], 1))

            p1 = tf.matmul(
                tf.expand_dims(tf.reduce_sum(tf.square(points), 1), 1),
                tf.ones(shape=(1, self._num_clusters))
            )

            p2 = tf.transpose(tf.matmul(
                tf.reshape(tf.reduce_sum(tf.square(self.centroids), 1), shape=[-1, 1]),
                ones_like,
                transpose_b=True
            ))

            distance = tf.sqrt(tf.add(p1, p2) - 2 * tf.matmul(points, self.centroids, transpose_b=True))
            point_to_centroid_assignment = tf.argmin(distance, axis=1)
            sess.run(tf.global_variables_initializer())

            res = sess.run(point_to_centroid_assignment)
            return res

当我运行predict()时,通常需要一分钟才能得出结果。

0 个答案:

没有答案