Converting from feed_dict to queues causes the loss to increase

Asked: 2017-11-10 21:00:32

Tags: tensorflow

I have a working TensorFlow model that I am trying to convert to use queues. It may not be the best design, but it works.

The data arrives as a list(dict()) named 'rows' from a processing pipeline outside TF, in the form [{'y1': 1, 'y2': 0, 'y3': 1, 'y4': 0, 'x1': ..., 'x1182': 0}] (SPECIAL_FIELD_CHAR is 'y', meaning the field was computed from the 'xN' data). features_outputs() just splits the keys into the ys, ['y1', 'y2', 'y3', 'y4'], and the xs, ['x1', ..., 'x1182']. The idea is that the xs determine the ys, with 4 independent ys computed for each row of xs.
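features_outputs() isn't shown in the question; a minimal sketch of what it presumably does, splitting the keys on SPECIAL_FIELD_CHAR (my reconstruction, not the asker's code):

    SPECIAL_FIELD_CHAR = 'y'  # per the question: computed fields start with 'y'

    def features_outputs(keys):
        # Split a row's keys into inputs (xs) and computed outputs (ys),
        # sorted so the column order is deterministic.
        features = sorted(k for k in keys if not k.startswith(SPECIAL_FIELD_CHAR))
        outputs = sorted(k for k in keys if k.startswith(SPECIAL_FIELD_CHAR))
        return (features, outputs)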

import numpy as np
import tensorflow as tf

def train_rows(initial_weights, weights_filename, rows):
    (features, outputs) = features_outputs(rows[0].keys())

    x_true = [ [float(row[feature]) for feature in features] for row in rows]
    try:
        y_true = [ [float(row[output]) for output in outputs] for row in rows ]
    except Exception as e:
        print('could not parse the outputs of a row:', e)

    w_true = np.random.rand(len(features), 1) # init weights (never used below)
    b_true = np.random.rand(1) # init bias (never used below)

    x_in = tf.placeholder(tf.float32, [None, len(features)], "x_in")
    if initial_weights is None:
        w = tf.Variable(tf.random_normal((len(features), len(outputs))), name="w")
        b = tf.Variable(tf.constant(0.1, shape=[len(outputs)]), name="b")
    else:
        w = tf.Variable(initial_weights['w'], name="w")
        b = tf.Variable(initial_weights['b'], name="b")

    h = tf.add(tf.matmul(x_in, w), b, name="h")
    y_in = tf.placeholder(tf.float32, [None, len(outputs)], "y_in")
    loss_op = tf.reduce_mean(tf.square(tf.subtract(y_in, h)), name="loss")
    #train_op = tf.train.AdamOptimizer(0.01).minimize(loss_op)
    train_op = tf.train.GradientDescentOptimizer(0.3).minimize(loss_op)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        last_error = 1.7976931348623157e+308
        this_error = 1.7976931348623157e+307
        diff = 1
        iteration = initial_weights['iteration'] if initial_weights is not None and 'iteration' in initial_weights else 0
        while diff > 0:
            iteration += 1
            last_error = this_error
            for step in range(1000):
                sess.run(train_op, feed_dict={
                  x_in: x_true,
                  y_in: y_true
                })

            w_computed = sess.run(w)
            b_computed = sess.run(b)

            # note: these evaluation ops are re-created on every pass,
            # so the graph grows each time through the loop
            pred = tf.add(tf.matmul(x_in, w), b)

            results = sess.run(pred, feed_dict={x_in: x_true})
            error = tf.losses.mean_squared_error(y_true, results)
            this_error = float(error.eval())
            (diff, locs) = compare(y_true, results)
            if locs < 50:
                print("iteration:", iteration, "error:", this_error, "diff:", diff, "locs:", locs)

This produces a model that converges. The queue-based version, however, does not; its error increases rapidly:

def multithreaded_train_rows(initial_weights, weights_filename, rows):
    (features, outputs) = features_outputs(rows[0].keys())

    x_true = np.array([ [float(row[feature]) for feature in features] for row in rows])
    y_true = np.array([ [float(row[output]) for output in outputs] for row in rows ])


    # queue: each element is one whole flattened row (xs and ys together,
    # in sorted-key order)
    q = tf.FIFOQueue(capacity=len(rows), dtypes=tf.float32)
    #enq_op = q.enqueue_many(x_true)
    enq_op = q.enqueue_many(np.array([[float(row[f]) for f in sorted(row.keys())] for row in rows]))
    qr = tf.train.QueueRunner(q, [enq_op] * 1)
    tf.train.add_queue_runner(qr)

    keys = sorted(rows[0].keys())
    x_indices = np.array([[i] for i in range(len(keys)) if not keys[i].startswith(SPECIAL_FIELD_CHAR)])
    y_indices = np.array([[i] for i in range(len(keys)) if keys[i].startswith(SPECIAL_FIELD_CHAR)])

    input = q.dequeue()  # one flattened row is dequeued per step

    x_in = tf.transpose(tf.gather(input, x_indices))  # [1182, 1] -> [1, 1182]
    y_in = tf.gather(input, y_indices)                # [4, 1]

    if initial_weights is None:
        print('Creating weights', len(x_indices), len(y_indices))
        w = tf.Variable(tf.random_normal((len(x_indices), len(y_indices))), name="w")
        b = tf.Variable(tf.constant(0.1, shape=[len(y_indices)]), name="b")
    else:
        print('Using supplied weights', len(initial_weights['w']), len(initial_weights['w'][0]))
        w = tf.Variable(initial_weights['w'], name="w")
        b = tf.Variable(initial_weights['b'], name="b")

    y = tf.add(tf.matmul(x_in, w), b, name="y")

    # note the shapes: y_in is [4, 1] while y is [1, 4], so squared_difference
    # broadcasts them to [4, 4] before the mean
    loss_op = tf.reduce_mean(tf.squared_difference(y_in, y), name="loss")
    train_op = tf.train.GradientDescentOptimizer(0.3).minimize(loss_op)

    print('Starting session')
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        last_error = 1.7976931348623157e+308
        this_error = 1.7976931348623157e+307
        diff = 1
        iteration = initial_weights['iteration'] if initial_weights is not None and 'iteration' in initial_weights else 0
        while diff > 0:
            iteration += 1
            last_error = this_error
            for step in range(100):
                # each step trains on the single row dequeued for that step
                sess.run([train_op, loss_op])

            w_computed = sess.run(w)
            b_computed = sess.run(b)

            pred = tf.add(tf.matmul(x_in, w), b)  # built anew each pass, never run below

            # feeding x_in here overrides the dequeued values for this evaluation
            results = sess.run(y, feed_dict={x_in: x_true})
            error = tf.losses.mean_squared_error(y_true, results)
            this_error = float(error.eval())

            (diff, locs) = compare(y_true, results)
            if locs < 50:
                print("iteration:", iteration, "error:", this_error, "diff:", diff, "locs:", locs)

        coord.request_stop()
        coord.join(threads)

The two should be equivalent, but I had to change a couple of things: 1. add a tf.transpose() on x_in for the matmul(); 2. enqueue each whole row of xs and ys together, then pull them apart with tf.gather(). (The shape sketch below shows why.)
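Those changes follow from the tensor shapes; a standalone check with the question's 1182-feature/4-output sizes assumed:

    import tensorflow as tf

    # One dequeued element is a single flattened row of all 1186 values;
    # the exact x/y positions don't matter for the shapes.
    row = tf.zeros([1186])
    x_idx = [[i] for i in range(1182)]        # stand-in x positions
    y_idx = [[i] for i in range(1182, 1186)]  # stand-in y positions

    x_col = tf.gather(row, x_idx)  # [[i]]-style indices keep a trailing axis: [1182, 1]
    x_in = tf.transpose(x_col)     # [1, 1182], i.e. a batch of exactly one row
    y_in = tf.gather(row, y_idx)   # [4, 1]

    print(x_col.shape, x_in.shape, y_in.shape)  # (1182, 1) (1, 1182) (4, 1)

So each queue-driven training step sees a batch of one row, whereas every feed_dict step trained on the full dataset.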

I have searched a lot for examples matching mine, and I can't find documentation on how to restart a queue and continue training from the beginning. The queue version seems to train forever (presumably the QueueRunner thread keeps refilling the queue?), and it never stops.
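For bounding the number of passes, one stock TF1 mechanism is tf.train.limit_epochs, which raises OutOfRangeError after a tensor has been produced num_epochs times; a minimal sketch with illustrative data:

    import tensorflow as tf

    data = tf.constant([[1.0, 2.0], [3.0, 4.0]])
    limited = tf.train.limit_epochs(data, num_epochs=3)

    with tf.Session() as sess:
        # limit_epochs keeps its counter in a local variable
        sess.run(tf.local_variables_initializer())
        try:
            while True:
                sess.run(limited)
        except tf.errors.OutOfRangeError:
            print('3 epochs consumed; the training loop can exit here')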

But most importantly: why, given exactly the same data, does the first version converge while the second does not?

1 Answer:

Answer 0 (score: 0)

  • The gather plumbing isn't needed. Enqueue only the inputs (x_true), and
  • evaluate y against y_true rather than y_in (a sketch follows).
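A minimal sketch of that suggestion as I read it, using the question's 1182-feature/4-output shapes with stand-in data (a FIFOQueue preserves order, so a full-batch dequeue stays aligned with the constant y_true):

    import numpy as np
    import tensorflow as tf

    x_true = np.random.rand(100, 1182).astype(np.float32)  # stand-in data
    y_true = np.random.rand(100, 4).astype(np.float32)

    # Enqueue only the x rows; a fixed element shape allows dequeue_many.
    q = tf.FIFOQueue(capacity=100, dtypes=tf.float32, shapes=[1182])
    enq_op = q.enqueue_many(x_true)
    tf.train.add_queue_runner(tf.train.QueueRunner(q, [enq_op]))

    x_in = q.dequeue_many(100)  # [100, 1182]: a full pass, in enqueue order
    w = tf.Variable(tf.random_normal((1182, 4)), name="w")
    b = tf.Variable(tf.constant(0.1, shape=[4]), name="b")
    y = tf.add(tf.matmul(x_in, w), b, name="y")

    # Loss against the constant y_true, not a dequeued y_in.
    loss_op = tf.reduce_mean(tf.squared_difference(tf.constant(y_true), y))
    train_op = tf.train.GradientDescentOptimizer(0.3).minimize(loss_op)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        for step in range(1000):
            _, loss = sess.run([train_op, loss_op])
        print('final loss:', loss)
        coord.request_stop()
        coord.join(threads)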
