Converting from feed_dict to queues causes the loss to increase

Asked: 2017-11-10 21:00:32

Tags: tensorflow

I have a working TensorFlow model that I am trying to convert to use queues. It may not be the best design, but it works.

The data arrives as a list(dict()) named 'rows' from a processing pipeline outside TF, in the form [{'y1': 1, 'y2': 0, 'y3': 1, 'y4': 0, 'x1': ..., 'x1182': 0}] (SPECIAL_FIELD_CHAR is 'y', meaning the field was computed from the 'xN' data). features_outputs() just splits the keys into the ys, ['y1', 'y2', 'y3', 'y4'], and the xs, ['x1', ..., 'x1182']. The idea is that the xs determine the ys, with 4 independent ys computed for each row of xs.
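features_outputs() isn't shown in the question; a minimal sketch of what it presumably does, splitting the keys on SPECIAL_FIELD_CHAR (my reconstruction, not the asker's code):

    SPECIAL_FIELD_CHAR = 'y'  # per the question: computed fields start with 'y'

    def features_outputs(keys):
        # Split a row's keys into inputs (xs) and computed outputs (ys),
        # sorted so the column order is deterministic.
        features = sorted(k for k in keys if not k.startswith(SPECIAL_FIELD_CHAR))
        outputs = sorted(k for k in keys if k.startswith(SPECIAL_FIELD_CHAR))
        return (features, outputs)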

import numpy as np
import tensorflow as tf

def train_rows(initial_weights, weights_filename, rows):
    (features, outputs) = features_outputs(rows[0].keys())

    x_true = [ [float(row[feature]) for feature in features] for row in rows]
    try:
        y_true = [ [float(row[output]) for output in outputs] for row in rows ]
    except Exception as e:
        print('could not parse the outputs of a row:', e)

    w_true = np.random.rand(len(features), 1) # init weights (never used below)
    b_true = np.random.rand(1) # init bias (never used below)

    x_in = tf.placeholder(tf.float32, [None, len(features)], "x_in")
    if initial_weights is None:
        w = tf.Variable(tf.random_normal((len(features), len(outputs))), name="w")
        b = tf.Variable(tf.constant(0.1, shape=[len(outputs)]), name="b")
    else:
        w = tf.Variable(initial_weights['w'], name="w")
        b = tf.Variable(initial_weights['b'], name="b")

    h = tf.add(tf.matmul(x_in, w), b, name="h")
    y_in = tf.placeholder(tf.float32, [None, len(outputs)], "y_in")
    loss_op = tf.reduce_mean(tf.square(tf.subtract(y_in, h)), name="loss")
    #train_op = tf.train.AdamOptimizer(0.01).minimize(loss_op)
    train_op = tf.train.GradientDescentOptimizer(0.3).minimize(loss_op)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        last_error = 1.7976931348623157e+308
        this_error = 1.7976931348623157e+307
        diff = 1
        iteration = initial_weights['iteration'] if initial_weights is not None and 'iteration' in initial_weights else 0
        while diff > 0:
            iteration += 1
            last_error = this_error
            for step in range(1000):
                sess.run(train_op, feed_dict={
                  x_in: x_true,
                  y_in: y_true
                })

            w_computed = sess.run(w)
            b_computed = sess.run(b)

            # note: these evaluation ops are re-created on every pass,
            # so the graph grows each time through the loop
            pred = tf.add(tf.matmul(x_in, w), b)

            results = sess.run(pred, feed_dict={x_in: x_true})
            error = tf.losses.mean_squared_error(y_true, results)
            this_error = float(error.eval())
            (diff, locs) = compare(y_true, results)
            if locs < 50:
                print("iteration:", iteration, "error:", this_error, "diff:", diff, "locs:", locs)

This produces a model that converges. The queue-based version, however, does not; its error increases rapidly:

def multithreaded_train_rows(initial_weights, weights_filename, rows):
    (features, outputs) = features_outputs(rows[0].keys())

    x_true = np.array([ [float(row[feature]) for feature in features] for row in rows])
    y_true = np.array([ [float(row[output]) for output in outputs] for row in rows ])


    # queue: each element is one whole flattened row (xs and ys together,
    # in sorted-key order)
    q = tf.FIFOQueue(capacity=len(rows), dtypes=tf.float32)
    #enq_op = q.enqueue_many(x_true)
    enq_op = q.enqueue_many(np.array([[float(row[f]) for f in sorted(row.keys())] for row in rows]))
    qr = tf.train.QueueRunner(q, [enq_op] * 1)
    tf.train.add_queue_runner(qr)

    keys = sorted(rows[0].keys())
    x_indices = np.array([[i] for i in range(len(keys)) if not keys[i].startswith(SPECIAL_FIELD_CHAR)])
    y_indices = np.array([[i] for i in range(len(keys)) if keys[i].startswith(SPECIAL_FIELD_CHAR)])

    input = q.dequeue()  # one flattened row is dequeued per step

    x_in = tf.transpose(tf.gather(input, x_indices))  # [1182, 1] -> [1, 1182]
    y_in = tf.gather(input, y_indices)                # [4, 1]

    if initial_weights is None:
        print('Creating weights', len(x_indices), len(y_indices))
        w = tf.Variable(tf.random_normal((len(x_indices), len(y_indices))), name="w")
        b = tf.Variable(tf.constant(0.1, shape=[len(y_indices)]), name="b")
    else:
        print('Using supplied weights', len(initial_weights['w']), len(initial_weights['w'][0]))
        w = tf.Variable(initial_weights['w'], name="w")
        b = tf.Variable(initial_weights['b'], name="b")

    y = tf.add(tf.matmul(x_in, w), b, name="y")

    # note the shapes: y_in is [4, 1] while y is [1, 4], so squared_difference
    # broadcasts them to [4, 4] before the mean
    loss_op = tf.reduce_mean(tf.squared_difference(y_in, y), name="loss")
    train_op = tf.train.GradientDescentOptimizer(0.3).minimize(loss_op)

    print('Starting session')
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        last_error = 1.7976931348623157e+308
        this_error = 1.7976931348623157e+307
        diff = 1
        iteration = initial_weights['iteration'] if initial_weights is not None and 'iteration' in initial_weights else 0
        while diff > 0:
            iteration += 1
            last_error = this_error
            for step in range(100):
                # each step trains on the single row dequeued for that step
                sess.run([train_op, loss_op])

            w_computed = sess.run(w)
            b_computed = sess.run(b)

            pred = tf.add(tf.matmul(x_in, w), b)  # built anew each pass, never run below

            # feeding x_in here overrides the dequeued values for this evaluation
            results = sess.run(y, feed_dict={x_in: x_true})
            error = tf.losses.mean_squared_error(y_true, results)
            this_error = float(error.eval())

            (diff, locs) = compare(y_true, results)
            if locs < 50:
                print("iteration:", iteration, "error:", this_error, "diff:", diff, "locs:", locs)

        coord.request_stop()
        coord.join(threads)

The two should be equivalent, but I had to change a couple of things: 1. add a tf.transpose() on x_in for the matmul(); 2. enqueue each whole row of xs and ys together, then pull them apart with tf.gather(). (The shape sketch below shows why.)
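Those changes follow from the tensor shapes; a standalone check with the question's 1182-feature/4-output sizes assumed:

    import tensorflow as tf

    # One dequeued element is a single flattened row of all 1186 values;
    # the exact x/y positions don't matter for the shapes.
    row = tf.zeros([1186])
    x_idx = [[i] for i in range(1182)]        # stand-in x positions
    y_idx = [[i] for i in range(1182, 1186)]  # stand-in y positions

    x_col = tf.gather(row, x_idx)  # [[i]]-style indices keep a trailing axis: [1182, 1]
    x_in = tf.transpose(x_col)     # [1, 1182], i.e. a batch of exactly one row
    y_in = tf.gather(row, y_idx)   # [4, 1]

    print(x_col.shape, x_in.shape, y_in.shape)  # (1182, 1) (1, 1182) (4, 1)

So each queue-driven training step sees a batch of one row, whereas every feed_dict step trained on the full dataset.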

I have searched a lot for examples matching mine, and I can't find documentation on how to restart a queue and continue training from the beginning. The queue version seems to train forever (presumably the QueueRunner thread keeps refilling the queue?), and it never stops.
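For bounding the number of passes, one stock TF1 mechanism is tf.train.limit_epochs, which raises OutOfRangeError after a tensor has been produced num_epochs times; a minimal sketch with illustrative data:

    import tensorflow as tf

    data = tf.constant([[1.0, 2.0], [3.0, 4.0]])
    limited = tf.train.limit_epochs(data, num_epochs=3)

    with tf.Session() as sess:
        # limit_epochs keeps its counter in a local variable
        sess.run(tf.local_variables_initializer())
        try:
            while True:
                sess.run(limited)
        except tf.errors.OutOfRangeError:
            print('3 epochs consumed; the training loop can exit here')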

But most importantly: why, given exactly the same data, does the first version converge while the second does not?

1 Answer:

Answer 0 (score: 0)

  • The gather plumbing isn't needed. Enqueue only the inputs (x_true), and
  • evaluate y against y_true rather than y_in (a sketch follows).
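A minimal sketch of that suggestion as I read it, using the question's 1182-feature/4-output shapes with stand-in data (a FIFOQueue preserves order, so a full-batch dequeue stays aligned with the constant y_true):

    import numpy as np
    import tensorflow as tf

    x_true = np.random.rand(100, 1182).astype(np.float32)  # stand-in data
    y_true = np.random.rand(100, 4).astype(np.float32)

    # Enqueue only the x rows; a fixed element shape allows dequeue_many.
    q = tf.FIFOQueue(capacity=100, dtypes=tf.float32, shapes=[1182])
    enq_op = q.enqueue_many(x_true)
    tf.train.add_queue_runner(tf.train.QueueRunner(q, [enq_op]))

    x_in = q.dequeue_many(100)  # [100, 1182]: a full pass, in enqueue order
    w = tf.Variable(tf.random_normal((1182, 4)), name="w")
    b = tf.Variable(tf.constant(0.1, shape=[4]), name="b")
    y = tf.add(tf.matmul(x_in, w), b, name="y")

    # Loss against the constant y_true, not a dequeued y_in.
    loss_op = tf.reduce_mean(tf.squared_difference(tf.constant(y_true), y))
    train_op = tf.train.GradientDescentOptimizer(0.3).minimize(loss_op)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        for step in range(1000):
            _, loss = sess.run([train_op, loss_op])
        print('final loss:', loss)
        coord.request_stop()
        coord.join(threads)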
