sess.run()内存错误

时间:2017-11-02 09:44:07

标签: python tensorflow

我正在修改此github(https://github.com/hehefan/Video-Classification)中的代码,因此它可以接受我自己的输入。当我尝试为5 epoch一次运行一个视频时,它运行没有问题。但是,当我尝试从多个视频连续运行时,它会在sess.run()行中运行到内存错误。

代码:

import sys
import os
import numpy as np
import tensorflow as tf
import gzip
#import cPickle
import _pickle as cPickle
import random
from config import FLAGS
from models import DynamicRNN
from models import AveragePooling

filename = ['D2N2Sur', 'H2N2A', 'H2N2C', 'H2N2D', 'H2N2S', 'N2A', 'N2C', 'N2D', 'N2H', 'N2S', 'N2Sur', 'S2N2H']

#TRAINING LABEL
batch_label = list(range(12))

#DATA PROCESSING
data = []
batch_length = []
for name in filename:
  #READ DATA
  counter = 0
  frame = 0
  video=[]
  l = ""
  f = open("Train1/"+name+".txt", "r")
  for line in f:
    l = l + line[:-1]
    counter += 1
    if (counter == 365):
      l = list(l)
      video.append(l)
      l = ""
      counter = 0
      frame += 1

  #MAKE SURE ALL VIDEO HAVE SAME LENGTH
  #PAD BY 0
  frame = FLAGS.max_video_length - frame
  for number in range(frame):
    video.append([0]*FLAGS.feature_size)

  #APPEND VIDEO TO DATA
  data.append(video)
  batch_length.append(FLAGS.max_video_length)

training_steps_per_epoch = len(data) // FLAGS.batch_size

if not os.path.exists(FLAGS.checkpoint_dir):
  os.makedirs(FLAGS.checkpoint_dir)

model = AveragePooling(feature_size=FLAGS.feature_size, max_video_length=FLAGS.max_video_length,
    num_classes=FLAGS.num_classes, cell_size=FLAGS.size, use_lstm=FLAGS.use_lstm,
    learning_rate=FLAGS.learning_rate, learning_rate_decay_factor=FLAGS.learning_rate_decay_factor,
    min_learning_rate=FLAGS.min_learning_rate, training_steps_per_epoch=training_steps_per_epoch,
    max_gradient_norm=FLAGS.max_gradient_norm, keep_prob=FLAGS.keep_prob, is_training=True)

with tf.Session() as sess:
  ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
  if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
    print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
    model.saver.restore(sess, ckpt.model_checkpoint_path)
    step = int(ckpt.model_checkpoint_path.split('-')[1])
  else:
    sess.run(tf.global_variables_initializer())
    step = 0
  for epoch in range(1, FLAGS.num_epochs+1):
    random.shuffle(data)
    batch_feature = []

    batch_feature.append(data)

    feed_dict = {model.frame_feature_ph: batch_feature, model.video_length_ph:batch_length, model.video_label_ph:batch_label}
    loss,  _ = sess.run([model.loss, model.train_op], feed_dict=feed_dict)
    step += 1
    if step % FLAGS.steps_per_checkpoint == 0:
      checkpoint_path = os.path.join(FLAGS.checkpoint_dir, "ckpt")
      model.saver.save(sess, checkpoint_path, global_step=model.global_step)
    print ("%5d: %3d, %.3f"%(step, epoch, loss))
    sys.stdout.flush()

错误:

Traceback (most recent call last):
  File "/root/Documents/EmotionRecognition/masstrain.py", line 114, in <module>
    loss,  _ = sess.run([model.loss, model.train_op], feed_dict=feed_dict)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 895, in run
    run_metadata_ptr)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1093, in _run
    np_val = np.asarray(subfeed_val, dtype=subfeed_dtype)
  File "/usr/local/lib/python3.5/dist-packages/numpy/core/numeric.py", line 482, in asarray
    return array(a, dtype, copy=False, order=order)
MemoryError

Process finished with exit code 1

有人对此有所了解吗?

0 个答案:

没有答案