I am modifying the code from this GitHub repo (https://github.com/hehefan/Video-Classification) so that it accepts my own input. When I run a single video at a time for 5 epochs, it trains without any problem. However, when I try to run several videos in one go, it hits a MemoryError at the sess.run() line.
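To give a sense of scale, here is a rough sketch of how I estimate the size of the array that sess.run() has to build from the feed_dict in the code below. The dimensions are placeholders (assumptions), not my actual FLAGS values:

# Rough size estimate for the array np.asarray() builds from the fed nested list.
# max_video_length / feature_size are placeholders, not my real config values.
import numpy as np

num_videos = 12          # len(filename) in the code below
max_video_length = 300   # placeholder for FLAGS.max_video_length
feature_size = 2048      # placeholder for FLAGS.feature_size

# batch_feature = [data], so the fed value is 4-D:
# [1, num_videos, max_video_length, feature_size]
approx = np.zeros((1, num_videos, max_video_length, feature_size), dtype=np.float32)
print(approx.shape, "~%.1f MB" % (approx.nbytes / 1e6))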
Code:
import sys
import os
import numpy as np
import tensorflow as tf
import gzip
#import cPickle
import _pickle as cPickle
import random

from config import FLAGS
from models import DynamicRNN
from models import AveragePooling

filename = ['D2N2Sur', 'H2N2A', 'H2N2C', 'H2N2D', 'H2N2S', 'N2A', 'N2C', 'N2D', 'N2H', 'N2S', 'N2Sur', 'S2N2H']

# TRAINING LABEL
batch_label = list(range(12))

# DATA PROCESSING
data = []
batch_length = []
for name in filename:
    # READ DATA
    counter = 0
    frame = 0
    video = []
    l = ""
    f = open("Train1/" + name + ".txt", "r")
    for line in f:
        l = l + line[:-1]
        counter += 1
        if (counter == 365):
            l = list(l)
            video.append(l)
            l = ""
            counter = 0
            frame += 1
    # MAKE SURE ALL VIDEOS HAVE THE SAME LENGTH
    # PAD WITH 0
    frame = FLAGS.max_video_length - frame
    for number in range(frame):
        video.append([0] * FLAGS.feature_size)
    # APPEND VIDEO TO DATA
    data.append(video)
    batch_length.append(FLAGS.max_video_length)

training_steps_per_epoch = len(data) // FLAGS.batch_size

if not os.path.exists(FLAGS.checkpoint_dir):
    os.makedirs(FLAGS.checkpoint_dir)

model = AveragePooling(feature_size=FLAGS.feature_size, max_video_length=FLAGS.max_video_length,
                       num_classes=FLAGS.num_classes, cell_size=FLAGS.size, use_lstm=FLAGS.use_lstm,
                       learning_rate=FLAGS.learning_rate, learning_rate_decay_factor=FLAGS.learning_rate_decay_factor,
                       min_learning_rate=FLAGS.min_learning_rate, training_steps_per_epoch=training_steps_per_epoch,
                       max_gradient_norm=FLAGS.max_gradient_norm, keep_prob=FLAGS.keep_prob, is_training=True)

with tf.Session() as sess:
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
        model.saver.restore(sess, ckpt.model_checkpoint_path)
        step = int(ckpt.model_checkpoint_path.split('-')[1])
    else:
        sess.run(tf.global_variables_initializer())
        step = 0
    for epoch in range(1, FLAGS.num_epochs + 1):
        random.shuffle(data)
        batch_feature = []
        batch_feature.append(data)
        feed_dict = {model.frame_feature_ph: batch_feature, model.video_length_ph: batch_length, model.video_label_ph: batch_label}
        loss, _ = sess.run([model.loss, model.train_op], feed_dict=feed_dict)
        step += 1
        if step % FLAGS.steps_per_checkpoint == 0:
            checkpoint_path = os.path.join(FLAGS.checkpoint_dir, "ckpt")
            model.saver.save(sess, checkpoint_path, global_step=model.global_step)
            print("%5d: %3d, %.3f" % (step, epoch, loss))
            sys.stdout.flush()
Error:
Traceback (most recent call last):
File "/root/Documents/EmotionRecognition/masstrain.py", line 114, in <module>
loss, _ = sess.run([model.loss, model.train_op], feed_dict=feed_dict)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 895, in run
run_metadata_ptr)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1093, in _run
np_val = np.asarray(subfeed_val, dtype=subfeed_dtype)
File "/usr/local/lib/python3.5/dist-packages/numpy/core/numeric.py", line 482, in asarray
return array(a, dtype, copy=False, order=order)
MemoryError
Process finished with exit code 1
Does anyone have any idea what is going on here?