我正在 Windows 8.1 上运行一段 Python(v3.6.5)代码,该代码使用 TensorFlow(v1.13.2)加载经过训练的模型并执行推理。
我想捕获(并记录)从TensorFlow库内部抛出的异常/错误。
例如,当批处理大小(在session.run()期间)太大时,进程将占用所有系统内存并崩溃。
我的代码如下:
import tensorflow as tf
import math
from tqdm import tqdm
# …
def parse_function(image_string, frame_id):
    """Decode one JPEG string and resize it for the model input.

    Args:
        image_string: JPEG-encoded image data accepted by
            ``tf.image.decode_jpeg``.
        frame_id: Identifier of the frame; returned unchanged.

    Returns:
        A ``(resized_image, frame_id)`` pair, where the image has been
        decoded with 3 channels and resized to 224x224 using bicubic
        interpolation.
    """
    decoded = tf.image.decode_jpeg(image_string, channels=3)
    target_size = [224, 224]
    resized = tf.image.resize_images(
        decoded,
        target_size,
        method=tf.image.ResizeMethod.BICUBIC,
    )
    return resized, frame_id
def load_graph(frozen_graph_filename):
    """Load a serialized GraphDef file into a fresh ``tf.Graph``.

    Args:
        frozen_graph_filename: Path of the serialized ``GraphDef`` file,
            opened in binary mode through ``tf.gfile.GFile``.

    Returns:
        A new ``tf.Graph`` holding the imported definition. Every imported
        node name is placed under the ``"prefix"`` name scope.
    """
    # Read the raw bytes first so the file handle is released before parsing.
    with tf.gfile.GFile(frozen_graph_filename, "rb") as pb_file:
        serialized = pb_file.read()

    parsed_def = tf.GraphDef()
    parsed_def.ParseFromString(serialized)

    # Import into a dedicated graph rather than the process-wide default one.
    imported = tf.Graph()
    with imported.as_default():
        tf.import_graph_def(parsed_def, name="prefix")
    return imported
def _run_batches(preprocess_sess, inference_sess, next_element, x, y, num_batches):
    """Fetch each batch from the input pipeline and run the model on it.

    Args:
        preprocess_sess: Session that evaluates ``next_element``.
        inference_sess: Session that evaluates ``y``.
        next_element: ``(image_batch, frame_id_batch)`` tensors produced by
            the dataset iterator.
        x: Input placeholder tensor of the imported model.
        y: Output tensor of the imported model.
        num_batches: Number of loop iterations to perform.

    Raises:
        BaseException: Non-``Exception`` errors (e.g. ``KeyboardInterrupt``)
            are reported as 'Error 4' and then re-raised.
    """
    # NOTE(review): these handlers only see failures that surface as Python
    # exceptions; a process that is killed for exhausting memory in native
    # code would presumably never reach them -- confirm.
    for _ in tqdm(range(num_batches)):
        try:
            # pre process
            inference_batch, frame_id_batch = preprocess_sess.run(next_element)
            # main process
            scores_np = inference_sess.run(y, feed_dict={x: inference_batch})
            # post process …
        except MemoryError:
            print('Error 1')
        except tf.errors.OpError:
            # Must come before the generic handler: OpError derives from
            # Exception, so listing it afterwards makes it unreachable.
            print('Error 3')
        except Exception:
            print('Error 2')
        except BaseException:
            # Report, but do not swallow, interpreter-level signals such as
            # KeyboardInterrupt/SystemExit; otherwise the loop cannot be
            # interrupted.
            print('Error 4')
            raise


def main(_):
    """Run batched inference over the frames using a frozen model.

    Reads every frame returned by ``get_ids()`` through ``MyFrameReader``,
    builds a ``tf.data`` pipeline that decodes and resizes the frames, and
    feeds each batch to the graph loaded from ``PB_FILE``.

    Args:
        _: Unused positional argument.
    """
    batch_size = 128
    num_frames = 5000
    # math.ceil already returns an int on Python 3 and, unlike the previous
    # np.ceil call, relies only on a module this file actually imports.
    num_batches = math.ceil(num_frames / batch_size)

    frame_ids = get_ids()
    with MyFrameReader() as frd:
        im_list = [frd.get_frame(frame_id) for frame_id in frame_ids]

    # Input pipeline, built in the default graph.
    dataset = tf.data.Dataset.from_tensor_slices((im_list, frame_ids))
    dataset = dataset.map(parse_function)
    batched_dataset = dataset.batch(batch_size)
    iterator = batched_dataset.make_initializable_iterator()
    next_element = iterator.get_next()

    # Model graph, loaded separately from the input pipeline.
    graph = load_graph(PB_FILE)
    x = graph.get_tensor_by_name('prefix/input_image:0')
    y = graph.get_tensor_by_name('prefix/output_node:0')

    # try/finally guarantees both sessions are closed even if initialization
    # or the batch loop raises.
    sess1 = tf.Session(graph=graph)
    try:
        sess2 = tf.Session(config=tf.ConfigProto(device_count={'GPU': 0}))  # Run on CPU
        try:
            sess2.run(iterator.initializer)
            _run_batches(sess2, sess1, next_element, x, y, num_batches)
        finally:
            sess2.close()
    finally:
        sess1.close()
我观察到该进程占用的内存不断增加,并在某个时刻直接崩溃,根本没有进入任何异常处理代码。(如果我在 Python 代码中加入一段大量占用内存的代码,则能够成功捕获到 MemoryError 异常。)
有人可以解释发生了什么吗?