I'm trying to write a GAN in Python using TensorFlow, but I keep running out of memory on the GPU, even though nothing in my code appears to require a particularly large amount of memory. The program is being run with all default arguments. It should read a small number of images, run them through the discriminator network, use the generator network to produce "images" from noise, run the results through the same discriminator, and train both networks.

My Python code, testtf.py:
import tensorflow as tf
import numpy as np
import PIL, os, argparse, sys, random
lrelu = lambda x: tf.maximum(x,tf.multiply(x,.02))
bce = lambda x,z: -(x*tf.log(z+1e-12) + (1.-x)*tf.log(1.-z+1e-12))
parser = argparse.ArgumentParser()
parser.add_argument('-src',default=os.path.dirname(os.path.abspath(__file__))) #Path of training data
parser.add_argument('-size',default='128x128') #Size of images
parser.add_argument('-mode',default='RGB') #Color space
parser.add_argument('-depf',type=int,default=16) #Starting filters
parser.add_argument('-depi',type=int,default=16) #Amount to increase filters
parser.add_argument('-batch',type=int,default=10) #Batch size
parser.add_argument('-rate',type=float,default=12e-5) #Learning rate
parser.add_argument('-epoch',type=int,default=1000) #Epochs to train
args = parser.parse_args(sys.argv[1:])
args.src = args.src.replace('\\','/')
if not args.src.endswith('/'): args.src += '/'
if args.size.count('x')!=1: args.size = '64x64'
args.mode = args.mode.upper()
tr_data = [file for file in os.listdir(args.src) if (file.lower().endswith('.jpg') or file.lower().endswith('.png'))] #List of training data files
ind = args.size.index('x',0)
i_width = int(args.size[:ind])
i_height = int(args.size[ind+1:])
i_chan = 3
if args.mode=='RGBA' or args.mode=='HSVA': i_chan=4
elif args.mode=='BW' or args.mode=='G': i_chan=1 #Get image dimensions and channels
fil_fin = args.depf #Additional parameters
wid_fin,hi_fin = i_width, i_height
while min(wid_fin,hi_fin)>=8:
    wid_fin/=2
    hi_fin/=2
    fil_fin+=args.depi
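# Note: with the default arguments (-size 128x128, -depf 16, -depi 16) this loop runs five times,
# leaving wid_fin = hi_fin = 4 and fil_fin = 16 + 5*16 = 96, so the noise vector fed to the
# generator has 4*4*96 = 1536 values.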
def rgb2hsv(r,g,b): #Helper function to convert rgb to hsv
    hsv = [0,0,0]
    r/=255
    g/=255
    b/=255
    lo = min(r,g,b)
    hi = max(r,g,b)
    hsv[2]=hi
    if hi>0: hsv[1]=(hi-lo)/hi
    if hi==r: hsv[0] = (g-b)/(hi-lo)
    elif hi==g: hsv[0] = (b-r)/(hi-lo)+2
    else: hsv[0] = (r-g)/(hi-lo)+4
    while hsv[0]<0: hsv[0]+=6
    hsv[0] *= 255/6
    return hsv
def load_img(path): #Loads image as data
    global i_height, i_width, args
    im = PIL.Image.open(args.src+path).resize((i_width,i_height)).convert(mode='RGBA')
    pix = im.load()
    ret = []
    for y in range(i_height):
        row = []
        for x in range(i_width):
            data = list(pix[x,y])
            if args.mode=='RGB': data = data[:3]
            elif args.mode=='HSV': data = rgb2hsv(*data[:3])
            elif args.mode=='HSVA':
                data = rgb2hsv(*data[:3])
                data.append(data[3])
            elif args.mode=='BW': data.append(1 if max(data[:3])>(255/2) else 0)
            elif args.mode=='G': data.append(max(data[:3]))
            row.append([k/255. for k in data])
        ret.append(row)
    return ret
tr_img_data = [load_img(path) for path in tr_data]
#===========================TENSORFLOW CODE STARTS HERE=======================
def disc(inp, reuse=None): #Discriminator for GAN
    global args, i_width, i_height, i_chan
    cw = i_width
    ch = i_height
    f = args.depf
    with tf.variable_scope('disc',reuse=reuse):
        x = tf.reshape(inp,shape=[-1,i_width,i_height,i_chan])
        while min(cw,ch)>=8:
            cw/=2
            ch/=2
            x = tf.layers.conv2d(x,filters=f,kernel_size=8,strides=2,padding='same',activation=lrelu) #Convolve tensor to smaller layer of more filters
            f+=args.depi
        f-=args.depi
        x = tf.contrib.layers.flatten(x)
        x = tf.layers.dense(x,units=int(cw*ch*f),activation=lrelu)
        x = tf.layers.dense(x,units=int(cw*ch*f),activation=tf.nn.sigmoid)
    return x
def gen(inp, trng=True): #Generator for GAN
    global args, i_width, i_height, i_chan
    cw,ch = [i_width, i_height]
    f = args.depf
    while min(cw,ch)>=8: #Get some useful information
        cw/=2
        ch/=2
        f += args.depi
    with tf.variable_scope('gen',reuse=None):
        x = tf.layers.dense(inp,units=cw*ch*f,activation=lrelu)
        x = tf.contrib.layers.batch_norm(x,decay=.99, is_training=trng)
        x = tf.reshape(x,shape=[-1,int(cw),int(ch),int(f)])
        while f>args.depf:
            cw*=2
            ch*=2
            f -= args.depi
            x = tf.layers.conv2d_transpose(x,filters=f,kernel_size=8,strides=2,padding='same',activation=lrelu) #Transpose convolve to larger layer of fewer filters
            x = tf.contrib.layers.batch_norm(x,decay=.99,is_training=trng)
        x = tf.layers.conv2d_transpose(x,filters=i_chan,kernel_size=8,strides=1,padding='same',activation=tf.nn.sigmoid)
    return x
img_in = tf.placeholder(tf.float32, shape=[None,i_width,i_height,i_chan]) #Placeholder for image data
noise_in = tf.placeholder(tf.float32, shape=[None,int(wid_fin*hi_fin*fil_fin)]) #Placeholder for noise
d_real = disc(img_in) #Results of discriminator on training data
g = gen(noise_in) #Generated data
d_fake = disc(g,reuse=True) #Results of discriminator on generated data
d_real_loss = bce(np.ones_like(d_real),d_real) #Losses to minimize
d_fake_loss = bce(np.zeros_like(d_fake),d_fake)
g_loss = tf.reduce_mean(bce(np.ones_like(d_fake),d_fake))
d_loss = tf.reduce_mean(.5*(d_real_loss+d_fake_loss))
d_vars = [var for var in tf.trainable_variables() if var.name.startswith("disc")]
g_vars = [var for var in tf.trainable_variables() if var.name.startswith("gen")]
d_reg = tf.contrib.layers.apply_regularization(tf.contrib.layers.l2_regularizer(1e-6),d_vars)
g_reg = tf.contrib.layers.apply_regularization(tf.contrib.layers.l2_regularizer(1e-6),g_vars)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    opt_g = tf.train.RMSPropOptimizer(args.rate).minimize(g_reg+g_loss,var_list=g_vars)
    opt_d = tf.train.RMSPropOptimizer(args.rate).minimize(d_reg+d_loss,var_list=d_vars)
    pass
sess = tf.Session()
sess.run(tf.global_variables_initializer(),options=tf.RunOptions(report_tensor_allocations_upon_oom=True))
losss=0
def step():
    global args, losss
    train_d,train_g = True,True
    feed_imgs = [random.choice(tr_img_data) for i in range(args.batch)] #Get image data to use
    feed_noise = np.random.uniform(0.,1.,[args.batch,int(wid_fin*hi_fin*fil_fin)]) #Generate noise
    loss_dreal, loss_dfake, loss_g, loss_d = sess.run([d_real_loss, d_fake_loss, g_loss, d_loss], feed_dict={img_in:feed_imgs, noise_in:feed_noise}) #Run TF
    if loss_g*1.5<loss_d: train_g = False
    if loss_d*2<loss_g: train_d=False
    losss = [loss_g, loss_d]
    if train_g: #Train
        sess.run(opt_g, feed_dict={img_in:feed_imgs, noise_in:feed_noise})
        pass
    if train_d:
        sess.run(opt_d, feed_dict={img_in:feed_imgs, noise_in:feed_noise})
        pass
def train(ep):
    global losss
    for i in range(ep):
        if ep-i<30: print(losss)
        step()
train(args.epoch)
Running python testtf.py 2>dat.txt saves the following to dat.txt:
2018-07-16 14:28:43.636085: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:1392] Found device 0 with properties:
name: GeForce GTX 750 Ti major: 5 minor: 0 memoryClockRate(GHz): 1.15
pciBusID: 0000:01:00.0
totalMemory: 2.00GiB freeMemory: 428.24MiB
2018-07-16 14:28:43.642553: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:1471] Adding visible gpu devices: 0
2018-07-16 14:28:44.811132: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:952] Device interconnect StreamExecutor with strength 1 edge matrix:
2018-07-16 14:28:44.811711: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:958] 0
(Many similar lines omitted to stay under character limit)
2018-07-16 14:28:58.283837: I T:\src\github\tensorflow\tensorflow\core\common_runtime\bfc_allocator.cc:674] 1 Chunks of size 13892608 totalling 13.25MiB
2018-07-16 14:28:58.284323: I T:\src\github\tensorflow\tensorflow\core\common_runtime\bfc_allocator.cc:678] Sum Total of in-use chunks: 137.94MiB
2018-07-16 14:28:58.284793: I T:\src\github\tensorflow\tensorflow\core\common_runtime\bfc_allocator.cc:680] Stats:
Limit: 161529856
InUse: 144637952
MaxInUse: 161529856
NumAllocs: 289
MaxAllocSize: 14613504
2018-07-16 14:28:58.285723: W T:\src\github\tensorflow\tensorflow\core\common_runtime\bfc_allocator.cc:279] *********************************************************************************_******xxx_________
2018-07-16 14:28:58.286415: W T:\src\github\tensorflow\tensorflow\core\framework\op_kernel.cc:1318] OP_REQUIRES failed at conv_grad_input_ops.cc:676 : Resource exhausted: OOM when allocating tensor with shape[16,16,128,128] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
Traceback (most recent call last):
File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1322, in _do_call
return fn(*args)
File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1307, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1409, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.ResourceExhaustedError: OOM when allocating tensor with shape[16,16,128,128] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
[[Node: gen/conv2d_transpose_4/conv2d_transpose = Conv2DBackpropInput[T=DT_FLOAT, data_format="NCHW", dilations=[1, 1, 1, 1], padding="SAME", strides=[1, 1, 2, 2], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](gen/conv2d_transpose_4/conv2d_transpose-0-VecPermuteNHWCToNCHW-LayoutOptimizer, gen/conv2d_transpose_4/kernel/read, gen/BatchNorm_4/FusedBatchNorm)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
[[Node: Mean/_5 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_516_Mean", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "testtf.py", line 168, in <module>
train(args.epoch)
File "testtf.py", line 166, in train
step()
File "testtf.py", line 150, in step
loss_dreal, loss_dfake, loss_g, loss_d = sess.run([d_real_loss, d_fake_loss, g_loss, d_loss], feed_dict={img_in:feed_imgs, noise_in:feed_noise}) #Run TF
File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 900, in run
run_metadata_ptr)
File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1135, in _run
feed_dict_tensor, options, run_metadata)
File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1316, in _do_run
run_metadata)
File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1335, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.ResourceExhaustedError: OOM when allocating tensor with shape[16,16,128,128] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
[[Node: gen/conv2d_transpose_4/conv2d_transpose = Conv2DBackpropInput[T=DT_FLOAT, data_format="NCHW", dilations=[1, 1, 1, 1], padding="SAME", strides=[1, 1, 2, 2], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](gen/conv2d_transpose_4/conv2d_transpose-0-VecPermuteNHWCToNCHW-LayoutOptimizer, gen/conv2d_transpose_4/kernel/read, gen/BatchNorm_4/FusedBatchNorm)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
[[Node: Mean/_5 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_516_Mean", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
Caused by op 'gen/conv2d_transpose_4/conv2d_transpose', defined at:
File "testtf.py", line 120, in <module>
g = gen(noise_in) #Generated data
File "testtf.py", line 111, in gen
x = tf.layers.conv2d_transpose(x,filters=f,kernel_size=8,strides=2,padding='same',activation=lrelu) #Transpose convolve to larger layer of fewer filters
File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\layers\convolutional.py", line 1272, in conv2d_transpose
return layer.apply(inputs)
File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 774, in apply
return self.__call__(inputs, *args, **kwargs)
File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\layers\base.py", line 329, in __call__
outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 703, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\keras\layers\convolutional.py", line 777, in call
data_format=conv_utils.convert_data_format(self.data_format, ndim=4))
File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\nn_ops.py", line 1254, in conv2d_transpose
name=name)
File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\gen_nn_ops.py", line 1340, in conv2d_backprop_input
dilations=dilations, name=name)
File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 3414, in create_op
op_def=op_def)
File "C:\Users\Sellar.King\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 1740, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[16,16,128,128] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
[[Node: gen/conv2d_transpose_4/conv2d_transpose = Conv2DBackpropInput[T=DT_FLOAT, data_format="NCHW", dilations=[1, 1, 1, 1], padding="SAME", strides=[1, 1, 2, 2], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](gen/conv2d_transpose_4/conv2d_transpose-0-VecPermuteNHWCToNCHW-LayoutOptimizer, gen/conv2d_transpose_4/kernel/read, gen/BatchNorm_4/FusedBatchNorm)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
[[Node: Mean/_5 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_516_Mean", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
Is there more memory usage hidden somewhere? Is some bad practice on my part causing excessive memory use? Or is this simply too much memory to ask of roughly 154 MB of available GPU memory?
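For reference, here is the only back-of-envelope arithmetic I can do from the log above, using just the tensor shape in the OOM message and the allocator's reported limit (I may well be overlooking other allocations):

tensor_bytes = 16*16*128*128*4      # shape[16,16,128,128], float32 = 4 bytes -> 16777216 bytes (16 MiB)
limit_bytes  = 161529856            # "Limit" reported by the BFC allocator -> roughly 154 MiB
print(tensor_bytes / limit_bytes)   # a single activation like this is already ~10% of the limit

So one such activation alone is about 16 MiB, and the allocator only has about 154 MiB to work with.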