TensorFlow: saving/importing a checkpoint works without error, but all imported variables have the value 'None'

Asked: 2017-10-18 17:11:52

Tags: tensorflow tensorflow-gpu

I am training a deep CNN for image enhancement and have run into a very strange problem.

My network architecture is fully convolutional and implements several small "U-shaped" components, in which feature maps are down-/up-sampled so that the processing happens in the "top" layer. In the top layer there are several nodes at which the network "guesses" the output image, and the output of the lower layers is added to features derived from those guesses. The loss function penalizes error in the final prediction as well as in these guesses.

The network is defined like so:

def convNet(x, weights, biases):
    #TOP LAYER
    conv0_1        =  conv3dWrap(x, weights['wConv0_1'], biases['bConv0_1'],[1,1,1,1,1])
    conv0_2        =  conv3dWrap(conv0_1, weights['wConv0_2'], biases['bConv0_2'],[1,1,1,1,1])

    #MID LAYER DOWN SAMPLE
    conv1_1        =  conv3dWrap(conv0_2, weights['wConv1_1'], biases['bConv1_1'],[1,2,2,2,1])
    conv1_2        =  conv3dWrap(conv1_1, weights['wConv1_2'], biases['bConv1_2'],[1,1,1,1,1])

    #BOTTOM LAYER DOWN SAMPLE
    conv2_1        = conv3dWrap(conv1_2, weights['wConv2_1'], biases['bConv2_1'],[1,2,2,2,1])
    conv2_2        = conv3dWrap(conv2_1, weights['wConv2_2'], biases['bConv2_2'],[1,1,1,1,1])
    conv2_3        = conv3dWrap(conv2_2, weights['wConv2_3'], biases['bConv2_3'],[1,1,1,1,1])
    convTrans2_1   = conv3dTransWrap(conv2_3,weights['wTConv2_1'], biases['bTConv2_1'], [4,2,32,32,64],[1,2,2,2,1])

    #MID LAYER UPSAMPLE
    conv1_3        =  conv3dWrap(tf.add(convTrans2_1,conv1_2),weights['wConv1_3'], biases['bConv1_3'],[1,1,1,1,1])
    conv1_4        =  conv3dWrap(conv1_3, weights['wConv1_4'], biases['bConv1_4'],[1,1,1,1,1])
    convTrans1_1   =  conv3dTransWrap(conv1_4, weights['wTConv1_1'], biases['bTConv1_1'], [4,4,64,64,32],[1,2,2,2,1])

    #TOP LAYER AGAIN
    conv0_3        =  conv3dWrap(tf.add(conv0_2,convTrans1_1), weights['wConv0_3'], biases['bConv0_3'],[1,1,1,1,1])
    conv0_4        =  conv3dWrap(conv0_3, weights['wConv0_4'], biases['bConv0_4'],[1,1,1,1,1])
    recon0_1       =  reconWrap(conv0_3, weights['wReconDS0_1'], biases['bReconDS0_1'],[1,1,1,1,1])
    print(recon0_1.shape)
    catRecon0_1    =  tf.add(conv0_4,tf.contrib.keras.backend.repeat_elements(recon0_1,32,4))
    conv0_5        =  conv3dWrap(catRecon0_1, weights['wConv0_5'], biases['bConv0_5'],[1,1,1,1,1])

    #MID LAYER AGAIN
    conv1_5        =  conv3dWrap(conv0_5, weights['wConv1_5'], biases['bConv1_5'],[1,2,2,2,1])
    conv1_6        =  conv3dWrap(conv1_5, weights['wConv1_6'], biases['bConv1_6'],[1,1,1,1,1])

    #BOTTOM LAYER
    conv2_4        = conv3dWrap(conv1_6, weights['wConv2_4'], biases['bConv2_4'],[1,2,2,2,1])
    conv2_5        = conv3dWrap(conv2_4, weights['wConv2_5'], biases['bConv2_5'],[1,1,1,1,1])
    conv2_6        = conv3dWrap(conv2_5, weights['wConv2_6'], biases['bConv2_6'],[1,1,1,1,1])
    convTrans2_2   = conv3dTransWrap(conv2_6,weights['wTConv2_2'], biases['bTConv2_2'], [4,2,32,32,64],[1,2,2,2,1])

    #MID LAYER UPSAMPLE
    conv1_7        =  conv3dWrap(tf.add(convTrans2_2,conv1_6),weights['wConv1_7'], biases['bConv1_7'],[1,1,1,1,1])
    conv1_8        =  conv3dWrap(conv1_7, weights['wConv1_8'], biases['bConv1_8'],[1,1,1,1,1])
    convTrans1_2   =  conv3dTransWrap(conv1_8,weights['wTConv1_2'], biases['bTConv1_2'], [4,4,64,64,32],[1,2,2,2,1])

    #TOP LAYER
    conv0_6        =  conv3dWrap(tf.add(conv0_5,convTrans1_2), weights['wConv0_6'], biases['bConv0_6'],[1,1,1,1,1])
    recon0_2       =  reconWrap(conv0_6, weights['wReconDS0_2'], biases['bReconDS0_2'],[1,1,1,1,1])
    catRecon0_2    =  tf.add(conv0_6,tf.contrib.keras.backend.repeat_elements(recon0_2,32,4))
    conv0_7        =  conv3dWrap(catRecon0_2, weights['wConv0_7'], biases['bConv0_7'],[1,1,1,1,1])

    #MID LAYER
    conv1_9        =  conv3dWrap(conv0_7, weights['wConv1_9'], biases['bConv1_9'],[1,2,2,2,1]) 
    conv1_10       =  conv3dWrap(conv1_9, weights['wConv1_10'], biases['bConv1_10'],[1,1,1,1,1])

    #BOTTOM LAYER
    conv2_7        = conv3dWrap(conv1_10, weights['wConv2_7'], biases['bConv2_7'],[1,2,2,2,1])
    conv2_8        = conv3dWrap(conv2_7,  weights['wConv2_8'], biases['bConv2_8'],[1,1,1,1,1])
    conv2_9        = conv3dWrap(conv2_8,  weights['wConv2_9'], biases['bConv2_9'],[1,1,1,1,1])
    convTrans2_3   = conv3dTransWrap(conv2_9, weights['wTConv2_3'], biases['bTConv2_3'], [4,2,32,32,64],[1,2,2,2,1])

    #MID LAYER UPSAMPLE
    conv1_11        =  conv3dWrap(tf.add(convTrans2_3,conv1_10),weights['wConv1_11'], biases['bConv1_11'],[1,1,1,1,1])
    conv1_12        =  conv3dWrap(conv1_11, weights['wConv1_12'], biases['bConv1_12'],[1,1,1,1,1])
    convTrans1_3    =   conv3dTransWrap(conv1_12,weights['wTConv1_3'], biases['bTConv1_3'], [4,4,64,64,32],[1,2,2,2,1])

    #TOP LAYER
    conv0_8        =  conv3dWrap(tf.add(conv0_7,convTrans1_3), weights['wConv0_8'], biases['bConv0_8'],[1,1,1,1,1])
    recon0_3       =  reconWrap(conv0_8, weights['wReconDS0_3'], biases['bReconDS0_3'],[1,1,1,1,1])
    catRecon0_3    =  tf.add(conv0_8,tf.contrib.keras.backend.repeat_elements(recon0_3,32,4))
    conv0_9        =  conv3dWrap(catRecon0_3, weights['wConv0_9'], biases['bConv0_9'],[1,1,1,1,1])
    print(recon0_3.shape)

    #MID LAYER
    conv1_13        =  conv3dWrap(conv0_9, weights['wConv1_13'], biases['bConv1_13'],[1,2,2,2,1]) 
    conv1_14       =   conv3dWrap(conv1_13, weights['wConv1_14'], biases['bConv1_14'],[1,1,1,1,1])

    #BOTTOM LAYER
    conv2_10        = conv3dWrap(conv1_14, weights['wConv2_10'], biases['bConv2_10'],[1,2,2,2,1])
    conv2_11        = conv3dWrap(conv2_10,  weights['wConv2_11'], biases['bConv2_11'],[1,1,1,1,1])
    conv2_12        = conv3dWrap(conv2_11,  weights['wConv2_12'], biases['bConv2_12'],[1,1,1,1,1])
    convTrans2_4   = conv3dTransWrap(conv2_12, weights['wTConv2_4'], biases['bTConv2_4'], [4,2,32,32,64],[1,2,2,2,1])

    #MID LAYER UPSAMPLE
    conv1_15        =  conv3dWrap(tf.add(convTrans2_4,conv1_14),weights['wConv1_15'], biases['bConv1_15'],[1,1,1,1,1])
    conv1_16        =  conv3dWrap(conv1_15, weights['wConv1_16'], biases['bConv1_16'],[1,1,1,1,1])
    convTrans1_4    =  conv3dTransWrap(conv1_16,weights['wTConv1_4'], biases['bTConv1_4'], [4,4,64,64,32],[1,2,2,2,1])

    #TOP LAYER
    conv0_10        =  conv3dWrap(tf.add(conv0_9,convTrans1_4), weights['wConv0_10'], biases['bConv0_10'],[1,1,1,1,1])

    #OUTPUT
    convOUT        =  reconWrap(conv0_10, weights['wConvOUT'], biases['bConvOUT'],[1,1,1,1,1])
    print(convOUT.shape)

    return recon0_1, recon0_2, recon0_3, convOUT

All of the "wrapper" functions are as follows:

def conv3dWrap(x, W, b, strides):
    x = tf.nn.conv3d(x, W, strides, padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)

def reconWrap(x, W, b, strides):
    x = tf.nn.conv3d(x, W, strides, padding='SAME')
    x = tf.nn.bias_add(x, b)
    return x

def conv3dTransWrap(x, W, b, shape, strides):
    x = tf.nn.conv3d_transpose(x, W, shape, strides, padding='SAME')
    x = tf.nn.bias_add(x,b)
    return tf.nn.relu(x)

My weights and biases are stored in dictionaries that are defined before training starts:

weights={
#TOP LAYER
'wConv0_1':      tf.Variable(tf.random_normal([4, 3, 3, 1, 5]),   name='wC0_1'),
'wConv0_2':      tf.Variable(tf.random_normal([4, 3, 3, 5, 32]),  name='wC0_2'),
'wConv0_3':      tf.Variable(tf.random_normal([4, 3, 3, 32, 32]), name='wC0_3'),
'wConv0_4':      tf.Variable(tf.random_normal([4, 3, 3, 32, 32]),  name='wC0_4'),
'wReconDS0_1':   tf.Variable(tf.random_normal([1, 1, 1, 32, 1]), name='wR0_1'),
# ...... THIS CONTINUES FOR QUITE AWHILE
}

Then I start training like this:

def train_cnn(x):
    epochLosses=[]
    print('Beginning Training!')
    print(NUM_EPOCHS)
    r1,r2,r3,pred = convNet(x, weights, biases)        
    cost = (tf.losses.mean_squared_error(y,pred) 
    + 0.25* ((tf.losses.mean_squared_error(y,r1)) 
    + (tf.losses.mean_squared_error(y,r2))
    + (tf.losses.mean_squared_error(y,r3))))

    regularizer= (0.01*tf.nn.l2_loss(weights['wConv0_1'])+
                                    0.01*tf.nn.l2_loss(weights['wConv0_2'])+
                                    0.01*tf.nn.l2_loss(weights['wConv0_3'])+     
                                    0.01*tf.nn.l2_loss(weights['wConv0_4'])+    
                                    0.01*tf.nn.l2_loss(weights['wReconDS0_1'])+  
                                    0.01*tf.nn.l2_loss(weights['wConv0_5'])+ 
                                    0.01*tf.nn.l2_loss(weights['wConv0_6'])+     
                                    0.01*tf.nn.l2_loss(weights['wReconDS0_2'])+  
                                    0.01*tf.nn.l2_loss(weights['wReconDS0_3'])+  
                                    0.01*tf.nn.l2_loss(weights['wConv0_7'])+     
                                    0.01*tf.nn.l2_loss(weights['wConv0_8'])+      
                                    0.01*tf.nn.l2_loss(weights['wConv0_9'])+     
                                    0.01*tf.nn.l2_loss(weights['wConv0_10'])+     
                                    0.01*tf.nn.l2_loss(weights['wConvOUT'])+      
                                    0.01*tf.nn.l2_loss(weights['wConv1_1'])+   
                                    0.01*tf.nn.l2_loss(weights['wConv1_2'])+     
                                    0.01*tf.nn.l2_loss(weights['wConv1_3'])+     
                                    0.01*tf.nn.l2_loss(weights['wConv1_4'])+      
                                    0.01*tf.nn.l2_loss(weights['wConv1_5'])+      
                                    0.01*tf.nn.l2_loss(weights['wConv1_6'])+      
                                    0.01*tf.nn.l2_loss(weights['wConv1_7'])+      
                                    0.01*tf.nn.l2_loss(weights['wConv1_8'])+     
                                    0.01*tf.nn.l2_loss(weights['wConv1_9'])+      
                                    0.01*tf.nn.l2_loss(weights['wConv1_10'])+     
                                    0.01*tf.nn.l2_loss(weights['wConv1_11'])+      
                                    0.01*tf.nn.l2_loss(weights['wConv1_12'])+     
                                    0.01*tf.nn.l2_loss(weights['wConv1_13'])+      
                                    0.01*tf.nn.l2_loss(weights['wConv1_14'])+      
                                    0.01*tf.nn.l2_loss(weights['wConv1_15'])+      
                                    0.01*tf.nn.l2_loss(weights['wConv1_16'])+      
                                    0.01*tf.nn.l2_loss(weights['wTConv1_1'])+      
                                    0.01*tf.nn.l2_loss(weights['wTConv1_2'])+     
                                    0.01*tf.nn.l2_loss(weights['wTConv1_3'])+      
                                    0.01*tf.nn.l2_loss(weights['wTConv1_4'])+     
                                    0.01*tf.nn.l2_loss(weights['wConv2_1'])+     
                                    0.01*tf.nn.l2_loss(weights['wConv2_2'])+   
                                    0.01*tf.nn.l2_loss(weights['wConv2_3'])+     
                                    0.01*tf.nn.l2_loss(weights['wConv2_4'])+     
                                    0.01*tf.nn.l2_loss(weights['wConv2_5'])+    
                                    0.01*tf.nn.l2_loss(weights['wConv2_6'])+     
                                    0.01*tf.nn.l2_loss(weights['wConv2_7'])+     
                                    0.01*tf.nn.l2_loss(weights['wConv2_8'])+      
                                    0.01*tf.nn.l2_loss(weights['wConv2_9'])+      
                                    0.01*tf.nn.l2_loss(weights['wConv2_10'])+     
                                    0.01*tf.nn.l2_loss(weights['wConv2_11'])+     
                                    0.01*tf.nn.l2_loss(weights['wConv2_12'])+   
                                    0.01*tf.nn.l2_loss(weights['wTConv2_1'])+     
                                    0.01*tf.nn.l2_loss(weights['wTConv2_2'])+     
                                    0.01*tf.nn.l2_loss(weights['wTConv2_3'])+    
                                    0.01*tf.nn.l2_loss(weights['wTConv2_4']))
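    # NOTE: assuming every entry in `weights` should carry the same 0.01
    # penalty, an equivalent and far more compact form of the sum above is:
    #   regularizer = 0.01 * tf.add_n([tf.nn.l2_loss(w) for w in weights.values()])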
    cost=cost+regularizer
    optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(cost)
    saver = tf.train.Saver()
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    valLosses=[]
    epochLosses=[]
    print('Beginning Session!')
    writer = tf.summary.FileWriter('./GRAPH', sess.graph)

Finally, I go on to do some batch-loading business and, once everything is ready, I do the following (on every pass for now; once I get weight importing to work I won't save on every pass):

                _, c = sess.run([optimizer, cost], feed_dict = {x: inBatch,y: gsBatch})
                epoch_loss += c           
                save_path = saver.save(sess, "./CHKPT/model.cpkt")  
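
A minimal sketch of what that less-frequent saving might look like (`step` and `SAVE_EVERY` are hypothetical names here; `global_step` appends the step number to the checkpoint filename):

                if step % SAVE_EVERY == 0:
                    save_path = saver.save(sess, "./CHKPT/model.cpkt", global_step=step)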

So when I go on to import this model:

sess = tf.Session()
x = tf.placeholder(dtype=tf.float32)
new_saver = tf.train.import_meta_graph('./CHKPT/model.cpkt.meta')
sess.run(tf.global_variables_initializer())
a,b,c,pred = convNet(x, weights, biases)

I run into the following error:

ValueError: Tried to convert 'filter' to a tensor and failed. Error: None values not supported.

When I inspect the imported weights and biases, each one has the value 'None'. Not only is this strange, but the network also "runs" far faster during training than I would expect, so I worry that no legitimate computation is taking place.

That surely can't be right, yet I'm almost positive I'm following the same save/load procedure that I've used, verbatim, for many other networks. Can anyone shed some light on what might be happening here?

Edit: I'm also quite new to TF, so there are probably non-idealities in my code. If you see anything outside of the saving/importing that isn't kosher, please let me know.

1 Answer:

Answer 0: (score: 0)

Running sess.run(tf.global_variables_initializer()) re-initializes every variable and discards the loaded values. Skip the call to tf.global_variables_initializer() when loading a model; the initialization is done by the saver.

You are also missing the restore call (import_meta_graph() only loads the Saver object):

new_saver = tf.train.import_meta_graph('./CHKPT/model.cpkt.meta')
new_saver.restore(sess, './CHKPT/model.cpkt')
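
To see why that initializer call is destructive, here is a quick sketch (assuming the restore above succeeded):

v = tf.global_variables()[0]                  # any restored variable
print(sess.run(v))                            # trained value from the checkpoint
sess.run(tf.global_variables_initializer())   # re-randomizes every variable
print(sess.run(v))                            # fresh random value; the training is gone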

After that, when you run:

a,b,c,pred = convNet(x, weights, biases)

you create a brand-new network and never use the loaded one. Instead, after restoring the model, you have to find the tensors you need among tf.global_variables(), for example by searching for them by name.
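
A minimal sketch of that lookup (the names follow the name= arguments in the question's weights dictionary, e.g. 'wC0_1', and the ':0' suffix selects the variable's output tensor):

graph = tf.get_default_graph()
wConv0_1 = graph.get_tensor_by_name('wC0_1:0')

# or build a lookup table over everything that was restored:
restored = {v.name: v for v in tf.global_variables()}
wConv0_1 = restored['wC0_1:0']
print(sess.run(wConv0_1))   # prints the trained weights instead of None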