TFlearn / TensorFlow approach slower than expected

Date: 2017-09-02 11:27:28

Tags: python optimization machine-learning deep-learning numeric

I am working on an engineering problem involving numerical optimization. Usually I use metamodel approaches such as Kriging, but now I want to try the new popular stuff. Each data point here describes a machine part by 7 geometric parameters, for which I obtain one power value. Once the data is fitted, I want to see whether I can optimize the part with a simple particle swarm algorithm running on the network, as sketched below.
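To make that last step concrete, here is a minimal particle-swarm sketch (not from the original post; the hyper-parameters, the [0, 1] bounds and the choice to maximize are illustrative assumptions, and predict stands in for the trained network's prediction function):

    import numpy as np

    def pso(predict, dim=7, n_particles=30, n_iter=200, w=0.7, c1=1.5, c2=1.5):
        """Maximize predict(x) over x in [0, 1]^dim; flip the sign to minimize."""
        rng = np.random.RandomState(0)
        pos = rng.rand(n_particles, dim)              # particle positions
        vel = np.zeros_like(pos)                      # particle velocities
        pbest = pos.copy()                            # per-particle best positions
        pbest_val = np.array([predict(p) for p in pos])
        gbest = pbest[pbest_val.argmax()].copy()      # swarm-wide best position
        for _ in range(n_iter):
            r1 = rng.rand(n_particles, dim)
            r2 = rng.rand(n_particles, dim)
            vel = w * vel + c1 * r1 * (pbest - pos) + c2 * r2 * (gbest - pos)
            pos = np.clip(pos + vel, 0.0, 1.0)        # keep parameters in bounds
            vals = np.array([predict(p) for p in pos])
            better = vals > pbest_val
            pbest[better] = pos[better]
            pbest_val[better] = vals[better]
            gbest = pbest[pbest_val.argmax()].copy()
        return gbest, pbest_val.max()

    # hypothetical usage once a model is trained:
    # best_x, best_val = pso(lambda p: model.predict([p])[0][0])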

I wrote my own simple artificial neural network with a couple of hidden layers in numpy. Small networks (e.g. 7,30,30,1) run at several hundred epochs per second; larger nets (e.g. 7,999,999,1) show a noticeable slowdown. Now I want to harness the power of my GPU (Quadro M4000).
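For context, a minimal sketch of the kind of hand-rolled numpy network mentioned above (the layer sizes, sigmoid activation, learning rate and full-batch gradient descent are illustrative assumptions, not my exact implementation):

    import numpy as np

    rng = np.random.RandomState(0)
    X = rng.rand(48, 7)                 # stand-in for the 7 geometric parameters
    Y = rng.rand(48, 1)                 # stand-in for the power values

    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    # 7-30-30-1 network, full-batch gradient descent on mean squared error
    W1, b1 = rng.randn(7, 30) * 0.1, np.zeros(30)
    W2, b2 = rng.randn(30, 30) * 0.1, np.zeros(30)
    W3, b3 = rng.randn(30, 1) * 0.1, np.zeros(1)

    lr = 0.01
    for epoch in range(1000):
        h1 = sigmoid(X.dot(W1) + b1)            # forward pass
        h2 = sigmoid(h1.dot(W2) + b2)
        out = h2.dot(W3) + b3
        err = (out - Y) / len(X)                # MSE gradient w.r.t. the output
        d2 = err.dot(W3.T) * h2 * (1 - h2)      # backpropagate through layer 2
        d1 = d2.dot(W2.T) * h1 * (1 - h1)       # backpropagate through layer 1
        W3 -= lr * h2.T.dot(err); b3 -= lr * err.sum(axis=0)
        W2 -= lr * h1.T.dot(d2);  b2 -= lr * d2.sum(axis=0)
        W1 -= lr * X.T.dot(d1);   b1 -= lr * d1.sum(axis=0)

Here is the TFlearn code I want to use instead, a modified version of the MNIST example I found online: https://github.com/tflearn/tflearn/blob/master/examples/images/dnn.py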

# -*- coding: utf-8 -*-

from __future__ import division, print_function, absolute_import
import tflearn
import tensorflow as tf
import numpy as np

with tf.device('/gpu:0'):
    # Data loading and preprocessing. 7 geometric parameters with one solution (last row) each
    Train = np.array([[0.848,0.994,0.475,0.120,0.141,0.670,0.156,-0.29750],
            [0.761,0.221,0.414,0.825,0.429,0.174,0.804,12.28900],
            [0.915,0.891,0.177,0.936,0.503,0.820,0.028,4.46918],
            [0.730,0.203,0.660,0.065,0.124,0.326,0.183,10.51110],
            [0.071,0.247,0.512,0.568,0.395,0.149,0.881,12.31750],
            [0.650,0.836,0.165,0.712,0.334,0.764,0.134,10.23280],
            [0.144,0.261,0.605,0.208,0.557,0.640,0.281,12.26680],
            [0.584,0.301,0.959,0.460,0.987,0.300,0.107,9.39676],
            [0.384,0.555,0.276,0.755,0.718,0.200,0.249,12.49240],
            [0.278,0.080,0.376,0.857,0.462,0.900,0.559,10.70610],
            [0.740,0.514,0.086,0.968,0.443,0.923,0.641,9.75352],
            [0.483,0.103,0.945,0.547,0.822,0.784,0.614,10.08490],
            [0.871,0.732,0.117,0.881,0.883,0.594,0.759,10.22090],
            [0.334,0.332,0.547,0.392,0.150,0.075,0.726,12.01180],
            [0.558,0.464,0.083,0.486,0.651,0.975,0.065,8.93398],
            [0.154,0.775,0.781,0.047,0.944,0.689,0.676,10.59200],
            [0.616,0.145,0.195,0.337,0.269,0.266,0.746,11.55840],
            [0.260,0.296,0.922,0.005,0.028,0.465,0.379,1.95064],
            [0.005,0.757,0.554,0.731,0.906,0.163,0.581,12.48950],
            [0.542,0.876,0.025,0.138,0.250,0.747,0.783,8.96552],
            [0.353,0.363,0.844,0.175,0.803,0.287,0.905,10.94560],
            [0.785,0.701,0.491,0.067,0.076,0.656,0.079,11.47360],
            [0.826,0.181,0.530,0.789,0.305,0.395,0.843,12.08160],
            [0.450,0.153,0.790,0.609,0.006,0.768,0.626,9.92177],
            [0.212,0.192,0.595,0.245,0.776,0.866,0.093,11.61500],
            [0.066,0.279,0.866,0.219,0.789,0.574,0.899,11.29240],
            [0.037,0.428,0.361,0.564,0.378,0.946,0.967,11.27440],
            [0.690,0.903,0.285,1.000,0.058,0.233,0.476,11.69370],
            [0.027,0.450,0.228,0.899,0.955,0.449,0.346,11.38700],
            [0.240,0.032,0.687,0.372,0.329,0.844,0.309,11.53640],
            [0.187,0.665,0.301,0.423,0.114,0.868,0.216,11.86770],
            [0.505,0.499,0.201,0.737,0.179,0.120,0.484,12.45780],
            [0.578,0.397,0.821,0.661,0.212,0.337,0.036,4.31481],
            [0.949,0.646,0.892,0.586,0.702,0.536,0.416,1.93674],
            [0.885,0.789,0.994,0.312,0.227,0.037,0.350,0.05622],
            [0.957,0.123,0.317,0.848,0.549,0.510,0.705,11.63030],
            [0.110,0.692,0.384,0.269,0.933,0.113,0.423,12.74620],
            [0.127,0.678,0.008,0.351,0.649,0.620,0.860,10.38200],
            [0.228,0.940,0.247,0.099,0.689,0.093,0.956,11.63780],
            [0.403,0.063,0.464,0.904,0.091,0.401,0.288,11.86670],
            [0.976,0.088,0.718,0.966,0.261,0.816,0.992,10.76000],
            [0.801,0.475,0.871,0.510,0.613,0.885,0.261,8.64352],
            [0.710,0.413,0.262,0.299,0.864,0.426,0.517,12.07250],
            [0.421,0.919,0.764,0.627,0.620,0.729,0.539,1.79113],
            [0.446,0.971,0.338,0.692,0.187,0.061,0.121,12.02700],
            [0.861,0.539,0.901,0.319,0.481,0.369,0.231,1.28605],
            [0.367,0.809,0.675,0.112,0.574,0.701,0.200,10.91850],
            [0.288,0.042,0.618,0.264,0.848,0.355,0.513,12.04380]])

    Y = Train[:, 7].reshape([48,1])
    X = np.delete(Train, (7), axis=1)

    Test = np.array([[0.088,0.857,0.448,0.156,0.364,0.487,0.007,12.21500],
            [0.653,0.371,0.645,0.767,0.034,0.526,0.386,11.5270],
            [0.618,0.008,0.573,0.929,0.284,0.556,0.588,11.55110],
            [0.531,0.348,0.109,0.190,0.674,0.985,0.785,9.66132],
            [0.768,0.609,0.744,0.024,0.969,0.009,0.331,9.49345],
            [0.177,0.523,0.974,0.814,0.736,0.611,0.451,9.61419],
            [0.919,0.817,0.057,0.530,0.415,0.467,0.693,10.44850],
            [0.310,0.628,0.804,0.672,0.598,0.244,0.652,10.37430],
            [0.430,0.900,0.480,0.700,0.620,0.300,0.600,11.77790],
            [0.318,0.741,0.142,0.405,0.532,0.189,0.936,11.86930]])

    testY = Test[:, 7].reshape([10,1])
    testX = np.delete(Test, (7), axis=1)    


    # Building deep neural network

    input_layer = tflearn.input_data(shape=[None, 7])
    dense1 = tflearn.fully_connected(input_layer, 70, activation='relu',
                                    regularizer='L2', weight_decay=0.001)
    dropout1 = tflearn.dropout(dense1, 0.8)
    dense2 = tflearn.fully_connected(dropout1, 70, activation='tanh',
                                    regularizer='L2', weight_decay=0.001)
    dropout2 = tflearn.dropout(dense2, 0.8)
    linear = tflearn.fully_connected(dropout2, 1, activation='linear')

    # Regression using SGD
    sgd = tflearn.optimizers.SGD(learning_rate=0.01, lr_decay=0.96, decay_step=100)
    net = tflearn.regression(linear, optimizer=sgd, loss='mean_square',
                                    metric='R2', learning_rate=0.01)
with tf.device('/cpu:0'):       
    # Training
    model = tflearn.DNN(net) 
    model.fit(X, Y, n_epoch=200, validation_set=(testX, testY), batch_size=None,
              show_metric=True, run_id="dense_model")

    print("\nTest prediction")
    print(testY)
    print(model.predict(testX))

Now I have two questions:

  1. Basically I get this performance no matter what hidden-layer sizes I choose; more than a second per epoch! Even if it were only running on the CPU, it should be faster than that, shouldn't it?

         Training Step: 19 | total loss: 87.36314 | time: 1.019s
         Training Step: 20 | total loss: 86.11715 | time: 1.013s

  2. I wonder why using "with tf.device('/gpu:0'):" makes the complete code crash with the error message:

         tensorflow.python.framework.errors_impl.InvalidArgumentError: Node
         'init_3/NoOp': Unknown input node '^is_training/Assign'

EDIT: I fixed number 2 by adding these lines:

      tflearn.init_graph(num_cores=6, gpu_memory_fraction=0.2)
      config = tf.ConfigProto()
      config.gpu_options.allow_growth = True

The difference between 10 and 10000 nodes is negligibly small (~1.1 s vs. 1.7 s per epoch), so I have to assume there is some significant overhead somewhere else?
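One quick way to see where that time goes (a sketch, not from the original post; it reuses X, Y, testX, testY and net from above) is to time model.fit with and without the validation set:

    import time

    model = tflearn.DNN(net)

    start = time.time()
    model.fit(X, Y, n_epoch=50, show_metric=False, run_id="no_validation")
    print("without validation: %.3f s/epoch" % ((time.time() - start) / 50))

    start = time.time()
    model.fit(X, Y, n_epoch=50, validation_set=(testX, testY),
              show_metric=False, run_id="with_validation")
    print("with validation:    %.3f s/epoch" % ((time.time() - start) / 50))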

1 answer:

Answer 0: (score: 0)

OK, the answer is that the validation run at every epoch was taking all that time. Honestly I hadn't expected that, because as far as I know it is only a single forward computation and shouldn't take that long. Running the validation only every n-th training step fixed it, specifically:

model.fit(X, Y, n_epoch=2000, validation_set=(testX, testY), batch_size=None,
          snapshot_step=100, snapshot_epoch=False,
          show_metric=True, run_id="dense_model")
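
Here snapshot_epoch=False disables the end-of-epoch validation snapshot, and snapshot_step=100 runs it only every 100 training steps instead. If intermediate validation is not needed at all, another option (an assumption, not something tested in the original answer) is to drop validation_set from the fit call entirely and evaluate once after training:

    model.fit(X, Y, n_epoch=2000, batch_size=None,
              show_metric=True, run_id="dense_model")
    print(model.evaluate(testX, testY))  # single evaluation after training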