How to train a model with TensorFlowSharp

Date: 2018-03-15 04:47:22

Tags: c# tensorflow

I am new to TensorFlowSharp. I have seen some examples of loading a model and making predictions with TensorFlowSharp, but I cannot find any example of training a model with it. I gave it a try and am now stuck at the optimizer: ApplyAdam takes too many parameters, and I am not even sure it is the right function to use.

Below is my working TensorFlow (Python) code.

import pandas as pd
import tensorflow as tf

dataX = pd.read_csv('dataX.csv', sep = ',', header = None)
dataY = pd.read_csv('dataY.csv', sep = ',', header = None)
x = tf.placeholder(tf.float32, [None, dataX.shape[1]])
y = tf.placeholder(tf.float32, [None, 1])
W0 = tf.Variable(tf.random_normal([dataX.shape[1], h0size], seed = seed))
b0 = tf.Variable(tf.random_normal([h0size], seed = seed))
h = tf.matmul(x, W0) + b0
pred = tf.sigmoid(h)
cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels = y, logits = h))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(50):
        _, c = sess.run([optimizer, cost], feed_dict = {x: dataX, y: dataY})

Below is my attempt to convert it to TensorFlowSharp:

        using (var session = new TFSession())
        {
            var graph = session.Graph;
            //dataX = pd.read_csv('dataX.csv', sep = ',', header = None)
            //dataY = pd.read_csv('dataY.csv', sep = ',', header = None)
            float[,] aX = LoadCsv("dataX.csv");
            float[,] aY = LoadCsv("dataY.csv");
            TFTensor dataX = new TFTensor(aX);
            TFTensor dataY = new TFTensor(aY);
            //x = tf.placeholder(tf.float32, [None, trainX.shape[1]]) 
            //y = tf.placeholder(tf.float32, [None, 1]) 
            var x = graph.Placeholder(TFDataType.Float, new TFShape(dataX.Shape));
            var y = graph.Placeholder(TFDataType.Float, new TFShape(dataY.Shape));
            var W0 = graph.Variable(graph.RandomNormal(new TFShape(dataX.Shape[1], 1)));
            var b0 = graph.Variable(graph.RandomNormal(new TFShape(1)));
            var h = graph.Add(graph.MatMul(x, W0), b0);
            var pred = graph.Sigmoid(h);
            var cost = graph.ReduceMean(graph.SigmoidCrossEntropyWithLogits(y,h));
            //optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
            // Too many parameters for ApplyAdam, not even sure this is the right function to use. 
            TFOutput var, m = 0, v = 0, beta1_power, beta2_power, lr = 0.01, beta1 = 0.9, beta2 = 0.999, epsilon = 0.00000001, grad;
            var optimizer = graph.ApplyAdam(var, m, v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad);
            for (int i = 0; i < 50 ; i++)
                session.GetRunner().Run(
                    inputs: new[] { optimizer },
                    inputValues: new[] { dataX, dataY }
                    );
        }

1 answer:

Answer 0 (score: 2)

The TensorFlowSharp API does not yet provide optimizer classes, so to minimize the cost function you have to compute the gradients through the API and then update the model's trainable parameters manually.
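The core of the pattern is just two graph calls: AddGradients creates the gradient nodes of the cost with respect to each trainable variable, and ApplyGradientDescent creates one update op per variable, which you then fetch on every training step. A minimal sketch, assuming graph is the TFGraph, cost is the loss node, and W and b are variable outputs created with VariableV2 (as in the classes below):

// Minimal sketch only; the names graph, cost, W and b are assumed from the surrounding example.
TFOutput alpha = graph.Const(0.01f);   // learning rate

// d(cost)/dW and d(cost)/db
TFOutput[] grads = graph.AddGradients(new TFOutput[] { cost }, new TFOutput[] { W, b });

// In-place gradient-descent update ops; fetch both once per training iteration.
TFOutput updateW = graph.ApplyGradientDescent(W, alpha, grads[0]);
TFOutput updateB = graph.ApplyGradientDescent(b, alpha, grads[1]);

The example below wraps the same pattern in a few separate classes for convenience: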

class Model : IDisposable
{
    TFSession _session;

    TFGraph _graph;

    TFOutput _input;

    TFOutput _output;

    LinearLayer _y_out;

    TFOutput _cost;

    TFTensor _dataX;

    TFTensor _dataY;

    GradientDescentOptimizer _gradientDescentOptimizer;


    public Model()
    {
        float[,] aX = LoadCsv("dataX.csv");
        float[,] aY = LoadCsv("dataY.csv");
        _dataX = new TFTensor(aX);
        _dataY = new TFTensor(aY);

        _session = new TFSession();

        _graph = _session.Graph;

        _input = _graph.Placeholder(TFDataType.Float);
        _output = _graph.Placeholder(TFDataType.Float);

        // The number of input features is the second dimension of dataX.
        _y_out = new LinearLayer(_graph, _input, (int)_dataX.Shape[1], 1);

        _cost = _graph.ReduceMean(_graph.SigmoidCrossEntropyWithLogits(_y_out.Result, _output));

        _gradientDescentOptimizer = new GradientDescentOptimizer(_graph, _cost, _y_out.W, _y_out.b);
        _gradientDescentOptimizer.ApplyGradientDescent(_graph);

        var runner = _session.GetRunner();

        // Run both variable initializers once before training.
        runner.AddTarget(_y_out.InitW.Operation);
        runner.AddTarget(_y_out.InitB.Operation);

        runner.Run();
    }

    public void TrainModelIteration()
    {
        var runner = _session.GetRunner();

        runner.AddInput(_input, _dataX);
        runner.AddInput(_output, _dataY);


        // Fetching both update ops applies one gradient-descent step to W and b.
        for (int i = 0; i < 2; i++)
        {
            runner.Fetch(_gradientDescentOptimizer.Updates[i]);
        }

        runner.Run();
    }


    public void Dispose()
    {
        _graph.Dispose();
        _session.Dispose();
    }
}


class LinearLayer
{
    public TFOutput Result { get; set; }

    public TFOutput W { get; set; }

    public TFOutput b { get; set; }

    public TFOutput InitW { get; set; }

    public TFOutput InitB { get; set; }

    public LinearLayer(TFGraph graph, TFOutput x, int inSize, int outSize)
    {
        var wShape = new TFShape(inSize, outSize);

        // VariableV2 creates the variable node; Assign produces the op that fills it
        // with random initial values (run once, see the Model constructor).
        W = graph.VariableV2(wShape, TFDataType.Float);

        TFOutput tfOutputWShape = graph.Const(wShape);
        TFOutput initialW = graph.RandomUniform(tfOutputWShape, TFDataType.Float);

        InitW = graph.Assign(W, initialW);


        var bShape = new TFShape(outSize);

        b = graph.VariableV2(bShape, TFDataType.Float);

        TFOutput tfOutputBShape = graph.Const(bShape);
        TFOutput initialB = graph.RandomUniform(tfOutputBShape, TFDataType.Float);

        InitB = graph.Assign(b, initialB);

        var matMul = graph.MatMul(x, W);

        Result = graph.Add(matMul, b);
    }
}


class GradientDescentOptimizer
{
    private TFOutput[] _variables;

    public TFOutput[] Updates { get; set; }

    private TFOutput[] _gradients;

    public GradientDescentOptimizer(TFGraph graph, TFOutput cost, TFOutput w, TFOutput b)
    {
        _variables = new TFOutput[] { w, b };

        // Gradients of the cost with respect to each trainable variable.
        _gradients = graph.AddGradients(new TFOutput[] { cost }, new TFOutput[] { w, b });

        Updates = new TFOutput[2];
    }

    public void ApplyGradientDescent(TFGraph graph, float alpha = 0.01f)
    {
        TFOutput tfAlpha = graph.Const(alpha);

        // One in-place gradient-descent update op per variable; fetching it runs the update.
        for (int i = 0; i < _variables.Length; i++)
        {
            Updates[i] = graph.ApplyGradientDescent(_variables[i], tfAlpha, _gradients[i]);
        }
    }
}
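
For completeness, here is a rough sketch of how the Model class above might be driven. The Program wrapper and epoch count are assumptions (the epoch count just mirrors the 50-epoch loop in the question's Python code), and LoadCsv is assumed to be the same helper the question already uses; neither is part of the original answer.

class Program
{
    static void Main()
    {
        // Constructing Model builds the graph and runs the variable initializers once.
        using (var model = new Model())
        {
            // Assumed epoch count, mirroring the Python code's 50 epochs.
            for (int epoch = 0; epoch < 50; epoch++)
            {
                model.TrainModelIteration();
            }
        }
    }
}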