我想用 C++ 对我的模型进行推理,因为我想知道它是否比 Python API 更快。
我使用在 Stack Overflow 上找到的脚本,将 checkpoint 模型转换为 *.pb 文件(https://stackoverflow.com/a/45868106/9356668)。然后,我以这个项目(https://github.com/lysukhin/tensorflow-object-detection-cpp)作为起点。通过尝试不同的节点名称,我可以确定 TensorFlow 找到了输入和输出张量。另外,通过打印输入张量的内容,我也确认了来自 cv::Mat 的数据已被写入输入张量。
class ConvolutionalNetworkDuo:
    """TensorFlow 1.x graph with a shared convolutional encoder and two heads.

    Constructing an instance adds every op to the current default graph.
    The graph exposes these named tensors:

    * ``network_input``      - float32 placeholder of shape [None, 350, 350, 1]
    * ``keep_prob1``         - float32 placeholder fed to ``tf.nn.dropout``
    * ``in_train_mode``      - bool placeholder (created here, not consumed
      by any op built in this class)
    * ``prediction``         - argmax over the channel axis of the decoder
    * ``classification_out`` - softmax over the channel axis of the decoder
    * ``regression_out``     - output of the fully connected regression head

    NOTE: the order in which variables and ops are created determines their
    auto-generated TensorFlow names, so statements must not be reordered if
    existing checkpoints are to remain loadable.
    """

    def __init__(self):
        """Build the encoder, the decoder head and the regression head."""
        self.NUM_CHANNELS = 1
        self.NUM_OF_CLASSESS = 3
        # Running counter used to give ops unique, readable names.
        self.layer_number = 0
        self.logits = None

        self.inputs = tf.placeholder(tf.float32, [None, 350, 350, 1], name='network_input')
        self.keep_prob1 = tf.placeholder(tf.float32, name='keep_prob1')
        self.in_training_mode = tf.placeholder(tf.bool, name='in_train_mode')

        # Switches that freeze/unfreeze the three parts of the network.
        self.train_encode = True
        self.train_decode = True
        self.train_regression = True

        # ---- Encoder -----------------------------------------------------
        conv1 = self.convolutional_layer(self.inputs, [3, 3, self.NUM_CHANNELS, 32], 2, tf.nn.elu, trainable=self.train_encode)
        conv3 = self.convolutional_layer(conv1, [3, 3, 32, 32], 2, tf.nn.elu, trainable=self.train_encode)
        conv5 = self.convolutional_layer(conv3, [3, 3, 32, 32], 1, tf.nn.elu, trainable=self.train_encode)
        conv8 = self.convolutional_layer(conv5, [3, 3, 32, 32], 2, tf.nn.elu, trainable=self.train_encode)
        conv9 = self.convolutional_layer(conv8, [5, 5, 32, 64], 2, tf.nn.elu, trainable=self.train_encode)
        conv13 = self.convolutional_layer(conv9, [5, 5, 64, 128], 1, tf.nn.elu, trainable=self.train_encode)
        conv16 = self.convolutional_layer(conv13, [5, 5, 128, 128], 2, tf.nn.elu, trainable=self.train_encode)
        conv17 = self.convolutional_layer(conv16, [5, 5, 128, 256], 1, tf.nn.elu, trainable=self.train_encode)
        conv18 = self.convolutional_layer(conv17, [1, 1, 256, 128], 1, tf.nn.elu, trainable=self.train_encode)
        # No `trainable` argument here, so this layer uses the default (True).
        conv19 = self.convolutional_layer(conv18, [5, 5, 128, self.NUM_OF_CLASSESS], 1, tf.nn.elu)

        # ---- Bottleneck: dropout + fully connected layer -------------------
        dropout = tf.nn.dropout(conv19, keep_prob=self.keep_prob1, name='conv_dropout_' + str(self.layer_number))
        _, w, h, c = dropout.get_shape().as_list()
        dropout = tf.reshape(dropout, [-1, w * h * c])
        out_weights1 = tf.get_variable('fcl1_weight', shape=[w * h * c, w * h * c], trainable=self.train_encode)
        out_bias1 = tf.Variable(tf.random_normal([w * h * c], stddev=0.1), name='fcl1_bias', trainable=self.train_encode)
        dropout = tf.matmul(dropout, out_weights1) + out_bias1
        dropout = tf.nn.elu(dropout)
        # Back to a feature map so it can feed the transposed convolutions.
        dropout = tf.reshape(dropout, [-1, w, h, c])

        # ---- Decoder with skip connections to conv9 and conv8 --------------
        deconv1 = self.deconvolutional_layer(dropout, [4, 4, 64, self.NUM_OF_CLASSESS], tf.shape(conv9), activation=tf.nn.elu, stride=2, trainable=self.train_decode)
        fuse1 = tf.add(deconv1, conv9, name='conv_fuse_' + str(self.layer_number))
        deconv2 = self.deconvolutional_layer(fuse1, [4, 4, 32, 64], tf.shape(conv8), activation=tf.nn.elu, trainable=self.train_decode)
        fuse2 = tf.add(deconv2, conv8, name='conv_fuse_' + str(self.layer_number))

        # Output shape of the last deconvolution: dynamic batch size, full
        # 350x350 resolution, one channel per class.
        shape = tf.shape(self.inputs)
        deconv_shape2 = tf.stack([shape[0], 350, 350, self.NUM_OF_CLASSESS], name='conv_stack_' + str(self.layer_number))
        self.layer_number += 1
        deconv3 = self.deconvolutional_layer(fuse2, [16, 16, self.NUM_OF_CLASSESS, 32], deconv_shape2, stride=8, activation=tf.nn.elu, trainable=self.train_decode)
        self.logits1 = deconv3

        annotation_pred = tf.argmax(deconv3, axis=3, name="prediction")
        annotation_pred = tf.expand_dims(annotation_pred, axis=3)
        annotation_pred = tf.cast(annotation_pred, tf.uint8)
        self.annotation = annotation_pred
        self.heatmap = tf.nn.softmax(self.logits1, axis=3, name='classification_out')

        # ---- Regression head on top of the bottleneck feature map ----------
        _, w, h, c = dropout.get_shape().as_list()
        coord = tf.reshape(dropout, [-1, w * h * c])
        out_weights2 = tf.get_variable('fcl2_weight', shape=[w * h * c, 128], trainable=self.train_regression)
        out_bias2 = tf.Variable(tf.random_normal([128], stddev=0.1), name='fcl2_bias', trainable=self.train_regression)
        out_weights3 = tf.get_variable('fcl3_weight', shape=[128, 3], trainable=self.train_regression)
        out_bias3 = tf.Variable(tf.random_normal([3], stddev=0.1), name='fcl3_bias', trainable=self.train_regression)
        coord = tf.matmul(coord, out_weights2) + out_bias2
        coord = tf.nn.elu(coord)
        self.logits2 = tf.add(tf.matmul(coord, out_weights3), out_bias3, name='regression_out')

    def weight_variable(self, shape):
        """Return a variable of `shape` initialised from a truncated normal (stddev 0.1)."""
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    def bias_variable(self, shape):
        """Return a variable of `shape` initialised to the constant 0.1."""
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    def max_pool_2x2(self, x):
        """Apply 2x2 max pooling with stride 2 and SAME padding to `x`."""
        res = tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                             strides=[1, 2, 2, 1], padding='SAME', name='conv_pool_' + str(self.layer_number))
        self.layer_number += 1
        return res

    def convolutional_layer(self, x, shape, stride=1, activation=None, trainable=True):
        """Add a conv2d + bias (+ optional activation) layer to the graph.

        Args:
            x: input tensor passed to ``tf.nn.conv2d``.
            shape: filter shape ``[height, width, in_channels, out_channels]``.
            stride: spatial stride used for both height and width.
            activation: optional callable applied to the biased output.
            trainable: whether the filter and bias variables are trainable.

        Returns:
            The output tensor of the layer.
        """
        W = tf.Variable(tf.truncated_normal(shape, stddev=0.1), trainable=trainable)
        conv = tf.nn.conv2d(x, W, strides=[1, stride, stride, 1], padding='SAME', name='conv_conv_' + str(self.layer_number))
        self.layer_number += 1
        # One bias per output channel (last entry of the filter shape).
        b = tf.Variable(tf.constant(0.1, shape=[shape[-1]]), trainable=trainable)
        conv = tf.add(conv, b)
        if activation:
            conv = activation(conv)
        # Built as one string so the call is valid on Python 2 and Python 3 and
        # prints the same text as the former `print a, b` statement. The label
        # uses the counter after it was incremented above.
        print('conv_' + str(self.layer_number) + ' ' + str(conv.shape))
        return conv

    def deconvolutional_layer(self, x, shape, output_shape, stride=2, activation=None, trainable=True):
        """Add a conv2d_transpose + bias (+ optional activation) layer to the graph.

        Args:
            x: input tensor passed to ``tf.nn.conv2d_transpose``.
            shape: filter shape ``[height, width, out_channels, in_channels]``.
            output_shape: shape of the tensor the transposed convolution produces.
            stride: spatial stride used for both height and width.
            activation: optional callable applied to the biased output.
            trainable: whether the filter and bias variables are trainable.

        Returns:
            The output tensor of the layer.
        """
        W = tf.Variable(tf.truncated_normal(shape, stddev=0.1), trainable=trainable)
        conv = tf.nn.conv2d_transpose(x, W, output_shape, strides=[1, stride, stride, 1], padding="SAME", name='conv_deconv_' + str(self.layer_number))
        self.layer_number += 1
        # For transposed convolutions the output channel count is the
        # second-to-last entry of the filter shape.
        b = tf.Variable(tf.constant(0.1, shape=[shape[-2]]), trainable=trainable)
        conv = tf.add(conv, b)
        if activation:
            conv = activation(conv)
        # Single-string form keeps the output identical on Python 2 and 3.
        print('deconv_' + str(self.layer_number) + ' ' + str(conv.shape))
        return conv
这是我用 Python 训练的模型。用于输入灰度图像的输入占位符名为“network_input”,两个输出分别是“classification_out”和“regression_out”。
int main(int argc, char* argv[]) {
// Set dirs variables
string ROOTDIR = "../";
string LABELS = "demo/ssd_mobilenet_v1_egohands/labels_map.pbtxt";
//string GRAPH = "demo/ssd_mobilenet_v1_egohands/frozen_inference_graph.pb";
string GRAPH = "demo/ssd_mobilenet_v1_egohands/output_graph.pb";
// Set input & output nodes names
string inputLayer = "network_input";
vector<string> outputLayer = {"regression_out", "classification_out"};
// Load and initialize the model from .pb file
std::unique_ptr<tensorflow::Session> session;
string graphPath = tensorflow::io::JoinPath(ROOTDIR, GRAPH);
LOG(INFO) << "graphPath:" << graphPath;
Status loadGraphStatus = loadGraph(graphPath, &session);
if (!loadGraphStatus.ok()) {
LOG(ERROR) << "loadGraph(): ERROR" << loadGraphStatus;
return -1;
} else
LOG(INFO) << "loadGraph(): frozen graph loaded" << endl;
// Load labels map from .pbtxt file
std::map<int, std::string> labelsMap = std::map<int,std::string>();
Status readLabelsMapStatus = readLabelsMapFile(tensorflow::io::JoinPath(ROOTDIR, LABELS), labelsMap);
if (!readLabelsMapStatus.ok()) {
LOG(ERROR) << "readLabelsMapFile(): ERROR" << loadGraphStatus;
return -1;
} else
LOG(INFO) << "readLabelsMapFile(): labels map loaded with " << labelsMap.size() << " label(s)" << endl;
Mat frame;
Tensor tensor;
std::vector<Tensor> outputs;
double thresholdScore = 0.5;
double thresholdIOU = 0.8;
// FPS count
int nFrames = 25;
int iFrame = 0;
double fps = 0.;
time_t start, end;
time(&start);
// Start streaming frames from camera
VideoCapture cap(0);
tensorflow::TensorShape shape = tensorflow::TensorShape();
shape.AddDim(1);
shape.AddDim(350);
shape.AddDim(350);
shape.AddDim(1);
while (cap.isOpened()) {
frame = imread("/home/sascha/tensorflow-object-detection-cpp/in.jpg");
imshow("bam", frame);
waitKey(1000);
cout << "Frame # " << iFrame << endl;
if (nFrames % (iFrame + 1) == 0) {
time(&end);
fps = 1. * nFrames / difftime(end, start);
time(&start);
}
iFrame++;
// Convert mat to tensor
cout << "Convert mat to tensor" << endl;
tensor = Tensor(tensorflow::DT_FLOAT, shape);
Status readTensorStatus = readTensorFromMat(frame, tensor);
if (!readTensorStatus.ok()) {
LOG(ERROR) << "Mat->Tensor conversion failed: " << readTensorStatus;
return -1;
}
// Run the graph on tensor
cout << "Run the graph on tensor" << endl;
outputs.clear();
tensorflow::Tensor keep_prob = tensorflow::Tensor(tensorflow::DT_FLOAT, tensorflow::TensorShape());
keep_prob.scalar<float>()() = 1.0;
cout << tensor.shape() << endl;
cout << keep_prob.shape() << endl;
Status runStatus = session->Run({{inputLayer, tensor}, {"keep_prob1", keep_prob}}, outputLayer, {}, &outputs);
if (!runStatus.ok()) {
LOG(ERROR) << "Running model failed: " << runStatus;
return -1;
}
else {cout << "done." << endl;}
这是我在main.cc中更改的代码片段。
// Copies an OpenCV image into an already allocated float tensor.
//
// Parameters:
//   mat       - source image; its size must match the tensor's height/width.
//               A 3- or 4-channel image is converted to grayscale when the
//               tensor has a single channel.
//   outTensor - pre-allocated DT_FLOAT tensor of shape
//               [1, height, width, channels]; it is filled in place.
//
// Returns Status::OK() on success, an InvalidArgument status when the image
// and the tensor are incompatible, and an Internal status if OpenCV did not
// write into the tensor's buffer.
//
// Needs cvtColor from <opencv2/imgproc.hpp> and tensorflow::errors from
// "tensorflow/core/lib/core/errors.h" to be visible in this translation unit.
Status readTensorFromMat(const Mat &mat, Tensor &outTensor) {
    if (mat.empty()) {
        return tensorflow::errors::InvalidArgument("readTensorFromMat(): input image is empty");
    }
    if (outTensor.dtype() != tensorflow::DT_FLOAT || outTensor.dims() != 4 || outTensor.dim_size(0) != 1) {
        return tensorflow::errors::InvalidArgument(
                "readTensorFromMat(): expected a DT_FLOAT tensor of shape [1, height, width, channels], got ",
                outTensor.shape().DebugString());
    }

    const int height = static_cast<int>(outTensor.dim_size(1));
    const int width = static_cast<int>(outTensor.dim_size(2));
    const int channels = static_cast<int>(outTensor.dim_size(3));

    if (mat.rows != height || mat.cols != width) {
        return tensorflow::errors::InvalidArgument(
                "readTensorFromMat(): image is ", mat.rows, "x", mat.cols,
                " but the tensor expects ", height, "x", width);
    }

    // Bring the image to the channel count the tensor was allocated for.
    // imread() returns 3-channel BGR by default, while the network input has
    // a single (grayscale) channel.
    Mat source;
    if (mat.channels() == channels) {
        source = mat;
    } else if (channels == 1 && mat.channels() == 3) {
        cvtColor(mat, source, COLOR_BGR2GRAY);
    } else if (channels == 1 && mat.channels() == 4) {
        cvtColor(mat, source, COLOR_BGRA2GRAY);
    } else {
        return tensorflow::errors::InvalidArgument(
                "readTensorFromMat(): image has ", mat.channels(),
                " channel(s) but the tensor expects ", channels);
    }

    // Trick from https://github.com/tensorflow/tensorflow/issues/8033
    // Wrap the tensor's buffer in a Mat header so convertTo() writes the
    // converted pixels straight into the tensor. The header must describe the
    // buffer exactly: the previous CV_32FC3 header over a 1-channel buffer made
    // convertTo() write three times as many floats as the tensor holds.
    float *p = outTensor.flat<float>().data();
    Mat fakeMat(height, width, CV_32FC(channels), p);
    source.convertTo(fakeMat, CV_32FC(channels));

    // convertTo() reallocates its destination when size or type do not match,
    // which would leave the tensor untouched. Detect that instead of returning
    // a tensor with stale contents.
    if (fakeMat.data != reinterpret_cast<unsigned char *>(p)) {
        return tensorflow::errors::Internal("readTensorFromMat(): OpenCV did not write into the tensor buffer");
    }

    // Diagnostic dump of the tensor contents. Values are printed as floats
    // (they used to be truncated to int) and the bound follows the tensor size
    // instead of a hard-coded 350*350.
    const long long count = outTensor.NumElements();
    for (long long i = 0; i < count; i++) {
        float val = p[i];
        cout << i << ":" << val << endl;
    }

    return Status::OK();
}
这是修改后的图像读取函数:原始函数读取 3 通道 RGB 图像,而我需要的是 1 通道的浮点张量。
输出如下所示(还有一些 CUDA 输出,但与问题无关):
2019-02-19 11:15:24.403692: I /home/sascha/tensorflow-object-detection-cpp/main.cpp:59] loadGraph(): frozen graph loaded
2019-02-19 11:15:24.404093: I /home/sascha/tensorflow-object-detection-cpp/main.cpp:69] readLabelsMapFile(): labels map loaded with 1 label(s)
Frame # 0
Convert mat to tensor
Run the graph on tensor
[1,350,350,1]
[]
2019-02-19 11:15:26.305366: F tensorflow/core/framework/tensor_shape.cc:44] Check failed: NDIMS == dims() (2 vs. 4)Asking for tensor of 2 dimensions from a tensor of 4 dimensions
Process finished with exit code 134 (interrupted by signal 6: SIGABRT)
这条错误消息对我并没有什么帮助。我不知道是哪个张量出了问题,也不知道该如何排查。由于该模型的 Python 版本在训练和推理时都运行正常,模型本身有问题的可能性不大。所以我猜测是在加载输入张量或类似环节上犯了错误。我不常使用 C++,这也是我第一次使用 TensorFlow 的 C++ API。希望有人能帮助我,先谢谢了!如果需要任何其他信息,请告诉我。