Question

我想用 C++ 对我的模型进行推理，因为我想知道它是否比 Python API 更快。

我使用了在 Stack Overflow 上找到的脚本（https://stackoverflow.com/a/45868106/9356668）将检查点（checkpoint）模型转换为 *.pb 文件，然后以这个项目（https://github.com/lysukhin/tensorflow-object-detection-cpp）作为起点。通过尝试不同的节点名称，我可以确认 TensorFlow 找到了输入和输出张量。打印张量内容后，我也确认 cv::Mat 中的数据已经写入了输入张量。

class ConvolutionalNetworkDuo:
def __init__(self):
    """Build the TensorFlow graph of the network.

    Creates the input placeholders, a stack of convolutional layers, a
    fully connected bottleneck, a transposed-convolution decoder with two
    additive skip connections, and a small fully connected head.

    Graph outputs exposed on the instance:
        self.logits1:    output of the last transposed convolution.
        self.annotation: uint8 argmax of ``logits1`` over axis 3, with that
                         axis re-added by ``tf.expand_dims``.
        self.heatmap:    softmax of ``logits1`` over axis 3
                         (op name 'classification_out').
        self.logits2:    output of the fully connected head
                         (op name 'regression_out').
    """
    self.NUM_CHANNELS = 1
    self.NUM_OF_CLASSESS = 3
    # Running counter appended to op names; the layer helpers increment it.
    self.layer_number = 0
    self.logits = None
    # Placeholders that can be fed by name: 'network_input', 'keep_prob1', 'in_train_mode'.
    self.inputs = tf.placeholder(tf.float32, [None, 350, 350, 1], name='network_input')
    self.keep_prob1 = tf.placeholder(tf.float32, name='keep_prob1')
    # NOTE(review): this placeholder is not referenced again in this constructor.
    self.in_training_mode = tf.placeholder(tf.bool, name='in_train_mode')
    # Flags forwarded as `trainable` to the variables of each network part.
    self.train_encode = True
    self.train_decode = True
    self.train_regression = True

    # Encoder: ten ELU convolutions. Arguments are (input, filter shape, stride, activation).
    conv1 = self.convolutional_layer(self.inputs, [3, 3, self.NUM_CHANNELS, 32], 2, tf.nn.elu, trainable=self.train_encode)
    conv3 = self.convolutional_layer(conv1, [3, 3, 32, 32], 2, tf.nn.elu, trainable=self.train_encode)
    conv5 = self.convolutional_layer(conv3, [3, 3, 32, 32], 1, tf.nn.elu, trainable=self.train_encode)
    conv8 = self.convolutional_layer(conv5, [3, 3, 32, 32], 2, tf.nn.elu, trainable=self.train_encode)
    conv9 = self.convolutional_layer(conv8, [5, 5, 32, 64], 2, tf.nn.elu, trainable=self.train_encode)
    conv13 = self.convolutional_layer(conv9, [5, 5, 64, 128], 1, tf.nn.elu, trainable=self.train_encode)
    conv16 = self.convolutional_layer(conv13, [5, 5, 128, 128], 2, tf.nn.elu, trainable=self.train_encode)
    conv17 = self.convolutional_layer(conv16, [5, 5, 128, 256], 1, tf.nn.elu, trainable=self.train_encode)
    conv18 = self.convolutional_layer(conv17, [1, 1, 256, 128], 1, tf.nn.elu, trainable=self.train_encode)
    # No `trainable` argument here, so the helper's default (True) applies.
    conv19 = self.convolutional_layer(conv18, [5, 5, 128, self.NUM_OF_CLASSESS], 1, tf.nn.elu)
    dropout = tf.nn.dropout(conv19, keep_prob=self.keep_prob1, name='conv_dropout_' + str(self.layer_number))

    # Fully connected bottleneck: flatten using the static shape, apply a
    # square weight matrix plus bias and ELU, then restore the spatial layout.
    # From here on `dropout` names the bottleneck output, not the dropout op.
    batch, w, h, c = dropout.get_shape().as_list()
    dropout = tf.reshape(dropout, [-1, w * h * c])
    out_weights1 = tf.get_variable('fcl1_weight', shape=[w * h * c, w * h * c], trainable=self.train_encode)
    out_bias1 = tf.Variable(tf.random_normal([w * h * c], stddev=0.1), name='fcl1_bias', trainable=self.train_encode)
    dropout = tf.matmul(dropout, out_weights1) + out_bias1
    dropout = tf.nn.elu(dropout)
    dropout = tf.reshape(dropout, [-1, w, h, c])

    # Decoder step 1: transposed convolution to the dynamic shape of conv9, then add conv9 (skip connection).
    deconv1 = self.deconvolutional_layer(dropout, [4, 4, 64, self.NUM_OF_CLASSESS], tf.shape(conv9), activation=tf.nn.elu, stride=2, trainable=self.train_decode)
    fuse1 = tf.add(deconv1, conv9, name='conv_fuse_' + str(self.layer_number))

    # Decoder step 2: transposed convolution (helper's default stride) to the shape of conv8, then add conv8.
    deconv2 = self.deconvolutional_layer(fuse1, [4, 4, 32, 64], tf.shape(conv8), activation=tf.nn.elu, trainable=self.train_decode)
    fuse2 = tf.add(deconv2, conv8, name='conv_fuse_' + str(self.layer_number))

    # Decoder step 3: stride-8 transposed convolution to [batch, 350, 350, NUM_OF_CLASSESS];
    # the batch size is read from the input at run time.
    shape = tf.shape(self.inputs)
    deconv_shape2 = tf.stack([shape[0], 350, 350, self.NUM_OF_CLASSESS], name='conv_stack_' + str(self.layer_number))
    self.layer_number += 1
    deconv3 = self.deconvolutional_layer(fuse2, [16, 16, self.NUM_OF_CLASSESS, 32], deconv_shape2, stride=8, activation=tf.nn.elu, trainable=self.train_decode)
    self.logits1 = deconv3
    # Class index per position: argmax over axis 3 (op name "prediction"),
    # with the reduced axis added back and the result cast to uint8.
    annotation_pred = tf.argmax(deconv3, axis=3, name="prediction")
    annotation_pred = tf.expand_dims(annotation_pred, axis=3)
    annotation_pred = tf.cast(annotation_pred, tf.uint8)
    self.annotation = annotation_pred
    self.heatmap = tf.nn.softmax(self.logits1, axis=3, name='classification_out')

    # Fully connected head on the bottleneck output: flatten -> 128 units (ELU) -> 3 outputs.
    batch, w, h, c = dropout.get_shape().as_list()
    coord = tf.reshape(dropout, [-1, w * h * c])
    out_weights2 = tf.get_variable('fcl2_weight', shape=[w * h * c, 128], trainable=self.train_regression)
    out_bias2 = tf.Variable(tf.random_normal([128], stddev=0.1), name='fcl2_bias', trainable=self.train_regression)

    out_weights3 = tf.get_variable('fcl3_weight', shape=[128, 3], trainable=self.train_regression)
    out_bias3 = tf.Variable(tf.random_normal([3], stddev=0.1), name='fcl3_bias', trainable=self.train_regression)

    coord = tf.matmul(coord, out_weights2) + out_bias2
    coord = tf.nn.elu(coord)
    self.logits2 = tf.add(tf.matmul(coord, out_weights3), out_bias3, name='regression_out')

def weight_variable(self, shape):
    """Return a new ``tf.Variable`` initialised from a truncated normal (stddev 0.1) of the given shape."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(self, shape):
    """Return a new ``tf.Variable`` of the given shape with every element initialised to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

def max_pool_2x2(self, x):
    """Apply a 2x2, stride-2, SAME-padded max pool to ``x``.

    The op is named 'conv_pool_<layer_number>'; ``self.layer_number`` is
    incremented once the op has been created.
    """
    pool_name = 'conv_pool_' + str(self.layer_number)
    pooled = tf.nn.max_pool(
        x,
        ksize=[1, 2, 2, 1],
        strides=[1, 2, 2, 1],
        padding='SAME',
        name=pool_name,
    )
    self.layer_number += 1
    return pooled

def convolutional_layer(self, x, shape, stride=1, activation=None, trainable=True):
    """Append a 2-D convolution, a bias add and an optional activation to the graph.

    Args:
        x: input tensor handed to ``tf.nn.conv2d``.
        shape: filter shape used to initialise the weights; its last entry
            is also used as the length of the bias vector.
        stride: stride applied to the two middle dimensions
            (``strides=[1, stride, stride, 1]``).
        activation: optional callable applied to the biased output.
        trainable: forwarded to both ``tf.Variable`` calls.

    Returns:
        The output tensor of the layer.

    Side effects:
        Increments ``self.layer_number`` and prints the layer's output shape.
    """
    W = tf.Variable(tf.truncated_normal(shape, stddev=0.1), trainable=trainable)
    # The op name uses the counter value *before* the increment.
    conv = tf.nn.conv2d(x, W, strides=[1, stride, stride, 1], padding='SAME', name='conv_conv_' + str(self.layer_number))
    self.layer_number += 1
    b = tf.Variable(tf.constant(0.1, shape=[shape[-1]]), trainable=trainable)
    conv = tf.add(conv, b)
    if activation:
        conv = activation(conv)
    # Single-argument print(...) produces the same text under Python 2 and
    # Python 3; the former `print a, b` statement is a SyntaxError on Python 3.
    # The number printed is the counter value *after* the increment.
    print('conv_' + str(self.layer_number) + ' ' + str(conv.shape))
    return conv

def deconvolutional_layer(self, x, shape, output_shape, stride=2, activation=None, trainable=True):
    """Append a transposed 2-D convolution, a bias add and an optional activation.

    Args:
        x: input tensor handed to ``tf.nn.conv2d_transpose``.
        shape: filter shape used to initialise the weights; its
            second-to-last entry is used as the length of the bias vector.
        output_shape: forwarded unchanged to ``tf.nn.conv2d_transpose``.
        stride: stride applied to the two middle dimensions
            (``strides=[1, stride, stride, 1]``).
        activation: optional callable applied to the biased output.
        trainable: forwarded to both ``tf.Variable`` calls.

    Returns:
        The output tensor of the layer.

    Side effects:
        Increments ``self.layer_number`` and prints the layer's output shape.
    """
    W = tf.Variable(tf.truncated_normal(shape, stddev=0.1), trainable=trainable)
    # The op name uses the counter value *before* the increment.
    conv = tf.nn.conv2d_transpose(x, W, output_shape, strides=[1, stride, stride, 1], padding="SAME", name='conv_deconv_' + str(self.layer_number))
    self.layer_number += 1
    # Bias length is taken from shape[-2], not shape[-1] as in convolutional_layer.
    b = tf.Variable(tf.constant(0.1, shape=[shape[-2]]), trainable=trainable)
    conv = tf.add(conv, b)
    if activation:
        conv = activation(conv)
    # Single-argument print(...) produces the same text under Python 2 and
    # Python 3; the former `print a, b` statement is a SyntaxError on Python 3.
    # The number printed is the counter value *after* the increment.
    print('deconv_' + str(self.layer_number) + ' ' + str(conv.shape))
    return conv

这是我用 Python 训练的模型。用于输入灰度图像的输入占位符名为“network_input”，两个输出分别是“classification_out”和“regression_out”。

int main(int argc, char* argv[]) {

// Set dirs variables
string ROOTDIR = "../";
string LABELS = "demo/ssd_mobilenet_v1_egohands/labels_map.pbtxt";
//string GRAPH = "demo/ssd_mobilenet_v1_egohands/frozen_inference_graph.pb";
string GRAPH = "demo/ssd_mobilenet_v1_egohands/output_graph.pb";

// Set input & output nodes names
string inputLayer = "network_input";
vector<string> outputLayer = {"regression_out", "classification_out"};

// Load and initialize the model from .pb file
std::unique_ptr<tensorflow::Session> session;
string graphPath = tensorflow::io::JoinPath(ROOTDIR, GRAPH);
LOG(INFO) << "graphPath:" << graphPath;
Status loadGraphStatus = loadGraph(graphPath, &session);
if (!loadGraphStatus.ok()) {
    LOG(ERROR) << "loadGraph(): ERROR" << loadGraphStatus;
    return -1;
} else
    LOG(INFO) << "loadGraph(): frozen graph loaded" << endl;


// Load labels map from .pbtxt file
std::map<int, std::string> labelsMap = std::map<int,std::string>();
Status readLabelsMapStatus = readLabelsMapFile(tensorflow::io::JoinPath(ROOTDIR, LABELS), labelsMap);
if (!readLabelsMapStatus.ok()) {
    LOG(ERROR) << "readLabelsMapFile(): ERROR" << loadGraphStatus;
    return -1;
} else
    LOG(INFO) << "readLabelsMapFile(): labels map loaded with " << labelsMap.size() << " label(s)" << endl;

Mat frame;
Tensor tensor;
std::vector<Tensor> outputs;
double thresholdScore = 0.5;
double thresholdIOU = 0.8;

// FPS count
int nFrames = 25;
int iFrame = 0;
double fps = 0.;
time_t start, end;
time(&start);

// Start streaming frames from camera
VideoCapture cap(0);

tensorflow::TensorShape shape = tensorflow::TensorShape();
shape.AddDim(1);
shape.AddDim(350);
shape.AddDim(350);
shape.AddDim(1);

while (cap.isOpened()) {
    frame = imread("/home/sascha/tensorflow-object-detection-cpp/in.jpg");
    imshow("bam", frame);
    waitKey(1000);
    cout << "Frame # " << iFrame << endl;

    if (nFrames % (iFrame + 1) == 0) {
        time(&end);
        fps = 1. * nFrames / difftime(end, start);
        time(&start);
    }
    iFrame++;

    // Convert mat to tensor
    cout << "Convert mat to tensor" << endl;
    tensor = Tensor(tensorflow::DT_FLOAT, shape);
    Status readTensorStatus = readTensorFromMat(frame, tensor);

    if (!readTensorStatus.ok()) {
        LOG(ERROR) << "Mat->Tensor conversion failed: " << readTensorStatus;
        return -1;
    }

    // Run the graph on tensor
    cout << "Run the graph on tensor" << endl;
    outputs.clear();
    tensorflow::Tensor keep_prob = tensorflow::Tensor(tensorflow::DT_FLOAT, tensorflow::TensorShape());
    keep_prob.scalar<float>()() = 1.0;
    cout << tensor.shape() << endl;
    cout << keep_prob.shape() << endl;
    Status runStatus = session->Run({{inputLayer, tensor}, {"keep_prob1", keep_prob}}, outputLayer, {}, &outputs);
    if (!runStatus.ok()) {
        LOG(ERROR) << "Running model failed: " << runStatus;
        return -1;
    }
    else {cout << "done." << endl;}

这是我在main.cc中更改的代码片段。

/**
 * Copies an OpenCV image into an already allocated float tensor, converting
 * the pixel values to 32-bit floats directly inside the tensor's buffer.
 *
 * The tensor buffer is wrapped in a cv::Mat header with the same rows, columns
 * and channel count as the image, so the image must contain exactly as many
 * values as the tensor has elements. For a tensor of shape [1, H, W, 1] the
 * image therefore has to be a single-channel H x W image (e.g. loaded as
 * grayscale); a 3-channel image would need three times the space and is
 * rejected rather than written past the end of the buffer.
 *
 * @param mat        Source image (any depth; 2-D).
 * @param outTensor  Pre-allocated DT_FLOAT tensor that receives the pixels.
 * @return Status::OK() on success; INVALID_ARGUMENT if the image is empty, the
 *         tensor is not DT_FLOAT, or the element counts differ; INTERNAL if
 *         OpenCV did not write into the tensor's buffer.
 */
Status readTensorFromMat(const Mat &mat, Tensor &outTensor) {
    if (mat.empty() || mat.dims != 2) {
        return Status(tensorflow::error::INVALID_ARGUMENT,
                      "readTensorFromMat(): input image is empty or not 2-dimensional");
    }
    if (outTensor.dtype() != tensorflow::DT_FLOAT) {
        return Status(tensorflow::error::INVALID_ARGUMENT,
                      "readTensorFromMat(): output tensor must be DT_FLOAT");
    }

    // The wrapped buffer must hold rows * cols * channels floats, otherwise
    // convertTo() would either overrun it or silently reallocate elsewhere.
    const long long imageValues =
        static_cast<long long>(mat.rows) * mat.cols * mat.channels();
    const long long tensorValues = static_cast<long long>(outTensor.NumElements());
    if (imageValues != tensorValues) {
        return Status(tensorflow::error::INVALID_ARGUMENT,
                      "readTensorFromMat(): image has " + std::to_string(mat.rows) + "x" +
                          std::to_string(mat.cols) + "x" + std::to_string(mat.channels()) +
                          " values but the tensor holds " + std::to_string(tensorValues) +
                          " elements (load the image with a matching size and channel count)");
    }

    // Trick from https://github.com/tensorflow/tensorflow/issues/8033:
    // let a Mat header point at the tensor memory and convert into it.
    float *p = outTensor.flat<float>().data();
    Mat fakeMat(mat.rows, mat.cols, CV_MAKETYPE(CV_32F, mat.channels()), p);
    mat.convertTo(fakeMat, CV_32F);

    // If OpenCV had to allocate a new buffer, the tensor was left untouched.
    if (fakeMat.data != reinterpret_cast<unsigned char *>(p)) {
        return Status(tensorflow::error::INTERNAL,
                      "readTensorFromMat(): conversion did not write into the tensor buffer");
    }
    return Status::OK();
}

这是我修改后的图像读取方法，因为原始方法读取的是 3 通道 RGB 图像，而我需要的是 1 通道的浮点张量。

输出如下所示（还有一些 CUDA 的输出，但与问题无关）：

2019-02-19 11:15:24.403692: I /home/sascha/tensorflow-object-detection-cpp/main.cpp:59] loadGraph(): frozen graph loaded
2019-02-19 11:15:24.404093: I /home/sascha/tensorflow-object-detection-cpp/main.cpp:69] readLabelsMapFile(): labels map loaded with 1 label(s)
Frame # 0
Convert mat to tensor
Run the graph on tensor
[1,350,350,1]
[]
2019-02-19 11:15:26.305366: F tensorflow/core/framework/tensor_shape.cc:44] Check failed: NDIMS == dims() (2 vs. 4)Asking for tensor of 2 dimensions from a tensor of 4 dimensions
Process finished with exit code 134 (interrupted by signal 6: SIGABRT)

这条错误消息对我并没有什么帮助。我不知道是哪个张量出了问题，也不知道该如何排查。由于该模型在 Python 中训练和推理都运行良好，模型本身有问题的可能性不大。所以我猜测是我在加载输入张量或类似的环节上犯了错误。我不常使用 C++，而且这是我第一次使用 TensorFlow 的 C++ API。希望有人能帮助我，先在此表示感谢！如果您需要任何其他信息，请告诉我。

“要求从 4 维张量中获取 2 维张量”——使用冻结图进行推理时出错

0 个答案: