Question

我在一个零售数据集上用 TensorFlow 训练了一个逻辑回归模型，用来预测顾客是否会成为回头客（“是”或“否”），结果如下：

测试准确度：0.903 ...

损失：0.221 ...

结果算不上特别好，但也还不错吧？随后，我在 Android Studio 中把模型集成到一个 Android 应用里，尝试从应用中输入新数据，并通过 InferenceInterface.fetch 获取结果。但模型对新数据的预测总是错得离谱——即使我输入的是伪造的数据，它也总是以 100% 的把握预测“是”。

我还在不同的数据集上测试了模型，测试准确度为0.98，损失仅为0.06，并且发生了相同的问题。

我想知道我如何构建模型或将模型导出到应用程序是否有问题。任何帮助将不胜感激！

模型代码（基于这篇教程）：

#<<left out all the imports>>

# Hyperparameters and the base name used for every exported model file.
learning_rate = 0.1
training_epochs = 250
batch_size = 250
display_step = 1
MODEL_NAME = 'lrmodel'

# Load the dataset, indexed by customer id.
dt = pd.read_csv('retail_data.csv', header=0, index_col='CustomerID')
dt.info()

npArray = np.array(dt)

#data splitting and preprocessing time
# Every column except the last is a feature; the last column is the label.
xvals = npArray[:,:-1].astype(float)
yvals = npArray[:,-1]

# Map the label values to integer class ids.
le = preprocessing.LabelEncoder()
yvals  = le.fit_transform(yvals)

# Split BEFORE scaling so the held-out rows do not influence the statistics
# used to standardize the training data.
XTrain, XTest, yTrain, yTest = train_test_split(xvals, yvals, random_state=1)

# Fit the standardization on the training rows only and keep the fitted
# scaler: the model is trained on (x - mean) / scale, so every consumer of the
# exported graph must apply exactly the same transform to raw inputs before
# feeding them, otherwise the logits saturate and the prediction is constant.
scaler = preprocessing.StandardScaler().fit(XTrain)
XTrain = scaler.transform(XTrain)
XTest = scaler.transform(XTest)
x_norm = scaler.transform(xvals)

# Print the statistics so they can be copied into the inference client.
print(f'feature means: {scaler.mean_.tolist()}')
print(f'feature scales: {scaler.scale_.tolist()}')

#encoding
# One-hot encode the integer class ids into two columns (row k of the 2x2
# identity matrix is the one-hot vector for class k).
yTrain = np.eye(2)[yTrain]
yTest = np.eye(2)[yTest]

#-----building the logistic regression model---

def create_LR_model(inputs, n_features=10, n_classes=2):
    """Build the linear part of a logistic-regression model and return its logits.

    Args:
        inputs: Tensor of input features; it is reshaped to
            ``[-1, n_features]`` before the matrix multiplication.
        n_features: Number of input features per example (default 10, the
            value previously hard-coded).
        n_classes: Number of output units (default 2, the value previously
            hard-coded).

    Returns:
        A tensor of un-activated scores ``X @ W + b`` with ``n_classes``
        columns. No sigmoid/softmax is applied here.
    """
    # Variables are created in the same order as before (weights, then bias)
    # so the auto-generated graph node names stay unchanged.
    W = tf.Variable(tf.random_normal([n_features, n_classes], mean=0.0, stddev=0.05))
    b = tf.Variable(tf.zeros([n_classes]))

    X = tf.reshape(inputs, [-1, n_features])

    return (tf.matmul(X, W) + b)

# Start from an empty default graph so node names are predictable for export.
tf.reset_default_graph()

# Placeholders: X is named "input" because the exported graph is fed through
# that node name; Y holds the one-hot labels and is only used for training.
X = tf.placeholder(tf.float32, [None, 10], name="input")
Y = tf.placeholder(tf.float32, [None, 2])

# `out` holds the raw logits; `pred` is named "output" because that is the
# node fetched from the exported graph.
# NOTE(review): tf.sigmoid is element-wise, so the two output values are
# independent scores and are not constrained to sum to 1.
out = create_LR_model(X)
pred = tf.sigmoid(out, name="output")

# Loss: mean element-wise sigmoid cross entropy between the logits and labels.
cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits = out, labels = Y))
# Optimizer: Adam (not plain gradient descent) minimizing the loss above.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Number of training examples.
train_count = len(XTrain)

# Accuracy: fraction of examples whose highest-scoring output column matches
# the column of the one-hot label.
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))

accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


#----Training the model------------------------------------------
# Saver used to checkpoint the trained variables for the freeze step.
saver = tf.train.Saver()

# Per-epoch metrics, one entry appended per epoch.
history = dict(train_loss=[],
            train_acc=[],
            test_loss=[],
            test_acc=[])

sess=tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

train_count = len(XTrain)

for i in range(1, training_epochs + 1):
    # Walk the training set in mini-batches. Slicing past the end is safe, so
    # the final (possibly shorter) batch is included; the previous zip of two
    # ranges silently dropped it and trained on nothing at all when
    # train_count < batch_size.
    for start in range(0, train_count, batch_size):
        end = start + batch_size
        sess.run(optimizer, feed_dict={X: XTrain[start:end],
                                    Y: yTrain[start:end]})

    # Evaluate on the full train and test sets after each epoch.
    acc_train, loss_train = sess.run([accuracy, cost], feed_dict={
                                            X: XTrain, Y: yTrain})

    acc_test, loss_test = sess.run([accuracy, cost], feed_dict={
                                            X: XTest, Y: yTest})

    history['train_loss'].append(loss_train)
    history['train_acc'].append(acc_train)
    history['test_loss'].append(loss_test)
    history['test_acc'].append(acc_test)

    # Report on the first epoch and then every tenth epoch.
    if i == 1 or i % 10 == 0:
        print(f'epoch: {i} test accuracy: {acc_test} loss: {loss_test}')


# Final evaluation on the test set with the trained weights.
predictions, acc_final, loss_final = sess.run([pred, accuracy, cost], feed_dict={X: XTest, Y: yTest})

# Write the graph definition as text and checkpoint the variables; both files
# are inputs to the freeze step.
tf.train.write_graph(sess.graph_def, 'out', MODEL_NAME + '.pbtxt', True)

saver.save(sess, 'out/' + MODEL_NAME + '.chkp')

print()
print(f'final results: accuracy: {acc_final} loss: {loss_final}')

#-----Storing model to disk-----------------------------------------
# Node names as declared when the graph was built; the inference client must
# use the same names to feed and fetch.
input_node_name = 'input'
output_node_name = 'output'

# Make sure the output directory exists (no error if it already does).
os.makedirs('out', exist_ok=True)

# Combine the text graph definition and the checkpoint into one frozen graph
# in which the variables are stored as constants.
freeze_graph.freeze_graph('out/' + MODEL_NAME + '.pbtxt', None, False,
'out/' + MODEL_NAME + '.chkp', output_node_name, "save/restore_all",
"save/Const:0", 'out/frozen_' + MODEL_NAME + '.pb',  clear_devices=True, initializer_nodes="")

# Load the frozen graph back so it can be optimized for inference.
input_graph_def = tf.GraphDef()
with tf.gfile.Open('out/frozen_' + MODEL_NAME + '.pb', "rb") as f:
    input_graph_def.ParseFromString(f.read())

output_graph_def = optimize_for_inference_lib.optimize_for_inference(
    input_graph_def, [input_node_name], [output_node_name],
    tf.float32.as_datatype_enum)

# Serialize the optimized graph; this is the file shipped to the client.
with tf.gfile.FastGFile('out/opt_' + MODEL_NAME + '.pb', "wb") as f:
    f.write(output_graph_def.SerializeToString())

print("graph saved!")

另外作为参考，我在 Java 端调用 TensorFlow 的代码如下：

// Handle to the loaded TensorFlow graph; assigned in the constructor.
private TensorFlowInferenceInterface inferenceInterface;

// Asset path of the model file that is loaded in the constructor.
private static final String MODEL_FILE = "file:///android_asset/out/opt_lrmodel.pb";
// Name of the graph node that receives the feature vector.
private static final String INPUT_NODE = "input";
// Name of the graph node whose value is fetched as the prediction.
private static final String OUTPUT_NODE = "output";
// Same output node, in the array form passed to run().
private static final String[] OUTPUT_NODES = {"output"};
// Shape of the fed tensor: one row of ten values.
private static final long[] INPUT_SIZE = {1, 10};
// Number of values read back from the output node.
private static final int OUTPUT_SIZE = 2;

/**
 * Creates the classifier by loading the model file from the application's assets.
 *
 * @param context context whose asset manager is used to open {@code MODEL_FILE}
 */
public TensorFlowClassifier(final Context context) {
    this.inferenceInterface =
            new TensorFlowInferenceInterface(context.getAssets(), MODEL_FILE);
}

/**
 * Runs one feature vector through the model and returns the raw output values.
 *
 * <p>NOTE(review): the values in {@code data} are fed to the graph unchanged.
 * If the model was trained on standardized features, the caller must apply
 * the same per-feature (x - mean) / scale transform before calling this
 * method; feeding raw values can saturate the output.
 *
 * @param data feature vector; its length must equal {@code INPUT_SIZE[1]}
 * @return array of {@code OUTPUT_SIZE} values fetched from the output node
 * @throws IllegalArgumentException if {@code data} has the wrong length
 */
public float[] predictWelfare(float[] data) {
    // Fail fast with a clear message instead of an opaque shape error from feed().
    if (data.length != INPUT_SIZE[1]) {
        throw new IllegalArgumentException(
                "expected " + INPUT_SIZE[1] + " input values but got " + data.length);
    }

    float[] result = new float[OUTPUT_SIZE];
    inferenceInterface.feed(INPUT_NODE, data, INPUT_SIZE);
    inferenceInterface.run(OUTPUT_NODES);
    inferenceInterface.fetch(OUTPUT_NODE, result);

    // Build the log line with a StringBuilder rather than repeated concatenation.
    StringBuilder s = new StringBuilder();
    for (int i = 0; i < OUTPUT_SIZE; i++) {
        s.append(result[i]).append(" || ");
    }
    Log.v("result:", s.toString());
    return result;
}

日志始终输出

result:: 0.0 || 1.0 ||

无论我输入的数据是标记为“否”还是“是”

TensorFlow 模型的测试准确率为 90%，但通过 TensorFlow InferenceInterface.fetch 对新数据得到的结果却完全错误？

0 个答案: