I'm trying to train a classification model. Its loss is decreasing, but its accuracy is also going down, which makes no sense to me. I thought I was computing the accuracy incorrectly, but that isn't the case. Shouldn't accuracy improve as the loss decreases? Or is there simply no relationship between loss and accuracy?
import numpy as np
import random
import numpy.core.multiarray
import cv2
import tensorflow as tf
import pandas as pd
from glob import glob
from get_data import next_batch, rawData
test, train, result_dict = rawData()
# optimization variables
learning_rate = 1e-3 # 0.05
epochs = 100 # 100
batch_size = 64 # 50
dropout = 0.9
# Network Parameters
n_input = 6000 # 784
n_hidden1 = 5000 # 512
n_hidden2 = 4500 # 32*12 # 256
n_hidden3 = 500
n_output = 4 # 10
# declare the training data placeholders
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_output])
keep_prob = tf.placeholder(tf.float32)
# Weights
weights = {
    'w1': tf.Variable(tf.truncated_normal([n_input, n_hidden1], stddev=0.1)),
    'w2': tf.Variable(tf.truncated_normal([n_hidden1, n_hidden2], stddev=0.1)),
    # 'w3': tf.Variable(tf.truncated_normal([n_hidden2, n_hidden3], stddev=0.1)),
    'out': tf.Variable(tf.truncated_normal([n_hidden2, n_output], stddev=0.1)),
}
# Biases
biases = {
    'b1': tf.Variable(tf.constant(0.1, shape=[n_hidden1])),
    'b2': tf.Variable(tf.constant(0.1, shape=[n_hidden2])),
    # 'b3': tf.Variable(tf.constant(0.1, shape=[n_hidden3])),
    'out': tf.Variable(tf.constant(0.1, shape=[n_output]))
}
# Layers
layer_1 = tf.nn.relu(tf.add(tf.matmul(X, weights['w1']), biases['b1']))
layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1, weights['w2']), biases['b2']))
# layer_3 = tf.nn.relu(tf.add(tf.matmul(layer_2, weights['w3']), biases['b3']))
layer_drop = tf.nn.dropout(layer_2, keep_prob)
# Note: layer_drop is never used below -- the output layer is fed layer_2
# directly, so dropout is effectively disabled in this graph.
output_layer = tf.matmul(layer_2, weights['out']) + biases['out']
# Gradient
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=Y, logits=output_layer
    ))
train_step = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cross_entropy)
# change that into tf.nn.softmax instead of argmax
correct_pred = tf.equal(tf.argmax(output_layer, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
for i in range(epochs):
    # batch_x, batch_y = mnist.train.next_batch(batch_size)
    mapIndexPosition = list(zip(train["data"], train["labels"]))
    random.shuffle(mapIndexPosition)
    r_data, r_labels = zip(*mapIndexPosition)
    batchs = next_batch(batch_size, r_data, r_labels, shuffle=False)
    # print("---------------- i: ", i)
    for batch in batchs:
        batch_x, batch_y = batch
        feed_dict = {X: batch_x, Y: batch_y, keep_prob: dropout}
        _, loss = sess.run([train_step, cross_entropy], feed_dict=feed_dict)
        acc = sess.run(accuracy, feed_dict=feed_dict)
    if i % 10 == 0:
        print(
            "Iteration",
            str(i),
            "\t| Loss =",
            str(loss),
            "\t| Accuracy =",
            str(acc)
        )
# test_accuracy = sess.run(accuracy, feed_dict={X: mnist.test.images, Y: mnist.test.labels, keep_prob: 1.0})
test_accuracy = sess.run(accuracy, feed_dict={X: test["data"], Y: test["labels"], keep_prob: 1.0})
print("\nAccuracy on test set:", test_accuracy)
The data-processing file (get_data.py):
import random
import numpy as np
import cv2
import pandas as pd
from glob import glob
# from data_structure import DataStructure
class GetData:
    bulk_data = None
    bulk_labels = None
    result = None
    img_data = None
    img_label = None
    result_dict = {'data': [], 'labels': []}

    def __init__(self, bulk_data, bulk_labels):
        # print("get_data works?")
        self.bulk_data = bulk_data
        self.bulk_labels = bulk_labels

    # Push the objects into the array for the model class to use
    def structureData(self):
        # Object structure: each entry pairs the data with its (correct) label.
        self.result = []
        self.img_data = []
        self.img_label = []
        i = 0
        # Note: `len(a) and len(b)` evaluates to len(b) whenever a is
        # non-empty, so this loop bound only really checks the labels.
        while i < (len(self.bulk_data) and len(self.bulk_labels)):
            image = cv2.imread(self.bulk_data[i])
            scaled_image = cv2.resize(image, (64, 64))
            scaled_image = np.array(scaled_image).flatten()
            self.img_data.append(scaled_image)
            self.img_label.append(self.bulk_labels[i])
            self.result_dict["data"].append(scaled_image)
            self.result_dict["labels"].append(self.bulk_labels[i])
            i += 1
        return self.result

    def get_images(self):
        return self.img_data

    def get_label(self):
        return self.img_label

    def get_dict(self):
        return self.result_dict

    def get_raw_data(self):
        i = 0
        while i < (len(self.bulk_data) and len(self.bulk_labels)):
            self.result_dict["data"].append(self.bulk_data[i])
            self.result_dict["labels"].append(self.bulk_labels[i])
            i += 1
        return self.result_dict
def next_batch(batch_size, data, labels, shuffle=False):
    data = np.asarray(data)
    labels = np.asarray(labels)
    # print(data.shape[0], labels.shape[0])
    # This has to change and iterate over the
    assert data.shape[0] == labels.shape[0]
    if shuffle:
        indices = np.arange(data.shape[0])
        np.random.shuffle(indices)
    for start_idx in range(0, data.shape[0], batch_size):
        end_idx = min(start_idx + batch_size, data.shape[0])
        if shuffle:
            excerpt = indices[start_idx:end_idx]
        else:
            excerpt = slice(start_idx, end_idx)
        yield data[excerpt], labels[excerpt]
def rawData():
    test_data = {"data": [], "labels": []}
    train_data = {"data": [], "labels": []}
    relationship_frame = pd.read_csv('relationships.csv')
    rawdata = relationship_frame.iloc[:, 0].tolist()
    labels = relationship_frame.iloc[:, 2].tolist()
    hot_encoding_labels = pd.get_dummies(labels)
    print(relationship_frame['Concentration'].value_counts())
    rawdata_temp = []
    for i in rawdata:
        raw_temp = eval(i)
        rawdata_temp.append(raw_temp)
    # print(len(rawdata_temp))
    data = GetData(rawdata_temp, hot_encoding_labels.values)
    result_dict = data.get_raw_data()
    # print(result_dict)
    # Roughly a 30/70 test/train split.
    test_ratio = round((len(result_dict["data"]) / 10) * 3)
    train_ratio = len(result_dict["data"]) - test_ratio
    test_data["data"] = result_dict["data"][:test_ratio]
    test_data["labels"] = result_dict["labels"][:test_ratio]
    # print(len(test_data["data"]), len(test_data["labels"]))
    # Note: list.remove() matches these numpy rows by object identity here,
    # since the test rows are the very same objects stored in result_dict.
    for i in test_data["data"]:
        result_dict["data"].remove(i)
    for i in test_data["labels"]:
        result_dict["labels"].remove(i)
    train_data["data"] = result_dict["data"][:train_ratio]
    train_data["labels"] = result_dict["labels"][:train_ratio]
    # print(train_data["data"], len(train_data["labels"]))
    return test_data, train_data, result_dict
The results I get are:
Iteration 0 | Loss = 11616709.0 | Accuracy = 0.29166666
Iteration 10 | Loss = 133036.08 | Accuracy = 0.20833333
Iteration 20 | Loss = 51588.387 | Accuracy = 0.41666666
Iteration 30 | Loss = 8822.799 | Accuracy = 0.25
Iteration 40 | Loss = 1.3812752 | Accuracy = 0.25
Iteration 50 | Loss = 1.3597528 | Accuracy = 0.41666666
Iteration 60 | Loss = 1.3821254 | Accuracy = 0.375
Iteration 70 | Loss = 1.3808714 | Accuracy = 0.375
Iteration 80 | Loss = 1.3909651 | Accuracy = 0.25
Iteration 90 | Loss = 1.3287157 | Accuracy = 0.41666666
The loss goes down, but the accuracy also drops (loss and accuracy are per batch). I have no idea what's causing this. Am I doing something wrong? Any help is greatly appreciated. Thanks. :)
Answer 0 (score: 1)
Short answer: yes, it seems you are doing something wrong, because your model appears to be converging to sub-par accuracy. I won't try to reverse-engineer parts of your code; instead, I'll help you understand what to look for.
Loss and accuracy are related, but there is plenty of room for the two to move in different directions.
Accuracy is a discrete metric with a simple binary score for each input: if your model gets the right answer, it scores a point. Batch accuracy is points / inputs. It looks like you are getting between 5 and 10 out of 24 correct.
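To make "points / inputs" concrete, here is a tiny sketch of that arithmetic (the preds and labels arrays are made-up stand-ins, not your data):

import numpy as np

preds = np.array([0, 2, 1, 3, 0, 0, 1, 2])    # the model's argmax guesses
labels = np.array([0, 1, 1, 3, 2, 0, 3, 2])   # the true class indices
points = np.sum(preds == labels)              # one point per correct guess
print(points, "/", len(labels), "=", points / len(labels))  # 5 / 8 = 0.625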
Loss, by contrast, is an analog measure of the evaluation, and it can be computed in many different ways. I don't know what loss function you are using, so I'll illustrate with a simple example.
Say you are trying to classify input images as one of the ten digits 0-9. A simple loss function is the gap between 100% certainty and the confidence the model assigns to the correct class. The model picks the class with the highest confidence as its classification, but the loss depends on that confidence.
To keep the numbers short, let's look at only the digits 1, 2, and 3. Consider these cases when evaluating four images, arbitrarily labeled A-D. Accuracy is simply whether the top guess is correct; loss is how far the model's confidence was from the correct answer.
image =>        A      B      C      D
confidence
    1          1.0    0.0    0.4    0.48
    2          0.0    1.0    0.3    0.51
    3          0.0    0.0    0.3    0.01

label           1      1      1      1
acc             1      0      1      0
error          0.0    1.0    0.6    0.52
For each case:
- A is confidently correct: accuracy 1, loss 0.
- B is confidently wrong: accuracy 0 and the maximum loss of 1.
- C is correct, but only barely more confident in 1 than in the other digits: accuracy 1, yet a loss of 0.6.
- D is wrong by a hair (0.51 vs. 0.48): accuracy 0, yet a loss of only 0.52.
Looking at C and D, you can see that a correct guess can incur more loss than an incorrect one, depending on how confused the model is about the image.
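Here is a quick sketch that reproduces the table's numbers, using the simple 1-minus-confidence loss from the example above (all names here are illustrative):

import numpy as np

# Columns are images A-D; rows are confidence in digits 1, 2, 3.
confidence = np.array([[1.0, 0.0, 0.4, 0.48],
                       [0.0, 1.0, 0.3, 0.51],
                       [0.0, 0.0, 0.3, 0.01]])
label = np.array([0, 0, 0, 0])                 # every image is truly a "1" (row 0)

guess = np.argmax(confidence, axis=0)          # highest confidence wins
acc = (guess == label).astype(int)             # -> [1 0 1 0]
error = 1.0 - confidence[label, np.arange(4)]  # -> [0.0 1.0 0.6 0.52]
print(acc, error)

Note that C (a correct guess) ends up with a larger loss than D (a wrong one), which is exactly the kind of divergence between the two metrics described above.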
Obviously your loss function is different: it is scaled differently and computed by other functions. As with most models, your loss starts out very high and comes down nicely. By iteration 40 you reach a very low loss: the loss function thinks the model is doing a fine job of evaluating the images. However, the classifications it returns are still frequently off: your accuracy hovers around 1/3.
You also appear to be testing on only 24 images; that may serve your purposes, but it will exaggerate the problem. I suggest that you track the prediction confidences (the softmax-transformed output vector) and compare them against the returned predictions.
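As a sketch of what that could look like with your graph (probs and pred are new names; everything else reuses tensors your script already defines):

# Softmax rescales the raw logits into per-class confidences that sum to 1.
probs = tf.nn.softmax(output_layer)
pred = tf.argmax(output_layer, 1)

p, guesses = sess.run([probs, pred],
                      feed_dict={X: test["data"], Y: test["labels"], keep_prob: 1.0})
for conf_row, g in zip(p, guesses):
    print("predicted", g, "with confidences", np.round(conf_row, 3))

If the confidences sit near the uniform [0.25, 0.25, 0.25, 0.25] while the loss is low, that is a strong hint: for four classes, uniform softmax output gives a cross entropy of ln(4) ≈ 1.386, which is roughly where your loss plateaus.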