I'm trying to build a neural network with Gluon and MXNet to implement the fizzbuzz program. However, it gives me a strange result: the accuracy on the training data (35-42%) is noticeably worse than the accuracy on the test data (97-99%). I use the numbers 101 to 1024 as the training set and the numbers 1 to 100 as the test set.
So why is the accuracy on the training data worse than on the test data? Shouldn't it be the other way around?
My code:
import numpy as np
from mxnet import gluon, nd, autograd
import mxnet as mx
ctx = mx.cpu()
mx.random.seed(1)
def binary_encode(i, digits):
    return np.array([i >> d & 1 for d in range(digits)])
def fizzbuzz_encode(i):
    if i % 15 == 0:
        return np.array([0, 0, 0, 1])
    elif i % 5 == 0:
        return np.array([0, 0, 1, 0])
    elif i % 3 == 0:
        return np.array([0, 1, 0, 0])
    else:
        return np.array([1, 0, 0, 0])
def fizzbuzz_decode(i, pred):
    if pred == 0:
        return i
    elif pred == 1:
        return 'fizz'
    elif pred == 2:
        return 'buzz'
    else:
        return 'fizzbuzz'
num_digits = 10 #number of digits in the input
trX = np.array([binary_encode(i, num_digits) for i in range(101, 2**num_digits)])
trY = np.array([fizzbuzz_encode(i) for i in range(101, 2**num_digits)])
tr_dataset = gluon.data.dataset.ArrayDataset(trX, trY) #training dataset
testX = np.array([binary_encode(i, num_digits) for i in range(1, 101)])
testY = np.array([fizzbuzz_encode(i) for i in range(1, 101)])
test_dataset = gluon.data.dataset.ArrayDataset(testX, testY) #testing dataset
hidden_layers = 1 #number of hidden layers
hidden_units = 100 #number of nodes in a hidden layer
batch_size = 32
train_data = gluon.data.DataLoader(tr_dataset, batch_size, shuffle=False)
test_data = gluon.data.DataLoader(test_dataset, batch_size, shuffle=False)
net = gluon.nn.Sequential() #making the neural net
with net.name_scope():
    net.add(gluon.nn.Dense(hidden_units, activation='relu')) #hidden layer
    net.add(gluon.nn.Dense(4)) #output layer
net.collect_params().initialize(mx.init.Normal(sigma=0.01), ctx=ctx) #setting the initial weights and biases
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss(sparse_label=False) #loss function (Softmax Cross Entropy)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate':0.05}) #setting up the optimizer (Stochastic Gradient Descent)
epochs = 1000
for e in range(epochs): #training procedure
    cumulative_loss = 0
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(ctx)
        data = data.astype(np.float32)
        label = label.as_in_context(ctx)
        label = label.astype(np.float32)
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])
        cumulative_loss += nd.sum(loss).asscalar()
prediction = []
correct = []
for i, (data, label) in enumerate(test_data):
    data = data.as_in_context(ctx)
    data = data.astype(np.float32)
    for j in net(data):
        prediction.append(fizzbuzz_decode(len(prediction)+1, nd.argmax(j, axis=0))) #prediction array
for i in prediction:
    print(i) #prints the final output
for i, val in enumerate(testY):
    correct.append(fizzbuzz_decode(i+1, np.argmax(val, axis=0)))
accuracy = 0
for i in range(100):
    if prediction[i] == correct[i]:
        accuracy += 1
print('\nThe accuracy of the training data is ' + str(accuracy) + '%')
Answer 0 (score: 1)
I played around with your code a bit, and I guess you simply forgot to replace test_data and testY with train_data and trY, respectively, in the following blocks:
for i, (data, label) in enumerate(test_data):
and
for i, val in enumerate(testY):
If I do that, I get 99% accuracy on the training data and 97% accuracy on the test data.
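For completeness, here is a minimal sketch of what the corrected evaluation of the training set could look like. It assumes the script above has already been run, that train_data was created with shuffle=False so batch order matches trY, and the names train_prediction and train_correct are introduced here purely for illustration:
train_prediction = []
train_correct = []
for data, label in train_data: #iterate over the *training* loader, not test_data
    data = data.as_in_context(ctx)
    data = data.astype(np.float32)
    for j in net(data):
        #training numbers start at 101, so offset the index by 101
        n = len(train_prediction) + 101
        train_prediction.append(fizzbuzz_decode(n, int(nd.argmax(j, axis=0).asscalar())))
for i, val in enumerate(trY): #ground truth from the *training* labels, not testY
    train_correct.append(fizzbuzz_decode(i + 101, np.argmax(val, axis=0)))
matches = sum(p == c for p, c in zip(train_prediction, train_correct))
print('Training accuracy: ' + str(100.0 * matches / len(train_correct)) + '%')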