So this question is about GANs.

I am trying to do a trivial example as a proof of concept for my own sake; namely, generating images of handwritten digits (MNIST). While most would approach this via deep convolutional GANs (DCGANs), I just want to achieve it with 1D arrays (i.e., instead of 28x28 grids of grayscale pixel values, a 784-long (28*28) 1D array).

This git repo features a "vanilla" GAN that treats the MNIST dataset as a 1D array of 784 values. Its output values look quite acceptable, so I wanted to do something similar.
from __future__ import print_function
import matplotlib as mpl
from matplotlib import pyplot as plt
import mxnet as mx
from mxnet import nd, gluon, autograd
from mxnet.gluon import nn, utils
import numpy as np
import os
from math import floor
from random import random
import time
from datetime import datetime
import logging
ctx = mx.gpu()
np.random.seed(3)
batch_size = 100
epochs = 100
generator_learning_rate = 0.001
discriminator_learning_rate = 0.001
beta1 = 0.5
latent_z_size = 100
mnist = mx.test_utils.get_mnist()
# convert imgs to arrays
flattened_training_data = mnist["test_data"].reshape(10000, 28*28)
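# NOTE: the training loop below iterates over `data_iter`, which the original
# post never defines; a plain NDArrayIter over the flattened images is assumed
# here, since it matches the `reset()` / `batch.data[0]` usage further down.
data_iter = mx.io.NDArrayIter(data=flattened_training_data, batch_size=batch_size)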
# generator: maps a latent vector to a 784-pixel image
G = nn.Sequential()
with G.name_scope():
    G.add(nn.Dense(300, activation="relu"))
    G.add(nn.Dense(28 * 28, activation="tanh"))

# discriminator: 2 output units, one per class (real / fake)
D = nn.Sequential()
with D.name_scope():
    D.add(nn.Dense(128, activation="relu"))
    D.add(nn.Dense(64, activation="relu"))
    D.add(nn.Dense(32, activation="relu"))
    D.add(nn.Dense(2, activation="tanh"))
loss = gluon.loss.SoftmaxCrossEntropyLoss()
G.initialize(mx.init.Normal(0.02), ctx=ctx)
D.initialize(mx.init.Normal(0.02), ctx=ctx)
trainer_G = gluon.Trainer(G.collect_params(), 'adam', {"learning_rate": generator_learning_rate, "beta1": beta1})
trainer_D = gluon.Trainer(D.collect_params(), 'adam', {"learning_rate": discriminator_learning_rate, "beta1": beta1})
metric = mx.metric.Accuracy()
def dynamic_line_plt(ax, y_data, colors=['r', 'b', 'g'], labels=['Line1', 'Line2', 'Line3']):
    x_data = []
    y_max = 0
    y_min = 0
    x_min = 0
    x_max = 0
    for y in y_data:
        x_data.append(list(range(len(y))))
        if max(y) > y_max:
            y_max = max(y)
        if min(y) < y_min:
            y_min = min(y)
        if len(y) > x_max:
            x_max = len(y)

    ax.set_ylim(y_min, y_max)
    ax.set_xlim(x_min, x_max)

    if ax.lines:
        for i, line in enumerate(ax.lines):
            line.set_xdata(x_data[i])
            line.set_ydata(y_data[i])
    else:
        for i in range(len(y_data)):
            l = ax.plot(x_data[i], y_data[i], colors[i], label=labels[i])
        ax.legend()

    fig.canvas.draw()
stamp = datetime.now().strftime('%Y_%m_%d-%H_%M')
logging.basicConfig(level=logging.DEBUG)
# arrays to store data for plotting
loss_D = nd.array([0], ctx=ctx)
loss_G = nd.array([0], ctx=ctx)
acc_d = nd.array([0], ctx=ctx)
labels = ['Discriminator Loss', 'Generator Loss', 'Discriminator Acc.']
%matplotlib notebook
fig, ax = plt.subplots(1, 1)
ax.set_xlabel('Time')
ax.set_ylabel('Loss')
dynamic_line_plt(ax, [loss_D.asnumpy(), loss_G.asnumpy(), acc_d.asnumpy()], labels=labels)
for epoch in range(epochs):
    tic = time.time()
    data_iter.reset()

    for i, batch in enumerate(data_iter):
        ####################################
        # Update Discriminator: maximize log(D(x)) + log(1 - D(G(z)))
        ####################################

        # extract batch of real data
        data = batch.data[0].as_in_context(ctx)

        # produce our noisy input to the generator
        latent_z = mx.nd.random_normal(0, 1, shape=(batch_size, latent_z_size), ctx=ctx)

        # soft and noisy labels
        # real_label = mx.nd.ones((batch_size, ), ctx=ctx) * nd.random_uniform(.7, 1.2, shape=(1)).asscalar()
        # fake_label = mx.nd.ones((batch_size, ), ctx=ctx) * nd.random_uniform(0, .3, shape=(1)).asscalar()

        # real_label = nd.random_uniform(.7, 1.2, shape=(batch_size), ctx=ctx)
        # fake_label = nd.random_uniform(0, .3, shape=(batch_size), ctx=ctx)

        real_label = mx.nd.ones((batch_size, ), ctx=ctx)
        fake_label = mx.nd.zeros((batch_size, ), ctx=ctx)

        with autograd.record():
            # train with real data
            real_output = D(data)
            errD_real = loss(real_output, real_label)

            # train with fake data
            fake = G(latent_z)
            fake_output = D(fake.detach())
            errD_fake = loss(fake_output, fake_label)

            errD = errD_real + errD_fake
            errD.backward()

        trainer_D.step(batch_size)
        metric.update([real_label, ], [real_output, ])
        metric.update([fake_label, ], [fake_output, ])

        ####################################
        # Update Generator: maximize log(D(G(z)))
        ####################################
        with autograd.record():
            output = D(fake)
            errG = loss(output, real_label)
            errG.backward()

        trainer_G.step(batch_size)

        ####
        # Plot Loss
        ####
        # append new data to arrays
        loss_D = nd.concat(loss_D, nd.mean(errD), dim=0)
        loss_G = nd.concat(loss_G, nd.mean(errG), dim=0)
        name, acc = metric.get()
        acc_d = nd.concat(acc_d, nd.array([acc], ctx=ctx), dim=0)

        # plot arrays
        dynamic_line_plt(ax, [loss_D.asnumpy(), loss_G.asnumpy(), acc_d.asnumpy()], labels=labels)

    name, acc = metric.get()
    metric.reset()
    logging.info('Binary training acc at epoch %d: %s=%f' % (epoch, name, acc))
    logging.info('time: %f' % (time.time() - tic))

img = G(mx.nd.random_normal(0, 1, shape=(100, latent_z_size), ctx=ctx))[0].reshape((28, 28))
plt.imshow(img.asnumpy(), cmap='gray')
plt.show()
Now this doesn't turn out nearly as good as the repo example above, though it is fairly similar. So I was wondering if you could take a look and figure out why: (1) the generated images look color-inverted, and (2) the results are subpar.

I have been fiddling around with this, trying all kinds of things to improve the results (which I will list in a second), but for the MNIST dataset this really shouldn't be needed.

Things I have tried (and I have also tried a whole bunch of combinations):

If you have any ideas, let me know.
Answer 0 (score: 2)
1) If you take a look at the original dataset:
training_set = mnist["train_data"].reshape(60000, 28, 28)
plt.imshow(training_set[10,:,:], cmap='gray')
you will notice that the digits are white on a black background. So, technically speaking, your results are not inverted - they match the pattern of the original images you used as real data.

If you want to invert the colors for visualization purposes, you can easily do so by changing the palette to a reversed one - just append '_r' to the palette name (this works for every palette):
plt.imshow(img.asnumpy(), cmap='gray_r')
You can also play with the color range by changing the vmin and vmax parameters. They control how big the difference between colors should be. By default it is calculated automatically based on the provided data.
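For example, a minimal sketch (assuming pixel values in [0, 1], which holds for the raw MNIST arrays and for a sigmoid generator; a tanh generator would produce values in [-1, 1] instead):

plt.imshow(img.asnumpy(), cmap='gray_r', vmin=0, vmax=1)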
2) "Why are the results subpar?" - I think this is exactly the reason why the community started to use DCGANs. To me, the results in the git repo you provided are quite noisy. Certainly, they are different from what you get, and you can achieve the same quality just by changing the activation function from tanh to sigmoid, as in the example on GitHub:
G = nn.Sequential()
with G.name_scope():
    G.add(nn.Dense(300, activation="relu"))
    G.add(nn.Dense(28 * 28, activation="sigmoid"))

D = nn.Sequential()
with D.name_scope():
    D.add(nn.Dense(128, activation="relu"))
    D.add(nn.Dense(64, activation="relu"))
    D.add(nn.Dense(32, activation="relu"))
    D.add(nn.Dense(2, activation="sigmoid"))
Sigmoid never goes below zero, and it works better in this scenario. If I train the updated model for 30 epochs (keeping the rest of the hyperparameters the same), I get a sample image of comparable quality.
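As a side note, if one wanted to keep tanh in the last layer of the generator instead, the usual trick is to rescale the real data so it matches the generator's output range. A minimal sketch (this rescaling is an assumption on my part, not something from the original post):

# map real pixels from [0, 1] to [-1, 1] so they match a tanh generator's output
flattened_training_data = flattened_training_data * 2.0 - 1.0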
If you decide to explore DCGANs to get better results, take a look here - https://mxnet.incubator.apache.org/tutorials/unsupervised_learning/gan.html - it is a well-explained tutorial on how to build a DCGAN using MXNet and Gluon. By using a DCGAN you will get way better results than these.