I am trying to implement a mixture density network (MDN) that learns a mixture of Gaussians. I want to use the MDN to fit a conditional probability distribution p(y|x). The model input x has shape (batch_size, 1), and y (the label) has shape (batch_size, 1). My code is below.
Is there something wrong with my data? When I train on a synthetically generated dataset, the results are correct.
Thanks.
import math

import numpy as np
import pandas as pd
import torch
import torch.nn as nn


class MDN(nn.Module):
    """A mixture density network layer.

    The input maps to the parameters of a MoG probability distribution, where
    each Gaussian has O dimensions and diagonal covariance.

    Arguments:
        in_features (int): the number of dimensions in the input
        out_features (int): the number of dimensions in the output
        num_gaussians (int): the number of Gaussians per output dimension

    Input:
        minibatch (BxD): B is the batch size and D is the number of input
            dimensions.

    Output:
        (pi, sigma, mu) (BxG, BxGxO, BxGxO): B is the batch size, G is the
            number of Gaussians, and O is the number of dimensions for each
            Gaussian. Pi is a multinomial distribution over the Gaussians.
            Sigma is the standard deviation of each Gaussian. Mu is the mean
            of each Gaussian.
    """

    def __init__(self, n_hidden, num_gaussians, in_features=1, out_features=1):
        super(MDN, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.num_gaussians = num_gaussians
        # Shared hidden layer. This was hard-coded as nn.Linear(1, n_hidden),
        # which silently ignored the in_features argument.
        self.z_h = nn.Sequential(
            nn.Linear(in_features, n_hidden)
        )
        # Mixture weights: a categorical distribution over the G components.
        self.pi = nn.Sequential(
            nn.Linear(n_hidden, num_gaussians),
            nn.Softmax(dim=1)
        )
        self.sigma = nn.Linear(n_hidden, out_features * num_gaussians)
        self.mu = nn.Linear(n_hidden, out_features * num_gaussians)

    def forward(self, minibatch):
        z_h = self.z_h(minibatch)
        pi = self.pi(z_h)
        # exp() keeps the predicted standard deviations strictly positive.
        sigma = torch.exp(self.sigma(z_h))
        sigma = sigma.view(-1, self.num_gaussians, self.out_features)
        mu = self.mu(z_h)
        mu = mu.view(-1, self.num_gaussians, self.out_features)
        return pi, sigma, mu
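For reference, a quick shape check with random inputs (a minimal sketch; the batch size of 4 is arbitrary) confirms the output shapes documented above:

# Minimal shape check with dummy data (batch size 4 is arbitrary).
net = MDN(n_hidden=20, num_gaussians=5)
x = torch.randn(4, 1)
pi, sigma, mu = net(x)
print(pi.shape, sigma.shape, mu.shape)
# torch.Size([4, 5]) torch.Size([4, 5, 1]) torch.Size([4, 5, 1])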
ONEOVERSQRT2PI = 1.0 / math.sqrt(2 * math.pi)


def gaussian_probability(sigma, mu, target):
    """Returns the probability of `target` given MoG parameters `sigma` and `mu`.

    Arguments:
        sigma (BxGxO): The standard deviations of the Gaussians. B is the
            batch size, G is the number of Gaussians, and O is the number of
            dimensions per Gaussian.
        mu (BxGxO): The means of the Gaussians. B is the batch size, G is the
            number of Gaussians, and O is the number of dimensions per
            Gaussian.
        target (BxO): A batch of targets. B is the batch size and O is the
            number of output dimensions per Gaussian.

    Returns:
        probabilities (BxG): The probability density of each target under the
            Gaussian at the corresponding sigma/mu index.
    """
    target = target.unsqueeze(1).expand_as(sigma)
    # Diagonal Gaussian density. Note that this is a probability *density*,
    # not a probability, so individual values can exceed 1 when sigma is small.
    ret = ONEOVERSQRT2PI * torch.exp(-0.5 * ((target - mu) / sigma) ** 2) / sigma
    # Multiply densities across the O output dimensions (diagonal covariance).
    return torch.prod(ret, 2)
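As a sanity check (a minimal sketch, not part of the original code), the density should agree with torch.distributions.Normal:

# Hypothetical sanity check: compare against torch.distributions.Normal.
B, G, O = 4, 5, 1
sigma = torch.rand(B, G, O) + 0.1
mu = torch.randn(B, G, O)
target = torch.randn(B, O)
ours = gaussian_probability(sigma, mu, target)
ref = torch.distributions.Normal(mu, sigma).log_prob(
    target.unsqueeze(1).expand_as(sigma)
).exp().prod(dim=2)
print(torch.allclose(ours, ref, atol=1e-6))  # True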
def mdn_loss(pi, sigma, mu, target):
    """Calculates the error, given the MoG parameters and the target.

    The loss is the negative log-likelihood of the data given the MoG
    parameters.
    """
    # Per-sample mixture density: components weighted by pi, summed over G.
    prob = pi * gaussian_probability(sigma, mu, target)
    # NLL of a continuous *density* is unbounded below: whenever the mixture
    # density at the target exceeds 1, -log(...) is negative.
    nll = -torch.log(torch.sum(prob, dim=1))
    return torch.mean(nll)
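For intuition, a toy example (values chosen purely for illustration) shows how this loss goes negative once the density at the target exceeds 1:

# Toy example (illustrative values): a single sharp Gaussian centered on the
# target has density 1/(sigma*sqrt(2*pi)) > 1 whenever sigma < ~0.3989, so
# the negative log-likelihood is negative.
toy_pi = torch.tensor([[1.0]])
toy_sigma = torch.tensor([[[0.1]]])
toy_mu = torch.tensor([[[0.0]]])
toy_target = torch.tensor([[0.0]])
print(mdn_loss(toy_pi, toy_sigma, toy_mu, toy_target))
# roughly -1.38, i.e. -log(1 / (0.1 * sqrt(2 * pi)))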
network = MDN(n_hidden=20, num_gaussians=5)
optimizer = torch.optim.Adam(network.parameters(), lr=0.0001)

data = pd.read_csv("./experiment/data/sorted_lineitem_1g_quantity_extendedprice.csv")
data.sort_values("extendedprice", inplace=True)
data.reset_index(drop=True, inplace=True)

# Subsample 0.1% of the rows for training.
sample = data.sample(frac=0.001)
quantity = np.array(sample["quantity"])
extendedprice = np.array(sample["extendedprice"])

max_x = np.max(extendedprice)
min_x = np.min(extendedprice)
mean_x = np.mean(extendedprice)
stdev_x = np.std(extendedprice)
max_y = np.max(quantity)
min_y = np.min(quantity)
mean_y = np.mean(quantity)
stdev_y = np.std(quantity)


def normalize(x, min_x, max_x):
    return (x - min_x) / (max_x - min_x)


def standardization(x, mean, stdev):
    return (x - mean) / stdev


# Standardize both variables to zero mean and unit variance. The functions
# operate elementwise, so they apply to the whole array at once.
x_data = standardization(extendedprice, mean_x, stdev_x)
# x_data = extendedprice
y_data = standardization(quantity, mean_y, stdev_y)
# y_data = quantity

x_variable = torch.from_numpy(x_data.astype(np.float32).reshape(-1, 1))
y_variable = torch.from_numpy(y_data.astype(np.float32).reshape(-1, 1))
def train_mdn():
    # Full-batch training: every step uses the entire subsample.
    for epoch in range(10000):
        pi_variable, sigma_variable, mu_variable = network(x_variable)
        loss = mdn_loss(pi_variable, sigma_variable, mu_variable, y_variable)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch % 500 == 0:
            print(epoch, loss.item())


train_mdn()
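Once trained, the fitted conditional distribution p(y|x) can be queried. Below is a minimal sampling sketch (not part of the original code; the query points and helper name sample_mdn are my own) that draws one y per input x by first picking a mixture component and then sampling from its Gaussian:

# Hypothetical usage sketch: draw one sample of y for each query x.
def sample_mdn(x):
    with torch.no_grad():
        pi, sigma, mu = network(x)
        # Choose a mixture component per row according to its weight.
        ks = torch.multinomial(pi, num_samples=1)           # (B, 1)
        idx = ks.unsqueeze(-1).expand(-1, 1, network.out_features)
        chosen_mu = mu.gather(1, idx).squeeze(1)            # (B, O)
        chosen_sigma = sigma.gather(1, idx).squeeze(1)      # (B, O)
        return torch.normal(chosen_mu, chosen_sigma)

samples = sample_mdn(x_variable[:10])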
Output:
0 1.9179250001907349
500 0.6071352362632751
1000 0.12722773849964142
1500 0.05316021665930748
2000 0.026438187807798386
2500 0.011702506802976131
3000 -0.00010265530727338046
3500 -0.011923379264771938
My question is: why does the loss become negative as training progresses?