我创建了一个神经网络,它采用两个 14x14 像素的灰度图像来描绘一个数字(来自 MNIST 数据库),如果第一个数字小于或等于第二个数字,则返回 1,否则返回 0。 代码运行,但每次初始权重都相同。它们应该是随机的 通过在 Net 类中使用以下代码行来强制初始权重是随机的,无济于事。
torch.nn.init.normal_(self.layer1.weight, mean=0.0, std=0.01)
这是“main.py”文件的代码:
import os; os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
import torch
import torch.nn as nn
from dlc_practical_prologue import *
class Net(nn.Module):
def __init__(self):
super().__init__()
self.layer1 = nn.Linear(2*14*14, 32)
#torch.nn.init.normal_(self.layer1.weight, mean=0.0, std=0.01)
#self.layer2 = nn.Linear(100, 100)
#self.layer3 = nn.Linear(100, 100)
self.layer2 = nn.Linear(32, 1)
def forward(self, x):
x = torch.relu(self.layer1(x))
#x = torch.relu(self.layer2(x))
#x = torch.relu(self.layer3(x))
x = torch.sigmoid(self.layer2(x))
return x
if __name__ == '__main__':
# Data initialization
N = 1000
train_input, train_target, train_classes, _, _, _, = generate_pair_sets(N)
_, _, _, test_input, test_target, test_classes = generate_pair_sets(N)
train_input = train_input.view(-1, 2*14*14)
test_input = test_input.view(-1, 2*14*14)
train_target = train_target.view(-1, 1)
test_target = test_target.view(-1, 1)
# I convert the type to torch.float32
train_input, train_target, train_classes, test_input, test_target, test_classes = \
train_input.type(torch.float32), train_target.type(torch.float32), train_classes.type(torch.long), \
test_input.type(torch.float32), test_target.type(torch.float32), test_classes.type(torch.long)
# Create the neural network
net = Net()
# Training
learning_rate = 0.01
# Use MSELoss
loss = nn.MSELoss()
# Use Adam optimizer
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
EPOCHS = 50
for param in net.parameters():
print(param)
for epoch in range(EPOCHS):
target_predicted = net(train_input)
l = loss(train_target, target_predicted) #loss = nn.MSELoss()
#l = loss(target_predicted, train_target)
l.backward()
optimizer.step()
optimizer.zero_grad()
#print(l)
# Testing
total = 1000
correct = 0
with torch.no_grad():
correct = ( test_target == net(test_input).round() ).sum()
print("Accuracy %.2f%%" % (correct / total * 100))
这是“dlc_practical_monologue.py”的代码:
import os; os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
import torch
from torchvision import datasets
import argparse
import os
import urllib
######################################################################
parser = argparse.ArgumentParser(description='DLC prologue file for practical sessions.')
parser.add_argument('--full',
action='store_true', default=False,
help = 'Use the full set, can take ages (default False)')
parser.add_argument('--tiny',
action='store_true', default=False,
help = 'Use a very small set for quick checks (default False)')
parser.add_argument('--seed',
type = int, default = 0,
help = 'Random seed (default 0, < 0 is no seeding)')
parser.add_argument('--cifar',
action='store_true', default=False,
help = 'Use the CIFAR data-set and not MNIST (default False)')
parser.add_argument('--data_dir',
type = str, default = None,
help = 'Where are the PyTorch data located (default $PYTORCH_DATA_DIR or \'./data\')')
# Timur's fix
parser.add_argument('-f', '--file',
help = 'quick hack for jupyter')
args = parser.parse_args()
if args.seed >= 0:
torch.manual_seed(args.seed)
######################################################################
# The data
def convert_to_one_hot_labels(input, target):
tmp = input.new_zeros(target.size(0), target.max() + 1)
tmp.scatter_(1, target.view(-1, 1), 1.0)
return tmp
def load_data(cifar = None, one_hot_labels = False, normalize = False, flatten = True):
if args.data_dir is not None:
data_dir = args.data_dir
else:
data_dir = os.environ.get('PYTORCH_DATA_DIR')
if data_dir is None:
data_dir = './data'
if args.cifar or (cifar is not None and cifar):
print('* Using CIFAR')
cifar_train_set = datasets.CIFAR10(data_dir + '/cifar10/', train = True, download = True)
cifar_test_set = datasets.CIFAR10(data_dir + '/cifar10/', train = False, download = True)
train_input = torch.from_numpy(cifar_train_set.data)
train_input = train_input.transpose(3, 1).transpose(2, 3).float()
train_target = torch.tensor(cifar_train_set.targets, dtype = torch.int64)
test_input = torch.from_numpy(cifar_test_set.data).float()
test_input = test_input.transpose(3, 1).transpose(2, 3).float()
test_target = torch.tensor(cifar_test_set.targets, dtype = torch.int64)
else:
print('* Using MNIST')
######################################################################
# import torchvision
# raw_folder = data_dir + '/mnist/raw/'
# resources = [
# ("https://fleuret.org/dlc/data/train-images-idx3-ubyte.gz", "f68b3c2dcbeaaa9fbdd348bbdeb94873"),
# ("https://fleuret.org/dlc/data/train-labels-idx1-ubyte.gz", "d53e105ee54ea40749a09fcbcd1e9432"),
# ("https://fleuret.org/dlc/data/t10k-images-idx3-ubyte.gz", "9fb629c4189551a2d022fa330f9573f3"),
# ("https://fleuret.org/dlc/data/t10k-labels-idx1-ubyte.gz", "ec29112dd5afa0611ce80d1b7f02629c")
# ]
# os.makedirs(raw_folder, exist_ok=True)
# # download files
# for url, md5 in resources:
# filename = url.rpartition('/')[2]
# torchvision.datasets.utils.download_and_extract_archive(url, download_root=raw_folder, filename=filename, md5=md5)
######################################################################
mnist_train_set = datasets.MNIST(data_dir + '/mnist/', train = True, download = True)
mnist_test_set = datasets.MNIST(data_dir + '/mnist/', train = False, download = True)
train_input = mnist_train_set.data.view(-1, 1, 28, 28).float()
train_target = mnist_train_set.targets
test_input = mnist_test_set.data.view(-1, 1, 28, 28).float()
test_target = mnist_test_set.targets
if flatten:
train_input = train_input.clone().reshape(train_input.size(0), -1)
test_input = test_input.clone().reshape(test_input.size(0), -1)
if args.full:
if args.tiny:
raise ValueError('Cannot have both --full and --tiny')
else:
if args.tiny:
print('** Reduce the data-set to the tiny setup')
train_input = train_input.narrow(0, 0, 500)
train_target = train_target.narrow(0, 0, 500)
test_input = test_input.narrow(0, 0, 100)
test_target = test_target.narrow(0, 0, 100)
else:
print('** Reduce the data-set (use --full for the full thing)')
train_input = train_input.narrow(0, 0, 1000)
train_target = train_target.narrow(0, 0, 1000)
test_input = test_input.narrow(0, 0, 1000)
test_target = test_target.narrow(0, 0, 1000)
print('** Use {:d} train and {:d} test samples'.format(train_input.size(0), test_input.size(0)))
if one_hot_labels:
train_target = convert_to_one_hot_labels(train_input, train_target)
test_target = convert_to_one_hot_labels(test_input, test_target)
if normalize:
mu, std = train_input.mean(), train_input.std()
train_input.sub_(mu).div_(std)
test_input.sub_(mu).div_(std)
return train_input, train_target, test_input, test_target
######################################################################
def mnist_to_pairs(nb, input, target):
input = torch.functional.F.avg_pool2d(input, kernel_size = 2)
a = torch.randperm(input.size(0))
a = a[:2 * nb].view(nb, 2)
input = torch.cat((input[a[:, 0]], input[a[:, 1]]), 1)
classes = target[a]
target = (classes[:, 0] <= classes[:, 1]).long()
return input, target, classes
######################################################################
def generate_pair_sets(nb):
if args.data_dir is not None:
data_dir = args.data_dir
else:
data_dir = os.environ.get('PYTORCH_DATA_DIR')
if data_dir is None:
data_dir = './data'
train_set = datasets.MNIST(data_dir + '/mnist/', train = True, download = True)
train_input = train_set.data.view(-1, 1, 28, 28).float()
train_target = train_set.targets
test_set = datasets.MNIST(data_dir + '/mnist/', train = False, download = True)
test_input = test_set.data.view(-1, 1, 28, 28).float()
test_target = test_set.targets
return mnist_to_pairs(nb, train_input, train_target) + \
mnist_to_pairs(nb, test_input, test_target)
######################################################################
请注意,我必须添加以下代码行才能在 Windows 10 上运行代码,而无需在 Linux 上运行。
import os; os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
同样在 Linux 上,我总是得到相同的初始权重。
拜托,你能帮我吗?
答案 0 :(得分:2)
如果我错了,请纠正我,但每次运行时只有第一层的权重应该相同。问题是当您导入 dlc_practical_monologue.py
时,里面有这个东西:
if args.seed >= 0:
torch.manual_seed(args.seed)
如果种子 >=0(默认为 0),则启动。 这应该只为每次运行使用相同的权重初始化第一层。检查是否是这种情况。
答案 1 :(得分:-1)
解决方案是从“dlv_practical_prologue.py”中删除以下几行:
if args.seed >= 0:
torch.manual_seed(args.seed)