I made a reproducible example with random pixels. I'm trying to flatten the tensor after the convolutional layers to feed it into the dense layer. The problem is at the junction between the convolutional and dense layers: I don't know how to work out the right number of neurons.
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

x = np.random.rand(1_00, 3, 100, 100)
y = np.random.randint(0, 2, 1_00)

if torch.cuda.is_available():
    x = torch.from_numpy(x.astype('float32')).cuda()
    y = torch.from_numpy(y.astype('float32')).cuda()

class ConvNet(nn.Module):

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.conv3 = nn.Conv2d(64, 128, 3)
        self.fc1 = nn.Linear(128, 1024)  # 128 is wrong here
        self.fc2 = nn.Linear(1024, 1)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        return x

net = ConvNet()
net.cuda()

optimizer = optim.Adam(net.parameters(), lr=0.03)
loss_function = nn.BCELoss()

class Train:

    def __init__(self):
        self.len = x.shape[0]
        self.x_train = x
        self.y_train = y

    def __getitem__(self, index):
        return x[index], y[index].unsqueeze(0)

    def __len__(self):
        return self.len

train = Train()
train_loader = DataLoader(dataset=train, batch_size=64, shuffle=True)

epochs = 1
train_losses = list()
for e in range(epochs):
    running_loss = 0
    for images, labels in train_loader:
        optimizer.zero_grad()
        log_ps = net(images)
        loss = loss_function(log_ps, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

print('It\'s working.')
Answer 0 (score: 1)
Here is my function that automatically works out the right number of neurons when flattening a convolutional tensor:
def flatten(w, k=3, s=1, p=0, m=True):
    """
    Returns the right size of the flattened tensor after
    a convolutional transformation.
    :param w: width of image
    :param k: kernel size
    :param s: stride
    :param p: padding
    :param m: max pooling (bool)
    :return: proper shape and params: use x * x * previous_out_channels

    Example:
    r = flatten(*flatten(*flatten(w=100, k=3, s=1, p=0, m=True)))[0]
    self.fc1 = nn.Linear(r*r*128, 1024)
    """
    conv_out = np.floor((w - k + 2 * p) / s) + 1  # standard conv output size
    return int(conv_out / 2 if m else conv_out), k, s, p, m
In your case:
def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(3, 32, 3)
    self.conv2 = nn.Conv2d(32, 64, 3)
    self.conv3 = nn.Conv2d(64, 128, 3)
    r = flatten(*flatten(*flatten(w=100, k=3, s=1, p=0, m=True)))[0]
    self.fc1 = nn.Linear(r*r*128, 1024)
    self.fc2 = nn.Linear(1024, 1)

def forward(self, x): ...
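As a quick sanity check (my own sketch, not part of the original answer), the value flatten() computes agrees with what a dummy forward pass through the same conv/pool stack reports:

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

r = flatten(*flatten(*flatten(w=100, k=3, s=1, p=0, m=True)))[0]
print(r)  # 10, so the flattened size is 10 * 10 * 128 = 12800

# Push a dummy batch through the same conv/pool stack and compare
conv1, conv2, conv3 = nn.Conv2d(3, 32, 3), nn.Conv2d(32, 64, 3), nn.Conv2d(64, 128, 3)
out = torch.zeros(1, 3, 100, 100)
for conv in (conv1, conv2, conv3):
    out = F.max_pool2d(F.relu(conv(out)), (2, 2))
print(out.shape)  # torch.Size([1, 128, 10, 10]) -> matches r * r * 128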
Answer 1 (score: 0)
You must be getting a size mismatch error, right? That's because after the convolutions the output has shape [B, 128, 10, 10], so flattening it gives [B, 128*10*10]. Hence you need a linear layer with an input size of 12800. That should fix the problem.
So, just change

self.fc1 = nn.Linear(128, 1024)  # 128 is wrong here

to

self.fc1 = nn.Linear(12800, 1024)
In general, to get the proper size you can either work out the output shape on paper, or simply put a print(x.shape) debug statement at the right place in the forward function.
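For instance, here is a minimal sketch of that debugging approach applied to the network above (the shape comments are what this particular stack produces for 100x100 inputs):

def forward(self, x):
    x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
    print(x.shape)  # torch.Size([B, 32, 49, 49])
    x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
    print(x.shape)  # torch.Size([B, 64, 23, 23])
    x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))
    print(x.shape)  # torch.Size([B, 128, 10, 10]) -> fc1 needs 128 * 10 * 10 = 12800
    x = x.view(x.size(0), -1)
    x = F.relu(self.fc1(x))
    x = torch.sigmoid(self.fc2(x))
    return x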