我有一个使用 PyTorch 的语义分割模型。为了参加比赛,我使用 PyInstaller 和 UPX 将 test.py 打包成了 exe 文件。尽管生成的可执行文件可以正常运行,但其大小接近 800MB。如何缩小它的体积?
这是我的test.py:
from torch import nn
from torch.autograd import Variable as V
from torch import Tensor
from torch import cuda
from torch import load
import cv2
import os
import numpy as np
from time import time
from networks.unet import Unet
# from networks.dunet import Dunet
# from networks.dinknet import LinkNet34, DinkNet34, DinkNet50, DinkNet101, DinkNet34_less_pool
# from networks.dinkbranch import DinkBranch50, DinkBranch34
# Number of images each GPU is expected to process in one forward pass; used
# to choose how the eight augmented views are split into batches.
BATCHSIZE_PER_CARD = 2


class TTAFrame():
    """Run a segmentation network with 8-way test-time augmentation (TTA).

    Every image is evaluated in eight orientations: the original and its
    90-degree rotation, each un-flipped, flipped along the row axis, flipped
    along the column axis, and flipped along both. The eight predictions are
    mapped back to the original orientation and summed, so every returned
    pixel is the sum of eight network outputs.

    The image and its rotation are stacked into one batch, so input images
    must be square.
    """

    def __init__(self, net):
        """Build the network on the GPU and wrap it in ``nn.DataParallel``.

        Args:
            net: Zero-argument callable (for example a model class) that
                returns the ``nn.Module`` to evaluate.
        """
        self.net = net().cuda()
        self.net = nn.DataParallel(self.net, device_ids=range(cuda.device_count()))

    def test_one_img_from_path(self, path, evalmode = True):
        """Predict the TTA-summed mask for the image stored at ``path``.

        The eight augmented views are split into 1, 2 or 4 forward passes
        depending on ``cuda.device_count() * BATCHSIZE_PER_CARD``.

        Args:
            path: Path of the image file to read.
            evalmode: When true, switch the network to eval mode first.

        Returns:
            2-D NumPy array holding the sum of the eight predictions.

        Raises:
            IOError: If the image cannot be read.
        """
        if evalmode:
            self.net.eval()
        batchsize = cuda.device_count() * BATCHSIZE_PER_CARD
        if batchsize >= 8:
            return self.test_one_img_from_path_1(path)
        if batchsize >= 4:
            return self.test_one_img_from_path_2(path)
        # Smallest supported batch (two images per pass). Also used when the
        # computed batch size is below 2, instead of silently returning None.
        return self.test_one_img_from_path_4(path)

    def test_one_img_from_path_8(self, path):
        """Backward-compatible alias of :meth:`test_one_img_from_path_4`."""
        return self.test_one_img_from_path_4(path)

    def test_one_img_from_path_4(self, path):
        """Evaluate the eight views in four forward passes of two images."""
        base, vflip, hflip, both = self._tta_views(self._read_image(path))
        maska = self._predict(base)
        maskb = self._predict(vflip)
        maskc = self._predict(hflip)
        maskd = self._predict(both)
        # Undo each flip before summing so all predictions are aligned.
        mask1 = maska + maskb[:, ::-1] + maskc[:, :, ::-1] + maskd[:, ::-1, ::-1]
        return self._merge_rotation(mask1)

    def test_one_img_from_path_2(self, path):
        """Evaluate the eight views in two forward passes of four images."""
        base, vflip, hflip, both = self._tta_views(self._read_image(path))
        maska = self._predict(np.concatenate([base, vflip]))
        maskb = self._predict(np.concatenate([hflip, both]))
        mask1 = maska + maskb[:, :, ::-1]      # undo the column flip
        mask2 = mask1[:2] + mask1[2:, ::-1]    # undo the row flip
        return self._merge_rotation(mask2)

    def test_one_img_from_path_1(self, path):
        """Evaluate all eight views in a single forward pass."""
        base, vflip, hflip, both = self._tta_views(self._read_image(path))
        mask = self._predict(np.concatenate([base, vflip, hflip, both]))
        mask1 = mask[:4] + mask[4:, :, ::-1]   # undo the column flip
        mask2 = mask1[:2] + mask1[2:, ::-1]    # undo the row flip
        return self._merge_rotation(mask2)

    def load(self, path):
        """Load network weights from the checkpoint file at ``path``."""
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        self.net.load_state_dict(load(path))

    @staticmethod
    def _read_image(path):
        """Read an image with OpenCV, raising IOError when it cannot be read."""
        img = cv2.imread(path)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise IOError('cannot read image: %s' % path)
        return img

    @staticmethod
    def _tta_views(img):
        """Return four 2-image batches: plain, row-flipped, column-flipped, both.

        Each batch has layout (2, H, W, C) and holds the image followed by
        its 90-degree rotation.
        """
        img90 = np.array(np.rot90(img))
        base = np.concatenate([img[None], img90[None]])
        vflip = base[:, ::-1]
        hflip = base[:, :, ::-1]
        both = vflip[:, :, ::-1]
        return base, vflip, hflip, both

    def _predict(self, batch):
        """Run the network on an (N, H, W, C) batch and return NumPy masks."""
        # Channels-first layout; pixel values are mapped linearly so that
        # 0 -> -1.6 and 255 -> 1.6.
        scaled = np.array(batch.transpose(0, 3, 1, 2), np.float32)/255.0 * 3.2 - 1.6
        inputs = V(Tensor(scaled).cuda())
        return self.net.forward(inputs).squeeze().cpu().data.numpy()

    @staticmethod
    def _merge_rotation(pair):
        """Add the rotated-view prediction back onto the original-view one.

        ``pair[1]`` was predicted on the 90-degree rotated image; rotating it
        by a further 90 degrees and reversing both axes restores the original
        orientation.
        """
        return pair[0] + np.rot90(pair[1])[::-1, ::-1]
# Batch-inference driver: run the TTA solver on every file in the source
# directory and write one black/white mask PNG per input to the target
# directory.
import sys

# The source directory is the first command-line argument when one is given.
if len(sys.argv) < 2:
    arg1 = r'dataset/504/original'
else:
    arg1 = sys.argv[1]
source = arg1
source_path = os.path.join(os.getcwd(), source)
val = os.listdir(source_path)

solver = TTAFrame(Unet)
model_path = r'weights/log02_Unet.th'
solver.load(os.path.join(os.getcwd(), model_path))

tic = time()
target = r'submits/log02_baseline504'
target_path = os.path.join(os.getcwd(), target)
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(target_path, exist_ok=True)

for i, name in enumerate(val):
    if i % 10 == 0:
        # Progress line every ten images: counter and elapsed seconds.
        print(i/10, ' ', '%.2f' % (time() - tic))
    mask = solver.test_one_img_from_path(os.path.join(source_path, name))
    # The solver returns the sum of eight predictions, so 4.0 corresponds to
    # a mean prediction of 0.5.
    binary = np.where(mask > 4.0, 255, 0).astype(np.uint8)
    # Replicate the single channel into a 3-channel image.
    mask = np.repeat(binary[:, :, None], 3, axis=2)
    # name[:-7] drops the last seven characters of the input file name
    # (presumably a 'sat.jpg'-style suffix -- TODO confirm the naming scheme).
    cv2.imwrite(os.path.join(target_path, name[:-7] + 'mask.png'), mask)
这是 Unet 模型文件(networks/unet.py):
from torch import autograd, cat
from torch import nn
class Unet(nn.Module):
    """U-Net with seven encoder/decoder levels and a 1024-channel bottleneck.

    Takes a 3-channel input and returns a 1-channel map passed through a
    sigmoid. The input is max-pooled by 2 seven times and each decoder level
    is concatenated with the matching encoder output, so the spatial size
    should be a multiple of 128 for those shapes to line up.
    """

    def __init__(self):
        super(Unet, self).__init__()

        # Channel width per depth: index 0 is the input, index 8 the bottleneck.
        widths = (3, 8, 16, 32, 64, 128, 256, 512, 1024)

        # Encoder stages down1 .. down7.
        for level in range(1, 8):
            stage = self.conv_stage(widths[level - 1], widths[level])
            setattr(self, 'down%d' % level, stage)

        self.center = self.conv_stage(widths[7], widths[8])

        # Decoder stages up7 .. up1; each consumes the upsampled features
        # concatenated with the skip connection (twice the output width).
        for level in range(7, 0, -1):
            stage = self.conv_stage(widths[level + 1], widths[level])
            setattr(self, 'up%d' % level, stage)

        # Transposed-convolution upsamplers trans7 .. trans1.
        for level in range(7, 0, -1):
            stage = self.upsample(widths[level + 1], widths[level])
            setattr(self, 'trans%d' % level, stage)

        self.conv_last = nn.Sequential(nn.Conv2d(8, 1, 3, 1, 1), nn.Sigmoid())
        self.max_pool = nn.MaxPool2d(2)

        # Start every (transposed) convolution bias at zero.
        for module in self.modules():
            is_conv = isinstance(module, (nn.Conv2d, nn.ConvTranspose2d))
            if is_conv and module.bias is not None:
                module.bias.data.zero_()

    def conv_stage(self, dim_in, dim_out, kernel_size=3, stride=1, padding=1, bias=True, useBN=False):
        """Return two stacked conv + ReLU layers, with BatchNorm after each conv if ``useBN``."""
        layers = []
        for channels_in in (dim_in, dim_out):
            layers.append(nn.Conv2d(channels_in, dim_out, kernel_size=kernel_size,
                                    stride=stride, padding=padding, bias=bias))
            if useBN:
                layers.append(nn.BatchNorm2d(dim_out))
            layers.append(nn.ReLU())
        return nn.Sequential(*layers)

    def upsample(self, ch_coarse, ch_fine):
        """Return a bias-free stride-2 transposed convolution followed by ReLU."""
        deconv = nn.ConvTranspose2d(ch_coarse, ch_fine, 4, 2, 1, bias=False)
        return nn.Sequential(deconv, nn.ReLU())

    def forward(self, x):
        """Run the encoder, bottleneck and decoder; return the sigmoid output map."""
        pool = self.max_pool

        # Encoder: keep every stage output for the skip connections.
        skip1 = self.down1(x)
        skip2 = self.down2(pool(skip1))
        skip3 = self.down3(pool(skip2))
        skip4 = self.down4(pool(skip3))
        skip5 = self.down5(pool(skip4))
        skip6 = self.down6(pool(skip5))
        skip7 = self.down7(pool(skip6))

        feat = self.center(pool(skip7))

        # Decoder: upsample, concatenate with the skip along channels, convolve.
        feat = self.up7(cat((self.trans7(feat), skip7), 1))
        feat = self.up6(cat((self.trans6(feat), skip6), 1))
        feat = self.up5(cat((self.trans5(feat), skip5), 1))
        feat = self.up4(cat((self.trans4(feat), skip4), 1))
        feat = self.up3(cat((self.trans3(feat), skip3), 1))
        feat = self.up2(cat((self.trans2(feat), skip2), 1))
        feat = self.up1(cat((self.trans1(feat), skip1), 1))

        return self.conv_last(feat)
答案 0 :(得分:1)
PyInstaller 生成的其实是一种“伪”.exe。它不会编译脚本,而是将所需的内容(包括 Python 解释器)打包到一个(或多个)文件中。
要真正摆脱对 Python 的依赖,您应该使用 TorchScript 转换模型(详情参见 TorchScript 官方文档)。这样就可以使用 C++ 的 libtorch 来运行模块,而无需 Python 解释器。
答案 1 :(得分:0)
您也可以执行以下操作:
* 将您的模型和参数转换为 ONNX 格式;
* 使用体积只有约 14MB 的 onnx 包(pip install onnx)。
请参阅: https://pytorch.org/tutorials/advanced/super_resolution_with_caffe2.html