我是Python新手(而且此前没有用过文件IO),所以如果我犯了初学者常见的错误,请告诉我。
我有大约5 MB的.bmp图像。我想取两个图像的平均值并将平均值保存在另一个文件目录中。该公司笔记本电脑是8 GB RAM,64位,处理器是AMD A10-7300 Radeon R6,10计算核心4C + 6G 1.9 GHz
我做到了这一点,但现在我的实习主管希望我加快保存过程(目前处理500张图像大约需要2-3分钟)。我使用的是函数 imResult.save(currentSavePath, "bmp")。
以下是图像保存代码:
# function for file selection 2
def FileSelect2(self, event):
    """Let the user choose the second root directory and index its .bmp files.

    Shows a directory chooser. If the user cancels, no state is changed.
    Otherwise the chosen path is stored in ``self.rootDir2``,
    ``self.subdirArray2`` is rebuilt with the path of every file below it
    whose extension is exactly ``.bmp``, ``self.blocker`` is cleared, the
    status text colour is reset to black and the chosen path is shown in
    ``self.fileDisplay2``.

    Args:
        event: the wx event that triggered this handler (not used).
    """
    dirDialog = wx.DirDialog(self, "Choose a directory:", style=wx.DD_DEFAULT_STYLE)
    try:
        # user canceled file opening
        if dirDialog.ShowModal() == wx.ID_CANCEL:
            return
        # otherwise, proceed with the directory chosen by the user
        self.rootDir2 = dirDialog.GetPath()
    finally:
        # A wx dialog is not released when the Python name goes out of
        # scope; destroy it explicitly so one is not leaked per call.
        dirDialog.Destroy()

    self.subdirArray2 = []
    for dirName, _, fileList in os.walk(self.rootDir2):
        for fname in fileList:
            if os.path.splitext(fname)[1] == '.bmp':
                # NOTE(review): the backslash separator is kept on purpose.
                # CheckIfFilesMatch compares these strings against the ones
                # built for the first directory, so both selectors must
                # build paths the same way - switch both to os.path.join
                # together, not just this one.
                self.subdirArray2.append(dirName + '\\' + fname)

    self.fileDisplay2.Clear()
    self.statusText.SetForegroundColour(wx.BLACK)
    self.blocker = False
    self.fileDisplay2.AppendText(self.rootDir2)
# function for making sure the directory matches
def CheckIfFilesMatch(self):
    """Check that both selected directory trees contain the same files.

    Each path in ``self.subdirArray1`` must have a counterpart in
    ``self.subdirArray2`` once its leading ``self.rootDir1`` is swapped for
    ``self.rootDir2``, and the other way round.

    On failure the reason is written to ``self.statusText`` in red and
    ``self.blocker`` is set to True. On success no state is touched.

    Returns:
        True when the two trees match, False otherwise.
    """
    def fail(message):
        # Shared failure path: show the message, block processing, paint red.
        self.statusText.SetValue(message)
        self.blocker = True
        self.statusText.SetForegroundColour(wx.RED)
        return False

    if len(self.subdirArray1) != len(self.subdirArray2):
        return fail("please enter same amount of files")

    # Sets make each membership test O(1); the lists are still iterated in
    # their original order so the first offending file is the one reported.
    secondPaths = set(self.subdirArray2)
    for f in self.subdirArray1:
        # count=1: only the leading root is swapped, even if the same text
        # happens to occur again deeper in the path.
        if f.replace(self.rootDir1, self.rootDir2, 1) not in secondPaths:
            return fail("This file: " + f + " does not correspond to any file in parallel.")

    firstPaths = set(self.subdirArray1)
    for f in self.subdirArray2:
        if f.replace(self.rootDir2, self.rootDir1, 1) not in firstPaths:
            return fail("This file: " + f + " does not correspond to any file in parallel.")

    # Explicit success value; previously the function fell off the end and
    # returned None.
    return True
def Average(self, event):
    """Average each pair of matching images and save the results.

    Runs ``CheckIfFilesMatch`` and stops if ``self.blocker`` is set. Then
    asks for an output directory, reads the integer offset from
    ``self.offsetCtrl`` and, for every path in ``self.subdirArray1``:

    * opens it and its counterpart under ``self.rootDir2``,
    * computes ``(image1 + image2 + offset) // 2`` clamped to 0..255,
    * saves the result as BMP under the chosen directory, at the same
      relative path (missing sub-directories are created).

    Progress and errors are reported through ``self.statusText``.
    Processing stops at the first file that raises IOError.
    ``self.count`` holds the number of pairs opened so far.

    Args:
        event: the wx event that triggered this handler (not used).
    """
    self.CheckIfFilesMatch()
    if self.blocker:
        return
    self.count = 0

    # wx.DirDialog takes DD_* style flags. The FD_SAVE / FD_OVERWRITE_PROMPT
    # flags used before belong to wx.FileDialog.
    saveDialog = wx.DirDialog(
        self,
        "Choose a directory(Your files will be saved in same file names under this):",
        style=wx.DD_DEFAULT_STYLE)
    try:
        if saveDialog.ShowModal() == wx.ID_CANCEL:
            # update status
            self.statusText.SetValue("Did not save")
            self.statusText.SetForegroundColour(wx.BLACK)
            return
        savePath = saveDialog.GetPath()
    finally:
        # wx dialogs must be destroyed explicitly or they leak.
        saveDialog.Destroy()

    # The offset is the same for every pair, so read and validate it once
    # before any file is touched instead of on every iteration.
    try:
        self.offset = int(self.offsetCtrl.GetValue())
    except ValueError:
        self.statusText.SetValue("Error: please enter integer offset")
        self.statusText.SetForegroundColour(wx.RED)
        return

    # start reading files
    for i in self.subdirArray1:
        # count=1: strip only the leading root, not later occurrences.
        postfix = i.replace(self.rootDir1, "", 1)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(postfix)
        print(i)
        f = self.rootDir2 + postfix
        currentSavePath = savePath + postfix
        saveDir = os.path.dirname(currentSavePath)
        if not os.path.isdir(saveDir):
            os.makedirs(saveDir)
        try:
            # update status
            self.statusText.SetValue("Processing...")
            self.statusText.SetForegroundColour(wx.BLACK)
            # try reading the files
            print("first path: " + i)
            print("second path: " + f)
            # NOTE(review): intermediates stay on self because code outside
            # this method may read them - confirm, then make them locals.
            self.im1 = Image.open(i)
            self.im2 = Image.open(f)
            self.count += 1
            # Widen to a signed type: the sum of two 8-bit values plus the
            # offset neither overflows nor wraps when the offset is negative.
            self.mat1 = numpy.array(self.im1).astype('int32')
            self.mat2 = numpy.array(self.im2).astype('int32')
            # "//" is integer division on both Python 2 and 3.
            self.result = (self.mat1 + self.mat2 + self.offset) // 2
            # Clamp both ends; the old code only capped the top, so negative
            # values wrapped around when cast to uint8.
            self.result = numpy.clip(self.result, 0, 255).astype('uint8')
            self.imResult = Image.fromarray(self.result)
            self.imResult.save(currentSavePath, "bmp")
            # update status
            self.statusText.SetValue("Saved image to " + currentSavePath)
            self.statusText.SetForegroundColour(wx.BLACK)
        except IOError:
            # report the failure and abort the whole run
            self.statusText.SetValue("Error: cannot read file : " + i + " or " + f)
            self.statusText.SetForegroundColour(wx.RED)
            return
2-3分钟正常吗?可以更快吗?我应该降低最终图像的分辨率吗?
答案 0 :(得分:2)
您可以计算它代表的总IO工作负载。
你有500张图片,每张5 MB,你需要阅读其中两张图片才能写一张。所以你读500 * 5 * 2 = 5 GB,你在磁盘上写2.5 GB。
我们假设它持续3分钟。这意味着读取模式下的I / O吞吐量为27.7 MB / s,写入模式下的吞吐量为13.8 MB / s。对于传统的旋转磁盘来说,这个结果并不是那么糟糕。
现在,如果你在这台笔记本电脑上安装了SSD,这意味着你远远没有使I / O带宽饱和,而且你可能做得更好。例如,您可以尝试并行化该过程(通过引入一个线程池)。
答案 1 :(得分:1)
您可以使用GPU加速计算。 500张图像的2-3分钟并不是那么奇怪,对于大型图像处理研究,经常使用专用服务器。
至于保存,磁盘是这里的慢因素。为此目的使用专用堆栈,或者如果可以,则更改为SSD。
答案 2 :(得分:1)
这更像是一个 Code Review 式的回答。正如 Didier 所观察到的,图像保存过程本身已经相当快,因此我只针对其中涉及的另一个步骤,即 CheckIfFilesMatch 方法,提出一些优化。这段代码目前的时间复杂度是 O(N²):
# Excerpt from the body of CheckIfFilesMatch (not runnable on its own).
# First pass: every path of the first tree, with rootDir1 swapped for
# rootDir2, must be present in self.subdirArray2. That attribute is a list,
# so each "not in" test is a linear scan and the loop is quadratic overall.
for f in self.subdirArray1:
if f.replace(self.rootDir1,self.rootDir2) not in self.subdirArray2:
self.statusText.SetValue("This file: " + f + " does not correspond to any file in parallel.")
self.blocker = True
self.statusText.SetForegroundColour(wx.RED)
return False
# Second pass: the same check in the opposite direction
# (self.subdirArray1 is presumably a list as well - its construction is
# not shown here).
for f in self.subdirArray2:
if f.replace(self.rootDir2,self.rootDir1) not in self.subdirArray1:
self.statusText.SetValue("This file: " + f + " does not correspond to any file in parallel.")
self.blocker = True
self.statusText.SetForegroundColour(wx.RED)
return False
您可以先用 self.subdirArray1 或 self.subdirArray2 创建一个集合(set),把复杂度降到 O(N)。修改后的代码如下:
def CheckIfFilesMatch(self):
    """Check in linear time that both directory trees contain the same files.

    Every path in ``self.subdirArray1`` is mapped into the second tree by
    swapping its leading ``self.rootDir1`` for ``self.rootDir2`` and is
    ticked off a set built from ``self.subdirArray2``. Anything left in the
    set afterwards has no counterpart in the first tree.

    Failures are reported through ``self.__FileMatchError``.

    Returns:
        True when the two trees match, False otherwise.
    """
    if len(self.subdirArray1) != len(self.subdirArray2):
        self.__FileMatchError("please enter same amount of files")
        return False

    unmatched = set(self.subdirArray2)
    for f in self.subdirArray1:
        # count=1: only the leading root is swapped. Without it, a path that
        # contains the root text a second time would be mapped wrongly.
        frev = f.replace(self.rootDir1, self.rootDir2, 1)
        if frev not in unmatched:
            self.__FileMatchError("This file: " + f + " does not correspond to any file in parallel.")
            return False
        unmatched.discard(frev)

    if unmatched:
        # Whatever is left exists only in the second tree; report one of them.
        self.__FileMatchError("This file: " + unmatched.pop() + " does not correspond to any file in parallel.")
        return False
    return True
def __FileMatchError(self, txt):
    """Report a file-matching failure.

    Writes ``txt`` to the status text control, sets ``self.blocker`` to
    True and switches the status text colour to red.

    Args:
        txt: the message to display.
    """
    status = self.statusText
    status.SetValue(txt)
    self.blocker = True
    status.SetForegroundColour(wx.RED)