我用以下代码解压 tar.gz 文件,并同时跟踪解压进度:
from __future__ import division
import tarfile
import os
# Extract a .tar.gz archive member by member, printing progress as the
# fraction of uncompressed bytes written so far.
theArchive = "/Users/Dennis/Instances/atlassian-jira-enterprise-4.1.2-standalone.tar.gz"
a = tarfile.open(theArchive)
try:
    # First pass: add up the uncompressed size of every member so the
    # second pass can report progress against a known total.
    tarsize = 0
    print("Computing total size")
    for tarinfo in a:
        tarsize = tarsize + tarinfo.size
    realz = tarsize
    # The compressed size is the size of the archive file on disk; the
    # gzip file object's ``size`` attribute does not hold that value.
    print("compressed size: " + str(os.path.getsize(theArchive)))
    print("uncompressed size: " + str(tarsize))

    # Second pass: extract each member and report cumulative progress.
    tarsize = 0
    for tarinfo in a:
        if tarinfo.isreg():
            kind = "a regular file."
        elif tarinfo.isdir():
            kind = "a directory."
        else:
            kind = "something else."
        print("%s is %s bytes in size and is %s" % (tarinfo.name, tarinfo.size, kind))
        a.extract(tarinfo)
        tarsize = tarsize + tarinfo.size
        print(str(tarsize) + "/" + str(realz))
        # An archive holding only directories or empty files totals 0 bytes;
        # treat it as complete instead of dividing by zero.
        outout = tarsize / realz if realz else 1.0
        print("progress: " + str(outout))
finally:
    # Release the archive even when extraction fails part-way through.
    a.close()
速度非常快,10 秒内就能解压一个 100MB 的 tar.gz。我希望进度也能以图形方式显示,所以把代码改成了带 Tkinter 进度条的版本:
from __future__ import division
import tarfile
import os
import Tkinter
class Meter(Tkinter.Frame):
    """A progress bar drawn on a canvas inside a frame.

    The bar is a filled rectangle whose width is the canvas width multiplied
    by the current value; a text item is drawn on top of it.

    Parameters:
        master: parent widget.
        width, height: requested size of the frame and of the inner canvas.
        bg: background colour of the frame and canvas.
        fillcolor: fill colour of the bar rectangle.
        value: initial fraction; set() clamps it to the range 0.0 .. 1.0.
        text: initial label; None selects the default percentage string.
        font: optional font applied to the label.
        textcolor: fill colour of the label.
        *args, **kw: passed through to Tkinter.Frame.__init__.
    """

    def __init__(self, master, width=300, height=20, bg='white', fillcolor='orchid1',
                 value=0.0, text=None, font=None, textcolor='black', *args, **kw):
        Tkinter.Frame.__init__(self, master, bg=bg, width=width, height=height, *args, **kw)
        self._value = value

        self._canv = Tkinter.Canvas(self, bg=self['bg'], width=self['width'], height=self['height'],
                                    highlightthickness=0, relief='flat', bd=0)
        self._canv.pack(fill='both', expand=1)
        # The bar starts with zero width; set() stretches it.
        self._rect = self._canv.create_rectangle(0, 0, 0, self._canv.winfo_reqheight(), fill=fillcolor,
                                                 width=0)
        self._text = self._canv.create_text(self._canv.winfo_reqwidth()/2, self._canv.winfo_reqheight()/2,
                                            text='', fill=textcolor)
        if font:
            self._canv.itemconfigure(self._text, font=font)

        self.set(value, text)
        self.bind('<Configure>', self._update_coords)

    def _update_coords(self, event):
        '''Updates the position of the text and rectangle inside the canvas when the size of
        the widget gets changed.'''
        # looks like we have to call update_idletasks() twice to make sure
        # to get the results we expect
        self._canv.update_idletasks()
        self._canv.coords(self._text, self._canv.winfo_width()/2, self._canv.winfo_height()/2)
        self._canv.coords(self._rect, 0, 0, self._canv.winfo_width()*self._value, self._canv.winfo_height())
        self._canv.update_idletasks()

    def get(self):
        """Return a ``(value, text)`` tuple: the stored fraction and the label text."""
        return self._value, self._canv.itemcget(self._text, 'text')

    def set(self, value=0.0, text=None):
        """Store *value*, resize the bar to match and update the label.

        Parameters:
            value: fraction complete; values below 0.0 or above 1.0 are clamped.
            text: label to display; None shows "Extraction: NN %".
        """
        # make the value failsafe:
        if value < 0.0:
            value = 0.0
        elif value > 1.0:
            value = 1.0
        self._value = value
        if text is None:
            # if no text is specified use the default percentage string:
            text = "Extraction: " + str(int(round(100 * value))) + ' %'
        self._canv.coords(self._rect, 0, 0, self._canv.winfo_width()*value, self._canv.winfo_height())
        self._canv.itemconfigure(self._text, text=text)
        # Process pending idle tasks on every call, as the original does.
        self._canv.update_idletasks()
##-------------demo code--------------------------------------------##
def _goExtract(meter, value):
    """Animate *meter* from *value* up to 1.0 in steps of 0.005.

    Shows *value* on the meter, then reschedules itself through
    ``meter.after`` every 50 ms until the value reaches 1.0, at which point
    the completion message is displayed.

    Parameters:
        meter: object providing ``set(value, text)`` and ``after(ms, callback)``.
        value: fraction to display on this step.
    """
    meter.set(value)
    if value < 1.0:
        value = value + 0.005
        # Reschedule this function; the name previously used here (_demo)
        # is not defined and raised NameError on the first tick.
        meter.after(50, lambda: _goExtract(meter, value))
    else:
        meter.set(value, 'Demo successfully finished')
if __name__ == '__main__':
    # Build a window containing a single Meter, then extract the archive
    # while updating the meter after every member.
    root = Tkinter.Tk(className='meter demo')
    m = Meter(root, relief='ridge', bd=3)
    m.pack(fill='x')
    m.set(0.0, 'Computing file size...')
    m.after(1000)  # no callback given, so this just pauses for one second

    theArchive = "/Users/Dennis/Instances/atlassian-jira-enterprise-4.1.2-standalone.tar.gz"
    a = tarfile.open(theArchive)
    try:
        # First pass: total uncompressed size, used as the progress denominator.
        tarsize = 0
        for tarinfo in a:
            tarsize = tarsize + tarinfo.size
        realz = tarsize
        print("real size: " + str(tarsize))
        # The compressed size is the size of the archive file on disk; the
        # gzip file object's ``size`` attribute does not hold that value.
        print("compressed size: " + str(os.path.getsize(theArchive)))
        m.set(0.0, 'Done computing!')
        m.after(1000)

        # Second pass: extract each member and push the new ratio to the meter.
        tarsize = 0
        for tarinfo in a:
            if tarinfo.isreg():
                kind = "a regular file."
            elif tarinfo.isdir():
                kind = "a directory."
            else:
                kind = "something else."
            print("%s is %s bytes in size and is %s" % (tarinfo.name, tarinfo.size, kind))
            a.extract(tarinfo)
            tarsize = tarsize + tarinfo.size
            print(str(tarsize) + "/" + str(realz))
            # An archive whose members total 0 bytes counts as complete
            # instead of dividing by zero.
            outout = tarsize / realz if realz else 1.0
            m.set(outout)
            print("progress: " + str(outout))
    finally:
        # Release the archive even when extraction fails part-way through.
        a.close()
    m.set(1.0, 'Extraction complete!')
    m.after(1000)
    # NOTE(review): no mainloop() is started in this block, so this callback
    # only fires if an event loop runs afterwards -- confirm intent.
    m.after(1000, lambda: _goExtract(m, 0.0))
它运行正常,但现在整个解压过程需要 2 分钟以上。为什么会这样?我该如何解决这个问题?
谢谢!
丹尼斯
答案 0 :(得分:5)
存档中的文件有多大?几乎可以肯定,您更新进度条的频率远高于实际需要——通常应在 set() 函数中加入一项检查:如果与上次显示的值相比变化太小,就直接返回而不更新。画布宽度只有 300px,变化小于 0.3% 时更新毫无意义,实际上每变化 1% 更新一次就足够了。
由于您的解压过程通常在 10 秒内完成,您可能还需要加入基于时间的检查,因为即使每 1% 更新一次,也相当于每秒更新 10 次,仍然超出了实际需要。另外,用一个简单的 for 循环来驱动进度条,看看 Tk 绘制它需要多长时间,也会很有意思。