过去几天我一直在为一个程序工作,应该从webcomic Homestuck下载一系列页面。我在python 3中创建了一个工作版本,但效率非常低。任何人都可以看到改进和缩短此代码的方法吗?
import urllib.request
range1 = int(input("Enter the 1st page you want: "))
range2 = int(input("Enter the last page you want: ")) + 1
current = range1 + 1900
final = range2 + 1900
page = ''
nextPage = ''
while current != final:
page = str(current)
nextPage = str(current+1)
while len(page) != 6:
page = '0'+ page
while len(nextPage) != 6:
nextPage = '0'+ nextPage
html = 'http://www.mspaintadventures.com/?s=6&p='+page
site = urllib.request.urlopen(html)
s = site.read()
s = s.decode("utf8")
s = s.replace("<!-- end comic content -->", "<!-- begin comic content -->")
s = s.replace("http://cdn.mspaintadventures.com/storyfiles/hs2/", "")
s = s.replace("?s=6&p=" + str(nextPage), str(int(nextPage))+".html")
s = s.replace(page+"/"+page, page)
a,b,c = s.split('<!-- begin comic content -->')
b = "<title> Page " + page + "</title>" + b
t = open(str(current)+".html", 'w+')
t.write(b)
t.close()
page = str((int(page)-1900))
while len(page) != 5:
page = '0'+ page
t = open(str(current)+".html", 'a')
swfname=page+".swf"
t.write("<object width='1000' height='1000'> <param name='movie' value='"+swfname+"'>")
t.write("<embed src="+swfname+" width=650 height=450>")
t.write("</embed>")
t.write("</object>")
t.close()
try:
img = "http://cdn.mspaintadventures.com/storyfiles/hs2/"+page+".gif"
urllib.request.urlretrieve(img, page+".gif")
except:
try:
img = "http://cdn.mspaintadventures.com/storyfiles/hs2/"+page+"_1.gif"
urllib.request.urlretrieve(img, page+"_1.gif")
img = "http://cdn.mspaintadventures.com/storyfiles/hs2/"+page+"_2.gif"
urllib.request.urlretrieve(img, page+"_2.gif")
except:
try:
img = "http://cdn.mspaintadventures.com/storyfiles/hs2/"+page+"/"+page+".swf"
urllib.request.urlretrieve(img, page+".swf")
except:
print("Image "+img+" failed to download")
print ("Page " + str(page) + " of " + str(final-1901) + " downloaded")
current+=1
print("DONE")
答案 0 :(得分:0)
1)我不明白这些界限:
t = open(str(current)+".html", 'w+')
t.write
2)你应该避免在文件中多次写入。最好使用字符串格式然后再写一次。
text='''<object width='1000' height='1000'> <param name='movie' value='{0}'>
<embed src="{0}" width=650 height=450>
</embed>
</object>'''.format(swfname)
t.write(text)
t.close()