我想下载我常去的论坛(project1999.org)上某个特定帖子里的所有图片。这不是作业。我的编程水平很差,使用的是 Linux。我卡住了,有人能帮忙吗?非常感谢。
我说的这个帖子是关于猫咪图片的,哈哈(lol)。链接在这里:http://www.project1999.org/forums/showthread.php?t=37779
我目前用的是 BeautifulSoup,这样可以吗?请帮帮我,下面是我的代码:
import urllib2
from BeautifulSoup import BeautifulSoup
def DownloadImagesVB(startUrl, saveDirectory, maxPages=None):
    """Walk the pages of a forum thread, requesting each page URL in turn.

    For page numbers 1, 2, 3, ... the page URL is built as
    ``startUrl + "&p=" + str(page)``, printed, and then requested with
    ``urllib2.urlopen``.

    Args:
        startUrl: Base thread URL; must already contain a query string,
            because the page number is appended with ``&p=``.
        saveDirectory: Directory intended to receive the images. It is
            accepted for interface compatibility but not used yet, since
            nothing is extracted from the fetched pages so far.
        maxPages: Optional upper bound on the number of pages to request.
            ``None`` (the default) keeps the original behaviour of looping
            until ``urlopen`` raises or the process is interrupted.

    Raises:
        Whatever ``urllib2.urlopen`` raises for a failed request; errors
        are deliberately not caught here.
    """
    startPage = 1
    while maxPages is None or startPage <= maxPages:
        url = startUrl + "&p=" + str(startPage)
        print(url)
        startPage += 1
        # Request the page URL that was just built. The original code
        # re-fetched startUrl on every iteration, so the page number
        # never had any effect on what was downloaded.
        response = urllib2.urlopen(url)
        # TODO: parse the response body and save its images into
        # saveDirectory.
        # Close the connection explicitly so an unbounded loop does not
        # accumulate open sockets.
        response.close()
if __name__ == "__main__":
    # Script entry point: the thread to crawl and the local directory
    # handed to DownloadImagesVB as its save location.
    url, path = (
        "http://www.project1999.org/forums/showthread.php?t=37779",
        "/home/r00t/cats",
    )
    DownloadImagesVB(url, path)
答案 0 :(得分:1)
import requests
import lxml.html
# here's some ugly code I've glued together from my IPython %history:
#
# I know it's crap, It's about ~10mins from start to finish, one
# alternative would be to simply generate <img src=""> links for each of
# the images and then rely on Firefox/Chrome to save the whole page...
# this would make prettier file names, and I get the impression this
# is a one-off script...
# --Stuart
def find_images(url):
    """Return the image URLs found inside the posts of one thread page.

    The page at ``url`` is parsed with ``lxml.html`` and its links are
    made absolute. Every ``<img>`` nested in a ``<div>`` whose ``id``
    contains ``"post_message"`` is inspected.

    Args:
        url: Address of the thread page to parse.

    Returns:
        A list of ``src`` values, in document order and possibly with
        duplicates. Images whose ``src`` contains ``"project1999"`` are
        excluded (presumably the forum's own smilies and icons), and so
        are ``<img>`` tags with a missing or empty ``src``.
    """
    root = lxml.html.parse(url).getroot()
    root.make_links_absolute()
    imgs = []
    for img in root.xpath('//div[contains(@id, "post_message")]//img'):
        src = img.attrib.get('src', '')
        # An <img> without a src used to slip through as '' because the
        # substring test below is true for the empty string; the caller
        # would then try to download an empty URL.
        if not src:
            continue
        if 'project1999' not in src:
            imgs.append(src)
    return imgs
def main(page_count=52):
    """Download every distinct image posted in forum thread 37779.

    Pages 1..page_count of the thread are scanned with ``find_images``,
    the resulting URLs are de-duplicated, and each image is fetched with
    ``requests`` and written to ``imgs/<index>`` (no file extension).

    Args:
        page_count: Number of thread pages to scan. Defaults to 52, the
            value that was previously hard-coded.

    A failure on one image is reported on stdout and does not stop the
    remaining downloads.
    """
    import os  # local import: only needed to create the output directory

    page_url = ('http://www.project1999.org/forums/showthread.php'
                '?s=6be291d52837a8ab512858dde188569c&t=37779&page=%d')
    urls = [page_url % num for num in range(1, page_count + 1)]

    # Collect into a set to drop images that were posted more than once,
    # then sort so the numeric file names are reproducible between runs.
    todownload = set()
    for url in urls:
        todownload.update(find_images(url))
    todownload = sorted(todownload)
    print("downloading %d images" % len(todownload))

    # The original assumed imgs/ already existed and otherwise failed on
    # every single image.
    if not os.path.isdir('imgs'):
        os.makedirs('imgs')

    # Files are saved without extensions (lazy, but sufficient here).
    for count, i in enumerate(todownload):
        try:
            print("%d downloading %s" % (count, i))
            # Fetch before opening the file so a failed download does not
            # leave an empty file behind.
            content = requests.get(i).content
            # Binary mode keeps image bytes intact; the with-block
            # guarantees the handle is closed.
            with open('imgs/%d' % count, 'wb') as out:
                out.write(content)
        except Exception:
            # Best-effort per image, but unlike a bare except this still
            # lets Ctrl-C (KeyboardInterrupt) stop the run.
            print("couldn't download %s" % i)
# Run the downloader only when this file is executed as a script,
# not when it is imported.
if __name__ == '__main__':
    main()
脚本目前已经开始运行,输出如下……
...
92 downloading http://i117.photobucket.com/albums/o60/mven42/f1b88059.jpg
93 downloading https://lh4.googleusercontent.com/-cXKgVQVodRI/TmvYln0uj6I/AAAAAAAAO1k/H4sx5srDX6Q/Cat-Gifs-Shared-by-Gplus-Jay-Puri_62.gif
94 downloading http://26.media.tumblr.com/tumblr_ls8pbds3sL1qfjjglo1_400.gif
95 downloading http://i43.tinypic.com/169goet.gif
96 downloading http://i.imgur.com/5qbXk.gif
97 downloading http://img818.imageshack.us/img818/547/hahajkg.jpg
98 downloading http://img815.imageshack.us/img815/6856/catmb.jpg
99 downloading http://i.imgur.com/PDiEa.gif
100 downloading http://29.media.tumblr.com/tumblr_lnybntpx2o1qlue6co1_100.gif
101 downloading http://1.bp.blogspot.com/-G6LADm3UlmE/TfeDHI9iQNI/AAAAAAAAAsw/sZ0R6wcdZgc/s640/cat+vs+dog+002.jpg
102 downloading http://i1179.photobucket.com/albums/x393/Drogula/gifs/1312351009032.gif
103 downloading http://26.media.tumblr.com/tumblr_lltfczZDdA1qkbyimo1_500.gif
104 downloading http://desmond.yfrog.com/Himg860/scaled.php?tn=0&server=860&filename=snajs.jpg&xsize=640&ysize=640
105 downloading http://i357.photobucket.com/albums/oo12/azen32/2011-11-0919-25-15998.jpg
106 downloading http://img641.imageshack.us/img641/2678/caturday35.png
107 downloading http://icanhascheezburger.files.wordpress.com/2007/12/funny-pictures-cat-gravity-wins.jpg
108 downloading http://s3-ak.buzzfed.com/static/enhanced/web05/2011/12/7/17/anigif_enhanced-buzz-2926-1323297290-29.gif
109 downloading http://a5.sphotos.ak.fbcdn.net/hphotos-ak-snc7/s720x720/315738_2385906201789_1074780041_32733561_1154490844_n.jpg