下载的图像的内容长度为零

时间:2017-09-21 01:37:20

标签: python

我使用python从某些网站下载图像,有时图像的内容长度为零。可以在Web浏览器中正常访问该图像。

我尝试了三种方法,得到了相同的结果,那么如何解决这个问题?

# -*- coding: utf-8 -*-
"""
Created on Wed Sep 20 13:51:42 2017
"""
import urllib
import urllib2
import re
import uuid
import os
import requests
from lxml import etree
from multiprocessing import Pool

url = 'https://www.sina.com.cn/'
user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'
request = urllib2.Request(url)
request.add_header('User-Agent', user_agent)
response = urllib2.urlopen(request)
content = response.read() 
tree=etree.HTML(content, parser=etree.HTMLParser(encoding='utf-8'))
node=tree.xpath("//img/@src")
dic1={}
dic2={}
localPath='E:\\pictures\\'
def generateFileName():  
    return str(uuid.uuid1()) 

def createFileWithFileName(localPathParam,fileName):  
    totalPath=localPathParam+'\\'+fileName  
    if not os.path.exists(totalPath):  
        file=open(totalPath,'wb')  
        file.close()  
        return totalPath 

def worker(i):
    path = node[i]
    if not (dic1.has_key(path)):
        dic1[path] = 1
        index = path.rfind('/')
        suffix = path[index+1:]
        filename = suffix
        #filename = generateFileName()+'.'+suffix

        if(re.search(r'^(https?:)?\/\/', path)):  
            #print('save picture %s as %s' % (path,filename))

            '''
                        #this code get the same result too
            try:
                urllib.urlretrieve(path, createFileWithFileName(localPath, filename)) 
            except Exception, ex:
                print(ex.message)
            '''

            with open(localPath + filename, 'wb') as handle:
                response = requests.get(path, timeout=60)

                if not response.ok:
                    print response
                else:
                    print 'wrong when get ' + path

                for block in response.iter_content(1024):
                    if not block:
                        break

                    handle.write(block)

            '''
                        #this code get the same result too
            try:
                req = urllib2.Request(path)
                req.add_header('User-Agent', user_agent)
                picture = urllib2.urlopen(url=path, timeout=5).read()
                document = open(localPath+filename,'wb')
                document.write(picture)
                document.close() 
            except Exception, ex:
                print(ex.message)
            '''


if __name__=='__main__':
    p = Pool()
    for i in range(len(node)):
        p.apply_async(worker, args=(i,))
    print 'Waiting for all subprocesses done...'
    p.close()
    p.join()
    print 'All subprocesses done.'

0 个答案:

没有答案