有谁能告诉我,我做错了什么?我一直在使用此代码收到错误。
我正试图从 primaryschoolgames 下载所有swf,就像一个实验,但我似乎无法做到:
#!/usr/bin/env python
# encoding: utf-8
import sys, getopt
import os, urllib, urllib2, re, string, math
help_message = '''
'''
no_param = '''
'''
verbose = False
fakeMode = False
curPath = os.getcwd() + "/"
urlRegex = ''
FileRegex = ''
outputPath = ''
currentFile = ''
def removeDuplicates(seq):
# Not order preserving
keys = {}
for e in seq:
keys[e] = 1
return keys.keys()
def go(filename):
print "Having a look at " + string.capwords(filename)
global urlRegex, FileRegex, outputPath, currentFile
url = 'http://cdn.primarygames.com' + filename
urlRegex = '/'+filename+'/.+/download'
FileRegex = '/'+filename+'/(.*?)/download'
outputPath = curPath+"Swfs"+"/"
if not os.path.exists(outputPath):
os.makedirs(outputPath)
filelist = []
while(len(url)):
# looping system
newlist, url = scrapePage(url, filename)
filelist.extend(newlist)
print 'Found %s Files.' % len(filelist)
for swf in filelist:
swfurl = swf['url']
name = swf['name']
currentFile = name
#print 'Downloading '+name,
if not fakeMode:
#print ''
urllib.urlretrieve('http://cdn.primarygames.com' + swfurl, outputPath+name)
else:
print 'Not downloading %s.' % name
print "All done with %s!" % filename
def scrapePage(url, filename):
print 'Looking through '+url
html = urllib2.urlopen(url).read()
swflist = re.findall(urlRegex, html)
swflist = removeDuplicates(swflist)
swfs = []
for swfurl in swflist:
r = re.compile(FileRegex)
swfname = r.search(swfurl).group(1)
swfname = swfname.replace('-', ' ')
name = filename + "/" + swfname + ".swf"
name = string.capwords(name)
swf.append({'name':name,'url':swfurl})
r = re.compile(nextRegex)
result = r.search(html)
if result:
nextUrl = 'http://cdn.primarygames.com' + result.group(1)
else:
nextUrl = ''
return swfs, nextUrl
def main(argv=None):
global verbose, fakeMode
if argv is None:
argv = sys.argv
try:
try:
opts, args = getopt.getopt(argv[1:], "ho:vf", ["help", "output="])
except getopt.error, msg:
raise Usage(msg)
# option processing
for option, value in opts:
if option == "-v":
verbose = True
if option in ("-f", "--fake"):
fakeMode = True
if option in ("-h", "--help"):
raise Usage(help_message)
if option in ("-o", "--output"):
output = value
if len(args):
swfs = args
else:
raise Usage(no_param)
except Usage, err:
print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg)
if err.msg != help_message:
print >> sys.stderr, "\t for help use --help"
return 2
for swf in swfs:
go(swf)
if __name__ == "__main__":
sys.exit(main())
这是我不断得到的错误:
Having a look at *
Looking through http://cdn.primarygames.com/*
Traceback (most recent call last):
File "C:\PrimarySchoolGames Swf Downloader.py"
, line 129, in <module>
sys.exit(main())
File "C:\PrimarySchoolGames Swf Downloader.py"
, line 125, in main
go(swf)
File "C:\PrimarySchoolGames Swf Downloader.py"
, line 48, in go
newlist, url = scrapePage(url, filename)
File "C:\Users\Terrii\Desktop\VB Extra's\PrimarySchoolGames Swf Downloader.py"
, line 67, in scrapePage
html = urllib2.urlopen(url).read()
File "C:\Python27\lib\urllib2.py", line 126, in urlopen
return _opener.open(url, data, timeout)
File "C:\Python27\lib\urllib2.py", line 400, in open
response = self._open(req, data)
File "C:\Python27\lib\urllib2.py", line 418, in _open
'_open', req)
File "C:\Python27\lib\urllib2.py", line 378, in _call_chain
result = func(*args)
File "C:\Python27\lib\urllib2.py", line 1207, in http_open
return self.do_open(httplib.HTTPConnection, req)
File "C:\Python27\lib\urllib2.py", line 1177, in do_open
raise URLError(err)
urllib2.URLError: <urlopen error [Errno 11004] getaddrinfo failed>
答案 0 :(得分:1)
失败的getaddrinfo
通常表示您提供的网址存在问题。既然我能够解析地址你确定你不在代理服务器后面吗?这可能导致DNS查找失败,从而导致此消息。
Python如何确定在Windows上使用哪个代理:
在Windows环境中,如果未设置代理环境变量, 代理设置是从注册表的Internet设置获得的 部分。
为了获得更多帮助,我同意@MikeHunter。我试图修复你的代码,但由于我必须实现你的Exception-Class以使代码运行,我认为你应该重新缩进代码并提供更多信息。遗憾。