import httplib
import urlparse
def getUrl(url):
    """Issue a HEAD request for *url* and print a one-word verdict.

    Prints 'Awesome' when the server answers with status 200 and 'Error'
    for any other status.  Every exception raised on the way (parsing the
    URL, connecting, sending the request, reading the response) is caught
    and printed instead of a verdict.
    """
    try:
        pieces = urlparse.urlsplit(url)
        host, resource = pieces[1], pieces[2]
        conn = httplib.HTTPConnection(host, 80)
        conn.connect()
        # Assemble the request by hand: request line, one header, end marker.
        conn.putrequest('HEAD', resource)
        conn.putheader('Accept', '*/*')
        conn.endheaders()
        response = conn.getresponse()
        # Fetched but not used by the verdict below.
        contentType = response.getheader("content-type", "unknown")
        conn.close()
        if response.status != 200:
            print('Error')
        else:
            print('Awesome')
    except Exception as e:
        print(e)
我编写了上面的代码来检查给定的 URL 是否有效。但不知为何,测试时每遇到一个无效的 URL,它都会抛出异常:
>>> getUrl('http://www.idlebrfffain.com')
[Errno -2] Name or service not known
Python版本:
chaitu@ubuntu:~$ python -V
Python 2.6.4
任何人都可以帮我找出错误究竟在哪里吗?
答案 0 :(得分:2)
这是预期的行为。之所以抛出异常,是因为该 URL 的主机名无法解析。异常在执行到 `if response.status != 200` 这一行之前就已经抛出,控制权因此直接转移到 `except` 块。
您需要花些时间研究Exceptions的工作原理。这是你可以尝试的一个例子。
def getUrl(url):
    """Skeleton: print a verdict whether or not the request code raises.

    The request code itself is elided; it is expected to bind ``response``
    before its status is read.
    """
    # None stands for "no response obtained"; it never equals 200.
    status = None
    try:
        # do your normal stuff...
        status = response.status
    except Exception as e:
        # do whatever you want here...
        pass
    finally:
        # Runs on the success path and the failure path alike, so exactly
        # one verdict is printed per call.
        print("Awesome" if status == 200 else "Error")
答案 1 :(得分:2)
你必须捕获 `socket.error`:
import httplib, socket
import urlparse
def getUrl(url):
    """Probe *url* with an HTTP HEAD request and print the outcome.

    Prints "Host ... does not exist" when the host name cannot be
    resolved, "Cannot connect to ..." when the TCP connection fails,
    'Awesome' when the server answers with status 200 and 'Error' for any
    other status.  The request always goes to port 80.

    Errors raised after the connection has been established are
    deliberately not caught, so the traceback shows where they came from;
    the connection is still closed in that case.
    """
    parts = urlparse.urlsplit(url)
    server = parts[1]
    path = parts[2]
    if parts[3]:
        # Keep the query string; without it a different resource is probed.
        path = '%s?%s' % (path, parts[3])
    obj = httplib.HTTPConnection(server, 80)
    try:
        obj.connect()
    except socket.gaierror:
        # gaierror derives from socket.error, so it has to be listed first.
        print("Host %s does not exist" % server)
        return
    except socket.error:
        print("Cannot connect to %s:%s." % (server, 80))
        return
    try:
        obj.putrequest('HEAD', path)
        obj.putheader('Accept', '*/*')
        obj.endheaders()
        status = obj.getresponse().status
    finally:
        # Release the socket even when sending or reading the reply fails.
        obj.close()
    if status != 200:
        print('Error')
    else:
        print('Awesome')
# Demo calls: one URL for each failure message getUrl can print.
for demo_url in (
    'http://www.idlebrfffain.com',  # not a registered domain
    'http://8.8.8.8',               # not a http server
):
    getUrl(demo_url)
只在特定的几行代码周围使用 `try: ... except:`,并且只在你清楚那里会发生什么异常时才这样做。对于未捕获的异常,Python 会显示回溯(traceback),因此你可以轻松找到问题所在。
答案 2 :(得分:1)
# The following code validates a URL in two steps: first it validates the domain, then the path attached to that domain.
from urlparse import urlparse
import urllib2
import socket
class ValidateURL:
    """Validate a URL in two steps: resolve its domain, then open the URL.

    The verdict is reported by printing; nothing is returned to the caller.
    """

    def __init__(self, url):
        """Remember *url*; no network access happens until startActivity()."""
        self._url = url

    def startActivity(self):
        """Check the stored URL and print which part, if any, is invalid."""
        self._parts = urlparse(self._url)
        domain = self._parts[1]
        # The '%s %s' formats reproduce the single space that a
        # comma-separated print statement puts between its items.
        if not self._checkDomain(domain):
            print('%s %s' % (domain, ' domain does not exist'))
            return
        # The URL is only requested once its host is known to resolve.
        if self._checkUrl(self._url) == 1:
            print('%s %s' % (self._url, ' is valid'))
        else:
            print('%s %s %s' % ('The path ', self._parts[2], ' is not valid'))

    def _checkDomain(self, domain):
        """Return 1 if *domain* resolves to a host, otherwise 0.

        Only lookup failures count as "does not exist"; any other exception
        (for example a wrong argument type) propagates to the caller
        instead of being reported as a valid domain.
        """
        try:
            socket.gethostbyname_ex(domain)
        except (socket.error, UnicodeError):
            # socket.error also covers gaierror and herror.  UnicodeError is
            # what the lookup is expected to raise for a name that cannot be
            # encoded as a host name.
            return 0
        return 1

    def _checkUrl(self, url):
        """Return 1 if *url* can be opened, otherwise 0.

        A malformed URL (ValueError) and any URLError, which includes HTTP
        error responses, count as "not valid".  Other exceptions propagate.
        """
        try:
            self._req = urllib2.Request(url)
            # Only success matters; close the response right away so the
            # connection is not leaked.
            urllib2.urlopen(self._req).close()
        except (urllib2.URLError, ValueError):
            return 0
        return 1
if __name__ == "__main__":
    # Demo run: validate two sample URLs in turn; each verdict is printed.
    for sample_url in (
        'http://stackoverflow.com/questions/invalid-urls-throw-an-exception-python',
        'http://stackoverflow.com/questions/6414417/invalid-urls-throw-an-exception-python',
    ):
        ValidateURL(sample_url).startActivity()
希望我得出的解决方案是明智的。