我有一个网址列表,其中每个网址都会重定向到另一个网址。我想用 Python 获取这些重定向后的目标网址列表,请问最好的做法是什么?
例如:http://apple.co/1ka9kIX 会重定向到苹果公司的网站(http://www.apple.com/)。
答案 0 :(得分:1)
下面这段代码对我有效,它基于 Dive Into Python 中的一篇教程。
import urllib2
import httplib
class SmartRedirectHandler(urllib2.HTTPRedirectHandler):
"""
Stolen from http://www.diveintopython.net/http_web_services/redirects.html
Originally defined in openanything.py
"""
def http_error_301(self, req, fp, code, msg, headers):
result = urllib2.HTTPRedirectHandler.http_error_301(
self, req, fp, code, msg, headers)
result.status = code
return result
def http_error_302(self, req, fp, code, msg, headers):
result = urllib2.HTTPRedirectHandler.http_error_302(
self, req, fp, code, msg, headers)
result.status = code
return result
inputURLs = ['http://apple.co/1ka9kIX', 'http://fb.com']
httplib.HTTPConnection.debuglevel = 1
opener = urllib2.build_opener(SmartRedirectHandler())
getRedirect = lambda url: opener.open(urllib2.Request(url)).url
print map(getRedirect, inputURLs)
答案 1 :(得分:0)
您应该使用 urllib2 和 httplib,代码如下:
import httplib
import urllib2
def get_request_direct(url):
httplib.HTTPConnection.debuglevel = 1
req = urllib2.Request(url)
req.add_header("Accept", "text/html,*/*")
req.add_header("Connection", "Keep-Alive")
op = urllib2.build_opener()
f = op.open(req)
return f.url
# If your URL does not start with `http://` or `https://`, prepend the scheme first.
>>> print get_request_direct('http://apple.co/1ka9kIX')
http://www.apple.com/