#!/usr/bin/python
from urllib import urlopen
import re
webpage = urlopen('http://en.wikipedia.org/wiki/Python_(programming_language)').read()
patFinderTitle = re.compile('<title>(.*)</title>')
patFinderLink = re.compile('<link rel.*href="(.*)"/>')
findPatTitle = re.findall(PatFinderTitle,webpage)
findPatLink = re.findall(PatFinderLink,webpage)
listIterator = []
listIterator [:] = range(1,5)
for i in listIterator:
print findPatTitle[i]
print findpatLink[i]
print "\n"
当我运行上面的代码时,我遇到以下错误:
bash-3.2$ ./webscrapping.py
Traceback (most recent call last):
File "./webscrapping.py", line 6, in ?
webpage = urlopen('http://en.wikipedia.org/wiki/Python_(programming_language)').read()
File "/usr/lib/python2.4/urllib.py", line 82, in urlopen
return opener.open(url)
File "/usr/lib/python2.4/urllib.py", line 190, in open
return getattr(self, name)(url)
File "/usr/lib/python2.4/urllib.py", line 313, in open_http
h.endheaders()
File "/usr/lib/python2.4/httplib.py", line 798, in endheaders
self._send_output()
File "/usr/lib/python2.4/httplib.py", line 679, in _send_output
self.send(msg)
File "/usr/lib/python2.4/httplib.py", line 646, in send
self.connect()
File "/usr/lib/python2.4/httplib.py", line 614, in connect
socket.SOCK_STREAM):
IOError: [Errno socket error] (8, 'node name or service name not known')
我认为我的python版本支持urlopen模块
bash-3.2$ python
Python 2.4.6 (#1, Dec 13 2009, 23:45:11) [C] on sunos5
>>> from urllib import urlopen
>>>
它没有给我任何错误
错误的原因是什么?我认为这不是语法错误。 有什么建议吗?
谢谢:)