import requests
from bs4 import BeautifulSoup
import re
# Fetch a Google results page for a search term and try to extract URLs from
# the <cite> elements of the results using a URL regular expression.
search = "bolån"
goog_search = "https://www.google.se/search?source=hp&ei=3_l6WsHAKMiasgHf26zYBQ&q=" + search
r = requests.get(goog_search)
soup = BeautifulSoup(r.text, "html.parser")
# Only <cite> tags carrying the '_WGk' class are considered.
elements = soup.find_all('cite', {'class': '_WGk'})
pattern = r'((https?|ftp):\/\/|\b(?:[a-z\d]+\.))(([^\s()<>]+|\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))?\))+(?:\(([^\s()<>]+|(\(?:[^\s()<>]+\)))?\)|[^\s`!()\[\]{};:<>]))?'
prog = re.compile(pattern)
# Opening in 'w' mode creates/truncates urls.txt; this script does not write
# anything to it. The `with` block guarantees the handle is closed.
with open('urls.txt', 'w') as f:
    for tag in elements:
        # A compiled pattern's match() takes the string to scan (plus optional
        # integer pos/endpos), not the pattern again. The earlier call
        # prog.match(pattern, str(tag)) put str(tag) in the `pos` slot and
        # raised "TypeError: 'str' object cannot be interpreted as an integer".
        # NOTE(review): match() only matches at the start of the string, and
        # str(tag) includes the tag markup -- prog.search() or the tag's text
        # may be what is actually wanted; confirm.
        result = prog.match(str(tag))
        print(result)
我想用正则表达式从字符串中提取网址,但始终无法消除下面这个错误。
这是我得到的错误:
for tag in elements:
---> 17 result = prog.match(pattern, str(tag))
18 print(result)
TypeError: 'str' object cannot be interpreted as an integer
答案 0 :(得分:2)
根据 Python 官方文档,你似乎把两种调用方式混在一起了。
下面这组语句:
prog = re.compile(pattern)
result = prog.match(string)
相当于
result = re.match(pattern, string)
也就是说,必须传递给 prog.match 的是 string,而不是 pattern,所以只需写成:
prog.match(str(tag))