我没有太多使用Python的经验,而且我在制作网络抓取工具时遇到了问题。我尝试更改数组中的项目会产生此错误
AttributeError:' list'对象没有属性'替换'
import urllib
import mechanize
import requests
import re
from bs4 import BeautifulSoup
def getGoogle(link):
br = mechanize.Browser()
br.set_handle_robots(False)
br.addheaders=[('User-agent','chrome')]
term = link.replace(" ", "+")
query = "http://www.google.com/search?q="+term
htmltext = br.open(query).read()
soup2 = BeautifulSoup(htmltext, "html.parser")
search = soup2.findAll('div', attrs={'id':'search'})
searchtext = str(search[:])
soup3 = BeautifulSoup(searchtext, "html.parser")
list_items = soup3.findAll('li', attrs={'class', 'g'})
for li in list_items:
for h3 in li.findAll('h3'):
for a in h3.findAll('a'):
regex = "q(?!.*q).*?amp"
pattern = re.compile(regex)
source_url = re.findall(pattern, str(a))
results_array = [source_url]
for result in results_array:
result.append(str(source_url[:].replace("q=","").replace("&", "")))
for url in results_array:
r = requests.get(url)
soup = BeautifulSoup(r.content, "html.parser")
g_data = soup.find_all(attrs={"name":"viewport"})
for item in g_data:
return r.url, 'bevat een viewport'
break
else:
return r.url, 'bevat GEEN viewport'
答案 0 :(得分:1)
你可以这样做:
result.append(''.join(source_url).replace("q=","").replace("&", ""))