从网站的主页“http://myWebPage.com”,我需要找到所有指向页面和子页面的链接,最后计算每个页面中某些单词/术语的出现次数。
我的代码
from bs4 import BeautifulSoup
import re
import urllib2
soup = BeautifulSoup(urllib2.urlopen("https://myWebPage.com"))
#searching all the links matching "https:/solutions/blah-blah-blah-
#blah.html"
tags = soup.find_all(href=re.compile("/solutions/......html")
# write all the links to a notepad and go to those link and search for the
#terms
f = open('test.txt', 'wt')`enter code here`
f.write(tags)
f.close()
错误:
f = open('test.txt', 'wb')
invalid syntax