我正在自学编程,在多线程上遇到了瓶颈。我想用多线程来加快我的谷歌爬虫,有人能给我指出正确的方向吗?
// Obtain the Intent via getIntent() (presumably Android's Activity.getIntent() — confirm
// against the enclosing class, which is not visible in this snippet).
Intent intent = getIntent();
// Only read extras when an Intent is actually available.
if(intent !=null){
// Read the String[] extra stored under the key "strings".
// NOTE(review): myStrings is declared inside this if-block, so it goes out of scope at the
// closing brace; any use of the array must happen before the block ends.
String[] myStrings = intent.getStringArrayExtra("strings");
}
和
def google(search_string, start):
    """Fetch one Google results page and return the result URLs found on it.

    The page is requested from ``http://www.google.com/search`` and parsed
    with BeautifulSoup; every ``<h3 class="r">`` heading whose link matches
    ``url?q=<target>&sa`` contributes ``<target>`` to the result.

    Each extracted URL is printed and also written, one per line, to
    ``test.txt`` in the current working directory.

    NOTE: ``test.txt`` is opened in ``"w"`` mode, so it is truncated on every
    call. If this function is called from several threads at once, the
    calls will overwrite each other's output; rely on the returned list
    instead and write the file once from the caller.

    Args:
        search_string: The query text, sent as the ``q`` parameter.
        start: Value sent as the ``start`` parameter (presumably the offset
            of the first result to return — confirm against Google's API).

    Returns:
        A list of the extracted URL strings, in page order. Headings with no
        link, no ``href``, or a non-matching ``href`` are skipped.

    Raises:
        requests.exceptions.RequestException: If the HTTP request fails or
            does not complete within the timeout.
    """
    url = 'http://www.google.com/search'
    payload = {'q': search_string, 'start': start}
    my_headers = {'User-agent': 'Mozilla/11.0'}
    # A timeout keeps a stalled connection from blocking the caller forever.
    r = requests.get(url, params=payload, headers=my_headers, timeout=10)
    soup = BeautifulSoup(r.text, 'html.parser')
    h3tags = soup.find_all('h3', class_='r')

    # Raw string avoids invalid-escape warnings; compiled once for the loop.
    result_url = re.compile(r'url\?q=(.+?)&sa')

    temp = []
    # Prints and writes output of scraped URLs.
    with open("test.txt", "w", encoding="utf-8") as out_15:
        for h3 in h3tags:
            anchor = h3.a
            if anchor is None:
                continue  # heading without a link
            href = anchor.get('href')
            if not href:
                continue  # link without a target
            match = result_url.search(href)
            if match is None:
                continue  # not a redirect-style result link
            link = match.group(1)
            print(link)
            out_15.write(link + "\n")
            temp.append(link)
    return temp