我写了一些代码,用来从 Google 搜索结果中抓取数据。代码可以运行,但数据没有被插入数据库。
我使用的是 Python 3.7 和 MongoDB。
# Walk the Google result pages shown in `driver1`. For each result whose link
# is not yet in the `newgoogle` collection, load the page in `driver2` and
# store its source. Stops once `check` reaches `soBaiViet`, or when any step
# (including clicking the "next" link) raises.
# NOTE(review): the original indentation was lost; this assumes `i` and
# `sobaiviet` advance for every result, new or already stored — confirm.
while check < soBaiViet:
    i = 0
    try:
        thea = driver1.find_elements_by_xpath(
            '''//div[@class='r']/a[1]|//g-inner-card/div/a[1]''')
        thetext = driver1.find_elements_by_xpath(
            '''//div[@class='r']/a[1]/h3[@class='LC20lb']|//g-inner-card/div/a[1]''')
        while i < len(thea) and sobaiviet < soBaiViet:
            href = thea[i].get_attribute('href')
            org_link = str(href).strip()
            # Only fetch and store links that are not already in the collection.
            if db.newgoogle.find_one({'org_link': org_link}) is None:
                driver2.get(href)
                page_source = str(driver2.page_source)
                DSSource.append(page_source)
                document = {
                    "id_user": "google-search",
                    "id_post": "google-search",
                    "title": str(thetext[i].text).strip(),
                    "tutimkiem": tutimkiem,
                    "created_time": datetime.datetime.now(),
                    "tags": "",
                    "source": page_source,
                    "org_link": org_link,
                }
                # The PyMongo method is `insert_one`. The previous spelling
                # `insertone` raised TypeError, which the bare `except`
                # swallowed, so nothing was ever written.
                db.newgoogle.insert_one(document)
            i = i + 1
            sobaiviet = sobaiviet + 1
        check = check + i
        driver1.find_element_by_xpath('//*[@id="pnnext"]').click()
    except Exception as exc:
        # Any failure still ends the crawl, as before (e.g. no "next" link
        # on the last page), but the cause is reported instead of hidden.
        print(f"Stopping Google search crawl: {exc!r}")
        break