我使用此代码来抓取此代码。 前几个出来很好 但是错误 我想要你的建议。 我该怎么办?
from selenium import webdriver
from time import sleep
from bs4 import BeautifulSoup, Comment
import pandas as pd
#Setting up Chrome webdriver Options
#chrome_options = webdriver.ChromeOptions()
#setting up local path of chrome binary file
#chrome_options.binary_location = "/Users/Norefly/chromedriver2/chromedriver.exec"
#creating Chrome webdriver instance with the set chrome_options
driver = webdriver.PhantomJS("C:/Python/phantomjs-2.1.1-windows/bin/phantomjs.exe")
link = "https://play.google.com/store/apps/details?id=com.supercell.clashofclans&hl=en"
driver.get(link)
#driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
Ptitle = driver.find_element_by_class_name('id-app-title').text.replace(' ','')
print(Ptitle)
#driver.find_element_by_xpath('//*[@id="body-content"]/div/div/div[1]/div[2]/div[2]/div[1]/div[4]/button[2]/div[2]').click()
sleep(1)
driver.find_element_by_xpath('//*[@id="body-content"]/div/div/div[1]/div[2]/div[2]/div[1]/div[4]/button[2]/div[2]/div/div').click()
#select_newest.select_by_visible_text('Newest')
#driver.find_element_by_xpath('//*[@id="body-content"]/div/div/div[1]/div[2]/div[2]/div[1]/div[4]/button[2]/div[2]/div/div').click()
sleep(2)
#driver.find_element_by_css_selector('.review-filter.id-review-sort-filter.dropdown-menu-container').click()
driver.find_element_by_css_selector('.displayed-child').click()
#driver.find_element_by_xpath("//button[@data-dropdown-value='1']").click()
driver.execute_script("document.querySelectorAll('button.dropdown-child')[0].click()")
reviews_df = []
for i in range(1,10000):
try:for elem in driver.find_elements_by_class_name('single-review'):
print(str(i))
content = elem.get_attribute('outerHTML')
soup = BeautifulSoup(content, "html.parser")
#print(soup.prettify())
date = soup.find('span',class_='review-date').get_text()
rating = soup.find('div',class_='tiny-star')['aria-label'][6:7]
title = soup.find('span',class_='review-title').get_text()
txt = soup.find('div',class_='review-body').get_text().replace('Full Review','')[len(title)+1:]
print(soup.get_text())
temp = pd.DataFrame({'Date':date,'Rating':rating,'Review
Title':title,'Review Text':txt},index=[0])
print('-'*10)
reviews_df.append(temp)
#print(elem)
except:
print('s')
driver.find_element_by_xpath('//*[@id="body-content"]/div/div/div[1]/div[2]/div[2]/div[1]/div[4]/button[2]/div[2]/div/div').click()
reviews_df = pd.concat(reviews_df,ignore_index=True)
reviews_df.to_csv(Ptitle+'review_google.csv', encoding='utf-8')
#driver.close()
抓取期间发生此错误,但我不明白此错误。
操作系统是Windows,我正在用Python分析并使用phantomjs。
* google playstore抓取
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.ElementNotVisibleException:
Message: {
"errorMessage": "Element is not currently visible and may not be manipulated",
"request":{
"headers":{
"Accept":"application/json",
"Accept-Encoding":"identity",
"Connection":"close",
"Content-Length":"81",
"Content-Type":"application/json; charset=UTF-8",
"Host":"127.0.0.1:58041",
"User-Agent":"Python http auth"
},
"httpVersion":"1.1",
"method":"POST",
"post":"{
\"id\": \":wdc:1505360987512\",
\"sessionId\": \"b7c59070-98ff-11e7-8363-fdfc8cdfd230 \"
}",
"url":"/click",
"urlParsed": {
"anchor":"",
"query":"",
"file":"click",
"directory":"/",
"path":" /click",
"relative":" /click",
"port":"",
"host":"",
"password":"",
"user":"",
"userInfo":"",
"authority":" ",
"protocol":"",
"source":"/click",
"queryKey":{},
"chunks": ["click"]
},
"urlOriginal":"/session/b7c59070-98ff-11e7-8363-fdfc8cdfd230/element /:wdc:1505360987512/click"
}
}
屏幕截图:可通过屏幕获取