我正在尝试翻译 TripAdvisor 上的所有评论,以便同时保存评论原文(未翻译)和翻译后的评论(从葡萄牙语译成英语)。
因此,爬虫首先筛选出要显示的葡萄牙语评论,然后照常逐条将它们翻译成英语,并把翻译后的评论保存在 com_ 中,而展开后的未翻译评论则保存在 expanded_comments 中。
现在的问题是,对于本来就是英语的评论,页面上不会出现“Google 翻译”小部件。在这种情况下,我希望至少把这些评论按英语原文保存下来,但我不知道如何处理元素缺失的情况。
问题基本上就出在 save_comments(driver) 函数中。
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Translated review texts; save_comments() appends to this list.
com_ = []
# Presumably meant for the expanded, untranslated review texts; nothing in
# this file fills it yet -- TODO confirm.
expanded_comments = []
# Presumably meant for review dates; unused in this file -- TODO confirm.
date_ = []

# Raw string: the backslashes of the Windows path must stay literal.  Without
# the r-prefix "\U" is parsed as a unicode escape on Python 3 (SyntaxError).
driver = webdriver.Chrome(r"C:\Users\shalini\Downloads\chromedriver_win32\chromedriver.exe")
driver.maximize_window()
from bs4 import BeautifulSoup
def expand_reviews(driver):
    """Try to expand the reviews on the current page, closing a popup in between.

    Clicks the first element with class ``moreLink``, then the first element
    with class ``ui_close_x``, then ``moreLink`` once more.  Every click is
    best-effort: when Selenium cannot find or click the element a short
    marker ("err", "err2" or "err3") is printed and the next step still runs.

    Args:
        driver: Selenium WebDriver with the review page already loaded.
    """
    # Imported locally so this function does not rely on an extra
    # file-level import being present.
    from selenium.common.exceptions import WebDriverException

    # (class name to click, marker printed when that click fails)
    steps = (
        ("moreLink", "err"),
        ("ui_close_x", "err2"),
        ("moreLink", "err3"),
    )
    for class_name, marker in steps:
        try:
            driver.find_element(By.CLASS_NAME, class_name).click()
        except WebDriverException:
            # Only Selenium failures are tolerated; a bare ``except`` would
            # also swallow KeyboardInterrupt and SystemExit.
            print(marker)
def save_comments(driver):
    """Collect the Google-translated text of every review on the current page.

    For each element matching ``.googleTranslation>.link`` the link is
    clicked through JavaScript, the text of the ``entry`` div inside the
    translation overlay is appended to the module-level ``com_`` list, and
    the overlay is closed again.  A Selenium failure on one review is printed
    and the loop continues with the next review.

    When the page has no translation links at all, "ERR" is printed and
    nothing is appended.

    Args:
        driver: Selenium WebDriver with the review page already loaded.
    """
    # Imported locally so this function does not rely on an extra
    # file-level import being present.
    from selenium.common.exceptions import WebDriverException

    # SELECTING ALL GOOGLE-TRANSLATOR links
    gt = driver.find_elements(By.CSS_SELECTOR, ".googleTranslation>.link")

    # find_elements returns a plain list, which has no size() method, so
    # emptiness is tested directly.
    if not gt:
        # NOTE(review): with no translation widgets the reviews are presumably
        # already in English; saving them needs a selector for the review
        # body, which this file does not define.
        print("ERR")
        return

    # NOW SAVING TRANSLATED COMMENTS
    for link in gt:
        try:
            driver.execute_script("arguments[0].click()", link)
            com = driver.find_element(
                By.XPATH,
                ".//span[@class = 'ui_overlay ui_modal ']//div[@class='entry']",
            )
            com_.append(com.text)
            time.sleep(5)
            # click() returns None, so nothing may be chained onto it.
            driver.find_element(By.CLASS_NAME, "ui_close_x").click()
            time.sleep(5)
        except WebDriverException as e:
            # Report the failure instead of hiding it, then move on.
            print(e)
# ITERATING THROUGH the tripadvisor review pages and saving translated comments.
# Page index i maps to the review offset i*10 in the URL; only indexes
# FIRST_PAGE .. LAST_PAGE-1 are visited.
FIRST_PAGE = 56
LAST_PAGE = 58
for i in range(FIRST_PAGE, LAST_PAGE):
    page = i * 10
    url = "https://www.tripadvisor.com/Airline_Review-d8729164-Reviews-Cheap-Flights-or" + str(page) + "-TAP-Portugal#REVIEWS"
    driver.get(url)
    wait = WebDriverWait(driver, 10)
    # The language filter is set once, on the first page visited.  Comparing
    # against 0 could never match because the range does not start at 0.
    if i == FIRST_PAGE:
        # SELECTING PORTUGUESE COMMENTS ONLY # Run for one time then iterate over pages
        try:
            langselction = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "span.sprite-date_picker-triangle")))
            langselction.click()
            # NOTE(review): the label 'Englsih first' looks misspelled and does
            # not match the "Portuguese" intent stated above -- confirm the
            # exact list-item text on the page before changing it.
            driver.find_element(By.XPATH, "//div[@class='languageList']//li[normalize-space(.)='Englsih first']").click()
            time.sleep(5)
        except Exception as e:
            # Best-effort: report the problem and still scrape the page.
            print(e)
    save_comments(driver)
答案 0 :(得分:0)
# find_elements returns a plain Python list, which has no size() method;
# an empty list is simply falsy.
if not gt:
    # insert code here to extract the english comments
    pass