这是有关如何单击网页上的“更多”按钮的扩展问题。 以下是我以前的问题,一个人对此很好地回答。 由于我不太熟悉“按类名查找元素”功能,因此我只是将该人的修改后的代码添加到了现有代码中。因此,我修改后的代码效率不高(我很抱歉)。
Python click 'More' button is not working
情况是,“更多”按钮有两种类型。第一个位于属性描述部分,第二个位于文本评论部分。如果您仅从任何评论中单击一个“更多”按钮,评论将被展开,以便您可以看到全文评论。
我遇到的问题是,我可以为第一页中的评论单击“更多”按钮,但对于第二页中的评论则无法单击。 以下是我收到的错误消息,但我的代码仍在运行(一旦看到错误,便不会停止)。 讯息:
没有这样的元素:无法找到元素:{“ method”:“标签名”,“ selector”:“ span”}
根据我的理解,每个评论都有入门级和相应的跨度。我不明白为什么它说python无法找到它。
from selenium import webdriver
from selenium.webdriver import ActionChains
from bs4 import BeautifulSoup
review_list=[]
review_appended_list=[]
review_list_v2=[]
review_appended_list_v2=[]
listed_reviews=[]
listed_reviews_v2=[]
listed_reviews_total=[]
listed_reviews_total_v2=[]
final_list=[]
#Incognito Mode
option = webdriver.ChromeOptions()
option.add_argument("--incognito")
#Open Chrome
driver=webdriver.Chrome(executable_path="C:/Users/chromedriver.exe",options=option)
#url I want to visit (I'm going to loop over multiple listings but for simplicity, I just added one listing url).
lists = ['https://www.tripadvisor.com/VacationRentalReview-g30196-d6386734-Hot_51st_St_Walk_to_Mueller_2BDR_Modern_sleeps_7-Austin_Texas.html']
for k in lists:
driver.get(k)
time.sleep(3)
#click 'More' on description part.
link = driver.find_element_by_link_text('More')
try:
ActionChains(driver).move_to_element(link)
time.sleep(1) # time to move to link
link.click()
time.sleep(1) # time to update HTML
except Exception as ex:
print(ex)
time.sleep(3)
# first "More" shows text in all reviews - there is no need to search other "More"
try:
first_entry = driver.find_element_by_class_name('entry')
more = first_entry.find_element_by_tag_name('span')
#more = first_entry.find_element_by_link_text('More')
except Exception as ex:
print(ex)
try:
ActionChains(driver).move_to_element(more)
time.sleep(1) # time to move to link
more.click()
time.sleep(1) # time to update HTML
except Exception as ex:
print(ex)
#begin parsing html and scraping data.
html =driver.page_source
soup=BeautifulSoup(html,"html.parser")
listing=soup.find_all("div", class_="review-container")
all_reviews = driver.find_elements_by_class_name('wrap')
for review in all_reviews:
all_entries = review.find_elements_by_class_name('partial_entry')
if all_entries:
review_list=[all_entries[0].text]
review_appended_list.extend([review_list])
for i in range(len(listing)):
review_id=listing[i]["data-reviewid"]
listing_v1=soup.find_all("div", class_="rating reviewItemInline")
rating=listing_v1[i].span["class"][1]
review_date=listing_v1[i].find("span", class_="ratingDate relativeDate")
review_date_detail=review_date["title"]
listed_reviews=[review_id, review_date_detail, rating[7:8]]
listed_reviews.extend([k])
listed_reviews_total.append(listed_reviews)
for a,b in zip (listed_reviews_total,review_appended_list):
final_list.append(a+b)
#loop over from the 2nd page of the reviews for the same listing.
for j in range(5,20,5):
url_1='-'.join(k.split('-',3)[:3])
url_2='-'.join(k.split('-',3)[3:4])
middle="-or%d-" % j
final_k=url_1+middle+url_2
driver.get(final_k)
time.sleep(3)
link = driver.find_element_by_link_text('More')
try:
ActionChains(driver).move_to_element(link)
time.sleep(1) # time to move to link
link.click()
time.sleep(1) # time to update HTML
except Exception as ex:
print(ex)
# first "More" shows text in all reviews - there is no need to search other "More"
try:
first_entry = driver.find_element_by_class_name('entry')
more = first_entry.find_element_by_tag_name('span')
except Exception as ex:
print(ex)
try:
ActionChains(driver).move_to_element(more)
time.sleep(2) # time to move to link
more.click()
time.sleep(2) # time to update HTML
except Exception as ex:
print(ex)
html =driver.page_source
soup=BeautifulSoup(html,"html.parser")
listing=soup.find_all("div", class_="review-container")
all_reviews = driver.find_elements_by_class_name('wrap')
for review in all_reviews:
all_entries = review.find_elements_by_class_name('partial_entry')
if all_entries:
#print('--- review ---')
#print(all_entries[0].text)
#print('--- end ---')
review_list_v2=[all_entries[0].text]
#print (review_list)
review_appended_list_v2.extend([review_list_v2])
#print (review_appended_list)
for i in range(len(listing)):
review_id=listing[i]["data-reviewid"]
#print review_id
listing_v1=soup.find_all("div", class_="rating reviewItemInline")
rating=listing_v1[i].span["class"][1]
review_date=listing_v1[i].find("span", class_="ratingDate relativeDate")
review_date_detail=review_date["title"]
listed_reviews_v2=[review_id, review_date_detail, rating[7:8]]
listed_reviews_v2.extend([k])
listed_reviews_total_v2.append(listed_reviews_v2)
for a,b in zip (listed_reviews_total_v2,review_appended_list_v2):
final_list.append(a+b)
print (final_list)
if len(listing) !=5:
break
如何在第二页和其余页面上单击“更多”按钮?这样我就可以抓取全文评论了吗?
编辑如下:
我得到的错误消息是这两行。
Message: no such element: Unable to locate element: {"method":"tag name","selector":"span"}
Message: stale element reference: element is not attached to the page document
我猜我的整个代码仍在运行,因为我使用了try和except函数?通常,当python遇到错误时,它将停止运行。
答案 0 :(得分:0)
尝试一下:
let
Source = Excel.CurrentWorkbook(){[Name="Table3"]}[Content],
#"Changed Type" = Table.TransformColumnTypes(Source,{{"Column1", Int64.Type}, {"Column2", Int64.Type}}),
#"Merged Columns" = Table.CombineColumns(Table.TransformColumnTypes(#"Changed Type", {{"Column1", type text}, {"Column2", type text}}, "en-AU"),{"Column1", "Column2"},Combiner.CombineTextByDelimiter(":", QuoteStyle.None),"Merged"),
#"Split Column by Delimiter" = Table.ExpandListColumn(Table.TransformColumns(#"Merged Columns", {{"Merged", Splitter.SplitTextByDelimiter(":", QuoteStyle.Csv), let itemType = (type nullable text) meta [Serialized.Text = true] in type {itemType}}}), "Merged"),
#"Changed Type1" = Table.TransformColumnTypes(#"Split Column by Delimiter",{{"Merged", Int64.Type}}),
#"Filtered Rows" = Table.SelectRows(#"Changed Type1", each ([Merged] <> null)),
#"Merged Queries" = Table.NestedJoin(#"Filtered Rows", {"Merged"}, Range1, {"Merged - Copy"}, "Range1", JoinKind.LeftOuter),
#"Expanded Range1" = Table.ExpandTableColumn(#"Merged Queries", "Range1", {"Count"}, {"Count"}),
#"Renamed Columns" = Table.RenameColumns(#"Expanded Range1",{{"Merged", "Range2"}, {"Count", "No. of Duplications from Range1"}})
in
#"Renamed Columns"