在我的项目中,我试图抓取 YouTube 视频的观看次数、评论数量、点赞数和点踩数。我无法获取评论数量,尝试了多种不同的方法,但都没有成功。这是我的代码,请帮帮我:
import selenium
from selenium import webdriver
import pandas as pd
import time
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
# Start Chrome; chromedriver must be discoverable on PATH.
driver = webdriver.Chrome()
# Empty frame with one column per metric; nothing below writes into it yet.
data = {'Likes' : [], 'Dislikes' : [], 'Comments' : [], 'Views' : []}
dataframe = pd.DataFrame(data)
try:
    # Open the video page.
    driver.get("https://www.youtube.com/watch?v=fHI8X4OXluQ")
    # Fixed pause to give the page time to load before querying it.
    time.sleep(5)
    # Absolute XPaths copied from the browser. Each path is written as adjacent
    # string literals so it stays a single string without embedded newlines.
    # find_element(By.XPATH, ...) replaces find_element_by_xpath, which newer
    # Selenium releases no longer provide.
    Likes = driver.find_element(
        By.XPATH,
        '/html/body/ytd-app/div/ytd-page-manager/ytd-watch-flexy'
        '/div[5]/div[1]/div/div[8]/div[2]/ytd-video-primary-info-renderer'
        '/div/div/div[3]/div/ytd-menu-renderer/div[2]'
        '/ytd-toggle-button-renderer[1]/a/yt-formatted-string',
    ).text
    Dislikes = driver.find_element(
        By.XPATH,
        '/html/body/ytd-app/div/ytd-page-manager/ytd-watch-flexy'
        '/div[5]/div[1]/div/div[8]/div[2]/ytd-video-primary-info-renderer'
        '/div/div/div[3]/div/ytd-menu-renderer/div[2]'
        '/ytd-toggle-button-renderer[2]/a/yt-formatted-string',
    ).text
    View = driver.find_elements(By.XPATH, '//div[@id="count"]')
    Comments = driver.find_elements(
        By.XPATH,
        '/html/body/ytd-app/div/ytd-page-manager/ytd-watch-flexy'
        '/div[5]/div[1]/div/ytd-comments/ytd-item-section-renderer/div[1]'
        '/ytd-comments-header-renderer/div[1]/h2/yt-formatted-string/span[1]',
    )
    print(Likes)
    print(Dislikes)
    # The second div#count match is the one printed as the view count.
    print(View[1].text)
    # find_elements returns a list of elements; print their text rather than
    # the element objects. An empty list means the XPath matched nothing.
    print([comment.text for comment in Comments])
finally:
    # Close the browser even when one of the lookups above raises.
    driver.quit()
答案 0 :(得分:0)
基本上,像下面这样的代码应该可以工作:
import selenium
from selenium import webdriver
import pandas as pd
import time
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
# Start Chrome; chromedriver must be discoverable on PATH.
driver = webdriver.Chrome()
# Empty frame with one column per metric; nothing below writes into it yet.
data = {'Likes' : [], 'Dislikes' : [], 'Comments' : [], 'Views' : []}
dataframe = pd.DataFrame(data)
try:
    # Open the video page.
    driver.get("https://www.youtube.com/watch?v=fHI8X4OXluQ")
    # Fixed pause to give the page time to load before querying it.
    time.sleep(5)
    # Relative XPaths keyed on ids, tag names and aria-labels instead of
    # absolute paths. find_element(By.XPATH, ...) replaces
    # find_element_by_xpath, which newer Selenium releases no longer provide.
    likes_xpath = '(//div[@id="top-level-buttons-computed"]//*[contains(@aria-label," likes")])[last()]'
    Likes = driver.find_element(By.XPATH, likes_xpath).text
    dislikes_xpath = '//div[@id="top-level-buttons-computed"]//*[contains(@aria-label," dislikes")]'
    Dislikes = driver.find_element(By.XPATH, dislikes_xpath).text
    views_xpath = '//*[name()="ytd-video-view-count-renderer"]/span[@class="view-count style-scope ytd-video-view-count-renderer"]'
    View = driver.find_elements(By.XPATH, views_xpath)
    comments_xpath = '//*[name()="ytd-comment-renderer"]//*[name()="yt-formatted-string" and @id="content-text"]'
    Comments = driver.find_elements(By.XPATH, comments_xpath)
    print(Likes)
    print(Dislikes)
    # The number of nodes this XPath matches is not fixed, so indexing a
    # hard-coded position can raise IndexError. Print the first match that
    # has visible text, or None when nothing matched.
    view_texts = [element.text for element in View if element.text]
    print(view_texts[0] if view_texts else None)
    # Print the comment text itself, not the list of element objects.
    print([comment.text for comment in Comments])
finally:
    # Close the browser even when one of the lookups above raises.
    driver.quit()
但是该页面上有很多评论,因此要获取所有评论,您必须向下滚动页面,让更多评论加载出来。
答案 1 :(得分:0)
看看下面的代码能否获取评论数量:
# Locate the first <span> of the yt-formatted-string in the comments header;
# its text is read below as the comment count. `driver` is the already-open
# WebDriver session and `By` comes from selenium.webdriver.common.by.
# find_element(By.XPATH, ...) replaces find_element_by_xpath, which newer
# Selenium releases no longer provide.
elem = driver.find_element(By.XPATH, ".//div[@class='style-scope ytd-comments-header-renderer' and @id='title']//following-sibling::yt-formatted-string[contains(@class,'ytd-comments-header-renderer')]/span[1]")
# Scroll the element into the viewport before reading it.
driver.execute_script("arguments[0].scrollIntoView();", elem)
# A bare `elem.text` expression only shows a value in an interactive session;
# print it so the count is visible when run as a script.
print(elem.text)