我一直在尝试使用Python从YouTube上的给定视频中获取评论(线索和回复)(作为练习语言的练习)。
根据官方网站(https://developers.google.com/youtube/v3/docs/commentThreads/list)提供的示例,我能够获取一部分评论,但不是全部。我尝试添加了一些处理多页结果的代码,但即便如此,也只有评论只占一页的视频才能取回全部评论。
例如,https://www.youtube.com/watch?v=Gd_L7DVKTA8有17个评论(包括回复),但我只能获得7个帖子和2个回复。有趣的是,我使用上面链接中提供的API Explorer获得了相同的结果(只有7个线程)。
我的代码如下:
#!/usr/bin/python
# Usage:
# python scraper.py --videoid='<video_id>'
from apiclient.errors import HttpError
from oauth2client.tools import argparser
from apiclient.discovery import build
# Service name/version passed to googleapiclient's discovery build().
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
# Placeholder — replace with a real API key from the Google Cloud console.
DEVELOPER_KEY = 'key'
def get_comment_threads(youtube, video_id, comments):
    """Fetch every top-level comment thread of a video, following pagination.

    Appends each thread's top-level comment text to *comments* (mutated in
    place) and returns the list of raw commentThread resources.

    Args:
        youtube: an authenticated googleapiclient service object.
        video_id: the YouTube video id (the ``v=`` query parameter).
        comments: list collecting the plain-text comment bodies.

    Returns:
        list of commentThread resource dicts, across all result pages.
    """
    threads = []
    page_token = None
    # Single loop handles both the first page and all following pages
    # (the original duplicated the request + item loop for page one).
    while True:
        kwargs = {
            "part": "snippet",
            "videoId": video_id,
            "textFormat": "plainText",
        }
        if page_token:
            kwargs["pageToken"] = page_token
        results = youtube.commentThreads().list(**kwargs).execute()
        for item in results["items"]:
            threads.append(item)
            comment = item["snippet"]["topLevelComment"]
            comments.append(comment["snippet"]["textDisplay"])
        page_token = results.get("nextPageToken")
        if not page_token:
            break
    # Parenthesized print works identically on Python 2 and 3.
    print("Total threads: %d" % len(threads))
    return threads
def get_comments(youtube, parent_id, comments):
    """Fetch the replies to one top-level comment and record their text.

    Appends each reply's plain-text body to *comments* (mutated in place)
    and returns the raw reply resources from the API response.
    """
    response = youtube.comments().list(
        part="snippet",
        parentId=parent_id,
        textFormat="plainText"
    ).execute()
    replies = response["items"]
    comments.extend(reply["snippet"]["textDisplay"] for reply in replies)
    return replies
# Script entry point: fetch every comment (top-level threads plus their
# replies) for the video given via --videoid and write them to output.txt.
if __name__ == "__main__":
    # NOTE(review): oauth2client's shared `argparser` is from a deprecated
    # library — confirm it is still available in the target environment.
    argparser.add_argument("--videoid", help="Required; ID for video for which the comment will be inserted.")
    args = argparser.parse_args()
    # API-key-only client; no OAuth flow is used here.
    youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY)
    try:
        output_file = open("output.txt", "w")
        comments = []
        # First collect all top-level threads (this also fills `comments`
        # with the top-level comment texts)...
        video_comment_threads = get_comment_threads(youtube, args.videoid, comments)
        # ...then append the replies of each thread.
        for thread in video_comment_threads:
            get_comments(youtube, thread["id"], comments)
        for comment in comments:
            # Explicit encode: Python 2 file objects expect byte strings.
            output_file.write(comment.encode("utf-8") + "\n")
        output_file.close()
        print "Total comments: %d" % len(comments)
    # Python 2-only `except X, e` syntax; surfaces YouTube API errors.
    except HttpError, e:
        print "An HTTP error %d occurred:\n%s" % (e.resp.status, e.content)
提前感谢任何建议!
答案 0 :(得分:1)
您可以使用nextPageToken获取所有评论。 YouTube v3 API已经变得混乱了。但不要担心,我认为这是你正在寻找的东西。
# commentThreads.list REST endpoint, called directly via self.openURL
# instead of through the google-api-python-client library.
YOUTUBE_COMMENT_URL = 'https://www.googleapis.com/youtube/v3/commentThreads'
def get_video_comment(self):
    """Fetch and print the comment threads (with embedded replies) of a video.

    Reads --videourl, --key, --max (and an unused --c flag) from the command
    line, extracts the video id from the URL's ``v`` query parameter, then
    pages through the commentThreads endpoint via ``self.openURL``, printing
    each top-level comment and any replies the API embeds in the response.
    """

    def load_comments(self):
        # Print every top-level comment on the current page (`mat` is the
        # decoded response from the enclosing scope), plus the replies the
        # API embeds in each thread resource, if any.
        for item in mat["items"]:
            comment = item["snippet"]["topLevelComment"]
            author = comment["snippet"]["authorDisplayName"]
            text = comment["snippet"]["textDisplay"]
            print("Comment by {}: {}".format(author, text))
            if 'replies' in item.keys():
                for reply in item['replies']['comments']:
                    rauthor = reply['snippet']['authorDisplayName']
                    rtext = reply["snippet"]["textDisplay"]
                    print("\n\tReply by {}: {}".format(rauthor, rtext), "\n")

    parser = argparse.ArgumentParser()
    mxRes = 20  # default page size when --max is not supplied
    vid = str()
    parser.add_argument("--c", help="calls comment function by keyword function", action='store_true')
    parser.add_argument("--max", help="number of comments to return")
    parser.add_argument("--videourl", help="Required URL for which comments to return")
    parser.add_argument("--key", help="Required API key")
    args = parser.parse_args()
    if not args.max:
        args.max = mxRes
    if not args.videourl:
        exit("Please specify video URL using the --videourl=parameter.")
    if not args.key:
        exit("Please specify API key using the --key=parameter.")
    try:
        # Extract the video id from the "v" query parameter of the URL.
        video_id = urlparse(str(args.videourl))
        q = parse_qs(video_id.query)
        vid = q["v"][0]
    # Fixed: was a bare `except:` that also swallowed SystemExit and
    # KeyboardInterrupt. NOTE(review): execution still continues with an
    # empty video id after this message — confirm that is intended.
    except Exception:
        print("Invalid YouTube URL")
    parms = {
        'part': 'snippet,replies',
        'maxResults': args.max,
        'videoId': vid,
        'key': args.key
    }
    try:
        matches = self.openURL(YOUTUBE_COMMENT_URL, parms)
        i = 2  # page counter; page 1 is printed below before the loop
        mat = json.loads(matches)
        nextPageToken = mat.get("nextPageToken")
        print("\nPage : 1")
        print("------------------------------------------------------------------")
        load_comments(self)
        # Keep requesting pages while the API reports a continuation token.
        while nextPageToken:
            parms.update({'pageToken': nextPageToken})
            matches = self.openURL(YOUTUBE_COMMENT_URL, parms)
            mat = json.loads(matches)
            nextPageToken = mat.get("nextPageToken")
            print("\nPage : ", i)
            print("------------------------------------------------------------------")
            load_comments(self)
            i += 1
    except KeyboardInterrupt:
        print("User Aborted the Operation")
    # Fixed: was a bare `except:`; Exception leaves SystemExit alone.
    except Exception:
        print("Cannot Open URL or Fetch comments at a moment")
完整的源代码(连同其他实用工具)可以在这里找到。
此脚本可以获取评论(连同回复),以分类的形式执行搜索和返回视频,频道和播放列表,还可以返回基于国家/地区的搜索结果。
希望这会有所帮助。
答案 1 :(得分:1)
我正在使用此代码
import os
import pickle
import google.oauth2.credentials
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
# OAuth client-secrets file downloaded from the Google Cloud console.
CLIENT_SECRETS_FILE = "client_secret.json" # for more information to create your credentials json please visit https://python.gotrained.com/youtube-api-extracting-comments/
# Scope granting full access to the authenticated user's YouTube data.
SCOPES = ['https://www.googleapis.com/auth/youtube.force-ssl']
API_SERVICE_NAME = 'youtube'
API_VERSION = 'v3'
def get_authenticated_service():
    """Build an authenticated YouTube API client.

    Loads cached OAuth credentials from ``token.pickle`` when present;
    otherwise refreshes an expired token or runs the console OAuth flow,
    then caches the fresh credentials for subsequent runs.
    """
    creds = None
    # Reuse credentials cached by a previous run, if any.
    if os.path.exists('token.pickle'):
        with open('token.pickle', 'rb') as token_file:
            creds = pickle.load(token_file)
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            # Expired but refreshable: obtain a new access token silently.
            creds.refresh(Request())
        else:
            # No usable credentials: walk the user through the OAuth flow.
            flow = InstalledAppFlow.from_client_secrets_file(
                CLIENT_SECRETS_FILE, SCOPES)
            creds = flow.run_console()
        # Persist the (new or refreshed) credentials for the next run.
        with open('token.pickle', 'wb') as token_file:
            pickle.dump(creds, token_file)
    return build(API_SERVICE_NAME, API_VERSION, credentials = creds)
def get_video_comments(service, **kwargs):
    """Return the plain text of every top-level comment on a video.

    *kwargs* is forwarded verbatim to ``commentThreads().list()``; this
    function only takes care of following nextPageToken pagination.
    """
    texts = []
    response = service.commentThreads().list(**kwargs).execute()
    while response:
        texts.extend(
            thread['snippet']['topLevelComment']['snippet']['textDisplay']
            for thread in response['items']
        )
        # Stop once the API no longer reports a continuation token.
        if 'nextPageToken' not in response:
            break
        kwargs['pageToken'] = response['nextPageToken']
        response = service.commentThreads().list(**kwargs).execute()
    return texts
# Entry point: authenticate interactively, then print every top-level
# comment of the video id the user types in.
if __name__ == '__main__':
    # When running locally, disable OAuthlib's HTTPs verification. When
    # running in production *do not* leave this option enabled.
    os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1'
    service = get_authenticated_service()
    videoId = input('Enter Video id : ') # video id here (the video id of https://www.youtube.com/watch?v=vedLpKXzZqE -> is vedLpKXzZqE)
    comments = get_video_comments(service, part='snippet', videoId=videoId, textFormat='plainText')
    # Prints the comment count followed by the full list.
    print(len(comments),comments)
祝你好运
答案 2 :(得分:0)
# Fragment from a thread-iteration loop: `item`, `youtube` and
# `comments_list` are defined elsewhere (not shown in this excerpt).
# Fetches the replies of a thread when the API reports it has any.
if (item['snippet']['totalReplyCount']>0):
    res2 = comments_list(youtube, 'snippet', item['id'])
    for item2 in res2['items']:
        # NOTE(review): commentL is rebuilt for every reply and never read
        # afterwards — looks like an incomplete excerpt; confirm intent.
        commentL = list()
        commentL.append(item2['id'])
        commentL.append(item2['snippet']['authorChannelUrl'])
def comments_list(service, part, parent_id):
    """Call comments.list for the replies of *parent_id*.

    Returns the raw API response dict unchanged.
    """
    request = service.comments().list(parentId=parent_id, part=part)
    return request.execute()
答案 3 :(得分:0)
我不知道这是否是相同的根本原因,但最近我在尝试获取某个视频的全部评论时也遇到了麻烦:我能够拿到评论线程列表,但在尝试查找这些评论的所有回复时,有些评论的回复会返回,有些则不会。不过我注意到,documentation 页面中可交互的 API 查询返回的结果通常比我在自己代码中得到的更多。我检查了"网络"面板,发现 API 文档中的示例调用的是 https://content.googleapis.com,而不是它指示其他人使用的 https://www.googleapis.com。改用 content 这个 URL 之后我的结果更好了,但我不确定为什么两者之间会有这样的差异。
答案 4 :(得分:0)
在最新版本的 API 中,您只能获取对顶级评论的直接回复;对这些回复的进一步回复(即并非直接回复顶级评论的内容)是无法获取的。来源 - https://developers.google.com/youtube/v3/docs/comments/list
这在很大程度上解释了为什么获取到的评论数量会少于预期。