我正在尝试从Youtube视频生成评论数据集,并且无法使用Google API循环遍历pageToken。下面是一段代码。为什么“while”循环不起作用?
base_url <- "https://www.googleapis.com/youtube/v3/commentThreads/"
data = "list"
api_opts <- list(
part = "snippet",
maxResults = 100,
textFormat = "plainText",
videoId = "N708P-A45D0", # This is an example of a video id
key = "google developer key goes here",
fields = "items,nextPageToken",
orderBy = "published")
init_results <- httr::content(httr::GET(base_url, query = api_opts))
data <- init_results$items
api_opts$pageToken <- init_results$nextPageToken
api_opts$pageToken <- gsub("\\=", "", init_results$nextPageToken)
print(api_opts$pageToken)
while (api_opts$pageToken != "") {
print(api_opts$pageToken)
next_results <- httr::content(httr::GET(base_url, query = api_opts))
data <- c(data, next_results$items)
api_opts$pageToken <- gsub("\\=", "", next_results$nextPageToken)
}
organize_data = function(){
sub_data <- lapply(data, function(x) {
data.frame(
Comment = x$snippet$topLevelComment$snippet$textDisplay,
User = x$snippet$topLevelComment$snippet$authorDisplayName,
ReplyCount = x$snippet$totalReplyCount,
LikeCount = x$snippet$topLevelComment$snippet$likeCount,
PublishTime = x$snippet$topLevelComment$snippet$publishedAt,
CommentId = x$snippet$topLevelComment$id,
stringsAsFactors=FALSE)
})
}
sample <- organize_data()
L <- length(sample)
sample <- data.frame(matrix(unlist(sample), nrow=L, byrow=T))
colnames(sample) <- c("Comment", "User", "ReplyCount", "LikeCount", "PublishTime", "CommentId")
head(sample)
答案 0 :(得分:0)
感谢您查看,以防其他人将来遇到此问题,以下是我为解决此问题所采取的措施。我仍然无法得到答复的答复。
####
# NEW TRY
# Note: according to YouTube "YouTube currently supports replies only for top-level comments. However, replies to replies may be supported in the future."
####
rm(list=ls())
data = "list"
# Initialize
init_results <- httr::content(httr::GET("https://www.googleapis.com/youtube/v3/commentThreads?part=snippet%2C+replies&maxResults=100&textFormat=plainText&videoId=N708P-A45D0&fields=items%2CnextPageToken&key=[my google developer key]"))
data <- init_results$items
init_results$nextPageToken
print(init_results$nextPageToken)
# Begin loop
while (init_results$nextPageToken != ""){
# Make the page token URL encoded
api_opts_pageToken <- gsub("=", "%3D", init_results$nextPageToken)
# Write the call with the updated page token
get_call <- gsub("api_pageToken", api_opts_pageToken, "https://www.googleapis.com/youtube/v3/commentThreads?part=snippet%2C+replies&maxResults=100&pageToken=api_pageToken&textFormat=plainText&videoId=N708P-A45D0&fields=items%2CnextPageToken&key==[my google developer key]")
# Pull out the data from this page token call
next_results <- httr::content(httr::GET(get_call))
# Update the datafile
data <- c(data,next_results$items)
# Update the page token
print(next_results$nextPageToken)
init_results$nextPageToken <- next_results$nextPageToken
}
organize_data = function(){
sub_data <- lapply(data, function(x) {
data.frame(
Comment = x$snippet$topLevelComment$snippet$textDisplay,
User = x$snippet$topLevelComment$snippet$authorDisplayName,
ReplyCount = x$snippet$totalReplyCount,
LikeCount = x$snippet$topLevelComment$snippet$likeCount,
PublishTime = x$snippet$topLevelComment$snippet$publishedAt,
CommentId = x$snippet$topLevelComment$id,
stringsAsFactors=FALSE)
})
}
sample <- organize_data()
L <- length(sample)
sample <- data.frame(matrix(unlist(sample), nrow=L, byrow=T))
colnames(sample) <- c("Comment", "User", "ReplyCount", "LikeCount", "PublishTime", "CommentId")
head(sample)
dim(sample)