在R中抓取Youtube注释时的pageToken

时间:2016-01-28 06:14:30

标签: r while-loop youtube-api

我正在尝试从Youtube视频生成评论数据集,并且无法使用Google API循环遍历pageToken。下面是一段代码。为什么“while”循环不起作用?

base_url <- "https://www.googleapis.com/youtube/v3/commentThreads/"
data = "list"

 api_opts <- list(
  part = "snippet",
  maxResults = 100, 
  textFormat = "plainText",
  videoId = "N708P-A45D0", # This is an example of a video id 
  key = "google developer key goes here", 
  fields = "items,nextPageToken", 
  orderBy = "published")

init_results <- httr::content(httr::GET(base_url, query = api_opts))
data <- init_results$items
api_opts$pageToken <- init_results$nextPageToken
api_opts$pageToken <- gsub("\\=", "", init_results$nextPageToken)
print(api_opts$pageToken)

while (api_opts$pageToken != "") {  
        print(api_opts$pageToken)
        next_results <- httr::content(httr::GET(base_url, query = api_opts))
        data <- c(data, next_results$items)
        api_opts$pageToken <- gsub("\\=", "", next_results$nextPageToken)          
}

organize_data = function(){

    sub_data <- lapply(data, function(x) {
          data.frame(
            Comment = x$snippet$topLevelComment$snippet$textDisplay,
            User = x$snippet$topLevelComment$snippet$authorDisplayName,
            ReplyCount = x$snippet$totalReplyCount,
            LikeCount = x$snippet$topLevelComment$snippet$likeCount,
            PublishTime = x$snippet$topLevelComment$snippet$publishedAt,
            CommentId = x$snippet$topLevelComment$id,
            stringsAsFactors=FALSE)
        })
}

sample <- organize_data()
L <- length(sample)
sample <- data.frame(matrix(unlist(sample), nrow=L, byrow=T))
colnames(sample) <- c("Comment", "User", "ReplyCount", "LikeCount", "PublishTime", "CommentId")
head(sample)

1 个答案:

答案 0 :(得分:0)

感谢您查看,以防其他人将来遇到此问题,以下是我为解决此问题所采取的措施。我仍然无法得到答复的答复。

####
# NEW TRY
# Note: according to YouTube "YouTube currently supports replies only for top-level comments. However, replies to replies may be supported in the future."

####

rm(list=ls())
data = "list"

# Initialize
init_results <- httr::content(httr::GET("https://www.googleapis.com/youtube/v3/commentThreads?part=snippet%2C+replies&maxResults=100&textFormat=plainText&videoId=N708P-A45D0&fields=items%2CnextPageToken&key=[my google developer key]"))
data <- init_results$items
init_results$nextPageToken
print(init_results$nextPageToken)

# Begin loop
while (init_results$nextPageToken != ""){
    # Make the page token URL encoded
    api_opts_pageToken <- gsub("=", "%3D", init_results$nextPageToken)
    # Write the call with the updated page token
    get_call <- gsub("api_pageToken", api_opts_pageToken, "https://www.googleapis.com/youtube/v3/commentThreads?part=snippet%2C+replies&maxResults=100&pageToken=api_pageToken&textFormat=plainText&videoId=N708P-A45D0&fields=items%2CnextPageToken&key==[my google developer key]") 
    # Pull out the data from this page token call
    next_results <- httr::content(httr::GET(get_call))
    # Update the datafile
    data <- c(data,next_results$items)
    # Update the page token
    print(next_results$nextPageToken)
    init_results$nextPageToken <- next_results$nextPageToken
}


organize_data = function(){

    sub_data <- lapply(data, function(x) {
          data.frame(
            Comment = x$snippet$topLevelComment$snippet$textDisplay,
            User = x$snippet$topLevelComment$snippet$authorDisplayName,
            ReplyCount = x$snippet$totalReplyCount,
            LikeCount = x$snippet$topLevelComment$snippet$likeCount,
            PublishTime = x$snippet$topLevelComment$snippet$publishedAt,
            CommentId = x$snippet$topLevelComment$id,
            stringsAsFactors=FALSE)
        })
}

sample <- organize_data()
L <- length(sample)
sample <- data.frame(matrix(unlist(sample), nrow=L, byrow=T))
colnames(sample) <- c("Comment", "User", "ReplyCount", "LikeCount", "PublishTime", "CommentId")
head(sample)
dim(sample)