使用比原始数据框架更少的元素创建数据框时出错:JSON

时间:2016-02-14 21:40:20

标签: r rstudio lapply jsonlite

library(jsonlite)
paths <- list.files(
  pattern="data.json",
  full.names=TRUE,
  recursive=TRUE
)
colNames = c("BillType",
             "Congress",
             "IntroducedAt",
             "OfficialTitle",
             "Number",
             "Status",
             "SubjectsTopTerm",
             "UpdatedAt")
trimData <- function(x) {
  a <- list(x$bill_type,
            x$congress,
            x$introduced_at,
            x$official_title,
            x$number,
            x$status,
            x$subjects_top_term,
            x$updated_at)
  result <- as.data.frame(a)
  return(result)
}
rawData <- do.call(
    "rbind",
    lapply(paths, function(x) fromJSON(txt = x, simplifyDataFrame = TRUE))
)
prunedData <- do.call(
    "rbind",
    lapply(rawData, function(x) trimData(x))
)
colnames(test) <- colNames
write.csv(prunedData, "test3.csv")

我使用此脚本的目标是获取数据帧的JSON数据,并将其转换为更简洁的数据帧以进行CSV输出。 rawData变量最终大约有100列。当我在RStudio中执行此脚本时,出现以下错误:

> prunedData <- do.call("rbind", lapply(rawData, function(x) trimData(x)))
Error in data.frame(NULL, NULL, NULL, NULL, NULL, c(NA, "PASS_OVER:HOUSE",  : 
  arguments imply differing number of rows: 0, 4

我不是R和SQL等声明性语言方面的专家,所以如果你能为我愚蠢的话,那将会有很长的路要走!

1 个答案:

答案 0 :(得分:0)

使用嵌套的do.call()lapply()命令考虑此JSON到数据帧迁移方法。外do.call行跨文件绑定数据,内do.call行绑定每个文件中的json数据。 paste()将列表数据折叠为一个元素,如果您的json文件打印得很漂亮并且没有在一行上压缩,则删除EOF。

library(jsonlite)

paths <- list.files(pattern="data.json", full.names=TRUE, recursive=TRUE)
colNames = c("BillType", "Congress", "IntroducedAt", "OfficialTitle",
             "Number", "Status", "SubjectsTopTerm", "UpdatedAt")

rawData <- do.call(rbind,
                   lapply(paths, 
                          function(x)
                          do.call(rbind, 
                                  lapply(paste(readLines(x, warn=FALSE),
                                               collapse=""), 
                                         jsonlite::fromJSON)
                          )
                   )
           )

# TRIM TO NEEDED COLUMNS
prunedData <- rawdata[colNames]