如何在R中通过循环从复杂的JSON文件创建data.frame

时间:2016-08-13 20:09:05

标签: json r dataframe

我对R中的循环还不太熟悉,对于把JSON转换为data.frame也是新手。

我在json文件中有这样的数据:

{
"players": {
    "111111": {
        "tLastEvent": 1432523322.429,
        "tActive": 30793.195000172,
        "tSocial": 1915.0230002403,
        "nBlockBreak": 2315,
        "nBlockPlace": 3051,
        "sMove": 141554.58326606,
        "loc": {
            "w": "111111",
            "x": 222222,
            "y": 222222,
            "z": 2222222
        },
        "social": {
            "22222": 42.579999923706,
            "333333": 2318.8910028934,
            "44444": 440.19000005722,
            "55555": 5417.4040002823,
            "66666": 913.04000043869,
            "7777": 1737.2639997005,
            "88888": 419.68400025368,
            "6666666": 972.16700005531,
            "999999": 241.88300061226
        }
    }
}
}

我从这里得到了一些好主意,目前写到了这一步:

# Parse the JSON file into a nested R list (fromJSON is presumably
# jsonlite::fromJSON -- the library() call is not shown in the question).
origininfo<-fromJSON("stats_100_players.json")  
# Picks the single number `tSocial` of the FIRST player only; this is not the
# nested `social` object and it ignores every other player.
socialpl<-origininfo$players[[1]]$tSocial 
# lapply() over that one number and rbind() it, then convert to a data frame:
# the result is the one-value data frame shown below.
socialinfo <- as.data.frame(lapply(socialpl,rbind))
# colnames() used as a getter only returns the names; the result is not
# assigned, so nothing is renamed. `prefix` is only used when names have to be
# generated (do.NULL = FALSE); it is not prepended to existing names.
colnames(socialinfo, prefix = "social")

结果我只得到了一小行:

    structure.1915.0230002403...Dim...c.1L..1L..
1
1915.023

当然我还有更多的玩家,结构都是一样的,但我该如何把它们合并(collapse)到一起呢?

所以我还有其他问题:要添加其他变量,我需要修改哪些地方?另外,如何在不必每次重写代码的情况下对30个文件执行此操作(所有文件都具有相同的结构)?

提前致谢。

编辑1:

用于测试答案中函数的示例数据:

   {
    "players": {
        "209": {
            "tLastEvent": 1428843834.06,
            "tActive": 38087.053002357,
            "tSocial": 12595.439997435,
            "nBlockBreak": 1203,
            "nBlockPlace": 1259,
            "sMove": 159226.12087558,
            "loc": {
                "w": "279576",
                "x": -150.46255768861,
                "y": 105.531226699,
                "z": 11111
            },
            "social": {
                "279578": 4131.6009998322,
                "279581": 578.5170006752,
                "279579": 104.48099970818,
                "279586": 194.46699929237,
                "279592": 1223.5819990635,
                "279594": 657.69799971581,
                "279738": 979.95199990273,
                "279740": 3092.1240000725,
                "279959": 114.17499995232,
                "282352": 6.5269999504089,
                "282354": 454.35900020599,
                "283245": 21.192000150681,
                "283257": 185.67299985886,
                "283262": 198.18899941444,
                "283280": 33.197999954224,
                "284033": 200.12299990654,
                "284035": 419.58199977875
            }
        },
        "210": {
            "tLastEvent": 1429132071.012,
            "tActive": 88717.513002872,
            "tSocial": 10737.395000458,
            "nBlockBreak": 23681,
            "nBlockPlace": 20924,
            "sMove": 106778.98294399,
            "loc": {
                "w": "2222",
                "x": -1693.8889200061,
                "y": 71,
                "z": 1111
            },
            "social": {
                "279577": 4131.6009998322,
                "279579": 271.47599959373,
                "279959": 4425.5970008373,
                "279988": 1.0309998989105,
                "279738": 325.50400018692,
                "279740": 1133.4730000496,
                "280310": 1.0169999599457,
                "282301": 1.0130000114441,
                "282354": 415.23199987411,
                "283257": 1.0079998970032,
                "284033": 20.141000270844,
                "284035": 10.302000045776
            }
        },
        "211": {
            "tLastEvent": 1428688172,
            "tActive": 1059.9219996929,
            "tSocial": 375.95699930191,
            "nBlockBreak": 219,
            "nBlockPlace": 6,
            "sMove": 883.21344060341,
            "loc": {
                "w": "279576",
                "x": -130.9778811327,
                "y": 81,
                "z": 111111
            },
            "social": {
                "279578": 271.47599959373,
                "279577": 104.48099970818
            }
        },
        "212": {
            "tLastEvent": 1428254467.317,
            "tActive": 2135.6659991741,
            "tSocial": 659.75200080872,
            "nBlockBreak": 40,
            "nBlockPlace": 20,
            "sMove": 4789.0751244105,
            "loc": {
                "w": "22222",
                "x": 1269.528314042,
                "y": 72,
                "z": 1111111
            },
            "social": {
                "279577": 578.5170006752,
                "279592": 81.235000133514
            }
        }
    }
   }    

2 个答案:

答案 0 :(得分:0)

可以考虑定义一个函数,并用lapply()把它应用到从目录中取得的json文件列表上。在构建用于转换为数据框的列表时,需要把嵌套项 loc 和 social 展开(例如使用unlist())。

library(jsonlite)

mypath <- "path/to/json/files"
# `pattern` is a regular expression: escape the dot and anchor at the end so
# only files whose names end in ".json" are matched.
jsonfiles <- list.files(path = mypath, pattern = "\\.json$", full.names = TRUE)

# Stack data frames whose columns may differ.
#
# dfs: a list of data frames (NULL and zero-row entries are ignored).
# Returns one data frame holding the union of all columns; a column that a
# given input lacks is filled with NA. Returns an empty data frame when there
# is nothing to stack.
bind_fill <- function(dfs) {
  dfs <- Filter(function(d) !is.null(d) && nrow(d) > 0, dfs)
  if (length(dfs) == 0) {
    return(data.frame())
  }
  all_cols <- unique(unlist(lapply(dfs, names)))
  padded <- lapply(dfs, function(d) {
    for (col in setdiff(all_cols, names(d))) {
      d[[col]] <- NA
    }
    # Same column order everywhere so rbind() lines the columns up.
    d[all_cols]
  })
  out <- do.call(rbind, unname(padded))
  rownames(out) <- NULL
  out
}

# Flatten one player record into a one-row data frame.
#
# id:     the player's key in the "players" object.
# player: the parsed record, a named list whose "loc" and "social" entries are
#         themselves named lists.
# Returns a one-row data frame: `player`, the top-level fields, the fields of
# "loc", then one column per "social" key. data.frame() makes the numeric keys
# syntactic names, e.g. "22222" becomes "X22222".
player_to_df <- function(id, player) {
  nested <- c("loc", "social")
  # Select the top-level fields by name rather than by position.
  scalars <- player[setdiff(names(player), nested)]
  fields <- c(
    list(player = id),
    scalars,
    as.list(player[["loc"]]),
    as.list(player[["social"]])
  )
  data.frame(fields, stringsAsFactors = FALSE)
}

# Read one JSON file and return a data frame with one row per player.
#
# file: path to a JSON file with a top-level "players" object.
jsonparse <- function(file) {
  origininfo <- fromJSON(file)
  # Iterate over the players themselves, not over the top-level list, which
  # only has the single element "players".
  players <- origininfo[["players"]]
  bind_fill(Map(player_to_df, names(players), players))
}

dfList <- lapply(jsonfiles, jsonparse)

# Players (and files) can have different "social" keys, so a plain rbind()
# would fail on mismatched columns; fill the gaps with NA instead.
finaldf <- bind_fill(dfList)

输出 (使用已发布的示例)

str(finaldf)

#  'data.frame' : 1 obs. of  20 variables:
#  $ player     : chr "111111"
#  $ tLastEvent : num 1.43e+09
#  $ tActive    : num 30793
#  $ tSocial    : num 1915
#  $ nBlockBreak: int 2315
#  $ nBlockPlace: int 3051
#  $ sMove      : num 141555
#  $ w          : chr "111111"
#  $ x          : int 222222
#  $ y          : int 222222
#  $ z          : int 2222222
#  $ X22222     : num 42.6
#  $ X333333    : num 2319
#  $ X44444     : num 440
#  $ X55555     : num 5417
#  $ X66666     : num 913
#  $ X7777      : num 1737
#  $ X88888     : num 420
#  $ X6666666   : num 972
#  $ X999999    : num 242

答案 1 :(得分:0)

尝试使用可以使用devtools安装的tidyjson的最新开发版本。

如果您从JSON文件的路径的字符向量json_files开始,代码可能是这样的:

library(tidyjson)
library(dplyr)
library(readr)
library(purrr)

# Load every file's raw text, giving one JSON string per file
json <- map_chr(json_files, read_file)

# Step into "players" and stack its entries, keyed by player number
players <- json %>%
  enter_object("players") %>%
  gather_object("player.num")

# One column per top-level field, without recursing into nested objects
main <- players %>%
  spread_all(recursive = FALSE)

# One column per field of the nested "loc" object
locs <- players %>%
  enter_object("loc") %>%
  spread_all()

# Long format for "social": one row per key, with its value as a number
social <- players %>%
  enter_object("social") %>%
  gather_object("social.key") %>%
  append_values_number("social.value")

然后,根据需要使用dplyr对main、locs和social进行处理或连接:如果player.num在所有文档中是唯一的,就按player.num连接;否则按player.num和document.id一起连接。

请注意,我在不清楚这些列真正含义的情况下,自行给一些列起了名字(如"social.key")。