我对R中的循环还不太熟悉,对于把JSON转换为data.frame也是新手。
我在json文件中有这样的数据:
{
"players": {
"111111": {
"tLastEvent": 1432523322.429,
"tActive": 30793.195000172,
"tSocial": 1915.0230002403,
"nBlockBreak": 2315,
"nBlockPlace": 3051,
"sMove": 141554.58326606,
"loc": {
"w": "111111",
"x": 222222,
"y": 222222,
"z": 2222222
},
"social": {
"22222": 42.579999923706,
"333333": 2318.8910028934,
"44444": 440.19000005722,
"55555": 5417.4040002823,
"66666": 913.04000043869,
"7777": 1737.2639997005,
"88888": 419.68400025368,
"6666666": 972.16700005531,
"999999": 241.88300061226
}
}
}
}
我在这里得到了一些不错的思路,目前写出了如下代码:
# Asker's attempt: read one JSON file and try to build a data frame from the
# first player's social data.
# fromJSON() is presumably jsonlite::fromJSON (no library() call is shown
# here); it parses the file into a nested R list.
origininfo<-fromJSON("stats_100_players.json")
# Selects the `tSocial` field of the FIRST player only. In the sample data
# this is a single number, not the nested `social` object, which is why the
# result below has just one cell.
socialpl<-origininfo$players[[1]]$tSocial
# lapply() over that one-element vector produces a single 1x1 matrix, so the
# data frame ends up with one row and one auto-generated column name.
socialinfo <- as.data.frame(lapply(socialpl,rbind))
# NOTE(review): this call only *returns* the column names; nothing is
# assigned, and `prefix` is only used by colnames() to create names when
# none exist (with do.NULL = FALSE). It does not add a "social" prefix.
colnames(socialinfo, prefix = "social")
结果我只得到了一个只有一行的小数据框:
structure.1915.0230002403...Dim...c.1L..1L..
1
1915.023
当然我还有更多的玩家,结构都是一样的,但我该如何把它们全部合并到一个数据框里呢?
我还有另外两个问题:如果要加入其他变量,我需要修改哪些地方?另外,如何对30个文件(结构全部相同)执行同样的操作,而不必每次都重写代码?
提前致谢。
编辑1:
用于测试答案中函数的数据:
{
"players": {
"209": {
"tLastEvent": 1428843834.06,
"tActive": 38087.053002357,
"tSocial": 12595.439997435,
"nBlockBreak": 1203,
"nBlockPlace": 1259,
"sMove": 159226.12087558,
"loc": {
"w": "279576",
"x": -150.46255768861,
"y": 105.531226699,
"z": 11111
},
"social": {
"279578": 4131.6009998322,
"279581": 578.5170006752,
"279579": 104.48099970818,
"279586": 194.46699929237,
"279592": 1223.5819990635,
"279594": 657.69799971581,
"279738": 979.95199990273,
"279740": 3092.1240000725,
"279959": 114.17499995232,
"282352": 6.5269999504089,
"282354": 454.35900020599,
"283245": 21.192000150681,
"283257": 185.67299985886,
"283262": 198.18899941444,
"283280": 33.197999954224,
"284033": 200.12299990654,
"284035": 419.58199977875
}
},
"210": {
"tLastEvent": 1429132071.012,
"tActive": 88717.513002872,
"tSocial": 10737.395000458,
"nBlockBreak": 23681,
"nBlockPlace": 20924,
"sMove": 106778.98294399,
"loc": {
"w": "2222",
"x": -1693.8889200061,
"y": 71,
"z": 1111
},
"social": {
"279577": 4131.6009998322,
"279579": 271.47599959373,
"279959": 4425.5970008373,
"279988": 1.0309998989105,
"279738": 325.50400018692,
"279740": 1133.4730000496,
"280310": 1.0169999599457,
"282301": 1.0130000114441,
"282354": 415.23199987411,
"283257": 1.0079998970032,
"284033": 20.141000270844,
"284035": 10.302000045776
}
},
"211": {
"tLastEvent": 1428688172,
"tActive": 1059.9219996929,
"tSocial": 375.95699930191,
"nBlockBreak": 219,
"nBlockPlace": 6,
"sMove": 883.21344060341,
"loc": {
"w": "279576",
"x": -130.9778811327,
"y": 81,
"z": 111111
},
"social": {
"279578": 271.47599959373,
"279577": 104.48099970818
}
},
"212": {
"tLastEvent": 1428254467.317,
"tActive": 2135.6659991741,
"tSocial": 659.75200080872,
"nBlockBreak": 40,
"nBlockPlace": 20,
"sMove": 4789.0751244105,
"loc": {
"w": "22222",
"x": 1269.528314042,
"y": 72,
"z": 1111111
},
"social": {
"279577": 578.5170006752,
"279592": 81.235000133514
}
}
}
}
答案 0 :(得分:0)
可以考虑定义一个函数,并用lapply()把它应用到从目录中读取的JSON文件列表上。在构建用于转换为数据框的列表时,需要对嵌套项 loc 和 social 使用unlist():
library(jsonlite)
# Directory that holds the JSON files (all files share the same structure).
mypath <- "path/to/json/files"

# Anchor the pattern: the bare ".json" is a regex meaning "any character
# followed by json" and would also match names such as "myjson.txt".
jsonfiles <- list.files(path = mypath, pattern = "\\.json$", full.names = TRUE)

# Row-bind a list of data frames whose column sets may differ.
# Columns missing from a frame are filled with NA so that rbind() accepts
# them. Returns an empty data frame when there is nothing to bind.
rbind_fill <- function(frames) {
  frames <- Filter(function(d) is.data.frame(d) && nrow(d) > 0, frames)
  if (length(frames) == 0) {
    return(data.frame())
  }
  all_cols <- unique(unlist(lapply(frames, names)))
  padded <- lapply(frames, function(d) {
    for (col in setdiff(all_cols, names(d))) {
      d[[col]] <- NA
    }
    d[all_cols]
  })
  out <- do.call(rbind, padded)
  rownames(out) <- NULL
  out
}

# Convert one JSON stats file into a data frame with one row per player.
#
# file: path to a JSON file whose top-level "players" object maps a player
#       id to that player's fields, including nested "loc" and "social".
# Returns a data frame with the columns: player, the player's top-level
# fields, the "loc" entries (w, x, y, z) and one column per "social" key,
# prefixed with "social". Players do not share the same social keys, so
# absent keys are NA.
jsonparse <- function(file) {
  origininfo <- fromJSON(file)
  players <- origininfo[["players"]]

  # Iterate over the players, not over the top-level list (which has a
  # single element, "players").
  player_rows <- lapply(seq_along(players), function(i) {
    p <- players[[i]]

    # Select the flat fields by name instead of by position ([1:6]) so a
    # missing or reordered field cannot shift the selection.
    flat_fields <- p[setdiff(names(p), c("loc", "social"))]

    # Prefix the social keys here: they are numeric ids, which would
    # otherwise become "X<id>" and could not be told apart from other
    # columns afterwards.
    social <- p[["social"]]
    if (length(social) > 0) {
      names(social) <- paste0("social", names(social))
    }

    temp <- c(
      list(player = names(players)[i]),
      flat_fields,
      lapply(p[["loc"]], unlist),
      lapply(social, unlist)
    )
    data.frame(temp, stringsAsFactors = FALSE)
  })

  rbind_fill(player_rows)
}

dfList <- lapply(jsonfiles, jsonparse)

# Files may contain different social keys too, so bind with NA filling.
# The "social" prefix is already applied inside jsonparse(); the former
# colnames(df, prefix = "social") line referred to a non-existent `df`
# and never renamed anything.
finaldf <- rbind_fill(dfList)
输出 (使用已发布的示例)
str(finaldf)
# 'data.frame' : 1 obs. of 20 variables:
# $ player : chr "111111"
# $ tLastEvent : num 1.43e+09
# $ tActive : num 30793
# $ tSocial : num 1915
# $ nBlockBreak: int 2315
# $ nBlockPlace: int 3051
# $ sMove : num 141555
# $ w : chr "111111"
# $ x : int 222222
# $ y : int 222222
# $ z : int 2222222
# $ X22222 : num 42.6
# $ X333333 : num 2319
# $ X44444 : num 440
# $ X55555 : num 5417
# $ X66666 : num 913
# $ X7777 : num 1737
# $ X88888 : num 420
# $ X6666666 : num 972
# $ X999999 : num 242
答案 1 :(得分:0)
可以试试tidyjson的最新开发版本,它可以通过devtools安装。
如果您有一个由JSON文件路径组成的字符向量json_files,
代码可以这样写:
library(tidyjson)
library(dplyr)
library(readr)
library(purrr)
# Load every file's contents with readr; map_chr() yields one JSON string
# per path in json_files.
json <- map_chr(json_files, read_file)

# Step into the "players" object and collect its entries, storing each
# player's key in the column "player.num".
players <- gather_object(enter_object(json, "players"), "player.num")

# Top-level fields of each player only (recursive = FALSE).
main <- spread_all(players, recursive = FALSE)

# Fields of each player's "loc" object.
locs <- spread_all(enter_object(players, "loc"))

# Entries of each player's "social" object: the key goes to "social.key"
# and its numeric value to "social.value".
social <- append_values_number(
  gather_object(enter_object(players, "social"), "social.key"),
  "social.value"
)
然后根据需要对 main、locs 和 social 进行处理,或者使用dplyr把它们连接起来:
如果player.num在所有文档中是唯一的,就按player.num连接;
否则就按player.num和document.id一起连接。
请注意,由于不清楚这些列的真实含义,我自行给一些列起了名字(例如"social.key")。