我不知道如何将我的JSON数据正确转换为有用的数据帧。这是显示我的数据结构的一些示例数据:
{
"data":[
{"track":[
{"time":"2015","midpoint":{"x":6,"y":8},"realworld":{"x":1,"y":3},"coordinate":{"x":16,"y":38}},
{"time":"2015","midpoint":{"x":6,"y":8},"realworld":{"x":1,"y":3},"coordinate":{"x":16,"y":37}},
{"time":"2016","midpoint":{"x":6,"y":9},"realworld":{"x":2,"y":3},"coordinate":{"x":16,"y":38}}
]},
{"track":[
{"time":"2015","midpoint":{"x":5,"y":9},"realworld":{"x":-1,"y":3},"coordinate":{"x":16,"y":38}},
{"time":"2015","midpoint":{"x":5,"y":9},"realworld":{"x":-1,"y":3},"coordinate":{"x":16,"y":38}},
{"time":"2016","midpoint":{"x":5,"y":9},"realworld":{"x":-1,"y":3},"coordinate":{"x":16,"y":38}},
{"time":"2015","midpoint":{"x":3,"y":15},"realworld":{"x":-9,"y":2},"coordinate":{"x":17,"y":38}}
]},
{"track":[
{"time":"2015","midpoint":{"x":6,"y":7},"realworld":{"x":-2,"y":3},"coordinate":{"x":16,"y":39}}
]}]}
我有很多条track(轨迹),我希望数据集看起来像这样:
track time midpoint realworld coordinate
1
1
1
2
2
2
2
3
到目前为止,我有这个:
# Asker's attempt: read the JSON file and stack the parsed result.
# Path of the JSON file to load.
json_file <- "testdata.json"
# Parse the JSON. NOTE(review): the question does not show which package
# provides fromJSON() here (jsonlite, rjson, ...) -- confirm before reuse.
data <- fromJSON(json_file)
# Stack the parsed list into a single table, passing fill=TRUE.
# NOTE(review): list.stack() is presumably rlist::list.stack -- TODO confirm.
data2 <- list.stack(data, fill=TRUE)
现在它出现了这样:
如何以正确的格式获得此信息?
答案 0 :(得分:4)
使用fromJSON读取时添加flatten = TRUE参数。这将为您提供一个嵌套列表,其最深层包含三个数据帧。使用:
library(jsonlite)
# Read the JSON; flatten = TRUE turns nested objects such as "midpoint"
# into dotted columns (midpoint.x, midpoint.y, ...).
jsondata <- fromJSON(txt, flatten = TRUE)
# Bind the per-track data frames in the nested 'track' list together.
dat <- do.call(rbind, jsondata$data$track)
# Add a track variable: repeat each track's index once per row of that track.
# seq_along() stays correct for an empty list (1:length(x) would give c(1, 0)),
# and vapply() guarantees one integer row count per track instead of letting
# sapply() silently change its return type.
dat$track <- rep(
  seq_along(jsondata$data$track),
  times = vapply(jsondata$data$track, nrow, integer(1))
)
给出:
> dat
time midpoint.x midpoint.y realworld.x realworld.y coordinate.x coordinate.y track
1 2015 6 8 1 3 16 38 1
2 2015 6 8 1 3 16 37 1
3 2016 6 9 2 3 16 38 1
4 2015 5 9 -1 3 16 38 2
5 2015 5 9 -1 3 16 38 2
6 2016 5 9 -1 3 16 38 2
7 2015 3 15 -9 2 17 38 2
8 2015 6 7 -2 3 16 39 3
另一种更短的方法是将jsonlite与data.table包中的rbindlist结合使用:
library(jsonlite)
library(data.table)
# Parse the JSON text; flatten = TRUE expands nested objects into
# dotted columns.
jsondata <- fromJSON(txt, flatten = TRUE)
# Stack the per-track data frames in one call; idcol adds a "track"
# column identifying which list element each row came from.
dat <- rbindlist(l = jsondata[["data"]][["track"]], idcol = "track")
或者以类似的方式使用dplyr包中的bind_rows:
library(dplyr)
# Stack the per-track data frames; .id adds a "track" column identifying
# the list element each row came from.
dat <- bind_rows(jsondata[["data"]][["track"]], .id = "track")
使用过的数据:
# Example JSON used by the answers: a top-level "data" array whose entries
# each hold a "track" array of points. Every point has a "time" string and
# nested "midpoint", "realworld" and "coordinate" objects with x/y values.
txt <- '{
"data":[
{"track":[
{"time":"2015","midpoint":{"x":6,"y":8},"realworld":{"x":1,"y":3},"coordinate":{"x":16,"y":38}},
{"time":"2015","midpoint":{"x":6,"y":8},"realworld":{"x":1,"y":3},"coordinate":{"x":16,"y":37}},
{"time":"2016","midpoint":{"x":6,"y":9},"realworld":{"x":2,"y":3},"coordinate":{"x":16,"y":38}}
]},
{"track":[
{"time":"2015","midpoint":{"x":5,"y":9},"realworld":{"x":-1,"y":3},"coordinate":{"x":16,"y":38}},
{"time":"2015","midpoint":{"x":5,"y":9},"realworld":{"x":-1,"y":3},"coordinate":{"x":16,"y":38}},
{"time":"2016","midpoint":{"x":5,"y":9},"realworld":{"x":-1,"y":3},"coordinate":{"x":16,"y":38}},
{"time":"2015","midpoint":{"x":3,"y":15},"realworld":{"x":-9,"y":2},"coordinate":{"x":17,"y":38}}
]},
{"track":[
{"time":"2015","midpoint":{"x":6,"y":7},"realworld":{"x":-2,"y":3},"coordinate":{"x":16,"y":39}}
]}]}'
答案 1 :(得分:2)
Sahil的答案(如果它尚未删除)会产生误导,因为stream_in是针对ndjson的,而你的数据并不是ndjson。你只需要稍微整理一下嵌套列表。我认为下面的代码还可以写得更简短,但这是一个快速、直接的变通办法:
library(jsonlite)
library(purrr)
library(readr)
# Parse the JSON into plain nested lists (no simplification to data frames).
dat <- fromJSON(txt, simplifyVector = FALSE)
map(dat$data, "track") %>% # pull the "track" list out of each "data" entry
  map_df(function(track) { # build one data frame per track
    # Flatten each point individually (`.x`), not the whole `track`, so that
    # every point becomes its own row with its own values.
    map_df(track, ~ as.list(unlist(.x)))
  }, .id = "track") %>% # add in the track "id"
  type_convert() # unlist() coerced the values to character; re-guess types
## # A tibble: 8 × 8
## track time midpoint.x midpoint.y realworld.x realworld.y coordinate.x coordinate.y
## <int> <int> <int> <int> <int> <int> <int> <int>
## 1 1 2015 6 8 1 3 16 38
## 2 1 2015 6 8 1 3 16 37
## 3 1 2016 6 9 2 3 16 38
## 4 2 2015 5 9 -1 3 16 38
## 5 2 2015 5 9 -1 3 16 38
## 6 2 2016 5 9 -1 3 16 38
## 7 2 2015 3 15 -9 2 17 38
## 8 3 2015 6 7 -2 3 16 39
这也为您提供了不错的列类型,但您可能希望使用readr::type_convert的col_types参数将time转换为字符向量。
或者:
library(jsonlite)
library(purrr)
library(tibble)
# Parse the JSON; flatten = TRUE expands nested objects into dotted columns.
dat <- fromJSON(txt, flatten = TRUE)
# Convert each per-track data frame to a tibble and row-bind them, adding
# a "track" column that identifies the source list element.
map_df(.x = dat[["data"]][["track"]], .f = as_tibble, .id = "track")