我是 R 新手,在将 JSON 文件转换为数据框(dataframe)时遇到了问题。我有如下所示的 JSON 文件:
json_file = '[{"id": "abc", "model": "honda", "date": "20190604", "cols": {"action": 15, "values": 18, "not": 29}},
{"id": "abc", "model": "honda", "date": "20190604", "cols": {"hello": 14, "hi": 85, "wow": 14}},
{"id": "mno", "model": "ford", "date": "20190604", "cols": {"yesterday": 21, "today": 21, "tomorrow": 29}},
{"id": "mno", "model": "ford", "date": "20190604", "cols": {"docs": 25, "ok": 87, "none": 42}}]'
我想将上述json文件转换为以下格式的数据帧:
预期结果
df =
id model date cols values_cols
abc honda 20190604 action 15
abc honda 20190604 values 18
abc honda 20190604 not 29
abc honda 20190604 hello 14
abc honda 20190604 hi 85
abc honda 20190604 wow 14
mno ford 20190604 yesterday 21
mno ford 20190604 today 21
mno ford 20190604 tomorrow 29
mno ford 20190604 docs 25
mno ford 20190604 ok 87
mno ford 20190604 none 42
我的解决方案
require(RJSONIO)
df = fromJSON(json_file)
我的结果
id model date cols id.1 model.1 date.1 cols.1 id.2 model.2 date.2 cols.2 id.3 model.3 date.3 cols.3
action abc honda 20190604 15 abc honda 20190604 14 mno ford 20190604 21 mno ford 20190604 25
values abc honda 20190604 18 abc honda 20190604 85 mno ford 20190604 21 mno ford 20190604 87
not abc honda 20190604 29 abc honda 20190604 14 mno ford 20190604 29 mno ford 20190604 42
这个结果不正确:"cols" 中的键(action、values、not 等)被当成了行索引,而它们本应作为 cols 列中的值出现;同时各条记录被横向展开成了重复的列,而不是逐行排列。
答案 0 :(得分:0)
这是您想要的吗?
> library(jsonlite)
> library(tidyverse)
>
> json_file = '[
+ {"id": "abc",
+ "model": "honda",
+ "date": "20190604",
+ "cols": {"action": 15, "values": 18, "not": 29}},
+ {"id": "abc",
+ "model": "honda",
+ "date": "20190604",
+ "cols": {"hello": 14, "hi": 85, "wow": 14}},
+ {"id": "mno",
+ "model": "ford",
+ "date": "20190604",
+ "cols": {"yesterday": 21, "today": 21, "tomorrow": 29}},
+ {"id": "mno",
+ "model": "ford",
+ "date": "20190604",
+ "cols": {"docs": 25, "ok": 87, "none": 42}}]'
>
> df <- fromJSON(json_file)
>
> # Build one long-format tibble per JSON record (one row of df) and stack them.
> # fromJSON() returns 'cols' as a nested data frame with one column per key
> # seen in any record, so keys absent from a record come back as NA; those
> # rows are dropped in the next step.
> # Iterate over rows with seq_len(nrow(df)): seq_along(df) counts *columns*
> # (id, model, date, cols) and only matches the number of records by
> # coincidence in this example.
> x <- map_df(seq_len(nrow(df)),
+             ~tibble(id = df$id[.x],
+                     model = df$model[.x],
+                     date = df$date[.x],
+                     cols = names(df$cols),
+                     # value of every key for record .x (NA when absent)
+                     values = sapply(df$cols, '[', .x)
+             )
+ )
>
> ## remove NAs
> x[complete.cases(x), ]
# A tibble: 12 x 5
id model date cols values
<chr> <chr> <chr> <chr> <int>
1 abc honda 20190604 action 15
2 abc honda 20190604 values 18
3 abc honda 20190604 not 29
4 abc honda 20190604 hello 14
5 abc honda 20190604 hi 85
6 abc honda 20190604 wow 14
7 mno ford 20190604 yesterday 21
8 mno ford 20190604 today 21
9 mno ford 20190604 tomorrow 29
10 mno ford 20190604 docs 25
11 mno ford 20190604 ok 87
12 mno ford 20190604 none 42
>