我有一个很长的JSON列表,我希望将其转换为数据框。我希望有人能帮我解决这个问题。
{"body":{"overall_standings":{"years":[{"standings":null,"id":"2006"},{"standings":null,"id":"2007"},{"standings":null,"id":"2008"},{"standings":null,"id":"2009"},{"standings":null,"id":"2010"},{"standings":null,"id":"2011"},{"standings":{"teams":[{"Pitching":{"roto_points":"47.0","categories":[{"abbr":"S","roto_points":"91","value":"New York Yankees","diff":"9","rank":5},{"roto_points":"90","value":"New York Yankees","abbr":"W","diff":"7","rank":7},{"roto_points":"1383","value":"New York Yankees","abbr":"K","diff":"10","rank":4},{"abbr":"WHIP","roto_points":"1.2451","value":"New York Yankees","diff":"10","rank":4},{"abbr":"ERA","roto_points":"3.685","value":"New York Yankees","diff":"11","rank":3}]},"Total":{"behind":"0.0","roto_points":"98.0","diff":"-4.0","rank":1},"order":1,"name":"New York Yankees","Batting":{"roto_points":"51.0","categories":[{"abbr":"OBP","roto_points":"0.3371","value":"New York Yankees","diff":"7","rank":7},{"roto_points":"905","value":"New York Yankees","abbr":"RBI","diff":"10","rank":4},{"roto_points":"955","value":"New York Yankees","abbr":"R","diff":"12","rank":2},{"abbr":"SB","roto_points":"183","value":"New York Yankees","diff":"13","rank":1},{"abbr":"HR","roto_points":"247","value":"New York Yankees","diff":"9","rank":5}]},"id":"2"},{"Pitching":{"roto_points":"44.5","categories":[{"abbr":"S","roto_points":"105","value":"Los Angeles Dodgers","diff":"12","rank":2},{"roto_points":"96","value":"Los Angeles Dodgers","abbr":"W","diff":"10.5","rank":3},{"roto_points":"1410","value":"Los Angeles Dodgers","abbr":"K","diff":"11","rank":3},{"abbr":"WHIP","roto_points":"1.2798","value":"Los Angeles Dodgers","diff":"3","rank":11},{"abbr":"ERA","roto_points":"3.810","value":"Los Angeles Dodgers","diff":"8","rank":6}]},"Total":{"behind":"4.0","roto_points":"94.0","diff":"0.0","rank":2},"order":2,"name":"Los Angeles Dodgers","Batting":{"roto_points":"49.5","categories":[{"abbr":"OBP","roto_points":"0.3446","value":"Los Angeles 
Dodgers","diff":"11","rank":3},{"roto_points":"907","value":"Los Angeles Dodgers","abbr":"RBI","diff":"11","rank":3},{"roto_points":"909","value":"Los Angeles Dodgers","abbr":"R","diff":"9","rank":5},{"abbr":"SB","roto_points":"152","value":"Los Angeles Dodgers","diff":"11","rank":3},{"abbr":"HR","roto_points":"234","value":"Los Angeles Dodgers","diff":"7.5","rank":6}]},"id":"1"}]},"id":"2012"}]}}}
在这之后,列表以逗号分隔继续下去,一直到 id 为 2017 的年份,例如:
{"standings":{"teams":[{"Pitching":{"roto_points":"40.5","categories":[{"abbr":"S","roto_points":"100","value":"Los Angeles
谢谢!
答案 0 :(得分:0)
这个方法有点丑陋。我真心希望有人能给出更好的答案,尤其是因为结果中保留了嵌套的数据框,取子集时会比较麻烦。
# Parse the sample JSON from the question into nested R lists / data frames.
# The literal is laid out so that line breaks fall only BETWEEN JSON tokens,
# where whitespace is insignificant. A raw line break inside a JSON string
# value (e.g. in the middle of "Los Angeles Dodgers") is an unescaped control
# character, which makes the document invalid JSON.
js <- jsonlite::fromJSON('{"body":{"overall_standings":{"years":[
  {"standings":null,"id":"2006"},
  {"standings":null,"id":"2007"},
  {"standings":null,"id":"2008"},
  {"standings":null,"id":"2009"},
  {"standings":null,"id":"2010"},
  {"standings":null,"id":"2011"},
  {"standings":{"teams":[
    {"Pitching":{"roto_points":"47.0","categories":[
       {"abbr":"S","roto_points":"91","value":"New York Yankees","diff":"9","rank":5},
       {"roto_points":"90","value":"New York Yankees","abbr":"W","diff":"7","rank":7},
       {"roto_points":"1383","value":"New York Yankees","abbr":"K","diff":"10","rank":4},
       {"abbr":"WHIP","roto_points":"1.2451","value":"New York Yankees","diff":"10","rank":4},
       {"abbr":"ERA","roto_points":"3.685","value":"New York Yankees","diff":"11","rank":3}]},
     "Total":{"behind":"0.0","roto_points":"98.0","diff":"-4.0","rank":1},
     "order":1,"name":"New York Yankees",
     "Batting":{"roto_points":"51.0","categories":[
       {"abbr":"OBP","roto_points":"0.3371","value":"New York Yankees","diff":"7","rank":7},
       {"roto_points":"905","value":"New York Yankees","abbr":"RBI","diff":"10","rank":4},
       {"roto_points":"955","value":"New York Yankees","abbr":"R","diff":"12","rank":2},
       {"abbr":"SB","roto_points":"183","value":"New York Yankees","diff":"13","rank":1},
       {"abbr":"HR","roto_points":"247","value":"New York Yankees","diff":"9","rank":5}]},
     "id":"2"},
    {"Pitching":{"roto_points":"44.5","categories":[
       {"abbr":"S","roto_points":"105","value":"Los Angeles Dodgers","diff":"12","rank":2},
       {"roto_points":"96","value":"Los Angeles Dodgers","abbr":"W","diff":"10.5","rank":3},
       {"roto_points":"1410","value":"Los Angeles Dodgers","abbr":"K","diff":"11","rank":3},
       {"abbr":"WHIP","roto_points":"1.2798","value":"Los Angeles Dodgers","diff":"3","rank":11},
       {"abbr":"ERA","roto_points":"3.810","value":"Los Angeles Dodgers","diff":"8","rank":6}]},
     "Total":{"behind":"4.0","roto_points":"94.0","diff":"0.0","rank":2},
     "order":2,"name":"Los Angeles Dodgers",
     "Batting":{"roto_points":"49.5","categories":[
       {"abbr":"OBP","roto_points":"0.3446","value":"Los Angeles Dodgers","diff":"11","rank":3},
       {"roto_points":"907","value":"Los Angeles Dodgers","abbr":"RBI","diff":"11","rank":3},
       {"roto_points":"909","value":"Los Angeles Dodgers","abbr":"R","diff":"9","rank":5},
       {"abbr":"SB","roto_points":"152","value":"Los Angeles Dodgers","diff":"11","rank":3},
       {"abbr":"HR","roto_points":"234","value":"Los Angeles Dodgers","diff":"7.5","rank":6}]},
     "id":"1"}
  ]},"id":"2012"}
]}}}')
数据一开始是三层嵌套的列表(列表里的列表里的列表),所以我们先去掉不必要的外层。(下面的中间代码只是为了说明,我没有贴出输出,因为太冗长了……您可以把它当作我在一步步带您走一遍。)
# Walk down the three wrapper layers by name, inspecting each level with
# str(); the innermost level (one entry per year) is what we keep.
str(js$body)
str(js$body$overall_standings)
str(js$body$overall_standings$years)
dat <- js$body$overall_standings$years
在最后一个 `str` 的输出中,您可能会注意到 `dat$standings$teams` 里有很多 `NULL`,我们把它们过滤掉:
# Keep only the years whose `teams` entry is non-empty (NULL has length 0),
# then take the first of those as the working data frame.
dat <- local({
  teams_by_year <- dat$standings$teams
  teams_by_year[lengths(teams_by_year) > 0][[1]]
})
好了,下面是一个简单的辅助函数和最终输出:
library(dplyr)

# Prefix every column name of data frame `x` with `nm` followed by `sep`,
# e.g. addnames(dat$Total, "Total") yields columns such as "Total_rank".
# Returns `x` with the renamed columns.
addnames <- function(x, nm, sep = "_") {
  setNames(x, paste0(nm, sep, colnames(x)))
}

# Build one row per team: the scalar team fields, the flattened `Total`
# columns and the Pitching/Batting `roto_points`, side by side.
# as_tibble() replaces tbl_df(), which is deprecated as of dplyr 1.0.0.
dat2 <- bind_cols(
  dat[c("id", "name", "order")],
  addnames(dat$Total, "Total"),
  addnames(dat$Pitching["roto_points"], "Pitching"),
  addnames(dat$Batting["roto_points"], "Batting")
) %>%
  as_tibble() %>%
  # The per-category tables stay nested as list-columns (one data frame per
  # team) instead of being flattened here.
  mutate(
    Pitching_categories = dat$Pitching$categories,
    Batting_categories = dat$Batting$categories
  )
dat2
# # A tibble: 2 x 11
# id name order Total_behind Total_roto_points Total_diff Total_rank Pitching_roto_points Batting_roto_points Pitching_categories Batting_categories
# <chr> <chr> <int> <chr> <chr> <chr> <int> <chr> <chr> <list> <list>
# 1 2 New York Yankees 1 0.0 98.0 -4.0 1 47.0 51.0 <data.frame [5 x 5]> <data.frame [5 x 5]>
# 2 1 Los Angeles Dodgers 2 4.0 94.0 0.0 2 44.5 49.5 <data.frame [5 x 5]> <data.frame [5 x 5]>
结果里有两个嵌套的数据框,所以使用时需要动点脑筋。我认为这并不是查看这些数据的最佳方式,但算是一个起点。当然也可以(多花点功夫)把这两行(每支球队一行)展开成十行(每支球队五行,对应嵌套在 `Pitching` 和 `Batting` 中的五行),但这可能并不是您需要的。
# Expand both list-columns in parallel, so each team's Pitching category rows
# are paired with its Batting category rows.
# `unnest()` comes from tidyr, which this script never attaches, so it is
# called with an explicit namespace. `cols` is required by tidyr >= 1.0.0.
# The nested frames share column names (abbr, roto_points, ...); the legacy
# name repair keeps them distinct by suffixing the second set (abbr1,
# roto_points1, ...), matching the output shown below.
tidyr::unnest(
  dat2,
  cols = c(Pitching_categories, Batting_categories),
  names_repair = tidyr::tidyr_legacy
)
# # A tibble: 10 x 19
# id name order Total_behind Total_roto_points Total_diff Total_rank Pitching_roto_poi~ Batting_roto_po~ abbr roto_points value diff rank abbr1 roto_points1 value1 diff1 rank1
# <chr> <chr> <int> <chr> <chr> <chr> <int> <chr> <chr> <chr> <chr> <chr> <chr> <int> <chr> <chr> <chr> <chr> <int>
# 1 2 New York ~ 1 0.0 98.0 -4.0 1 47.0 51.0 S 91 New York~ 9 5 OBP 0.3371 New York~ 7 7
# 2 2 New York ~ 1 0.0 98.0 -4.0 1 47.0 51.0 W 90 New York~ 7 7 RBI 905 New York~ 10 4
# 3 2 New York ~ 1 0.0 98.0 -4.0 1 47.0 51.0 K 1383 New York~ 10 4 R 955 New York~ 12 2
# 4 2 New York ~ 1 0.0 98.0 -4.0 1 47.0 51.0 WHIP 1.2451 New York~ 10 4 SB 183 New York~ 13 1
# 5 2 New York ~ 1 0.0 98.0 -4.0 1 47.0 51.0 ERA 3.685 New York~ 11 3 HR 247 New York~ 9 5
# 6 1 Los Angel~ 2 4.0 94.0 0.0 2 44.5 49.5 S 105 Los Ange~ 12 2 OBP 0.3446 Los Ange~ 11 3
# 7 1 Los Angel~ 2 4.0 94.0 0.0 2 44.5 49.5 W 96 Los Ange~ 10.5 3 RBI 907 Los Ange~ 11 3
# 8 1 Los Angel~ 2 4.0 94.0 0.0 2 44.5 49.5 K 1410 Los Ange~ 11 3 R 909 Los Ange~ 9 5
# 9 1 Los Angel~ 2 4.0 94.0 0.0 2 44.5 49.5 WHIP 1.2798 Los Ange~ 3 11 SB 152 Los Ange~ 11 3
# 10 1 Los Angel~ 2 4.0 94.0 0.0 2 44.5 49.5 ERA 3.810 Los Ange~ 8 6 HR 234 Los Ange~ 7.5 6
正如我所说,这看起来既不漂亮也不优雅,但也许对您来说是个不错的起点,或许其他人可以在此基础上给出更好的解决方案。