在R中读取json,其中包含许多不同级别的变量

时间:2018-09-25 03:36:42

标签: r json

问题

嗨,我正在尝试在 R 中读取 JSON 数据。我需要得到一个数据框,其中包含下面可重现示例中的所有变量,并且把 jsonData 列中各个层级的数据分别展开成单独的列。

我尝试过的做法是:先用 jsonlite 的 fromJSON() 解析,再用 flatten() 展平,把嵌套列表转换为 tibble,把 tibble 包装进 list,最后用 bind_cols() 合并成一个数据框。

问题是变量列表中有列表。

可重复的数据:

# Identifier of each JSON record (one value per row of the data frame built
# further down).
jsonId <- c(
  1214, 1194, 2032, 923, 4208, 4412,
  37729, 136004, 7059, 1448, 4074, 1614
)

# Id attached to each record; the same id appears on several rows.
id <- c(
  34, 34, 34, 36, 34, 34,
  47, 36, 36, 36, 37, 47
)

# Category label: each of the six letters covers two consecutive rows.
category <- rep(c("A", "B", "C", "D", "F", "G"), each = 2)


# Raw JSON payloads, one string per record (12 in total). The shape varies by
# record: objects keyed by "comments", "posts" or "reactions", a bare
# {"id": ...} object, JSON arrays (including an empty "[]"), and
# "activities-*" objects holding arrays of dateTime/value pairs.
# NOTE(review): the third string contains the bare placeholders -lat and long
# (unquoted), so that element does not look like valid JSON as written --
# confirm before parsing it.
jsonData <- c("{\"comments\":{\"data\":[{\"id\":\"id1\",\"created_time\":\"2017-04-19T08:22:40+0000\",\"message\":\"comment\",\"from\":{\"name\":\"name1\",\"id\":\"11\"},\"like_count\":1}],\"paging\":{\"cursors\":{\"before\":\"some_cursor1\",\"after\":\"some_cursor2\"}}},\"id\":\"id2\"}", 
              "{\"comments\":{\"data\":[{\"id\":\"id2\",\"created_time\":\"2017-04-20T08:22:40+0000\",\"message\":\"comment\",\"from\":{\"name\":\"name2\",\"id\":\"21\"},\"like_count\":2}],\"paging\":{\"cursors\":{\"before\":\"some_cursor21\",\"after\":\"some_cursor22\"}}},\"id\":\"id22\"}", 
              "{\"posts\":{\"data\":[{\"id\":\"4\",\"created_time\":\"2017-04-20T03:24:57+0000\",\"message\":\"Test - location check-in\",\"status_type\":\"mobile_status_update\",\"story\":\"some_story\",\"from\":{\"name\":\"name1\",\"id\":\"id1\"},\"place\":{\"id\":\"id1\",\"name\":\"The Irish\",\"location\":{\"city\":\"city\",\"country\":\"Australia\",\"latitude\":-lat,\"longitude\":long,\"state\":\"VIC\",\"street\":\"add\",\"zip\":\"zip\"}},\"likes\":{\"data\":[],\"summary\":{\"total_count\":0,\"can_like\":true,\"has_liked\":false}},\"comments\":{\"data\":[],\"summary\":{\"order\":\"chronological\",\"total_count\":0,\"can_comment\":true}}}],\"paging\":{\"previous\":\"some link\"}},\"id\":\"id1\"}", 
              "{\"id\":\"4\"}", "{\"reactions\":{\"data\":[{\"id\":\"id1\",\"type\":\"LIKE\"}],\"paging\":{\"cursors\":{\"before\":\"before1\",\"after\":\"after1\"}}},\"id\":\"id1\"}", 
              "{\"reactions\":{\"data\":[{\"id\":\"id2\",\"type\":\"LIKE\"}],\"paging\":{\"cursors\":{\"before\":\"before2\",\"after\":\"after2\"}}},\"id\":\"id2\"}", 
              "[{\"battery\":\"Medium\",\"deviceVersion\":\"Flex 2\",\"features\":[],\"id\":\"id\",\"lastSyncTime\":\"2017-07-21T21:13:00.000\",\"mac\":\"num1\",\"type\":\"TRACKER\"}]", 
              "[]", "{\"activities-minutesFairlyActive\":[{\"dateTime\":\"2017-10-12\",\"value\":\"0\"}]}", 
              "{\"activities-minutesFairlyActive\":[{\"dateTime\":\"2017-05-08\",\"value\":\"43\"},{\"dateTime\":\"2017-05-09\",\"value\":\"9\"}]}", 
              "{\"activities-minutesLightlyActive\":[{\"dateTime\":\"2017-07-20\",\"value\":\"85\"}]}", 
              "{\"activities-minutesLightlyActive\":[{\"dateTime\":\"2017-04-12\",\"value\":\"127\"},{\"dateTime\":\"2017-04-13\",\"value\":\"211\"},{\"dateTime\":\"2017-04-14\",\"value\":\"270\"},{\"dateTime\":\"2017-04-15\",\"value\":\"263\"},{\"dateTime\":\"2017-04-16\",\"value\":\"259\"},{\"dateTime\":\"2017-04-17\",\"value\":\"181\"},{\"dateTime\":\"2017-04-18\",\"value\":\"72\"}]}"
)


# Record timestamps, given as seconds since the Unix epoch and tagged as UTC.
timestamp <- as.POSIXct(
  c(
    1494205440, 1494119040, 1495328700, 1493773440,
    1500958620, 1501563420, 1518584460, 1535864460,
    1507870680, 1494378240, 1500613020, 1494551100
  ),
  origin = "1970-01-01",
  tz = "UTC"
)

# Assemble the columns directly with data.frame(). Going through cbind() first
# builds a matrix, which coerces every column to character and strips the
# POSIXct class from timestamp. stringsAsFactors = FALSE keeps category and
# jsonData as plain character vectors on every R version, so the JSON strings
# can be handed to a parser as-is.
df <- data.frame(
  jsonId, id, category, jsonData, timestamp,
  stringsAsFactors = FALSE
)

我已经尝试了 https://community.rstudio.com/t/how-to-read-multilevel-json-data-and-convert-to-data-frame-in-r/7571/10 中的一些代码,但它无法处理各个变量嵌套层级不同的情况:

# reply from ttrodrigz, written as explicit steps instead of one pipe chain
x <- local({
  # make json, then make list (second record only)
  parsed <- fromJSON(jsonLog$JsonData[2])

  # remove classification level
  flattened <- purrr::flatten(parsed)

  # turn nested lists into dataframes
  as_tibbles <- map_if(flattened, is_list, as_tibble)

  # bind_cols needs tibbles to be in lists
  wrapped <- map_if(as_tibbles, is_tibble, list)

  # creates nested dataframe
  bind_cols(wrapped)
})

我遇到此错误(例如对于第2行):cbind_all(x)错误:参数2的长度必须为7,而不是1。

如果这个问题比较繁琐,还请见谅。遗憾的是,这类 JSON 数据本身就是非结构化的,在 R 中处理起来比较麻烦。

1 个答案:

答案 0 :(得分:0)

尝试一下:

  • 我对您的 jsonData 做了一些修改(给 -lat 和 long 加上了引号,并用一对 [] 把所有元素包起来):请参见代码注释。
  • 我创建了2个数据帧df和df2进行比较(在RStudio中查看)

_

library(tidyverse)
library(jsonlite)

# Identifier of each JSON record (one value per row of the tibble built
# further down).
jsonId <- c(
  1214, 1194, 2032, 923, 4208, 4412,
  37729, 136004, 7059, 1448, 4074, 1614
)

# Id attached to each record; the same id appears on several rows.
id <- c(
  34, 34, 34, 36, 34, 34,
  47, 36, 36, 36, 37, 47
)

# Category label: each of the six letters covers two consecutive rows.
category <- rep(c("A", "B", "C", "D", "F", "G"), each = 2)

# Same payloads as in the question, with two modifications:
#   * the latitude/longitude placeholders are quoted ("-lat", "long");
#   * the first string starts with an extra "[" and the last string ends with
#     an extra "]", so the elements form one JSON array once they are joined
#     with commas.
jsonData <- c("[{\"comments\":{\"data\":[{\"id\":\"id1\",\"created_time\":\"2017-04-19T08:22:40+0000\",\"message\":\"comment\",\"from\":{\"name\":\"name1\",\"id\":\"11\"},\"like_count\":1}],\"paging\":{\"cursors\":{\"before\":\"some_cursor1\",\"after\":\"some_cursor2\"}}},\"id\":\"id2\"}",
              "{\"comments\":{\"data\":[{\"id\":\"id2\",\"created_time\":\"2017-04-20T08:22:40+0000\",\"message\":\"comment\",\"from\":{\"name\":\"name2\",\"id\":\"21\"},\"like_count\":2}],\"paging\":{\"cursors\":{\"before\":\"some_cursor21\",\"after\":\"some_cursor22\"}}},\"id\":\"id22\"}",
              "{\"posts\":{\"data\":[{\"id\":\"4\",\"created_time\":\"2017-04-20T03:24:57+0000\",\"message\":\"Test - location check-in\",\"status_type\":\"mobile_status_update\",\"story\":\"some_story\",\"from\":{\"name\":\"name1\",\"id\":\"id1\"},\"place\":{\"id\":\"id1\",\"name\":\"The Irish\",\"location\":{\"city\":\"city\",\"country\":\"Australia\",\"latitude\":\"-lat\",\"longitude\":\"long\",\"state\":\"VIC\",\"street\":\"add\",\"zip\":\"zip\"}},\"likes\":{\"data\":[],\"summary\":{\"total_count\":0,\"can_like\":true,\"has_liked\":false}},\"comments\":{\"data\":[],\"summary\":{\"order\":\"chronological\",\"total_count\":0,\"can_comment\":true}}}],\"paging\":{\"previous\":\"some link\"}},\"id\":\"id1\"}",
              "{\"id\":\"4\"}", "{\"reactions\":{\"data\":[{\"id\":\"id1\",\"type\":\"LIKE\"}],\"paging\":{\"cursors\":{\"before\":\"before1\",\"after\":\"after1\"}}},\"id\":\"id1\"}",
              "{\"reactions\":{\"data\":[{\"id\":\"id2\",\"type\":\"LIKE\"}],\"paging\":{\"cursors\":{\"before\":\"before2\",\"after\":\"after2\"}}},\"id\":\"id2\"}",
              "[{\"battery\":\"Medium\",\"deviceVersion\":\"Flex 2\",\"features\":[],\"id\":\"id\",\"lastSyncTime\":\"2017-07-21T21:13:00.000\",\"mac\":\"num1\",\"type\":\"TRACKER\"}]",
              "[]", "{\"activities-minutesFairlyActive\":[{\"dateTime\":\"2017-10-12\",\"value\":\"0\"}]}",
              "{\"activities-minutesFairlyActive\":[{\"dateTime\":\"2017-05-08\",\"value\":\"43\"},{\"dateTime\":\"2017-05-09\",\"value\":\"9\"}]}",
              "{\"activities-minutesLightlyActive\":[{\"dateTime\":\"2017-07-20\",\"value\":\"85\"}]}",
              "{\"activities-minutesLightlyActive\":[{\"dateTime\":\"2017-04-12\",\"value\":\"127\"},{\"dateTime\":\"2017-04-13\",\"value\":\"211\"},{\"dateTime\":\"2017-04-14\",\"value\":\"270\"},{\"dateTime\":\"2017-04-15\",\"value\":\"263\"},{\"dateTime\":\"2017-04-16\",\"value\":\"259\"},{\"dateTime\":\"2017-04-17\",\"value\":\"181\"},{\"dateTime\":\"2017-04-18\",\"value\":\"72\"}]}]"
)
# Record timestamps, given as seconds since the Unix epoch and tagged as UTC.
timestamp <- as.POSIXct(
  c(
    1494205440, 1494119040, 1495328700, 1493773440,
    1500958620, 1501563420, 1518584460, 1535864460,
    1507870680, 1494378240, 1500613020, 1494551100
  ),
  origin = "1970-01-01",
  tz = "UTC"
)


# Join the JSON fragments with commas. The first fragment opens with "[" and
# the last one closes with "]", so the joined string is a single JSON array
# that fromJSON() can parse in one call. str_c(collapse = ",") produces the
# same string as folding the vector pairwise with reduce(str_c, sep = ",").
jsonData_p <- jsonData %>%
  str_c(collapse = ",") %>%
  fromJSON()

# df holds only the plain columns. tibble() is used because data_frame() is
# deprecated in the tibble package.
df <- tibble(jsonId, id, category, timestamp)

# df2 is a copy of df with the parsed JSON attached as an extra column, so the
# two can be compared side by side.
df2 <- df
df2$jsonData <- jsonData_p
# df2$jsonData <- map(jsonData_p, unlist)     # alternative: remove keys