我正在从 Twitter API 获取数据,想把返回的 JSON 对象转换为数据框,再加载到数据仓库中。输入数据和代码片段如下。
我是 R 编程的新手。
# Call content() on the request object and keep the result; content() is
# presumably httr::content(), which would return the parsed response body —
# TODO confirm, the function's origin is not shown here.
stats_campaign.data <- content(stats_campaign.request)
# Print the resulting object for inspection.
print(stats_campaign.data)
输出(Output):
`{
"data_type": [ "stats" ],
"time_series_length": [ 1 ],
"data": [
{
"id": [ "XXXXX" ],
"id_data": [
{
"segment": {},
"metrics": {
"impressions": {},
"tweets_send": {},
"qualified_impressions": {},
"follows": {},
"app_clicks": {},
"retweets": {},
"likes": {},
"engagements": {},
"clicks": {},
"card_engagements": {},
"replies": {},
"url_clicks": {},
"carousel_swipes": {}
}
}
]
},
{
"id": [ "XXXX1" ],
"id_data": [
{
"segment": {},
"metrics": {
"impressions": {},
"tweets_send": {},
"qualified_impressions": {},
"follows": {},
"app_clicks": {},
"retweets": {},
"likes": {},
"engagements": {},
"clicks": {},
"card_engagements": {},
"replies": {},
"url_clicks": {},
"carousel_swipes": {}
}
}
]
},`
当我用下面的代码读取这个 JSON 文件时:
# Path of the stats dump to read; TODAY is defined elsewhere (not shown here).
# The format string is kept on a single line: a quoted R string that wraps
# onto the next line embeds a newline character in the resulting path.
stats_json_file <- sprintf(
  "P:/R Repos/R Applications/TwitterAPIData/stats_test_data-%s.json",
  TODAY
)
# Parse the JSON file; the result is printed because it is not assigned.
jsonlite::fromJSON(stats_json_file)
**Result :**
id id_data
1 5wcaz NULL
2 5ub2u NULL
3 5wb8x NULL
4 5wb1j NULL
5 5yqwj NULL
6 5pq5i NULL
7 5u197 NULL
8 5z2js NULL
9 6fqh0 333250, 4, 9, 19, 111, 3189, 3156, 5, 1091
10 5tvr1 NULL
11 5yqw4 NULL
12 5qqps NULL
13 5yqvw NULL
14 5ygom NULL
15 5nc88 NULL
16 5yg94 NULL
17 65t9e NULL
18 5peck NULL
19 63pg1 247283, 17, 22, 35, 297, 5514, 5450, 6, 2971
20 6cdvy 156705, 1, 2, 6, 112, 10933, 605, 170
From my JSON file I want each id and its whole metrics object, i.e. "metrics": {
"impressions": {},
"tweets_send": {},
"qualified_impressions": {},
"follows": {},
"app_clicks": {},
"retweets": {},
"likes": {},
"engagements": {},
"clicks": {},
"card_engagements": {},
"replies": {},
"url_clicks": {},
"carousel_swipes": {}
}
and then convert them to a data frame so that I can load it into a database. Please help!
如何解析这个 JSON 对象?我想取出 id 和整个 metrics 对象,然后转换成数据框并加载到 SQL 表中。
为了读取多个 id 及其指标(metrics)值,我使用了下面的代码:
`# Pre-size the result list so every campaign record keeps its own slot
# (len is defined elsewhere; it is used as the number of records to read).
test <- vector("list", len)
# seq_len() yields an empty sequence when len is 0, unlike 1:len.
for (i in seq_len(len)) {
  # Flatten the i-th record into a named vector. Assigning through
  # test[i] <- list(...) keeps the slot even if unlist() returns NULL.
  test[i] <- list(unlist(stats_campaign.data$data[[i]]))
  print(test[[i]])
}`
**Output:**
id
"5wcaz"
id
"5ub2u"
id
"5wb8x"
id
"5wb1j"
id
"5yqwj"
id
"5pq5i"
id
"5u197"
id
"5z2js"
id
"5tvr1"
id
"5yqw4"
id
"5qqps"
id
"5yqvw"
id
"5ygom"
id
"5nc88"
id
"5yg94"
id
"65t9e"
id
"5peck"
id id_data.metrics.impressions
"63pg1" "133227"
id_data.metrics.tweets_send id_data.metrics.follows
"10" "9"
id_data.metrics.retweets id_data.metrics.likes
"17" "96"
id_data.metrics.engagements id_data.metrics.clicks
"2165" "2134"
id_data.metrics.replies id_data.metrics.url_clicks
"5" "1204"
id id_data.metrics.impressions
"6cdvy" "176164"
id_data.metrics.tweets_send id_data.metrics.retweets
"2" "10"
id_data.metrics.likes id_data.metrics.engagements
"121" "9708"
id_data.metrics.clicks id_data.metrics.url_clicks
"620" "160"
在 for 循环中,我需要用 list 或其他结构把每次得到的值追加起来,应该怎么做?我采用的方法正确吗?有没有其他方法可以解析嵌套的 JSON 对象并直接放入数据框?
请帮帮我!先谢谢了!
答案 0 :(得分:0)
如评论中所述,如果能提供更多关于期望输出的信息会更有帮助。无论如何,希望下面的内容能提供一些有用的指导。tidyjson 的 README 给出了一些有用的概述。
不幸的是,JSON 对象中缺少数据,因此很难说明数据中可能包含的内容(空对象里实际会出现什么),我也很难确定您查询的是 Twitter API 的哪个部分。即使没有数据,tidyjson 也能让您生成结构一致的 data.frame 输出!关键动词是 gather 和 spread,与 tidyr 非常相似,但带有 JSON 风格。
# Sample payload copied from the question: two campaign records whose
# "metrics" entries are all empty objects.
str <- "{\"data_type\":[\"stats\"],\"time_series_length\":[1],\"data\":[{\"id\":[\"XXXXX\"],\"id_data\":[{\"segment\":{},\"metrics\":{\"impressions\":{},\"tweets_send\":{},\"qualified_impressions\":{},\"follows\":{},\"app_clicks\":{},\"retweets\":{},\"likes\":{},\"engagements\":{},\"clicks\":{},\"card_engagements\":{},\"replies\":{},\"url_clicks\":{},\"carousel_swipes\":{}}}]},{\"id\":[\"XXXX1\"],\"id_data\":[{\"segment\":{},\"metrics\":{\"impressions\":{},\"tweets_send\":{},\"qualified_impressions\":{},\"follows\":{},\"app_clicks\":{},\"retweets\":{},\"likes\":{},\"engagements\":{},\"clicks\":{},\"card_engagements\":{},\"replies\":{},\"url_clicks\":{},\"carousel_swipes\":{}}}]}]} "
library(dplyr)
library(tidyjson)

# One row per element of the top-level "data" array; "objid" holds the
# element's position and is used as a join key below.
prep <- as.tbl_json(str) %>%
  enter_object("data") %>%
  gather_array("objid")

# Campaign ids: each "id" is itself an array, so expand it ("idnum") and
# append the string value as column "id".
p1 <- prep %>%
  enter_object("id") %>%
  gather_array("idnum") %>%
  append_values_string("id")

# Metrics: expand "id_data" ("datanum"), step into "metrics" and pull the
# wanted values into columns. The inner keys ("value", "somekey") are
# placeholders, since the sample metrics are empty; the columns come out
# as NA here.
p2 <- prep %>%
  enter_object("id_data") %>%
  gather_array("datanum") %>%
  enter_object("metrics") %>%
  spread_values(
    impressions = jstring("impressions", "value"),
    tweets_send = jnumber("tweets_send", "somekey")
  )

# Convert to plain tibbles before joining; as_tibble() replaces the
# deprecated tbl_df(). Ids and metrics are matched on document and record.
p1 %>%
  as_tibble() %>%
  left_join(as_tibble(p2), by = c("document.id", "objid"))
#> # A tibble: 2 x 7
#> document.id objid idnum id datanum impressions tweets_send
#> <int> <int> <int> <chr> <int> <chr> <dbl>
#> 1 1 1 1 XXXXX 1 <NA> NA
#> 2 1 2 1 XXXX1 1 <NA> NA