我有一个JSON格式的推文摘录。我附上了一份数据样本。我需要将此JSON转换为数据框(data frame)。
到目前为止,我设法使用“jsonlite”包转换它:
json_data <- jsonlite::stream_in(file("myjsonfile.txt"))
但它不会加载推文中包含的所有信息。例如,我只能看到转发推文的用户,而看不到最初发布该推文的用户。您可以将文件内容复制粘贴到以下网站并选择格式化,以便更清楚地查看该JSON文件:http://jsonviewer.stack.hu/
数据来自Twitter API(有关此数据的更多信息,请访问:https://dev.twitter.com/overview/api/tweets )。
提前感谢您的时间和帮助。
ML_Enthousiast
{"favorited": false, "in_reply_to_status_id_str": null, "in_reply_to_user_id": null, "truncated": false, "in_reply_to_user_id_str": null, "coordinates": null, "retweeted": false, "text": "RT @Antoniotalks: Revenue streams for #OpenData companies!\n#Cloud #StartUp #SMM #AI #IoT #Fintech #BigData #deeplearning #Mpgvip\u2026 ", "retweet_count": 0, "filter_level": "low", "created_at": "Thu Jun 29 18:47:18 +0000 2017", "favorite_count": 0, "retweeted_status": {"favorited": false, "in_reply_to_status_id_str": null, "in_reply_to_user_id": null, "display_text_range": [0, 140], "truncated": true, "in_reply_to_user_id_str": null, "coordinates": null, "retweeted": false, "text": "Revenue streams for #OpenData companies!\n#Cloud #StartUp #SMM #AI #IoT #Fintech #BigData #deeplearning #Mpgvip\u2026 ", "retweet_count": 38, "filter_level": "low", "created_at": "Wed Jun 28 12:45:08 +0000 2017", "favorite_count": 48, "in_reply_to_screen_name": null, "extended_tweet": {"extended_entities": {"media": [{"media_url_https": "", "sizes": {"thumb": {"w": 150, "h": 150, "resize": "crop"}, "large": {"w": 1200, "h": 927, "resize": "fit"}, "medium": {"w": 1200, "h": 927, "resize": "fit"}, "small": {"w": 680, "h": 525, "resize": "fit"}}, "type": "photo", "expanded_url": "", "id": 880044388679901184, "media_url": "http://pbs.twimg.com/media/DDaLtXXXYAAI2eM.jpg", "id_str": "880044388679901184", "display_url": "pic.twitter.com/aw9HeukUYv", "indices": [139, 162], "url": ""}]}, "full_text": "Revenue streams for #OpenData companies!\n#Cloud #StartUp #SMM #AI #IoT #Fintech #BigData #deeplearning #Mpgvip #defstar5 #DataScience #CIO ", "entities": {"user_mentions": [], "hashtags": [{"text": "OpenData", "indices": [20, 29]}, {"text": "Cloud", "indices": [41, 47]}, {"text": "StartUp", "indices": [48, 56]}, {"text": "SMM", "indices": [57, 61]}, {"text": "AI", "indices": [62, 65]}, {"text": "IoT", "indices": [66, 70]}, {"text": "Fintech", "indices": [71, 79]}, {"text": "BigData", "indices": [80, 88]}, 
{"text": "deeplearning", "indices": [89, 102]}, {"text": "Mpgvip", "indices": [103, 110]}, {"text": "defstar5", "indices": [111, 120]}, {"text": "DataScience", "indices": [121, 133]}, {"text": "CIO", "indices": [134, 138]}], "media": [{"media_url_https": "", "sizes": {"thumb": {"w": 150, "h": 150, "resize": "crop"}, "large": {"w": 1200, "h": 927, "resize": "fit"}, "medium": {"w": 1200, "h": 927, "resize": "fit"}, "small": {"w": 680, "h": 525, "resize": "fit"}}, "type": "photo", "expanded_url": "", "id": 880044388679901184, "media_url": "", "id_str": "880044388679901184", "display_url": "pic.twitter.com/aw9HeukUYv", "indices": [139, 162], "url": ""}], "symbols": [], "urls": []}, "display_text_range": [0, 138]}, "in_reply_to_status_id": null, "source": "<a href=\"\" rel=\"nofollow\">Buffer</a>", "id_str": "880044392110796800", "entities": {"user_mentions": [], "hashtags": [{"text": "OpenData", "indices": [20, 29]}, {"text": "Cloud", "indices": [41, 47]}, {"text": "StartUp", "indices": [48, 56]}, {"text": "SMM", "indices": [57, 61]}, {"text": "AI", "indices": [62, 65]}, {"text": "IoT", "indices": [66, 70]}, {"text": "Fintech", "indices": [71, 79]}, {"text": "BigData", "indices": [80, 88]}, {"text": "deeplearning", "indices": [89, 102]}, {"text": "Mpgvip", "indices": [103, 110]}], "symbols": [], "urls": [{"display_url": "twitter.com/i/web/status/8\u2026", "indices": [112, 135], "expanded_url": "", "url": "8H"}]}, "lang": "en", "id": 880044392110796800, "is_quote_status": false, "geo": null, "user": {"screen_name": "Antoniotalks", "profile_background_image_url": "", "profile_image_url": "jpg", "follow_request_sent": null, "profile_background_tile": false, "id": 2445890839, "is_translator": false, "description": "A father & CEO of Recruitd (@imrecruitd). 
Helping companies magnify their #employer and #recruitment #brand and #jobseekers with the #skillstosucceed.", "listed_count": 198, "favourites_count": 398, "created_at": "Tue Apr 15 19:13:52 +0000 2014", "notifications": null, "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png", "contributors_enabled": false, "profile_background_color": "C0DEED", "following": null, "friends_count": 6792, "protected": false, "default_profile": true, "profile_use_background_image": true, "name": "Antonio Giugno", "location": "London, England", "geo_enabled": true, "id_str": "2445890839", "utc_offset": -25200, "profile_banner_url": "0", "profile_text_color": "333333", "lang": "en-gb", "statuses_count": 4058, "profile_sidebar_fill_color": "DDEEF6", "default_profile_image": false, "profile_image_url_https": "4433/dVeGYfTX_normal.jpg", "profile_link_color": "1DA1F2", "url": "rnontubein", "verified": false, "profile_sidebar_border_color": "C0DEED", "followers_count": 6323, "time_zone": "Pacific Time (US & Canada)"}, "contributors": null, "possibly_sensitive": false, "place": null}, "in_reply_to_screen_name": null, "timestamp_ms": "1498762038396", "in_reply_to_status_id": null, "source": "<a href=\".com\" rel=\"nofollow\">Mobile Web (M2)</a>", "id_str": "880497923150286848", "entities": {"user_mentions": [{"screen_name": "Antoniotalks", "id": 2445890839, "id_str": "2445890839", "name": "Antonio Giugno", "indices": [3, 16]}], "hashtags": [{"text": "OpenData", "indices": [38, 47]}, {"text": "Cloud", "indices": [59, 65]}, {"text": "StartUp", "indices": [66, 74]}, {"text": "SMM", "indices": [75, 79]}, {"text": "AI", "indices": [80, 83]}, {"text": "IoT", "indices": [84, 88]}, {"text": "Fintech", "indices": [89, 97]}, {"text": "BigData", "indices": [98, 106]}, {"text": "deeplearning", "indices": [107, 120]}, {"text": "Mpgvip", "indices": [121, 128]}], "symbols": [], "urls": [{"indices": [130, 130], "expanded_url": null, "url": ""}]}, "lang": "en", "id": 
880497923150286848, "is_quote_status": false, "geo": null, "user": {"screen_name": "henrymbuguak", "profile_background_image_url": "://abs.twimg.com/images/themes/theme3/bg.gif", "profile_image_url": "://pbs.twimg.com/profile_images/822772556818239489/0yTbHCGj_normal.jpg", "follow_request_sent": null, "profile_background_tile": false, "id": 310697279, "is_translator": false, "description": "I enjoy coding. Visit my github project: :// ://github.com/henrymbuguak", "listed_count": 62, "favourites_count": 978, "created_at": "Sat Jun 04 05:55:09 +0000 2011", "notifications": null, "profile_background_image_url_https": "://abs.twimg.com/images/themes/theme3/bg.gif", "contributors_enabled": false, "profile_background_color": "EDECE9", "following": null, "friends_count": 2540, "protected": false, "default_profile": false, "profile_use_background_image": true, "name": "kiarie henry mbugua", "location": "Njoro, Kenya.", "geo_enabled": false, "id_str": "310697279", "utc_offset": 10800, "profile_banner_url": "://pbs.twimg.com/profile_banners/310697279/1484999353", "profile_text_color": "634047", "lang": "en", "statuses_count": 3775, "profile_sidebar_fill_color": "E3E2DE", "default_profile_image": false, "profile_image_url_https": "//pbs.twimg.com/profile_images/822772556818239489/0yTbHCGj_normal.jpg", "profile_link_color": "088253", "url": null, "verified": false, "profile_sidebar_border_color": "D3D2CF", "followers_count": 2141, "time_zone": "Nairobi"}, "contributors": null, "place": null}
答案 0 :(得分:0)
如果我使用以下代码读入您的数据:
indata <- jsonlite::read_json("myjsonfile.json")
那么我就能获得JSON文件中包含的所有信息。结果是一个嵌套列表,因此您可能需要从列表的相应元素中提取所需的信息。
> names(indata)
[1] "favorited" "in_reply_to_status_id_str"
[3] "in_reply_to_user_id" "truncated"
[5] "in_reply_to_user_id_str" "coordinates"
[7] "retweeted" "text"
[9] "retweet_count" "filter_level"
[11] "created_at" "favorite_count"
[13] "retweeted_status" "in_reply_to_screen_name"
[15] "timestamp_ms" "in_reply_to_status_id"
[17] "source" "id_str"
[19] "entities" "lang"
[21] "id" "is_quote_status"
[23] "geo" "user"
[25] "contributors" "place"
例如,有关用户的信息如下(此处仅显示部分输出):
> indata$user
$screen_name
[1] "henrymbuguak"
$profile_background_image_url
[1] "://abs.twimg.com/images/themes/theme3/bg.gif"
$profile_image_url
[1] "://pbs.twimg.com/profile_images/822772556818239489/0yTbHCGj_normal.jpg"
$follow_request_sent
NULL
$profile_background_tile
[1] FALSE
$id
[1] 310697279
因此,您可以通过 indata$user$screen_name 这样的方式逐层提取所需的字段(此处为用户的 screen_name)。