我在下面显示了一条推文,我只需要其中的这些字段:text、created_at、lang 和 location。
如何才能提取这些字段?
{
"created_at": "Tue Dec 12 23:56:48 +0000 2017",
"id": 940732198180872195,
"id_str": "940732198180872195",
"text": "if you're khoja and never heard Imran Khan during your weird high school times, what was wrong with you? lool",
"truncated": false,
"entities": {
"hashtags": [],
"symbols": [],
"user_mentions": [],
"urls": []
},
"metadata": {"iso_language_code": "en", "result_type": "recent"},
"source": "<a href=\"http://twitter.com\" rel=\"nofollow\">Twitter Web Client</a>",
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"in_reply_to_screen_name": null,
"user": {
"id": 728047869438078977,
"id_str": "728047869438078977",
"name": "Kazim Mohamed",
"screen_name": "kmohamed313",
"location": "",
"description": "\u0663\u0661\u0663 // a //",
"url": null,
"entities": {"description": {"urls": []}},
"protected": false,
"followers_count": 1352,
"friends_count": 880,
"listed_count": 10,
"created_at": "Thu May 05 02:25:13 +0000 2016",
"favourites_count": 5472,
"utc_offset": null,
"time_zone": null,
"geo_enabled": true,
"verified": false,
"statuses_count": 20493,
"lang": "en",
"contributors_enabled": false,
"is_translator": false,
"is_translation_enabled": false,
"profile_background_color": "000000",
"profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png",
"profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png",
"profile_background_tile": false,
"profile_image_url": "http://pbs.twimg.com/profile_images/939196888292646912/BekCH3lo_normal.jpg",
"profile_image_url_https": "https://pbs.twimg.com/profile_images/939196888292646912/BekCH3lo_normal.jpg",
"profile_banner_url": "https://pbs.twimg.com/profile_banners/728047869438078977/1513462067",
"profile_link_color": "7FDBB6",
"profile_sidebar_border_color": "000000",
"profile_sidebar_fill_color": "000000",
"profile_text_color": "000000",
"profile_use_background_image": false,
"has_extended_profile": true,
"default_profile": false,
"default_profile_image": false,
"following": false,
"follow_request_sent": false,
"notifications": false,
"translator_type": "none"
},
"geo": null,
"coordinates": null,
"place": null,
"contributors": null,
"is_quote_status": false,
"retweet_count": 0,
"favorite_count": 0,
"favorited": false,
"retweeted": false,
"lang": "en"
}
答案 0 :(得分:0)
您可以对每条推文执行以下操作:
# line = the_tweet
import json

# `line` holds the raw JSON text of a single tweet.
data = json.loads(line)

# Fields to export, in column order. "location" is not a top-level key of the
# tweet; it belongs to the nested "user" object.
fields = [data['text'], data['created_at'], data['lang'], data['user']['location']]

# NOTE: values are joined with bare commas and are not quoted, so a tweet
# whose text itself contains a comma will spill into extra columns.
with open('out.csv', 'w') as f:
    print(','.join(fields), file=f)
[编辑]一种更具可扩展性的方法:
import csv

# Collect the wanted fields in column order. `data` is the tweet already
# parsed into a dict; "location" lives on the nested "user" object rather
# than at the top level of the tweet.
data_list = [
    data['text'],
    data['created_at'],
    data['lang'],
    data['user']['location'],
]

# newline='' lets the csv module control line endings itself; QUOTE_ALL wraps
# every value in quotes so commas inside the tweet text stay in one column.
with open('out.csv', 'w', newline='') as myfile:
    wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
    wr.writerow(data_list)
答案 1 :(得分:0)
几天前我遇到了类似的问题,是这样解决的:
import pandas as pd

# `results` is the collection of tweets to tabulate. Each item is read through
# attribute access (.user.screen_name, .created_at, .text), so it must be an
# object exposing those attributes; a plain dict produced by json.loads would
# need key lookups instead.
print(len(results))

# Build one record per tweet and create the frame in a single step, instead of
# growing it cell by cell. `columns` fixes the column order and keeps the three
# columns present even when `results` is empty.
resultFrame = pd.DataFrame(
    [
        {
            "username": tweet.user.screen_name,
            "created_at": tweet.created_at,
            "tweet": tweet.text,
        }
        for tweet in results
    ],
    columns=["username", "created_at", "tweet"],
)
答案 2 :(得分:0)
您可以使用dict comprehension轻松提取该数据,如:
import json

# Trimmed-down sample tweet. The text must be strict JSON: a trailing comma
# after the last member of an object makes json.loads raise JSONDecodeError.
data = json.loads("""
{
"created_at": "Tue Dec 12 23:56:48 +0000 2017",
"id": 940732198180872195,
"id_str": "940732198180872195",
"text": "if you're khoja and never heard Imran Khan during your weird high school times, what was wrong with you? lool",
"truncated": false,
"user": {
"id": 728047869438078977,
"id_str": "728047869438078977",
"name": "Kazim Mohamed",
"screen_name": "kmohamed313",
"location": ""
},
"retweet_count": 0,
"favorite_count": 0,
"favorited": false,
"lang": "en"
}
""")

# Copy the wanted top-level fields into a new dict.
sub_data = {k: data[k] for k in ('text', 'created_at', 'lang',)}

# "location" is nested under "user", so it is pulled out separately.
sub_data['user_location'] = data['user']['location']
print(sub_data)
{
'text': "if you're khoja and never heard Imran Khan during your weird high school times, what was wrong with you? lool",
'created_at': 'Tue Dec 12 23:56:48 +0000 2017',
'lang': 'en',
'user_location': ''
}
24 views * 1 point/view + 23 spaces * 5 points/space = 139 < 300 = sv.bounds.width