我正在尝试使用列表推导式(list comprehension),从一个由字典组成的列表中提取特定字段,并保存到另一个列表中。
我下载了几条推文,并将其保存在文本文件中。
我调用了txt文件并将所有项目保存在列表中。
到目前为止,这是我的代码:
import json, random
# Read twitfull1.txt .. twitfull49.txt and collect every line, with trailing
# whitespace stripped, into allTweets; each line is then parsed as JSON.
allTweets = []
i = range(1, 50)
for n in i:
    # `with` closes each file as soon as it has been read instead of leaving
    # up to 49 handles open until garbage collection.
    with open('twitfull' + str(n) + '.txt') as handle:
        tweetFile = [line.rstrip() for line in handle]
    allTweets.extend(tweetFile)
# Parse once, after all files are loaded.  Blank lines are skipped because
# json.loads('') raises an error.
tweets = [json.loads(item) for item in allTweets if item]
列表中的数据示例:
json object
{'id': 746029083335680003, 'in_reply_to_user_id': None, 'in_reply_to_status_id': None, 'source': '<a href="..." rel="nofollow">TweetDeck</a>', 'favorited': False, 'contributors': None, 'favorite_count': 0, 'retweeted': False, 'is_quote_status': False, 'lang': 'es', 'created_at': 'Thu Jun 23 17:16:08 +0000 2016', 'in_reply_to_screen_name': None, 'coordinates': None, 'geo': None, 'id_str': '746029083335680003', 'filter_level': 'low', 'timestamp_ms': '1466702168674', 'in_reply_to_user_id_str': None, 'retweet_count': 0, 'place': None, 'truncated': False, 'in_reply_to_status_id_str': None, 'text': 'Se da lectura a los acuerdos entre Gobierno y FARC-EP. #FinDelConflicto #AdiósALaGuerra #PazenColombia #ElUltimoDiaDeLaGuerra #Cuba', 'user': {'friends_count': 356, 'id': 814202096, 'notifications': None, 'profile_sidebar_border_color': '5ED4DC', 'profile_image_url': 'http://pbs.twimg.com/profile_images/2594545116/r7de57w8q920u7p0hft6_normal.jpeg', 'favourites_count': 3, 'utc_offset': -25200, 'url': 'http://yamimontoya.blogspot.com', 'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme4/bg.gif', 'verified': False, 'profile_sidebar_fill_color': '95E8EC', 'followers_count': 348, 'created_at': 'Mon Sep 10 01:05:29 +0000 2012', 'location': 'Cuba', 'profile_background_color': '0099B9', 'name': 'Yami Montoya', 'lang': 'es', 'time_zone': 'Pacific Time (US & Canada)', 'following': None, 'id_str': '814202096', 'is_translator': False, 'contributors_enabled': False, 'profile_background_tile': False, 'listed_count': 13, 'default_profile': False, 'follow_request_sent': None, 'default_profile_image': False, 'profile_link_color': '0099B9', 'screen_name': 'yami679', 'description': 'Periodista cubana, editora de Tiempo21.cu, orgullosa de vivir en mi Isla.', 'profile_use_background_image': True, 'profile_text_color': '3C3940', 'profile_image_url_https': '...', 'protected': False, 'profile_background_image_url_https': '...', 'statuses_count': 115828, 'geo_enabled': False}, 
'entities': {'hashtags': [{'text': 'FinDelConflicto', 'indices': [55, 71]}, {'text': 'AdiósALaGuerra', 'indices': [72, 87]}, {'text': 'PazenColombia', 'indices': [88, 102]}, {'text': 'ElUltimoDiaDeLaGuerra', 'indices': [103, 125]}, {'text': 'Cuba', 'indices': [126, 131]}], 'user_mentions': [], 'urls': [], 'symbols': []}}
tweets
列表的示例:
[{'id': 746029040851521536, 'in_reply_to_user_id': None, 'in_reply_to_status_id': None, 'source': '<a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>', 'favorited': False, 'contributors': None, 'favorite_count': 0, 'possibly_sensitive': False, 'retweeted': False, 'is_quote_status': False, 'lang': 'es', 'created_at': 'Thu Jun 23 17:15:58 +0000 2016', 'in_reply_to_screen_name': None, 'coordinates': None, 'geo': None, 'id_str': '746029040851521536', 'filter_level': 'low', 'timestamp_ms': '1466702158545', 'in_reply_to_user_id_str': None, 'retweeted_status': {'id': 745637507493093377, 'in_reply_to_user_id': None, 'in_reply_to_status_id': None, 'source': '<a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a>', 'favorited': False, 'contributors': None, 'favorite_count': 0, 'possibly_sensitive': False, 'retweeted': False, 'is_quote_status': False, 'lang': 'es', 'created_at': 'Wed Jun 22 15:20:09 +0000 2016', 'in_reply_to_screen_name': None, 'coordinates': None, 'geo': None, 'id_str': '745637507493093377', 'filter_level': 'low', 'in_reply_to_user_id_str': None, 'retweet_count': 1, 'place': None, 'truncated': False, 'in_reply_to_status_id_str': None, 'text': 'Denuncian por traición a la patria a Juan Manuel Santos: COLOMBIAN NEWS\n21de junio de 2016\n\xa0\n1.\xa0\xa0\xa0\xa0Denuncian ... 
...', 'user': {'friends_count': 834, 'id': 1090274636, 'notifications': None, 'profile_sidebar_border_color': 'FFFFFF', 'profile_image_url': 'http://pbs.twimg.com/profile_images/3459756751/f7d00d504bdc55a4e30f214c46a73188_normal.jpeg', 'favourites_count': 14, 'utc_offset': -18000, 'url': 'http://www.periodicodebate.com', 'profile_background_image_url': 'http://pbs.twimg.com/profile_background_images/772390136/0a690154c8cb12a6b4b918b66222bbd9.jpeg', 'verified': False, 'profile_sidebar_fill_color': 'EFEFEF', 'followers_count': 6272, 'created_at': 'Mon Jan 14 21:55:47 +0000 2013', 'location': 'Colombia', 'profile_background_color': 'F5F8FA', 'name': 'Periódico Debate', 'lang': 'es', 'time_zone': 'Bogota', 'profile_banner_url': '...', 'following': None, 'id_str': '1090274636', 'is_translator': False, 'contributors_enabled': False, 'profile_background_tile': False, 'listed_count': 47, 'default_profile': False, 'follow_request_sent': None, 'default_profile_image': False, 'profile_link_color': '009999', 'screen_name': 'DebateCol', 'description': None, 'profile_use_background_image': True, 'profile_text_color': '333333', 'profile_image_url_https': '...', 'protected': False, 'profile_background_image_url_https': '...', 'statuses_count': 11887, 'geo_enabled': False}, 'entities': {'hashtags': [], 'user_mentions': [], 'urls': [{'display_url': 'bit.ly/28WKaS0', 'indices': [113, 136], 'expanded_url': '...', 'url': '...'}], 'symbols': []}}, 'retweet_count': 0, 'place': None, 'truncated': False, 'in_reply_to_status_id_str': None, 'text': 'RT @DebateCol: Denuncian por traición a la patria a Juan Manuel Santos: COLOMBIAN NEWS\n21de junio de 2016\n\xa0\n1.\xa0\xa0\xa0\xa0Denuncian ... 
...', 'user': {'friends_count': 563, 'id': 274595199, 'notifications': None, 'profile_sidebar_border_color': 'C0DEED', 'profile_image_url': 'http://pbs.twimg.com/profile_images/378800000541161030/4eebcd7336d7aa698bdcb13601869f87_normal.jpeg', 'favourites_count': 152080, 'utc_offset': -14400, 'url': None, 'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme1/bg.png', 'verified': False, 'profile_sidebar_fill_color': 'DDEEF6', 'followers_count': 1515, 'created_at': 'Wed Mar 30 16:06:52 +0000 2011', 'location': 'usa', 'profile_background_color': 'C0DEED', 'name': 'Raul Escobar', 'lang': 'es', 'time_zone': 'Eastern Time (US & Canada)', 'following': None, 'id_str': '274595199', 'is_translator': False, 'contributors_enabled': False, 'profile_background_tile': False, 'listed_count': 17, 'default_profile': True, 'follow_request_sent': None, 'default_profile_image': False, 'profile_link_color': '0084B4', 'screen_name': 'RaulEscobar1154', 'description': None, 'profile_use_background_image': True, 'profile_text_color': '333333', 'profile_image_url_https': '...', 'protected': False, 'profile_background_image_url_https': '...', 'statuses_count': 166211, 'geo_enabled': False}, 'entities': {'hashtags': [], 'user_mentions': [{'id_str': '1090274636', 'id': 1090274636, 'screen_name': 'DebateCol', 'indices': [3, 13], 'name': 'Periódico Debate'}], 'urls': [{'display_url': 'bit.ly/28WKaS0', 'indices': [139, 140], 'expanded_url': '...', 'url': '..'}], 'symbols': []}}, {'id': 746029040658710528, 'in_reply_to_user_id': None, 'in_reply_to_status_id': None, 'source': '<a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>', 'favorited': False, 'contributors': None, 'favorite_count': 0, 'retweeted': False, 'is_quote_status': False, 'lang': 'es', 'created_at': 'Thu Jun 23 17:15:58 +0000 2016', 'in_reply_to_screen_name': None, 'coordinates': None, 'geo': None, 'id_str': '746029040658710528', 'filter_level': 'low', 'timestamp_ms': '1466702158499', 
'in_reply_to_user_id_str': None, 'retweet_count': 0, 'place': None, 'truncated': False, 'in_reply_to_status_id_str': None, 'text': '#AdiosALaGuerra y será que todos los integrantes de la guerrilla estan de acuerdo? estos señores tienen total control de este grupo? Ojala!', 'user': {'friends_count': 2, 'id': 704447284524744704, 'notifications': None, 'profile_sidebar_border_color': 'C0DEED', 'profile_image_url': 'http://pbs.twimg.com/profile_images/704448554119647232/OCMVOVr4_normal.jpg', 'favourites_count': 6, 'utc_offset': None, 'url': None, 'profile_background_image_url': '', 'verified': False, 'profile_sidebar_fill_color': 'DDEEF6', 'followers_count': 6, 'created_at': 'Mon Feb 29 23:24:55 +0000 2016', 'location': None, 'profile_background_color': 'F5F8FA', 'name': 'Desafinado', 'lang': 'pt', 'time_zone': None, 'following': None, 'id_str': '704447284524744704', 'is_translator': False, 'contributors_enabled': False, 'profile_background_tile': False, 'listed_count': 0, 'default_profile': True, 'follow_request_sent': None, 'default_profile_image': False, 'profile_link_color': '2B7BB9', 'screen_name': 'eg0max', 'description': None, 'profile_use_background_image': True, 'profile_text_color': '333333', 'profile_image_url_https': '...', 'protected': False, 'profile_background_image_url_https': '', 'statuses_count': 18, 'geo_enabled': False}, 'entities': {'hashtags': [{'text': 'AdiosALaGuerra', 'indices': [0, 15]}], 'user_mentions': [], 'urls': [], 'symbols': []}}]
当我从 `tweets` 列表中取出某一个项目并打印它的文本或 id 时,一切正常:
# Pick one parsed record at random and show a few of its fields.
itemtest = random.randint(0, len(tweets) - 1)
chosen = tweets[itemtest]
print(chosen['id'])
print(chosen['created_at'])
print(chosen['text'])
print(chosen['user']['screen_name'])
但是,如果我想使用列表推导式(list comprehension)把所有推文的文本保存到一个列表中,就会报错:
这是我使用的代码:
# Fails with KeyError: 'text' as soon as one element of `tweets` has no
# 'text' key.
# NOTE(review): presumably some parsed lines are not tweets (the workaround
# loop further down reads a 'limit' key from them) -- confirm against the data.
tweetText = [tw['text'] for tw in tweets]
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-2-5ea2c15220c6> in <module>()
----> 1 tweetText = [tw['text'] for tw in tweets]
<ipython-input-2-5ea2c15220c6> in <listcomp>(.0)
----> 1 tweetText = [tw['text'] for tw in tweets]
KeyError: 'text'
# Split the parsed records into parallel per-field lists.  Records missing
# any of the 'id' / 'text' / 'user' -> 'screen_name' keys are set aside in
# `trash` instead of aborting the run.
tweet_ids = []
tweet_source = []
tweet_text = []
tweet_users = []
trash = []
# Iterate over `tweets` directly: the original indexed it through
# range(len(tweetsdata)), but `tweetsdata` is never defined.
for tweet in tweets:
    try:
        # Only the lookups can raise KeyError, so only they are guarded.
        ids = tweet['id']
        text = tweet['text']
        users = tweet['user']['screen_name']
    except KeyError:
        # Keep the 'limit' payload when there is one; otherwise keep the
        # whole record so an unexpected shape cannot raise a second KeyError.
        trash.append(tweet.get('limit', tweet))
    else:
        # All lookups succeeded, so the four lists stay aligned by index.
        tweet_ids.append(ids)
        tweet_source.append('https://twitter.com/statuses/' + str(ids))
        tweet_text.append(text)
        tweet_users.append(users)
上面这段 try/except 循环可以绕过这个错误,但是我觉得这个解决方案应该可以用更好的代码来改进。欢迎任何建议,谢谢!
答案 0 :(得分:1)
下面的代码在我这里可以正常运行(JSON 字符串是我根据你给出的 json 对象手动构造的):
import json, random
# Hand-written JSON document containing the fields the question reads.
j = """
{
"id": 746029083335680003,
"created_at": "Thu Jun 23 17:16:08 +0000 2016",
"text": "The text",
"user": {
"screen_name": "usr123",
"description": "description",
"statuses_count": 123,
"geo_enabled": null
},
"entities": {
"hashtags": [{
"text": "a hash tag",
"indices": [55, 71]
}],
"user_mentions": [],
"urls": [],
"symbols": []
}
}
"""
# Parse it into a one-element list, mirroring the shape of the asker's data.
tweets = [json.loads(j)]
itemtest = 0
picked = tweets[itemtest]
# Same four fields, in the same order, as in the question.
for shown in (
    picked['id'],
    picked['created_at'],
    picked['text'],
    picked['user']['screen_name'],
):
    print(shown)
# The comprehension from the question, unchanged: it works on this data.
tweetText = [tw['text'] for tw in tweets]
print(tweetText)
输出:
746029083335680003
Thu Jun 23 17:16:08 +0000 2016
The text
usr123
['The text']
列表中包含两个元素时同样可以正常运行:
import json, random
# Two hand-written JSON documents, kept as separate strings in a list.
j = ["""
{
"id": 746029083335680003,
"created_at": "Thu Jun 23 17:16:08 +0000 2016",
"text": "The text",
"user": {
"screen_name": "usr123",
"description": "description",
"statuses_count": 123,
"geo_enabled": null
},
"entities": {
"hashtags": [{
"text": "a hash tag",
"indices": [55, 71]
}],
"user_mentions": [],
"urls": [],
"symbols": []
}
}""","""
{
"id": 746029083335680003,
"created_at": "Thu Jun 23 17:16:08 +0000 2016",
"text": "The text",
"user": {
"screen_name": "usr123",
"description": "description",
"statuses_count": 123,
"geo_enabled": null
},
"entities": {
"hashtags": [{
"text": "a hash tag",
"indices": [55, 71]
}],
"user_mentions": [],
"urls": [],
"symbols": []
}
}
"""]
# Parse each document separately, giving one dict per tweet.
tweets = [json.loads(raw) for raw in j]
print(len(tweets), tweets)
itemtest = 0
picked = tweets[itemtest]
# Same four fields, in the same order, as in the question.
for shown in (
    picked['id'],
    picked['created_at'],
    picked['text'],
    picked['user']['screen_name'],
):
    print(shown)
# The comprehension from the question, unchanged: it works on this data.
tweetText = [tw['text'] for tw in tweets]
print(tweetText)