使用列表推导式(list comprehension)提取列表中的 JSON 对象

时间:2016-06-24 19:24:14

标签: python json python-3.x twitter list-comprehension

我正在尝试使用列表推导式(list comprehension),从一个列表中取出特定项目并保存到另一个列表中。

我下载了几条推文,并将其保存在文本文件中。

我读取了这些 txt 文件,并将其中的所有项目保存在一个列表中。

到目前为止,这是我的代码:

import json, random
# Collect the raw lines of every downloaded tweet file
# (twitfull1.txt ... twitfull49.txt), then parse each line as JSON.
allTweets = []
for n in range(1, 50):
    # The context manager closes each file as soon as it has been read,
    # instead of leaving 49 handles open until garbage collection.
    with open('twitfull' + str(n) + '.txt') as handle:
        # Blank lines are skipped: json.loads('') would raise an error.
        tweetFile = [line.rstrip() for line in handle if line.strip()]
    allTweets.extend(tweetFile)
tweets = [json.loads(item) for item in allTweets]

列表中的数据示例:

json object
{'id': 746029083335680003, 'in_reply_to_user_id': None, 'in_reply_to_status_id': None, 'source': '<a href="..." rel="nofollow">TweetDeck</a>', 'favorited': False, 'contributors': None, 'favorite_count': 0, 'retweeted': False, 'is_quote_status': False, 'lang': 'es', 'created_at': 'Thu Jun 23 17:16:08 +0000 2016', 'in_reply_to_screen_name': None, 'coordinates': None, 'geo': None, 'id_str': '746029083335680003', 'filter_level': 'low', 'timestamp_ms': '1466702168674', 'in_reply_to_user_id_str': None, 'retweet_count': 0, 'place': None, 'truncated': False, 'in_reply_to_status_id_str': None, 'text': 'Se da lectura a los acuerdos entre Gobierno y FARC-EP. #FinDelConflicto #AdiósALaGuerra #PazenColombia #ElUltimoDiaDeLaGuerra #Cuba', 'user': {'friends_count': 356, 'id': 814202096, 'notifications': None, 'profile_sidebar_border_color': '5ED4DC', 'profile_image_url': 'http://pbs.twimg.com/profile_images/2594545116/r7de57w8q920u7p0hft6_normal.jpeg', 'favourites_count': 3, 'utc_offset': -25200, 'url': 'http://yamimontoya.blogspot.com', 'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme4/bg.gif', 'verified': False, 'profile_sidebar_fill_color': '95E8EC', 'followers_count': 348, 'created_at': 'Mon Sep 10 01:05:29 +0000 2012', 'location': 'Cuba', 'profile_background_color': '0099B9', 'name': 'Yami Montoya', 'lang': 'es', 'time_zone': 'Pacific Time (US & Canada)', 'following': None, 'id_str': '814202096', 'is_translator': False, 'contributors_enabled': False, 'profile_background_tile': False, 'listed_count': 13, 'default_profile': False, 'follow_request_sent': None, 'default_profile_image': False, 'profile_link_color': '0099B9', 'screen_name': 'yami679', 'description': 'Periodista cubana, editora de Tiempo21.cu, orgullosa de vivir en mi Isla.', 'profile_use_background_image': True, 'profile_text_color': '3C3940', 'profile_image_url_https': '...', 'protected': False, 'profile_background_image_url_https': '...', 'statuses_count': 115828, 'geo_enabled': False}, 
'entities': {'hashtags': [{'text': 'FinDelConflicto', 'indices': [55, 71]}, {'text': 'AdiósALaGuerra', 'indices': [72, 87]}, {'text': 'PazenColombia', 'indices': [88, 102]}, {'text': 'ElUltimoDiaDeLaGuerra', 'indices': [103, 125]}, {'text': 'Cuba', 'indices': [126, 131]}], 'user_mentions': [], 'urls': [], 'symbols': []}}

tweets列表的示例:

[{'id': 746029040851521536, 'in_reply_to_user_id': None, 'in_reply_to_status_id': None, 'source': '<a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>', 'favorited': False, 'contributors': None, 'favorite_count': 0, 'possibly_sensitive': False, 'retweeted': False, 'is_quote_status': False, 'lang': 'es', 'created_at': 'Thu Jun 23 17:15:58 +0000 2016', 'in_reply_to_screen_name': None, 'coordinates': None, 'geo': None, 'id_str': '746029040851521536', 'filter_level': 'low', 'timestamp_ms': '1466702158545', 'in_reply_to_user_id_str': None, 'retweeted_status': {'id': 745637507493093377, 'in_reply_to_user_id': None, 'in_reply_to_status_id': None, 'source': '<a href="http://twitterfeed.com" rel="nofollow">twitterfeed</a>', 'favorited': False, 'contributors': None, 'favorite_count': 0, 'possibly_sensitive': False, 'retweeted': False, 'is_quote_status': False, 'lang': 'es', 'created_at': 'Wed Jun 22 15:20:09 +0000 2016', 'in_reply_to_screen_name': None, 'coordinates': None, 'geo': None, 'id_str': '745637507493093377', 'filter_level': 'low', 'in_reply_to_user_id_str': None, 'retweet_count': 1, 'place': None, 'truncated': False, 'in_reply_to_status_id_str': None, 'text': 'Denuncian por traición a la patria a Juan Manuel Santos: COLOMBIAN NEWS\n21de junio de 2016\n\xa0\n1.\xa0\xa0\xa0\xa0Denuncian ... 
...', 'user': {'friends_count': 834, 'id': 1090274636, 'notifications': None, 'profile_sidebar_border_color': 'FFFFFF', 'profile_image_url': 'http://pbs.twimg.com/profile_images/3459756751/f7d00d504bdc55a4e30f214c46a73188_normal.jpeg', 'favourites_count': 14, 'utc_offset': -18000, 'url': 'http://www.periodicodebate.com', 'profile_background_image_url': 'http://pbs.twimg.com/profile_background_images/772390136/0a690154c8cb12a6b4b918b66222bbd9.jpeg', 'verified': False, 'profile_sidebar_fill_color': 'EFEFEF', 'followers_count': 6272, 'created_at': 'Mon Jan 14 21:55:47 +0000 2013', 'location': 'Colombia', 'profile_background_color': 'F5F8FA', 'name': 'Periódico Debate', 'lang': 'es', 'time_zone': 'Bogota', 'profile_banner_url': '...', 'following': None, 'id_str': '1090274636', 'is_translator': False, 'contributors_enabled': False, 'profile_background_tile': False, 'listed_count': 47, 'default_profile': False, 'follow_request_sent': None, 'default_profile_image': False, 'profile_link_color': '009999', 'screen_name': 'DebateCol', 'description': None, 'profile_use_background_image': True, 'profile_text_color': '333333', 'profile_image_url_https': '...', 'protected': False, 'profile_background_image_url_https': '...', 'statuses_count': 11887, 'geo_enabled': False}, 'entities': {'hashtags': [], 'user_mentions': [], 'urls': [{'display_url': 'bit.ly/28WKaS0', 'indices': [113, 136], 'expanded_url': '...', 'url': '...'}], 'symbols': []}}, 'retweet_count': 0, 'place': None, 'truncated': False, 'in_reply_to_status_id_str': None, 'text': 'RT @DebateCol: Denuncian por traición a la patria a Juan Manuel Santos: COLOMBIAN NEWS\n21de junio de 2016\n\xa0\n1.\xa0\xa0\xa0\xa0Denuncian ... 
...', 'user': {'friends_count': 563, 'id': 274595199, 'notifications': None, 'profile_sidebar_border_color': 'C0DEED', 'profile_image_url': 'http://pbs.twimg.com/profile_images/378800000541161030/4eebcd7336d7aa698bdcb13601869f87_normal.jpeg', 'favourites_count': 152080, 'utc_offset': -14400, 'url': None, 'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme1/bg.png', 'verified': False, 'profile_sidebar_fill_color': 'DDEEF6', 'followers_count': 1515, 'created_at': 'Wed Mar 30 16:06:52 +0000 2011', 'location': 'usa', 'profile_background_color': 'C0DEED', 'name': 'Raul Escobar', 'lang': 'es', 'time_zone': 'Eastern Time (US & Canada)', 'following': None, 'id_str': '274595199', 'is_translator': False, 'contributors_enabled': False, 'profile_background_tile': False, 'listed_count': 17, 'default_profile': True, 'follow_request_sent': None, 'default_profile_image': False, 'profile_link_color': '0084B4', 'screen_name': 'RaulEscobar1154', 'description': None, 'profile_use_background_image': True, 'profile_text_color': '333333', 'profile_image_url_https': '...', 'protected': False, 'profile_background_image_url_https': '...', 'statuses_count': 166211, 'geo_enabled': False}, 'entities': {'hashtags': [], 'user_mentions': [{'id_str': '1090274636', 'id': 1090274636, 'screen_name': 'DebateCol', 'indices': [3, 13], 'name': 'Periódico Debate'}], 'urls': [{'display_url': 'bit.ly/28WKaS0', 'indices': [139, 140], 'expanded_url': '...', 'url': '..'}], 'symbols': []}}, {'id': 746029040658710528, 'in_reply_to_user_id': None, 'in_reply_to_status_id': None, 'source': '<a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>', 'favorited': False, 'contributors': None, 'favorite_count': 0, 'retweeted': False, 'is_quote_status': False, 'lang': 'es', 'created_at': 'Thu Jun 23 17:15:58 +0000 2016', 'in_reply_to_screen_name': None, 'coordinates': None, 'geo': None, 'id_str': '746029040658710528', 'filter_level': 'low', 'timestamp_ms': '1466702158499', 
'in_reply_to_user_id_str': None, 'retweet_count': 0, 'place': None, 'truncated': False, 'in_reply_to_status_id_str': None, 'text': '#AdiosALaGuerra y será que todos los integrantes de la guerrilla estan de acuerdo? estos señores tienen total control de este grupo? Ojala!', 'user': {'friends_count': 2, 'id': 704447284524744704, 'notifications': None, 'profile_sidebar_border_color': 'C0DEED', 'profile_image_url': 'http://pbs.twimg.com/profile_images/704448554119647232/OCMVOVr4_normal.jpg', 'favourites_count': 6, 'utc_offset': None, 'url': None, 'profile_background_image_url': '', 'verified': False, 'profile_sidebar_fill_color': 'DDEEF6', 'followers_count': 6, 'created_at': 'Mon Feb 29 23:24:55 +0000 2016', 'location': None, 'profile_background_color': 'F5F8FA', 'name': 'Desafinado', 'lang': 'pt', 'time_zone': None, 'following': None, 'id_str': '704447284524744704', 'is_translator': False, 'contributors_enabled': False, 'profile_background_tile': False, 'listed_count': 0, 'default_profile': True, 'follow_request_sent': None, 'default_profile_image': False, 'profile_link_color': '2B7BB9', 'screen_name': 'eg0max', 'description': None, 'profile_use_background_image': True, 'profile_text_color': '333333', 'profile_image_url_https': '...', 'protected': False, 'profile_background_image_url_https': '', 'statuses_count': 18, 'geo_enabled': False}, 'entities': {'hashtags': [{'text': 'AdiosALaGuerra', 'indices': [0, 15]}], 'user_mentions': [], 'urls': [], 'symbols': []}}]

当我从 tweets 中取出某一项并打印它的 id、文本等字段时,一切正常:

# Pick one parsed tweet at random and show a few of its fields.
itemtest = random.randint(0, len(tweets) - 1)
picked = tweets[itemtest]
for field in ('id', 'created_at', 'text'):
    print(picked[field])
print(picked['user']['screen_name'])

但是,如果我想使用列表推导式(list comprehension)将所有推文的文本保存到一个列表中,就会报错:

这是我使用的代码:

# Raises KeyError as soon as one element of tweets has no 'text' key
# (see the traceback below).
tweetText = [tw['text'] for tw in tweets]

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-2-5ea2c15220c6> in <module>()
----> 1 tweetText = [tw['text'] for tw in tweets]

<ipython-input-2-5ea2c15220c6> in <listcomp>(.0)
----> 1 tweetText = [tw['text'] for tw in tweets]

KeyError: 'text'

这是我找到的解决方案

2016年5月25日编辑

# Split the parsed messages into parallel per-field lists. Messages that are
# missing any of the expected tweet keys go to `trash`.
tweet_ids = []
tweet_source = []
tweet_text = []
tweet_users = []
trash = []
for tweet in tweets:
    try:
        # Look up every field before appending anything, so the four lists
        # stay the same length even when a later key is missing.
        ids = tweet['id']
        text = tweet['text']
        users = tweet['user']['screen_name']
    except KeyError:
        # Keep the 'limit' payload when there is one; otherwise keep the whole
        # message rather than raising a second KeyError inside the handler.
        trash.append(tweet.get('limit', tweet))
        continue
    tweet_ids.append(ids)
    tweet_source.append('https://twitter.com/statuses/' + str(ids))
    tweet_text.append(text)
    tweet_users.append(users)

但是,可以通过更好的代码来改进此解决方案。 任何建议表示赞赏!感谢

1 个答案:

答案 0 :(得分:1)

这适用于我(从json对象手动制作JSON字符串):

import json, random

# Hand-written JSON text standing in for one downloaded tweet.
j = """
{
    "id": 746029083335680003,
    "created_at": "Thu Jun 23 17:16:08 +0000 2016",
    "text": "The text",
    "user": {
        "screen_name": "usr123",
        "description": "description",
        "statuses_count": 123,
        "geo_enabled": null
    },
    "entities": {
        "hashtags": [{
            "text": "a hash tag",
            "indices": [55, 71]
        }],
        "user_mentions": [],
        "urls": [],
        "symbols": []
    }
}
"""
# Parse the document and wrap it in a one-element list.
tweets = [json.loads(j)]
itemtest = 0
selected = tweets[itemtest]
for field in ('id', 'created_at', 'text'):
    print(selected[field])
print(selected['user']['screen_name'])

# Every element has a 'text' key here, so the comprehension succeeds.
tweetText = [tweet['text'] for tweet in tweets]

print(tweetText)

输出:

746029083335680003
Thu Jun 23 17:16:08 +0000 2016
The text
usr123
['The text']

使用两个元素时同样有效:

import json, random

# Two hand-written JSON documents, each standing in for one downloaded tweet.
j = [
    """
{
    "id": 746029083335680003,
    "created_at": "Thu Jun 23 17:16:08 +0000 2016",
    "text": "The text",
    "user": {
        "screen_name": "usr123",
        "description": "description",
        "statuses_count": 123,
        "geo_enabled": null
    },
    "entities": {
        "hashtags": [{
            "text": "a hash tag",
            "indices": [55, 71]
        }],
        "user_mentions": [],
        "urls": [],
        "symbols": []
    }
}""",
    """
{
    "id": 746029083335680003,
    "created_at": "Thu Jun 23 17:16:08 +0000 2016",
    "text": "The text",
    "user": {
        "screen_name": "usr123",
        "description": "description",
        "statuses_count": 123,
        "geo_enabled": null
    },
    "entities": {
        "hashtags": [{
            "text": "a hash tag",
            "indices": [55, 71]
        }],
        "user_mentions": [],
        "urls": [],
        "symbols": []
    }
}
""",
]

# Parse each JSON document into its own dict.
tweets = [json.loads(document) for document in j]
print(len(tweets), tweets)
itemtest = 0
selected = tweets[itemtest]
for field in ('id', 'created_at', 'text'):
    print(selected[field])
print(selected['user']['screen_name'])

# Every element has a 'text' key here, so the comprehension succeeds.
tweetText = [tweet['text'] for tweet in tweets]

print(tweetText)