通常,当我使用 Twitter 流式 API 时,可以直接从以下位置访问主题标签:
tweet -> entities -> hashtags
在搜索推文时,如果用 tweepy 搜索关键字/主题标签,得到的是 <class 'tweepy.models.Status'> 对象。
c = tweepy.Cursor(api.search,q=hashtag,include_entities=True,rpp=100).items(limit)
while (True) :
try:
__tweet = next(c)
_tweet = jsonpickle.encode(__tweet)
tweet = json.loads(_tweet)
....
当我查找 entities/hashtags 时,我发现第一个(也就是我要找的)位于 author 之下:
tweet -> author -> entities -> hashtags
这很奇怪。
而 "hashtags" 位于以下位置:
tweet -> entities -> hashtags
看起来像这样:
(u'entities', {
u'symbols': {u'py/id': 17},
u'user_mentions': {u'py/id': 20},
u'hashtags': {u'py/id': 13},
u'urls': {u'py/id': 18},
}),
当我尝试在循环中从以下位置提取主题标签时:
tweet -> author -> entities -> hashtags -> text
_hashtags = []
__hashtags = []
try:
_hashtags = tweet['author']['entities']['hashtags']
for element in _hashtags:
__hashtags.append(element['text'])
hashtags = ' '.join(e for e in __hashtags)
except KeyError, e:
hashtags = None
logger.warning (e.__doc__)
logger.warning (e.message)
exc_type, exc_obj, exc_tb = sys.exc_info()
logger.warning (exc_type)
logger.warning (fname)
logger.warning (exc_tb.tb_lineno)
结果:hashtags 是一个空字符串。
而使用以下路径时:
tweet -> entities -> hashtags -> text
_hashtags = []
__hashtags = []
try:
_hashtags = tweet['entities']['hashtags']
for element in _hashtags:
__hashtags.append(element['text'])
hashtags = ' '.join(e for e in __hashtags)
except KeyError, e:
hashtags = None
logger.warning (e.__doc__)
logger.warning (e.message)
exc_type, exc_obj, exc_tb = sys.exc_info()
logger.warning (exc_type)
logger.warning (fname)
logger.warning (exc_tb.tb_lineno)
生成此错误:
__hashtags.append(element['text'])
TypeError: string indices must be integers
我记得上一次使用时它还能正常工作……我不知道为什么现在得不到正确的结果!
Twitter 的文档说,实体可以在推文响应中直接访问:https://dev.twitter.com/docs/platform-objects/tweets
这是 pprint(tweet) 的输出:
[
(u'contributors', None),
(u'truncated', False),
(u'retweeted', False),
(u'in_reply_to_status_id', None),
(u'id', 487988233016340482L),
(u'favorite_count', 0),
(u'py/object', u'tweepy.models.Status'),
(u'_api', {
u'py/object': u'tweepy.api.API',
u'wait_on_rate_limit': False,
u'cache': None,
u'secure': True,
u'retry_errors': None,
u'search_host': u'search.twitter.com',
u'parser': {u'py/object': u'tweepy.parsers.ModelParser',
u'json_lib': {u'py/repr': u'json/json'},
u'model_factory': {u'py/type': u'tweepy.models.ModelFactory'
}},
u'auth': {
u'py/object': u'tweepy.auth.OAuthHandler',
u'username': None,
u'_consumer': {u'py/object': u'tweepy.oauth.OAuthConsumer',
u'secret': u'xxxxxx'
, u'key': u'xxxxxx'},
u'secure': True,
u'_sigmethod': {u'py/object': u'tweepy.oauth.OAuthSignatureMethod_HMAC_SHA1'
},
u'access_token': {u'py/object': u'tweepy.oauth.OAuthToken',
u'secret':xxxxx'
,
u'key': u'xxxxxx'
},
u'callback': None,
u'request_token': None,
},
u'cached_result': False,
u'search_root': u'',
u'retry_count': 0,
u'host': u'api.twitter.com',
u'timeout': 60,
u'api_root': u'/1.1',
u'retry_delay': 0,
u'wait_on_rate_limit_notify': False,
u'last_response': {
u'py/object': u'httplib.HTTPResponse',
u'fp': None,
u'will_close': False,
u'chunk_left': u'UNKNOWN',
u'length': 0,
u'strict': 0,
u'reason': u'OK',
u'version': 11,
u'status': 200,
u'debuglevel': 0,
u'msg': {
u'py/object': u'httplib.HTTPMessage',
u'fp': None,
u'startofbody': None,
u'startofheaders': None,
u'headers': [
u'cache-control: no-cache, no-store, must-revalidate, pre-check=0, post-check=0\r\n'
,
u'content-length: 64932\r\n',
u'content-type: application/json;charset=utf-8\r\n'
,
u'date: Sat, 12 Jul 2014 15:59:00 GMT\r\n',
u'expires: Tue, 31 Mar 1981 05:00:00 GMT\r\n',
u'last-modified: Sat, 12 Jul 2014 15:59:00 GMT\r\n'
,
u'pragma: no-cache\r\n',
u'server: tfe\r\n',
u'set-cookie: lang=en\r\n',
u'set-cookie: guest_id=v1%3A140518074073079236; Domain=.twitter.com; Path=/; Expires=Mon, 11-Jul-2016 15:59:00 UTC\r\n'
,
u'status: 200 OK\r\n',
u'strict-transport-security: max-age=631138519\r\n'
,
u'x-access-level: read-write-directmessages\r\n',
u'x-content-type-options: nosniff\r\n',
u'x-frame-options: SAMEORIGIN\r\n',
u'x-rate-limit-limit: 180\r\n',
u'x-rate-limit-remaining: 177\r\n',
u'x-rate-limit-reset: 1405181566\r\n',
u'x-transaction: 9bf3522d6235b71a\r\n',
u'x-xss-protection: 1; mode=block\r\n',
],
u'plisttext': u';charset=utf-8',
u'maintype': u'application',
u'subtype': u'json',
u'status': u'',
u'typeheader': u'application/json;charset=utf-8',
u'encodingheader': None,
u'seekable': 0,
u'dict': {
u'status': u'200 OK',
u'x-rate-limit-remaining': u'177',
u'content-length': u'64932',
u'expires': u'Tue, 31 Mar 1981 05:00:00 GMT',
u'x-transaction': u'9bf3522d6235b71a',
u'x-content-type-options': u'nosniff',
u'set-cookie': u'lang=en, guest_id=v1%3A140518074073079236; Domain=.twitter.com; Path=/; Expires=Mon, 11-Jul-2016 15:59:00 UTC'
,
u'strict-transport-security': u'max-age=631138519',
u'x-access-level': u'read-write-directmessages',
u'server': u'tfe',
u'last-modified': u'Sat, 12 Jul 2014 15:59:00 GMT',
u'x-xss-protection': u'1; mode=block',
u'x-rate-limit-reset': u'1405181566',
u'pragma': u'no-cache',
u'cache-control': u'no-cache, no-store, must-revalidate, pre-check=0, post-check=0'
,
u'date': u'Sat, 12 Jul 2014 15:59:00 GMT',
u'x-rate-limit-limit': u'180',
u'x-frame-options': u'SAMEORIGIN',
u'content-type': u'application/json;charset=utf-8',
},
u'unixfrom': u'',
u'type': u'application/json',
u'plist': [u'charset=utf-8'],
},
u'chunked': 0,
u'_method': u'GET',
},
u'compression': False,
}),
(u'author', {
u'follow_request_sent': False,
u'profile_use_background_image': True,
u'profile_sidebar_fill_color': u'171106',
u'id': 14076230,
u'py/object': u'tweepy.models.User',
u'_api': {u'py/id': 1},
u'verified': False,
u'profile_text_color': u'8A7302',
u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/378800000592595006/b0dce59ad7eb453c70b32cb1cf79657e_normal.jpeg'
,
u'_json': {
u'follow_request_sent': False,
u'profile_use_background_image': True,
u'id': 14076230,
u'verified': False,
u'profile_text_color': u'8A7302',
u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/378800000592595006/b0dce59ad7eb453c70b32cb1cf79657e_normal.jpeg'
,
u'profile_sidebar_fill_color': u'171106',
u'is_translator': False,
u'geo_enabled': True,
u'entities': {u'url': {u'urls': {u'py/id': 32}},
u'description': {u'urls': {u'py/id': 31}}},
u'followers_count': 974,
u'profile_sidebar_border_color': u'FFFFFF',
u'id_str': u'14076230',
u'default_profile_image': False,
u'location': u'Adelaide, Australia',
u'is_translation_enabled': False,
u'utc_offset': 34200,
u'statuses_count': 6856,
u'description': u'eBusiness Advisor, online communications advocate and student. Creating, sharing and curating media. also e-learning, websites and business use of online tools'
,
u'friends_count': 786,
u'profile_link_color': u'473623',
u'profile_image_url': u'http://pbs.twimg.com/profile_images/378800000592595006/b0dce59ad7eb453c70b32cb1cf79657e_normal.jpeg'
,
u'notifications': False,
u'profile_background_image_url_https': u'https://pbs.twimg.com/profile_background_images/378800000167684076/EcbKsmde.jpeg'
,
u'profile_background_color': u'0F0A02',
u'profile_banner_url': u'https://pbs.twimg.com/profile_banners/14076230/1381709041'
,
u'profile_background_image_url': u'http://pbs.twimg.com/profile_background_images/378800000167684076/EcbKsmde.jpeg'
,
u'name': u'Rhys Moult',
u'lang': u'en',
u'profile_background_tile': True,
u'favourites_count': 115,
u'screen_name': u'rhysatwork',
u'url': u'http://t.co/hUmuflFD3V',
u'created_at': u'Tue Mar 04 04:03:39 +0000 2008',
u'contributors_enabled': False,
u'time_zone': u'Adelaide',
u'protected': False,
u'default_profile': False,
u'following': False,
u'listed_count': 71,
},
u'is_translator': False,
u'geo_enabled': True,
u'entities': {u'url': {u'urls': {u'py/id': 32}},
u'description': {u'urls': {u'py/id': 31}}},
u'followers_count': 974,
u'profile_sidebar_border_color': u'FFFFFF',
u'location': u'Adelaide, Australia',
u'default_profile_image': False,
u'id_str': u'14076230',
u'is_translation_enabled': False,
u'utc_offset': 34200,
u'statuses_count': 6856,
u'description': u'eBusiness Advisor, online communications advocate and student. Creating, sharing and curating media. also e-learning, websites and business use of online tools'
,
u'friends_count': 786,
u'profile_link_color': u'473623',
u'profile_image_url': u'http://pbs.twimg.com/profile_images/378800000592595006/b0dce59ad7eb453c70b32cb1cf79657e_normal.jpeg'
,
u'notifications': False,
u'profile_background_image_url_https': u'https://pbs.twimg.com/profile_background_images/378800000167684076/EcbKsmde.jpeg'
,
u'profile_background_color': u'0F0A02',
u'profile_banner_url': u'https://pbs.twimg.com/profile_banners/14076230/1381709041'
,
u'profile_background_image_url': u'http://pbs.twimg.com/profile_background_images/378800000167684076/EcbKsmde.jpeg'
,
u'name': u'Rhys Moult',
u'lang': u'en',
u'profile_background_tile': True,
u'favourites_count': 115,
u'screen_name': u'rhysatwork',
u'url': u'http://t.co/hUmuflFD3V',
u'created_at': {u'py/object': u'datetime.datetime',
u'__reduce__': [{u'py/type': u'datetime.datetime'
}, [u'B9gDBAQDJwAAAA==']]},
u'contributors_enabled': False,
u'time_zone': u'Adelaide',
u'protected': False,
u'default_profile': False,
u'following': False,
u'listed_count': 71,
}),
(u'_json', {
u'contributors': None,
u'truncated': False,
u'text': u'Our #govhack app FB page for @unleashedADL https://t.co/3VyvgUurCu #opendata @WhatGrowsHere #natureninjas'
,
u'in_reply_to_status_id': None,
u'in_reply_to_user_id': None,
u'id': 487988233016340482L,
u'favorite_count': 0,
u'source': u'<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>'
,
u'retweeted': False,
u'coordinates': {u'type': u'Point',
u'coordinates': [138.48741864, -34.84890577]},
u'entities': {
u'symbols': [],
u'user_mentions': [{
u'indices': [29, 42],
u'id_str': u'1287906530',
u'screen_name': u'unleashedADL',
u'name': u'Unleashed Adelaide',
u'id': 1287906530,
}, {
u'indices': [77, 91],
u'id_str': u'2620349570',
u'screen_name': u'WhatGrowsHere',
u'name': u'What grows here',
u'id': 2620349570L,
}],
u'hashtags': [{u'indices': [4, 12], u'text': u'govhack'},
{u'indices': [67, 76], u'text': u'opendata'},
{u'indices': [92, 105],
u'text': u'natureninjas'}],
u'urls': [{
u'indices': [43, 66],
u'url': u'https://t.co/3VyvgUurCu',
u'expanded_url': u'https://m.facebook.com/WhatGrowsHere'
,
u'display_url': u'm.facebook.com/WhatGrowsHere',
}],
},
u'in_reply_to_screen_name': None,
u'id_str': u'487988233016340482',
u'retweet_count': 0,
u'metadata': {u'iso_language_code': u'en',
u'result_type': u'recent'},
u'favorited': False,
u'user': {
u'follow_request_sent': False,
u'profile_use_background_image': True,
u'id': 14076230,
u'verified': False,
u'profile_text_color': u'8A7302',
u'profile_image_url_https': u'https://pbs.twimg.com/profile_images/378800000592595006/b0dce59ad7eb453c70b32cb1cf79657e_normal.jpeg'
,
u'profile_sidebar_fill_color': u'171106',
u'is_translator': False,
u'geo_enabled': True,
u'entities': {u'url': {u'urls': [{
u'indices': [0, 22],
u'url': u'http://t.co/hUmuflFD3V',
u'expanded_url': u'http://rhysatwork.com',
u'display_url': u'rhysatwork.com',
}]}, u'description': {u'urls': []}},
u'followers_count': 974,
u'profile_sidebar_border_color': u'FFFFFF',
u'id_str': u'14076230',
u'default_profile_image': False,
u'location': u'Adelaide, Australia',
u'is_translation_enabled': False,
u'utc_offset': 34200,
u'statuses_count': 6856,
u'description': u'eBusiness Advisor, online communications advocate and student. Creating, sharing and curating media. also e-learning, websites and business use of online tools'
,
u'friends_count': 786,
u'profile_link_color': u'473623',
u'profile_image_url': u'http://pbs.twimg.com/profile_images/378800000592595006/b0dce59ad7eb453c70b32cb1cf79657e_normal.jpeg'
,
u'notifications': False,
u'profile_background_image_url_https': u'https://pbs.twimg.com/profile_background_images/378800000167684076/EcbKsmde.jpeg'
,
u'profile_background_color': u'0F0A02',
u'profile_banner_url': u'https://pbs.twimg.com/profile_banners/14076230/1381709041'
,
u'profile_background_image_url': u'http://pbs.twimg.com/profile_background_images/378800000167684076/EcbKsmde.jpeg'
,
u'name': u'Rhys Moult',
u'lang': u'en',
u'profile_background_tile': True,
u'favourites_count': 115,
u'screen_name': u'rhysatwork',
u'url': u'http://t.co/hUmuflFD3V',
u'created_at': u'Tue Mar 04 04:03:39 +0000 2008',
u'contributors_enabled': False,
u'time_zone': u'Adelaide',
u'protected': False,
u'default_profile': False,
u'following': False,
u'listed_count': 71,
},
u'geo': {u'type': u'Point', u'coordinates': [-34.84890577,
138.48741864]},
u'in_reply_to_user_id_str': None,
u'possibly_sensitive': False,
u'lang': u'en',
u'created_at': u'Sat Jul 12 15:53:55 +0000 2014',
u'in_reply_to_status_id_str': None,
u'place': {
u'full_name': u'Adelaide',
u'url': u'https://api.twitter.com/1.1/geo/id/01e8a1a140ccdc5c.json'
,
u'country': u'Australia',
u'place_type': u'city',
u'bounding_box': {u'type': u'Polygon',
u'coordinates': [[[138.44212992,
-35.348970061], [138.780189824,
-35.348970061], [138.780189824,
-34.652564053], [138.44212992,
-34.652564053]]]},
u'contained_within': [],
u'country_code': u'AU',
u'attributes': {},
u'id': u'01e8a1a140ccdc5c',
u'name': u'Adelaide',
},
}),
(u'coordinates', {u'type': u'Point',
u'coordinates': {u'py/id': 12}}),
(u'in_reply_to_user_id_str', None),
(u'entities', {
u'symbols': {u'py/id': 17},
u'user_mentions': {u'py/id': 20},
u'hashtags': {u'py/id': 13},
u'urls': {u'py/id': 18},
}),
(u'in_reply_to_screen_name', None),
(u'in_reply_to_user_id', None),
(u'text',
u'Our #govhack app FB page for @unleashedADL https://t.co/3VyvgUurCu #opendata @WhatGrowsHere #natureninjas'
),
(u'retweet_count', 0),
(u'metadata', {u'iso_language_code': u'en',
u'result_type': u'recent'}),
(u'favorited', False),
(u'source_url', u'http://twitter.com/download/iphone'),
(u'user', {u'py/id': 34}),
(u'geo', {u'type': u'Point', u'coordinates': {u'py/id': 23}}),
(u'id_str', u'487988233016340482'),
(u'possibly_sensitive', False),
(u'lang', u'en'),
(u'created_at', {u'py/object': u'datetime.datetime',
u'__reduce__': [{u'py/type': u'datetime.datetime'},
[u'B94HDA81NwAAAA==']]}),
(u'in_reply_to_status_id_str', None),
(u'place', {
u'py/object': u'tweepy.models.Place',
u'_api': {u'py/id': 1},
u'country_code': u'AU',
u'url': u'https://api.twitter.com/1.1/geo/id/01e8a1a140ccdc5c.json'
,
u'country': u'Australia',
u'place_type': u'city',
u'bounding_box': {
u'py/object': u'tweepy.models.BoundingBox',
u'_api': {u'py/id': 1},
u'type': u'Polygon',
u'coordinates': {u'py/id': 24},
},
u'contained_within': {
u'py/object': u'tweepy.models.ResultSet',
u'_since_id': None,
u'_max_id': None,
u'py/seq': [],
},
u'full_name': u'Adelaide',
u'attributes': {},
u'id': u'01e8a1a140ccdc5c',
u'name': u'Adelaide',
}),
(u'source', u'Twitter for iPhone'),
]
答案 0 :(得分:0)
使用 "tweet -> author -> entities -> hashtags -> text" 时,代码为:
try:
_hashtags = tweet['author']['entities']['hashtags']
for element in _hashtags:
__hashtags.append(element['text'])
hashtags = ' '.join(e for e in __hashtags)
你的 __hashtags 是在哪里声明的?清空过吗?为什么要用 "_" 和 "__" 这样的前缀?这既不可读也不便于调试,我更倾向于这样写:
try:
hashtags_texts = []
hashtags = tweet['author']['entities']['hashtags']
for hashtag in hashtags:
hashtags_texts.append(hashtag['text'])
hashtags = ' '.join(hashtags_text)
或
try:
hashtags = ' '.join(hashtag['text'] for hashtag in
tweet['author']['entities']['hashtags'])
然后你拿这些主题标签做什么?你确定 tweet['author']['entities']['hashtags'] 实际上包含数据吗?你确定 hashtags 是一个空字符串吗?
答案 1 :(得分:0)
在我的代码中使用jsonpickle时:
__tweet = next(c)
_tweet = jsonpickle.encode(__tweet)
tweet = json.loads(_tweet)
我发现结构发生了变化。
因此可以在以下位置找到 hashtags:
_json->entities->..etc