我想从每条JSON结果(保存在tweets.txt文件中)里提取特定的字符串(URL地址),然后用提取到的URL地址发起HTTP GET请求,并把HTTP响应保存为一个新目录(HTML目录)中的文件。我要提取的字符串是某个特定JSON键对应的值。
例如:"display_url": "test.com/y8HTdfdfKMFz"
我的代码:
# # # # TWITTER STREAM LISTENER # # # #
class StdOutListener(StreamListener):
    """Stream listener that echoes each received payload and saves it to a file.

    Every payload handed to ``on_data`` is printed to stdout and appended,
    unchanged, to the file named by ``fetched_tweets_filename``.
    """

    def __init__(self, fetched_tweets_filename):
        """Store the path of the file that received payloads are appended to."""
        self.fetched_tweets_filename = fetched_tweets_filename

    def on_data(self, data):
        """Print one raw payload and append it to the output file.

        Returns True whether or not printing/writing succeeded; a failure is
        reported on stdout instead of being raised.
        """
        try:
            print(data)
            # Append mode: each payload is added after the ones already saved.
            with open(self.fetched_tweets_filename, 'a') as tf:
                tf.write(data)
        except Exception as e:
            # Exception, not BaseException: KeyboardInterrupt and SystemExit
            # must still propagate so the process can be stopped.
            print("Error on_data %s" % str(e))
        return True

    def on_error(self, status):
        """Print the status value reported for a stream error."""
        print(status)
if __name__ == '__main__':
    # File the raw tweets are written to, and the keywords to track.
    fetched_tweets_filename = "tweets.txt"
    hash_tag_list = ["donal trump"]

    # TwitterStreamer is defined elsewhere; per the original note it
    # authenticates using config.py and connects to the Twitter Streaming API.
    twitter_streamer = TwitterStreamer()
    twitter_streamer.stream_tweets(fetched_tweets_filename, hash_tag_list)
JSON结果:
{"created_at":"Wed Nov 14 11:12:59 +0000 2018","id":1062664687601496064,"id_str":"1062664687601496064","text":"This is test https:\/\/t.co\/V3tNm99tdn fdfd\n\n#osectraining","source":"\u003ca href=\"http:\/\/twitter.com\" rel=\"nofollow\"\u003eTwitter Web Client\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":961508561217052675,"id_str":"961508561217052675","name":"line Sec","screen_name":"oseining","location":"US","url":"https:\/\/www.ocurity.com","description":"field","translator_type":"none","protected":false,"verified":false,"followers_count":2,"friends_count":51,"listed_count":0,"favourites_count":0,"statuses_count":3,"created_at":"Thu Feb 08 07:54:39 +0000 2018","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"000000","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_tile":false,"profile_link_color":"1B95E0","profile_sidebar_border_color":"000000","profile_sidebar_fill_color":"000000","profile_text_color":"000000","profile_use_background_image":false,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/961510231346958336\/d_KhBeTD_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/961510231346958336\/d_KhBeTD_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/961508561217052675\/1518076913","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"quote_count":0,"reply_count":0,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[{"text":"osectraining"
,"indices":[44,57]}],"urls":[{"url":"https:\/\/t.co\/V3tNm99tdn","expanded_url":"https:\/\/pastebin.com\/y8HTKMFz","display_url":"pastebin.com\/y8HTKMFz","indices":[13,36]}],"user_mentions":[],"symbols":[]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"tr","timestamp_ms":"1542193979536"}
答案 0 :(得分:0)
我不确定是否完全理解了您的问题,不过如果您已经把推文以JSON形式保存到了文件中,就可以很容易地把JSON结果转换成Python dict,如下所示:
import json
with open('tweets.txt') as handle:
    # The listener appends every tweet to this file, so it can contain several
    # JSON documents one after another; json.loads() on the whole content then
    # fails with "Extra data". Decode only the first document so that dictdump
    # is a single dict. raw_decode() does not skip leading whitespace, hence
    # the lstrip().
    dictdump, _ = json.JSONDecoder().raw_decode(handle.read().lstrip())
这样您得到的就是一个普通的Python dict,可以直接从中取值:
# The link fields are not top-level keys: in the tweet JSON they sit under
# entities -> urls, one entry per link in the tweet. expanded_url carries the
# scheme ("https://..."), which an HTTP client needs; display_url is the
# scheme-less form. Raises IndexError when the tweet contains no link.
my_url = dictdump['entities']['urls'][0]['expanded_url']
然后使用requests模块向该URL发送GET/POST请求:
import requests
# requests rejects URLs without a scheme; add one when only a display form
# such as "pastebin.com/abc" was given (https is an assumption here).
if not my_url.startswith(('http://', 'https://')):
    my_url = 'https://' + my_url
# A timeout keeps the script from hanging indefinitely on an unresponsive host.
r = requests.get(my_url, timeout=10)
答案 1 :(得分:-1)
这是在@Avishay Cohen的答案基础上的补充:由于您是以追加模式打开文件的,tweets.txt中应该会有多个JSON字符串。
因此您可以这样做:
import json
import requests
# Read the saved tweets one JSON document per line and fetch every link found
# in each tweet, printing the body of each successful (HTTP 200) response.
with open('tweets.txt') as input_file:
    for line in input_file:
        line = line.strip()
        if not line:
            # Blank lines are not JSON documents; json.loads would raise.
            continue
        tweet_json = json.loads(line)
        # Links are nested under entities -> urls (one entry per link), not
        # at the top level of the tweet. Either level may be absent or null.
        url_entries = (tweet_json.get('entities') or {}).get('urls') or []
        for url_entry in url_entries:
            # expanded_url includes the scheme; display_url does not and
            # cannot be requested directly.
            target_url = url_entry.get('expanded_url')
            if not target_url:
                continue
            try:
                response = requests.get(target_url, timeout=10)
            except requests.RequestException as exc:
                # One unreachable link should not abort the remaining tweets.
                print("Error fetching %s: %s" % (target_url, exc))
                continue
            # status_code is an int attribute, not a method.
            if response.status_code == 200:
                print(response.text)