我正在使用Tweepy Stream API收集推文,看起来工作非常好。每当我尝试打开json文件时,它会在第一条推文之后给出非空白问题。 我正在使用带有Canopy和tweepy包的python 2.6。我也安装了pycharm和python 2.7来解决这个问题,但它没有用。我也尝试用R来解压json文件,但它没有用。
有人对此有任何线索吗? 谢谢,
# -*- coding: utf-8 -*-
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import json
from auth import TwitterAuth
class StdOutListener(StreamListener):
#This function gets called every time a new tweet is received on the stream
def on_data(self, data):
#Convert the data to a json object (shouldn't do this in production; might slow down and miss tweets)
j=json.loads(data)
text=j["text"] #The text of the tweet
print(text) #Print it out
#Just write data to one line in the file
fhOut.write(data)
def on_error(self, data):
print("ERROR")
print(data)
if __name__ == '__main__':
try:
#Create a file to store output. "a" means append (add on to previous file)
fhOut = open("JsonTest.json","a")
#Create the StdOutListener
l = StdOutListener()
auth = OAuthHandler(TwitterAuth.consumer_key, TwitterAuth.consumer_secret)
auth.set_access_token(TwitterAuth.access_token, TwitterAuth.access_token_secret)
#Connect to the Twitter stream
stream = Stream(auth, l)
#Terms to track
stream.filter(track=["#NationalSiblingsDay"])
except KeyboardInterrupt:
#User pressed ctrl+c -- get ready to exit the program
pass
#Close the
fhOut.close()
我的json文件中有3条推文,它是问题的例子。如果您使用“http://pro.jsonlint.com/”,您可以看到第233行存在非空白问题。
{
"created_at": "Fri Apr 03 16:15:15 +0000 2015",
"id": 584026386416959488,
"id_str": "584026386416959488",
"text": "RT @Gezi_Revir: Bu ak\u015fam olas\u0131 toplumsal olaylarda acil sa\u011fl\u0131k e\u011fitimi duyurusu yap\u0131lacakt\u0131r\nKat\u0131l\u0131m ve duyuru yapmak \u00f6nemli!\nL\u00fctfen destek\u2026",
"source": "\u003ca href=\"http:\/\/twitter.com\/download\/android\" rel=\"nofollow\"\u003eTwitter for Android\u003c\/a\u003e",
"truncated": false,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"in_reply_to_screen_name": null,
"user": {
"id": 412536931,
"id_str": "412536931",
"name": "D. U. O",
"screen_name": "utkuodabasi",
"location": "\u0130stanbul\/Kad\u0131k\u00f6y",
"url": null,
"description": "ALF\/ELF",
"protected": false,
"verified": false,
"followers_count": 183,
"friends_count": 276,
"listed_count": 2,
"favourites_count": 1125,
"statuses_count": 2639,
"created_at": "Mon Nov 14 20:28:49 +0000 2011",
"utc_offset": 10800,
"time_zone": "Baghdad",
"geo_enabled": true,
"lang": "tr",
"contributors_enabled": false,
"is_translator": false,
"profile_background_color": "C3C1EB",
"profile_background_image_url": "http:\/\/pbs.twimg.com\/profile_background_images\/432571975454572544\/l1MU-K7P.jpeg",
"profile_background_image_url_https": "https:\/\/pbs.twimg.com\/profile_background_images\/432571975454572544\/l1MU-K7P.jpeg",
"profile_background_tile": true,
"profile_link_color": "000000",
"profile_sidebar_border_color": "FFFFFF",
"profile_sidebar_fill_color": "DDEEF6",
"profile_text_color": "080808",
"profile_use_background_image": true,
"profile_image_url": "http:\/\/pbs.twimg.com\/profile_images\/559996372615954432\/dcFLUV8m_normal.jpeg",
"profile_image_url_https": "https:\/\/pbs.twimg.com\/profile_images\/559996372615954432\/dcFLUV8m_normal.jpeg",
"profile_banner_url": "https:\/\/pbs.twimg.com\/profile_banners\/412536931\/1423493305",
"default_profile": false,
"default_profile_image": false,
"following": null,
"follow_request_sent": null,
"notifications": null
},
"geo": null,
"coordinates": null,
"place": null,
"contributors": null,
"retweeted_status": {
"created_at": "Fri Apr 03 13:08:16 +0000 2015",
"id": 583979327701835776,
"id_str": "583979327701835776",
"text": "Bu ak\u015fam olas\u0131 toplumsal olaylarda acil sa\u011fl\u0131k e\u011fitimi duyurusu yap\u0131lacakt\u0131r\nKat\u0131l\u0131m ve duyuru yapmak \u00f6nemli!\nL\u00fctfen destek RT\n#1May\u0131s #Gezi",
"source": "\u003ca href=\"https:\/\/twitter.com\/download\/android\" rel=\"nofollow\"\u003eTwitter for Android\u003c\/a\u003e",
"truncated": false,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"in_reply_to_screen_name": null,
"user": {
"id": 100980247,
"id_str": "100980247",
"name": "GEZ\u0130 REV\u0130R",
"screen_name": "Gezi_Revir",
"location": "Gezi Park\u0131",
"url": "http:\/\/www.gezirevir.com",
"description": "Gezi Revir S\u00f6zc\u00fcs\u00fc\n-\n\u0130leti\u015fim:http:\/\/www.gezirevir.com\/p\/iletisim.html",
"protected": false,
"verified": false,
"followers_count": 7421,
"friends_count": 355,
"listed_count": 36,
"favourites_count": 7220,
"statuses_count": 7802,
"created_at": "Fri Jan 01 13:56:42 +0000 2010",
"utc_offset": 10800,
"time_zone": "Istanbul",
"geo_enabled": true,
"lang": "tr",
"contributors_enabled": false,
"is_translator": false,
"profile_background_color": "C0DEED",
"profile_background_image_url": "http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png",
"profile_background_image_url_https": "https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png",
"profile_background_tile": false,
"profile_link_color": "0084B4",
"profile_sidebar_border_color": "C0DEED",
"profile_sidebar_fill_color": "DDEEF6",
"profile_text_color": "333333",
"profile_use_background_image": true,
"profile_image_url": "http:\/\/pbs.twimg.com\/profile_images\/378800000717849688\/a7d66518ea87a59e031bd51834341926_normal.jpeg",
"profile_image_url_https": "https:\/\/pbs.twimg.com\/profile_images\/378800000717849688\/a7d66518ea87a59e031bd51834341926_normal.jpeg",
"profile_banner_url": "https:\/\/pbs.twimg.com\/profile_banners\/100980247\/1374700982",
"default_profile": true,
"default_profile_image": false,
"following": null,
"follow_request_sent": null,
"notifications": null
},
"geo": null,
"coordinates": null,
"place": null,
"contributors": null,
"retweet_count": 6,
"favorite_count": 3,
"entities": {
"hashtags": [
{
"text": "1May\u0131s",
"indices": [
127,
134
]
},
{
"text": "Gezi",
"indices": [
135,
140
]
}
],
"trends": [
],
"urls": [
],
"user_mentions": [
],
"symbols": [
]
},
"favorited": false,
"retweeted": false,
"possibly_sensitive": false,
"filter_level": "low",
"lang": "tr"
},
"retweet_count": 0,
"favorite_count": 0,
"entities": {
"hashtags": [
{
"text": "1May\u0131s",
"indices": [
139,
140
]
},
{
"text": "Gezi",
"indices": [
139,
140
]
}
],
"trends": [
],
"urls": [
],
"user_mentions": [
{
"screen_name": "Gezi_Revir",
"name": "GEZ\u0130 REV\u0130R",
"id": 100980247,
"id_str": "100980247",
"indices": [
3,
14
]
}
],
"symbols": [
]
},
"favorited": false,
"retweeted": false,
"possibly_sensitive": false,
"filter_level": "low",
"lang": "tr",
"timestamp_ms": "1428077715864"
}{
"created_at": "Fri Apr 03 16:17:08 +0000 2015",
"id": 584026858406207489,
"id_str": "584026858406207489",
"text": "RT @birlesik: Ya\u015fas\u0131n #1May\u0131s! B\u0131j\u00ee yek g\u00fblan!\n\nEmperyalizm d\u00fcnya kaynaklar\u0131n\u0131 ya\u011fmal\u0131yor, halklar\u0131 k\u00f6leli\u011fe mahk\u00fbm etmek i\u00e7in... http:\/\/t.\u2026",
"source": "\u003ca href=\"http:\/\/twitter.com\" rel=\"nofollow\"\u003eTwitter Web Client\u003c\/a\u003e",
"truncated": false,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"in_reply_to_screen_name": null,
"user": {
"id": 2982089170,
"id_str": "2982089170",
"name": "astare",
"screen_name": "astare23",
"location": "",
"url": null,
"description": null,
"protected": false,
"verified": false,
"followers_count": 406,
"friends_count": 717,
"listed_count": 2,
"favourites_count": 15,
"statuses_count": 8788,
"created_at": "Sat Jan 17 02:34:02 +0000 2015",
"utc_offset": 3600,
"time_zone": "Dublin",
"geo_enabled": false,
"lang": "en",
"contributors_enabled": false,
"is_translator": false,
"profile_background_color": "4A913C",
"profile_background_image_url": "http:\/\/pbs.twimg.com\/profile_background_images\/556485972225843202\/FlunZySF.jpeg",
"profile_background_image_url_https": "https:\/\/pbs.twimg.com\/profile_background_images\/556485972225843202\/FlunZySF.jpeg",
"profile_background_tile": true,
"profile_link_color": "CC3366",
"profile_sidebar_border_color": "C0DEED",
"profile_sidebar_fill_color": "DDEEF6",
"profile_text_color": "333333",
"profile_use_background_image": true,
"profile_image_url": "http:\/\/pbs.twimg.com\/profile_images\/581147326162575360\/Uc48cvr0_normal.jpg",
"profile_image_url_https": "https:\/\/pbs.twimg.com\/profile_images\/581147326162575360\/Uc48cvr0_normal.jpg",
"profile_banner_url": "https:\/\/pbs.twimg.com\/profile_banners\/2982089170\/1427391449",
"default_profile": false,
"default_profile_image": false,
"following": null,
"follow_request_sent": null,
"notifications": null
},
"geo": null,
"coordinates": null,
"place": null,
"contributors": null,
"retweeted_status": {
"created_at": "Fri Apr 03 16:11:14 +0000 2015",
"id": 584025375199596544,
"id_str": "584025375199596544",
"text": "Ya\u015fas\u0131n #1May\u0131s! B\u0131j\u00ee yek g\u00fblan!\n\nEmperyalizm d\u00fcnya kaynaklar\u0131n\u0131 ya\u011fmal\u0131yor, halklar\u0131 k\u00f6leli\u011fe mahk\u00fbm etmek i\u00e7in... http:\/\/t.co\/lMzEJOEqA9",
"source": "\u003ca href=\"http:\/\/www.facebook.com\/twitter\" rel=\"nofollow\"\u003eFacebook\u003c\/a\u003e",
"truncated": false,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"in_reply_to_screen_name": null,
"user": {
"id": 321693960,
"id_str": "321693960",
"name": "FKBC",
"screen_name": "birlesik",
"location": "Enternasyonal",
"url": "http:\/\/fasizmekarsibirlesikcephe.blogspot.co.uk",
"description": "Fa\u015fizme Kar\u015f\u0131 Birle\u015fik Cephe",
"protected": false,
"verified": false,
"followers_count": 23004,
"friends_count": 2030,
"listed_count": 92,
"favourites_count": 1608,
"statuses_count": 72498,
"created_at": "Wed Jun 22 00:13:10 +0000 2011",
"utc_offset": -18000,
"time_zone": "Quito",
"geo_enabled": false,
"lang": "tr",
"contributors_enabled": false,
"is_translator": false,
"profile_background_color": "1A1B1F",
"profile_background_image_url": "http:\/\/pbs.twimg.com\/profile_background_images\/431941624231239680\/1U-zI3Hq.png",
"profile_background_image_url_https": "https:\/\/pbs.twimg.com\/profile_background_images\/431941624231239680\/1U-zI3Hq.png",
"profile_background_tile": true,
"profile_link_color": "2FC2EF",
"profile_sidebar_border_color": "000000",
"profile_sidebar_fill_color": "DDEEF6",
"profile_text_color": "333333",
"profile_use_background_image": true,
"profile_image_url": "http:\/\/pbs.twimg.com\/profile_images\/553123309202579457\/1rK5Laap_normal.png",
"profile_image_url_https": "https:\/\/pbs.twimg.com\/profile_images\/553123309202579457\/1rK5Laap_normal.png",
"profile_banner_url": "https:\/\/pbs.twimg.com\/profile_banners\/321693960\/1401743234",
"default_profile": false,
"default_profile_image": false,
"following": null,
"follow_request_sent": null,
"notifications": null
},
"geo": null,
"coordinates": null,
"place": null,
"contributors": null,
"retweet_count": 3,
"favorite_count": 0,
"entities": {
"hashtags": [
{
"text": "1May\u0131s",
"indices": [
8,
15
]
}
],
"trends": [
],
"urls": [
{
"url": "http:\/\/t.co\/lMzEJOEqA9",
"expanded_url": "http:\/\/fb.me\/6wR4SzUfh",
"display_url": "fb.me\/6wR4SzUfh",
"indices": [
116,
138
]
}
],
"user_mentions": [
],
"symbols": [
]
},
"favorited": false,
"retweeted": false,
"possibly_sensitive": false,
"filter_level": "low",
"lang": "tr"
},
"retweet_count": 0,
"favorite_count": 0,
"entities": {
"hashtags": [
{
"text": "1May\u0131s",
"indices": [
22,
29
]
}
],
"trends": [
],
"urls": [
{
"url": "http:\/\/t.co\/lMzEJOEqA9",
"expanded_url": "http:\/\/fb.me\/6wR4SzUfh",
"display_url": "fb.me\/6wR4SzUfh",
"indices": [
139,
140
]
}
],
"user_mentions": [
{
"screen_name": "birlesik",
"name": "FKBC",
"id": 321693960,
"id_str": "321693960",
"indices": [
3,
12
]
}
],
"symbols": [
]
},
"favorited": false,
"retweeted": false,
"possibly_sensitive": false,
"filter_level": "low",
"lang": "tr",
"timestamp_ms": "1428077828395"
}{
"created_at": "Fri Apr 03 16:56:39 +0000 2015",
"id": 584036803377790976,
"id_str": "584036803377790976",
"text": "RT @Gezi_Revir: Bu ak\u015fam olas\u0131 toplumsal olaylarda acil sa\u011fl\u0131k e\u011fitimi duyurusu yap\u0131lacakt\u0131r\nKat\u0131l\u0131m ve duyuru yapmak \u00f6nemli!\nL\u00fctfen destek\u2026",
"source": "\u003ca href=\"http:\/\/twitter.com\/#!\/download\/ipad\" rel=\"nofollow\"\u003eTwitter for iPad\u003c\/a\u003e",
"truncated": false,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"in_reply_to_screen_name": null,
"user": {
"id": 1566649928,
"id_str": "1566649928",
"name": "Cihangir Forum",
"screen_name": "beyogluforum",
"location": "",
"url": null,
"description": "info@beyogluforum.com https:\/\/www.facebook.com\/cihangirparki",
"protected": false,
"verified": false,
"followers_count": 4866,
"friends_count": 201,
"listed_count": 37,
"favourites_count": 71,
"statuses_count": 2016,
"created_at": "Wed Jul 03 20:40:52 +0000 2013",
"utc_offset": 10800,
"time_zone": "Istanbul",
"geo_enabled": false,
"lang": "en",
"contributors_enabled": false,
"is_translator": false,
"profile_background_color": "C0DEED",
"profile_background_image_url": "http:\/\/pbs.twimg.com\/profile_background_images\/378800000015019189\/8416aa2147458e211b9849a732718dc1.jpeg",
"profile_background_image_url_https": "https:\/\/pbs.twimg.com\/profile_background_images\/378800000015019189\/8416aa2147458e211b9849a732718dc1.jpeg",
"profile_background_tile": false,
"profile_link_color": "0084B4",
"profile_sidebar_border_color": "FFFFFF",
"profile_sidebar_fill_color": "DDEEF6",
"profile_text_color": "333333",
"profile_use_background_image": true,
"profile_image_url": "http:\/\/pbs.twimg.com\/profile_images\/440773343919476736\/4xdPpxlX_normal.jpeg",
"profile_image_url_https": "https:\/\/pbs.twimg.com\/profile_images\/440773343919476736\/4xdPpxlX_normal.jpeg",
"profile_banner_url": "https:\/\/pbs.twimg.com\/profile_banners\/1566649928\/1401273304",
"default_profile": false,
"default_profile_image": false,
"following": null,
"follow_request_sent": null,
"notifications": null
},
"geo": null,
"coordinates": null,
"place": null,
"contributors": null,
"retweeted_status": {
"created_at": "Fri Apr 03 13:08:16 +0000 2015",
"id": 583979327701835776,
"id_str": "583979327701835776",
"text": "Bu ak\u015fam olas\u0131 toplumsal olaylarda acil sa\u011fl\u0131k e\u011fitimi duyurusu yap\u0131lacakt\u0131r\nKat\u0131l\u0131m ve duyuru yapmak \u00f6nemli!\nL\u00fctfen destek RT\n#1May\u0131s #Gezi",
"source": "\u003ca href=\"https:\/\/twitter.com\/download\/android\" rel=\"nofollow\"\u003eTwitter for Android\u003c\/a\u003e",
"truncated": false,
"in_reply_to_status_id": null,
"in_reply_to_status_id_str": null,
"in_reply_to_user_id": null,
"in_reply_to_user_id_str": null,
"in_reply_to_screen_name": null,
"user": {
"id": 100980247,
"id_str": "100980247",
"name": "GEZ\u0130 REV\u0130R",
"screen_name": "Gezi_Revir",
"location": "Gezi Park\u0131",
"url": "http:\/\/www.gezirevir.com",
"description": "Gezi Revir S\u00f6zc\u00fcs\u00fc\n-\n\u0130leti\u015fim:http:\/\/www.gezirevir.com\/p\/iletisim.html",
"protected": false,
"verified": false,
"followers_count": 7421,
"friends_count": 355,
"listed_count": 36,
"favourites_count": 7221,
"statuses_count": 7802,
"created_at": "Fri Jan 01 13:56:42 +0000 2010",
"utc_offset": 10800,
"time_zone": "Istanbul",
"geo_enabled": true,
"lang": "tr",
"contributors_enabled": false,
"is_translator": false,
"profile_background_color": "C0DEED",
"profile_background_image_url": "http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png",
"profile_background_image_url_https": "https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png",
"profile_background_tile": false,
"profile_link_color": "0084B4",
"profile_sidebar_border_color": "C0DEED",
"profile_sidebar_fill_color": "DDEEF6",
"profile_text_color": "333333",
"profile_use_background_image": true,
"profile_image_url": "http:\/\/pbs.twimg.com\/profile_images\/378800000717849688\/a7d66518ea87a59e031bd51834341926_normal.jpeg",
"profile_image_url_https": "https:\/\/pbs.twimg.com\/profile_images\/378800000717849688\/a7d66518ea87a59e031bd51834341926_normal.jpeg",
"profile_banner_url": "https:\/\/pbs.twimg.com\/profile_banners\/100980247\/1374700982",
"default_profile": true,
"default_profile_image": false,
"following": null,
"follow_request_sent": null,
"notifications": null
},
"geo": null,
"coordinates": null,
"place": null,
"contributors": null,
"retweet_count": 7,
"favorite_count": 3,
"entities": {
"hashtags": [
{
"text": "1May\u0131s",
"indices": [
127,
134
]
},
{
"text": "Gezi",
"indices": [
135,
140
]
}
],
"trends": [
],
"urls": [
],
"user_mentions": [
],
"symbols": [
]
},
"favorited": false,
"retweeted": false,
"possibly_sensitive": false,
"filter_level": "low",
"lang": "tr"
},
"retweet_count": 0,
"favorite_count": 0,
"entities": {
"hashtags": [
{
"text": "1May\u0131s",
"indices": [
139,
140
]
},
{
"text": "Gezi",
"indices": [
139,
140
]
}
],
"trends": [
],
"urls": [
],
"user_mentions": [
{
"screen_name": "Gezi_Revir",
"name": "GEZ\u0130 REV\u0130R",
"id": 100980247,
"id_str": "100980247",
"indices": [
3,
14
]
}
],
"symbols": [
]
},
"favorited": false,
"retweeted": false,
"possibly_sensitive": false,
"filter_level": "low",
"lang": "tr",
"timestamp_ms": "1428080199461"
}
答案 0 :(得分:0)
打开您的文件,写入并关闭它,所有文件都在on_data
的同一位置。我认为,一旦你连接到流,你就永远不会关闭它,它会在它之后追加,而不是在下一行。