Tweepy JSON意外的非空白字符

时间:2015-04-11 15:11:17

标签: json tweepy

我正在使用Tweepy Stream API收集推文,看起来工作非常好。每当我尝试打开json文件时,它会在第一条推文之后给出非空白问题。 我正在使用带有Canopy和tweepy包的python 2.6。我也安装了pycharm和python 2.7来解决这个问题,但它没有用。我也尝试用R来解压json文件,但它没有用。

有人对此有任何线索吗? 谢谢,

# -*- coding: utf-8 -*-


from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import json
from auth import TwitterAuth



class StdOutListener(StreamListener):   
    #This function gets called every time a new tweet is received on the stream
    def on_data(self, data):

        #Convert the data to a json object (shouldn't do this in production; might slow down and miss tweets)
        j=json.loads(data)

        text=j["text"] #The text of the tweet
        print(text) #Print it out

        #Just write data to one line in the file
        fhOut.write(data)

    def on_error(self, data):
        print("ERROR")
        print(data)

if __name__ == '__main__':
    try:
        #Create a file to store output. "a" means append (add on to previous file)
        fhOut = open("JsonTest.json","a")

        #Create the StdOutListener
        l = StdOutListener()
        auth = OAuthHandler(TwitterAuth.consumer_key, TwitterAuth.consumer_secret)
        auth.set_access_token(TwitterAuth.access_token, TwitterAuth.access_token_secret)

        #Connect to the Twitter stream
        stream = Stream(auth, l)    

    #Terms to track
        stream.filter(track=["#NationalSiblingsDay"])

    except KeyboardInterrupt:
        #User pressed ctrl+c -- get ready to exit the program
        pass

    #Close the 
    fhOut.close()

我的json文件中有3条推文,它是问题的例子。如果您使用“http://pro.jsonlint.com/”,您可以看到第233行存在非空白问题。

{
    "created_at": "Fri Apr 03 16:15:15 +0000 2015",
    "id": 584026386416959488,
    "id_str": "584026386416959488",
    "text": "RT @Gezi_Revir: Bu ak\u015fam olas\u0131 toplumsal olaylarda acil sa\u011fl\u0131k e\u011fitimi duyurusu yap\u0131lacakt\u0131r\nKat\u0131l\u0131m ve duyuru yapmak \u00f6nemli!\nL\u00fctfen destek\u2026",
    "source": "\u003ca href=\"http:\/\/twitter.com\/download\/android\" rel=\"nofollow\"\u003eTwitter for Android\u003c\/a\u003e",
    "truncated": false,
    "in_reply_to_status_id": null,
    "in_reply_to_status_id_str": null,
    "in_reply_to_user_id": null,
    "in_reply_to_user_id_str": null,
    "in_reply_to_screen_name": null,
    "user": {
        "id": 412536931,
        "id_str": "412536931",
        "name": "D. U. O",
        "screen_name": "utkuodabasi",
        "location": "\u0130stanbul\/Kad\u0131k\u00f6y",
        "url": null,
        "description": "ALF\/ELF",
        "protected": false,
        "verified": false,
        "followers_count": 183,
        "friends_count": 276,
        "listed_count": 2,
        "favourites_count": 1125,
        "statuses_count": 2639,
        "created_at": "Mon Nov 14 20:28:49 +0000 2011",
        "utc_offset": 10800,
        "time_zone": "Baghdad",
        "geo_enabled": true,
        "lang": "tr",
        "contributors_enabled": false,
        "is_translator": false,
        "profile_background_color": "C3C1EB",
        "profile_background_image_url": "http:\/\/pbs.twimg.com\/profile_background_images\/432571975454572544\/l1MU-K7P.jpeg",
        "profile_background_image_url_https": "https:\/\/pbs.twimg.com\/profile_background_images\/432571975454572544\/l1MU-K7P.jpeg",
        "profile_background_tile": true,
        "profile_link_color": "000000",
        "profile_sidebar_border_color": "FFFFFF",
        "profile_sidebar_fill_color": "DDEEF6",
        "profile_text_color": "080808",
        "profile_use_background_image": true,
        "profile_image_url": "http:\/\/pbs.twimg.com\/profile_images\/559996372615954432\/dcFLUV8m_normal.jpeg",
        "profile_image_url_https": "https:\/\/pbs.twimg.com\/profile_images\/559996372615954432\/dcFLUV8m_normal.jpeg",
        "profile_banner_url": "https:\/\/pbs.twimg.com\/profile_banners\/412536931\/1423493305",
        "default_profile": false,
        "default_profile_image": false,
        "following": null,
        "follow_request_sent": null,
        "notifications": null
    },
    "geo": null,
    "coordinates": null,
    "place": null,
    "contributors": null,
    "retweeted_status": {
        "created_at": "Fri Apr 03 13:08:16 +0000 2015",
        "id": 583979327701835776,
        "id_str": "583979327701835776",
        "text": "Bu ak\u015fam olas\u0131 toplumsal olaylarda acil sa\u011fl\u0131k e\u011fitimi duyurusu yap\u0131lacakt\u0131r\nKat\u0131l\u0131m ve duyuru yapmak \u00f6nemli!\nL\u00fctfen destek RT\n#1May\u0131s #Gezi",
        "source": "\u003ca href=\"https:\/\/twitter.com\/download\/android\" rel=\"nofollow\"\u003eTwitter for  Android\u003c\/a\u003e",
        "truncated": false,
        "in_reply_to_status_id": null,
        "in_reply_to_status_id_str": null,
        "in_reply_to_user_id": null,
        "in_reply_to_user_id_str": null,
        "in_reply_to_screen_name": null,
        "user": {
            "id": 100980247,
            "id_str": "100980247",
            "name": "GEZ\u0130 REV\u0130R",
            "screen_name": "Gezi_Revir",
            "location": "Gezi Park\u0131",
            "url": "http:\/\/www.gezirevir.com",
            "description": "Gezi Revir S\u00f6zc\u00fcs\u00fc\n-\n\u0130leti\u015fim:http:\/\/www.gezirevir.com\/p\/iletisim.html",
            "protected": false,
            "verified": false,
            "followers_count": 7421,
            "friends_count": 355,
            "listed_count": 36,
            "favourites_count": 7220,
            "statuses_count": 7802,
            "created_at": "Fri Jan 01 13:56:42 +0000 2010",
            "utc_offset": 10800,
            "time_zone": "Istanbul",
            "geo_enabled": true,
            "lang": "tr",
            "contributors_enabled": false,
            "is_translator": false,
            "profile_background_color": "C0DEED",
            "profile_background_image_url": "http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png",
            "profile_background_image_url_https": "https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png",
            "profile_background_tile": false,
            "profile_link_color": "0084B4",
            "profile_sidebar_border_color": "C0DEED",
            "profile_sidebar_fill_color": "DDEEF6",
            "profile_text_color": "333333",
            "profile_use_background_image": true,
            "profile_image_url": "http:\/\/pbs.twimg.com\/profile_images\/378800000717849688\/a7d66518ea87a59e031bd51834341926_normal.jpeg",
            "profile_image_url_https": "https:\/\/pbs.twimg.com\/profile_images\/378800000717849688\/a7d66518ea87a59e031bd51834341926_normal.jpeg",
            "profile_banner_url": "https:\/\/pbs.twimg.com\/profile_banners\/100980247\/1374700982",
            "default_profile": true,
            "default_profile_image": false,
            "following": null,
            "follow_request_sent": null,
            "notifications": null
        },
        "geo": null,
        "coordinates": null,
        "place": null,
        "contributors": null,
        "retweet_count": 6,
        "favorite_count": 3,
        "entities": {
            "hashtags": [
                {
                    "text": "1May\u0131s",
                    "indices": [
                        127,
                        134
                    ]
                },
                {
                    "text": "Gezi",
                    "indices": [
                        135,
                        140
                    ]
                }
            ],
            "trends": [
                
            ],
            "urls": [
                
            ],
            "user_mentions": [
                
            ],
            "symbols": [
                
            ]
        },
        "favorited": false,
        "retweeted": false,
        "possibly_sensitive": false,
        "filter_level": "low",
        "lang": "tr"
    },
    "retweet_count": 0,
    "favorite_count": 0,
    "entities": {
        "hashtags": [
            {
                "text": "1May\u0131s",
                "indices": [
                    139,
                    140
                ]
            },
            {
                "text": "Gezi",
                "indices": [
                    139,
                    140
                ]
            }
        ],
        "trends": [
            
        ],
        "urls": [
            
        ],
        "user_mentions": [
            {
                "screen_name": "Gezi_Revir",
                "name": "GEZ\u0130 REV\u0130R",
                "id": 100980247,
                "id_str": "100980247",
                "indices": [
                    3,
                    14
                ]
            }
        ],
        "symbols": [
            
        ]
    },
    "favorited": false,
    "retweeted": false,
    "possibly_sensitive": false,
    "filter_level": "low",
    "lang": "tr",
    "timestamp_ms": "1428077715864"
}{
    "created_at": "Fri Apr 03 16:17:08 +0000 2015",
    "id": 584026858406207489,
    "id_str": "584026858406207489",
    "text": "RT @birlesik: Ya\u015fas\u0131n #1May\u0131s! B\u0131j\u00ee yek g\u00fblan!\n\nEmperyalizm d\u00fcnya kaynaklar\u0131n\u0131 ya\u011fmal\u0131yor, halklar\u0131 k\u00f6leli\u011fe mahk\u00fbm etmek i\u00e7in... http:\/\/t.\u2026",
    "source": "\u003ca href=\"http:\/\/twitter.com\" rel=\"nofollow\"\u003eTwitter Web Client\u003c\/a\u003e",
    "truncated": false,
    "in_reply_to_status_id": null,
    "in_reply_to_status_id_str": null,
    "in_reply_to_user_id": null,
    "in_reply_to_user_id_str": null,
    "in_reply_to_screen_name": null,
    "user": {
        "id": 2982089170,
        "id_str": "2982089170",
        "name": "astare",
        "screen_name": "astare23",
        "location": "",
        "url": null,
        "description": null,
        "protected": false,
        "verified": false,
        "followers_count": 406,
        "friends_count": 717,
        "listed_count": 2,
        "favourites_count": 15,
        "statuses_count": 8788,
        "created_at": "Sat Jan 17 02:34:02 +0000 2015",
        "utc_offset": 3600,
        "time_zone": "Dublin",
        "geo_enabled": false,
        "lang": "en",
        "contributors_enabled": false,
        "is_translator": false,
        "profile_background_color": "4A913C",
        "profile_background_image_url": "http:\/\/pbs.twimg.com\/profile_background_images\/556485972225843202\/FlunZySF.jpeg",
        "profile_background_image_url_https": "https:\/\/pbs.twimg.com\/profile_background_images\/556485972225843202\/FlunZySF.jpeg",
        "profile_background_tile": true,
        "profile_link_color": "CC3366",
        "profile_sidebar_border_color": "C0DEED",
        "profile_sidebar_fill_color": "DDEEF6",
        "profile_text_color": "333333",
        "profile_use_background_image": true,
        "profile_image_url": "http:\/\/pbs.twimg.com\/profile_images\/581147326162575360\/Uc48cvr0_normal.jpg",
        "profile_image_url_https": "https:\/\/pbs.twimg.com\/profile_images\/581147326162575360\/Uc48cvr0_normal.jpg",
        "profile_banner_url": "https:\/\/pbs.twimg.com\/profile_banners\/2982089170\/1427391449",
        "default_profile": false,
        "default_profile_image": false,
        "following": null,
        "follow_request_sent": null,
        "notifications": null
    },
    "geo": null,
    "coordinates": null,
    "place": null,
    "contributors": null,
    "retweeted_status": {
        "created_at": "Fri Apr 03 16:11:14 +0000 2015",
        "id": 584025375199596544,
        "id_str": "584025375199596544",
        "text": "Ya\u015fas\u0131n #1May\u0131s! B\u0131j\u00ee yek g\u00fblan!\n\nEmperyalizm d\u00fcnya kaynaklar\u0131n\u0131 ya\u011fmal\u0131yor, halklar\u0131 k\u00f6leli\u011fe mahk\u00fbm etmek i\u00e7in... http:\/\/t.co\/lMzEJOEqA9",
        "source": "\u003ca href=\"http:\/\/www.facebook.com\/twitter\" rel=\"nofollow\"\u003eFacebook\u003c\/a\u003e",
        "truncated": false,
        "in_reply_to_status_id": null,
        "in_reply_to_status_id_str": null,
        "in_reply_to_user_id": null,
        "in_reply_to_user_id_str": null,
        "in_reply_to_screen_name": null,
        "user": {
            "id": 321693960,
            "id_str": "321693960",
            "name": "FKBC",
            "screen_name": "birlesik",
            "location": "Enternasyonal",
            "url": "http:\/\/fasizmekarsibirlesikcephe.blogspot.co.uk",
            "description": "Fa\u015fizme Kar\u015f\u0131 Birle\u015fik Cephe",
            "protected": false,
            "verified": false,
            "followers_count": 23004,
            "friends_count": 2030,
            "listed_count": 92,
            "favourites_count": 1608,
            "statuses_count": 72498,
            "created_at": "Wed Jun 22 00:13:10 +0000 2011",
            "utc_offset": -18000,
            "time_zone": "Quito",
            "geo_enabled": false,
            "lang": "tr",
            "contributors_enabled": false,
            "is_translator": false,
            "profile_background_color": "1A1B1F",
            "profile_background_image_url": "http:\/\/pbs.twimg.com\/profile_background_images\/431941624231239680\/1U-zI3Hq.png",
            "profile_background_image_url_https": "https:\/\/pbs.twimg.com\/profile_background_images\/431941624231239680\/1U-zI3Hq.png",
            "profile_background_tile": true,
            "profile_link_color": "2FC2EF",
            "profile_sidebar_border_color": "000000",
            "profile_sidebar_fill_color": "DDEEF6",
            "profile_text_color": "333333",
            "profile_use_background_image": true,
            "profile_image_url": "http:\/\/pbs.twimg.com\/profile_images\/553123309202579457\/1rK5Laap_normal.png",
            "profile_image_url_https": "https:\/\/pbs.twimg.com\/profile_images\/553123309202579457\/1rK5Laap_normal.png",
            "profile_banner_url": "https:\/\/pbs.twimg.com\/profile_banners\/321693960\/1401743234",
            "default_profile": false,
            "default_profile_image": false,
            "following": null,
            "follow_request_sent": null,
            "notifications": null
        },
        "geo": null,
        "coordinates": null,
        "place": null,
        "contributors": null,
        "retweet_count": 3,
        "favorite_count": 0,
        "entities": {
            "hashtags": [
                {
                    "text": "1May\u0131s",
                    "indices": [
                        8,
                        15
                    ]
                }
            ],
            "trends": [
                
            ],
            "urls": [
                {
                    "url": "http:\/\/t.co\/lMzEJOEqA9",
                    "expanded_url": "http:\/\/fb.me\/6wR4SzUfh",
                    "display_url": "fb.me\/6wR4SzUfh",
                    "indices": [
                        116,
                        138
                    ]
                }
            ],
            "user_mentions": [
                
            ],
            "symbols": [
                
            ]
        },
        "favorited": false,
        "retweeted": false,
        "possibly_sensitive": false,
        "filter_level": "low",
        "lang": "tr"
    },
    "retweet_count": 0,
    "favorite_count": 0,
    "entities": {
        "hashtags": [
            {
                "text": "1May\u0131s",
                "indices": [
                    22,
                    29
                ]
            }
        ],
        "trends": [
            
        ],
        "urls": [
            {
                "url": "http:\/\/t.co\/lMzEJOEqA9",
                "expanded_url": "http:\/\/fb.me\/6wR4SzUfh",
                "display_url": "fb.me\/6wR4SzUfh",
                "indices": [
                    139,
                    140
                ]
            }
        ],
        "user_mentions": [
            {
                "screen_name": "birlesik",
                "name": "FKBC",
                "id": 321693960,
                "id_str": "321693960",
                "indices": [
                    3,
                    12
                ]
            }
        ],
        "symbols": [
            
        ]
    },
    "favorited": false,
    "retweeted": false,
    "possibly_sensitive": false,
    "filter_level": "low",
    "lang": "tr",
    "timestamp_ms": "1428077828395"
}{
    "created_at": "Fri Apr 03 16:56:39 +0000 2015",
    "id": 584036803377790976,
    "id_str": "584036803377790976",
    "text": "RT @Gezi_Revir: Bu ak\u015fam olas\u0131 toplumsal olaylarda acil sa\u011fl\u0131k e\u011fitimi duyurusu yap\u0131lacakt\u0131r\nKat\u0131l\u0131m ve duyuru yapmak \u00f6nemli!\nL\u00fctfen destek\u2026",
    "source": "\u003ca href=\"http:\/\/twitter.com\/#!\/download\/ipad\" rel=\"nofollow\"\u003eTwitter for iPad\u003c\/a\u003e",
    "truncated": false,
    "in_reply_to_status_id": null,
    "in_reply_to_status_id_str": null,
    "in_reply_to_user_id": null,
    "in_reply_to_user_id_str": null,
    "in_reply_to_screen_name": null,
    "user": {
        "id": 1566649928,
        "id_str": "1566649928",
        "name": "Cihangir Forum",
        "screen_name": "beyogluforum",
        "location": "",
        "url": null,
        "description": "info@beyogluforum.com https:\/\/www.facebook.com\/cihangirparki",
        "protected": false,
        "verified": false,
        "followers_count": 4866,
        "friends_count": 201,
        "listed_count": 37,
        "favourites_count": 71,
        "statuses_count": 2016,
        "created_at": "Wed Jul 03 20:40:52 +0000 2013",
        "utc_offset": 10800,
        "time_zone": "Istanbul",
        "geo_enabled": false,
        "lang": "en",
        "contributors_enabled": false,
        "is_translator": false,
        "profile_background_color": "C0DEED",
        "profile_background_image_url": "http:\/\/pbs.twimg.com\/profile_background_images\/378800000015019189\/8416aa2147458e211b9849a732718dc1.jpeg",
        "profile_background_image_url_https": "https:\/\/pbs.twimg.com\/profile_background_images\/378800000015019189\/8416aa2147458e211b9849a732718dc1.jpeg",
        "profile_background_tile": false,
        "profile_link_color": "0084B4",
        "profile_sidebar_border_color": "FFFFFF",
        "profile_sidebar_fill_color": "DDEEF6",
        "profile_text_color": "333333",
        "profile_use_background_image": true,
        "profile_image_url": "http:\/\/pbs.twimg.com\/profile_images\/440773343919476736\/4xdPpxlX_normal.jpeg",
        "profile_image_url_https": "https:\/\/pbs.twimg.com\/profile_images\/440773343919476736\/4xdPpxlX_normal.jpeg",
        "profile_banner_url": "https:\/\/pbs.twimg.com\/profile_banners\/1566649928\/1401273304",
        "default_profile": false,
        "default_profile_image": false,
        "following": null,
        "follow_request_sent": null,
        "notifications": null
    },
    "geo": null,
    "coordinates": null,
    "place": null,
    "contributors": null,
    "retweeted_status": {
        "created_at": "Fri Apr 03 13:08:16 +0000 2015",
        "id": 583979327701835776,
        "id_str": "583979327701835776",
        "text": "Bu ak\u015fam olas\u0131 toplumsal olaylarda acil sa\u011fl\u0131k e\u011fitimi duyurusu yap\u0131lacakt\u0131r\nKat\u0131l\u0131m ve duyuru yapmak \u00f6nemli!\nL\u00fctfen destek RT\n#1May\u0131s #Gezi",
        "source": "\u003ca href=\"https:\/\/twitter.com\/download\/android\" rel=\"nofollow\"\u003eTwitter for  Android\u003c\/a\u003e",
        "truncated": false,
        "in_reply_to_status_id": null,
        "in_reply_to_status_id_str": null,
        "in_reply_to_user_id": null,
        "in_reply_to_user_id_str": null,
        "in_reply_to_screen_name": null,
        "user": {
            "id": 100980247,
            "id_str": "100980247",
            "name": "GEZ\u0130 REV\u0130R",
            "screen_name": "Gezi_Revir",
            "location": "Gezi Park\u0131",
            "url": "http:\/\/www.gezirevir.com",
            "description": "Gezi Revir S\u00f6zc\u00fcs\u00fc\n-\n\u0130leti\u015fim:http:\/\/www.gezirevir.com\/p\/iletisim.html",
            "protected": false,
            "verified": false,
            "followers_count": 7421,
            "friends_count": 355,
            "listed_count": 36,
            "favourites_count": 7221,
            "statuses_count": 7802,
            "created_at": "Fri Jan 01 13:56:42 +0000 2010",
            "utc_offset": 10800,
            "time_zone": "Istanbul",
            "geo_enabled": true,
            "lang": "tr",
            "contributors_enabled": false,
            "is_translator": false,
            "profile_background_color": "C0DEED",
            "profile_background_image_url": "http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png",
            "profile_background_image_url_https": "https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png",
            "profile_background_tile": false,
            "profile_link_color": "0084B4",
            "profile_sidebar_border_color": "C0DEED",
            "profile_sidebar_fill_color": "DDEEF6",
            "profile_text_color": "333333",
            "profile_use_background_image": true,
            "profile_image_url": "http:\/\/pbs.twimg.com\/profile_images\/378800000717849688\/a7d66518ea87a59e031bd51834341926_normal.jpeg",
            "profile_image_url_https": "https:\/\/pbs.twimg.com\/profile_images\/378800000717849688\/a7d66518ea87a59e031bd51834341926_normal.jpeg",
            "profile_banner_url": "https:\/\/pbs.twimg.com\/profile_banners\/100980247\/1374700982",
            "default_profile": true,
            "default_profile_image": false,
            "following": null,
            "follow_request_sent": null,
            "notifications": null
        },
        "geo": null,
        "coordinates": null,
        "place": null,
        "contributors": null,
        "retweet_count": 7,
        "favorite_count": 3,
        "entities": {
            "hashtags": [
                {
                    "text": "1May\u0131s",
                    "indices": [
                        127,
                        134
                    ]
                },
                {
                    "text": "Gezi",
                    "indices": [
                        135,
                        140
                    ]
                }
            ],
            "trends": [
                
            ],
            "urls": [
                
            ],
            "user_mentions": [
                
            ],
            "symbols": [
                
            ]
        },
        "favorited": false,
        "retweeted": false,
        "possibly_sensitive": false,
        "filter_level": "low",
        "lang": "tr"
    },
    "retweet_count": 0,
    "favorite_count": 0,
    "entities": {
        "hashtags": [
            {
                "text": "1May\u0131s",
                "indices": [
                    139,
                    140
                ]
            },
            {
                "text": "Gezi",
                "indices": [
                    139,
                    140
                ]
            }
        ],
        "trends": [
            
        ],
        "urls": [
            
        ],
        "user_mentions": [
            {
                "screen_name": "Gezi_Revir",
                "name": "GEZ\u0130 REV\u0130R",
                "id": 100980247,
                "id_str": "100980247",
                "indices": [
                    3,
                    14
                ]
            }
        ],
        "symbols": [
            
        ]
    },
    "favorited": false,
    "retweeted": false,
    "possibly_sensitive": false,
    "filter_level": "low",
    "lang": "tr",
    "timestamp_ms": "1428080199461"
}

1 个答案:

答案 0 :(得分:0)

打开您的文件,写入并关闭它,所有文件都在on_data的同一位置。我认为,一旦你连接到流,你就永远不会关闭它,它会在它之后追加,而不是在下一行。