
时间:2016-09-04 23:08:48

标签: python-3.x tweetstream

from TwitterSearch import *

import sys
import csv

tso = TwitterSearchOrder() # create a TwitterSearchOrder object
tso.set_keywords(['gmo']) # let's define all words we would like to have a look for
tso.set_language('en') # we want to see English tweets only
tso.set_include_entities(False) # and don't give us all those entity information

max_range = 1           # search range in kilometres
num_results = 500       # minimum results to obtain
outfile = "output.csv"

# create twitter API object
twitter = TwitterSearch(
                        access_token = "764537836884242432-GzJmUSL4hcC2DOJD71TiQXwCA0aGosz",
                        access_token_secret = "zDGYDeigRqDkmdqTgBOltcfNcNnfLwRZPkPLlnFyY3xqQ",
                        consumer_key = "Kr9ThiJWvPa1uTXZoj4O0YaSG",
                        consumer_secret = "ozGCkXtTCyCdOcL7ZFO4PJs85IaijjEuhl6iIdZU0AdH9CCoxS"

# Create an array of USA states
ustates = [

def linearSearch(item, obj, start=0):
    for i in range(start, len(obj)):
        if item == obj[i]:
            return True
    return False
# open a file to write (mode "w"), and create a CSV writer object
csvfile = file(outfile, "w")
csvwriter = csv.writer(csvfile)

# add headings to our CSV file
row = [ "user", "text", "place"]

# the twitter API only allows us to query up to 100 tweets at a time.
# to search for more, we will break our search up into 10 "pages", each
# of which will include 100 matching tweets.
result_count = 0
last_id = None

while result_count <  num_results:
    # perform a search based on latitude and longitude
    # twitter API docs: https://dev.twitter.com/docs/api/1/get/search
    query = twitter.search_tweets_iterable(tso)

    for result in query:
        state = 0
        if result["place"]:
            user = result["user"]["screen_name"]
            text = result["text"]
            text = text.encode('utf-8', 'replace')
            place = result["place"]["full_name"]
            state = place.split(",")[1]
        if linearSearch(state,ustates):
            print state
            # now write this row to our CSV file
            row = [ user, text, place ]
            result_count += 1
        last_id = result["id"]

    print "got %d results" % result_count



1 个答案:

答案 0 :(得分:0)


>>> x=" WY "
>>> x.strip()


  1. 要加快ustates中的成员资格测试,请使用set而不是列表,因为set有一个恒定的时间检查,而list是线性搜索

  2. 打开文件的首选方法是使用context manager,以确保在块结束时关闭文件,或者在块中出现错误。也可以使用open而不是file

  3. 使用这些提示代码应该看起来像

    ... # all the previous stuff
    # Create an set of USA states
    ustates = {  
               "AL", "AK", "AS", "AZ", "AR",
               "CA", "CO", "CT",
               "DE", "DC",
               "FM", "FL",
               "GA", "GU",
               "ID", "IL", "IN", "IA",
               "KS", "KY",
               "ME", "MH", "MD", "MA", "MI", "MN", "MS", "MO", "MT", "MP",
               "NE", "NV", "NH", "NJ", "NM", "NY", "NC", "ND",
               "OH", "OK", "OR",
               "PW", "PA", "PR",
               "SC", "SD",
               "TN", "TX",
               "VT", "VI", "VA",
               "WA", "WV", "WI", "WY",
               } # that arrange is just to take less lines, while grouping them alphabetically 
    # open a file to write (mode "w"), and create a CSV writer object
    with open(outfile,"w") as csvfile:
        ...    # the rest is the same
        while result_count <  num_results:
            # perform a search based on latitude and longitude
            # twitter API docs: https://dev.twitter.com/docs/api/1/get/search
            query = twitter.search_tweets_iterable(tso)
            for result in query:
                state = 0
                if result["place"]:
                    ... # all the other stuff
                    state = state.strip()     #<--- the strip part, add the .upper() if needed or just in case
                if state in ustates:
                    ... # all the other stuff
                ... # the rest of stuff
            print "got %d results" % result_count