#!/usr/bin/python
from TwitterSearch import *
import sys
import csv
tso = TwitterSearchOrder() # create a TwitterSearchOrder object
tso.set_keywords(['gmo']) # define all the words we would like to search for
tso.set_language('en') # we want to see English tweets only
tso.set_include_entities(False) # and don't give us all that entity information
max_range = 1 # search range in kilometres
num_results = 500 # minimum results to obtain
outfile = "output.csv"
# create twitter API object
twitter = TwitterSearch(
    access_token = "764537836884242432-GzJmUSL4hcC2DOJD71TiQXwCA0aGosz",
    access_token_secret = "zDGYDeigRqDkmdqTgBOltcfNcNnfLwRZPkPLlnFyY3xqQ",
    consumer_key = "Kr9ThiJWvPa1uTXZoj4O0YaSG",
    consumer_secret = "ozGCkXtTCyCdOcL7ZFO4PJs85IaijjEuhl6iIdZU0AdH9CCoxS"
)
# Create an array of USA states
ustates = [
    "AL",
    "AK",
    "AS",
    "AZ",
    "AR",
    "CA",
    "CO",
    "CT",
    "DE",
    "DC",
    "FM",
    "FL",
    "GA",
    "GU",
    "HI",
    "ID",
    "IL",
    "IN",
    "IA",
    "KS",
    "KY",
    "LA",
    "ME",
    "MH",
    "MD",
    "MA",
    "MI",
    "MN",
    "MS",
    "MO",
    "MT",
    "NE",
    "NV",
    "NH",
    "NJ",
    "NM",
    "NY",
    "NC",
    "ND",
    "MP",
    "OH",
    "OK",
    "OR",
    "PW",
    "PA",
    "PR",
    "RI",
    "SC",
    "SD",
    "TN",
    "TX",
    "UT",
    "VT",
    "VI",
    "VA",
    "WA",
    "WV",
    "WI",
    "WY",
    "USA"
]
def linearSearch(item, obj, start=0):
    # scan obj from index `start` onwards; True if item is found
    for i in range(start, len(obj)):
        if item == obj[i]:
            return True
    return False
# open a file to write (mode "w"), and create a CSV writer object
csvfile = file(outfile, "w")
csvwriter = csv.writer(csvfile)
# add headings to our CSV file
row = [ "user", "text", "place"]
csvwriter.writerow(row)
#-----------------------------------------------------------------------
# the twitter API only allows us to query up to 100 tweets at a time.
# to search for more, we will break our search up into 10 "pages", each
# of which will include 100 matching tweets.
#-----------------------------------------------------------------------
result_count = 0
last_id = None
while result_count < num_results:
    # perform a search based on latitude and longitude
    # twitter API docs: https://dev.twitter.com/docs/api/1/get/search
    query = twitter.search_tweets_iterable(tso)
    for result in query:
        state = 0
        if result["place"]:
            user = result["user"]["screen_name"]
            text = result["text"]
            text = text.encode('utf-8', 'replace')
            place = result["place"]["full_name"]
            state = place.split(",")[1]
            if linearSearch(state, ustates):
                print state
                # now write this row to our CSV file
                row = [ user, text, place ]
                csvwriter.writerow(row)
                result_count += 1
        last_id = result["id"]
    print "got %d results" % result_count
csvfile.close()
I am trying to classify the tweets by state with my array ustates, but the second if block doesn't seem to work, and I can't figure out why. What I do is a linear search: if my item equals an item in my array, I write the row to the CSV file.
Answer 0 (score: 0)
Since the problem looks like some leftover whitespace, you can remove it with .strip():
>>> x=" WY "
>>> x.strip()
'WY'
>>>
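To see where that whitespace comes from in the first place, printing the repr of the parsed value makes it visible. A minimal sketch, reusing the place/state names from the question with a made-up place value:
# split "City, ST" on the comma; the second piece keeps the space after the comma
place = "Cheyenne, WY" # example value, not from the real data
state = place.split(",")[1]
print repr(state) # prints ' WY' -- note the leading space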
A few other tips:
To speed up the membership test against ustates, use a set instead of a list: a set gives a constant-time check, while a list requires a linear search (a quick timing sketch follows these tips).
The preferred way to open a file is with a context manager, which guarantees the file is closed when the block ends, even if an error occurs inside it. Also use open rather than file.
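For instance, a rough comparison with the standard timeit module illustrates the set-versus-list difference; the state values below are made up for the demo:
import timeit

states_list = ["AL", "AK", "AZ"] * 20 + ["WY"] # 61-item list; "WY" is last
states_set = set(states_list)

# list membership scans the elements one by one (linear time)
print timeit.timeit('"WY" in states_list', setup='from __main__ import states_list')
# set membership hashes the item once (constant time on average)
print timeit.timeit('"WY" in states_set', setup='from __main__ import states_set')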
With these tips, the code should look something like this:
#!/usr/bin/python
... # all the previous stuff
# Create a set of USA states
ustates = {
    "AL", "AK", "AS", "AZ", "AR",
    "CA", "CO", "CT",
    "DE", "DC",
    "FM", "FL",
    "GA", "GU",
    "HI",
    "ID", "IL", "IN", "IA",
    "KS", "KY",
    "LA",
    "ME", "MH", "MD", "MA", "MI", "MN", "MS", "MO", "MT", "MP",
    "NE", "NV", "NH", "NJ", "NM", "NY", "NC", "ND",
    "OH", "OK", "OR",
    "PW", "PA", "PR",
    "RI",
    "SC", "SD",
    "TN", "TX",
    "UT",
    "VT", "VI", "VA",
    "WA", "WV", "WI", "WY",
    "USA"
} # this arrangement just takes fewer lines, while grouping the entries alphabetically
# open a file to write (mode "w"), and create a CSV writer object
with open(outfile, "w") as csvfile:
    ... # the rest is the same
    while result_count < num_results:
        # perform a search based on latitude and longitude
        # twitter API docs: https://dev.twitter.com/docs/api/1/get/search
        query = twitter.search_tweets_iterable(tso)
        for result in query:
            state = 0
            if result["place"]:
                ... # all the other stuff
                state = state.strip() # <--- the strip; add .upper() if needed, just in case
                if state in ustates:
                    ... # all the other stuff
            ... # the rest of stuff
        print "got %d results" % result_count