我有一个sqs队列的文件,我想从我的s3下载,所以我可以提取文本,但是当我尝试打开我的s3存储桶时,代码似乎总是退出而没有错误。
class MultiThread:
def __init__(self):
conn = boto.sqs.connect_to_region("us-west-2",
aws_access_key_id=ACCESS_KEY,
aws_secret_access_key=SECRET_KEY)
self.sqs_q = conn.get_queue(QUEUE_NAME)
self.count = 0
def start(self, num_threads):
for i in xrange(num_threads):
t = threading.Thread(target=self.run, args=(self.do_work,))
t.start()
def run(self, func):
while self.sqs_q.count() > 0:
try:
rs = self.sqs_q.get_messages()
m = rs[0]
msg = m.get_body()
func(msg)
self.sqs_q.delete_message(m)
except:
print "empty"
def do_work(self, file_name):
doc = DocsScrapper(file_name)
text = doc.get_text()
class DocScrapper:
def __init__(self, file_name):
self.file_name = file_name
conn = boto.connect_s3(aws_access_key_id=ACCESS_KEY,
aws_secret_access_key=SECRET_KEY)
bucket = conn.get_bucket('courtspider')
doc_key = bucket.get_key(file_name)
doc_key.get_contents_to_filename('doc/' + file_name)
def get_text(self):
doc_file = open('doc/' + self.file_name, 'rb')
txt_doc = TextExtractor(doc_file)
text = txt_doc.pdf_to_text()
doc_file.close()
os.remove('doc/' + self.file_name)
return text
是的,存在s3存储桶和文件。