为什么我的异步方法仍然会阻塞 Tornado?

时间:2016-03-16 15:33:28

标签: python asynchronous tornado

我还是 Tornado 异步编程的新手,所以不能百分之百确定自己的做法是否正确。

这是我想要异步运行的脚本:
import locale
import logging

from tornado.gen import Task, Return, coroutine

from datetime import datetime, timedelta
from dateutil import tz
from mongo import db

@coroutine
def table():
  """Run ``driver`` and hand its result (the report HTML) to the caller.

  ``driver`` is itself a coroutine, so the Future it returns is yielded
  directly. ``Task`` is an adapter for callback-style functions and only
  worked here through the implicit ``callback`` keyword of decorated
  coroutines.

  Returns (via ``Return``): whatever ``driver`` returns.
  """
  response = yield driver()
  raise Return(response)

def _parse_revenue(raw):
  """Convert a revenue value such as '$1,234.50' (or a number) to a float.

  Values that cannot be read as a number are logged and counted as 0.0 so
  that one malformed record does not abort the whole report.
  """
  try:
    if hasattr(raw, 'replace'):
      raw = raw.replace('$', '').replace(',', '')
    return float(raw)
  except (TypeError, ValueError):
    logging.warning('ignoring unparseable revenue value %r', raw)
    return 0.0


def _add_to_tally(tally, date, revenue):
  """Add one revenue-bearing line item to the running sum/count for ``date``."""
  entry = tally.setdefault(date, {'revenue': 0.0, 'count': 0})
  entry['revenue'] += revenue
  entry['count'] += 1


def _site_claims_placement(site, placement_key):
  """Tell whether ``site`` should be credited with a placement.

  ``placement_key`` is the placement name lower-cased with spaces removed.
  A hit on any ``prevent_reporting`` pattern overrides ``reporting`` hits.
  """
  def any_match(patterns):
    for pattern in patterns or ():
      needle = pattern.strip().lower()
      if needle and needle in placement_key:
        return True
    return False

  if any_match(site.get('prevent_reporting')):
    return False
  return any_match(site.get('reporting'))


def _save_provider_history(network, date, weekday, entry):
  """Upsert one network/day row into ``revenue_provider_history``.

  Best effort: a failed write is logged and does not interrupt the report.
  """
  year, month, day = date.split('-')
  try:
    db.revenue_provider_history.update(
      {'network': network, 'date': date},
      {'network': network, 'date': date, 'revenue': entry['revenue'],
       'count': entry['count'], 'day': day, 'month': month, 'year': year,
       'day_of_week': weekday},
      upsert=True)
  except Exception:
    logging.exception('could not store revenue history for %s on %s', network, date)


def _month_total(network, year, month):
  """Return the summed history revenue of ``network`` for one month.

  Returns None when the aggregation yields no usable row, so the caller can
  never pick up a total left over from an earlier query.
  """
  results = db.revenue_provider_history.aggregate([
    {'$match': {'network': network, 'year': year, 'month': month}},
    {'$group': {'_id': {'network': '$network'}, 'total': {'$sum': '$revenue'}}}
  ])
  total = None
  for res in results:
    try:
      total = res['total']
    except (KeyError, TypeError):
      total = None
  return total


@coroutine
def driver():
  """Build and return the HTML revenue report for the two previous days.

  The report covers "two days ago" and "yesterday" in America/New_York time.
  It tallies revenue per network from ``db.revenue_data``, upserts the daily
  figures into ``db.revenue_provider_history`` and adds month-to-date and
  previous-month totals per network.

  NOTE(review): nothing in this function yields; every ``db`` call is made
  and consumed synchronously, so the function runs start to finish without
  giving control back to the IOLoop even though it is decorated with
  ``@coroutine``.

  Returns:
    str: the HTML fragment (two summary paragraphs and one table).
  """
  try:
    locale.setlocale(locale.LC_ALL, 'en_US')
  except locale.Error:
    locale.setlocale(locale.LC_ALL, 'en_US.utf8')

  # Current time in New York; the report window is the two days before it.
  utc = datetime.utcnow().replace(tzinfo=tz.gettz('UTC'))
  current_date = utc.astimezone(tz.gettz('America/New_York'))
  start_at = current_date + timedelta(days=-2)
  end_at = current_date + timedelta(days=-1)

  start_date = start_at.strftime("%Y-%m-%d")
  start_day = start_at.strftime("%a")
  end_date = end_at.strftime("%Y-%m-%d")
  end_day = end_at.strftime("%a")

  # Year/month are kept as zero-padded strings because that is how they are
  # written to revenue_provider_history (pieces of the YYYY-MM-DD date).
  year = end_at.strftime("%Y")
  month = end_at.strftime("%m")
  prev_year = year
  prev_month = int(month) - 1
  if prev_month < 1:
    prev_month = "12"
    prev_year = str(int(year) - 1)
  else:
    prev_month = "%02d" % prev_month

  # One bucket per day in the window: date string -> {site name: [records]}.
  dates = {start_date: {}}
  day_cursor = start_at
  while day_cursor < end_at:
    day_cursor += timedelta(days=1)
    dates[day_cursor.strftime("%Y-%m-%d")] = {}

  logging.info('getting sites from db')
  sites = list(db.sites.find())

  revenue_totals = {}   # date -> revenue of line items matched to a site
  provider_totals = {}  # date -> {'revenue', 'count'} over all networks
  rev_providers = {}    # network -> date -> {'revenue', 'count'}
  # slugs, tags, name, PLACEMENT, TOTAL_IMPRESSIONS, FILLED_IMPRESSIONS, DEFAULTED_IMPRESSIONS, REVENUE, DATE, NETWORK, CTR, AD_SIZE, LOCATION, CPM
  for date in dates:
    logging.info('retrieving data for %s', date)
    records = list(db.revenue_data.find({'date': date}))
    logging.info('data recieved, parsing...')
    for record in records:
      for rec in record['records']:
        rec['slugs'] = []
        revenue = rec['REVENUE'] = _parse_revenue(rec['REVENUE'])
        has_revenue = revenue > 0.0
        placement = rec['PLACEMENT']

        # add to our revenue provider tallies
        if 'l2' in placement or '2nd' in placement or 'second' in placement:
          rec['NETWORK'] = '%s_2nd' % rec['NETWORK']
        if has_revenue:
          _add_to_tally(rev_providers.setdefault(rec['NETWORK'].lower(), {}), date, revenue)
          _add_to_tally(provider_totals, date, revenue)

        # determine what slugs match this line item
        placement_key = placement.replace(' ', '').lower()
        for site in sites:
          if _site_claims_placement(site, placement_key):
            rec['slugs'].append(site['slug'])
            rec['tags'] = site.get('tags', [])
            rec['name'] = site['name']

        if 'name' in rec and has_revenue:
          # this is a line item we want to keep, grouped by site for this date
          dates[date].setdefault(rec['name'], []).append(rec)
          revenue_totals[date] = revenue_totals.get(date, 0.0) + revenue

  mtd_sum = 0.0
  pmtd_sum = 0.0
  report_days = ((start_date, start_day), (end_date, end_day))

  # Days without any revenue are reported as zero instead of raising KeyError.
  html = [
    '<p>%s generated <strong>%s</strong> in revenue.</p>' % (end_date, locale.currency(revenue_totals.get(end_date, 0.0), grouping=True)),
    '<p>%s generated <strong>%s</strong> in revenue.</p>' % (start_date, locale.currency(revenue_totals.get(start_date, 0.0), grouping=True)),
    '<p>The following table shows the breakdown over these two days.</p>',
    '<p></p>',
    '<table border="1">',
    '<tr>',
    '<th>NETWORK</th>',
    '<th>%s</th>' % start_date,
    '<th>TAGS</th>',
    '<th>%s</th>' % end_date,
    '<th>TAGS</th>',
    '<th>MTD</th>',
    '<th>PM</th>',
    '</tr>',
  ]

  logging.info('creating table')
  for network in sorted(rev_providers):
    html.append('<tr>')
    html.append('<td>%s</td>' % network)

    # revenue and line-item count for the start date, then for the end date
    for day_string, weekday in report_days:
      entry = rev_providers[network].get(day_string)
      if entry is None:
        html.append('<td align="right">$0.00</td>')
        html.append('<td align="right">0</td>')
        continue
      html.append('<td align="right">%s</td>' % locale.currency(entry['revenue'], grouping=True))
      # log in the revenue provider history
      _save_provider_history(network, day_string, weekday, entry)
      html.append('<td align="right">%s</td>' % entry['count'])

    # get the MTD total
    month_to_date = _month_total(network, year, month)
    if month_to_date is None:
      html.append('<td>N/A</td>')
    else:
      html.append('<td align="right">%s</td>' % locale.currency(month_to_date, grouping=True))
      mtd_sum += month_to_date

    # get the PMTD total
    previous_month = _month_total(network, prev_year, prev_month)
    if previous_month is None:
      html.append('<td>N/A</td>')
    else:
      html.append('<td align="right">%s</td>' % locale.currency(previous_month, grouping=True))
      pmtd_sum += previous_month
    html.append('</tr>')

  no_revenue = {'revenue': 0.0, 'count': 0}
  start_totals = provider_totals.get(start_date, no_revenue)
  end_totals = provider_totals.get(end_date, no_revenue)
  html.extend([
    '<tr>',
    '<td>&nbsp;</td>',
    '<td align="right">%s</td>' % locale.currency(start_totals['revenue'], grouping=True),
    '<td align="right">%s</td>' % start_totals['count'],
    '<td align="right">%s</td>' % locale.currency(end_totals['revenue'], grouping=True),
    '<td align="right">%s</td>' % end_totals['count'],
    '<td align="right">%s</td>' % locale.currency(mtd_sum, grouping=True),
    '<td align="right">%s</td>' % locale.currency(pmtd_sum, grouping=True),
    '</tr>',
    '</table>',
  ])

  logging.info("complete")

  return ''.join(html)

def on_timeout():
  """Timeout callback: write an INFO-level "timeout" entry to the root log."""
  logging.log(logging.INFO, "timeout")

这是 post 方法(请求处理程序):

#needs work not truely async
class ProviderTable(app.basic.BaseHandler):
  """Handler that generates the provider revenue table on POST."""

  @tornado.web.authenticated
  @coroutine
  def post(self):
    """Run the revenue table script, record an audit note and send the HTML.

    A 300 second timeout callback is registered while the script runs and is
    removed again as soon as the script finishes or fails, so a "timeout" log
    entry is only written for requests that are still running at the deadline.
    If the script returns an empty string, no audit note is logged and
    ``api_response`` is not called.
    """
    seconds_to_wait = 300
    deadline = time.time() + seconds_to_wait
    io_loop = IOLoop.instance()
    timeout_handle = io_loop.add_timeout(deadline, generate_provider_rev_table.on_timeout)

    logging.info('running table script')
    try:
      result = yield generate_provider_rev_table.table()
    finally:
      # Without this the callback would still fire 300s after every request.
      io_loop.remove_timeout(timeout_handle)

    if result != '':
      # One timestamp for both fields so the note is self-consistent.
      generated_at = datetime.utcnow()
      notes = {
        'created': generated_at,
        'slug': '',
        'value': '',
        'logged_by': 'systems',
        'message': 'Revenue table generated at {0}'.format(generated_at.strftime("%Y-%m-%d %H:%M:%S")),
      }
      audit_notesdb.log_audit_note(notes)
      self.api_response(result)

这就是我想做的事情:我想运行 driver() 方法,并通过 table() 方法来调用它,我原以为这样就是异步的。之后我应该拿到结果,也就是 driver 方法中 rp_html 所包含的表格内容。当我请求 post 方法时,脚本确实会运行,但它仍然阻塞了我的线程。我哪里做错了,该如何解决?

1 个答案:

答案 0 :(得分:3)

您的 driver 函数是阻塞的。之所以阻塞,是因为您使用的 mongo 库和 logging 模块都是同步的。所以,当您调用

  sites = list(db.sites.find())

  logging.info('retrieving data for {0}'.format(date))
实际上您的事件循环(IOLoop)会被阻塞,一直等待响应返回。

关于 logging:Tornado 内置了日志记录功能(请参阅 here),您只需在应用设置中配置自定义的日志函数即可,但它同样是阻塞的。您可以尝试使用队列,例如先把日志放入某个队列,再由某个(异步的)worker 将它们写到 stdout:每次 IOLoop 迭代时,worker 都会启动(this way)并刷新队列中的所有内容。

至于非阻塞的 mongo 访问,可以使用例如 motor 这样的库,如下所示:

    from tornado.gen import coroutine
    from motor.motor_tornado import MotorClient

    db = MotorClient().db

    # ``yield`` is only legal inside a function, so the query lives in a
    # coroutine; yielding the Future lets the IOLoop run other handlers
    # while the documents are being fetched.
    @coroutine
    def process_sites():
        cursor = db.sites.find()
        for document in (yield cursor.to_list(length=None)):
            pass  # do something with the current record