使用Django StreamingHttpResponse超时流式传输大型解析的CSV?

时间:2016-10-25 02:07:02

标签: python django csv

以下基于Django文档1的代码可以很好地解析400MB CSV中的某些年份并通过StreamingHttpResponse下载(例如,仅请求2016产生13MB CSV)。添加过滤器参数以返回与特定位置匹配的记录(例如,不列颠哥伦比亚省)仅返回标题行。当我编辑CSV以将不列颠哥伦比亚省的记录样本放在CSV的顶部时,输出那些...然后下载停止。测试用例通过,我认为这是HTTPResponse的某种超时?有keepalive选项吗?

import csv
import os
import urllib
import datetime
from io import BytesIO
from zipfile import ZipFile
from django.http import HttpResponse
from django.http import StreamingHttpResponse
from STC_Arrivals import settings


class Echo(object):
def write(self, value):
    return value


def extract_years_from_csv(startyear, endyear, filter_text):
f = open(settings.MEDIA_ROOT + settings.CANSIM_FILE + ".csv")
reader = csv.DictReader(f)

pseudo_buffer = Echo()
writer = csv.writer(pseudo_buffer)
yield writer.writerow(reader.fieldnames)

try:
    for row in reader:
        readyears = row['Ref_Date']
        readyear = int(readyears[:4])
        if int(startyear) <= readyear <= int(endyear):
            if len(filter_text):
                search_text = row['GEO'] + row['TRAV']
                if filter_text.lower() in search_text.lower():
                    yield writer.writerow(
                        [row['Ref_Date'], row['GEO'], row['TRAV'], row['Vector'], row['Coordinate'], row['Value']])
            else:
                yield writer.writerow(
                    [row['Ref_Date'], row['GEO'], row['TRAV'], row['Vector'], row['Coordinate'], row['Value']])

except ValueError:
    pass
finally:
    f.close()


def download(request, startyear, endyear):
# if invalid years provided, return last year and current year
# if request contains an additional parameter to filter for, pass it to the extraction method.

try:
    if int(startyear) < 2005:
        startyear = datetime.datetime.now().year - 1
except ValueError:
    startyear = datetime.datetime.now().year - 1

try:
    if int(endyear) > datetime.datetime.now().year:
        endyear = datetime.datetime.now().year
except ValueError:
    endyear = datetime.datetime.now().year

filter_text = request.GET.get("filter", "")

response = StreamingHttpResponse(extract_years_from_csv(startyear, endyear, filter_text), content_type="text/csv")
response['Content-Disposition'] = 'attachment; filename="CANSIM04270001-eng-' + startyear + '-' + endyear + '.csv"'
return response

def test_extract_years_from_csv(self):

    testyear = "2016"
    filtertext = "British Columbia"

    rows = views.extract_years_from_csv(testyear, testyear, filtertext)

    for row in rows:
        try:
            readyear = int(row[:4])
            self.assertEqual(readyear, int(testyear))
            self.assertIn(filtertext, row)
        except ValueError:
            print "Conversion error"

0 个答案:

没有答案