以下基于Django文档1的代码可以很好地解析400MB CSV中的某些年份并通过StreamingHttpResponse下载(例如,仅请求2016产生13MB CSV)。添加过滤器参数以返回与特定位置匹配的记录(例如,不列颠哥伦比亚省)仅返回标题行。当我编辑CSV以将不列颠哥伦比亚省的记录样本放在CSV的顶部时,输出那些...然后下载停止。测试用例通过,我认为这是HTTPResponse的某种超时?有keepalive选项吗?
import csv
import os
import urllib
import datetime
from io import BytesIO
from zipfile import ZipFile
from django.http import HttpResponse
from django.http import StreamingHttpResponse
from STC_Arrivals import settings
class Echo(object):
def write(self, value):
return value
def extract_years_from_csv(startyear, endyear, filter_text):
f = open(settings.MEDIA_ROOT + settings.CANSIM_FILE + ".csv")
reader = csv.DictReader(f)
pseudo_buffer = Echo()
writer = csv.writer(pseudo_buffer)
yield writer.writerow(reader.fieldnames)
try:
for row in reader:
readyears = row['Ref_Date']
readyear = int(readyears[:4])
if int(startyear) <= readyear <= int(endyear):
if len(filter_text):
search_text = row['GEO'] + row['TRAV']
if filter_text.lower() in search_text.lower():
yield writer.writerow(
[row['Ref_Date'], row['GEO'], row['TRAV'], row['Vector'], row['Coordinate'], row['Value']])
else:
yield writer.writerow(
[row['Ref_Date'], row['GEO'], row['TRAV'], row['Vector'], row['Coordinate'], row['Value']])
except ValueError:
pass
finally:
f.close()
def download(request, startyear, endyear):
# if invalid years provided, return last year and current year
# if request contains an additional parameter to filter for, pass it to the extraction method.
try:
if int(startyear) < 2005:
startyear = datetime.datetime.now().year - 1
except ValueError:
startyear = datetime.datetime.now().year - 1
try:
if int(endyear) > datetime.datetime.now().year:
endyear = datetime.datetime.now().year
except ValueError:
endyear = datetime.datetime.now().year
filter_text = request.GET.get("filter", "")
response = StreamingHttpResponse(extract_years_from_csv(startyear, endyear, filter_text), content_type="text/csv")
response['Content-Disposition'] = 'attachment; filename="CANSIM04270001-eng-' + startyear + '-' + endyear + '.csv"'
return response
def test_extract_years_from_csv(self):
testyear = "2016"
filtertext = "British Columbia"
rows = views.extract_years_from_csv(testyear, testyear, filtertext)
for row in rows:
try:
readyear = int(row[:4])
self.assertEqual(readyear, int(testyear))
self.assertIn(filtertext, row)
except ValueError:
print "Conversion error"