我需要读取一个约 30,000 行的 Excel 文件(.xlsx)作为输入,并使用 xlrd 来读取它。读取文件后,我会对数据做一些处理,并将结果存储在对象列表中。问题在于:我的代码在行数较少(例如 100 到 200 行)时运行正常,但在处理较大的文件时会卡在循环中。
我曾尝试使用 Python 多线程,但始终没有找到合适的解决方案。
def read(file_object, request):
    """Parse an uploaded attendance workbook into unsaved ``AttendanceLog`` objects.

    The first row of every sheet is skipped. For each remaining row, column 2
    is read as the employee code, column 3 as an Excel serial date and
    column 5 as the time; all other columns are ignored.

    Args:
        file_object: Object exposing ``id`` and ``file.path`` (the workbook
            location on disk).
        request: Request whose session holds the company id under
            ``SessionKeys.COMPANY_KEY``.

    Returns:
        dict: Maps each sheet index to
        ``{'items': [AttendanceLog, ...], 'needs_edit': bool}``.
        ``needs_edit`` is True when at least one row of that sheet carries a
        code with no matching ``Employee`` in the company.

    Raises:
        KeyError: If the company key is missing from the session.
        Whatever ``xlrd.open_workbook`` / ``xldate_as_datetime`` raise for an
        unreadable workbook or an invalid date cell, and any exception other
        than ``ObjectDoesNotExist`` raised by the employee lookup.
    """
    company_id = request.session[SessionKeys.COMPANY_KEY]
    file_id = file_object.id
    wb = xlrd.open_workbook(file_object.file.path)
    company_employees = Employee.objects.filter(company_id=company_id)

    # Cache of employee lookups, shared across all sheets. Querying the
    # database once per row makes large workbooks (tens of thousands of rows)
    # extremely slow; codes repeat heavily, so each distinct code is queried
    # only once. A miss is cached as None. Rows with the same code therefore
    # share one Employee instance.
    employee_cache = {}

    data = dict()
    for sheet_index, sheet in enumerate(wb.sheets()):
        number_of_columns = sheet.ncols
        items = []
        needs_edit = False
        for row in range(1, sheet.nrows):
            al = AttendanceLog()
            al.company_id = company_id
            al.file_id = file_id

            if number_of_columns > 2:
                code = sheet.cell(row, 2).value
                al.code = code
                # Key on the type as well: 1.0 (number cell) and 1 (boolean
                # cell) hash equal but are different lookup values.
                cache_key = (type(code), code)
                if cache_key not in employee_cache:
                    try:
                        employee_cache[cache_key] = company_employees.get(code=code)
                    except ObjectDoesNotExist:
                        employee_cache[cache_key] = None
                employee = employee_cache[cache_key]
                if employee is None:
                    needs_edit = True
                else:
                    al.employee = employee
                    al.name = employee.name

            if number_of_columns > 3:
                al.date = xlrd.xldate.xldate_as_datetime(
                    sheet.cell(row, 3).value, wb.datemode
                ).date()

            if number_of_columns > 5:
                al.time = sheet.cell(row, 5).value

            items.append(al)
        data[sheet_index] = {'items': items, 'needs_edit': needs_edit}
    return data