我正在使用Django创建一个网站,该网站允许用户上传2个csv文件,然后找出这2个文件之间的差异,输出差异并添加一个新的“更改”列。在这个新列中,每条记录都会被标记为“更改”、“添加”或“删除”。假设没有重复记录。到目前为止,这是views.py的代码。
from django.shortcuts import render
from django.http import HttpResponse
import difflib
import datetime
import csv
from django.http import HttpResponseRedirect
from django.http import FileResponse
from .forms import FileForm
from .forms import UploadFileForm
def handle_uploaded_file(filename_1, filename_2):
    """Compare two tab-separated files and write the differences to difference.csv.

    Rows are keyed by their first column. A key present only in the second
    input is reported as ``Added``, a key present only in the first input as
    ``Deleted``, and a key present in both whose rows differ as ``Changed``
    (the new row is written). Identical rows are not reported.

    Args:
        filename_1: The "old" data. Either a filesystem path (``str``,
            ``bytes`` or an ``os.PathLike``) or an already-open file-like
            object that yields lines as ``str`` or ``bytes`` (for example an
            entry of ``request.FILES``).
        filename_2: The "new" data, in the same forms as ``filename_1``.

    Raises:
        UnicodeDecodeError: If a binary input is not valid UTF-8.

    The result is written to ``difference.csv`` in the current working
    directory: a ``History`` column followed by the header of the new file
    (or of the old file when the new one is empty), then the reported rows
    sorted by their original columns.
    """

    def _parse(lines):
        """Return ``(header, rows_by_key)`` for an iterable of lines."""
        # csv.reader only accepts str lines, so decode lines coming from
        # file objects opened in binary mode.
        text_lines = (
            line.decode('utf-8-sig') if isinstance(line, bytes) else line
            for line in lines
        )
        reader = csv.reader(text_lines, delimiter='\t')
        # An empty input has no header row; treat it as an empty header
        # instead of letting StopIteration escape.
        header = next(reader, [])
        # Blank lines are parsed as empty rows and have no key column.
        return header, {row[0]: row for row in reader if row}

    def _load(source):
        """Parse ``source``, opening it first when it is a path."""
        if isinstance(source, (str, bytes)) or hasattr(source, '__fspath__'):
            with open(source, newline='') as handle:
                return _parse(handle)
        return _parse(source)

    old_header, old_data = _load(filename_1)
    new_header, new_data = _load(filename_2)
    header = new_header or old_header

    set_new_data = set(new_data)
    set_old_data = set(old_data)

    added = [['Added'] + new_data[v] for v in set_new_data - set_old_data]
    deleted = [['Deleted'] + old_data[v] for v in set_old_data - set_new_data]
    in_both = set_old_data & set_new_data
    changed = [
        ['Changed'] + new_data[v]
        for v in in_both
        if old_data[v] != new_data[v]
    ]

    with open('difference.csv', 'w', newline='') as f_output:
        csv_output = csv.writer(f_output, delimiter='\t')
        csv_output.writerow(['History'] + header)
        # Sort on the original columns (everything after the History tag).
        csv_output.writerows(
            sorted(added + deleted + changed, key=lambda x: x[1:])
        )
def index(request):
    """Serve the upload page and process submitted files.

    For any method other than POST an unbound ``UploadFileForm`` is rendered.
    For a POST the form is bound to the submitted data and files; when it
    validates, the two uploads are passed to ``handle_uploaded_file`` and the
    client is redirected to ``results/``. An invalid POST falls through and
    re-renders ``hello.html`` with the bound form.
    """
    if request.method != 'POST':
        form = UploadFileForm()
    else:
        # Debug output: show what was uploaded.
        print(request.FILES)
        form = UploadFileForm(request.POST, request.FILES)
        # Debug output: show validation errors, if any.
        print(form.errors)
        if form.is_valid():
            print("cool")
            first_upload = request.FILES.get('file1')
            second_upload = request.FILES.get('file2')
            handle_uploaded_file(first_upload, second_upload)
            return HttpResponseRedirect('results/')
    context = {'form': form}
    return render(request, 'hello.html', context)
def results(request):
    """Send the generated difference.csv back to the user as a download."""
    # Absolute location of the report on the server; a raw string keeps the
    # backslashes from being read as escape sequences.
    report_path = r'C:\Users\Public\Documents\PycharmProjects\filecomparison\difference.csv'
    download = FileResponse(open(report_path, 'rb'))
    response_headers = (
        # The payload is a CSV file.
        ('Content-Type', 'text/csv'),
        # Offer it as an attachment named difference.csv.
        ('Content-Disposition', 'attachment; filename=difference.csv'),
    )
    for header_name, header_value in response_headers:
        download[header_name] = header_value
    return download
这是old.csv
Name Emp_ID Zone_ID Zone Name Customer Type First Name Cust_ID Balance Loan Asset Name Serial Purchase_ID
Jack 4145 10-34-2Z-71ABD Bakery Matt 41235 -123.567 123.44 43521_BLACK_BURNER 52+007XX QOLO-LW09
Bob 4146 10-35-2Z-71ABD Woodsmith Dylan 12315 -129.12 194.44 43521_BLACK_BURNER 50+001XX KDFL-LW10
Rick 4147 10-34-2Z-73ABD Gunrange Robin 64234 -134.12 133.44 43521_BLACK_BURNER 32+003XX HFDO-LW11
Shane 4148 10-34-2Z-72ABD Restaurant Aldo 31356 -121.23 949.44 43521_BLACK_BURNER 89+004XX BDSM-LW12
这是new.csv
Name Emp_ID Zone_ID Zone Name Customer Type First Name Cust_ID Balance Loan Asset Name Serial Purchase_ID
Jack 4145 10-34-2Z-71ABD Bakery Matt 41235 -123.567 123.44 43521_BLACK_BURNER 52+007XX QOLO-LW09
Bob 4146 10-35-2Z-71ABD Woodsmith Ron 12315 -129.12 194.44 43521_BLACK_BURNER 50+001XX KDFL-LW10
Jane 1234 19-35-2K-72XYZ EO Karen 50980 -547.95 544.39 43521_BLACK_BURNER 50+0076KK ERQW-BN66
Shane 4148 10-34-2Z-72ABD Restaurant Aldo 31356 -121.23 949.44 43521_BLACK_BURNER 89+004XX BDSM-LW12
这是difference.csv
Name Emp_ID Zone_ID Zone Name Customer Type First Name Cust_ID Balance Loan Asset Name Serial Purchase_ID Changes
Bob 4146 10-35-2Z-71ABD Woodsmith Ron 12315 -129.12 194.44 43521_BLACK_BURNER 50+001XX KDFL-LW10 Changed
Jane 1234 19-35-2K-72XYZ EO Karen 50980 -547.95 544.39 43521_BLACK_BURNER 50+0076KK ERQW-BN66 Added
Rick 4147 10-34-2Z-73ABD Gunrange Robin 64234 -134.12 133.44 43521_BLACK_BURNER 32+003XX HFDO-LW11 Deleted
,这就是我得到的
当前,当我把自己的代码与Martin的代码合并时,会收到“TypeError: expected str, bytes or os.PathLike object, not list”错误。但是,如果我单独运行Martin的代码,则如下图所示,可以得到diff.csv。我猜delimiter='\t'
是“History”列与其他列混在一起的原因。我试图添加2个分隔符,希望能把它们分开,但是Python不允许我这样做。另一方面,在搜索了该TypeError的解决方案之后,我以为在handle_uploaded_file函数中使用open()方法就能解决此问题,但错误仍然存在,或许这并不是正确的解决方案。非常感谢任何帮助。
答案 0 :(得分:1)
假设您不需要处理old.csv
包含重复条目或new.csv
包含重复条目(即同一个名称在同一文件中出现多次)的情况。正如有人建议的那样,实现这一目标的一种方法是利用Python的集合。
通过在每个文件中设置一组名称,可以轻松地使用set操作确定添加,删除和更改。例如:
import csv
# Index every data row of the old file by its first column.
with open('old.csv', newline='') as old_file:
    old_reader = csv.reader(old_file, delimiter='\t')
    header = next(old_reader)
    old_data = {}
    for record in old_reader:
        old_data[record[0]] = record

# Do the same for the new file; its header row replaces the one read above.
with open('new.csv', newline='') as new_file:
    new_reader = csv.reader(new_file, delimiter='\t')
    header = next(new_reader)
    new_data = {}
    for record in new_reader:
        new_data[record[0]] = record

set_new_data = set(new_data)
set_old_data = set(old_data)
in_both = set_old_data & set_new_data

# Keys only in the new file were added.
added = []
for key in set_new_data - set_old_data:
    added.append(['Added'] + new_data[key])

# Keys only in the old file were deleted.
deleted = []
for key in set_old_data - set_new_data:
    deleted.append(['Deleted'] + old_data[key])

# Keys in both files count as changed when the whole row differs.
changed = []
for key in in_both:
    if old_data[key] != new_data[key]:
        changed.append(['Changed'] + new_data[key])

# Order the report by the original columns, ignoring the History tag.
report = added + deleted + changed
report.sort(key=lambda entry: entry[1:])

with open('difference.csv', 'w', newline='') as out_file:
    writer = csv.writer(out_file, delimiter='\t')
    writer.writerow(['History'] + header)
    writer.writerows(report)
这将比较整个行的差异。在Python 3.6.3上测试
尝试将此脚本添加到代码中的一个问题是Python 3.x中的CSV库期望使用newline=''
参数打开文件(如果使用2.x,则以二进制模式打开)。当您的函数接收到的是已经打开的文件句柄时,这就成了问题。请尝试以下操作:
import csv
def handle_uploaded_file(file_1, file_2):
    """Compare two open tab-separated files and write difference.csv.

    Rows are keyed by their first column. Keys found only in ``file_2`` are
    reported as ``Added``, keys found only in ``file_1`` as ``Deleted``, and
    keys found in both whose rows differ as ``Changed`` (the new row is
    written). Identical rows are not reported.

    Args:
        file_1: Open file-like object holding the "old" data. It may yield
            lines as ``str`` or as ``bytes``.
        file_2: Open file-like object holding the "new" data, same forms.

    Raises:
        UnicodeDecodeError: If a binary input is not valid UTF-8.

    The result is written to ``difference.csv`` in the current working
    directory: a ``History`` column followed by the header of ``file_2``
    (or of ``file_1`` when ``file_2`` is empty), then the reported rows
    sorted by their original columns.
    """

    def _as_text(lines):
        """Yield each line as ``str``, decoding lines that arrive as bytes."""
        # csv.reader rejects bytes, which is what binary handles produce.
        for line in lines:
            yield line.decode('utf-8-sig') if isinstance(line, bytes) else line

    def _index_rows(handle):
        """Return ``(header, rows_by_key)`` for one open file."""
        reader = csv.reader(_as_text(handle), delimiter='\t')
        # An empty file has no header row; use an empty header rather than
        # letting StopIteration escape.
        header = next(reader, [])
        # Blank lines are parsed as empty rows and have no key column.
        return header, {row[0]: row for row in reader if row}

    old_header, old_data = _index_rows(file_1)
    new_header, new_data = _index_rows(file_2)
    header = new_header or old_header

    set_new_data = set(new_data)
    set_old_data = set(old_data)

    added = [['Added'] + new_data[v] for v in set_new_data - set_old_data]
    deleted = [['Deleted'] + old_data[v] for v in set_old_data - set_new_data]
    in_both = set_old_data & set_new_data
    changed = [
        ['Changed'] + new_data[v]
        for v in in_both
        if old_data[v] != new_data[v]
    ]

    with open('difference.csv', 'w', newline='') as f_output:
        csv_output = csv.writer(f_output, delimiter='\t')
        csv_output.writerow(['History'] + header)
        # Sort on the original columns (everything after the History tag).
        csv_output.writerows(
            sorted(added + deleted + changed, key=lambda x: x[1:])
        )