创建此脚本的主要原因是电子邮件有40MB的大小限制。当我在线安排定时报告时,如果报告大小超过40MB,它就不会送达我的收件箱。因此,我先手动下载文件,用下面的脚本将其拆分,然后重新发送拆分后的文件。我的问题是:我注意到拆分后各文件中的日期顺序不一致。
01)我想按日期对行进行排序,例如让第一个文件包含整个数据中最早的日期。
02)我想修改脚本,按每个文件的预期大小(MB)而不是按行数进行拆分。
import pandas as pd
import numpy as np
import csv
import sys
import os
def _open_piece(output_path, output_name_template, piece, delimiter):
    """Open output file number *piece* and return ``(file_handle, csv_writer)``."""
    path = os.path.join(output_path, output_name_template % piece)
    # newline='' lets the csv module control line terminators itself.
    handle = open(path, 'w', newline='')
    return handle, csv.writer(handle, delimiter=delimiter)


def split(filehandler, delimiter=',', row_limit=70000,
          output_name_template='LARGE EXCEL/OUTPUT/output_%s.csv', output_path='.', keep_headers=True):
    """Split a CSV stream into numbered files of at most ``row_limit`` data rows.

    Args:
        filehandler: Iterable of text lines (e.g. an open file) holding the CSV
            data to split.
        delimiter: Field delimiter used for both reading and writing.
        row_limit: Maximum number of data rows per output file; the header
            row, when kept, is not counted.
        output_name_template: ``%``-style template that receives the 1-based
            piece number; it is joined onto ``output_path``.
        output_path: Directory prefix for the output files. The target
            directory is not created here and must already exist.
        keep_headers: When true, the first input row is treated as a header
            and repeated at the top of every output file.

    The first output file is always created, even when the input is empty.
    Each output file is closed as soon as the next one is started, and the
    last one is closed when the function exits, including on error.
    """
    reader = csv.reader(filehandler, delimiter=delimiter)
    current_piece = 1
    out_file, writer = _open_piece(
        output_path, output_name_template, current_piece, delimiter
    )
    try:
        # next(..., None) avoids a StopIteration on a completely empty input.
        headers = next(reader, None) if keep_headers else None
        if headers is not None:
            writer.writerow(headers)

        rows_in_piece = 0
        for row in reader:
            if rows_in_piece >= row_limit:
                # Current piece is full: close it before starting the next.
                out_file.close()
                current_piece += 1
                out_file, writer = _open_piece(
                    output_path, output_name_template, current_piece, delimiter
                )
                if headers is not None:
                    writer.writerow(headers)
                rows_in_piece = 0
            writer.writerow(row)
            rows_in_piece += 1
    finally:
        # Guarantees the last piece is flushed and closed.
        out_file.close()
# Split the downloaded report; the with-block closes the input file when done.
# newline='' is what the csv module expects so quoted fields containing line
# breaks are parsed correctly.
with open('LARGE EXCEL/FILE.csv', 'r', newline='') as source_file:
    split(source_file)