我一直在试图弄清楚如何解压缩然后将一个大的csv文件拆分成块但保留每个文件的头。我已经在这里和那里尝试了一些东西,但我不够精明,无法使其发挥作用。
这是我正在使用的代码:
def run():
# Get filename
subprocess.call( "path to file" ./original --recursive".split())
filename = subprocess.check_output("ls original/ ".split()).strip()
filename = 'original/' + filename
enter code here
# Ungzip then split file
file_content = ungzip(filename)
zip_content(split_content)
def ungzip(filename):
with gzip.open(filename) as f:
counter = 0;
name_counter = 0
split_content = ''
for line in f:
counter = counter + 1
split_content = split_content + str(line)
if(counter >= 250000):
print '-----------'
name_counter = name_counter + 1
name_string = 'file' + str(name_counter)
print "zipping " + name_string
zip_content(split_content, name_string )
split_content = ''
counter = 0
return file_content