假设我有一个10GB的文件,其中有20,000行填充了pi的数字。
如何在Python中调用unix命令sed -n?
我希望每行都有一个换行符,使用下面的代码导出到文件。
到目前为止,我有以下内容:
# Build the sed address range "<start>,<end>p" from the current window,
# then run sed through the shell, redirecting its output to output.txt.
sed_range = str(window[0]) + "," + str(window[1]) + "p"
com = "sed -n ' " + sed_range + "' " + "sample.txt" + ">" + "output.txt"
os.system(com)
但是它会引发字符串拼接(concatenation)错误。
我应该如何在下面的Python程序中使用sed -n命令?
# Path of the input file that PiCrop opens for its two header lines and
# passes to linecache when looking up individual lines.
inputFileName = "sample.txt"
import itertools
import linecache
def sliding_window(window_size, step_size, last_window_start, total_pi_digits=None):
    """Yield ``(start, end)`` pairs for overlapping windows plus one tail window.

    Regular windows start at ``0, step_size, 2 * step_size, ...`` (every start
    strictly below ``last_window_start``) and each one ends ``window_size``
    after its start. A final ``(last_window_start, total_pi_digits)`` pair is
    yielded last.

    Args:
        window_size: Span of every regular window.
        step_size: Distance between consecutive window starts.
        last_window_start: Start of the tail window; regular starts stop
            before it.
        total_pi_digits: End of the tail window. The original code read this
            name without defining it in this scope; it is now an explicit
            argument. When omitted, a module-level ``total_pi_digits`` is
            used so three-argument callers behave as before.

    Raises:
        NameError: If ``total_pi_digits`` is neither passed nor defined at
            module level (raised only when the tail window is reached).

    NOTE(review): when ``step_size < window_size`` the last regular windows
    end beyond ``last_window_start``; confirm that overshoot is intended.
    """
    # range() works on both Python 2 and 3; xrange() exists only on Python 2.
    for start in range(0, last_window_start, step_size):
        yield (start, start + window_size)

    if total_pi_digits is None:
        # Legacy fallback for callers that relied on a module-level value.
        try:
            total_pi_digits = globals()["total_pi_digits"]
        except KeyError:
            raise NameError(
                "total_pi_digits was not passed and is not defined at module level"
            )
    yield (last_window_start, total_pi_digits)


def PiCrop(window_size, step_size):
    """Write a ``PiCrop_<n>.txt`` header file for every sliding window.

    The first token of the first line of ``inputFileName`` is taken as the
    total count, and the first token of the second line (a float, rounded to
    an int) as the offset. For each window produced by ``sliding_window`` a
    file ``PiCrop_<counter>.txt`` is created containing one line,
    ``"<window_size> <value> L"``, where ``value`` is the float found on file
    line ``window[1] + 1`` minus the offset, formatted with four decimals.
    After that, ``sed -n`` is run through the shell for the window's range.

    Args:
        window_size: Span of each window.
        step_size: Distance between consecutive window starts.
    """
    # Local import: the original called os.system() without importing os.
    import os

    # Read both header lines in one pass; the handle is closed by the with
    # block even if parsing below fails.
    with open(inputFileName, 'r') as f:
        first_line = f.readline().split()
        second_line = f.readline().split()

    total_pi_digits = int(first_line[0])
    last_window_start = total_pi_digits - (total_pi_digits % window_size)
    offset = int(round(float(second_line[0].strip('\n'))))

    # enumerate() hands out every counter exactly once, so the original
    # per-counter "flags" list (which was one entry too short and raised
    # IndexError on the last window) is not needed.
    windows = sliding_window(window_size, step_size, last_window_start, total_pi_digits)
    for counter, window in enumerate(windows):
        with open('PiCrop_{}.txt'.format(counter), 'w') as output:
            # NOTE(review): linecache keeps the lines of the file in memory,
            # which is costly for a very large input. A window whose end lies
            # past the last line makes getline() return '' and float() fail.
            headerline = float(linecache.getline(inputFileName, window[1] + 1)) - offset
            output.write(str(window_size) + " " + str("{0:.4f}".format(headerline)) + " " + 'L' + '\n')

            # NOTE(review): the command is unchanged from the original. Each
            # run redirects with ">" so output.txt only keeps the most recent
            # window, and sed line addresses start at 1 so the first window's
            # start of 0 is presumably rejected -- confirm the intended
            # destination and line numbering.
            com = "sed -n \' " + str(window[0]) + "," + str(window[1]) + "p\' " + "sample.txt" + ">" + "output.txt"
            os.system(com)
# Run the cropper with windows of 1000 that advance by 500.
PiCrop(window_size=1000, step_size=500)
答案 0 :(得分:4)
您可以从文件中获取每一行:
def lines(filename):
    """Lazily yield each line of ``filename``, line endings included.

    The file is opened in default text mode and is closed once the generator
    is exhausted or discarded.
    """
    with open(filename) as source:
        # readline() returns '' only at end of file, which ends the loop.
        for text in iter(source.readline, ''):
            yield text
然后可以使用islice截取其中一段行范围:
from itertools import islice
# Stream items 10000 through 20000 (zero-based) of the line generator into
# PiCrop.txt without loading the whole input into memory.
with open('PiCrop.txt', 'w') as output:
    output.writelines(islice(lines('sample.txt'), 10000, 20001))