Question

我需要取出文件中的某一部分并写入一个新文件，再把其余部分保存到另一个新文件中。这样我就会有 3 个文件：1）原始文件；2）选定的行；3）其余的行。我的代码对第一次选择是有效的，但在进行下一次及之后的选择时遇到了问题。这是我的代码：

# Split the raw data in '1': its first 10 lines are copied to '3', and every
# remaining line is copied to '2'.
# (Asker's note: the next selections needed are lines 10 to 20, then 20 to 30.)
with open('1', 'r') as raw, open('2', 'w') as rest, open('3', 'w') as picked:
    for index, text in enumerate(raw):
        # index is 0-based, so indexes 0..9 are the first ten lines.
        target = picked if index < 10 else rest
        target.write(text)

如何更改我的代码以便我可以进行下一次选择？

Answer 1

为了速度，我假设文件足够小，可以整个读入内存（而不是每次都重新读取文件）：

from itertools import islice

BLOCKSZ = 10      # lines per chunk

# file names
INPUT = "raw_data.txt"
# Both name builders take a half-open line range [a, b) and label the file
# with the inclusive range a..b-1.
OUTPUT_LINES  = lambda a, b: "data_lines_{}_to_{}.txt" .format(a, b-1)
OUTPUT_EXCEPT = lambda a, b: "data_except_{}_to_{}.txt".format(a, b-1)

def main():
    """Split INPUT into BLOCKSZ-line chunks, writing two files per chunk.

    For every chunk [start, end) of the input, one file receives exactly the
    lines of that chunk and a second file receives every other line of the
    input (the lines before the chunk followed by the lines after it).
    Line numbers are 0-based. An empty input produces no output files.
    """
    # read file as list of lines; the whole file is held in memory so each
    # chunk can be sliced without re-reading the input
    with open(INPUT) as inf:
        data = list(inf)

    # ceiling division, so a trailing partial chunk still gets its own files
    num_blocks = (len(data) + BLOCKSZ - 1) // BLOCKSZ
    for block in range(num_blocks):
        # calculate start and end lines for this chunk
        start =  block      * BLOCKSZ
        end   = (block + 1) * BLOCKSZ

        # write out [start:end]; islice stops early when `end` is past the
        # last line, which handles the final partial chunk
        with open(OUTPUT_LINES(start, end), "w") as outf:
            outf.writelines(islice(data, start, end))

        # write out [:start] + [end:]; `data` is a list, so every islice call
        # starts from index 0 and the offsets here are absolute
        with open(OUTPUT_EXCEPT(start, end), "w") as outf:
            outf.writelines(islice(data, start))
            outf.writelines(islice(data, end, None))


if __name__=="__main__":
    main()

编辑：我刚刚意识到，我在 OUTPUT_EXCEPT 的行切片中犯了一个错误（误以为 islice 的偏移量是绝对的，而不是相对的）；现在已经修复了。

Answer 2

这应该能满足你的需求，代码用 Python 3.x 编写。

# Read every line of alpha.txt into a list and record how many there are.
# The with-block closes the file even if reading fails.
with open('alpha.txt', 'r') as alpha:
    alphaLine = alpha.readlines()
alphaLength = len(alphaLine)

# The 10th through 20th lines (1-based, inclusive) are sent to gamma.txt;
# every line before or after that range is sent to beta.txt.
with open('beta.txt', 'w') as beta, open('gamma.txt', 'w') as gamma:
    for i, line in enumerate(alphaLine):
        # i is 0-based, so 9 <= i < 20 picks the 10th..20th lines.
        if 9 <= i < 20:
            gamma.write(line)
        else:
            beta.write(line)

Answer 3

这是你想要的吗？

def split_on_crosses(infile, chunk_size):
    """Split *infile* into per-chunk "head" files and cumulative "tail" files.

    Lines are counted from 1. Chunk k (lines (k-1)*chunk_size+1 through
    k*chunk_size) is written to '<k>-head.txt'. Each time a chunk boundary is
    reached, '<k>-tail.txt' is opened and from then on receives every later
    line of the input. All output files are created in the current working
    directory and are closed before the function returns, including when an
    error interrupts the copy.

    Args:
        infile: path of the text file to read.
        chunk_size: number of lines per chunk.
    """
    head_num = 1  # counter for chunks
    tails = []  # outports to tail files
    head_file = open('1-head.txt', 'w')  # outport to first head file
    try:
        with open(infile, 'r') as inport:  # open raw data
            for i, line in enumerate(inport, start=1):
                head_file.write(line)
                for t in tails:  # write to all tail files
                    t.write(line)
                if i % chunk_size == 0:  # boundary of chunk is reached
                    tails.append(open('%s-tail.txt' % head_num, 'w'))  # add one tail file
                    head_num += 1
                    # close the finished head before switching, so its
                    # content is flushed without relying on garbage collection
                    head_file.close()
                    head_file = open('%s-head.txt' % head_num, 'w')  # switch to next head file
    finally:
        # close() on an already-closed file is a no-op, so this is safe even
        # if opening the next head file failed right after the close above
        head_file.close()
        for t in tails:
            t.close()

# Example run: split 'infile.txt' using a chunk size of 10 lines.
split_on_crosses('infile.txt', 10)

如何使用python从文件中获取某些行

3 个答案: