我有一段代码,可以解析文本文件并把结果打印到 stdout,
但我需要直接修改现有的文本文件,并保留原有的缩进。
这是我的代码:
import re
import collections
class Group:
    """Hold the data collected for one named group of the parsed file."""

    def __init__(self):
        # Member names gathered from the group's definition lines.
        self.members = []
        # Non-empty text lines that appeared after a definition line.
        self.text = []
# Parse text1.txt into groups, merge the member lists of groups that share a
# name, and print the merged result to stdout.
with open('text1.txt') as f:
    groups = collections.defaultdict(Group)
    group_pattern = re.compile(r'^(\S+)\((.*)\)$')
    current_group = None
    for line in f:
        # NOTE: strip() removes the leading whitespace as well as the newline,
        # which is why the printed text loses its original indentation.
        line = line.strip()
        m = group_pattern.match(line)
        if m:  # this is a group definition line
            group_name, group_members = m.groups()
            # Only add members that this group does not already contain.
            groups[group_name].members += filter(lambda x: x not in groups[group_name].members, group_members.split(','))
            current_group = group_name
        else:
            # Text before the first definition line and blank lines are dropped.
            if (current_group is not None) and (len(line) > 0):
                groups[current_group].text.append(line)

# Single-argument print(...) produces the same output on Python 2 and 3.
for group_name, group in groups.items():
    print("%s(%s)" % (group_name, ','.join(group.members)))
    print('\n'.join(group.text))
    print("")
INPUT Text.txt
Car(skoda,audi,benz,bmw)
The above mentioned cars are sedan type and gives long rides efficient
......
Car(Rangerover,audi,Hummer)
SUV cars are used for family time and spacious.
预期输出Text.txt
Car(skoda,audi,benz,bmw,Rangerover,Hummer)
The above mentioned cars are sedan type and gives long rides efficient
......
SUV cars are used for family time and spacious.
但输出为:
Car(skoda,audi,benz,bmw,Rangerover,Hummer)
The above mentioned cars are sedan type and gives long rides efficient
......
SUV cars are used for family time and spacious.
我如何保留缩进?
答案 0 :(得分:1)
正如 Python 文档(python documentation)中所述,使用 open 并指定模式 w
打开文件时会截断该文件并允许写入;之后再把内容写入文件即可:
# Reopen the file in 'w' mode (which truncates it) and write the merged
# groups back. `groups` is the mapping built by the parsing code.
with open('text1.txt', 'w') as f:
    for group_name, group in groups.items():
        # Terminate the header line so the group's text starts on its own
        # line instead of being glued to the closing parenthesis.
        f.write("%s(%s)\n" % (group_name, ','.join(group.members)))
        f.write('\n'.join(group.text) + '\n')
你也可以使用 r+ 模式只打开文件一次,
这样既可以读也可以写,然后相应地修改你的代码:
# Sketch: open the file once in 'r+' so the same handle can be read and then
# rewritten. The elided parts are the parsing code from the question.
with open('text1.txt', 'r+') as f:
    groups = ...  # placeholder: build `groups` as in the original code
    ...
    # ... groups[current_group].text.append(line)   <- end of the parsing loop
    f.seek(0)  # move the cursor to the beginning of the file
    f.truncate()  # deletes everything from the file
    for group_name, group in groups.items():
        # Terminate the header line so the group's text starts on its own line.
        f.write("%s(%s)\n" % (group_name, ','.join(group.members)))
        f.write('\n'.join(group.text) + '\n')
答案 1 :(得分:0)
代码中丢失缩进的原因是:在把每一行添加到 dict 之前,先用 strip() 去掉了该行首尾的空白字符。
for line in f:
    # strip() removes leading whitespace too, so the indentation is lost here.
    line = line.strip()
我重写了代码:第一个循环找出每个组的所有成员,第二个循环把这些成员追加到该组第一次出现的那一行,并删除同一组的其他出现位置。这种做法有点 hacky,但我相信你可以把它改得更高效。
import re
import collections
class Group:
    """Hold the data collected for one named group of the parsed file."""

    def __init__(self):
        # Member names gathered from the group's definition lines.
        self.members = []
        # Free-text lines belonging to the group.
        self.text = []
# Rewrite text1.txt in place: the first definition line of each group receives
# the merged member list, later definition lines of the same group are removed,
# and every other line (including its indentation) is written back untouched.
with open('text1.txt', "r+") as f:
    # Keep the raw lines so specific ones can be edited by index.
    lines = f.readlines()
    groups = collections.defaultdict(Group)
    group_pattern = re.compile(r'^(\S+)\((.*)\)$')

    # Pass 1: collect the members of every group, without duplicates.
    for raw_line in lines:
        m = group_pattern.match(raw_line.strip())
        if not m:
            continue
        group_name, group_members = m.groups()
        members = groups[group_name].members
        for member in group_members.split(','):
            if member not in members:
                members.append(member)

    # Pass 2: edit only real definition lines. Matching with the pattern
    # (rather than a substring test) keeps ordinary text lines that merely
    # mention a group name from being blanked or rewritten.
    already_seen = set()
    for idx, raw_line in enumerate(lines):
        m = group_pattern.match(raw_line.strip())
        if not m:
            continue
        group_name = m.group(1)
        if group_name in already_seen:
            # Later definition of an already-merged group: drop the line.
            lines[idx] = ""
            continue
        already_seen.add(group_name)
        # Replace only the text between the parentheses so the line's own
        # leading whitespace and trailing newline are preserved.
        open_par = raw_line.index("(")
        close_par = raw_line.rindex(")")
        member_str = ",".join(groups[group_name].members)
        lines[idx] = raw_line[:open_par + 1] + member_str + raw_line[close_par:]

    # Rewind first, then truncate: truncate() cuts at the current position,
    # which is the end of the file right after readlines(). Truncating before
    # seeking would leave stale bytes behind the (shorter) new content.
    f.seek(0)
    f.truncate()
    f.writelines(lines)