我有一个这样的文件 [samplefile.txt]：
dummy line 1
dummy line 2
create_list animal -list
tiger
create_list bird -list
crow
dummy line 3
create_list car -list
ford/mustang
dummy line 4
dummy line 5
create_list truck
-list ford/F150
create_list human -list {
jack
john}
我想得到这样的输出：
['tiger', 'crow', 'ford/mustang', 'ford/F150']
我正在使用的代码：
import re
def extractListItems(File):
    """Print and return the items that follow each ``create_list ... -list``.

    The file is scanned for ``create_list <name> -list`` commands; the
    ``-list`` flag may sit on the line after the name. The items are the
    whitespace-separated tokens on the same line as ``-list`` or, when that
    line ends right after the flag, on the line immediately below it.
    Brace-delimited lists (``-list { ... }``) are skipped.

    Args:
        File: Path of the text file to scan.

    Returns:
        The extracted items in file order; the same list is also printed.
    """
    # Only spaces/tabs and at most ONE newline may separate ``-list`` from its
    # items. The previous pattern used ``\s*`` plus a repeated token group,
    # which kept consuming the unrelated lines that follow an item.
    # ``(?!\S)`` requires the flag to be exactly ``-list``; ``(?!\{)`` rejects
    # brace-delimited lists.
    pattern = re.compile(
        r'\bcreate_list\s+\S+\s+-list(?!\S)[ \t]*\n?[ \t]*(?!\{)(\S[^\n]*)'
    )
    # ``with`` closes the file even if reading raises.
    with open(File, "r") as f:
        content = f.read()
    list_items = []
    for m in pattern.finditer(content):
        # The captured group is one line of items; split it into tokens.
        list_items.extend(m.group(1).split())
    print(list_items)
    return list_items
# Run the extractor on the sample file; the function prints the resulting list.
extractListItems("samplefile.txt")
要获得所需的输出,我需要进行哪些修改?
编辑-根据文件内容和所需的输出进行了更改。
答案 0 :(得分:1)
我认为您要匹配的单词总是缩进的。
import re
# Capture the text of every indented line. ``[ \t]+`` is used rather than
# ``\s+`` because ``\s`` also matches newlines: under re.MULTILINE a blank
# line followed by an unindented line would otherwise be reported as a match.
regex = r"^[ \t]+(\S.*)"

test_str = ("dummy line 1\n"
            "dummy line 2\n"
            "create_list animal -list \n"
            " tiger\n"
            "create_list bird -list \n"
            " crow\n"
            "dummy line 3\n"
            "create_list car -list \n"
            " ford/mustang\n"
            "dummy line 4\n"
            "dummy line 5")

# re.MULTILINE makes ``^`` anchor at the start of each line instead of only at
# the start of the string; findall returns the contents of the single group.
match = re.findall(regex, test_str, re.MULTILINE)
print(match)
['tiger', 'crow', 'ford/mustang']
答案 1 :(得分:0)
如果您的内容总是以空格开头，请尝试：
import re
def extractListItems(File):
    """Print and return the stripped text of every indented line in ``File``.

    A line counts as indented when it starts with at least one whitespace
    character. Whitespace-only lines are ignored so they do not show up as
    empty strings in the result.

    Args:
        File: Path of the text file to read.

    Returns:
        The stripped indented lines in file order; the same list is also
        printed.
    """
    # ``with`` closes the file even if reading raises.
    with open(File, "r") as f:
        content = f.read().splitlines()
    # re.match anchors at the start of the string, so no ``^`` is needed.
    # The ``c.strip()`` test drops lines that contain nothing but whitespace.
    lst = [c.strip() for c in content if re.match(r"\s", c) and c.strip()]
    print(lst)
    return lst
# Run the extractor on the sample file; the function prints the resulting list.
extractListItems("samplefile.txt")
输出
['tiger', 'crow', 'ford/mustang']
OR
# Alternative filter. For a line that starts with whitespace, findall returns
# a one-element list holding that line's text; otherwise it returns [].
def pattern(data):
    return re.findall(r"^[\s].*", data)

# Keep the stripped text of each matching line. The second condition skips
# whitespace-only lines, which would otherwise contribute empty strings.
lst = [
    val[0].strip()
    for val in map(pattern, content)
    if val and val[0].strip()
]
结果
['tiger', 'crow', 'ford/mustang']