有人能帮我写一个正确的脚本,从下面的样本数据中提取所需的字段吗?
样本数据:
DHDR .///20198782827372
D 17639937 0 50k 0 6 TTT1234 CHECK-Hello ksdhujshds :19: 782382689jds :19B:kahdhsd
D 97837283 0 10k 0 6 TTT3456 CHECK-Hello akjdhjdks :19: 782382689jds :19B: kshdhd TTT3927837 CHECK-Hello
M 98382388 0 20k 0 6 TTT3457 CHECK-Hello ishadhsjh :19: 782382689jds :19B
当前代码:
import glob
import re

# Digits that follow "TTT" when the token is immediately followed by
# "CHECK-Hello" (e.g. "TTT1234 CHECK-Hello" -> "1234").
TTT_PATTERN = re.compile(r'TTT(\d+)\s*CHECK-Hello')
# Digits that follow ":19:" in a field that is later closed by " :19B:"
# (e.g. ":19: 782382689jds :19B:" -> "782382689").
ID_PATTERN = re.compile(r':19:\s*(\d+).*? :19B:')


def extract_ids(text):
    """Return one "TTT_ID,ID" string per detail ('D') record in *text*.

    The text is processed line by line so that the TTT id and the :19: id
    are always taken from the same record.  Only lines whose first
    whitespace-separated token is exactly 'D' are considered (header lines
    such as 'DHDR' and other record types are skipped), matching the
    expected output, which lists D records only.  Lines where either id is
    missing are skipped instead of raising.

    Args:
        text: Full contents of one input file.

    Returns:
        A list of "TTT_ID,ID" strings, in file order.
    """
    pairs = []
    for line in text.splitlines():
        tokens = line.split(None, 1)
        if not tokens or tokens[0] != 'D':
            continue
        # search() takes the first occurrence, so a second "TTT... CHECK-Hello"
        # later on the same line does not affect the result.
        ttt_match = TTT_PATTERN.search(line)
        id_match = ID_PATTERN.search(line)
        if ttt_match is None or id_match is None:
            continue
        pairs.append(ttt_match.group(1) + "," + id_match.group(1))
    return pairs


files = glob.glob('*.csv')
strings = []
for file in files:
    with open(file, "r", encoding='UTF-8') as fobj:
        strings.extend(extract_ids(fobj.read()))
所需的输出:
TTT_ID ID
1234 782382689
3456 782382689
答案 0 :(得分:1)
import glob
import re
import csv
# Digits that follow "TTT" when the token is immediately followed by
# "CHECK-Hello" (e.g. "TTT1234 CHECK-Hello" -> "1234").
TTT_PATTERN = re.compile(r'TTT(\d+)\s*CHECK-Hello')
# Digits that follow ":19:" in a field that is later closed by " :19B:"
# (e.g. ":19: 782382689jds :19B:" -> "782382689").
ID_PATTERN = re.compile(r':19:\s*(\d+).*? :19B:')


def parse_row(row):
    """Return "TTT_ID,ID" for a detail ('D') CSV row, or None to skip it.

    A row qualifies when it has at least two columns, its first column
    (whitespace-stripped) is exactly 'D', and both ids are found in the
    second column.  Blank rows, short rows, other record types and rows
    without a match return None instead of raising IndexError.

    NOTE(review): this keeps the original layout assumption -- record type
    in column 0 and the payload in column 1 of a comma-delimited file.
    Confirm against the real input file.

    Args:
        row: One row as produced by csv.reader (a list of strings).

    Returns:
        The "TTT_ID,ID" string, or None when the row should be skipped.
    """
    if len(row) < 2 or row[0].strip() != 'D':
        return None
    payload = row[1]
    # Capture the digits directly rather than slicing fixed widths, so ids
    # of any length are extracted whole.
    ttt_match = TTT_PATTERN.search(payload)
    id_match = ID_PATTERN.search(payload)
    if ttt_match is None or id_match is None:
        return None
    return ttt_match.group(1) + "," + id_match.group(1)


files = glob.glob('testing.csv')
strings = []
for file in files:
    # newline='' is the csv module's documented way to open files for reading.
    with open(file, "r", encoding='UTF-8', newline='') as fobj:
        for row in csv.reader(fobj, delimiter=','):
            pair = parse_row(row)
            if pair is not None:
                strings.append(pair)
print(strings)