我有一个文本文件,我想提取起始(Socket :)和结束关键字(Socket :)之间的界限,然后进行处理。
输入:
Socket: 1
Device ID: 0x0B028041 0xCC344007 0x10000834 0x00000011
CB: 3/ID: 0x445DDC13
BIBID: 0x65C
A:0xB0000190 D:0x310020FF
A:0xB0000194 D:0x00000000
A:0xB0000198 D:0x31002010
A:0xB000019C D:0x00000017
A:0xB00001A0 D:0x31002020
A:0xB00001A4 D:0x00000017
A:0xB00001A8 D:0x31002040
A:0xB00001AC D:0x00000000
A:0xB00001B0 D:0x31001000
ART: 0xB0000800 DRT: 0xB0000000
ART: 0xB0000804 DRT: 0xB0000000
ART: 0xB0000808 DRT: 0xB0000000
ART: 0xB000080C DRT: 0xB0000000
ART: 0xB0000810 DRT: 0xB0000000
ART: 0xB0000814 DRT: 0xB0000000
ART: 0xB0000818 DRT: 0xB0000000
ART: 0xB000081C DRT: 0xB0000000
ART: 0xB0000820 DRT: 0xB0000000
ART: 0xB0000824 DRT: 0xB0000000
ART: 0xB0000828 DRT: 0xB0000000
ART: 0xB000082C DRT: 0xB0000000
ART: 0xB0000830 DRT: 0xB0000000
ART: 0xB0000834 DRT: 0xB0000000
ART: 0xB0000838 DRT: 0xB0000000
ART: 0xB000083C DRT: 0xB0000000
ART: 0xB0000840 DRT: 0xB0000000
ART: 0xB0000844 DRT: 0xB0000000
ART: 0xB0000848 DRT: 0xB0000000
ART: 0xB000084C DRT: 0xB0000000
ART: 0xB0000850 DRT: 0xB0000000
ART: 0xB0000854 DRT: 0xB0000000
ART: 0xB0000858 DRT: 0xB0000000
ART: 0xB000085C DRT: 0xB0000000
ART: 0xB0000860 DRT: 0xB0000000
ART: 0xB0000864 DRT: 0xB0000000
ART: 0xB0000868 DRT: 0xB0000000
ART: 0xB000086C DRT: 0xB0000000
ART: 0xB0000870 DRT: 0xB0000000
ART: 0xB0000874 DRT: 0xB0000000
ART: 0xB0000878 DRT: 0xB0000000
ART: 0xB000087C DRT: 0xB0000000
...
Socket:2
...
当前代码:
import re
from collections import defaultdict
dict2=defaultdict(list)
dict3=defaultdict(list)
socket_position_status = False
dev_id_status = False
CB_noS_status = False
trf_val_flag = False
dict1=defaultdict(list)
pattern="QWL"
rd_case= "Digital"
setup_temp = "0C"
readout_temp = "0C"
address=[]
def tde_file():
with open(r'C:\Gert_batch file\DOE_parsing\Thebe\DOE 4 - 5K\NEW SFR\PF\tde\MRB_QWL_0c_Digital_PS60c_TC1798.tde', 'rb') as f:
for line in f:
pattern_tde = ":TEST_RESULT"
if pattern_tde in line:
tde_addr = ':TEST_RESULT (\d+); addr: ([0-9A-Za-z]{10})'
x0 =re.match(tde_addr, line)
if x0:
address_tde = x0.group(2)
tde_addr_1 = 'cfp_(vqs|vcs)_m(\d+) \// HB05_SB255'
x1 = re.search(tde_addr_1,line)
tde_addr_2 = 'cfp_(vqs|vcs)_m\dm\d_(vth\d.\d) \// HB05_SB255'
x2 = re.search(tde_addr_2,line)
tde_addrs_1 = '(DTS_)value_(before|after)_test_(start|finish)'
y0 = re.search(tde_addrs_1,line)
if x1:
hlp_s = x1.group(2).zfill(2)
identifier = x1.group(1)+"_m"+hlp_s
if x2:
identifier = x2.group(1)+"_m"+x2.group(2)
try:
tde_addr_3 = '(SBE|DBE)|(Number of (\ds) bit fail) \// HB05_SB255'
hlp_s = re.search(tde_addr_3,line).group(1)
if hlp_s:
dict1[pattern,rd_case,address_tde]=identifier+"_"+hlp_s
except AttributeError:pass
try:
tde_addr_3 = '(SBE|DBE)|(Number of (\ds) bit fail) \// HB05_SB255'
hlp_s = re.search(tde_addr_3,line).group(3)
if hlp_s:
dict1[pattern,rd_case,address_tde]=identifier+"_"+hlp_s
except AttributeError: pass
try:
if y0.group(1) and y0.group(3):
dict1[pattern,rd_case,address_tde]=y0.group(1)+y0.group(3)+"_temp"
except AttributeError: pass
#print dict1
#print len(dict1.keys())
#for k,v in sorted(dict1.items()):
#print k,v
def evaluate_lot_wxy(trf_dev_id_pattern):
import re
binary_value = ""
line = trf_dev_id_pattern
dev_id = 'Device ID: ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10})$'
hex_inp1 = re.search(dev_id,line)
#print hex_inp1.group()
hex_inp2 = hex_inp1.group(4)+hex_inp1.group(3)+hex_inp1.group(2)+hex_inp1.group(1)
hex_inp3 = re.sub('0x', '', hex_inp2)
#print hex_inp3
for i,val in enumerate(str(hex_inp3)):
binary_value1=str(bin(int(val,16))[2:]).zfill(4)
binary_value = str(binary_value) + str(binary_value1)
#print binary_value
wafer_val = binary_value[90:96]
wafer = int(wafer_val,2)
y_pos_val = binary_value[106:113]
y_pos = int(y_pos_val,2)
x_pos_val = binary_value[98:105]
x_pos = int(x_pos_val,2)
year_val = binary_value[63:67]
year = int(year_val,2)
production_week_val = binary_value[67:73]
production_week = int(production_week_val,2)
serial_no_val = binary_value[73:83]
serial_no=int(serial_no_val,2)
lot ="ZA"+str(year)+str(production_week)+str(serial_no)
if (1 <= wafer <= 25) and (1<= x_pos <= 65) and (1 <= y_pos <= 65):
dev_id_status = True
return lot,wafer,x_pos,y_pos,dev_id_status
tde_file()
with open(r"C:\Gert_batch file\DOE_parsing\Thebe\DOE 4 - 5K\NEW SFR\PF\1kCycling\Results_452_13384\Result Files\temp\452_20170111_021021_TC1798_MRB_QWL_0c_Digital_PS60c_1021002999.trf", "rt") as f1:
lines = f1.read()
print lines
#for lines in f1.read():
match = re.search(r'Socket:(.*?)Socket:', lines, flags=re.DOTALL)
#print match.group()
for line in match.group().splitlines():
if "Socket:" in line:
trf_addr = 'Socket: (\d+)$'
x0 =re.match(trf_addr, line)
try:
if x0.group(1).zfill(3):
socket_position = x0.group(1).zfill(3)
socket_position_status = True
#print socket_position
except AttributeError: pass
elif "Device ID:" in line:
dev_id = 'Device ID: ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10})$'
x1 = re.search(dev_id,line)
try:
if x1.group(1) and x1.group(1) and x1.group(1) and x1.group(1):
trf_dev_id_pattern = x1.group()#x1.group(4)+x1.group(3)+x1.group(2)+x1.group(1)
lot_wafer_x_y = evaluate_lot_wxy(trf_dev_id_pattern)
dev_id_status = True
#print lot_wafer_x_y
except AttributeError: pass
elif "CB:" in line:
CB_pat = 'CB: (\d+)\/'
x2 = re.search(CB_pat,line)
try:
if x2.group(1):
CB_noS_status = True
#print CB_noS_status
except AttributeError: pass
elif"ART:" in line:
regex = re.search("ART: ([0-9A-Za-z]{10}) DRT: ([0-9A-Za-z]{10})",line)
#print line
try:
if regex.group(1) and regex.group(2):
for key1,val1 in dict1.iteritems():
if regex.group(1) in key1:
#print "Address:"+regex.group(1)
hlp_a = val1
hlp_b = hlp_a.split("_")
identifier = hlp_b[0]
fail_class = hlp_b[1]
key_addtional = hlp_b[2]
val = regex.group(2)
value = int(val[3:],16)
dict2[rd_case,pattern,setup_temp,readout_temp,socket_position,fail_class,identifier,key_addtional]= value
except AttributeError: pass
for k,v in sorted(dict2.items()):
print k,v
当前输出:
目前代码打印第一场比赛的输出,我想获得输入文件中整个比赛的输出。
('Digital', 'QWL', '0C', '0C', '001', 'finish', 'DTS', 'temp') 16
('Digital', 'QWL', '0C', '0C', '001', 'm02', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm02', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm02', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm02', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm03', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm03', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm03', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm03', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm04', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm04', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm04', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm04', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm05', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm05', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm05', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm05', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm06', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm06', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm06', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm06', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm07', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm07', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm07', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm07', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm08', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm08', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm08', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm08', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm09', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm09', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm09', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm09', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm10', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm10', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm10', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm10', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm11', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm11', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm11', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm11', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm12', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm12', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm12', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm12', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm13', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm13', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm13', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm13', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm14', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm14', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm14', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm14', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm15', 'vqs', '0s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm15', 'vqs', '1s') 0
('Digital', 'QWL', '0C', '0C', '001', 'm15', 'vqs', 'DBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'm15', 'vqs', 'SBE') 0
('Digital', 'QWL', '0C', '0C', '001', 'start', 'DTS', 'temp') 14
以上是第一次匹配的输出,但我想得到文件中每个匹配的输出。有人可以帮助我吗?提前谢谢。
答案 0 :(得分:1)
试试这个:
TagPickerViewController
代码输出:
import re
txt = '''Test_Socket: 1
TestA ID: 0x0B028041 0xCC344007 0x10000834 0x00000011
TestA_CB: 3/ID: 0x445DDC13
TESTA_BD: 0x65C
A:0xB0000190 D:0x310020FF
ART: 0xB0000878 DRT: 0xB0000000
ART: 0xB000087C DRT: 0xB0000000 ... Test_Socket:2'''
match = re.search(r'Test_Socket:(.*?)Test_Socket:', txt, flags=re.DOTALL)
print(match.group(1))
提取行之后,您可以迭代它们或运行另一个正则表达式来从这些行中获取所需的内容。
TestA ID: 0x0B028041 0xCC344007 0x10000834 0x00000011
TestA_CB: 3/ID: 0x445DDC13
TESTA_BD: 0x65C
A:0xB0000190 D:0x310020FF
ART: 0xB0000878 DRT: 0xB0000000
ART: 0xB000087C DRT: 0xB0000000 ...
答案 1 :(得分:0)
我找到了一个解决方案,我在regex中尝试了re.finditer(),它按预期工作。请找到我的代码,如果有比这更好的方法,请告诉我。我要感谢大家。
CODE:
import re
from collections import defaultdict
dict2=defaultdict(list)
dict3=defaultdict(list)
dict1=defaultdict(list)
pattern="QWL"
rd_case= "Digital"
setup_temp = "0C"
readout_temp = "0C"
address=[]
def tde_file():
with open(r'C:\Gert_batch file\DOE_parsing\Thebe\DOE 4 - 5K\NEW SFR\PF\tde\MRB_QWL_0c_Digital_PS60c_TC1798.tde', 'rb') as f:
for line in f:
pattern_tde = ":TEST_RESULT"
if pattern_tde in line:
tde_addr = ':TEST_RESULT (\d+); addr: ([0-9A-Za-z]{10})'
x0 =re.match(tde_addr, line)
if x0:
address_tde = x0.group(2)
tde_addr_1 = 'cfp_(vqs|vcs)_m(\d+) \// HB05_SB255'
x1 = re.search(tde_addr_1,line)
tde_addr_2 = 'cfp_(vqs|vcs)_m\dm\d_(vth\d.\d) \// HB05_SB255'
x2 = re.search(tde_addr_2,line)
tde_addrs_1 = '(DTS_)value_(before|after)_test_(start|finish)'
y0 = re.search(tde_addrs_1,line)
if x1:
hlp_s = x1.group(2).zfill(2)
identifier = x1.group(1)+"_m"+hlp_s
if x2:
identifier = x2.group(1)+"_m"+x2.group(2)
try:
tde_addr_3 = '(SBE|DBE)|(Number of (\ds) bit fail) \// HB05_SB255'
hlp_s = re.search(tde_addr_3,line).group(1)
if hlp_s:
dict1[pattern,rd_case,address_tde]=identifier+"_"+hlp_s
except AttributeError:pass
try:
tde_addr_3 = '(SBE|DBE)|(Number of (\ds) bit fail) \// HB05_SB255'
hlp_s = re.search(tde_addr_3,line).group(3)
if hlp_s:
dict1[pattern,rd_case,address_tde]=identifier+"_"+hlp_s
except AttributeError: pass
try:
if y0.group(1) and y0.group(3):
dict1[pattern,rd_case,address_tde]=y0.group(1)+y0.group(3)+"_temp"
except AttributeError: pass
def evaluate_lot_wxy(trf_dev_id_pattern):
import re
binary_value = ""
dev_id_status = False
line = trf_dev_id_pattern
dev_id = 'Device ID: ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10})$'
hex_inp1 = re.search(dev_id,line)
#print hex_inp1.group()
hex_inp2 = hex_inp1.group(4)+hex_inp1.group(3)+hex_inp1.group(2)+hex_inp1.group(1)
hex_inp3 = re.sub('0x', '', hex_inp2)
#print hex_inp3
for i,val in enumerate(str(hex_inp3)):
binary_value1=str(bin(int(val,16))[2:]).zfill(4)
binary_value = str(binary_value) + str(binary_value1)
#print binary_value
wafer_val = binary_value[90:96]
wafer = int(wafer_val,2)
y_pos_val = binary_value[106:113]
y_pos = int(y_pos_val,2)
x_pos_val = binary_value[98:105]
x_pos = int(x_pos_val,2)
year_val = binary_value[63:67]
year = int(year_val,2)
production_week_val = binary_value[67:73]
production_week = int(production_week_val,2)
serial_no_val = binary_value[73:83]
serial_no=int(serial_no_val,2)
lot ="ZA"+str(year)+str(production_week)+str(serial_no)
if (1 <= wafer <= 25) and (1<= x_pos <= 65) and (1 <= y_pos <= 65):
dev_id_status = True
return lot,wafer,x_pos,y_pos,dev_id_status
tde_file()
with open(r"C:\Gert_batch file\DOE_parsing\Thebe\DOE 4 - 5K\NEW SFR\PF\1k Cycling\Results_452_13384\Result Files\452_20170111_021021_TC1798_MRB_QWL_0c_Digital_PS60c_1021002999.trf") as f1:
lines = f1.read()
socket_position_status = False
dev_id_status = False
CB_noS_status = False
trf_val_flag = False
for m in re.finditer(r'Socket:(.*?)ART: 0xB00017EC DRT: 0x00000000\n', lines,flags=re.DOTALL):
x1 = ('%s' % (m.group(0)))
for line in x1.splitlines():
if "Socket:" in line:
trf_addr = 'Socket: (\d+)$'
x0 =re.match(trf_addr, line)
try:
if x0.group(1).zfill(3):
socket_position = x0.group(1).zfill(3)
socket_position_status = True
#print socket_position
except AttributeError: pass
elif "Device ID:" in line:
dev_id = 'Device ID: ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10}) ([0-9a-zA-Z]{10})$'
x1 = re.search(dev_id,line)
try:
if x1.group(1) and x1.group(1) and x1.group(1) and x1.group(1):
trf_dev_id_pattern = x1.group()#x1.group(4)+x1.group(3)+x1.group(2)+x1.group(1)
lot_wafer_x_y = evaluate_lot_wxy(trf_dev_id_pattern)
dev_id_status = lot_wafer_x_y[4]
#print lot_wafer_x_y
except AttributeError: pass
elif "CB:" in line:
CB_pat = 'CB: (\d+)\/'
x2 = re.search(CB_pat,line)
try:
if x2.group(1):
CB_no = x2.group(1)
CB_noS_status = True
#print CB_noS_status
except AttributeError: pass
elif"ART:" in line:
regex = re.search("ART: ([0-9A-Za-z]{10}) DRT: ([0-9A-Za-z]{10}$)",line)
#print line
try:
if regex.group(1) and regex.group(2):
for key1,val1 in dict1.iteritems():
if regex.group(1) in key1:
#print "Address:"+regex.group(1)
hlp_a = val1
hlp_b = hlp_a.split("_")
identifier = hlp_b[0]
fail_class = hlp_b[1]
key_addtional = hlp_b[2]
val = regex.group(2)
value = int(val[3:],16)
trf_val_flag = True
if dev_id_status and trf_val_flag and CB_noS_status:
dict3[rd_case,pattern,setup_temp,readout_temp,CB_no,socket_position,fail_class,identifier,key_addtional]= value
continue
except AttributeError: pass
#elif socket_position_status and dev_id_status and CB_noS_status and trf_val_flag:
#dict3[rd_case,pattern,setup_temp,readout_temp,socket_position,fail_class,identifier]= value
print len(dict3.keys())