我正在编写一个解析.txt文件并抓取用户定义输入的键/值对的程序。我的代码的逻辑非常简单。它抓取程序应搜索的字符串列表,并将字符串列表存储在其适当的变量中。我编写了一个函数field_extract来执行键/值对。
我的调试问题是" currentJob = job_start.index(jobStart)+ 1"。逻辑上应该可以正常工作,但由于某种原因它不会。
以下代码
n2600RA1 = []
n2600RA2 = []
n2600RA3 = []
n2600RA4 = []
n2600RA5 = []
n2601CV4 = []
n2601IV4 = []
(etc)....
job_start = []
job_end = []
names = []
name = None
pk = []
for row in rows:
name = row[1]
fields = row[4].split(',')
start = row[2]
end = row[3]
prim = row[0]
if name == 'JOB - N2600RA1':
n2600RA1.extend(fields)
job_start.append(start)
job_end.append(end)
pk.append(prim)
names.append(name)
elif name == 'JOB - N2600RA2':
n2600RA2.extend(fields)
job_start.append(start)
job_end.append(end)
pk.append(prim)
names.append(name)
(etc)...
"""
DATABASE CONNECTION ENDED
"""
outFileName = "out3.txt"
# The regex pattern that is used to extract timestamp from file
# it will search for timestamps like this 2017-06-13-22.31.30.978293
dateRegEx = r"[0-9]{4}-[0-9]{2}-[0-9]{2}-[0-9]{2}\.[0-9]{2}\.[0-9]{2}\.[0-9]+"
# Compile the pattern
regExPtrn = re.compile(dateRegEx)
# Extract the current job fields
def field_Extract(fileLines, fieldsArray, delimit):
# Empty string in which we will append the
# extracted fields
matchStr = ""
for line in fileLines:
for field in fieldsArray:
if line.startswith(field):
key, value = line.split(delimit)
matchStr += key.strip() + "\t\t : " + value.strip() + "\n"
# Return the string hold the extracted fields
# Each field is onn a separate line
return matchStr
# Open input and output files
test_file = open(r'C:\Users\cqt7wny\Desktop\SAVERS_REPT_DT0712.txt', 'r+')
outFile = open(outFileName, 'w')
# Initialize used variables
currentJob = -1
currentJobData = []
startAppending = False
outFileStr = ""
for line in test_file:
# Get the current job
# We will loop over each job start and check if this start is
# contained in the current line, if so, this line will be considered
# the first line of the job
for jobStart in job_start:
if jobStart in line:
# the used search function return the index of the first
# match of the string. If the substring has more than one
# occurance in the searchable string, only the index of
# the first occurance will be returned
currentJob = job_start.index(jobStart) + 1
# Set the flag to start gathering job lines
# for each job we will apped alll its lines to an empty
# list, so we have all job lines separated
startAppending = True
# If job start found, gathar job lines
if startAppending == True:
currentJobData.append(line)
# Set the correct job
if currentJob == 1:
job = n2600RA1
elif currentJob == 2:
job = n2600RA2
elif currentJob == 3:
job = n2600RA3
elif currentJob == 4:
job = n2600RA4
elif currentJob == 5:
job = n2600RA5
elif currentJob == 6:
job = n2601CV4
elif currentJob == 7:
job = n2601IV4
elif currentJob == 8:
job = n2601CV1
#elif currentJob == 9:
# job = aTL10GV1
elif currentJob == 9:
job = n2601CW3
else:
currentJob = -1
# Check job end
# We will loop over each job end and check if this end is
# contained in the current line, if so, this line will be considered
# the last line of the job
for jobEnd in job_end:
# Check that this is a valid job and that the job ending
# string is contained in the current line
if (currentJob != -1) and (jobEnd in line):
print(currentJob)
# As a job end found, stop gathering lines
startAppending = False
# Get the time stamp
# We search for it in the currnet line using the previously
# compiled regex pattern
txt = "".join(currentJobData)
# Find all occurance of timestamps on the current job lines
#timeStamp = regExPtrn.findall(txt)
# Check that a timestamp found
#if len(timeStamp) >= 1:
# If there is more than one timestamp in the current
# job lines, get only the first one
#timeStamp = timeStamp[0]
# Append the found output to the output string
outFileStr += '########============ NEW JOB STARTS HERE ===========#########'
outFileStr += "\n"
outFileStr += "job# " + str(name[currentJob])
outFileStr += "\n"
#outFileStr += "Timestamp: " + timeStamp
outFileStr += "\n"
# Extract job fields values
outFileStr += field_Extract(currentJobData, job, ':')
# Erase completed job lines to be used with the next job
currentJobData = []
# Set job to invalid job
currentJob = -1
# Write the output to output file
outFile.write(outFileStr)
# Close opened files
outFile.close()
test_file.close()
调试currentjob下面的输出。
1
1
1
1
1
1
1
1
1
1
6
8
6
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
答案 0 :(得分:1)
你可能错过了休息时间:
for jobStart in job_start:
if jobStart in line:
currentJob = job_start.index(jobStart) + 1
startAppending = True
break # <-- THIS!
您也应该强烈考虑简化代码。它有很多不必要的重复代码。
考虑将您的工作存储在dict中,而不是为了保存自己: (作业名称也可以从db或文本文件中获取)
jobs_names = [
'n2600RA1', 'n2600RA2', 'n2600RA3', 'n2600RA4',
'n2600RA5', 'n2601CV4', 'n2601IV4', 'n2601CV1',
'aTL10GV1', 'bEL10GV1', 'cHL10GV1', 'cZL10GV1',
'dEL10GV1', 'dKL10GV1', 'eSL10GV1', 'fIL10GV1',
'fRL10GV1', 'gBL10GV1', 'hUL10GV1', 'iEL10GV1',
'iTL10GV1', 'nLL10GV1', 'nOL10GV1', 'pLL10GV1',
'pTL10GV1', 'sEL10GV1', 'n2601CW3',
]
jobs = {n: [] for n in jobs_names}
print(jobs)
输出:
{'n2600RA1': [], 'n2600RA2': [], 'n2600RA3': [], 'n2600RA4': [],
'n2600RA5': [], 'n2601CV4': [], 'n2601IV4': [], 'n2601CV1': [],
'aTL10GV1': [], 'bEL10GV1': [], 'cHL10GV1': [], 'cZL10GV1': [],
'dEL10GV1': [], 'dKL10GV1': [], 'eSL10GV1': [], 'fIL10GV1': [],
'fRL10GV1': [], 'gBL10GV1': [], 'hUL10GV1': [], 'iEL10GV1': [],
'iTL10GV1': [], 'nLL10GV1': [], 'nOL10GV1': [], 'pLL10GV1': [],
'pTL10GV1': [], 'sEL10GV1': [], 'n2601CW3': []}