我的代码完美无缺。它的作用是扫描文本文件并检索用户定义的信息并将其存储到访问中。我遇到的唯一问题是,当代码再次运行时,它会插入已经插入的数据。
有没有一种简单的方法可以解决这个问题?数据都有一个共同点就是它们都有时间戳。是否可以使用它来防止重复?
cur.execute("SELECT * FROM Main_Setup order by ID");
rows = cur.fetchall()
# outFileName = "out4.txt"
# The regex pattern that is used to extract timestamp from file
# it will search for timestamps like this 2017-06-13-22.31.30.978293
dateRegEx_1 = r"[0-9]{4}-[0-9]{2}-[0-9]{2}-[0-9]{2}\.[0-9]{2}\.[0-9]{2}\.[0-9]+"
dateRegEx_2 = r"[0-9]{4}-[0-9]{2}-[0-9]{2} \/ [0-9]+:[0-9]+:[0-9]+"
# Compile the pattern
regdExPtrn_1 = re.compile(dateRegEx_1)
regdExPtrn_2 = re.compile(dateRegEx_2)
field_names = ''
# Call insertToAccess function to insert into access database
cur.execute('SELECT * FROM lastran order by ID')
tlran = cur.fetchall()
def insertLastran(JobName,timeStamp):
print(JobName,timeStamp)
def insertToAccess(JobName, TableSeq, timeStamp, accessTableValues, field_names):
# try:
params = (JobName, timeStamp, TableSeq, accessTableValues[0], accessTableValues[1], accessTableValues[2],
accessTableValues[3], accessTableValues[4], \
accessTableValues[5], accessTableValues[6], accessTableValues[7], accessTableValues[8],
accessTableValues[9], field_names)
cur.execute("INSERT INTO Report_Table (Job_Name,Run_TS,Seq_Num,Field1,Field2,Field3,Field4,Field5,Field6,Field7,Field8,Field9,Field10,Field11) VALUES \
(?,?,?,?,?,?,?,?,?,?,?,?,?,?)", params);
conn.commit()
# except:
# conn.rollback()
# Extract the current job fields
def field_Extract(fileLines, fieldsArray, JobName, timeStamp, delimit):
# Empty string in which we will append the
# extracted fields
matchStr = ""
count = 0
TableSeq = 0
accessTableValues = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
field_names = ''
for line in fileLines:
for field in fieldsArray:
if field in line:
key, value = line.split(delimit)
matchStr += key.strip() + "\t\t : " + value.strip() + "\n"
accessTableValues[count] = value.strip()
field_names += key.strip() + ';'
count += 1
if count == 10:
TableSeq += 1
insertToAccess(JobName, TableSeq, timeStamp, accessTableValues, field_names)
count = 0
accessTableValues = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
field_names = ''
if count > 0 and count < 10:
TableSeq += 1
insertToAccess(JobName, TableSeq, timeStamp, accessTableValues, field_names)
# Return the string hold the extracted fields
# Each field is onn a separate line
return matchStr
# Open input and output files
test_file = open(r'C:\Users\cqt7wny\Desktop\new\SAVERS_REPT_DT0712.txt', 'r+')
# outFile = open(outFileName, 'w')
# Initialize used variables
currentJobData = []
startAppending = False
currentJobFound = False # Gaurav note
fields_To_Extract = []
outFileStr = ""
for line in test_file:
if startAppending == False:
# for jobStart in job_start:
if currentJobFound == False:
# Find the job name for the current report and exit the loop #====##########===== Gaurav note
for rowx in rows:
if rowx[1] in line:
currentJobName = rowx[1]
search_Start_Point = rowx[2]
search_End_Point = rowx[3]
fields_To_Extract = rowx[4].split(';')
currentJobFound = True
break
if currentJobName == 'xx':
currentJobName = previousJobName
search_Start_Point = previous_search_Start_Point
search_End_Point = previous_search_End_Point
fields_To_Extract = previous_fields_To_Extract
if search_Start_Point in line:
startAppending = True
if startAppending == True:
currentJobData.append(line)
if len(search_End_Point) > 1 and (search_End_Point in line):
# As a job end found, stop gathering lines
startAppending = False
# Get the time stamp
# We search for it in the currnet line using the previously
# compiled regex pattern
txt = "".join(currentJobData)
# Find all occurance of timestamps on the current job lines
timeStamp = regdExPtrn_1.findall(txt)
# Check that a timestamp found
if len(timeStamp) >= 1:
# If there is more than one timestamp in the current
# job lines, get only the first one
timeStamp = timeStamp[0]
else:
timeStamp = regdExPtrn_2.findall(txt)
if len(timeStamp) >= 1:
timeStamp = timeStamp[0]
# Append the found output to the output string
outFileStr += '########============ NEW JOB STARTS HERE ===========#########'
outFileStr += "\n"
outFileStr += "job# " + str(currentJobName)
outFileStr += "\n"
outFileStr += "Timestamp: " + timeStamp
outFileStr += "\n"
outFileStr = field_Extract(currentJobData, fields_To_Extract, currentJobName, timeStamp, ':')
insertLastran(currentJobName,timeStamp)
print('Current job Name :', currentJobName, ' : ', timeStamp)
print(outFileStr)
previousJobName = currentJobName
previous_search_Start_Point = search_Start_Point
previous_search_End_Point = search_End_Point
previous_fields_To_Extract = fields_To_Extract
currentJobName = 'xx'
currentJobFound = False
currentJobData = []
fields_To_Extract = []
search_Start_Point = ' '
search_End_Point = ' '
test_file.close()
答案 0 :(得分:0)
可以采用不同的方法来防止重复插入。
1.检查数据库/表中是否已插入/存在数据。如果数据不存在,则插入数据,否则忽略/不插入数据。
2.您可以创建一个您认为存在重复项的UNIQUE CONSTRAINT。
喜欢ALTER TABLE MYTABLE ADD CONSTRAINT constraint1 UNIQUE(column1)