Duplicate inserts into a database table

Date: 2017-08-18 16:51:15

Tags: python mysql sql python-3.x

My code works fine. It scans a text file, pulls out user-defined information, and stores it in an Access database. The only problem I have is that when the code is run again, it inserts data that has already been inserted.

Is there a simple way to solve this? One thing all of the data has in common is a timestamp. Can that be used to prevent duplicates?

cur.execute("SELECT * FROM Main_Setup order by ID");
rows = cur.fetchall()
# outFileName = "out4.txt"

# The regex pattern that is used to extract timestamp from file
# it will search for timestamps like this 2017-06-13-22.31.30.978293
dateRegEx_1 = r"[0-9]{4}-[0-9]{2}-[0-9]{2}-[0-9]{2}\.[0-9]{2}\.[0-9]{2}\.[0-9]+"
dateRegEx_2 = r"[0-9]{4}-[0-9]{2}-[0-9]{2} \/ [0-9]+:[0-9]+:[0-9]+"
# Compile the pattern
regdExPtrn_1 = re.compile(dateRegEx_1)
regdExPtrn_2 = re.compile(dateRegEx_2)
field_names = ''


# Call insertToAccess function to insert into access database

cur.execute('SELECT * FROM lastran order by ID')
tlran = cur.fetchall()

def insertLastran(JobName,timeStamp):
    print(JobName,timeStamp)

def insertToAccess(JobName, TableSeq, timeStamp, accessTableValues, field_names):


    # try:
    params = (JobName, timeStamp, TableSeq,
              accessTableValues[0], accessTableValues[1], accessTableValues[2],
              accessTableValues[3], accessTableValues[4], accessTableValues[5],
              accessTableValues[6], accessTableValues[7], accessTableValues[8],
              accessTableValues[9], field_names)

    cur.execute("INSERT INTO Report_Table (Job_Name, Run_TS, Seq_Num, Field1, Field2, Field3, Field4, "
                "Field5, Field6, Field7, Field8, Field9, Field10, Field11) "
                "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)", params)
    conn.commit()
    # except:
    #    conn.rollback()


# Extract the current job fields
def field_Extract(fileLines, fieldsArray, JobName, timeStamp, delimit):
    # Empty string in which we will append the
    # extracted fields
    matchStr = ""
    count = 0
    TableSeq = 0
    accessTableValues = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    field_names = ''
    for line in fileLines:
        for field in fieldsArray:
            if field in line:
                key, value = line.split(delimit)
                matchStr += key.strip() + "\t\t : " + value.strip() + "\n"
                accessTableValues[count] = value.strip()
                field_names += key.strip() + ';'
                count += 1
                if count == 10:
                    TableSeq += 1
                    insertToAccess(JobName, TableSeq, timeStamp, accessTableValues, field_names)
                    count = 0
                    accessTableValues = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
                    field_names = ''
    if count > 0 and count < 10:
        TableSeq += 1
        insertToAccess(JobName, TableSeq, timeStamp, accessTableValues, field_names)

    # Return the string hold the extracted fields
    # Each field is on a separate line
    return matchStr


# Open input and output files
test_file = open(r'C:\Users\cqt7wny\Desktop\new\SAVERS_REPT_DT0712.txt', 'r+')
# outFile = open(outFileName, 'w')

# Initialize used variables
currentJobData = []
startAppending = False
currentJobFound = False  # Gaurav note
currentJobName = 'xx'       # sentinel value meaning "no job matched yet"
search_Start_Point = ' '
search_End_Point = ' '
fields_To_Extract = []
outFileStr = ""
for line in test_file:
    if startAppending == False:
        # for jobStart in job_start:
        if currentJobFound == False:
            # Find the job name for the current report and exit the loop    #====##########=====  Gaurav note
            for rowx in rows:
                if rowx[1] in line:
                    currentJobName = rowx[1]
                    search_Start_Point = rowx[2]
                    search_End_Point = rowx[3]
                    fields_To_Extract = rowx[4].split(';')
                    currentJobFound = True
                    break
            if currentJobName == 'xx':
                currentJobName = previousJobName
                search_Start_Point = previous_search_Start_Point
                search_End_Point = previous_search_End_Point
                fields_To_Extract = previous_fields_To_Extract

        if search_Start_Point in line:
            startAppending = True

    if startAppending == True:
        currentJobData.append(line)

    if len(search_End_Point) > 1 and (search_End_Point in line):
        # As a job end found, stop gathering lines
        startAppending = False
        # Get the time stamp
        # We search for it in the current line using the previously
        # compiled regex pattern
        txt = "".join(currentJobData)
        # Find all occurrences of timestamps in the current job lines
        timeStamp = regdExPtrn_1.findall(txt)
        # Check that a timestamp was found
        if len(timeStamp) >= 1:
            # If there is more than one timestamp in the current
            # job lines, get only the first one
            timeStamp = timeStamp[0]
        else:
            timeStamp = regdExPtrn_2.findall(txt)
            if len(timeStamp) >= 1:
                timeStamp = timeStamp[0]
                # Append the found output to the output string
        outFileStr += '########============ NEW JOB STARTS HERE ===========#########'
        outFileStr += "\n"
        outFileStr += "job# " + str(currentJobName)
        outFileStr += "\n"
        outFileStr += "Timestamp: " + timeStamp
        outFileStr += "\n"

        outFileStr = field_Extract(currentJobData, fields_To_Extract, currentJobName, timeStamp, ':')
        insertLastran(currentJobName,timeStamp)
        print('Current job Name :', currentJobName, ' : ', timeStamp)
        print(outFileStr)
        previousJobName = currentJobName
        previous_search_Start_Point = search_Start_Point
        previous_search_End_Point = search_End_Point
        previous_fields_To_Extract = fields_To_Extract

        currentJobName = 'xx'
        currentJobFound = False

        currentJobData = []
        fields_To_Extract = []
        search_Start_Point = ' '
        search_End_Point = ' '

test_file.close()

1 Answer:

Answer 0 (score: 0)

Different approaches can be used to prevent duplicate inserts.

1. Check whether the data has already been inserted into the database/table. If it does not exist yet, insert it; otherwise skip the insert (see the sketch after this list).

2. You can create a UNIQUE CONSTRAINT on the column(s) where you expect duplicates to occur.

For example: ALTER TABLE MYTABLE ADD CONSTRAINT constraint1 UNIQUE(column1)
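Below is a minimal sketch of both ideas in the question's setting. It assumes cur/conn come from a pyodbc-style connection (the ?-placeholders in the question suggest this) and that a loaded chunk is uniquely identified by (Job_Name, Run_TS, Seq_Num); the helper names insert_if_new and insert_or_skip and the constraint name uq_job_run_seq are illustrative, not part of the original code.

import pyodbc  # assumption: the question's ?-placeholders point to a pyodbc-style driver

INSERT_SQL = (
    "INSERT INTO Report_Table (Job_Name, Run_TS, Seq_Num, Field1, Field2, Field3, Field4, "
    "Field5, Field6, Field7, Field8, Field9, Field10, Field11) "
    "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)"
)

def insert_if_new(cur, conn, params):
    """Approach 1: look the row up first and insert only when it is not there yet."""
    cur.execute(
        "SELECT COUNT(*) FROM Report_Table WHERE Job_Name = ? AND Run_TS = ? AND Seq_Num = ?",
        (params[0], params[1], params[2]),  # params is ordered (JobName, timeStamp, TableSeq, ...)
    )
    if cur.fetchone()[0] == 0:
        cur.execute(INSERT_SQL, params)
        conn.commit()

def insert_or_skip(cur, conn, params):
    """Approach 2: rely on a UNIQUE constraint, added once with something like
    ALTER TABLE Report_Table ADD CONSTRAINT uq_job_run_seq UNIQUE (Job_Name, Run_TS, Seq_Num),
    and treat the constraint violation as "already loaded"."""
    try:
        cur.execute(INSERT_SQL, params)
        conn.commit()
    except pyodbc.IntegrityError:
        conn.rollback()  # duplicate (Job_Name, Run_TS, Seq_Num): ignore and keep going

Either helper could replace the body of insertToAccess in the question. The constraint-based variant is generally the more robust choice, since the database itself rejects duplicates even if the script is run twice concurrently or another process writes to the table.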