Question

我刚接触 Python，正在把 Excel 中的数据导入 PostgreSQL。Excel 的地址（Address）列中存在重复项，我想把这些重复项找出来。最可行的做法是什么？

import psycopg2
import xlrd
# Import the "List" worksheet of data.xlsx into the PostgreSQL table
# "Python".list, recreating the table from scratch on every run.
book = xlrd.open_workbook("data.xlsx")
sheet = book.sheet_by_name("List")
database = psycopg2.connect(database="Excel", user="SQL", password="PASS", host="YES", port="DB")
try:
    cursor = database.cursor()

    # Start from a clean slate so that re-running the script does not
    # accumulate rows from earlier imports.
    delete = """Drop table if exists "Python".list"""
    print(delete)
    cursor.execute(delete)
    cursor.execute('''CREATE TABLE "Python".list
   (DCAD_Prop_ID    varchar(50),
Address varchar(50),
Addition varchar(50),
Block   varchar(50),
Lot integer,
Project_ID integer
   );''')
    print("Table created successfully")

    query = """INSERT INTO "Python".list (DCAD_Prop_ID, Address,Addition,Block  ,Lot,Project_ID)
VALUES (%s, %s, %s, %s, %s, %s)"""

    # Row 0 is skipped (presumably the header row -- confirm against the
    # workbook).  The INSERT must run once per row: previously it ran only
    # once, after the loop, so only the last spreadsheet row was stored.
    for r in range(1, sheet.nrows):
        # Columns 0-5 map, in order, onto the six columns named in `query`.
        values = tuple(sheet.cell(r, col).value for col in range(6))
        cursor.execute(query, values)

    cursor.close()
    database.commit()
finally:
    # Always release the connection, even when a statement above fails;
    # in that case commit() was never reached, so nothing is committed.
    database.close()

print("")
print("All Done! Bye, for now.")
print("")
print("I just imported Excel into postgreSQL")

Answer 1

这将返回重复的行：

-- List every row that occurs more than once, together with how many
-- times it occurs. Rows are grouped on all six columns.
SELECT
    DCAD_Prop_ID,
    Address,
    Addition,
    Block,
    Lot,
    Project_ID,
    COUNT(*)
FROM "Python".list
GROUP BY
    DCAD_Prop_ID,
    Address,
    Addition,
    Block,
    Lot,
    Project_ID
HAVING COUNT(*) > 1

要消除Python中的重复项，请使用set：

>>> # Building a set from the tuples keeps a single copy of each distinct tuple.
>>> t = ((1,2),(2,3),(1,2))
>>> set(t)
set([(1, 2), (2, 3)])

如何捕捉重复？

1 个答案: