我对 Python 比较陌生,正在把一个 Excel 文件导入 PostgreSQL。Excel 中的地址(Address)列含有重复项,我想把这些重复项找出来。最可行的做法是什么?
import psycopg2
import xlrd
# Import every data row of the "List" worksheet in data.xlsx into the
# "Python".list table. The table is dropped and recreated on each run, so the
# script always starts from an empty table.
#
# Single-argument print(...) calls are used throughout because they behave the
# same on Python 2 and Python 3.

# NOTE(review): newer xlrd releases reportedly no longer read .xlsx files --
# confirm the installed xlrd version supports this workbook format.
book = xlrd.open_workbook("data.xlsx")
sheet = book.sheet_by_name("List")

# NOTE(review): these connection settings look like placeholders (host="YES",
# port="DB") -- replace them with the real server details before running.
database = psycopg2.connect(
    database="Excel",
    user="SQL",
    password="PASS",
    host="YES",
    port="DB",
)
try:
    cursor = database.cursor()
    try:
        # Recreate the target table from scratch.
        delete = """Drop table if exists "Python".list"""
        print(delete)
        cursor.execute(delete)
        cursor.execute('''CREATE TABLE "Python".list
(DCAD_Prop_ID varchar(50),
Address varchar(50),
Addition varchar(50),
Block varchar(50),
Lot integer,
Project_ID integer
);''')
        print("Table created successfully")

        query = """INSERT INTO "Python".list (DCAD_Prop_ID, Address,Addition,Block ,Lot,Project_ID)
VALUES (%s, %s, %s, %s, %s, %s)"""

        # Row 0 is skipped (presumably the header row -- verify against the
        # workbook). Columns 0-5 are read in the same order as the column
        # list of the INSERT statement above.
        for r in range(1, sheet.nrows):
            values = tuple(sheet.cell(r, col).value for col in range(6))
            cursor.execute(query, values)
    finally:
        # Release the cursor even when one of the statements above fails.
        cursor.close()

    # Only reached when every statement succeeded; on failure the connection
    # is closed below without committing.
    database.commit()
finally:
    database.close()

print("")
print("All Done! Bye, for now.")
print("")
print("I just imported Excel into postgreSQL")
答案 0 :(得分:2)
这将返回重复的行:
-- List each row of "Python".list that occurs more than once, followed by the
-- number of times it occurs. Rows are grouped on all six columns, so only
-- rows that match in every column are reported together.
SELECT
    DCAD_Prop_ID,
    Address,
    Addition,
    Block,
    Lot,
    Project_ID,
    COUNT(*)
FROM "Python".list
GROUP BY
    DCAD_Prop_ID,
    Address,
    Addition,
    Block,
    Lot,
    Project_ID
HAVING COUNT(*) > 1
要在 Python 中去除重复项,可以使用 set:
# Interpreter session: building a set from the tuple of pairs keeps one copy
# of each distinct pair, so the repeated (1, 2) appears only once in the result.
>>> t = ((1,2),(2,3),(1,2))
>>> set(t)
set([(1, 2), (2, 3)])