我是编码新手,需要帮助完成我的第一项重大任务,使用python将数据从csv
链接到postgresql
。代码似乎起作用,但是当我使用postgresql
查看它时,对于每一行,它会附加前一行。关注表teams
,考虑到我有20 teams
我希望在不同的行和不同的team
上有20行,每个ID
。在这个阶段,考虑到我有110 games
,在10 games
之后,数据只是在不同的行中重复。解决方案是什么?谢谢!
#!/usr/bin/python
# -*- coding: utf-8 -*-
import psycopg2
import sys
import csv
from itertools import count, cycle
from _tkinter import create
from setuptools.dist import sequence
from email.policy import default
path = r'C:\Users\sammy\Downloads\E0.csv'
with open(path, "r") as csvfile:
readCSV = csv.reader(csvfile, delimiter=",")
firstline = 1
con = None
con = psycopg2.connect("host='localhost' dbname='football' user='postgres' password='XXX'")
cur = con.cursor()
cur.execute("DROP TABLE games")
cur.execute("DROP TABLE teams")
cur.execute("DROP TABLE referees")
cur.execute("CREATE TABLE teams (HomeTeamID SERIAL PRIMARY KEY, AllTeams123 VARCHAR)")
cur.execute("CREATE TABLE referees (RefereeID SERIAL PRIMARY KEY, RefereeName VARCHAR)")
cur.execute("CREATE TABLE games (GAMEID SERIAL PRIMARY KEY, HomeTeamID INTEGER, FOREIGN KEY (HomeTeamID) REFERENCES teams(HomeTeamID), HomeTeam VARCHAR, AwayTeamID VARCHAR, AwayTeam VARCHAR, FTHG INTEGER, ATHG INTEGER, FTR VARCHAR, RefereeID INTEGER, FOREIGN KEY (RefereeID) REFERENCES referees(RefereeID), RefereeName VARCHAR, HY INTEGER, AY INTEGER)")
hometeams = []
awayteams = []
uniqueteams = []
uniquereferees = []
allreferees = []
allteams = hometeams + awayteams
gameuniqueteams = sorted(uniqueteams)
gameuniquereferees = sorted(uniquereferees)
try:
for row in readCSV:
if firstline:
firstline=0
continue
game1 = row[2]
game2 = row[3]
HomeTeam = row[2]
AwayTeamID = row[3]
AwayTeam = row[3]
FTHG = row[4]
ATHG = row[5]
FTR = row[6]
RefereeID = row[10]
RefereeName = row[10]
HY = row[19]
AY = row[20]
hometeams.append(HomeTeam)
awayteams.append(AwayTeam)
allteams = hometeams + awayteams
allreferees.append(RefereeName)
uniqueteams = []
uniquereferees = []
for x in allteams:
if x not in uniqueteams:
uniqueteams.append(x)
## hi
for x in allreferees:
if x not in uniquereferees:
uniquereferees.append(x)
gameuniqueteams = sorted(uniqueteams)
gameuniquereferees = sorted(uniquereferees)
data1 = (gameuniqueteams,)
data2 = (gameuniquereferees,)
data3 = (HomeTeam, AwayTeamID, AwayTeam, FTHG, ATHG, FTR, RefereeName, HY, AY)
query1 = "INSERT INTO teams (AllTeams123) VALUES (%s);"
query2 = "INSERT INTO Referees (RefereeName) VALUES (%s);"
query3 = "INSERT INTO games (HomeTeam, AwayTeamID, AwayTeam, FTHG, ATHG, FTR, RefereeName, HY, AY) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s);"
cursor = con.cursor()
cursor.execute(query1, data1)
cursor.execute(query2, data2)
cursor.execute(query3, data3)
## hi
for x in allteams:
if x not in uniqueteams:
uniqueteams.append(x)
## hi
for x in allreferees:
if x not in uniquereferees:
uniquereferees.append(x)
except psycopg2.DatabaseError as e:
if con:
con.rollback()
print ("Error %s % e", e)
sys.exit(1)
finally:
if con:
con.commit()
con.close()
out=open("new_data.csv", "w")
output = csv.writer(out)
for row in data1:
output.writerow(row)
out.close()
gameuniqueteams = sorted(uniqueteams)
gameuniquereferees = sorted(uniquereferees)
print (hometeams)
print (awayteams)
print(uniqueteams)
print(gameuniqueteams)
print(uniquereferees)
print(gameuniquereferees)