我写了一个python脚本,将文件从一个文件夹批量上传到postgresql中。虽然该脚本有效,但我认为它不是超级有效。谁能告诉我如何改善它?
文件实际上传需要很长时间。
间距/缩进在发布时略有不同,这在实际脚本中不是问题。
def addFilesToDatabase(目录):
uploadedFiles = []
errorFiles = []
rows_to_chunk_by = 1000
for filename in os.listdir(directory):
try:
filename_used = filename.upper()
if filename_used.endswith(".CSV"):
file_directory = os.path.join(directory, filename)
tableName = filename_used.replace('.CSV','')
df = pd.read_csv(file_directory, header=0, nrows = 1)
columns = df.columns
while 1==1:
for skiprows in range(100000000):
if(skiprows == 0):
df = pd.read_csv(file_directory, header=0, nrows = rows_to_chunk_by, skiprows = skiprows*rows_to_chunk_by)
df.to_sql(name=tableName, con=engine, if_exists='append', schema=None, index=False)
else:
df = pd.read_csv(file_directory, header=None, nrows = rows_to_chunk_by, skiprows = skiprows*rows_to_chunk_by)
df.columns = columns
df.to_sql(name=tableName, con=engine, if_exists='append', schema=None, index=False)
if(len(df)<rows_to_chunk_by):
break
uploadedFiles.append(filename)
break
except Exception as e:
if str(e) == "No columns to parse from file":
uploadedFiles.append(filename)
elif str(e)[0:16] == "Length mismatch:":
uploadedFiles.append(filename)
else:
errorFiles.append(filename)
print('Error with ' + filename)
print(e)
continue