您好,我正在处理一个需要导入 CSV 文件的财务项目,以便可以轻松地分析数据。但是当我运行导入脚本时,最终得到了如下错误:
sqlalchemy.exc.ProgrammingError: (psycopg2.ProgrammingError) could not open file "3aG5OjRRd1.csv" for reading: Permission denied
我使用 Python、psycopg2 和 SQLAlchemy 进行导入,下面是我的脚本文件:
import sqlalchemy
import csv
import os
import pandas as pd
import psycopg2
import random
import string
from odo import odo, resource, discover, dshape
import time
# Wall-clock timestamp captured once, when the module is loaded.
start_time = time.time()

# Database connection settings.
HOST = "localhost"
PORT = 5432
USER = "user"
PASSWORD = "password"
DATABASE = "csv_database"

# Destination table and the CSV file that feeds it.
CSV_DB_TABLE = "csv_table"
CSV_FILE = "csv/sample.csv"
def connect(user, password, db, host='localhost', port=5432):
    """Create a SQLAlchemy engine for a PostgreSQL database and reflect its tables.

    Args:
        user: Database role name.
        password: Password for ``user``.
        db: Name of the database to connect to.
        host: Server host name; defaults to ``'localhost'``.
        port: Server TCP port; defaults to ``5432``.

    Returns:
        A ``(engine, metadata)`` tuple. ``metadata`` is bound to the engine
        and already holds the tables reflected from the database.
    """
    # NOTE: the credentials are interpolated verbatim, so a password that
    # contains URL-reserved characters (e.g. '@' or '/') must be escaped by
    # the caller.
    url = 'postgresql://{}:{}@{}:{}/{}'.format(user, password, host, port, db)
    con = sqlalchemy.create_engine(url, client_encoding='utf8')
    # The ``reflect=True`` constructor flag is deprecated; calling reflect()
    # explicitly loads the same table definitions through the bound engine.
    meta = sqlalchemy.MetaData(bind=con)
    meta.reflect()
    return con, meta
def load_data_odo(table_name, filename, origin=CSV_FILE):
    """Build the datashape that describes the CSV columns for ``origin``.

    Args:
        table_name: Name of the target table (not referenced by this body).
        filename: Path of the CSV file (not referenced by this body).
        origin: Source identifier; the schema is only built when it equals
            ``CSV_FILE``.

    Returns:
        None.
    """
    if origin != CSV_FILE:
        return
    schema_text = "var * {id: int64,title1: int64,title2: int64,title3: int64,title4: float64,title5: datetime,title6: string,title7: string,title8: string,title9: string,title10: string,title11: string,title12: datetime,title13: string,title14: string}"
    # NOTE(review): the datashape is constructed but never handed to a loader
    # in this function -- confirm whether the load step is missing here.
    ds = dshape(schema_text)
def random_string():
    """Return a random 10-character string of ASCII digits and letters."""
    alphabet = string.digits + string.ascii_letters
    return "".join(random.choice(alphabet) for _ in range(10))
def new_file_gen(filename, origin=None, start_index=0, column_name=None):
    """Copy a CSV into a randomly named file with a running-number column prepended.

    Args:
        filename: Path of the CSV file to read.
        origin: Source identifier. When it equals ``CSV_FILE`` the
            ``'Customer No'`` column is cut to its first 10 characters and
            the ``'Date'`` column is parsed as datetimes.
        start_index: Last number already in use; numbering starts at
            ``start_index + 1``.
        column_name: Header for the number column. When falsy, the value
            ``start_index + 1`` is used as the header instead.

    Returns:
        A dict with ``'file'`` (name of the CSV that was written) and
        ``'shape'`` (shape of the data read from ``filename``, i.e. without
        the added number column).
    """
    # NOTE(review): the file is written relative to the current working
    # directory with default permissions. If it is later read by the database
    # server itself, the server process needs read access to it -- confirm.
    new_file_name = '{}.csv'.format(random_string())
    df = pd.read_csv(filename)
    if origin == CSV_FILE:
        df['Customer No'] = df['Customer No'].astype(str).str.slice(0, 10)
        df['Date'] = pd.to_datetime(df['Date'])
    total_length = len(df.index)
    if not column_name:
        column_name = start_index + 1
    numbers = pd.DataFrame(
        {column_name: range(start_index + 1, start_index + total_length + 1)}
    )
    # Both frames share the default 0..n-1 index, so an index join lines each
    # number up with its row. The former ``drop(index[0])`` call discarded its
    # result and raised IndexError on an empty file, so it is gone.
    numbered = numbers.merge(df, left_index=True, right_index=True)
    numbered.to_csv(new_file_name, index=False)
    return {'file': new_file_name, 'shape': df.shape}
def create_file_with_header(filename):
    """Write a copy of a header-less CSV with ``title1``..``title14`` as header row.

    Args:
        filename: Path of the CSV file to read; its first line is treated as
            data, not as a header.

    Returns:
        Name of the randomly named CSV file that was written.
    """
    target = '{}.csv'.format(random_string())
    header_line = "title1,title2,title3,title4,title5,title6,title7,title8,title9,title10,title11,title12,title13,title14"
    column_titles = header_line.split(",")
    frame = pd.read_csv(filename, header=None)
    frame.columns = column_titles
    frame.to_csv(target, index=False, header=True)
    return target
def get_last_id(table_name, columnname="id"):
    """Return the first ``columnname`` value of ``table_name`` in descending order.

    Args:
        table_name: Table to query.
        columnname: Column to sort by and read; defaults to ``"id"``.

    Returns:
        The value from the first row of the descending sort, or ``0`` when
        the table has no rows.
    """
    conn = psycopg2.connect(database=DATABASE, user=USER, password=PASSWORD, host=HOST, port=PORT)
    try:
        cursor = conn.cursor()
        try:
            # NOTE: identifiers are interpolated as-is; only pass trusted
            # table and column names.
            query = "SELECT {} from {} order by {} desc limit 1;".format(columnname, table_name, columnname)
            cursor.execute(query)
            # The query is limited to one row, so a single fetch is enough.
            row = cursor.fetchone()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the query fails.
        conn.close()
    return row[0] if row else 0
def read_data(table_name):
    """Print every row of ``table_name``, then the engine used to read it.

    Args:
        table_name: Name of a table present in the reflected metadata.

    Raises:
        KeyError: If ``table_name`` is not among the reflected tables.
    """
    con, meta = connect(USER, PASSWORD, DATABASE)
    table = meta.tables[table_name]
    # Single-argument print calls behave the same on Python 2 and Python 3.
    for row in con.execute(table.select()):
        print(row)
    print(con)
def print_data(table_name):
    """Print a banner line and then the rows of ``table_name``.

    Args:
        table_name: Name of the table whose rows are printed.
    """
    banner = "#" * 30
    # Joining explicitly keeps the space-separated output identical on
    # Python 2 and Python 3.
    print(" ".join((banner, "Data", banner)))
    read_data(table_name)
def csv_file_import():
    """Import ``CSV_FILE`` into ``CSV_DB_TABLE`` and report how long it took.

    The CSV is first copied to a temporary file whose ``id`` column continues
    after the last id already stored in the table; that copy is handed to the
    loader and removed afterwards.

    Returns:
        A summary string with the source file name, the shape of the data
        read from it and the elapsed time in seconds.
    """
    started = time.time()
    filename = CSV_FILE
    table_name = CSV_DB_TABLE
    index = get_last_id(table_name, columnname="id")
    panda = new_file_gen(filename=filename, origin=filename, start_index=index, column_name="id")
    try:
        load_data_odo(table_name, filename=panda['file'], origin=CSV_FILE)
    finally:
        # Remove the temporary copy even when loading fails, so aborted runs
        # do not leave stray CSV files behind.
        os.remove(panda['file'])
    return "File {} Shape {} Done in :--- {} seconds ---".format(filename, panda['shape'], time.time() - started)
if __name__ == "__main__":
    # Run one import and show the summary line it returns.
    summary = csv_file_import()
    print(summary)
一旦我运行脚本,就会创建一个临时文件,但是在读取该文件的过程中进程失败了。我是一名中级 Python 开发人员,而不是专业人士。有人有什么想法吗?