当我尝试将 CSV 文件导入 SQL Server 表时,行数据的列会被自动重新排序。为什么?
我的sql表的模式是:
CREATE TABLE ZCPTble( EmployeeID INT, EmployeeName NVARCHAR(255), ProjectID NVARCHAR(255), WBSElement NVARCHAR(255), AllocationPercent FLOAT, ProjSBUName NVARCHAR(255), ProjectPractice NVARCHAR(255), ProjectProfile NVARCHAR(255), ProjectManagerID INT, ProjectManager NVARCHAR(255), DeliveryManagerID INT, DeliveryManager NVARCHAR(255), SupervisorID INT, Supervisor NVARCHAR(255),
EmployeeStatus NVARCHAR(255), BusinessLead NVARCHAR(255), BusinessLeadName NVARCHAR(255) )
import csv
import datetime as dt
import glob
import os
import time
import urllib
import urllib.parse
from datetime import datetime

import pandas as pd
import pyodbc
from sqlalchemy import create_engine
def trim(dataset):
    """Return a copy of *dataset* with whitespace stripped from string cells.

    Args:
        dataset: A pandas DataFrame (it must provide the element-wise
            ``map`` / ``applymap`` method).

    Returns:
        A new DataFrame in which every ``str`` value has had its leading and
        trailing whitespace removed; all other values are passed through
        unchanged.
    """
    def _strip_if_str(value):
        # isinstance (rather than an exact type check) also covers str subclasses.
        return value.strip() if isinstance(value, str) else value

    # DataFrame.applymap is deprecated in favour of DataFrame.map. Look the
    # method up on the class so a column that happens to be named "map" on an
    # older pandas cannot be mistaken for it.
    if hasattr(type(dataset), "map"):
        return dataset.map(_strip_if_str)
    return dataset.applymap(_strip_if_str)
# ODBC settings for the direct pyodbc connection (trusted/integrated login).
_conn1_settings = (
    r'Driver={SQL Server};'
    r'Server=XXX\SQLSERVER2017;'
    r'Database=ABC;'
    r'Trusted_Connection=yes;'
)

# NOTE(review): neither conn1 nor cur1 is referenced by the rest of the code
# visible here -- confirm whether this connection is still needed.
conn1 = pyodbc.connect(_conn1_settings)
cur1 = conn1.cursor()

# Wall-clock start time; the script prints the elapsed time at the very end.
start = time.time()
# Names of all columns, in the order the columns appear in the data.
#
# This has to be an ordered sequence (a list): the script assigns these names
# to the DataFrame's columns positionally. A set literal ({...}) has no
# defined order, so it would attach the names to arbitrary columns and the
# values would land in the wrong SQL columns.
# NOTE(review): the order below follows the table definition; confirm the CSV
# delivers its columns in the same order.
list_of_column_names = [
    'EmployeeID',
    'EmployeeName',
    'ProjectID',
    'WBSElement',
    'AllocationPercent',
    'ProjSBUName',
    'ProjectPractice',
    'ProjectProfile',
    'ProjectManagerID',
    'ProjectManager',
    'DeliveryManagerID',
    'DeliveryManager',
    'SupervisorID',
    'Supervisor',
    'EmployeeStatus',
    'BusinessLead',
    'BusinessLeadName',
]
print(len(list_of_column_names))
# Load the most recently created file from the sample folder, label its
# columns, coerce the numeric columns, and append the rows to SQL Server.
#
# NOTE(review): the indentation of this script was lost. It is laid out here
# so that the newest file is loaded and inserted exactly once; nesting the
# load/insert inside the directory-listing loop would append the same file
# once per directory entry. Confirm this matches the intended behaviour.

# Importing newest csv file.
# '*' matches every entry; use '*.csv' if only CSV files should be considered.
list_of_files = [
    path
    for path in glob.glob('C://Users/ABX/SDD/ASY/Sample/*')
    if os.path.isfile(path)  # sub-directories cannot be read as CSV
]
if not list_of_files:
    # max() on an empty list would fail with an unhelpful message.
    raise FileNotFoundError(
        "No files found in C://Users/ABX/SDD/ASY/Sample/")
latest_file = max(list_of_files, key=os.path.getctime)

# Print the folder contents for reference.
for entry in os.listdir("C://Users/ABX/SDD/ASY/Sample/"):
    print(entry)

with open(latest_file, "r") as csv_file:
    df = pd.read_csv(csv_file)

# Drop columns whose label matches 'Unnamed: ' (the label pandas gives to
# columns that have a blank header cell).
df = df[df.columns.drop(list(df.filter(regex='Unnamed: ')))]

# Rename all columns. The assignment is positional, so the names must come
# from an ordered sequence; an unordered container would silently shuffle the
# labels and the data would end up in the wrong SQL columns.
if isinstance(list_of_column_names, (set, frozenset)):
    raise TypeError(
        "list_of_column_names must be an ordered sequence (list or tuple), "
        "not a set: column names are assigned by position")
df.columns = list(list_of_column_names)

# Converting all numeric columns to a numeric dtype. After the rename above
# these columns are guaranteed to exist, so no KeyError handling is needed;
# a value that cannot be parsed raises ValueError instead of being skipped.
for col in ['EmployeeID', 'AllocationPercent', 'SupervisorID',
            'ProjectManagerID', 'DeliveryManagerID']:
    df[col] = pd.to_numeric(df[col])

# Writing to SQL Server through SQLAlchemy; the raw ODBC connection string is
# URL-quoted and passed via the odbc_connect query parameter.
params = urllib.parse.quote_plus(
    r'DRIVER={SQL Server};SERVER=XXX1\SQLSERVER2017;DATABASE=ABC;Trusted_Connection=yes')
conn_str = 'mssql+pyodbc:///?odbc_connect={}'.format(params)
engine = create_engine(conn_str)
df.to_sql(name='ProjectAllocationDetails', con=engine, if_exists='append', index=False)
print("The Data was inserted succesfully.")
end = time.time()

# Release memory from the dataframe.
del df
print(end - start)
数据被随意错位到其他列。例如,SQL 表的 Employee ID 列中出现了 Employee Name 的值,而 Date 列中却是 Name 的值。为什么?
答案 0 :(得分:0)
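更新:问题已解决。这是代码中的一个小错误:变量 list_of_column_names 应该是一个列表(list)对象,而代码中使用了错误的括号。花括号 {...} 创建的是集合(set),集合是无序的,因此执行 df.columns = list_of_column_names 时列名会以任意顺序赋给各列,导致数据写入错误的列;改用方括号 [...] 创建列表即可保持列名顺序。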