Question

当我尝试将CSV文件导入到我的sql server表中时。行数据会自动重新排序。为什么？

我的sql表的模式是：

创建表ZCPTble（员工编号INT 员工姓名NVARCHAR（255）， ProjectID NVARCHAR（255）， WBSElement NVARCHAR（255），浮动百分比分配， ProjSBUName NVARCHAR（255）， ProjectPractice NVARCHAR（255）， ProjectProfile NVARCHAR（255）， ProjectManagerID INT， ProjectManager NVARCHAR（255）， DeliveryManagerID INT， DeliveryManager NVARCHAR（255）， SupervisorID INT，主管NVARCHAR（255），

EmployeeStatus NVARCHAR（255）， BusinessLead NVARCHAR（255）， BusinessLeadName NVARCHAR（255））

import glob
import os
import csv
import pyodbc
import time
from datetime import datetime

import pandas as pd
import datetime as dt
import urllib
from sqlalchemy import create_engine


def trim(dataset):
    trim = lambda x: x.strip() if type(x) is str else x
    return dataset.applymap(trim)


conn1 = pyodbc.connect(r'Driver={SQL Server};'
                       r'Server=XXX\SQLSERVER2017;'
                       r'Database=ABC;'
                       r'Trusted_Connection=yes;')

cur1 = conn1.cursor()

start = time.time()

# Names of all columns

list_of_column_names = {
                        'EmployeeID',
                        'EmployeeName',
                        'ProjectID',

                        'WBSElement',
                        'AllocationPercent',
                        'ProjSBUName',

                        'ProjectPractice',
                        'ProjectProfile',


                        'ProjectManagerID',
                        'ProjectManager',

                        'DeliveryManagerID',
                        'DeliveryManager',

                        'SupervisorID',
                        'Supervisor',

                        'EmployeeStatus',
                        'BusinessLead',
                        'BusinessLeadName'

                        }
print(len(list_of_column_names))

df = pd.DataFrame()

# impoting newest csv file
list_of_files = glob.glob(
    'C://Users/ABX/SDD/ASY/Sample/*')  # * means all if need specific format then *.csv
latest_file = max(list_of_files, key=os.path.getctime)

for files in os.listdir("C://Users/ABX/SDD/ASY/Sample/"):
    print(files)
    with open(latest_file, "r") as file:
        df = pd.read_csv(file)

        df = df[df.columns.drop(list(df.filter(regex='Unnamed: ')))]

        # Rename all columns
        df.columns = list_of_column_names
        #converting all numeric to numeric type
        try:
            for col in ['EmployeeID', 'AllocationPercent', 'SupervisorID', 'ProjectManagerID', 'DeliveryManagerID']:
                df[col] = df[col].apply(pd.to_numeric)
        except KeyError:
            continue

#writing to sql server

params = urllib.parse.quote_plus(
    r'DRIVER={SQL Server};SERVER=XXX1\SQLSERVER2017;DATABASE=ABC;Trusted_Connection=yes')
conn_str = 'mssql+pyodbc:///?odbc_connect={}'.format(params)

engine = create_engine(conn_str)
df.to_sql(name='ProjectAllocationDetails', con=engine, if_exists='append', index=False)

print("The Data was inserted succesfully.")

end = time.time()

# release memory from the dataframe
del df
print(end - start)

数据随意移动。例如，sql表的Employee ID列中有Employee Name，而Date列中却有Name。为什么？

Answer 1

更新：问题已解决。代码中的一个小错误。变量list_of_column_names应该是一个列表对象。代码中使用的括号是错误的。

CSV到SQL导入会偶然对CSV数据重新排序为什么？

1 个答案: