我循环遍历csv文件以附加到DataFrame表,但似乎每次循环和追加时,都会在表中添加一个索引列。很困惑,我很困难,任何帮助都会很棒。
我的代码:
import sqlite3 as sql
import pandas as pd
import hashlib
import os
import csv
from pandas import ExcelWriter
def obtain_data(filename, connect, type):
writer =
ExcelWriter('path\\new_excel_sheets\\'+filename+'.xlsx')
table =
ExcelWriter('path\\new_excel_sheets\\hash_table.xlsx')
if type == True:
print(filename)
df = pd.DataFrame.from_csv('path'+filename,
index_col=None)
else:
workbook = pd.ExcelFile('path' + filename)
df = workbook.parse('Sheet1')
df = df.rename(columns={'INDEX': 'INDX'})
df = df.rename(columns={'Index': 'INDXS'})
headers = df.dtypes.index
header_list = str(headers.tolist())
header_list = ''.join(header_list)
hash_t = str(hashlib.md5(header_list.encode('utf-8')).hexdigest())
c = connect.cursor()
print(filename)
print(hash_t)
if hash_t == 'd22db04a2f009f222da57e91acdce21b':
next_open = df['DATE'][1]
next_open_value = df['DATE'][2]
df.insert(3, next_open, next_open_value)
headers = df.dtypes.index
header_list = str(headers.tolist())
header_list = ''.join(header_list)
new_hash_t = str(hashlib.md5(header_list.encode('utf-
8')).hexdigest())
df = df.drop(df.index[1:])
hashing = {str(new_hash_t): str(filename)}
df2 = pd.DataFrame.from_dict(hashing, orient='index')
try:
df2.to_sql(name='Hash Table', con=connect, if_exists='append')
df.to_sql(name=new_hash_t, con=connect, if_exists='append')
except:
raise IndexError('Could not transform ' + str(filename) + ' into
database.')
elif hash_t == '484fbe4de83acb41480dd935d82d7fbe':
next_open = df['DATE'][1]
next_open_value = df['DATE'][2]
df.insert(3, next_open, next_open_value)
headers = df.dtypes.index
header_list = str(headers.tolist())
header_list = ''.join(header_list)
new_hash_t = str(hashlib.md5(header_list.encode('utf-
8')).hexdigest())
df = df.drop(df.index[2])
df['DATE'][1] = df['DATE'][0]
hashing = {new_hash_t: filename}
df2 = pd.DataFrame.from_dict(hashing, orient='index')
try:
df2.to_sql(name='Hash Table', con=connect, if_exists='append')
df.to_sql(name=new_hash_t, con=connect, if_exists='append')
except:
raise IndexError('Could not transform ' + str(filename) + ' into
database.')
else:
hashing = {hash_t: filename}
df2 = pd.DataFrame.from_dict(hashing, orient='index')
try:
df2.to_sql(name='Hash Table', con=connect, if_exists='append',
index=False)
df.to_sql(name=hash_t, con=connect, if_exists='append',
index=True)
except:
raise IndexError('Could not transform ' + str(filename) + '
into database.')
df.to_excel(writer)
print(filename + ' has been completed succesfully.')
final_results = {'df': df, 'hash_t': hash_t}
return final_results
csv_files = []
usable_files = []
for filename in os.listdir(filepath):
if filename.endswith(".xlsx"):
print('Found an XLSX file ' + str(filename))
usable_files.append(filename)
elif filename.endswith('.CSV'):
print('Found a CSV File ' + filename)
csv_files.append(filename)
else:
print('Found an unusable file ' + str(filename))
for file in usable_files:
connect = sql.connect(SQLite3 connection)
obtain_data(file, connect, False)
for file in csv_files:
connect = sql.connect(SQLite3 connection)
obtain_data(file, connect, True)
print('All files have been made into Tables')
SQLite3数据库做的一切正确,但当我追加它时,它会添加一个索引列。我不知道如何将索引列放在这里(随意教我)所以请耐心等待。表格看起来像这样
rowid, 0 , 1, 2, etc
0, value, value, value, etc
1, value, value, value, etc
但是当我循环(比如4次)时,它会改变为
rowid, index, 0, 1, 2, etc
0, 0, 0, 0, 0, value
0, 0, 0, 0, 0, value
这是一个非常奇怪的问题,所以任何帮助都会受到赞赏,谢谢!
答案 0 :(得分:0)
在所有to_sql()
调用中,只需将 index 参数设置为False(默认参数设置为True):
df2.to_sql(name='Hash Table', con=connect, if_exists='append', index=False)
任何平面文件输出:
df.to_excel(writer, index=False)
df.to_csv(filename, index=False)