我正在尝试将某些列转换为日期格式,但一直没有成功。这是我的代码:
from datetime import date, datetime
from utils import misc_errors
from os import listdir, remove
from os.path import isfile, join
from pathlib import Path
import pandas as pd
import shutil
import csv
import io
import codecs
# Paths used throughout the script.
# `path` is the current working directory followed by a trailing separator.
path = str(Path().absolute()) + r'\\'
# NOTE(review): the source and destination directories look like redacted
# placeholders. The original spelling r'C:\' is a syntax error (a raw string
# cannot end in a single backslash), so the backslash is escaped explicitly.
fetch = 'C:\\' + '\\'
net = 'C:\\' + '\\'
# Names of the regular files in the fetch directory, then the CSV subset
# that has to be copied and processed.
allfiles = [f for f in listdir(fetch) if isfile(join(fetch, f))]
files = [name for name in allfiles if name.endswith('csv')]
def _format_amount(value):
    """Format a numeric string as '0' for zero, otherwise with ',' and 2 decimals.

    Empty strings (missing values) are passed through unchanged.
    """
    if value == '':
        return ''
    number = float(value)
    # Compare the parsed number, not the raw string: a string never equals 0.
    if number == 0:
        return '{0:.0f}'.format(number)
    return '{0:,.2f}'.format(number)


def _to_int_or_blank(value):
    """Truncate a numeric string to an int; keep empty strings as they are."""
    return int(float(value)) if value != '' else ''


for file_name in files:
    # Copy the file from the fetch directory into the working directory.
    shutil.copy2(fetch + file_name, path + file_name)
    # Date stamp used in the output file name.
    file_date = date.today().strftime("%Y%m%d")
    # Read every column as text, skipping the first five rows.
    # Only `sep` is passed: pandas rejects `sep` and `delimiter` together.
    file_df = pd.read_csv(
        file_name,
        sep=',',
        quotechar='"',
        thousands=',',
        encoding='Latin-1',
        dtype='object',
        low_memory=False,
        skiprows=5,
    )
    file_df.columns = [col.strip() for col in file_df.columns]
    # Populate the count column; a scalar is broadcast to every row.
    file_df["count()"] = '1'
    # Header list captured at this point (before any columns are added below).
    headers = file_df.columns.values.tolist()
    file_df = file_df.fillna('')
    if 'project' in file_name:
        # Remove all duplicate rows from the projects file.
        file_df = file_df.drop_duplicates(keep='first')
        file_final = "PROJECTS.FULL." + file_date
        # Scalar assignment stays correct even when de-duplication has
        # changed the number of rows.
        file_df["Suppliers - ERP Supplier ID"] = 'Unclassified'
        file_df["Suppliers - ERP Supplier"] = 'Unclassified'
        # Stringify every cell and strip embedded newlines, column by column,
        # so the result does not depend on the `headers` snapshot above.
        file_df = file_df.apply(
            lambda column: column.map(lambda cell: str(cell).replace("\n", ''))
        )
        num_headers = [r"sum(Annual Spend Amount)", r"sum(Total Contract Value Amount)"]
        for header in num_headers:
            file_df[header] = file_df[header].map(_format_amount)
        header = r"sum(% of Total Contract Value in US)"
        file_df[header] = file_df[header].map(_to_int_or_blank)
        header = "Reporting Year"
        file_df[header] = file_df[header].map(_to_int_or_blank)
        word_headers = ["Description", "Key Considerations", "Key Highlights / Value Statement", "Status Update"]
        for header in word_headers:
            file_df = misc_errors(file_df, header)
        # Replace en dashes in column names with plain hyphens.
        file_df.columns = [c.replace("–", "-") for c in file_df]
        file_headers = [
            "Begin Date",
            "End Date - Date",
            "Estimated Completion Date - Date",
            "Anticipated T&O Legal Engagement Date - Date",
            "Benefits Start Date - Date",
            "Benefits End Date - Date",
        ]
        # `file_headers` is a list of column names, so each name must be used
        # to index the DataFrame (not the list), and the converted column has
        # to be assigned back. Values that cannot be parsed (including the
        # empty strings left by fillna) become NaT and are written as blanks.
        for header in file_headers:
            file_df[header] = pd.to_datetime(file_df[header], errors='coerce')
        # NOTE(review): the pasted code lost its indentation; the three steps
        # below are kept inside this branch because `file_final` is only
        # defined here. Confirm against the original script.
        file_df.to_csv(file_final, index=False, encoding="latin-1")
        remove(file_name)
        shutil.copy2(path + file_final, net + file_final)
我正在尝试将 file_headers 列表(靠近底部)中列出的列转换为日期。
更新:下面补充了完整的错误日志:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-3-f04094ba593a> in <module>
79 "Anticipated T&O Legal Engagement Date - Date","Benefits Start Date - Date", "Benefits End Date - Date" ]
80
---> 81 pd.to_datetime(file_headers['Begin Date'], errors='ignore')
82
83 file_df.to_csv(file_final, index=False, encoding="latin-1")
TypeError: list indices must be integers or slices, not str
不过它仍然无法正常工作。感谢您到目前为止提供的所有帮助;如果您还有其他建议,请告诉我。
答案 0 :(得分:0)
您可能需要检查 Date 列的内容。尝试遍历该列并找出无效的值:
# Print every value in the Date column that pandas cannot parse as a date.
for row in df.itertuples():
    try:
        pd.to_datetime(row.Date)
    except (ValueError, TypeError, OverflowError):
        # Only parse failures are reported; a bare `except` would also
        # swallow KeyboardInterrupt and unrelated errors.
        print(row.Date)
然后再决定如何处理这些无效的值。