我正在编写一个循环,用来读取多个 Excel 文件,遍历其中的各个工作表并收集信息,再把结果存入 pandas 数据框。我经常遇到某些 Excel 文件或工作表的格式与代码所预期的略有不同、从而导致报错的情况:出错时,如何得知是哪个文件(文件名)或哪个工作表(工作表编号)出了问题?我想应该使用 try 和 except,但不确定具体该怎么做。
我把代码附在下面,欢迎提出建议。谢谢!
# Batch-import every .xlsx workbook found in `path`.
#
# For each workbook:
#   * sheet 0 is parsed into one transposed frame (appended to `list_one`);
#   * sheets 1 .. len(sheet_names) - 2 are each reduced to a single row
#     holding the mean of their first three data columns (appended to
#     `list_sheet`).
# Anything that cannot be parsed is recorded in `failed_sheets` together with
# the file name and sheet index, and reported on stdout, so that a single
# malformed workbook or sheet no longer aborts the run anonymously.
path = r"C:\Users\---"
allFiles = glob.glob(path + "/*.xlsx")

frame_one = pd.DataFrame()
list_one = []
frame_sheet = pd.DataFrame()
list_sheet = []

# (file name, sheet index or None when the workbook itself could not be
# opened, exception) for every failure encountered below.
failed_sheets = []

# Any run of characters outside the 7-bit ASCII range.
_NON_ASCII = r'[^\x00-\x7F]+'


def _record_failure(file_name, sheet_index, exc):
    """Remember and print which file / sheet raised *exc*."""
    failed_sheets.append((file_name, sheet_index, exc))
    where = "workbook" if sheet_index is None else "sheet {}".format(sheet_index)
    print("Could not parse {!r} ({}): {}: {}".format(
        file_name, where, type(exc).__name__, exc))


def _read_metadata(sheet):
    """Return the four header values read from an xlrd sheet.

    The values come from cells (2, 2) .. (5, 2) (xlrd's zero-based
    row, column indices). The dict keeps the column order used downstream.
    """
    return {
        'Factory': sheet.cell(2, 2).value,
        'Factory_id': sheet.cell(3, 2).value,
        'Country': sheet.cell(4, 2).value,
        'Consultant': sheet.cell(5, 2).value,
    }


def _strip_non_ascii(columns):
    """Return *columns* with non-ASCII characters removed from each label."""
    # regex=True is explicit because pandas >= 2.0 treats the pattern as a
    # literal string by default, which would silently strip nothing.
    return columns.str.replace(_NON_ASCII, '', regex=True)


def _parse_first_sheet(xl, metadata):
    """Parse sheet 0 of the open ExcelFile *xl* into a transposed frame."""
    # `error_bad_lines` was dropped: it is a CSV-parser option that
    # read_excel does not accept (TypeError on current pandas).
    df = pd.read_excel(xl, 0, header=6, index_col=1)
    df = df.drop(df.columns[0], axis=1)
    df = df.dropna(how='all')
    df = df.reset_index()
    df = df.rename(columns={df.columns[0]: "Month"})

    df_t = df.T
    df_t.columns = df_t.iloc[0]
    # Copy so the column assignments below write to an independent frame
    # rather than to a slice of `df.T`.
    df_t = df_t.iloc[1:].copy()
    for key, value in metadata.items():
        df_t[key] = value
    df_t.columns = _strip_non_ascii(df_t.columns)
    return df_t


def _parse_data_sheet(xl, sheet_index, metadata):
    """Reduce sheet *sheet_index* of *xl* to one row of per-row means."""
    df_1 = pd.read_excel(xl, sheet_index, header=4, index_col=1)
    # Drop the rows labelled like the first and the last row, then the first
    # column and any column that is entirely empty.
    df_1 = df_1.drop(df_1.index[0])
    df_1 = df_1.drop(df_1.index[-1])
    df_1 = df_1.drop(df_1.columns[0], axis=1)
    df_1 = df_1.dropna(axis=1, how='all')

    # Non-numeric cells become NaN and are ignored by mean().
    for col in df_1.columns[0:3]:
        df_1[col] = pd.to_numeric(df_1[col], errors='coerce')
    df_1['mean'] = df_1.iloc[:, 0:3].mean(axis=1)
    df_1 = df_1[['mean']]

    df_1_t = df_1.T
    df_1_t['Month'] = sheet_index
    for key, value in metadata.items():
        df_1_t[key] = value
    df_1_t.columns = _strip_non_ascii(df_1_t.columns)
    return df_1_t


for file_ in allFiles:
    # `except Exception` is deliberate in this loop: it is the per-file /
    # per-sheet boundary of a batch job, and every failure is recorded and
    # printed rather than swallowed.
    try:
        xl = pd.ExcelFile(file_)
    except Exception as exc:
        _record_failure(file_, None, exc)
        continue

    # The ExcelFile is opened once and reused for every sheet, and closed
    # when the workbook is done.
    with xl:
        try:
            workbook = xlrd.open_workbook(file_, encoding_override="cp1252")
            worksheet_0 = workbook.sheet_by_index(0)
            metadata = _read_metadata(worksheet_0)
            list_one.append(_parse_first_sheet(xl, metadata))
        except Exception as exc:
            # Without sheet 0 there is no metadata for the remaining sheets,
            # so the rest of this workbook is skipped.
            _record_failure(file_, 0, exc)
            continue

        # NOTE(review): as in the original, the last sheet is never read
        # (range stops at len(sheet_names) - 2) -- confirm this is intended.
        n = len(xl.sheet_names) - 1
        for i in range(1, n):
            try:
                list_sheet.append(_parse_data_sheet(xl, i, metadata))
            except Exception as exc:
                _record_failure(file_, i, exc)

# pd.concat raises ValueError on an empty list, so keep the empty default
# frames when nothing was parsed.
if list_sheet:
    frame_sheet = pd.concat(list_sheet)
if list_one:
    frame_one = pd.concat(list_one)