我正在处理来自工具设备的非结构化数据。数据中有许多不含任何信息的列,因此存在大量噪声。
例如,过去7天的数据分别保存在7个不同的文件中,我需要从这7个文件中删除相同位置(编号)的那些列。我相信一定有更好、更高效的方法来删除这7个文件中的列。
如果您能查看我的代码,并就如何高效地删除这些列(或只选择所需的列)给出建议,我将不胜感激。
以下是我到目前为止尝试过的代码:
# The question's original attempt: the same list of positional column indices
# is dropped in place (axis=1, inplace=True) from each of the seven
# DataFrames df1..df7, with one hand-written call per frame.
df1.drop(df1.columns[[4,9,10,11,12,13, 14, 15, 16, 17, 18, 19, 20, 21, 22,23, 25, 28, 30,32, 31, 33]], axis=1, inplace=True)
df2.drop(df2.columns[[4,9,10,11,12,13, 14, 15, 16, 17, 18, 19, 20, 21, 22,23, 25, 28, 30,32, 31, 33]], axis=1, inplace=True)
df3.drop(df3.columns[[4,9,10,11,12,13, 14, 15, 16, 17, 18, 19, 20, 21, 22,23, 25, 28, 30,32, 31, 33]], axis=1, inplace=True)
df4.drop(df4.columns[[4,9,10,11,12,13, 14, 15, 16, 17, 18, 19, 20, 21, 22,23, 25, 28, 30,32, 31, 33]], axis=1, inplace=True)
df5.drop(df5.columns[[4,9,10,11,12,13, 14, 15, 16, 17, 18, 19, 20, 21, 22,23, 25, 28, 30,32, 31, 33]], axis=1, inplace=True)
df6.drop(df6.columns[[4,9,10,11,12,13, 14, 15, 16, 17, 18, 19, 20, 21, 22,23, 25, 28, 30,32, 31, 33]], axis=1, inplace=True)
df7.drop(df7.columns[[4,9,10,11,12,13, 14, 15, 16, 17, 18, 19, 20, 21, 22,23, 25, 28, 30,32, 31, 33]], axis=1, inplace=True)
答案 0 :(得分:1)
首先,创建一个包含所有数据框的列表:
# Placeholder from the answer: as written this list holds a single string.
# Replace that string with the actual DataFrame objects to process (for
# example the seven frames from the question) before running the code below.
dfs = ['list of dataframes']
创建要删除的列的列表
# Positional indices of the columns to drop: 4, the contiguous run 9..23, and
# a handful of later columns (listed in the same order as originally given).
cols = [4, *range(9, 24), 25, 28, 30, 32, 31, 33]
然后使用如下循环,就地删除每个数据框中的这些列:
# Drop the columns at the positions listed in `cols` from every DataFrame in
# `dfs`. inplace=True mutates each frame, so `dfs` keeps referring to the
# same (now narrower) objects.
for df in dfs:
    df.drop(df.columns[cols], axis=1, inplace=True)
或者,使用列表推导式的写法:
# Non-mutating variant: build a new list of frames, each with the columns at
# the positions in `cols` removed. `columns=` is the keyword form of passing
# the labels together with axis=1.
dfs = [df.drop(columns=df.columns[cols]) for df in dfs]
答案 1 :(得分:0)
def concatenate(indir="//xxyyzz/xyx/Profile/Desktop/Error_log_data", outfile="//xxyyzz/xyz/Profile/Desktop/Error_log_data//concat.csv"):
    """Stack every header-less CSV in *indir* into a single CSV at *outfile*.

    Each input file is read with ``header=None``, the frames are concatenated
    row-wise, the columns are labelled 0..34, and the result is written
    without the row index. The base name of each input file is printed as it
    is read.

    Args:
        indir: Directory searched (non-recursively) for ``*.csv`` files.
        outfile: Destination CSV. A relative path is resolved against
            *indir*.

    Raises:
        ValueError: If *indir* contains no input CSV files, or if the
            combined data does not have exactly 35 columns.
    """
    # Resolve paths against indir rather than calling os.chdir(), so the
    # process-wide working directory is left untouched.
    out_path = os.path.join(indir, outfile)
    out_key = os.path.normcase(os.path.abspath(out_path))

    # Sorted for a reproducible row order. An output file left in indir by a
    # previous run is skipped so reruns do not fold old results back in.
    pattern = os.path.join(glob.escape(indir), "*.csv")
    file_list = [
        path
        for path in sorted(glob.glob(pattern))
        if os.path.normcase(os.path.abspath(path)) != out_key
    ]
    if not file_list:
        raise ValueError("no CSV files to concatenate in {!r}".format(indir))

    frames = []
    for path in file_list:
        print(os.path.basename(path))
        frames.append(pd.read_csv(path, header=None))

    concat_df = pd.concat(frames, axis=0)
    # Assigning a fixed-length label list raises ValueError when the combined
    # frame does not have exactly 35 columns.
    concat_df.columns = list(range(35))
    concat_df.to_csv(out_path, index=False)
# Build the combined CSV with the default paths, then load it back.
concatenate()
df_err = pd.read_csv("//xxxyyyxx/xyz/Profile/Desktop/Error_log_data//concat.csv")

# Columns to keep, mapped to descriptive names. The keys are strings because
# the frame is indexed with the header labels as read back from the CSV.
column_names = {
    '0': "Err_Class",
    '1': "Err_Code",
    '2': "Date_time",
    '3': "Microseconds",
    '5': "Err_Source",
    '6': "Err_Type",
    '7': "Err_Text1",
    '8': "Err_Text2",
    '9': "Err_Code_link",
    '24': "ToolID",
    '26': "Component1",
    '27': "Component2",
    '29': "X_number",
    '34': "END",
}

# Select only the wanted columns (in mapping order) and rename them in one go.
df = df_err[list(column_names)].rename(columns=column_names)