连接具有未知列数的多个.xls文件

时间:2017-01-05 16:40:17

标签: python pandas concatenation xls

我想用 Python 把某个目录下的所有 .xls 文件合并到一个数据框(DataFrame)中,并将结果另存为一个新的、合并后的 .xls 文件。这些 .xls 文件的列数未知,表头也不一致。

我在这个论坛上使用了其他建议并最终得到了这个:

' Item collection declared WithEvents so this module can receive its events.
Private WithEvents Items As Outlook.Items

' Runs at Outlook startup: binds Items to the item collection of the
' default Inbox folder of the MAPI namespace.
Private Sub Application_Startup()
    Dim mapiSession As Outlook.NameSpace
    Set mapiSession = Outlook.Application.GetNamespace("MAPI")
    Set Items = mapiSession.GetDefaultFolder(olFolderInbox).Items
End Sub

' Handles the ItemAdd event of the Items collection.
'
' When the added item is a MailItem whose sender name is "teresa", whose
' subject is "emails" and which carries at least one attachment, the first
' attachment is saved under attPath and the message is marked as read.
' Any runtime error is reported in a message box and the handler exits.
'
' item - the object that was just added to the collection.
Private Sub Items_ItemAdd(ByVal item As Object)

    On Error GoTo ErrorHandler

    'Only act if it's a MailItem
    Dim Msg As Outlook.MailItem
    If TypeName(item) = "MailItem" Then
        Set Msg = item

        'From specified user with specified subject
        If (Msg.SenderName = "teresa") And _
          (Msg.Subject = "emails") And _
          (Msg.Attachments.Count >= 1) Then

            Dim myAttachments As Outlook.Attachments
            Dim Att As String
            Dim saveDir As String

            'Location to save in.
            'NOTE(review): SaveAsFile expects a file-system path; this value
            'looks like an Outlook folder path - confirm the intended target.
            Const attPath As String = "Mailbox/Extra"

            'Make sure the directory ends with a separator so that the file
            'name is not glued onto the last folder name ("Mailbox/Extrafoo").
            saveDir = attPath
            If Right$(saveDir, 1) <> "\" And Right$(saveDir, 1) <> "/" Then
                saveDir = saveDir & "\"
            End If

            'Save the first attachment (read through the typed MailItem).
            Set myAttachments = Msg.Attachments
            Att = myAttachments.Item(1).DisplayName
            myAttachments.Item(1).SaveAsFile saveDir & Att

            'Mark as read
            Msg.UnRead = False
        End If
    End If

ProgramExit:
    Exit Sub

ErrorHandler:
    MsgBox Err.Number & " - " & Err.Description
    Resume ProgramExit

End Sub

我收到了这些错误:

import os
import pandas as pd

# Read every .xls workbook in the current working directory and stack the
# resulting frames row-wise into a single DataFrame named `df`.
path = os.getcwd()
files = os.listdir(path)

# Match on the actual ".xls" extension; a bare 'xls' suffix test would also
# pick up names such as "notesxls" that have no extension at all.
files_xls = [f for f in files if f.endswith('.xls')]

# pd.read_excel takes ONE path per call, so read each file separately.
# Joining with `path` keeps this working even if the cwd changes later.
frames = [pd.read_excel(os.path.join(path, f)) for f in files_xls]

# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
# Columns are aligned by label, and columns missing from a file become NaN.
# pd.concat raises ValueError on an empty list, so fall back to an empty frame.
df = pd.concat(frames) if frames else pd.DataFrame()

1 个答案:

答案 0 :(得分:1)

试试这个兄弟

# Collect one DataFrame per workbook, then concatenate them once at the end.
df = []

for f in files_xls:
    data = pd.read_excel(f)
    # list.append mutates the list in place and returns None, so its result
    # must NOT be assigned back to df (that would replace the list with None).
    df.append(data)

# pd.concat raises ValueError when given an empty list, so fall back to an
# empty DataFrame when no .xls files were found.
mydf = pd.concat(df, axis=0) if df else pd.DataFrame()