R可以读取不规则的xlsx吗?

时间:2017-09-26 10:31:06

标签: r r-xlsx

enter image description here

我有如上图所示的那么多(大约1,000)xlsx。我想阅读每个xlsx并获取每个候选人姓名,号码和年龄的数据。但我不知道怎么读这个不规则的xlsx?

2 个答案:

答案 0 :(得分:4)

我不知道任何R Excel API是否足够智能来处理您的列格式,但有一个简单的解决方法。您只需将上述工作表保存为CSV格式即可。对上面显示的数据执行此操作会给我留下以下三条CSV行:

Title,,,,,
name,mike,number,123214,age,28
,score,,ddd,aaa,bbb

您可以尝试以下代码:

df <- read.csv(file="path/to/your/file.csv", header=FALSE)
df <- df[2:nrow(df), ]      # drop first row

获取Mike的姓名,号码和年龄:

name   <- df[1, 2]
number <- df[1, 4]
age    <- df[1, 6]

答案 1 :(得分:1)

您可以在Excel中运行VBA代码,并将任意数量的XLSX文件转换为CSV文件。然后遍历所有CSV文件以将所有CSV文件合并到R中的数据框中。

Sub Convert_Excel_To_CSV()
    Dim MyPath As String, FilesInPath As String
    Dim MyFiles() As String, Fnum As Long
    Dim mybook As Workbook
    Dim CalcMode As Long
    Dim sh As Worksheet
    Dim ErrorYes As Boolean
    Dim LPosition As Integer

    'Fill in the path\folder where the Excel files are
    MyPath = "C:\Users\Ryan\Desktop\Excel_Files\"

    FilesInPath = Dir(MyPath & "*.xlsx*")
    If FilesInPath = "" Then
        MsgBox "No files found"
        Exit Sub
    End If

    Fnum = 0
    Do While FilesInPath <> ""
        Fnum = Fnum + 1
        ReDim Preserve MyFiles(1 To Fnum)
        MyFiles(Fnum) = FilesInPath
        FilesInPath = Dir()
    Loop

    With Application
        CalcMode = .Calculation
        .Calculation = xlCalculationManual
        .ScreenUpdating = False
        .EnableEvents = False
    End With

    If Fnum > 0 Then
        For Fnum = LBound(MyFiles) To UBound(MyFiles)
            Set mybook = Nothing
            On Error Resume Next
            Set mybook = Workbooks.Open(MyPath & MyFiles(Fnum))
            On Error GoTo 0

            If Not mybook Is Nothing Then


                    LPosition = InStr(1, mybook.Name, ".") - 1
                    mybookname = Left(mybook.Name, LPosition)
                    mybook.Activate
                    'All XLSX Files get saved in the directory below:
                    ActiveWorkbook.SaveAs Filename:="C:\your_path_here\" & mybookname & ".csv" _
                        , FileFormat:=xlCSVMSDOS, _
                        CreateBackup:=False

            End If

            mybook.Close SaveChanges:=False

        Next Fnum
    End If

    If ErrorYes = True Then
        MsgBox "There are problems in one or more files, possible problem:" _
             & vbNewLine & "protected workbook/sheet or a sheet/range that not exist"
    End If

    With Application
        .ScreenUpdating = True
        .EnableEvents = True
        .Calculation = CalcMode
    End With
End Sub






setwd("C:/your_path")
fnames <- list.files()
csv <- lapply(fnames, read.csv)
result <- do.call(rbind, csv)


********  ********  ********  ********  ********  ********  ********  ********  


filedir <- setwd("C:/your_path")
file_names <- dir(filedir)
your_data_frame <- do.call(rbind,lapply(file_names,read.csv))


********  ********  ********  ********  ********  ********  ********  ********  


filedir <- setwd("C:/your_path")
file_names <- dir(filedir)
your_data_frame <- do.call(rbind, lapply(file_names, read.csv, skip = 1, header = FALSE))


********  ********  ********  ********  ********  ********  ********  ********  


filedir <- setwd("C:/your_path")
file_names <- dir(filedir)
your_data_frame <- do.call(rbind, lapply(file_names, read.csv, header = FALSE))


********  ********  ********  ********  ********  ********  ********  ********  


# 
temp <- setwd("C:/your_path")
temp = list.files(pattern="*.csv")
myfiles = lapply(temp, read.delim)