
时间:2018-10-24 20:54:35

标签: r excel merge

我有多个Excel文件,需要将它们合并为一个文件,但只合并其中的某些行。Excel文件如下所示...






2)日期以常规(数字)格式返回(得到“43008”,而不是“9/30/2017”)




library(openxlsx)   # Excel and csv files
library(svDialogs)   # Dialog boxes

# Work from the folder that holds the header template; the relative file
# names used below are resolved against this directory.
setwd("C:/Users/Work/Combined Manifest")

# Start a new in-memory workbook with a single "Template" sheet.
wb <- createWorkbook()
addWorksheet(wb = wb, sheetName = "Template")

# Load the "Template" sheet of headers.xlsx and copy it, column names
# included, into the new workbook's sheet.
df.headers <- read.xlsx(xlsxFile = "headers.xlsx", sheet = "Template")
writeData(wb = wb, sheet = "Template", x = df.headers, colNames = TRUE)

# Prompt the user for a directory until an existing one is entered.
#
# An input dialog (pre-filled with the current user name) asks for the folder
# that contains the files. If the entered text is not an existing directory,
# an error dialog is shown and the prompt is repeated.
#
# Returns: the entered path of an existing directory.
# Errors:  stops when the dialog returns no input (e.g. it was cancelled).
getPath <- function() {
  repeat {
    # Ask for path
    path <- dlgInput("Enter path to files: ", Sys.info()["user"])$res

    # A zero-length result would make the `if` below fail with an obscure
    # "argument is of length zero" error, so stop with a clear message instead.
    if (length(path) == 0) {
      stop("No path was entered; the dialog was cancelled.", call. = FALSE)
    }

    if (dir.exists(path)) {
      # Valid directory: hand it back to the caller
      return(path)
    }

    # Not a valid directory: tell the user, then loop round and ask again
    dlg_message("Error: The path you entered is not a valid directory. Please try again.")
  }
}

# Call getPath function
folder <- getPath()


# Get list of files in directory
pattern.ext <- "\\.xlsx$"
files <- dir(folder, full=TRUE, pattern=pattern.ext)

# Get basenames and remove extension 
files.nms <- basename(files)
files.nms <- gsub(pattern.ext, "", files.nms)

# Set the names
names(files) <- files.nms

# Iterate over the input files and stack their rows in the "Template" sheet.
# Each file is read from row 9 onward without a header row and written below
# the rows contributed by the previous files.

# First sheet row that receives data (row 2, as in the original write call).
next.row <- 2

for (nm in files.nms) {

  # Read in the data rows of this file
  df <- read.xlsx(files[[nm]], sheet = "Template", startRow = 9, colNames = FALSE)

  # Nothing read from this file: leave the write position untouched
  if (is.null(df) || nrow(df) == 0) {
    next
  }

  # Write data to sheet at the current position; writing every file at a fixed
  # startRow would make each file overwrite the previous one.
  writeData(wb, "Template", df, startCol = 2, startRow = next.row, colNames = FALSE)

  # Advance past the rows just written
  next.row <- next.row + nrow(df)
}

# Save the combined workbook (replacing any existing Combined.xlsx)
saveWorkbook(wb, "Combined.xlsx", overwrite = TRUE)

编辑: 因此,在下面的循环中,我成功读取了文件并将其合并。感谢您的所有帮助!

# Read every file and append its rows to allData.
# allData must already exist before this loop runs (it is read by rbind below).
for (nm in files.nms) {

  # Read in files: row 8 supplies the column names, date cells are converted
  # to dates, and empty rows/columns are kept
  df <- read.xlsx(files[nm], sheet = "Template", startRow = 8, colNames = TRUE,
                  detectDates = TRUE, skipEmptyRows = FALSE,
                  skipEmptyCols = FALSE)

  # Append the data
  allData <- rbind(allData, df)
}

编辑:最终解决方案 感谢大家的帮助!

library(openxlsx)   # Excel and csv files
library(svDialogs)   # Dialog boxes

# Start a new in-memory workbook that will receive the combined data.
wb <- createWorkbook()

# Give it a single sheet named "Template".
addWorksheet(wb = wb, sheetName = "Template")

# Prompt the user for a directory until an existing one is entered.
#
# An input dialog (pre-filled with the current user name) asks for the folder
# that contains the files. If the entered text is not an existing directory,
# an error dialog is shown and the prompt is repeated.
#
# Returns: the entered path of an existing directory.
# Errors:  stops when the dialog returns no input (e.g. it was cancelled).
getPath <- function() {
  repeat {
    # Ask for path
    path <- dlgInput("Enter path to files: ", Sys.info()["user"])$res

    # A zero-length result would make the `if` below fail with an obscure
    # "argument is of length zero" error, so stop with a clear message instead.
    if (length(path) == 0) {
      stop("No path was entered; the dialog was cancelled.", call. = FALSE)
    }

    if (dir.exists(path)) {
      # Valid directory: hand it back to the caller
      return(path)
    }

    # Not a valid directory: tell the user, then loop round and ask again
    dlg_message("Error: The path you entered is not a valid directory. Please try again.")
  }
}

# Call getPath function
folder <- getPath()

# Set working directory

# Get list of files in directory
pattern.ext <- "\\.xlsx$"
files <- dir(folder, full=TRUE, pattern=pattern.ext)

# Get basenames and remove extension 
files.nms <- basename(files)

# Set the names
names(files) <- files.nms

# Create empty dataframe
allData <- data.frame()

# Create list (reserve memory)
f.List <- vector("list",length(files.nms))

# Loop and load files: read each workbook into its pre-allocated slot.
# seq_along() yields an empty sequence when there are no files, whereas
# 1:length(x) would iterate over c(1, 0) and try to read a missing path.
for (nm in seq_along(files.nms)) {

  # Read in files: row 8 supplies the column names, date cells are converted
  # to dates, and empty rows/columns are kept
  f.List[[nm]] <- read.xlsx(files[[nm]], sheet = "Template", startRow = 8,
                            colNames = TRUE, detectDates = TRUE,
                            skipEmptyRows = FALSE, skipEmptyCols = FALSE)
}

# Append the data: bind all per-file data frames in a single call
allData <- do.call("rbind", f.List)

# Add a new column as 'Member Site'
allData <- data.frame('Member Site' = "", allData)

# Take the substring of the Specimen.ID column for Memeber Site
allData$Member.Site <- sapply(strsplit(allData$Specimen.ID, "-"), "[", 2)

# Write data to sheet
writeData(wb, "Template", startCol = 1, allData)

# Save workbook
saveWorkbook(wb, "Combined.xlsx", overwrite = TRUE)

1 个答案:

答案 0 :(得分:2)




# Create an empty data frame to accumulate the rows of every file
allData <- data.frame()

# Loop and load files
for (nm in files.nms) {

  # Read in files: data rows start at row 9 and there is no header row
  df <- read.xlsx(files[nm], sheet = "Template", startRow = 9, colNames = FALSE)

  # Append the data
  allData <- rbind(allData, df)
}

# Write the combined data to the sheet. This must be allData: after the loop,
# df only holds the rows of the last file that was read.
writeData(wb, "Template", allData, startCol = 2, startRow = 2, colNames = FALSE)



如果文件较多,在循环中反复调用 rbind 会变慢——正如 @Parfait 提到的,这是因为每次追加都要复制已有的数据。避免这一问题的方法是:先创建一个长度足以容纳所有数据的空列表来预留内存,再逐个填充该列表,最后用 do.call("rbind", ...) 一次性把所有数据合并在一起。下面的示例代码与您在问题中给出的代码保持一致。

# Create list (reserve memory): one slot per file to be read
f.List <- vector("list", length(files.nms))

# Loop and load files. seq_along() is safe when there are no files, whereas
# 1:length(x) would iterate over c(1, 0).
for (eNr in seq_along(files.nms)) {

  # Read in files. The path comes from `files`; `files.nms` only holds the
  # labels derived from the file names. colNames = FALSE matches the
  # rbind-in-a-loop variant, so all files share the same default column names
  # and can be row-bound.
  f.List[[eNr]] <- read.xlsx(files[[eNr]], sheet = "Template", startRow = 9,
                             colNames = FALSE)
}

# Append the data: bind all per-file data frames in a single call
allData <- do.call("rbind", f.List)


# Sample data: two data frames with the same columns in a different order
df1 <- data.frame(x = 1:3, y = 3:1)
df2 <- data.frame(y = 4:6, x = 3:1)
df.List <- list(df1, df2)

# Create list: pre-allocate one slot per data frame
d.List <- vector("list", length(df.List))

# Loop and add data (seq_along() also copes with an empty input list)
for (eNr in seq_along(df.List)) {
  d.List[[eNr]] <- df.List[[eNr]]
}

# Bind all at once; rbind matches data frame columns by name, not by position
dfAll <- do.call("rbind", d.List)
