使用RDCOMClient打开数据框时内存泄漏

时间:2017-05-23 14:49:56

标签: r memory-leaks rdcomclient

我正在尝试使用RDCOMClient包,把数据框动态地导出到Excel中打开。我已经有一段可以正常运行的代码能把数据框写入Excel,但遇到的问题是:代码执行完毕后,rsession.exe并不会释放向Excel加载数据时占用的内存资源。即使关闭了Excel应用程序,这部分内存也不会被释放。我是不是遗漏了什么?有什么办法可以让rsession.exe释放这些内存?目前我能够释放内存的唯一方法是关闭RStudio再重新打开。

可正常运行的函数代码

#' Open a data frame in a new Excel workbook through COM (RDCOMClient).
#'
#' A new Excel instance and workbook are created, the data is written to the
#' first sheet in blocks of `block.size` rows, the columns are auto-fitted and
#' Excel is finally made visible.
#'
#' NOTE: as reported alongside this function, the calling R session may keep
#' the memory used for the COM transfer even after Excel is closed. Running
#' the function in a throw-away subprocess (e.g. `callr::r(in.xl, ...)`) is a
#' known workaround, which is why the function is kept fully self-contained.
#'
#' @param data A data frame, or any object `as.data.frame()` can coerce.
#' @param headers If `TRUE`, column names are written to sheet row 1 and the
#'   data starts on row 2.
#' @param rownames If `TRUE`, row names are written to sheet column 1 and the
#'   data starts in column 2.
#' @param block.size Number of data rows sent to Excel per COM assignment.
#' @return The value of the final `gc()` call (unchanged from the original
#'   implementation).
in.xl <- function(data, headers = TRUE, rownames = FALSE, block.size = 2000) {
  # library() fails loudly when the package is missing, whereas require()
  # only returns FALSE. The call stays inside the function so that the
  # function still works when shipped to a fresh R session.
  library(RDCOMClient)

  stopifnot(
    is.numeric(block.size),
    length(block.size) == 1,
    !is.na(block.size),
    block.size >= 1
  )
  block.size <- floor(block.size)

  # Normalise the flags once; the arguments themselves are never overwritten.
  with.headers <- isTRUE(as.logical(headers))
  with.rownames <- isTRUE(as.logical(rownames))

  # Attempt to coerce non data frame input into a data frame
  if (!is.data.frame(data)) {
    data <- as.data.frame(data)
  }

  n.rows <- nrow(data)
  n.cols <- ncol(data)

  # Top-left sheet cell of the data area and its last column
  first.row <- if (with.headers) 2 else 1
  first.col <- if (with.rownames) 2 else 1
  last.col <- first.col + n.cols - 1

  # Create COM connection to Excel
  xlApp <- COMCreate("Excel.Application")

  # Whatever happens below, hand a usable, visible Excel back to the user
  # instead of leaving a hidden instance with screen updating switched off.
  on.exit({
    try(xlApp[["StatusBar"]] <- "", silent = TRUE)
    try(xlApp[["ScreenUpdating"]] <- TRUE, silent = TRUE)
    try(xlApp[["Visible"]] <- TRUE, silent = TRUE)
  }, add = TRUE)

  xlWB <- xlApp[["Workbooks"]]$Add()
  xlSheet <- xlWB$Sheets(1)

  xlApp[["ScreenUpdating"]] <- FALSE

  # Column names go into row 1, above the data columns
  if (with.headers && n.cols > 0) {
    header.rng <- xlSheet$Range(
      xlSheet$Cells(1, first.col),
      xlSheet$Cells(1, last.col)
    )
    header.rng[["Value"]] <- asCOMArray(matrix(colnames(data), nrow = 1))
    rm(header.rng)
  }

  # Row names go into column 1, beside the data rows
  if (with.rownames && n.rows > 0) {
    rowname.rng <- xlSheet$Range(
      xlSheet$Cells(first.row, 1),
      xlSheet$Cells(first.row + n.rows - 1, 1)
    )
    rowname.rng[["Value"]] <- asCOMArray(matrix(rownames(data), ncol = 1))
    rm(rowname.rng)
  }

  # Transfer the data block by block. Sheet rows are derived from the data
  # frame rows, so both ranges always have the same height and the last
  # (possibly shorter) block is never skipped.
  n.blocks <- ceiling(n.rows / block.size)

  if (n.blocks > 0 && n.cols > 0) {
    pb <- txtProgressBar(
      min = 0, max = n.blocks, initial = 0, style = 3, width = 20
    )

    for (block in seq_len(n.blocks)) {
      df.row.start <- (block - 1) * block.size + 1
      df.row.end <- min(block * block.size, n.rows)
      xl.row.start <- first.row + df.row.start - 1
      xl.row.end <- first.row + df.row.end - 1

      xlApp[["StatusBar"]] <- paste("Processing block", block, "of", n.blocks)

      # Set range for data values
      rng <- xlSheet$Range(
        xlSheet$Cells(xl.row.start, first.col),
        xlSheet$Cells(xl.row.end, last.col)
      )

      # drop = FALSE keeps a one-column data frame two-dimensional
      rng[["Value"]] <- asCOMArray(
        data[df.row.start:df.row.end, , drop = FALSE]
      )

      setTxtProgressBar(pb, block)

      # Drop the range reference and collect after every block, as the
      # original did, to limit how much the session accumulates.
      rm(rng)
      gc()
    }

    close(pb)
  }

  xlApp[["StatusBar"]] <- "Formatting Columns..."

  # Auto adjust column widths
  for (col.idx in seq_len(last.col)) {
    sheet.col <- xlSheet$Columns(col.idx)
    sheet.col[["EntireColumn"]]$AutoFit()
  }

  # Status bar, screen updating and visibility are restored by on.exit().
  gc()
}

生成用于测试的大型数据框的代码(数据量较大时,在任务管理器中查看rsession.exe进程,可以更容易地观察到内存占用问题):

# Build a large all-character test data frame: 20 columns of 100000 values.
# Each column is sampled with replacement from its own pool of 10 random
# 15-letter strings. `replace` is spelled out instead of relying on partial
# argument matching (`rep`).
df <- data.frame(
  replicate(
    20,
    sample(
      replicate(
        10,
        paste(sample(LETTERS, 15, replace = TRUE), collapse = "")
      ),
      100000,
      replace = TRUE
    )
  )
)

# Send the test data to Excel; watch rsession.exe memory while this runs.
in.xl(df)

1 个答案:

答案 0 :(得分:1)

您可以使用 callr 包。使用以下代码在一个独立的 R 子进程中运行该函数,子进程结束后其占用的内存即被释放:

# Build the test data: 20 columns of 1000000 values, each column sampled with
# replacement from its own pool of 10 random 15-letter strings. `replace` is
# spelled out instead of relying on partial argument matching (`rep`).
df <- data.frame(
  replicate(
    20,
    sample(
      replicate(
        10,
        paste(sample(LETTERS, 15, replace = TRUE), collapse = "")
      ),
      1000000,
      replace = TRUE
    )
  )
)

library(callr)

# Run in.xl() in a separate R process: the COM transfer happens there, so the
# memory it used is returned to the system when that process exits rather than
# staying in the interactive session.
callr::r(func = in.xl, args = list(data = df))