如何在Linux上获取R可用的内存量

时间:2016-04-02 11:01:30

标签: r linux

我知道,我可以使用以下内容查询/proc/meminfo

# Bytes of memory the kernel reports as available (the `MemAvailable` field
# of /proc/meminfo, which is expressed in kB), or 0 when the value cannot be
# determined.
#
# The file is read in-process instead of spawning a shell plus awk: starting
# a child process needs memory of its own and can fail exactly when memory is
# tight. The result is always a single number; a missing file or a missing
# `MemAvailable` line yields 0 rather than an error or a zero-length vector.
memfree <- tryCatch(
  local({
    meminfo_path <- "/proc/meminfo"
    field_pattern <- "^MemAvailable:[[:space:]]*([0-9]+).*$"
    if (!file.exists(meminfo_path)) {
      0
    } else {
      # Keep only lines that match the full pattern so the numeric
      # conversion below can never see non-digit text.
      hits <- grep(field_pattern, readLines(meminfo_path, warn = FALSE),
                   value = TRUE)
      if (length(hits) >= 1L) {
        as.numeric(sub(field_pattern, "\\1", hits[[1L]])) * 1024
      } else {
        0
      }
    }
  }),
  error = function(e) 0
)

不幸的是,它有两个主要限制:

  • 启动子shell本身就要占用不少内存,我已经多次看到它在仍有许多MB可用内存的情况下失败。
  • 它与通过内核API分配的每进程内存限制不兼容。

当然,R必须有办法知道它可用的实际内存大小。但在哪里找到它?

我在R的错误跟踪器上提交了一个错误报告:https://bugs.r-project.org/bugzilla/show_bug.cgi?id=16793

1 个答案:

答案 0 :(得分:1)

这是一个有趣的问题,我认为你可以通过使用Rcpp实际解决这个问题。这是一个可能的解决方案(代码中的注释):

#install ulimit package from github
#devtools::install_github("krlmlr/ulimit")

#one should delete all objects from workspace
#you have to uncomment that out yourself :)
#rm(list=ls())

library(Rcpp)

# C++ source text for mem_used_bytes(pid), held in an R string so it can be
# handed to cppFunction() further down.
#
# The C++ function opens /proc/<pid>/statm, skips the first field, reads the
# second one into `rss` and returns it multiplied by sysconf(_SC_PAGESIZE).
# It returns 0 when the file cannot be opened or the field cannot be parsed.
# The path is built with std::to_string, which is why the code is compiled
# with the cpp11 plugin.
#
# Adapted from
# stackoverflow.com/questions/669438/how-to-get-memory-usage-at-run-time-in-c
src_string_1="
long mem_used_bytes(int pid) {
    long rss = 0L;
    FILE* fp = NULL;
    std::string file_path=\"/proc/\"+std::to_string(pid)+\"/statm\";
    if ( (fp = fopen(file_path.c_str(), \"r\" )) == NULL )
        return (size_t)0L;      /* Can't open? */
    if ( fscanf( fp, \"%*s%ld\", &rss ) != 1 )
    {
        fclose( fp );
        return (size_t)0L;      /* Can't read? */
    }
    fclose( fp );
    return (size_t)rss * (size_t)sysconf( _SC_PAGESIZE);
}
"

# C++ source text for mem_limit_bytes(pid_int), held in an R string so it can
# be handed to cppFunction() further down.
#
# The C++ function queries the soft RLIMIT_AS (address-space) limit of the
# given process with prlimit(). When no limit is set it falls back to the
# total RAM reported by sysinfo(); otherwise it returns the limit in bytes.
# Note that the fallback is *total* RAM, not currently free memory.
#
# Both system calls are checked: a failing prlimit() (e.g. unknown pid or
# missing permission) or sysinfo() raises an R error through Rcpp::stop()
# instead of reading an uninitialised struct. "No limit" is detected with the
# RLIM_INFINITY constant rather than a literal -1.
#
# Code snippets taken from http://linux.die.net/man/2/getrlimit
src_string_2 <- "
long mem_limit_bytes(int pid_int) {
    struct rlimit limit;
    pid_t pid = pid_int;
    if (prlimit(pid, RLIMIT_AS, NULL, &limit) != 0) {
      //prlimit leaves the struct untouched on failure, so do not read it
      Rcpp::stop(\"prlimit() failed for pid %d\", pid_int);
    }
    long res;
    if (limit.rlim_cur == RLIM_INFINITY) {
      //there is no memory limit for the current process (should be default)
      Rcpp::Rcout<<\"No limit detected\\n\";
      struct sysinfo info;
      if (sysinfo(&info) != 0) {
        Rcpp::stop(\"sysinfo() failed\");
      }
      res = info.mem_unit * info.totalram;
    } else {
      //memory limit set
      Rcpp::Rcout<<\"Limit detected\\n\";
      res = limit.rlim_cur;
    }
    return res;
}
"

# Compile both C++ snippets into R-callable functions.
# The first snippet is built with the cpp11 plugin; each call lists the
# extra headers that are prepended to the generated translation unit.
cppFunction(
  code = src_string_1,
  plugins = "cpp11",
  includes = c(
    "#include <string>",
    "#include <sys/resource.h>",
    "#include <unistd.h>"
  )
)
cppFunction(
  code = src_string_2,
  includes = c(
    "#include <sys/resource.h>",
    "#include <unistd.h>",
    "#include <sys/sysinfo.h> "
  )
)

# No limit configured yet: the helper falls back to total system memory (MB).
mem_limit_bytes(Sys.getpid()) / 1e6
# No limit detected
# [1] 8228.246

# Cap the memory of the running R process via the ulimit package.
# NOTE(review): the 4194.304 below equals 4000 * 1024^2 / 1e6, so the
# argument is presumably in MiB; confirm against the ulimit docs.
ulimit::memory_limit(4000)

# The same query now reports the cap instead of total RAM.
mem_limit_bytes(Sys.getpid()) / 1e6
# Limit detected
# [1] 4194.304

现在试用mem_used_bytes函数

# Baseline: run the garbage collector, then record memory in use (MB).
gc()
old_mem_mb <- mem_used_bytes(Sys.getpid()) / 1e6

# Allocate an NN x NN matrix of doubles: 8 bytes per cell, about 800 MB.
NN <- 1e4
expected_memory_mb <- NN^2 * 8 / 1e6
A <- matrix(runif(NN^2), nrow = NN, ncol = NN)

# Collect garbage once more so only the live matrix is counted.
gc()

# Second measurement, taken while the matrix is still alive.
new_mem_mb <- mem_used_bytes(Sys.getpid()) / 1e6

# Ratio of measured growth to expected growth; should be close to 1.
(new_mem_mb - old_mem_mb) / expected_memory_mb

编辑:这是一个稍微简单的单文件版本,只保留了所需的头文件,并使用普通的C++:

#include <Rcpp.h>
#include <unistd.h>
#include <sys/resource.h>
#include <sys/sysinfo.h>

// Memory in use by process `pid`, in bytes.
//
// Reads the second field of /proc/<pid>/statm (a page count) and scales it
// by the system page size. Returns 0 when the file cannot be opened or the
// field cannot be parsed.
// [[Rcpp::export]]
long mem_used_bytes(int pid) {
    char statm_path[128];
    snprintf(statm_path, 127, "/proc/%d/statm", pid);

    FILE* statm = fopen(statm_path, "r");
    if (statm == NULL) {
        return 0L;              /* Can't open? */
    }

    // "%*s" discards the first field; "%ld" captures the second one.
    long resident_pages = 0L;
    const int fields_read = fscanf(statm, "%*s%ld", &resident_pages);
    fclose(statm);
    if (fields_read != 1) {
        return 0L;              /* Can't read? */
    }

    const size_t page_bytes = static_cast<size_t>(sysconf(_SC_PAGESIZE));
    return static_cast<size_t>(resident_pages) * page_bytes;
}

// Memory ceiling for process `pid_int`, in bytes.
//
// Queries the soft RLIMIT_AS (address-space) limit with prlimit(). When no
// limit is set, falls back to the total RAM reported by sysinfo(); note that
// this is total, not currently free, memory. Prints which case was hit to
// the R console.
//
// Raises an R error (via Rcpp::stop) if prlimit() or sysinfo() fails, e.g.
// for an unknown pid or missing permission, instead of reading an
// uninitialised struct.
// [[Rcpp::export]]
long mem_limit_bytes(int pid_int) {
    struct rlimit limit;
    pid_t pid = pid_int;
    if (prlimit(pid, RLIMIT_AS, NULL, &limit) != 0) {
        // prlimit leaves `limit` untouched on failure, so do not read it.
        Rcpp::stop("prlimit() failed for pid %d", pid_int);
    }

    long res;
    // RLIM_INFINITY is the documented "no limit" marker; rlim_t is unsigned,
    // so comparing against a literal -1 only worked by implicit conversion.
    if (limit.rlim_cur == RLIM_INFINITY) {
        //there is no memory limit for the current process (should be default)
        Rcpp::Rcout << "No limit detected\n";
        struct sysinfo info;
        if (sysinfo(&info) != 0) {
            Rcpp::stop("sysinfo() failed");
        }
        // totalram is expressed in units of mem_unit bytes.
        res = info.mem_unit * info.totalram;
    } else {
        //memory limit set
        Rcpp::Rcout << "Limit detected\n";
        res = limit.rlim_cur;
    }
    return res;
}

/*** R
## No limit configured: the helper falls back to total system memory (MB).
mem_limit_bytes(Sys.getpid()) / 1e6

## Exercise `mem_used_bytes`.
## Baseline: run the garbage collector, then record memory in use (MB).
gc()
old_mem_mb <- mem_used_bytes(Sys.getpid()) / 1e6

## Allocate an NN x NN matrix of doubles: 8 bytes per cell, about 800 MB.
NN <- 1e4
expected_memory_mb <- NN^2 * 8 / 1e6
A <- matrix(runif(NN^2), nrow = NN, ncol = NN)

## Collect garbage once more so only the live matrix is counted.
gc()

## Second measurement, taken while the matrix is still alive.
new_mem_mb <- mem_used_bytes(Sys.getpid()) / 1e6

## Ratio of measured growth to expected growth; should be close to 1.
(new_mem_mb - old_mem_mb) / expected_memory_mb

*/