每组中的含义超过1列组索引

时间:2016-12-08 18:11:30

标签: r matrix grouping mean sapply

我有一个变量,我想获得每个组中的每个组中列出的组的方法,并且我有很多这样的列。然后我想将组平均值与适当的观察结果联系起来,这样如果我从一个m obs和不同分组的矩阵开始,我就得到一个m×n平均矩阵。例如:

> var <- round(runif(10),digits=2)

> var
[1] 0.47 0.21 0.80 0.65 0.32 0.72 0.29 0.93 0.77 0.64
> groupings <- cbind(sample(c(1,2,3), 10, replace=TRUE),
           sample(c(1,2,3), 10, replace=TRUE),
           sample(c(1,2,3,5), 10, replace=TRUE))
> groupings
      [,1] [,2] [,3]
 [1,]    3    1    5
 [2,]    1    1    5
 [3,]    2    1    5
 [4,]    3    2    3
 [5,]    2    3    1
 [6,]    1    1    1
 [7,]    2    3    1
 [8,]    1    2    1
 [9,]    3    1    5
[10,]    1    3    2

我可以使用以下(例如)

分别获得每个组内的均值
> means.1 <- sapply(split(var, groupings[,1]), function(x) mean(x))
> means.2 <- sapply(split(var, groupings[,2]), function(x) mean(x))
> means.3 <- sapply(split(var, groupings[,3]), function(x) mean(x))

> means.1
    1     2     3 
0.625 0.470 0.630 
> means.2
    1         2         3 
0.5940000 0.7900000 0.4166667 
> means.3
    1      2      3      5 
0.5650 0.6400 0.6500 0.5625 

但不仅这些单独的调用效率低下,他们仍然没有得到我想要的东西,这是以下

       [,1]      [,2]   [,3]
[1,]  0.630 0.5940000 0.5625
[2,]  0.625 0.5940000 0.5625
[3,]  0.470 0.5940000 0.5625
[4,]  0.630 0.7900000 0.6500
[5,]  0.470 0.4166667 0.5650
[6,]  0.625 0.5940000 0.5650
[7,]  0.470 0.4166667 0.5650
[8,]  0.625 0.7900000 0.5650
[9,]  0.630 0.5940000 0.5625
[10,] 0.625 0.4166667 0.6400

3 个答案:

答案 0 :(得分:3)

library(dplyr)
set.seed(1000)
var <- round(runif(10),digits=2)

groupings <- cbind(sample(c(1,2,3), 10, replace=TRUE),
                     sample(c(1,2,3), 10, replace=TRUE),
                     sample(c(1,2,3,5), 10, replace=TRUE), var)

df = data.frame(groupings)
df %>% 
  group_by(V1)%>% mutate(x1 =mean(var))%>% ungroup(V1) %>% 
  group_by(V2) %>% mutate(x2=mean(var)) %>% ungroup(V2) %>% 
  group_by(V3) %>% mutate(x3=mean(var)) %>% ungroup(V3)

#      V1    V2    V3   var        x1    x2    x3
#   <dbl> <dbl> <dbl> <dbl>     <dbl> <dbl> <dbl>
#1      2     1     3  0.33 0.4775000 0.322 0.250
#2      3     3     1  0.76 0.6566667 0.470 0.750
#3      1     1     3  0.11 0.1333333 0.322 0.250
#4      3     1     5  0.69 0.6566667 0.322 0.635
#5      3     2     3  0.52 0.6566667 0.630 0.250
#6      1     3     3  0.07 0.1333333 0.470 0.250
#7      2     2     1  0.74 0.4775000 0.630 0.750
#8      2     3     5  0.58 0.4775000 0.470 0.635
#9      1     1     3  0.22 0.1333333 0.322 0.250
#10     2     1     2  0.26 0.4775000 0.322 0.260

# you can simply subset the columns

答案 1 :(得分:2)

另一种选择,您可以使用apply(因为您已经有一个矩阵)循环遍历列( Margin 设置为2)并将列传递给ave作为组变量,您可以显式指定 FUN 参数为或不指定它为 mean 是使用的默认函数:

apply(groupings, 2, ave, x = var)  # pass the var as a named parameter since it is the 
                                   # parameter at the first position of ave function, if not
                                   # ave will treat the column as the first position parameter
                                   # which you don't want to

 #      [,1]      [,2]   [,3]
 #[1,] 0.630 0.5940000 0.5625
 #[2,] 0.625 0.5940000 0.5625
 #[3,] 0.470 0.5940000 0.5625
 #[4,] 0.630 0.7900000 0.6500
 #[5,] 0.470 0.4166667 0.5650
 #[6,] 0.625 0.5940000 0.5650
 #[7,] 0.470 0.4166667 0.5650
 #[8,] 0.625 0.7900000 0.5650
 #[9,] 0.630 0.5940000 0.5625
#[10,] 0.625 0.4166667 0.6400

或者使用dplyr,您可以使用mutate_all()功能:

library(dplyr)
mutate_all(as.data.frame(groupings), funs(ave(var, .)))

#      V1        V2     V3
#1  0.630 0.5940000 0.5625
#2  0.625 0.5940000 0.5625
#3  0.470 0.5940000 0.5625
#4  0.630 0.7900000 0.6500
#5  0.470 0.4166667 0.5650
#6  0.625 0.5940000 0.5650
#7  0.470 0.4166667 0.5650
#8  0.625 0.7900000 0.5650
#9  0.630 0.5940000 0.5625
#10 0.625 0.4166667 0.6400

答案 2 :(得分:1)

以下是一种使用ave以及splitsapply

的方法
sapply(split(groupings, rep(seq_len(ncol(groupings)), each=nrow(groupings))),
       function(x) ave(var, x, FUN=mean))
              1         2         3
 [1,] 0.4566667 0.5550000 0.3925000
 [2,] 0.6200000 0.5550000 0.3925000
 [3,] 0.4816667 0.5550000 0.3925000
 [4,] 0.4566667 0.5550000 0.6200000
 [5,] 0.4816667 0.5550000 0.4350000
 [6,] 0.4566667 0.5133333 0.6066667
 [7,] 0.4816667 0.0100000 0.4350000
 [8,] 0.4816667 0.5133333 0.3925000
 [9,] 0.4816667 0.5133333 0.6066667
[10,] 0.4816667 0.5550000 0.6066667

sapply的条目,split(groupings, rep(seq_len(ncol(groupings)), each=nrow(groupings)))生成一个列表,其中每个列表项都是矩阵分组的列。此列表的每个元素都会传送到sapplyave使用向量对set.seed(1234) var <- round(runif(10),digits=2) groupings <- cbind(sample(c(1,2,3), 10, replace=TRUE), sample(c(1,2,3), 10, replace=TRUE), sample(c(1,2,3,5), 10, replace=TRUE)) 进行分组。

数据

if(!function_exists('mime_content_type')) {

    function mime_content_type($filename) {

        $mime_types = array(

            'txt' => 'text/plain',
            'htm' => 'text/html',
            'html' => 'text/html',
            'php' => 'text/html',
            'css' => 'text/css',
            'js' => 'application/javascript',
            'json' => 'application/json',
            'xml' => 'application/xml',
            'swf' => 'application/x-shockwave-flash',
            'flv' => 'video/x-flv',

            // images
            'png' => 'image/png',
            'jpe' => 'image/jpeg',
            'jpeg' => 'image/jpeg',
            'jpg' => 'image/jpeg',
            'gif' => 'image/gif',
            'bmp' => 'image/bmp',
            'ico' => 'image/vnd.microsoft.icon',
            'tiff' => 'image/tiff',
            'tif' => 'image/tiff',
            'svg' => 'image/svg+xml',
            'svgz' => 'image/svg+xml',

            // archives
            'zip' => 'application/zip',
            'rar' => 'application/x-rar-compressed',
            'exe' => 'application/x-msdownload',
            'msi' => 'application/x-msdownload',
            'cab' => 'application/vnd.ms-cab-compressed',

            // audio/video
            'mp3' => 'audio/mpeg',
            'qt' => 'video/quicktime',
            'mov' => 'video/quicktime',

            // adobe
            'pdf' => 'application/pdf',
            'psd' => 'image/vnd.adobe.photoshop',
            'ai' => 'application/postscript',
            'eps' => 'application/postscript',
            'ps' => 'application/postscript',

            // ms office
            'doc' => 'application/msword',
            'rtf' => 'application/rtf',
            'xls' => 'application/vnd.ms-excel',
            'ppt' => 'application/vnd.ms-powerpoint',

            // open office
            'odt' => 'application/vnd.oasis.opendocument.text',
            'ods' => 'application/vnd.oasis.opendocument.spreadsheet',
        );

        $ext = strtolower(array_pop(explode('.',$filename)));
        if (array_key_exists($ext, $mime_types)) {
            return $mime_types[$ext];
        }
        elseif (function_exists('finfo_open')) {
            $finfo = finfo_open(FILEINFO_MIME);
            $mimetype = finfo_file($finfo, $filename);
            finfo_close($finfo);
            return $mimetype;
        }
        else {
            return 'application/octet-stream';
        }
    }
}
?>