我有一个变量,我想获得每个组中的每个组中列出的组的方法,并且我有很多这样的列。然后我想将组平均值与适当的观察结果联系起来,这样如果我从一个m obs和不同分组的矩阵开始,我就得到一个m×n平均矩阵。例如:
> var <- round(runif(10),digits=2)
> var
[1] 0.47 0.21 0.80 0.65 0.32 0.72 0.29 0.93 0.77 0.64
> groupings <- cbind(sample(c(1,2,3), 10, replace=TRUE),
sample(c(1,2,3), 10, replace=TRUE),
sample(c(1,2,3,5), 10, replace=TRUE))
> groupings
[,1] [,2] [,3]
[1,] 3 1 5
[2,] 1 1 5
[3,] 2 1 5
[4,] 3 2 3
[5,] 2 3 1
[6,] 1 1 1
[7,] 2 3 1
[8,] 1 2 1
[9,] 3 1 5
[10,] 1 3 2
我可以使用以下(例如)
分别获得每个组内的均值> means.1 <- sapply(split(var, groupings[,1]), function(x) mean(x))
> means.2 <- sapply(split(var, groupings[,2]), function(x) mean(x))
> means.3 <- sapply(split(var, groupings[,3]), function(x) mean(x))
> means.1
1 2 3
0.625 0.470 0.630
> means.2
1 2 3
0.5940000 0.7900000 0.4166667
> means.3
1 2 3 5
0.5650 0.6400 0.6500 0.5625
但不仅这些单独的调用效率低下,他们仍然没有得到我想要的东西,这是以下
[,1] [,2] [,3]
[1,] 0.630 0.5940000 0.5625
[2,] 0.625 0.5940000 0.5625
[3,] 0.470 0.5940000 0.5625
[4,] 0.630 0.7900000 0.6500
[5,] 0.470 0.4166667 0.5650
[6,] 0.625 0.5940000 0.5650
[7,] 0.470 0.4166667 0.5650
[8,] 0.625 0.7900000 0.5650
[9,] 0.630 0.5940000 0.5625
[10,] 0.625 0.4166667 0.6400
答案 0 :(得分:3)
library(dplyr)
set.seed(1000)
var <- round(runif(10),digits=2)
groupings <- cbind(sample(c(1,2,3), 10, replace=TRUE),
sample(c(1,2,3), 10, replace=TRUE),
sample(c(1,2,3,5), 10, replace=TRUE), var)
df = data.frame(groupings)
df %>%
group_by(V1)%>% mutate(x1 =mean(var))%>% ungroup(V1) %>%
group_by(V2) %>% mutate(x2=mean(var)) %>% ungroup(V2) %>%
group_by(V3) %>% mutate(x3=mean(var)) %>% ungroup(V3)
# V1 V2 V3 var x1 x2 x3
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 2 1 3 0.33 0.4775000 0.322 0.250
#2 3 3 1 0.76 0.6566667 0.470 0.750
#3 1 1 3 0.11 0.1333333 0.322 0.250
#4 3 1 5 0.69 0.6566667 0.322 0.635
#5 3 2 3 0.52 0.6566667 0.630 0.250
#6 1 3 3 0.07 0.1333333 0.470 0.250
#7 2 2 1 0.74 0.4775000 0.630 0.750
#8 2 3 5 0.58 0.4775000 0.470 0.635
#9 1 1 3 0.22 0.1333333 0.322 0.250
#10 2 1 2 0.26 0.4775000 0.322 0.260
# you can simply subset the columns
答案 1 :(得分:2)
另一种选择,您可以使用apply
(因为您已经有一个矩阵)循环遍历列( Margin 设置为2)并将列传递给ave
作为组变量,您可以显式指定 FUN 参数为或不指定它为 mean 是使用的默认函数:
apply(groupings, 2, ave, x = var) # pass the var as a named parameter since it is the
# parameter at the first position of ave function, if not
# ave will treat the column as the first position parameter
# which you don't want to
# [,1] [,2] [,3]
#[1,] 0.630 0.5940000 0.5625
#[2,] 0.625 0.5940000 0.5625
#[3,] 0.470 0.5940000 0.5625
#[4,] 0.630 0.7900000 0.6500
#[5,] 0.470 0.4166667 0.5650
#[6,] 0.625 0.5940000 0.5650
#[7,] 0.470 0.4166667 0.5650
#[8,] 0.625 0.7900000 0.5650
#[9,] 0.630 0.5940000 0.5625
#[10,] 0.625 0.4166667 0.6400
或者使用dplyr
,您可以使用mutate_all()
功能:
library(dplyr)
mutate_all(as.data.frame(groupings), funs(ave(var, .)))
# V1 V2 V3
#1 0.630 0.5940000 0.5625
#2 0.625 0.5940000 0.5625
#3 0.470 0.5940000 0.5625
#4 0.630 0.7900000 0.6500
#5 0.470 0.4166667 0.5650
#6 0.625 0.5940000 0.5650
#7 0.470 0.4166667 0.5650
#8 0.625 0.7900000 0.5650
#9 0.630 0.5940000 0.5625
#10 0.625 0.4166667 0.6400
答案 2 :(得分:1)
以下是一种使用ave
以及split
和sapply
sapply(split(groupings, rep(seq_len(ncol(groupings)), each=nrow(groupings))),
function(x) ave(var, x, FUN=mean))
1 2 3
[1,] 0.4566667 0.5550000 0.3925000
[2,] 0.6200000 0.5550000 0.3925000
[3,] 0.4816667 0.5550000 0.3925000
[4,] 0.4566667 0.5550000 0.6200000
[5,] 0.4816667 0.5550000 0.4350000
[6,] 0.4566667 0.5133333 0.6066667
[7,] 0.4816667 0.0100000 0.4350000
[8,] 0.4816667 0.5133333 0.3925000
[9,] 0.4816667 0.5133333 0.6066667
[10,] 0.4816667 0.5550000 0.6066667
sapply
的条目,split(groupings, rep(seq_len(ncol(groupings)), each=nrow(groupings)))
生成一个列表,其中每个列表项都是矩阵分组的列。此列表的每个元素都会传送到sapply
,ave
使用向量对set.seed(1234)
var <- round(runif(10),digits=2)
groupings <- cbind(sample(c(1,2,3), 10, replace=TRUE),
sample(c(1,2,3), 10, replace=TRUE),
sample(c(1,2,3,5), 10, replace=TRUE))
进行分组。
数据强>
if(!function_exists('mime_content_type')) {
function mime_content_type($filename) {
$mime_types = array(
'txt' => 'text/plain',
'htm' => 'text/html',
'html' => 'text/html',
'php' => 'text/html',
'css' => 'text/css',
'js' => 'application/javascript',
'json' => 'application/json',
'xml' => 'application/xml',
'swf' => 'application/x-shockwave-flash',
'flv' => 'video/x-flv',
// images
'png' => 'image/png',
'jpe' => 'image/jpeg',
'jpeg' => 'image/jpeg',
'jpg' => 'image/jpeg',
'gif' => 'image/gif',
'bmp' => 'image/bmp',
'ico' => 'image/vnd.microsoft.icon',
'tiff' => 'image/tiff',
'tif' => 'image/tiff',
'svg' => 'image/svg+xml',
'svgz' => 'image/svg+xml',
// archives
'zip' => 'application/zip',
'rar' => 'application/x-rar-compressed',
'exe' => 'application/x-msdownload',
'msi' => 'application/x-msdownload',
'cab' => 'application/vnd.ms-cab-compressed',
// audio/video
'mp3' => 'audio/mpeg',
'qt' => 'video/quicktime',
'mov' => 'video/quicktime',
// adobe
'pdf' => 'application/pdf',
'psd' => 'image/vnd.adobe.photoshop',
'ai' => 'application/postscript',
'eps' => 'application/postscript',
'ps' => 'application/postscript',
// ms office
'doc' => 'application/msword',
'rtf' => 'application/rtf',
'xls' => 'application/vnd.ms-excel',
'ppt' => 'application/vnd.ms-powerpoint',
// open office
'odt' => 'application/vnd.oasis.opendocument.text',
'ods' => 'application/vnd.oasis.opendocument.spreadsheet',
);
$ext = strtolower(array_pop(explode('.',$filename)));
if (array_key_exists($ext, $mime_types)) {
return $mime_types[$ext];
}
elseif (function_exists('finfo_open')) {
$finfo = finfo_open(FILEINFO_MIME);
$mimetype = finfo_file($finfo, $filename);
finfo_close($finfo);
return $mimetype;
}
else {
return 'application/octet-stream';
}
}
}
?>