Question

我有一个包含6000个基因名称的数据集。它有6种不同的特征（feature）A、B、C、D、E、F，每种特征下都有唯一的位置编号（pos）。我想将特征A对应的各列除以87，将特征B对应的各列除以54。

最后，我希望得到新列中每行的总和和平均值。我怎么能在R中做到这一点？

/**
 * Appends a day name to {@code meetDays} for every recognised digit in
 * {@code input}, then trims the accumulated string.
 *
 * Only '1' (Sunday) and '2' (Monday) are mapped in this excerpt; the remaining
 * days and the error handling for unrecognised characters are left as
 * placeholders. NOTE(review): {@code meetDays} is declared outside this
 * excerpt - presumably a String field; confirm it is initialised before use.
 *
 * @param input string whose characters are inspected one at a time
 */
private void setDays(String input) {
    // Walk the input character by character; each character selects one day.
    for (int i=0; i<input.length(); i++) {
        switch (input.charAt(i)) {
            case '1':
                meetDays += "Sunday ";
                break;
            case '2':
                meetDays += "Monday ";
                break;
            // and so on for the remaining days
            default:
                //throw the exception here
        }
    }
    // Each day name is appended with a trailing space; trim() removes the
    // leftover whitespace at the ends of the final string.
    meetDays = meetDays.trim();
}

输入文件

feature_A=87
feature_B=54

输出文件

 feature pos gene_1 gene_2 gene_3 gene_n
       A   1      6      2     51      0
       A   2      4      5      8      2
       A   3      1     74      5      0
       B   1     11      2     41     89
       B   2      4      5      3      5

Answer 1

通过将除数合并到主数据集中可以更容易实现这一点：

# Lookup table: one divisor per feature.
feat_div <- data.frame(feature = c("A", "B"), value = c(87, 54))
#  feature value
#1       A    87
#2       B    54

# Select the gene columns by NAME rather than with a positional logical mask:
# merge() moves the key column to the front and appends `value`, so a mask
# built on the original column layout is not guaranteed to line up afterwards.
cols <- grep("^gene_", names(dat), value = TRUE)

# Attach each row's divisor. This is an inner join, so rows whose feature has
# no entry in feat_div are dropped.
dat <- merge(dat, feat_div, by = "feature")

# Divide every gene column by the row's divisor, then summarise per row.
dat[cols]       <- lapply(dat[cols], `/`, dat$value)
dat$sum_all     <- rowSums(dat[cols])
dat$average_all <- rowMeans(dat[cols])

#  feature pos     gene_1     gene_2     gene_3     gene_n value   sum_all average_all
#1       A   1 0.06896552 0.02298851 0.58620690 0.00000000    87 0.6781609   0.1695402
#2       A   2 0.04597701 0.05747126 0.09195402 0.02298851    87 0.2183908   0.0545977
#3       A   3 0.01149425 0.85057471 0.05747126 0.00000000    87 0.9195402   0.2298851
#4       B   1 0.20370370 0.03703704 0.75925926 1.64814815    54 2.6481481   0.6620370
#5       B   2 0.07407407 0.09259259 0.05555556 0.09259259    54 0.3148148   0.0787037

Answer 2

dplyr可以做你想要的，如果你使用查找表等来索引要除以的数字：

library(dplyr)

# make a lookup vector
feat_num <- c(A = 87, B = 54)

feat_num
##
##  A  B 
## 87 54 

# every column except the two identifier columns holds gene values
gene_cols <- setdiff(names(df), c("feature", "pos"))

df %>%
    # divide each gene column by the number looked up from feat_num;
    # as.character() makes the lookup go by label even when feature is a
    # factor (indexing with a factor would use its integer codes instead),
    # and unname() keeps the lookup names out of the result
    mutate(across(all_of(gene_cols),
                  ~ .x / unname(feat_num[as.character(feature)]))) %>%
    # add row sum and mean columns, both computed from the gene columns only
    mutate(sum_all = rowSums(across(all_of(gene_cols))),
           average_all = rowMeans(across(all_of(gene_cols))))
##
## Source: local data frame [5 x 8]
## 
##   feature   pos     gene_1     gene_2     gene_3     gene_n   sum_all average_all
##    (fctr) (int)      (dbl)      (dbl)      (dbl)      (dbl)     (dbl)       (dbl)
## 1       A     1 0.06896552 0.02298851 0.58620690 0.00000000 0.6781609   0.1695402
## 2       A     2 0.04597701 0.05747126 0.09195402 0.02298851 0.2183908   0.0545977
## 3       A     3 0.01149425 0.85057471 0.05747126 0.00000000 0.9195402   0.2298851
## 4       B     1 0.20370370 0.03703704 0.75925926 1.64814815 2.6481481   0.6620370
## 5       B     2 0.07407407 0.09259259 0.05555556 0.09259259 0.3148148   0.0787037

Answer 3

首先想到的是将数据拆分为两个数据框，计算您的值，然后重新组合它们。假设您的原始数据框为df：

# Columns holding gene values; `feature` and `pos` are identifiers and must
# not be divided or included in the row summaries.
gene_cols <- setdiff(names(df), c("feature", "pos"))

# Split by feature and scale only the gene columns. lapply() keeps each piece
# a data frame (apply() would return a matrix, on which `$<-` does not work).
df_A <- df[df$feature %in% "A", ]
df_A[gene_cols] <- lapply(df_A[gene_cols], function(x) x / 87)
df_B <- df[df$feature %in% "B", ]
df_B[gene_cols] <- lapply(df_B[gene_cols], function(x) x / 54)

# Recombine; `feature` and `pos` travel with their rows, so nothing has to be
# re-attached afterwards.
df2 <- rbind(df_A, df_B)

# Both summaries are taken over the gene columns only, so the mean is not
# affected by the freshly added sum column.
df2$sum_all <- rowSums(df2[gene_cols])
df2$avg_all <- rowMeans(df2[gene_cols])

这应该会把特征为'A'的行中的每一列除以87，把特征为'B'的行中的每一列除以54，然后重新合并这些行，并求出每行的总和与均值。如果有任何问题请告诉我，因为我现在无法测试。

编辑：看起来其他答案更好，imo。

Answer 4

以下是使用data.table的一种方案。先将feature向量转换为data.frame（'d2'）：把它们放进list，用stack转成data.frame，然后修改列名，并用transform去掉'feature'列中的前缀feature_。得到'd2'之后，将原始数据集（'d1'）转换为data.table（setDT(d1)），并把'gene'列的类型从integer改为numeric。接着按'feature'列进行联接（on = "feature"，by = .EACHI），遍历.SDcols中指定的列，将其除以'value'，求出逐行的总和（Reduce(...)，即'x2'）和平均值（'x3'），并将结果赋值（:=）给指定的列名。

library(data.table)
# Build the divisor lookup table 'd2' from the variables feature_A and
# feature_B (they must already exist in the calling environment): mget()
# collects them into a named list, stack() turns that into a two-column
# data.frame, [2:1] puts the name column first, setNames() labels the columns
# "feature" and "value", and sub() strips everything up to the underscore so
# the entries become "A" and "B".
d2 <- transform(setNames(stack(mget(paste("feature", c("A", "B"), 
        sep="_")))[2:1], c("feature", "value")), 
          feature = sub(".*_", "", feature))
# Names of the gene columns in 'd1'.
nm1 <- grep("^gene", names(d1), value = TRUE)
# Convert 'd1' to a data.table in place and cast the gene columns to numeric,
# so the fractional results of the division can be stored in them.
setDT(d1)[, (nm1):= lapply(.SD, as.numeric), .SDcols = nm1]
# Join 'd2' onto 'd1' by feature and, for each row of 'd2' (by = .EACHI),
# update the matching rows of 'd1' by reference:
#   x1 - gene columns divided by that feature's value
#   x2 - row-wise sum of the divided columns
#   x3 - row-wise mean (sum divided by the number of gene columns)
# The trailing [] prints the updated table.
d1[d2, c(nm1, "sum_all", "average_all") :={
           x1 <- lapply(.SD, `/`, value)
           x2 <- Reduce(`+`, x1)
           x3 <- x2/length(nm1)
           c(x1, list(x2, x3))} ,
           on = "feature", by = .EACHI,.SDcols = nm1][]
#    feature pos     gene_1     gene_2     gene_3     gene_n   sum_all average_all
#1:       A   1 0.06896552 0.02298851 0.58620690 0.00000000 0.6781609   0.1695402
#2:       A   2 0.04597701 0.05747126 0.09195402 0.02298851 0.2183908   0.0545977
#3:       A   3 0.01149425 0.85057471 0.05747126 0.00000000 0.9195402   0.2298851
#4:       B   1 0.20370370 0.03703704 0.75925926 1.64814815 2.6481481   0.6620370
#5:       B   2 0.07407407 0.09259259 0.05555556 0.09259259 0.3148148   0.0787037

数据

# Example input: one row per (feature, pos) pair, one integer column per gene.
d1 <- data.frame(
  feature = c("A", "A", "A", "B", "B"),
  pos     = c(1L, 2L, 3L, 1L, 2L),
  gene_1  = c(6L, 4L, 1L, 11L, 4L),
  gene_2  = c(2L, 5L, 74L, 2L, 5L),
  gene_3  = c(51L, 8L, 5L, 41L, 3L),
  gene_n  = c(0L, 2L, 0L, 89L, 5L),
  stringsAsFactors = FALSE
)

Answer 5

您可以将每列除以其平均值；如果想除以某个特定数字或其他统计量，请将mean(x)替换为该数字，或替换为median(x)、sd(x)等。

# Load the built-in dataset and show its first rows.
data(mtcars)
head(mtcars)
# Rescale every column by its own mean; assigning into mtcars[] keeps the
# data-frame structure (row names, column names) intact.
mtcars[] <- lapply(mtcars, function(column) {
  column / mean(column)
})

同理，您可以按如下方式对单个列执行此操作：

data(mtcars);head(mtcars)
# Divide the whole column by its mean in one vectorised step. Calling lapply()
# on the column would hand the function one value at a time, so x / mean(x)
# would always be 1 and the column would turn into a list.
mtcars$mpg <- mtcars$mpg / mean(mtcars$mpg)

如何在R中将每一列除以一个数字？

5 个答案:

数据