我有一个数据框，想对第一列（Symbols）中重复出现的值按组计算各数值列的平均值，并把平均值作为新的一行放在该组之后。
DF1
# dput() of the input table DF1: 11 rows with a character `Symbols` column
# (the values "AAGAB" and "AAK1" are repeated) and five numeric columns.
structure(list(Symbols = c("AAAS", "AACS", "AADAC", "AAGAB",
"AAGAB", "AAK1", "AAK1", "AAK1", "AAK1", "AAK1", "AAMDC"), Average_Control = c(5.7212099528,
9.6925693375, 3.1913650495, 7.9479411012, 10.5609967525, 8.2969969243,
5.3382193495, 1.1836102209, 0.7941625658, 3.8002240701, 2.7307985646
), Glycyrrhizic_acid_rep_1 = c(5.290201, 9.735883, 3.3448757611,
7.6838303132, 10.0561155597, 8.1006595504, 4.6894686662, 1.478692,
2.382658, 4.474969, 3.396714), Hydroxysafflor_yellow_A = c(5.574157,
9.806325, 0.7223951505, 7.8382521567, 10.4195623492, 8.1103481281,
4.950000706, 1.192986, -0.09879839, 4.585423, 4.151861), Anhydroicaritin = c(5.464502,
9.781125, 4.3363375165, 7.8300766195, 10.4435803063, 8.2280380864,
5.3344613357, 0.9328041, 1.675867, 2.748738, 3.945212), Hyperoside =c(6.126322,
9.829496, 2.4293884258, 7.7151641411, 10.4487046678, 8.0864510043,
4.9309392652, 0.5856222, 0.422873, 2.518488, 3.8994)), .Names = c("Symbols",
"Average_Control", "Glycyrrhizic_acid_rep_1", "Hydroxysafflor_yellow_A",
"Anhydroicaritin", "Hyperoside"), row.names = c(NA, -11L), class = c("tbl_df",
"tbl", "data.frame"))
期望的输出 Final_Result 应为如下数据框：
# dput() of the desired result: the 11 input rows plus one "<Symbol>_Average"
# row placed after each repeated Symbols group (13 rows in total).
# NOTE(review): "AAGA_Average" below looks like a typo for "AAGAB_Average" --
# its values are the means of the two "AAGAB" rows; confirm with the asker.
structure(list(Symbols = c("AAAS", "AACS", "AADAC", "AAGAB",
"AAGAB", "AAGA_Average", "AAK1", "AAK1", "AAK1", "AAK1", "AAK1",
"AAK1_Average", "AAMDC"), Average_Control = c(5.7212099528, 9.6925693375,
3.1913650495, 7.9479411012, 10.5609967525, 9.25446892685, 8.2969969243,
5.3382193495, 1.1836102209, 0.7941625658, 3.8002240701, 3.88264262612,
2.7307985646), Glycyrrhizic_acid_rep_1 = c(5.290201, 9.735883,
3.3448757611, 7.6838303132, 10.0561155597, 8.86997293645, 8.1006595504,
4.6894686662, 1.478692, 2.382658, 4.474969, 4.22528944332, 3.396714
), Hydroxysafflor_yellow_A = c(5.574157, 9.806325, 0.7223951505,
7.8382521567, 10.4195623492, 9.12890725295, 8.1103481281, 4.950000706,
1.192986, -0.09879839, 4.585423, 3.74799188882, 4.151861), Anhydroicaritin = c(5.464502,
9.781125, 4.3363375165, 7.8300766195, 10.4435803063, 9.1368284629,
8.2280380864, 5.3344613357, 0.9328041, 1.675867, 2.748738, 3.78398170442,
3.945212), Hyperoside = c(6.126322, 9.829496, 2.4293884258, 7.7151641411,
10.4487046678, 9.08193440445, 8.0864510043, 4.9309392652, 0.5856222,
0.422873, 2.518488, 3.3088746939, 3.8994)), .Names = c("Symbols",
"Average_Control", "Glycyrrhizic_acid_rep_1", "Hydroxysafflor_yellow_A",
"Anhydroicaritin", "Hyperoside"), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -13L))
答案 0 :(得分:0)
# Example data: the 11-row table from the question, one column per entry.
# The list is already named, so no separate `.Names` attribute is needed.
df1 <- structure(
  list(
    Symbols = rep(
      c("AAAS", "AACS", "AADAC", "AAGAB", "AAK1", "AAMDC"),
      times = c(1L, 1L, 1L, 2L, 5L, 1L)
    ),
    Average_Control = c(
      5.7212099528, 9.6925693375, 3.1913650495, 7.9479411012,
      10.5609967525, 8.2969969243, 5.3382193495, 1.1836102209,
      0.7941625658, 3.8002240701, 2.7307985646
    ),
    Glycyrrhizic_acid_rep_1 = c(
      5.290201, 9.735883, 3.3448757611, 7.6838303132,
      10.0561155597, 8.1006595504, 4.6894686662, 1.478692,
      2.382658, 4.474969, 3.396714
    ),
    Hydroxysafflor_yellow_A = c(
      5.574157, 9.806325, 0.7223951505, 7.8382521567,
      10.4195623492, 8.1103481281, 4.950000706, 1.192986,
      -0.09879839, 4.585423, 4.151861
    ),
    Anhydroicaritin = c(
      5.464502, 9.781125, 4.3363375165, 7.8300766195,
      10.4435803063, 8.2280380864, 5.3344613357, 0.9328041,
      1.675867, 2.748738, 3.945212
    ),
    Hyperoside = c(
      6.126322, 9.829496, 2.4293884258, 7.7151641411,
      10.4487046678, 8.0864510043, 4.9309392652, 0.5856222,
      0.422873, 2.518488, 3.8994
    )
  ),
  row.names = c(NA, -11L),
  class = c("tbl_df", "tbl", "data.frame")
)
library(dplyr)
# For every Symbols value that occurs more than once, compute the column-wise
# mean, label it "<Symbol>_Average", add it to the original rows, and sort so
# that each average row sits directly after the group it summarises.
df1 %>%
  group_by(Symbols) %>% # for each Symbols value
  filter(n() > 1) %>% # keep only the groups that have multiple rows
  summarise(across(everything(), mean)) %>% # average of every non-grouping column
  mutate(Symbols = paste0(Symbols, "_Average")) %>% # label the summary rows
  rbind(df1) %>% # bind summary rows with the original dataset
  arrange(Symbols) # "<Symbol>_Average" sorts right after its "<Symbol>" rows

# # A tibble: 13 x 6
# Symbols Average_Control Glycyrrhizic_acid_rep_1 Hydroxysafflor_yellow_A Anhydroicaritin Hyperoside
# <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 AAAS 5.7212100 5.290201 5.57415700 5.4645020 6.1263220
# 2 AACS 9.6925693 9.735883 9.80632500 9.7811250 9.8294960
# 3 AADAC 3.1913650 3.344876 0.72239515 4.3363375 2.4293884
# 4 AAGAB 7.9479411 7.683830 7.83825216 7.8300766 7.7151641
# 5 AAGAB 10.5609968 10.056116 10.41956235 10.4435803 10.4487047
# 6 AAGAB_Average 9.2544689 8.869973 9.12890725 9.1368285 9.0819344
# 7 AAK1 8.2969969 8.100660 8.11034813 8.2280381 8.0864510
# 8 AAK1 5.3382193 4.689469 4.95000071 5.3344613 4.9309393
# 9 AAK1 1.1836102 1.478692 1.19298600 0.9328041 0.5856222
# 10 AAK1 0.7941626 2.382658 -0.09879839 1.6758670 0.4228730
# 11 AAK1 3.8002241 4.474969 4.58542300 2.7487380 2.5184880
# 12 AAK1_Average 3.8826426 4.225289 3.74799189 3.7839817 3.3088747
# 13 AAMDC 2.7307986 3.396714 4.15186100 3.9452120 3.8994000
答案 1 :(得分:0)
没有dplyr,你可以这样做:
# Example data: the 11-row table from the question.
df1 <- structure(
  list(
    Symbols = c("AAAS", "AACS", "AADAC", "AAGAB", "AAGAB", "AAK1", "AAK1",
                "AAK1", "AAK1", "AAK1", "AAMDC"),
    Average_Control = c(5.7212099528, 9.6925693375, 3.1913650495,
                        7.9479411012, 10.5609967525, 8.2969969243,
                        5.3382193495, 1.1836102209, 0.7941625658,
                        3.8002240701, 2.7307985646),
    Glycyrrhizic_acid_rep_1 = c(5.290201, 9.735883, 3.3448757611,
                                7.6838303132, 10.0561155597, 8.1006595504,
                                4.6894686662, 1.478692, 2.382658, 4.474969,
                                3.396714),
    Hydroxysafflor_yellow_A = c(5.574157, 9.806325, 0.7223951505,
                                7.8382521567, 10.4195623492, 8.1103481281,
                                4.950000706, 1.192986, -0.09879839, 4.585423,
                                4.151861),
    Anhydroicaritin = c(5.464502, 9.781125, 4.3363375165, 7.8300766195,
                        10.4435803063, 8.2280380864, 5.3344613357, 0.9328041,
                        1.675867, 2.748738, 3.945212),
    Hyperoside = c(6.126322, 9.829496, 2.4293884258, 7.7151641411,
                   10.4487046678, 8.0864510043, 4.9309392652, 0.5856222,
                   0.422873, 2.518488, 3.8994)
  ),
  row.names = c(NA, -11L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Select the rows whose Symbols value occurs more than once and aggregate
# every other column by its mean within each of those Symbols groups.
agg <- aggregate(
  . ~ Symbols,
  data = df1[df1$Symbols %in% unique(df1$Symbols[duplicated(df1$Symbols)]), ],
  FUN = mean
)
# paste0() rather than paste(): paste() would insert a space and produce
# "AAGAB _Average" instead of "AAGAB_Average".
agg$Symbols <- paste0(agg$Symbols, "_Average")
a3 <- rbind(agg, df1)

# Sort by Symbols so each "<Symbol>_Average" row follows its group, then reset
# the row names to 1..n so they match the requested output.
a3 <- a3[order(a3$Symbols), ]
row.names(a3) <- NULL

a3
# Symbols Average_Control Glycyrrhizic_acid_rep_1 Hydroxysafflor_yellow_A Anhydroicaritin Hyperoside
# 1 AAAS 5.7212100 5.290201 5.57415700 5.4645020 6.1263220
# 2 AACS 9.6925693 9.735883 9.80632500 9.7811250 9.8294960
# 3 AADAC 3.1913650 3.344876 0.72239515 4.3363375 2.4293884
# 4 AAGAB 7.9479411 7.683830 7.83825216 7.8300766 7.7151641
# 5 AAGAB 10.5609968 10.056116 10.41956235 10.4435803 10.4487047
# 6 AAGAB_Average 9.2544689 8.869973 9.12890725 9.1368285 9.0819344
# 7 AAK1 8.2969969 8.100660 8.11034813 8.2280381 8.0864510
# 8 AAK1 5.3382193 4.689469 4.95000071 5.3344613 4.9309393
# 9 AAK1 1.1836102 1.478692 1.19298600 0.9328041 0.5856222
# 10 AAK1 0.7941626 2.382658 -0.09879839 1.6758670 0.4228730
# 11 AAK1 3.8002241 4.474969 4.58542300 2.7487380 2.5184880
# 12 AAK1_Average 3.8826426 4.225289 3.74799189 3.7839817 3.3088747
# 13 AAMDC 2.7307986 3.396714 4.15186100 3.9452120 3.8994000