我想把下面这个数据框的列按每四列分为一组,并对每一行分别计算各组的平均值。
# Example data (dput output): 3 rows, an `Item` identifier column and
# 63 numeric columns named <C|P>_Day<1-4>_<WT|CCV><replicate>.
# Note that C_Day4_WT3 is absent, so the C_Day4_WT group has only 3 replicates.
# Bound to `df` because the answers below operate on an object of that name.
df <- structure(list(Item = c("PPPG01050.1", "PPPG01080.1", "PPPG01090.1"
), C_Day1_WT1 = c(296160000, 552080000, 2560300000), C_Day1_WT2 = c(215080000,
605270000, 1507200000), C_Day1_WT3 = c(0, 372940000, 2524800000
), C_Day1_WT4 = c(0, 445260000, 2239800000), C_Day1_CCV1 = c(0,
365160000, 2018600000), C_Day1_CCV2 = c(0, 448180000, 2576900000
), C_Day1_CCV3 = c(0, 582880000, 1820900000), C_Day1_CCV4 = c(0,
393070000, 1.925e+09), C_Day2_WT1 = c(369260000, 639450000, 2125500000
), C_Day2_WT2 = c(0, 376960000, 1645700000), C_Day2_WT3 = c(441210000,
456260000, 2694300000), C_Day2_WT4 = c(240700000, 584580000,
2145200000), C_Day2_CCV1 = c(0, 586700000, 1906300000), C_Day2_CCV2 = c(357200000,
527120000, 2441700000), C_Day2_CCV3 = c(114700000, 411270000,
1637200000), C_Day2_CCV4 = c(98765000, 289370000, 1540600000),
C_Day3_WT1 = c(272120000, 664570000, 2633400000), C_Day3_WT2 = c(432050000,
372230000, 2269700000), C_Day3_WT3 = c(326660000, 577260000,
1894100000), C_Day3_WT4 = c(332330000, 645940000, 1924300000
), C_Day3_CCV1 = c(156920000, 695450000, 936200000), C_Day3_CCV2 = c(249730000,
619140000, 1126500000), C_Day3_CCV3 = c(317850000, 525230000,
1394700000), C_Day3_CCV4 = c(180140000, 463630000, 1641200000
), C_Day4_WT1 = c(0, 0, 0), C_Day4_WT2 = c(0, 0, 2665900000
), C_Day4_WT4 = c(0, 779120000, 1024400000), C_Day4_CCV1 = c(0,
0, 1406800000), C_Day4_CCV2 = c(256840000, 669830000, 1452200000
), C_Day4_CCV3 = c(302730000, 529320000, 1018200000), C_Day4_CCV4 = c(213970000,
513700000, 1003300000), P_Day1_WT1 = c(0, 345700000, 2619900000
), P_Day1_WT2 = c(0, 0, 0), P_Day1_WT3 = c(0, 666780000,
2020700000), P_Day1_WT4 = c(0, 397790000, 2038100000), P_Day1_CCV1 = c(0,
306270000, 1581500000), P_Day1_CCV2 = c(430770000, 492330000,
2801800000), P_Day1_CCV3 = c(276210000, 602960000, 2547600000
), P_Day1_CCV4 = c(0, 268450000, 2359500000), P_Day2_WT1 = c(0,
514390000, 2486900000), P_Day2_WT2 = c(358710000, 477030000,
2828200000), P_Day2_WT3 = c(0, 187190000, 2.283e+09), P_Day2_WT4 = c(0,
321850000, 2321300000), P_Day2_CCV1 = c(0, 376680000, 2.251e+09
), P_Day2_CCV2 = c(0, 376450000, 3247200000), P_Day2_CCV3 = c(190410000,
408790000, 1997100000), P_Day2_CCV4 = c(372620000, 316680000,
2898100000), P_Day3_WT1 = c(534270000, 578150000, 2232200000
), P_Day3_WT2 = c(244510000, 638660000, 1399200000), P_Day3_WT3 = c(294660000,
508260000, 2248900000), P_Day3_WT4 = c(453780000, 521150000,
1816600000), P_Day3_CCV1 = c(305890000, 451130000, 2483900000
), P_Day3_CCV2 = c(439220000, 771130000, 2477800000), P_Day3_CCV3 = c(0,
434490000, 2352500000), P_Day3_CCV4 = c(84403000, 354070000,
1.29e+09), P_Day4_WT1 = c(243900000, 346050000, 2630100000
), P_Day4_WT2 = c(206100000, 272760000, 2810700000), P_Day4_WT3 = c(0,
508280000, 1505900000), P_Day4_WT4 = c(0, 332080000, 2410800000
), P_Day4_CCV1 = c(0, 0, 2053600000), P_Day4_CCV2 = c(0,
422370000, 2906300000), P_Day4_CCV3 = c(435900000, 456150000,
2854400000), P_Day4_CCV4 = c(0, 278450000, 2883700000)), row.names = c(NA,
3L), class = "data.frame")
好的,一般的规律如下:
前4列为一组: C_Day1_WT1:C_Day1_WT4 | 平均值的列名 = C_Day1_WT
接下来的4列为一组: C_Day1_CCV1:C_Day1_CCV4 | 平均值的列名 = C_Day1_CCV
依此类推。
因此,新的 data.frame 的数值列数大约应为原始数据的四分之一。
问题:包含数字的列总共只有63列,因为缺少 C_Day4_WT3 列,
也就是说 C_Day4_WT 这一组只有3个重复样本。
我的一个想法是:用 grep 函数
按列名选出每一组的列,
再用 apply 对每一行逐组计算平均值。但是,我想避免使用循环。
在等待回复的同时,我会先尝试写一个循环版本。
答案 0 :(得分:1)
一个使用 tidyverse 的方案:
library(dplyr)
library(tidyr)
library(stringr)

# Average the replicate columns of each group (column name minus its trailing
# replicate digit, e.g. C_Day1_WT1..C_Day1_WT4 -> C_Day1_WT) for every Item,
# then append the original columns. Groups with a missing replicate are
# averaged over the replicates that are present.
df %>%
  # transform into (Item, column name, value) tuples
  pivot_longer(-Item, names_to = "k", values_to = "v") %>%
  # get the name of the group of variables
  mutate(k1 = str_extract(k, ".*(?=\\d$)")) %>%
  group_by(Item, k1) %>%
  # compute the means; drop grouping so the result is a plain tibble
  summarise(mean = mean(v), .groups = "drop") %>%
  # go back to a usual wide data frame, one column per group
  pivot_wider(names_from = k1, values_from = mean) %>%
  # and merge with the original
  inner_join(df, by = "Item")
答案 1 :(得分:1)
这是data.table库的解决方案:
library(data.table)

# Compute, for every Item (row), the mean of each group of replicate columns.
# A group is identified by the column name without its trailing replicate
# number, e.g. "C_Day1_CCV3" -> "C_Day1_CCV".
df <- as.data.table(df)
value_cols <- setdiff(names(df), "Item")
# Strip the replicate number instead of taking a fixed-width prefix, so labels
# of any length (WT, CCV, ...) are kept intact.
group_key <- sub("[0-9]+$", "", value_cols)

# After transposing, each row is one sample column and V1..Vn hold the items;
# average every V column within each group, however many items there are.
df1 <- transpose(df[, value_cols, with = FALSE])[
  , dd := group_key
][
  , lapply(.SD, mean), by = dd
]
setnames(df1, c("dd", paste0("Mean", seq_len(ncol(df1) - 1L))))

# Transpose back using the group labels as column names (rather than as a data
# row, which would coerce every mean to character), then re-attach Item.
output <- data.table(
  Item = df[["Item"]],
  transpose(df1, make.names = "dd")
)