筛选出某一列中字符串相似的行并将它们绘制在一起

时间:2016-10-11 11:49:21

标签: r

我想把某一列中具有相同“基础”字符串的行分为一组(取子集),并把每一组绘制在同一张图上。如果能把所有图都放进同一个 PDF 文件就更好了,每张图各占 PDF 的一页。

数据:

structure(list(`10` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `34` = c(0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 370500, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1091361.9, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1512409.6, 
0, 0, 0, 0, 0, 0), `59` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 4231358.2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 5995680.4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 2266775, 0, 0, 0, 0, 0, 0, 6864490.1, 0, 0, 
0, 0, 0, 0), `84` = c(0, 0, 0, 0, 1783350, 0, 0, 0, 1177650, 
0, 0, 0, 0, 0, 0, 0, 0, 4316664.7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 9262556.7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 2831286.1, 0, 0, 0, 0, 0, 0, 10643218.2, 
0, 0, 0, 0, 0, 0), `110` = c(0, 0, 0, 0, 1778743.3, 0, 0, 0, 
1465966.7, 0, 0, 0, 0, 0, 0, 0, 0, 3111700, 0, 0, 1955337.5, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5584784.4, 5584784.4, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3092525, 0, 
0, 0, 0, 0, 0, 7847143.8, 0, 0, 0, 0, 0, 0), `134` = c(0, 0, 
0, 0, 1121869.4, 0, 0, 0, 1439430.6, 0, 0, 0, 0, 0, 0, 0, 0, 
2854250, 0, 0, 0, 0, 0, 0, 914890, 0, 0, 847880, 0, 0, 0, 0, 
0, 0, 0, 8191800, 0, 0, 0, 0, 0, 0, 1830904.5, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 1650150, 0, 0, 837130, 0, 0, 0, 4925095.1, 0, 
0, 0, 0, 0, 0), `165` = c(0, 0, 0, 0, 1432775, 0, 0, 0, 1394186.1, 
0, 1120183.3, 0, 0, 0, 0, 0, 0, 2262421.7, 0, 0, 0, 615660, 0, 
0, 1292795.8, 0, 0, 712622.5, 0, 0, 0, 0, 0, 0, 0, 2683469.4, 
0, 0, 0, 0, 0, 0, 2318485.5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1561800, 
0, 0, 0, 0, 0, 0, 4382993.7, 0, 0, 763460, 0, 0, 0), `199` = c(0, 
0, 0, 0, 1314220, 0, 0, 0, 1439718.8, 0, 1929266.7, 0, 0, 0, 
1101800, 0, 0, 2759366.7, 0, 0, 0, 1291728.6, 0, 0, 2489775.6, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2858345.8, 0, 0, 0, 1819542.1, 
0, 0, 1497640.3, 0, 0, 0, 1300250, 0, 0, 0, 0, 0, 0, 1566875, 
0, 0, 0, 0, 0, 0, 4625895.6, 0, 0, 1308158.3, 0, 0, 0), `234` = c(1257250, 
0, 0, 0, 0, 0, 0, 0, 1276080, 0, 1848500, 0, 0, 0, 1529350, 0, 
0, 2155275, 0, 0, 0, 2023041.9, 0, 0, 1966447.7, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 1184200, 1184200, 0, 0, 1652350, 0, 0, 2018581.7, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1835225, 0, 0, 0, 0, 0, 0, 4639414.7, 
0, 0, 720715, 0, 0, 0), `257` = c(0, 0, 0, 0, 0, 669442.5, 0, 
0, 1253026.7, 0, 960410, 960410, 0, 0, 1258267.5, 0, 0, 1707392.5, 
0, 0, 0, 563280, 0, 0, 2403237.9, 0, 0, 0, 1044100, 0, 2075700, 
0, 0, 0, 0, 0, 5718450, 0, 0, 1704550, 0, 0, 1350286.9, 0, 0, 
0, 0, 2011700, 0, 0, 0, 0, 0, 1739500, 0, 0, 0, 0, 0, 0, 4612520.8, 
4612520.8, 0, 0, 0, 0, 0), `362` = c(0, 1593500, 0, 0, 0, 1610625.3, 
0, 0, 1234902.5, 0, 0, 1481036.8, 0, 0, 1583647.5, 0, 0, 1752089.2, 
0, 0, 0, 0, 0, 0, 2410809.2, 0, 0, 0, 654940, 0, 0, 0, 0, 0, 
0, 0, 7014905.6, 0, 0, 0, 0, 0, 1165672.1, 0, 0, 0, 0, 0, 0, 
0, 1029910, 0, 0, 2153087.5, 0, 0, 0, 422920, 0, 0, 0, 7495855.9, 
0, 0, 0, 0, 0), `433` = c(0, 0, 0, 0, 0, 1340283.9, 0, 0, 1268996.9, 
0, 0, 1416683.3, 0, 0, 1047862.5, 0, 0, 1819653.8, 0, 0, 0, 0, 
0, 0, 2227565.7, 0, 0, 0, 763765, 0, 0, 1595430, 0, 0, 0, 0, 
4894549, 0, 0, 0, 0, 0, 1061375.4, 0, 0, 0, 0, 0, 2251950, 0, 
1042130, 0, 0, 2055300, 0, 0, 0, 696278.3, 0, 0, 0, 5353797.8, 
0, 0, 0, 0, 0), `506` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2020300, 
2020300, 0, 0, 0, 0, 0, 0, 7681526, 0, 0, 0, 0, 0), `581` = c(0, 
0, 1749237.5, 0, 0, 0, 2421665.8, 0, 0, 1773262.5, 0, 0, 2251004.3, 
0, 0, 2570175, 0, 0, 3379756.9, 0, 0, 0, 2054455.6, 0, 0, 2518270.8, 
0, 0, 0, 0, 0, 0, 2917968.2, 0, 0, 0, 0, 7004350, 0, 0, 1451600, 
0, 0, 1394411, 0, 0, 0, 0, 0, 2507858.3, 0, 2377012.5, 0, 0, 
3719165.4, 0, 0, 0, 1472870.3, 0, 0, 9666916.1, 0, 0, 1730300, 
0, 0), `652` = c(0, 0, 476910, 476910, 0, 0, 1149078.8, 1149078.8, 
0, 1082468.7, 0, 0, 882769.7, 0, 0, 1370449.4, 1370449.4, 0, 
1529049, 1529049, 0, 0, 943632.2, 0, 0, 916587.8, 0, 0, 0, 988261.1, 
0, 0, 1778007.1, 1778007.1, 0, 0, 0, 3087304.8, 3087304.8, 0, 
782860, 782860, 0, 510158.5, 510158.5, 0, 0, 0, 0, 1503750, 0, 
1100677.5, 1100677.5, 0, 1669260, 1669260, 0, 0, 770733.2, 0, 
0, 4939242.8, 4939242.8, 0, 643564.4, 643564.4, 0), `733` = c(0, 
0, 0, 1095060, 0, 0, 0, 1674089.3, 0, 1252101.3, 0, 0, 1259111, 
0, 0, 0, 2429293.3, 0, 0, 2326928.3, 0, 0, 1259216.5, 0, 0, 1238837.5, 
0, 0, 0, 1224858.3, 0, 0, 0, 2952529.9, 0, 0, 0, 0, 4626414.7, 
0, 0, 1121440, 0, 0, 1025386.2, 0, 0, 0, 0, 1917900, 0, 0, 2197533.3, 
0, 0, 2840155.5, 0, 0, 1054285.7, 0, 0, 0, 7516814.2, 0, 0, 1329434.4, 
0), `818` = c(0, 0, 0, 720551.1, 0, 0, 0, 714662.7, 0, 617012.9, 
0, 0, 549850.8, 0, 0, 0, 1197460, 0, 0, 771979.2, 0, 0, 585847.5, 
585847.5, 0, 875475.4, 0, 0, 0, 576774, 0, 0, 0, 1147389.8, 0, 
0, 0, 0, 2292421.7, 0, 0, 755258.3, 0, 0, 0, 0, 0, 0, 0, 858930, 
0, 0, 1242668.3, 0, 0, 1580088.3, 0, 0, 641938.6, 641938.6, 0, 
0, 3838660.4, 0, 0, 733140.8, 733140.8), `896` = c(0, 0, 0, 590480, 
0, 0, 0, 817087.6, 0, 569869.5, 0, 0, 650822.5, 650822.5, 0, 
0, 1624052.5, 0, 0, 682570.8, 0, 0, 0, 1538800, 0, 690488.6, 
690488.6, 0, 0, 797923.9, 0, 0, 0, 1204889.3, 0, 0, 0, 0, 2184432.2, 
0, 0, 676654.7, 0, 0, 0, 210680, 0, 0, 0, 791152.5, 0, 0, 1599855.8, 
0, 0, 1358543.8, 0, 0, 0, 931288, 0, 0, 4683895.2, 0, 0, 0, 1202806
), `972` = c(0, 0, 0, 799116.4, 0, 0, 0, 759169.9, 0, 408845, 
0, 0, 0, 948980, 0, 0, 968766.7, 0, 0, 675349.7, 0, 0, 0, 0, 
0, 0, 1811117.6, 0, 0, 609098.5, 0, 0, 0, 1073749.1, 0, 0, 0, 
0, 2392258.9, 0, 0, 743580, 0, 0, 0, 1020485, 0, 0, 0, 446596.7, 
0, 0, 1178583, 0, 0, 1438261.7, 0, 0, 0, 1133057.9, 0, 0, 4445814.7, 
0, 0, 0, 1057776.9), `1039` = c(0, 0, 0, 447255.3, 0, 0, 0, 609409.1, 
0, 304340, 0, 0, 0, 0, 0, 0, 694232.8, 0, 0, 473015.3, 0, 0, 
0, 0, 0, 0, 419524.9, 0, 0, 447760.6, 0, 0, 0, 932513.5, 0, 0, 
0, 0, 1251960.5, 0, 0, 276560, 0, 0, 0, 259640, 0, 0, 0, 354995, 
0, 0, 1570222.5, 0, 0, 1021822, 0, 0, 0, 811614, 0, 0, 2941698.2, 
0, 0, 0, 1199942.5), Gene = c("AT1G04170_1", "AT1G04170_2", "AT1G04170_3", 
"AT1G04170_4", "AT1G08520_1", "AT1G08520_2", "AT1G08520_3", "AT1G08520_4", 
"AT1G10670_1", "AT1G10670_2", "AT1G53500_1", "AT1G53500_2", "AT1G53500_3", 
"AT1G53500_4", "AT1G54270_1", "AT1G54270_2", "AT1G54270_3", "AT1G80480_1", 
"AT1G80480_2", "AT1G80480_3", "AT2G16950_1", "AT2G16950_2", "AT2G16950_3", 
"AT2G16950_4", "AT3G03960_1", "AT3G03960_2", "AT3G03960_3", "AT3G57290_1", 
"AT3G57290_2", "AT3G57290_3", "AT3G63460_1", "AT3G63460_2", "AT3G63460_3", 
"AT3G63460_4", "AT4G20890_1", "AT4G20890_2", "AT4G20890_3", "AT4G20890_4", 
"AT4G20890_5", "AT4G20980_1", "AT4G20980_2", "AT4G20980_3", "AT4G24190_1", 
"AT4G24190_2", "AT4G24190_3", "AT4G24190_4", "AT4G29670_1", "AT4G29670_2", 
"AT4G29670_3", "AT4G29670_4", "AT5G23740_1", "AT5G23740_2", "AT5G23740_3", 
"AT5G23860_1", "AT5G23860_2", "AT5G23860_3", "AT5G40450_1", "AT5G40450_2", 
"AT5G40450_3", "AT5G40450_4", "AT5G62700_1", "AT5G62700_2", "AT5G62700_3", 
"ATCG00780_1", "ATCG00780_2", "ATCG00780_3", "ATCG00780_4")), .Names = c("10", 
"34", "59", "84", "110", "134", "165", "199", "234", "257", "362", 
"433", "506", "581", "652", "733", "818", "896", "972", "1039", 
"Gene"), row.names = c("AT1G04170_1", "AT1G04170_2", "AT1G04170_3", 
"AT1G04170_4", "AT1G08520_1", "AT1G08520_2", "AT1G08520_3", "AT1G08520_4", 
"AT1G10670_1", "AT1G10670_2", "AT1G53500_1", "AT1G53500_2", "AT1G53500_3", 
"AT1G53500_4", "AT1G54270_1", "AT1G54270_2", "AT1G54270_3", "AT1G80480_1", 
"AT1G80480_2", "AT1G80480_3", "AT2G16950_1", "AT2G16950_2", "AT2G16950_3", 
"AT2G16950_4", "AT3G03960_1", "AT3G03960_2", "AT3G03960_3", "AT3G57290_1", 
"AT3G57290_2", "AT3G57290_3", "AT3G63460_1", "AT3G63460_2", "AT3G63460_3", 
"AT3G63460_4", "AT4G20890_1", "AT4G20890_2", "AT4G20890_3", "AT4G20890_4", 
"AT4G20890_5", "AT4G20980_1", "AT4G20980_2", "AT4G20980_3", "AT4G24190_1", 
"AT4G24190_2", "AT4G24190_3", "AT4G24190_4", "AT4G29670_1", "AT4G29670_2", 
"AT4G29670_3", "AT4G29670_4", "AT5G23740_1", "AT5G23740_2", "AT5G23740_3", 
"AT5G23860_1", "AT5G23860_2", "AT5G23860_3", "AT5G40450_1", "AT5G40450_2", 
"AT5G40450_3", "AT5G40450_4", "AT5G62700_1", "AT5G62700_2", "AT5G62700_3", 
"ATCG00780_1", "ATCG00780_2", "ATCG00780_3", "ATCG00780_4"), class = "data.frame")

我想按相同的“基础”字符串对行取子集,“基础”指的是下划线之前的那部分字符串(例如 AT1G04170_1 的基础是 AT1G04170)。

我尝试把 subset 和 grep 函数结合起来使用,但只有在手动提供字符串时才有效。逐个字符串地处理工作量太大。

1 个答案:

答案 0 :(得分:3)

您可以用 gather() 将数据转换为长格式,用 separate() 将 Gene 列拆分为 label 和 number 两列;为了让图形更美观,可以用 na_if() 把 y 列中的 0 值替换为 NA,并用 factor() 指定 x 轴的顺序:

library(dplyr)
library(tidyr)
# Reshape the wide table into long format: one row per (Gene, column) pair.
# Expects `df` to hold the value columns plus a `Gene` column, as in the
# data shown above.
long_df <- df %>%
  # Stack every column except Gene: the former column names go to `x`,
  # their values to `y`.
  gather(key = x, value = y, -Gene) %>%
  # Split e.g. "AT1G04170_1" into label = "AT1G04170" and number = "1".
  # The separator is given explicitly so that only "_" splits the ID;
  # the default would split on any run of non-alphanumeric characters.
  separate(Gene, into = c("label", "number"), sep = "_") %>%
  mutate(
    # Turn zeros into NA so they are treated as missing when plotting.
    y = na_if(y, 0),
    # Fix the factor levels to order of first appearance, so the x axis
    # follows the original column order rather than alphabetical order.
    x = factor(x, levels = unique(x))
  )

然后,参照 this answer,您可以先创建一个绘图对象 p,再使用 dplyr 的 do() 或 purrr 的 by_slice() 遍历各个分组(注意:自 purrr 0.2.3 起,by_slice() 已移至 purrrlyr 包):

library(ggplot2)
# Template plot built on the full long table; `%+%` below swaps in the
# data of one group at a time.
p <- ggplot(long_df, aes(x = x, y = y, color = number)) +
  geom_point()

# Using dplyr's do(): one row per label, with the group's plot stored in
# the list column `plots`. Inside do(), `.` refers to the current group.
res <- do(
  group_by(long_df, label),
  plots = p %+% . + facet_wrap(~label)
)

# Using purrr's map() on per-label subsets.
# slice_rows()/by_slice() were removed from purrr in 0.2.3 (they moved to
# the purrrlyr package), so the same label/plots table is built here with
# split() + map(), which needs no additional package.
library(purrr)
plot_list <- long_df %>%
  split(.$label) %>%                      # named list: one data frame per label
  map(~ p %+% .x + facet_wrap(~label))    # swap each subset into the template plot
# Collect the result as one row per label with the plots in a list column,
# so `res$plots` can be used exactly as with the do() variant.
res <- tibble(label = names(plot_list), plots = unname(plot_list))

给出了:

#Source: local data frame [19 x 2]
#Groups: <by row>
#
## A tibble: 19 × 2
#       label    plots
#*      <chr>   <list>
#1  AT1G04170 <S3: gg>
#2  AT1G08520 <S3: gg>
#3  AT1G10670 <S3: gg>
#4  AT1G53500 <S3: gg>
#5  AT1G54270 <S3: gg>
#6  AT1G80480 <S3: gg>
#7  AT2G16950 <S3: gg>
#8  AT3G03960 <S3: gg>
#9  AT3G57290 <S3: gg>
#10 AT3G63460 <S3: gg>
#11 AT4G20890 <S3: gg>
#12 AT4G20980 <S3: gg>
#13 AT4G24190 <S3: gg>
#14 AT4G29670 <S3: gg>
#15 AT5G23740 <S3: gg>
#16 AT5G23860 <S3: gg>
#17 AT5G40450 <S3: gg>
#18 AT5G62700 <S3: gg>
#19 ATCG00780 <S3: gg>

然后,您可以访问每个图,例如:

# `[[1]]` extracts the first plot object itself; `[1]` would return a
# length-one list wrapping it.
res$plots[[1]]

给出了:

enter image description here

要将所有图表保存为pdf,只需执行以下操作:

# Write every plot to a single PDF, one plot per page.
# Called without a file argument, pdf() writes to its default file
# ("Rplots.pdf" in the working directory).
pdf()
# Print each plot explicitly: a bare `res$plots` only draws through
# top-level auto-printing, which does not happen under source() or inside
# a function or loop.
for (plt in res$plots) {
  print(plt)
}
dev.off()