我有两个data.frames
,第一个是基因的表达,第二个是基因列表。
我将两者合并,还使用pheatmap
库绘制了一个热图图。但是,当我的基因列表的大小明显变大时,使用我目前的方法来达到相同的结果将很耗时。
这是我的代码:
library(limma)
library(reshape2)
library(limma)
library(data.table)
library(pheatmap)
library(ggplot2)
library(gplots)
library(dplyr)
ex <- read.delim("mdata.c.txt", stringsAsFactors = FALSE)
miR <- read.delim("TargetScan (mir).txt")
x <- t.data.frame(miR)
colnames(x) <- x[1,]
x1 <- subset(x, select = "mmu-miR-468")
x1 <- x1[-1:-3]
x1 <- strsplit2(x1,";")
x1 <- as.data.frame(t(x1)); colnames(x1) <- "Genes"; x1 <- subset(x1, select = (Genes))
d <- merge(ex, x1, by.x = "Symbol", by.y = "Genes" )
rownames(d) <- d$Symbol; d <- subset(d, select = -(Symbol))
pheatmap(d, color = redgreen(256), border_color = NA)
我会很高兴有任何提示可以帮助我将这些代码通用化为一个函数。
miR <-
structure(list(Term = structure(1:2, .Label = c("MMU-MIR-463",
"MMU-MIR-468"), class = "factor"), Overlap = structure(1:2, .Label = c("50/1186",
"67/1844"), class = "factor"), Adjusted.P.value = c(0.000112,
0.000321), Genes = structure(1:2, .Label = c("HHIP;PRSS23;CDH6;HTR7;CCND2;CAPN6;DPYSL3;PSD3;HOXA1;SOX6;PHLDA1;MME;TET2;NPY1R;UNC5C;HNF1B;ATP11A;FOXP2;ETV5;CCDC80;EMCN;ZADH2;PLCB1;DSC2;SKAP2;RBMS3;FOXC1;SLC35D3;ATP10D;GATA6;TMTC2;DLL1;PRDM16;GSE1;ST3GAL1;LONRF2;KCNJ3;ABCA1;DTNA;PRRX1;IGF1;ZIC5;FBXO32;MEIS2;APLN;MEIS1;PDE10A;CYP2S1;ECHDC2;LGR5",
"KCNG3;SH3KBP1;SLC35F2;COL12A1;SLC7A11;IKZF2;PRSS23;CDH6;HTR7;FSD1L;CCND2;CAPN6;DPYSL3;NEFL;ENPP3;BVES;EDIL3;IGFBP5;MME;UNC5C;PAX6;FOXP2;TOX3;GAP43;GPRC5A;RARB;DSC2;SKAP2;RBMS3;CRABP1;NEDD9;THY1;PCDH18;FOXO1;RASGRP3;CALCR;FLRT2;ALDH1B1;IRAK2;PRDM16;ATOH8;PLXNA2;BTLA;HOXC8;ST3GAL1;ABCA1;DTNA;FZD4;PCDH7;ST8SIA2;PLEKHA6;LHFPL2;FHDC1;FBXO32;ELL2;GRHL2;DCLK1;SULF2;COL3A1;BMP2;MEIS1;CXCL12;PDE10A;TSPAN18;CYP2S1;FGFR2;YPEL2"
), class = "factor")), .Names = c("Term", "Overlap", "Adjusted.P.value",
"Genes"), row.names = 1:2, class = "data.frame")
ex <-
structure(list(Symbol = structure(c(4L, 5L, 6L, 7L, 8L, 9L, 1L,
2L, 3L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L
), .Label = c("0610009B22RIK", "0610009O20RIK", "0610010F05RIK",
"ABCA1", "ALDH1B1", "ATOH8", "BMP2", "BTLA", "BVES", "CALCR",
"CAPN6", "CCND2", "CDH6", "COL12A1", "COL3A1", "CRABP1", "CXCL12",
"CYP2S1", "DCLK1", "DPYSL3", "DSC2", "DTNA", "EDIL3", "ELL2",
"ENPP3", "FBXO32", "FGFR2", "FHDC1", "FLRT2", "FOXO1", "FOXP2",
"FSD1L", "FZD4", "GAP43", "GPRC5A", "GRHL2", "HOXC8", "HTR7",
"IGFBP5", "IKZF2", "IRAK2", "KCNG3", "LHFPL2", "MEIS1", "MME",
"NEDD9", "NEFL", "PAX6", "PCDH18", "PCDH7", "PDE10A", "PLEKHA6",
"PLXNA2", "PRDM16", "PRSS23", "RARB", "RASGRP3", "RBMS3", "SH3KBP1",
"SKAP2", "SLC35F2", "SLC7A11", "ST3GAL1", "ST8SIA2", "SULF2",
"THY1", "TOX3", "TSPAN18", "UNC5C", "YPEL2"), class = "factor"),
C = c(3.77748567, 4.822980067, 3.452958608, 2.236141549,
3.614683896, 3.593664654, 5.945905795, 5.742337759, 4.450522449,
5.498841258, 3.697467989, 5.513117379, 3.242845829, 2.46836919,
3.496960685, 5.384089632, 5.74625976, 5.450381604, 4.874852097,
4.949946396), C.1 = c(3.732461809, 4.692345608, 3.643638517,
2.464903369, 3.923196924, 3.616573833, 5.963905317, 5.809169031,
4.365436784, 5.503449863, 3.330910541, 5.4208984, 3.458151893,
2.638169702, 3.472312384, 5.22817678, 5.748194037, 4.960376197,
4.912263491, 5.050741047), C.2 = c(3.601493477, 5.004207195,
3.563712699, 2.181613498, 3.875891666, 3.423808752, 6.069401994,
5.681649549, 4.446904356, 5.531107836, 3.253315428, 5.51998794,
3.508949312, 2.822579494, 3.763116713, 5.099594951, 5.534438643,
4.777189191, 4.672866795, 5.161092117), RA = c(5.513066726,
3.812278261, 5.265405362, 3.87924304, 2.039519506, 4.759189108,
6.084061334, 5.667799517, 4.866842767, 3.002351423, 5.769773653,
7.193207195, 5.058879419, 4.485727823, 5.208026508, 7.516305285,
3.898770479, 3.619682092, 3.387839299, 6.397249876), RA.1 = c(5.880092133,
3.5148164, 5.318008079, 4.006997944, 2.102835455, 5.040962612,
6.502205273, 5.751918988, 5.188732013, 3.890624116, 5.515095982,
7.270263422, 5.434812681, 5.024162992, 5.769700472, 7.279298657,
4.355383213, 4.172363648, 3.614671345, 6.565207804), RA.2 = c(5.159058155,
3.806456716, 5.065596205, 3.474947237, 1.910148592, 4.336504004,
6.094690605, 5.570627182, 4.533170181, 3.776406815, 5.082191362,
6.770287927, 5.15348365, 4.358172991, 5.237166993, 6.723533889,
3.9179596, 4.221442971, 3.03103911, 6.667786042), RA.3 = c(4.734074368,
4.200001776, 5.197170604, 3.976954591, 2.259782729, 4.161739308,
6.178605852, 5.754705037, 4.480547997, 3.923700593, 4.216836024,
6.334891312, 4.620697451, 3.196568573, 4.270539044, 6.497724885,
5.127722309, 4.306590271, 3.7298749, 5.72776812), RA.4 = c(4.820123851,
4.266872888, 5.280094123, 3.635206849, 2.280822878, 4.24332292,
6.325583534, 5.813427882, 4.652434261, 4.094850763, 4.443030899,
6.373346607, 4.881255731, 3.280170001, 4.157540907, 6.546316014,
5.23067982, 4.269793519, 3.57404558, 5.700715548), RA.5 = c(5.776388404,
4.015218348, 5.562190604, 4.360043358, 2.109369794, 4.956135039,
6.218643051, 5.930558299, 5.016813151, 3.69361207, 5.558617804,
6.89059873, 5.377773892, 5.428164931, 4.681609449, 8.099779514,
3.188122992, 3.429885722, 3.272005291, 6.326745001), RA.6 = c(5.750993941,
3.608186366, 5.518325191, 4.095582272, 1.893268986, 4.738806628,
6.202852679, 5.881382715, 5.010168205, 3.690336209, 5.664577192,
6.8557427, 5.474816114, 5.42014608, 5.090621538, 8.057870572,
2.955197794, 3.902526229, 3.313782312, 6.2926593), C.3 = c(4.132465802,
4.905138375, 3.967882032, 2.605869749, 3.555513557, 2.982052818,
5.87631377, 5.693549115, 4.179721658, 5.318095317, 3.079103092,
5.407948228, 3.640464954, 3.403790177, 4.496064346, 5.172545273,
5.801449219, 5.762655142, 4.356521607, 4.55632696), C.4 = c(3.855829156,
4.981920253, 4.079812013, 2.437137471, 3.842764845, 3.465524973,
5.841440246, 5.695324233, 4.434939145, 5.334565117, 3.161046102,
5.170178954, 3.910542712, 3.32875125, 4.061759375, 5.25140962,
5.771233881, 5.725335141, 4.442725426, 4.689254442), C.5 = c(3.670887112,
5.297501513, 3.593775915, 2.666339182, 3.223641858, 4.023582703,
5.887486768, 5.695584675, 4.745276113, 6.136571937, 3.584883651,
4.410289078, 3.676541465, 3.282359706, 4.211894441, 5.509690722,
4.929828541, 4.816327629, 5.355952516, 5.298531279), C.6 = c(3.940111321,
5.358870859, 4.101887008, 2.428626315, 3.228658354, 3.655620865,
5.771101996, 5.799439398, 4.593426198, 6.04354472, 3.44480619,
4.518295413, 3.796813607, 3.617399678, 4.574228077, 5.574980059,
4.891775578, 4.815227266, 5.32952356, 5.294238506), RA.7 = c(4.527375399,
3.39283287, 5.85159372, 3.292592687, 2.417753678, 5.020235853,
6.258998893, 5.477798041, 4.547759067, 3.069608355, 5.405238368,
6.566895702, 4.359885707, 3.395221434, 4.631341687, 6.456497373,
6.15282374, 4.078732358, 3.328715232, 6.778655763), C.7 = c(3.53014304,
4.924750179, 4.225072133, 2.493934357, 4.200661489, 4.046989449,
6.194185274, 5.858296163, 4.40748286, 6.471134787, 4.602980231,
6.296995365, 3.768578264, 3.346921313, 4.392344433, 6.18166146,
6.075550117, 4.650449169, 5.548648278, 5.194624578), C.8 = c(3.277752462,
4.891125389, 4.154476758, 2.579864194, 4.208255243, 3.826840694,
6.26510624, 5.915726889, 4.267961934, 6.556073333, 4.395646401,
6.221880088, 4.141886823, 3.08775399, 4.144399592, 6.124129498,
6.064130254, 4.671598262, 5.491215185, 5.048757799), RA.8 = c(4.707785292,
4.379237421, 5.220415009, 2.487991083, 2.194346988, 4.862574883,
6.235785184, 5.848167668, 4.74069477, 4.654447747, 5.206939621,
7.382431878, 4.6627111, 4.715126219, 5.783151068, 7.857044358,
5.219060393, 3.972425948, 4.79923732, 6.466648851), RA.9 = c(4.926884673,
4.191861635, 5.404949243, 2.886561899, 2.179271153, 4.917931887,
6.233350584, 5.969063117, 4.613329075, 4.868862816, 5.563571853,
7.299846573, 4.815513507, 4.444855458, 6.104803044, 7.867913117,
5.265837909, 4.414191539, 4.844477913, 6.273708928)), .Names = c("Symbol",
"C", "C.1", "C.2", "RA", "RA.1", "RA.2", "RA.3", "RA.4", "RA.5",
"RA.6", "C.3", "C.4", "C.5", "C.6", "RA.7", "C.7", "C.8", "RA.8",
"RA.9"), row.names = c(NA, 20L), class = "data.frame")