对融化(melt)后的数据按值排序并选取每组的前几行

时间:2016-02-26 06:25:11

标签: r reshape melt

我有以下融化数据:

# Example data in long ("melted") form, written as a structure() literal:
# a data.frame with 154 rows and three columns -- CellTypes (factor),
# variable (factor with levels "LPS_IV_SP" and "MPL_IV_SP") and value (numeric).
# The first 77 rows belong to LPS_IV_SP and the last 77 rows to MPL_IV_SP.
dat.melt <- structure(list(CellTypes = structure(c(62L, 35L, 73L, 45L, 14L, 
22L, 46L, 13L, 68L, 21L, 1L, 10L, 64L, 24L, 72L, 58L, 51L, 9L, 
60L, 37L, 34L, 49L, 33L, 2L, 50L, 32L, 11L, 52L, 44L, 66L, 8L, 
5L, 47L, 59L, 53L, 7L, 6L, 77L, 75L, 17L, 27L, 61L, 20L, 18L, 
19L, 16L, 54L, 15L, 41L, 3L, 63L, 48L, 57L, 43L, 70L, 40L, 12L, 
76L, 74L, 29L, 28L, 25L, 30L, 42L, 39L, 56L, 4L, 67L, 71L, 31L, 
36L, 23L, 38L, 69L, 55L, 26L, 65L, 62L, 35L, 73L, 45L, 14L, 22L, 
46L, 13L, 68L, 21L, 1L, 10L, 64L, 24L, 72L, 58L, 51L, 9L, 60L, 
37L, 34L, 49L, 33L, 2L, 50L, 32L, 11L, 52L, 44L, 66L, 8L, 5L, 
47L, 59L, 53L, 7L, 6L, 77L, 75L, 17L, 27L, 61L, 20L, 18L, 19L, 
16L, 54L, 15L, 41L, 3L, 63L, 48L, 57L, 43L, 70L, 40L, 12L, 76L, 
74L, 29L, 28L, 25L, 30L, 42L, 39L, 56L, 4L, 67L, 71L, 31L, 36L, 
23L, 38L, 69L, 55L, 26L, 65L), .Label = c("3T3-L1", "Adipose Brown", 
"Adipose White", "Adrenal Gland", "B Cells (GL7 neg; Alum)", 
"B Cells (GL7 neg; KLH)", "B Cells (GL7 pos; Alum)", "B Cells (GL7 pos; KLH)", 
"B Cells Marginal Zone", "B220+ Dend. Cells", "BA/F3", "Bladder", 
"Bone", "Bone Marrow", "C2C12", "CD4+ SP Thymoctyes", "CD4+ T cells", 
"CD4+/CD8+ DP Thymocytes", "CD8+ SP Thymocytes", "CD8+ T cells", 
"CD8a+ Dend. Cells Lymphoid", "CD8a+ Dend. Cells Myeloid", "Ciliary Bodies", 
"Common Myeloid Progenitor", "Cornea", "Dorsal Root Ganglia", 
"Embryonic Fibroblasts", "Embryonic Stem Line Bruce4 P13", "Embryonic Stem Line V26 2 P16", 
"Epidermis", "Eyecup", "Follicular B Cells", "Foxp3+ Tcells", 
"Granulo Monoprogenitor", "Granulocytes", "Heart", "Hematopoietic Stem Cells", 
"Iris", "Kidney", "Lacrimal Gland", "Large Intestine", "Lens", 
"Liver", "Lung", "Lymph Nodes", "Macrophage Peri ", "Mammary Gland", 
"Mammary Gland Non-Lactating", "Mast Cells", "Mast Cells IgE", 
"Mast Cells IgE 1hr", "Mast Cells IgE 6hr", "Megaerythrocyte Progenitor", 
"mIMCD-3 Cells", "MIN6 cells", "Neuro2a Neuroblastoma Cells", 
"NIH 3T3", "NK Cells", "Osteoblast Day14", "Osteoblast Day21", 
"Osteoblast Day5", "Osteoclasts", "Ovary", "Pancreas", "Pituitary", 
"Placenta", "Prostate", "RAW 264.7 Cells", "Retinal Pigment Epithelium", 
"Salivary Gland", "Skeletal Muscle", "Small Intestine", "Spleen", 
"Stem Cells C3H/10T1/2", "Stomach", "Umbilical Cord", "Uterus"
), class = "factor"), variable = structure(c(1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L), .Label = c("LPS_IV_SP", "MPL_IV_SP"), class = "factor"), 
    value = c(3.647, 33.629, 17.838, 33.917, 29.66, 31.694, 32.603, 
    24.152, 19.969, 24.012, 40.101, 12.682, 0.323, 12.846, 5.087, 
    11.707, 16.682, 7.71, 22.472, 10.21, 10.109, 12.643, 12.623, 
    1.48, 13.075, 5.042, 12.19, 11.691, 15.24, 17.073, 5.854, 
    5.188, 11.983, 18.679, 6.406, 4.474, 5.445, 8.144, 0.739, 
    3.652, 14.232, 17.1, 2.603, 1.762, 1.993, 3.475, 10.305, 
    7.457, 1.189, 2.895, 4.181, 3.06, 5.885, 3.063, 2.532, 1.662, 
    3.86, 5.094, 5.916, 4.553, 3.703, 2.546, 0.764, 0.597, 1.39, 
    2.933, 0.665, 0.121, 0.257, 0.764, 0.196, 0.208, 0.232, 0.001, 
    0.004, 0.035, 0.036, 56.156, 53.485, 48.206, 45.975, 41.067, 
    40.581, 38.155, 33.009, 29.468, 29.219, 27.945, 19.165, 15.985, 
    15.682, 15.077, 14.72, 13.856, 13.576, 12.914, 12.77, 12.577, 
    12.526, 11.05, 10.532, 10.008, 9.942, 9.238, 8.67, 8.237, 
    7.938, 7.819, 7.55, 7.349, 7.217, 7.146, 6.158, 5.852, 5.368, 
    5.328, 5.126, 4.887, 4.767, 4.24, 3.858, 3.816, 3.676, 3.318, 
    3.118, 2.459, 2.269, 2.266, 2.201, 1.467, 1.418, 1.368, 1.267, 
    1.077, 1.022, 0.835, 0.667, 0.655, 0.609, 0.53, 0.452, 0.24, 
    0.239, 0.211, 0.124, 0.084, 0.05, 0.028, 0.024, 0.016, 0.007, 
    0.006, 0.003, 0.002)), row.names = c(NA, -154L), .Names = c("CellTypes", 
"variable", "value"), class = "data.frame")

看起来像这样:

> tail(dat.melt,n=5L)
                     CellTypes  variable value
150                       Iris MPL_IV_SP 0.016
151 Retinal Pigment Epithelium MPL_IV_SP 0.007
152                 MIN6 cells MPL_IV_SP 0.006
153        Dorsal Root Ganglia MPL_IV_SP 0.003
154                  Pituitary MPL_IV_SP 0.002
> head(dat.melt,n=5L)
     CellTypes  variable  value
1  Osteoclasts LPS_IV_SP  3.647
2 Granulocytes LPS_IV_SP 33.629
3       Spleen LPS_IV_SP 17.838
4  Lymph Nodes LPS_IV_SP 33.917
5  Bone Marrow LPS_IV_SP 29.660
> 

对于每个变量(MPL_IV_SP 和 LPS_IV_SP),我想选出按 value 降序排序后的前 5 行(即对应的 CellTypes)。我该怎么做?

2 个答案:

答案 0 :(得分:3)

您也可以使用data.table包。以下是代码:

library(data.table)
# Convert the data.frame to a data.table.
dat.melt <- as.data.table(dat.melt)
# Sort all rows by value in descending order first, then keep the first five
# rows of each variable group. Without the ordering step the "top" rows would
# simply be the first rows in storage order, which is not sorted for LPS_IV_SP.
# head() is used instead of [1:5] so that a group with fewer than five rows
# does not get padded with NA rows.
dat.melt[order(-value), head(.SD, 5L), by = variable]

data.table的优点是它比data.frame更快。

答案 1 :(得分:1)

我们可以使用top_n

library(dplyr)
# For each variable keep the rows with the five largest values (top_n() keeps
# ties, so a group may return more than five rows), then drop the grouping and
# sort so every group is listed in descending order of value.
# top_n() selects rows but does not reorder them, hence the explicit arrange().
dat.melt %>%
      group_by(variable) %>%
      top_n(5, value) %>%
      ungroup() %>%
      arrange(variable, desc(value))

注意:另一个答案并没有进行排序(sort)。不过,我能理解这种带有偏见的投票。