反向排序后,数据帧中的因子级别变为NA

时间:2014-12-17 13:56:31

标签: r

来自数据框列表,我将它们转换为列表中的向量:

# Flatten every element of list3 into a single numeric vector.
vector3 <- lapply(list3, function(x) {as.numeric(as.vector(unlist(x)))})
# NOTE(review): names() stores character strings, so the as.factor() wrapper
# appears to have no lasting effect (the names printed below are plain strings).
names(vector3) <- as.factor(names(vector3))
names(vector3)
[1] "1"       "3"       "2"       "12"      "13"      "15"      "5"       "11"      "21"     
[10] "18"      "20"      "19"      "out"     "25"      "4"       "GBSL1B0" "6"       "17"     
[19] "11B2"    "9"       "ATs328"  "d142"    "10"      "B276D12" "TPD58"   "23"      "HoloI"  
[28] "7I"      "7II"     "8Holo"   "8Aca"    "BPU1C1g" "22"      "26"    

我要使用ggplot2进行boxplotting,所以我创建了一个熔化的数据帧:

# reshape2 provides melt().
library(reshape2)
# Melt the named list into long format; the head() output below shows the
# resulting columns `value` and `L1` (L1 holds the list element names).
v3m <- melt(vector3)
head(v3m)
 value L1
1  83.0  1
2  83.3  1
3  83.0  1
4  83.8  1
5  82.8  1
6  83.0  1

L1 现在保存的是初始列表中各个向量的名称,并且(我希望)它是一个因子:

  

table(v3m$L1)

      1      10      11    11B2      12      13      15      17      18      19       2      20 
2376363  431959   98868    3531   11770   98868   56496  251878  130647    4708  479039    4708 
     21      22      23      25      26       3       4       5       6      7I     7II    8Aca 
 188320  353100  134178   81213   60027 1385329 2164503  313082 4002977  129470  281303   32956 
  8Holo       9  ATs328 B276D12 BPU1C1g    d142 GBSL1B0   HoloI     out   TPD58 
 141240  123585   29425    3531    2354    3531    2354   28248    5885   22363 

我想根据vector3中反向的初始顺序重新排序因子级别:

# Intended: reverse the level order of L1 and turn it into an ordered factor.
# NOTE(review): levels() returns NULL for a non-factor; if L1 is a character
# column here, rev(NULL) is passed as `levels` and every value becomes NA.
v3m$L1 <- factor(v3m$L1,levels = rev(levels(v3m$L1)),ordered = TRUE)

但现在:

head(v3m)
1  83.0 <NA>
2  83.3 <NA>
3  83.0 <NA>
4  83.8 <NA>
5  82.8 <NA>
6  83.0 <NA>
table(v3m$L1)
< table of extent 0 >

L1 中的所有值都变成了 NA。我做错了什么?我此刻感到非常愚蠢。

以下是按样本创建的矢量测试列表。

dput(v3m)
structure(list(`1` = c(82.5, 82.2, 81.7, 83.2, 84.5, 82.1, 84, 
81.8, 84.1, 83.5), `3` = c(90.5, 92, 94.7, 92.7, 90.2, 91.2, 
85.7, 92.9, 92.9, 90.3), `2` = c(82.8, 81.7, 82, 81.9, 80.9, 
81.9, 81.7, 82.1, 81.5, 82.5), `12` = c(86, 85.3, 87.7, 87, 84.9, 
84.6, 84.4, 88.1, 86.8, 88.5), `13` = c(83.1, 83.2, 85, 83.9, 
82.6, 82.9, 83.7, 82.6, 82.7, 83.9), `15` = c(86.6, 84.6, 84, 
80.8, 83.6, 84.8, 84.8, 83.2, 85, 85.1), `5` = c(83, 81.5, 83.2, 
83.4, 81.8, 82.6, 83.4, 83.2, 83.9, 83), `11` = c(82.3, 82.2, 
83.1, 81, 81.4, 83.7, 82.1, 82.5, 82.7, 81.7), `21` = c(80.4, 
78.7, 78.7, 80.5, 81, 80.4, 79.9, 79.3, 80.4, 80), `18` = c(80.8, 
82.7, 81.9, 80.2, 81.2, 81.7, 80.5, 81, 81.3, 80.6), `20` = c(80.2, 
81.1, 82.2, 81.7, 81.5, 81.7, 80.1, 82.8, 81.3, 81.2), `19` = c(81.5, 
79.9, 79.7, 81.2, 81.3, 82.2, 81.8, 82.1, 82, 82.9), out = c(81.1, 
81.5, 80.9, 81.1, 80.5, 80.8, 81, 80.9, 80.9, 79.9), `25` = c(79.8, 
79.9, 78.8, 78, 78.6, 80.1, 78.6, 79.3, 78.8, 79.3), `4` = c(79.3, 
81.4, 80.8, 80, 80.4, 79.4, 79, 78.3, 79.1, 79.1), GBSL1B0 = c(76.4, 
75.2, 76.7, 78.6, 76.9, 76.3, 77.8, 79.2, 77.2, 77.1), `6` = c(80.3, 
81.2, 81.5, 81.3, 81.9, 82.6, 81.2, 81.5, 81.6, 81.1), `17` = c(82, 
80.7, 77.8, 81, 82.3, 80.9, 81.4, 80.9, 81.7, 82.6), `11B2` = c(82.2, 
79.9, 80.8, 80.8, 81.3, 82.7, 82, 81.5, 81.2, 82.1), `9` = c(80.8, 
80.6, 82.1, 80.6, 79.4, 82.3, 81.6, 81.4, 81, 79.5), ATs328 = c(81.1, 
80.7, 79.7, 81.9, 80.5, 80, 80.4, 81.2, 80.6, 79), d142 = c(80.2, 
80.8, 79.9, 79.4, 79.4, 80, 79.9, 81.5, 80.6, 80), `10` = c(79.9, 
80.6, 80.3, 79.8, 79, 79.2, 80.9, 80.6, 80, 78.5), B276D12 = c(80.6, 
78.9, 80.2, 79.6, 80, 79.6, 79.8, 79.2, 78.8, 79.6), TPD58 = c(79.2, 
80.3, 81.6, 80.5, 81.7, 81.6, 82.6, 80.4, 82.2, 82.2), `23` = c(78.2, 
80.2, 79.7, 79.8, 79.7, 80.4, 80.2, 77.8, 80, 79.9), HoloI = c(80.4, 
80.7, 80.7, 80.3, 80.3, 81, 79.8, 79, 77.9, 81.4), `7I` = c(80.2, 
81.8, 79.8, 80.7, 81, 78.7, 81.1, 79, 81.7, 81.4), `7II` = c(80.2, 
80.3, 80.7, 79.9, 80.5, 80, 81, 79.2, 81.9, 78), `8Holo` = c(80.2, 
82.5, 80.4, 79.5, 81.2, 79.4, 79, 80.9, 80, 79.6), `8Aca` = c(77.1, 
81.4, 80.7, 81.9, 81, 79.8, 79.9, 80.4, 78.9, 79), BPU1C1g = c(78.3, 
79.2, 76.9, 78.6, 79.1, 77.7, 78, 78.9, 78.5, 78), `22` = c(81.4, 
80.3, 80.1, 78.8, 81.1, 79.8, 81.1, 80.7, 81.8, 81.2), `26` = c(80.6, 
79.8, 80, 79, 80.6, 77.5, 80.6, 81.5, 79.8, 81.3)), .Names = c("1", 
"3", "2", "12", "13", "15", "5", "11", "21", "18", "20", "19", 
"out", "25", "4", "GBSL1B0", "6", "17", "11B2", "9", "ATs328", 
"d142", "10", "B276D12", "TPD58", "23", "HoloI", "7I", "7II", 
"8Holo", "8Aca", "BPU1C1g", "22", "26"))

有趣的是,如果我执行的是

# NOTE(review): `levels` is given the whole L1 column, in which each name is
# repeated once per observation; that repetition is what triggers the
# "duplicated levels" warning shown below.
v3m$L1 <- factor(v3m$L1,levels = v3m$L1,ordered = TRUE)

而不是用 "rev" 来反转顺序,我会得到

Warning message:
In `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels) else paste0(labels,  :
  duplicated levels in factors are deprecated

但这时因子的值并没有被转换为 NA。然而,我在名称列表中没有看到任何重复的水平。我曾想,名称中出现的数字可能会引起问题,但即使所有名称都以 "df_" 开头,这种行为也同样会出现。

谢谢!

1 个答案:

答案 0 :(得分:0)

答案是:按因子水平(levels)来排序是错误的做法。我需要创建一个包含所有名称的向量,将其反转,并用它来给箱线图排序:

# Flatten every element of list3 into a single numeric vector.
vector3 <- lapply(list3, function(x) {
  as.numeric(as.vector(unlist(x)))
})
# Desired level order: the original list order, reversed. names() already
# yields character strings, so no as.factor() round trip is needed.
nv3 <- rev(names(vector3))
# Melt into long format; column L1 holds the list element names.
# (The result must be stored in v3m, the object modified on the next line.)
v3m <- melt(vector3)
# Pass the levels explicitly instead of calling levels() on L1: levels()
# returns NULL for a non-factor column, which is presumably what produced the
# all-NA result in the question.
v3m$L1 <- factor(v3m$L1, levels = nv3, ordered = TRUE)

很抱歉打扰。