cut 函数的奇怪行为

时间:2016-02-26 17:51:15

标签: r

我想对数据框中的数值变量进行分箱(binning),请查看我的示例代码:

x <- -10:10
y <- x^2
parab <- data.frame(x, y)
str(parab)
## 'data.frame':    21 obs. of  2 variables:
##  $ x: int  -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 ...
##  $ y: num  100 81 64 49 36 25 16 9 4 1 ...
cut(parab$x, 3) #works as expected
##  [1] (-10,-3.33]  (-10,-3.33]  (-10,-3.33]  (-10,-3.33]  (-10,-3.33] 
##  [6] (-10,-3.33]  (-10,-3.33]  (-3.33,3.33] (-3.33,3.33] (-3.33,3.33]
## [11] (-3.33,3.33] (-3.33,3.33] (-3.33,3.33] (-3.33,3.33] (3.33,10]   
## [16] (3.33,10]    (3.33,10]    (3.33,10]    (3.33,10]    (3.33,10]   
## [21] (3.33,10]   
## Levels: (-10,-3.33] (-3.33,3.33] (3.33,10]
apply(parab, 2, function(x) cut(x, 3)) #works as expected
##       x              y            
##  [1,] "(-10,-3.33]"  "(66.7,100]" 
##  [2,] "(-10,-3.33]"  "(66.7,100]" 
##  [3,] "(-10,-3.33]"  "(33.3,66.7]"
##  [4,] "(-10,-3.33]"  "(33.3,66.7]"
##  [5,] "(-10,-3.33]"  "(33.3,66.7]"
##  [6,] "(-10,-3.33]"  "(-0.1,33.3]"
##  [7,] "(-10,-3.33]"  "(-0.1,33.3]"
##  [8,] "(-3.33,3.33]" "(-0.1,33.3]"
##  [9,] "(-3.33,3.33]" "(-0.1,33.3]"
## [10,] "(-3.33,3.33]" "(-0.1,33.3]"
## [11,] "(-3.33,3.33]" "(-0.1,33.3]"
## [12,] "(-3.33,3.33]" "(-0.1,33.3]"
## [13,] "(-3.33,3.33]" "(-0.1,33.3]"
## [14,] "(-3.33,3.33]" "(-0.1,33.3]"
## [15,] "(3.33,10]"    "(-0.1,33.3]"
## [16,] "(3.33,10]"    "(-0.1,33.3]"
## [17,] "(3.33,10]"    "(33.3,66.7]"
## [18,] "(3.33,10]"    "(33.3,66.7]"
## [19,] "(3.33,10]"    "(33.3,66.7]"
## [20,] "(3.33,10]"    "(66.7,100]" 
## [21,] "(3.33,10]"    "(66.7,100]"
apply(parab, 2, function(x) if(is.numeric(x)) cut(x, 3) else x) #works as expected
##       x              y            
##  [1,] "(-10,-3.33]"  "(66.7,100]" 
##  [2,] "(-10,-3.33]"  "(66.7,100]" 
##  [3,] "(-10,-3.33]"  "(33.3,66.7]"
##  [4,] "(-10,-3.33]"  "(33.3,66.7]"
##  [5,] "(-10,-3.33]"  "(33.3,66.7]"
##  [6,] "(-10,-3.33]"  "(-0.1,33.3]"
##  [7,] "(-10,-3.33]"  "(-0.1,33.3]"
##  [8,] "(-3.33,3.33]" "(-0.1,33.3]"
##  [9,] "(-3.33,3.33]" "(-0.1,33.3]"
## [10,] "(-3.33,3.33]" "(-0.1,33.3]"
## [11,] "(-3.33,3.33]" "(-0.1,33.3]"
## [12,] "(-3.33,3.33]" "(-0.1,33.3]"
## [13,] "(-3.33,3.33]" "(-0.1,33.3]"
## [14,] "(-3.33,3.33]" "(-0.1,33.3]"
## [15,] "(3.33,10]"    "(-0.1,33.3]"
## [16,] "(3.33,10]"    "(-0.1,33.3]"
## [17,] "(3.33,10]"    "(33.3,66.7]"
## [18,] "(3.33,10]"    "(33.3,66.7]"
## [19,] "(3.33,10]"    "(33.3,66.7]"
## [20,] "(3.33,10]"    "(66.7,100]" 
## [21,] "(3.33,10]"    "(66.7,100]"
apply(parab, 2, function(x) ifelse(T, cut(x, 3), T)) #does not work!
## x y 
## 1 3
parab$z <- rep("test", length(x))
str(parab)
## 'data.frame':    21 obs. of  3 variables:
##  $ x: int  -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 ...
##  $ y: num  100 81 64 49 36 25 16 9 4 1 ...
##  $ z: chr  "test" "test" "test" "test" ...
apply(parab, 2, function(x) if(is.numeric(x)) cut(x, 3) else x) #does not work anymore?!?
##       x     y     z     
##  [1,] "-10" "100" "test"
##  [2,] " -9" " 81" "test"
##  [3,] " -8" " 64" "test"
##  [4,] " -7" " 49" "test"
##  [5,] " -6" " 36" "test"
##  [6,] " -5" " 25" "test"
##  [7,] " -4" " 16" "test"
##  [8,] " -3" "  9" "test"
##  [9,] " -2" "  4" "test"
## [10,] " -1" "  1" "test"
## [11,] "  0" "  0" "test"
## [12,] "  1" "  1" "test"
## [13,] "  2" "  4" "test"
## [14,] "  3" "  9" "test"
## [15,] "  4" " 16" "test"
## [16,] "  5" " 25" "test"
## [17,] "  6" " 36" "test"
## [18,] "  7" " 49" "test"
## [19,] "  8" " 64" "test"
## [20,] "  9" " 81" "test"
## [21,] " 10" "100" "test"

我的问题

  1. 为什么必须使用 if ... else 而不是 ifelse(我认为这与 ifelse 是向量化的有关?)......更重要的是
  2. 为什么cut函数在另一列不是数字时停止工作?如何纠正这种情况以使其再次发挥作用?

1 个答案:

答案 0 :(得分:2)

您的问题与 cut 无关,而与 ifelse 和 apply 函数有关。

ifelse 返回的结果与其第一个参数(条件 test)的长度相同,因此当您执行

ifelse(T, cut(x, 3), T)

时,条件 T 的长度仅为 1,因此每列只能得到长度为 1 的结果。

您遇到的另一个问题是理解apply的工作过程。来自apply文档:

 If ‘X’ is not an array but an object of a class with a non-null
 ‘dim’ value (such as a data frame), ‘apply’ attempts to coerce it
 to an array via ‘as.matrix’ if it is two-dimensional (e.g., a data
 frame) or via ‘as.array’.

您在data.frame中添加了非数字列。使用apply时,它会首先将data.frame转换为矩阵。矩阵只能是单一类型,而字符型会优先于数值型被选用。因此,您以为是数值的列现在都变成了字符,于是 if / else 语句会走 'else' 分支,因为输入不是数值。

要做你想做的事,你可以使用:

parab[] <- lapply(parab, function(x) if(is.numeric(x)) cut(x, 3) else x)

(感谢 @PierreLafortune 提供此版本)