我想对数据框中的数值变量进行分箱(binning),请查看我的示例代码:
# Build a small demo data frame: x is the integer sequence -10..10 and
# y is x squared (a parabola).
x <- -10:10
y <- x^2
parab <- data.frame(x, y)
# Show the structure of the data frame, i.e. the type of each column.
str(parab)
## 'data.frame': 21 obs. of 2 variables:
## $ x: int -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 ...
## $ y: num 100 81 64 49 36 25 16 9 4 1 ...
# Bin the x column into 3 intervals of equal length over its range;
# cut() returns a factor whose levels are the interval labels.
cut(parab$x, 3) #works as expected
## [1] (-10,-3.33] (-10,-3.33] (-10,-3.33] (-10,-3.33] (-10,-3.33]
## [6] (-10,-3.33] (-10,-3.33] (-3.33,3.33] (-3.33,3.33] (-3.33,3.33]
## [11] (-3.33,3.33] (-3.33,3.33] (-3.33,3.33] (-3.33,3.33] (3.33,10]
## [16] (3.33,10] (3.33,10] (3.33,10] (3.33,10] (3.33,10]
## [21] (3.33,10]
## Levels: (-10,-3.33] (-3.33,3.33] (3.33,10]
# Apply the same 3-interval binning to every column (MARGIN = 2).
# The result comes back as a character matrix of interval labels.
apply(parab, 2, function(x) cut(x, 3)) #works as expected
## x y
## [1,] "(-10,-3.33]" "(66.7,100]"
## [2,] "(-10,-3.33]" "(66.7,100]"
## [3,] "(-10,-3.33]" "(33.3,66.7]"
## [4,] "(-10,-3.33]" "(33.3,66.7]"
## [5,] "(-10,-3.33]" "(33.3,66.7]"
## [6,] "(-10,-3.33]" "(-0.1,33.3]"
## [7,] "(-10,-3.33]" "(-0.1,33.3]"
## [8,] "(-3.33,3.33]" "(-0.1,33.3]"
## [9,] "(-3.33,3.33]" "(-0.1,33.3]"
## [10,] "(-3.33,3.33]" "(-0.1,33.3]"
## [11,] "(-3.33,3.33]" "(-0.1,33.3]"
## [12,] "(-3.33,3.33]" "(-0.1,33.3]"
## [13,] "(-3.33,3.33]" "(-0.1,33.3]"
## [14,] "(-3.33,3.33]" "(-0.1,33.3]"
## [15,] "(3.33,10]" "(-0.1,33.3]"
## [16,] "(3.33,10]" "(-0.1,33.3]"
## [17,] "(3.33,10]" "(33.3,66.7]"
## [18,] "(3.33,10]" "(33.3,66.7]"
## [19,] "(3.33,10]" "(33.3,66.7]"
## [20,] "(3.33,10]" "(66.7,100]"
## [21,] "(3.33,10]" "(66.7,100]"
# Bin a column only when it is numeric, otherwise return it unchanged.
# Both columns are numeric at this point, so every column is binned.
apply(parab, 2, function(x) if(is.numeric(x)) cut(x, 3) else x) #works as expected
## x y
## [1,] "(-10,-3.33]" "(66.7,100]"
## [2,] "(-10,-3.33]" "(66.7,100]"
## [3,] "(-10,-3.33]" "(33.3,66.7]"
## [4,] "(-10,-3.33]" "(33.3,66.7]"
## [5,] "(-10,-3.33]" "(33.3,66.7]"
## [6,] "(-10,-3.33]" "(-0.1,33.3]"
## [7,] "(-10,-3.33]" "(-0.1,33.3]"
## [8,] "(-3.33,3.33]" "(-0.1,33.3]"
## [9,] "(-3.33,3.33]" "(-0.1,33.3]"
## [10,] "(-3.33,3.33]" "(-0.1,33.3]"
## [11,] "(-3.33,3.33]" "(-0.1,33.3]"
## [12,] "(-3.33,3.33]" "(-0.1,33.3]"
## [13,] "(-3.33,3.33]" "(-0.1,33.3]"
## [14,] "(-3.33,3.33]" "(-0.1,33.3]"
## [15,] "(3.33,10]" "(-0.1,33.3]"
## [16,] "(3.33,10]" "(-0.1,33.3]"
## [17,] "(3.33,10]" "(33.3,66.7]"
## [18,] "(3.33,10]" "(33.3,66.7]"
## [19,] "(3.33,10]" "(33.3,66.7]"
## [20,] "(3.33,10]" "(66.7,100]"
## [21,] "(3.33,10]" "(66.7,100]"
# ifelse() returns a result shaped like its first (test) argument. The test
# here is the length-1 constant T (TRUE), so only one value per column comes
# back: the integer code of the first element of the factor made by cut().
apply(parab, 2, function(x) ifelse(T, cut(x, 3), T)) #does not work!
## x y
## 1 3
# Add a character column z ("test" repeated once per element of x),
# then re-inspect the column types.
parab$z <- rep("test", length(x))
str(parab)
## 'data.frame': 21 obs. of 3 variables:
## $ x: int -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 ...
## $ y: num 100 81 64 49 36 25 16 9 4 1 ...
## $ z: chr "test" "test" "test" "test" ...
# apply() first coerces the data frame to a matrix via as.matrix(). With the
# character column z present, the whole matrix becomes character, so
# is.numeric(x) is FALSE for every column and the else branch returns the
# strings unchanged.
apply(parab, 2, function(x) if(is.numeric(x)) cut(x, 3) else x) #does not work anymore?!?
## x y z
## [1,] "-10" "100" "test"
## [2,] " -9" " 81" "test"
## [3,] " -8" " 64" "test"
## [4,] " -7" " 49" "test"
## [5,] " -6" " 36" "test"
## [6,] " -5" " 25" "test"
## [7,] " -4" " 16" "test"
## [8,] " -3" " 9" "test"
## [9,] " -2" " 4" "test"
## [10,] " -1" " 1" "test"
## [11,] " 0" " 0" "test"
## [12,] " 1" " 1" "test"
## [13,] " 2" " 4" "test"
## [14,] " 3" " 9" "test"
## [15,] " 4" " 16" "test"
## [16,] " 5" " 25" "test"
## [17,] " 6" " 36" "test"
## [18,] " 7" " 49" "test"
## [19,] " 8" " 64" "test"
## [20,] " 9" " 81" "test"
## [21,] " 10" "100" "test"
我的问题:
为什么 if 和 else 可以正常工作,而 ifelse 却不行(我认为这与 ifelse 是矢量化的有关?)......更重要的是,为什么当另一列不是数值型时,cut 函数就不起作用了?如何纠正这种情况以使其再次正常工作?

答案 0:(得分:2)
您的问题与 cut 无关,而与 ifelse 和 apply 函数有关。
ifelse 返回的结果与其第一个参数(test)长度相同,因此当您执行
ifelse(T, cut(x, 3), T)
时,test(即 T)的长度仅为 1,所以每列只能得到长度为 1 的结果。
您遇到的另一个问题在于对 apply 工作方式的理解。以下摘自 apply 的文档:
If ‘X’ is not an array but an object of a class with a non-null
‘dim’ value (such as a data frame), ‘apply’ attempts to coerce it
to an array via ‘as.matrix’ if it is two-dimensional (e.g., a data
frame) or via ‘as.array’.
您在 data.frame 中添加了一个非数值列。使用 apply 时,它会首先把 data.frame 转换为矩阵。矩阵只能有一种类型,而字符类型优先于数值类型,因此整个矩阵都被转换成了字符。于是您原以为是数值的列现在变成了字符,is.numeric 返回 FALSE,if / else 语句便进入了 else 分支。
要做你想做的事,你可以使用:
# Bin each numeric column into 3 intervals and keep every other column as-is.
# lapply() walks the columns as a list, so there is no matrix coercion, and
# assigning into parab[] keeps the result a data frame.
parab[] <- lapply(parab, function(col) {
  if (is.numeric(col)) {
    return(cut(col, 3))
  }
  col
})
(感谢 @PierreLafortune 提供此版本)