我的数据框中有多个因子,每个因子都包含名为“Very long text 1”和“Very long text 2”的水平。我想把这些水平重命名为“1”和“2”。对单个因子,我可以用下面的代码轻松做到:
levels(df$factorname1)[levels(df$factorname1)=="Very long text 1"] <- "1"
但要对几百个因子重复这样做就很麻烦。有没有办法一次重命名一批因子的水平,或者直接在数据框中的任意位置找到“Very long text 1”并将其重命名?
示例数据:
structure(list(Q5.2.01 = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("",
"A whole different level\n"), class = "factor"), Q5.2.02 = structure(c(2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L), .Label = c("", "Very long text 2\n"), class = "factor"),
Q5.2.03 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), Q5.2.04 = structure(c(1L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("", "Very long text 2\n"), class = "factor"),
Q5.2.05 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L), .Label = c("", "A whole different level\n",
"Very long text 2 blablabla\n"), class = "factor"), Q5.2.06 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 1L), .Label = c("", "Very long text 2\n"), class = "factor"),
Q5.2.07 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), Q5.2.08 = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Q5.2.09 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = c("", "Very long text 1\n"), class = "factor"),
Q5.2.10 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), Q5.2.11 = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("", "Very long text 2\n"), class = "factor"),
Q5.2.12 = structure(c(1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "Very long text 2\n"
), class = "factor"), Q5.2.13 = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("",
"Very long text 1\n"), class = "factor"), Q5.2.14 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L), .Label = c("", "Very long text 1\n", "Very long text 2\n"
), class = "factor"), Q5.2.15 = structure(c(1L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("",
"Very long text 2\n"), class = "factor"), Q5.2.16 = structure(c(1L,
1L, 3L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 2L), .Label = c("", "Very long text 1\n", "Very long text 2\n"
), class = "factor"), respondentID = structure(c(8L, 8L,
8L, 6L, 7L, 7L, 5L, 5L, 5L, 4L, 4L, 4L, 3L, 3L, 3L, 2L, 2L,
1L), .Label = c("EO13", "EO15", "EO17", "EO19", "EO21", "Eo23",
"EO23", "EO24"), class = "factor")), .Names = c("Q5.2.01",
"Q5.2.02", "Q5.2.03", "Q5.2.04", "Q5.2.05", "Q5.2.06", "Q5.2.07",
"Q5.2.08", "Q5.2.09", "Q5.2.10", "Q5.2.11", "Q5.2.12", "Q5.2.13", "Q5.2.14", "Q5.2.15", "Q5.2.16", "respondentID"), class = "data.frame", row.names = c(NA,
-18L))
答案 0 :(得分:0)
您可以使用 plyr 包中的 revalue 函数。(用基础 R 也可以完成,但我更喜欢这个解决方案。)
这是一个例子
> DF <- data.frame(V1 = factor(c("A", "B", "C", "A", "D", "E")),
V2=factor(c("A", "A", "A", "A", "D", "E")))
> DF
V1 V2
1 A A
2 B A
3 C A
4 A A
5 D D
6 E E
现在假设因子水平 D 是我们想要替换的那个。我们可以使用 lapply 遍历数据框中的各列(记住只选择相关的因子列),并使用 revalue 指定替换规则。最后用 as.data.frame 包裹整个结果,将其转换回数据框。
> library("plyr")
> as.data.frame(lapply(DF, function(x) { revalue(x, c("D"="YAY")) }))
V1 V2
1 A A
2 B A
3 C A
4 A A
5 YAY YAY
6 E E
**更新**
您可以先对数据框做列子集选择,把操作限定在因子列上:
# Apply the replacement to factor columns only, so that revalue() is never
# called on non-factor columns (e.g. all-NA logical columns).
# - vapply(DF, is.factor, logical(1)) always yields a plain logical mask and
#   also recognises ordered factors, unlike comparing class() to "factor".
# - List-style indexing DF[mask] keeps a data frame even when exactly one
#   column is selected, so lapply() still iterates over columns.
# The result contains only the factor columns of DF, with level "D" renamed.
as.data.frame(lapply(
  DF[vapply(DF, is.factor, logical(1))],
  function(x) revalue(x, c("D" = "YAY"))
))