我想在R中添加一个新列,以将我的子组汇总为组。
这是我的示例:
-Filter
现在,我想添加一个新列“colour”,将这些子组分为“红色”,“绿色”和“蓝色”3个组。我可以先将子组分配给变量,然后再将这些变量对应到组吗?
# Example data: one row per (id, subgroup) observation.
id <- c(1, 2, 2, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6)
subgroup <- c(
  "lightred", "marine", "cyan", "rose", "bordeaux", "darkred", "sky",
  "gras", "bottle", "lightgreen", "darkred", "marine", "lightgreen"
)
# Pass the vectors straight to data.frame(): going through cbind() first
# builds a character matrix and silently turns the numeric id into text.
# stringsAsFactors is spelled out so subgroup is character on every R version.
data <- data.frame(id, subgroup, stringsAsFactors = FALSE)
> data
id subgroup
1 1 lightred
2 2 marine
3 2 cyan
4 3 rose
5 4 bordeaux
6 4 darkred
7 4 sky
8 5 gras
9 5 bottle
10 5 lightgreen
11 6 darkred
12 6 marine
13 6 lightgreen
最后应该是这样的:
red = "lightred", "darkred" , "rose" , "bordeaux"
blue = "marine", "cyan", "sky"
green = "gras", "bottle" , "lightgreen"
谢谢!
答案 0 :(得分:2)
使用dplyr中的case_when:
# Members of each colour group.
red <- c("lightred", "darkred", "rose", "bordeaux")
blue <- c("marine", "cyan", "sky")
green <- c("gras", "bottle", "lightgreen")

# case_when() is namespace-qualified so the snippet runs without a prior
# library(dplyr). A subgroup that belongs to none of the groups keeps its own
# name; the fallback is coerced to character so that all right-hand sides
# have the same type even when `subgroup` is stored as a factor.
data$colour <- dplyr::case_when(
  data$subgroup %in% red ~ "red",
  data$subgroup %in% blue ~ "blue",
  data$subgroup %in% green ~ "green",
  TRUE ~ as.character(data$subgroup)
)
答案 1 :(得分:2)
使用cut的非常规方法。我们创建一个键值对的list,然后使用match将data$subgroup与展开(unlist)后的列表值进行匹配,得到每个值的位置。我们用列表中每个元素的length的累积和作为breaks,并用列表的names作为labels。
# Lookup table: each element is named after a colour group and holds the
# subgroups that belong to it.
new_list <- list(
  red = c("lightred", "darkred", "rose", "bordeaux"),
  blue = c("marine", "cyan", "sky"),
  green = c("gras", "bottle", "lightgreen")
)

# match() gives each subgroup's position in the flattened lookup (NA when it
# is not listed). The cumulative group sizes are the upper bounds of each
# group's run of positions, so cut() converts a position into a group label.
data$colour <- cut(
  x = match(data$subgroup, table = unlist(new_list)),
  breaks = c(0, cumsum(lengths(new_list))),
  labels = names(new_list)
)
data
# id subgroup colour
#1 1 lightred red
#2 2 marine blue
#3 2 cyan blue
#4 3 rose red
#5 4 bordeaux red
#6 4 darkred red
#7 4 sky blue
#8 5 gras green
#9 5 bottle green
#10 5 lightgreen green
#11 6 darkred red
#12 6 marine blue
#13 6 lightgreen green
其中
cumsum(lengths(new_list))
# red blue green
# 4 7 10
另一种替代方法(由@Jaap提出)是使用stack从new_list创建一个数据帧,其中values列为各个单独的颜色(子组),ind列为相应的组。然后我们用match将subgroup与values进行匹配,并获得相应的组(ind)。
# Flatten the lookup list into a two-column data frame: `values` holds each
# subgroup and `ind` the name of the list element it came from.
ref <- stack(new_list)
# Locate every row's subgroup in `values` and pick the `ind` entry found there.
data$colour <- ref[["ind"]][match(data[["subgroup"]], ref[["values"]])]
答案 2 :(得分:0)
library(dplyr)

# Example data. The vectors go straight into data.frame(); wrapping them in
# cbind() first would build a character matrix and turn id into text.
id <- c(1, 2, 2, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6)
subgroup <- c(
  "lightred", "marine", "cyan", "rose", "bordeaux", "darkred", "sky",
  "gras", "bottle", "lightgreen", "darkred", "marine", "lightgreen"
)
data <- data.frame(id, subgroup, stringsAsFactors = FALSE)

# Classify each subgroup by regular expression. The patterns are anchored
# with ^...$ so that only whole values match; unanchored, grepl() would also
# accept any longer string that merely contains one of the names.
# Anything that matches none of the patterns is labelled "else".
data <- data %>%
  mutate(
    colour = case_when(
      grepl("^(lightred|darkred|rose|bordeaux)$", subgroup) ~ "red",
      grepl("^(marine|cyan|sky)$", subgroup) ~ "blue",
      grepl("^(gras|bottle|lightgreen)$", subgroup) ~ "green",
      TRUE ~ "else"
    )
  )
data
答案 3 :(得分:0)
使用dplyr和plyr:用mapvalues把x中的值映射为y中的值。这里的x代表唯一的subgroup值,y是与之一一对应的、要映射成的color值:
# `x` lists every distinct subgroup; `y` gives, position by position, the
# colour group that the corresponding entry of `x` is mapped to.
x <- c(
  "lightred", "darkred", "rose", "bordeaux",
  "marine", "cyan", "sky",
  "gras", "bottle", "lightgreen"
)
y <- rep(c("red", "blue", "green"), times = c(4, 3, 3))

# The result is not assigned, so `data` itself is left unchanged.
data %>%
  dplyr::mutate(color = plyr::mapvalues(subgroup, from = x, to = y))
答案 4 :(得分:0)
# Nested ifelse(): each group is tested explicitly instead of letting
# everything that is neither red nor blue fall through to "green".
# Subgroups found in none of the three vectors (including NA) get NA
# rather than a wrong label.
data$colour <- ifelse(
  data$subgroup %in% red, "red",
  ifelse(
    data$subgroup %in% blue, "blue",
    ifelse(data$subgroup %in% green, "green", NA_character_)
  )
)
id subgroup colour
1 1 lightred red
2 2 marine blue
3 2 cyan blue
4 3 rose red
5 4 bordeaux red
6 4 darkred red
7 4 sky blue
8 5 gras green
9 5 bottle green
10 5 lightgreen green
11 6 darkred red
12 6 marine blue
13 6 lightgreen green
答案 5 :(得分:0)
# Join the data to a (values, ind) lookup built from the three group vectors.
# merge() matches data$subgroup against the lookup's `values` column; the
# group name arrives in the `ind` column.
a <- merge(
  x = data,
  y = stack(list(red = red, blue = blue, green = green)),
  by.x = "subgroup",
  by.y = "values"
)
a
subgroup id ind
1 bordeaux 4 red
2 bottle 5 green
3 cyan 2 blue
4 darkred 4 red
5 darkred 6 red
6 gras 5 green
7 lightgreen 5 green
8 lightgreen 6 green
9 lightred 1 red
10 marine 2 blue
11 marine 6 blue
12 rose 3 red
13 sky 4 blue
# Show the merged rows sorted by id.
with(a, a[order(id), ])
subgroup id ind
9 lightred 1 red
3 cyan 2 blue
10 marine 2 blue
12 rose 3 red
1 bordeaux 4 red
4 darkred 4 red
13 sky 4 blue
2 bottle 5 green
6 gras 5 green
7 lightgreen 5 green
5 darkred 6 red
8 lightgreen 6 green
11 marine 6 blue
您可以这样做:
# Build a named lookup vector: values are the subgroups, names are the colour
# group each one belongs to. The names are generated with rep() rather than by
# stripping the numeric suffixes that unlist() appends, because stripping
# digits with a regex would also damage group names that contain digits.
groups <- list(red = red, blue = blue, green = green)
colors <- unlist(groups, use.names = FALSE)
names(colors) <- rep(names(groups), lengths(groups))

# Look the subgroups up through the data frame column, not through a separate
# `subgroup` vector that may be missing or out of sync with `data`.
data$color <- names(colors)[match(data$subgroup, colors)]
data
id subgroup color
1 1 lightred red
2 2 marine blue
3 2 cyan blue
4 3 rose red
5 4 bordeaux red
6 4 darkred red
7 4 sky blue
8 5 gras green
9 5 bottle green
10 5 lightgreen green
11 6 darkred red
12 6 marine blue
13 6 lightgreen green