添加新的列和组属性

时间:2018-07-04 04:35:04

标签: r

我想在R中添加一个新列,以将我的子组汇总为组。

这是我的示例:

-Filter

现在,我想添加一个新列“colour”,把这些属性归入“红色”、“绿色”和“蓝色”3个组。我可以先把各个子组分配给变量,然后再把这些变量对应到组吗?

# Sample data: one row per (id, subgroup) observation.
id <- c(1, 2, 2, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6)
subgroup <- c(
  "lightred", "marine", "cyan", "rose", "bordeaux", "darkred", "sky",
  "gras", "bottle", "lightgreen", "darkred", "marine", "lightgreen"
)
# Pass the vectors to data.frame() directly: cbind() would first build a
# character matrix and silently turn the numeric ids into strings.
# stringsAsFactors = FALSE keeps subgroup as character on R < 4.0 as well.
data <- data.frame(id = id, subgroup = subgroup, stringsAsFactors = FALSE)

> data
   id   subgroup
1   1   lightred
2   2     marine
3   2       cyan
4   3       rose
5   4   bordeaux
6   4    darkred
7   4        sky
8   5       gras
9   5     bottle
10  5 lightgreen
11  6    darkred
12  6     marine
13  6 lightgreen

最后应该是这样的:

# Desired mapping: each vector lists the subgroup labels of one colour group.
red <- c("lightred", "darkred", "rose", "bordeaux")
blue <- c("marine", "cyan", "sky")
green <- c("gras", "bottle", "lightgreen")

谢谢!

6 个答案:

答案 0 :(得分:2)

使用dplyr中的case_when

# Lookup vectors: the subgroup labels that belong to each colour group.
red <- c("lightred", "darkred", "rose", "bordeaux")
blue <- c("marine", "cyan", "sky")
green <- c("gras", "bottle", "lightgreen")

# Conditions are evaluated top to bottom; the first one that is TRUE wins.
# case_when() is namespace-qualified so the snippet runs without attaching
# dplyr first.
data$colour <- dplyr::case_when(
  data$subgroup %in% red ~ "red",
  data$subgroup %in% blue ~ "blue",
  data$subgroup %in% green ~ "green",
  # Fallback: keep the original label. as.character() keeps this branch the
  # same type as the others even when subgroup is stored as a factor.
  TRUE ~ as.character(data$subgroup)
)

答案 1 :(得分:2)

一种使用cut的非常规方法。我们先创建一个由键值对组成的list,然后用match把data$subgroup与展开(unlist)后的列表值进行匹配,得到每个子组在其中的位置。breaks取列表各元素length的累积和(并在最前面补0),labels则取列表的names。

# Colour group -> subgroup labels that belong to it.
new_list <- list(
  red = c("lightred", "darkred", "rose", "bordeaux"),
  blue = c("marine", "cyan", "sky"),
  green = c("gras", "bottle", "lightgreen")
)

# Position of every subgroup inside the flattened lookup vector.
positions <- match(data$subgroup, unlist(new_list))
# Last position occupied by each group; together with a leading 0 these are
# the interval boundaries that cut() maps back to the group names.
upper_bounds <- cumsum(lengths(new_list))

data$colour <- cut(
  positions,
  breaks = c(0, upper_bounds),
  labels = names(new_list)
)


data
#   id   subgroup colour
#1   1   lightred    red
#2   2     marine   blue
#3   2       cyan   blue
#4   3       rose    red
#5   4   bordeaux    red
#6   4    darkred    red
#7   4        sky   blue
#8   5       gras  green
#9   5     bottle  green
#10  5 lightgreen  green
#11  6    darkred    red
#12  6     marine   blue
#13  6 lightgreen  green

其中

# Number of subgroup labels per colour group, then the running total.
group_sizes <- lengths(new_list)
cumsum(group_sizes)
# red  blue green 
#   4     7    10 

另一种替代方法(由@Jaap提出)是用stack从new_list创建一个数据框,其中values列是各个具体的颜色,ind列是对应的组。然后我们用match把subgroup与values进行匹配,并取得对应的组(ind)。

# Long-format lookup table: `values` holds the subgroup label, `ind` its group.
ref <- stack(new_list)
# For every row, find the label's row in the lookup table and take its group.
data[["colour"]] <- ref[["ind"]][match(data[["subgroup"]], ref[["values"]])]

答案 2 :(得分:0)

library(dplyr)

# Sample data: one row per (id, subgroup) observation.
id <- c(1, 2, 2, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6)
subgroup <- c(
  "lightred", "marine", "cyan", "rose", "bordeaux", "darkred", "sky",
  "gras", "bottle", "lightgreen", "darkred", "marine", "lightgreen"
)
# Build the data frame from the vectors directly; going through cbind() would
# coerce the numeric ids to character.
data <- data.frame(id = id, subgroup = subgroup, stringsAsFactors = FALSE)

# The patterns are anchored with ^...$ so only whole labels match; without the
# anchors a label such as "grass" or "skyline" would be classified as well.
data <- data %>%
  mutate(
    colour = case_when(
      grepl("^(lightred|darkred|rose|bordeaux)$", subgroup) ~ "red",
      grepl("^(marine|cyan|sky)$", subgroup) ~ "blue",
      grepl("^(gras|bottle|lightgreen)$", subgroup) ~ "green",
      # Anything that is not a known label.
      TRUE ~ "else"
    )
  )
data

答案 3 :(得分:0)

使用dplyr和plyr

mapvalues按照x到y的对应关系进行映射。这里的x是所有唯一的subgroup值,y是与它们一一对应的color值:

# x: every distinct subgroup label.
# y: the colour group of the label at the same position in x.
x <- c(
  "lightred", "darkred", "rose", "bordeaux",
  "marine", "cyan", "sky",
  "gras", "bottle", "lightgreen"
)
y <- c(rep("red", 4), rep("blue", 3), rep("green", 3))

# Only namespace-qualified calls are used, so the snippet runs without
# attaching dplyr or plyr (the pipe operator would require an attached
# package). The result is printed, not stored.
dplyr::mutate(data, color = plyr::mapvalues(subgroup, from = x, to = y))

答案 4 :(得分:0)

# Test membership in each lookup vector explicitly. A label that belongs to
# none of red/blue/green becomes NA instead of being silently labelled "green".
data$colour <- ifelse(
  data$subgroup %in% red, "red",
  ifelse(
    data$subgroup %in% blue, "blue",
    ifelse(data$subgroup %in% green, "green", NA_character_)
  )
)

   id   subgroup colour
1   1   lightred    red
2   2     marine   blue
3   2       cyan   blue
4   3       rose    red
5   4   bordeaux    red
6   4    darkred    red
7   4        sky   blue
8   5       gras  green
9   5     bottle  green
10  5 lightgreen  green
11  6    darkred    red
12  6     marine   blue
13  6 lightgreen  green

答案 5 :(得分:0)

# Join the data with a long-format lookup table built by stack(): its `values`
# column holds the subgroup label and `ind` the colour group.
a <- merge(
  data,
  stack(list(red = red, blue = blue, green = green)),
  by.x = "subgroup",
  by.y = "values"
)
a
     subgroup id   ind
1    bordeaux  4   red
2      bottle  5 green
3        cyan  2  blue
4     darkred  4   red
5     darkred  6   red
6        gras  5 green
7  lightgreen  5 green
8  lightgreen  6 green
9    lightred  1   red
10     marine  2  blue
11     marine  6  blue
12       rose  3   red
13        sky  4  blue

# merge() returns the rows sorted by the join key, so sort them back by id.
# Going through as.character() and as.numeric() gives a numeric order even when
# id is stored as character or factor (otherwise "10" would sort before "2").
a[order(as.numeric(as.character(a$id))), ]

    subgroup id   ind
9    lightred  1   red
3        cyan  2  blue
10     marine  2  blue
12       rose  3   red
1    bordeaux  4   red
4     darkred  4   red
13        sky  4  blue
2      bottle  5 green
6        gras  5 green
7  lightgreen  5 green
5     darkred  6   red
8  lightgreen  6 green
11     marine  6  blue

您可以这样做:

# Named lookup vector: the values are subgroup labels, the names their group.
groups <- list(red = red, blue = blue, green = green)
colors <- unlist(groups, use.names = FALSE)
# Repeat each group name once per member instead of stripping the numeric
# suffixes that unlist() appends; that would mangle group names that contain
# digits themselves.
names(colors) <- rep(names(groups), lengths(groups))
# Match on the data frame's own column, not on a global vector that merely
# happens to have the same contents. Indexing names(colors) directly gives NA
# for labels that are not in the lookup.
data$color <- names(colors)[match(data$subgroup, colors)]
data
  id   subgroup color
1   1   lightred   red
2   2     marine  blue
3   2       cyan  blue
4   3       rose   red
5   4   bordeaux   red
6   4    darkred   red
7   4        sky  blue
8   5       gras green
9   5     bottle green
10  5 lightgreen green
11  6    darkred   red
12  6     marine  blue
13  6 lightgreen green