在R中统计每一行里各个单词出现的次数,并将结果添加为新列

时间:2018-08-28 11:20:14

标签: r dataframe count rows

我有一个2511行和6列的数据框,其中包含糖果和彩色项。请查看下面的前15行:

# dput()-style sample of 15 rows: an integer id column `x` plus six factor
# columns (iteml, item2, ..., item6) holding item labels.
structure(list(x = 1:15, iteml = structure(c(2L, 1L, 1L, 1L, 
5L, 4L, 4L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("{dulce1_rojo", 
"{dulce2_verde", "{dulce7_plata", "{miel21_amarillo", "{miel30_azul"
), class = "factor"), item2 = structure(c(4L, 2L, 2L, 2L, 1L, 
5L, 5L, 4L, 3L, 3L, 4L, 1L, 4L, 4L, 1L), .Label = c("chocolate2l_amarillo", 
"dulce2_verde", "dulce7_plata", "miel21_amarillo", "miel30_azul"
), class = "factor"), item3 = structure(c(1L, 1L, 3L, 3L, 2L, 
2L, 1L, 2L, 2L, 3L, 2L, 2L, 2L, 1L, 2L), .Label = c("chocolate2l_amarillo", 
"chocolate30_azul", "miel21_amarillo"), class = "factor"), item4 = structure(c(2L, 
2L, 2L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("chocolate2l_amarillo", 
"chocolate32_violeta", "cookie30_azul"), class = "factor"), item5 = structure(c(2L, 
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("cookie2l_amarillo}", 
"cookie32_violeta}"), class = "factor"), item6 = structure(c(4L, 
6L, 1L, 3L, 6L, 1L, 2L, 4L, 6L, 2L, 5L, 6L, 1L, 2L, 4L), .Label = c(">{chocolate2l_amarillo}", 
">{chocolate30_azul}", ">{chocolate32_violeta}", ">{dulce1_rojo}", 
">{dulce7_plata}", ">{miel21_amarillo}"), class = "factor")), class = "data.frame", row.names = c(NA, 
-15L))

我不知道如何统计每一行中各种糖果的数量,并把结果放到新的列中。下面是结果数据框第一行的预期输出:

  x         iteml           item2                item3               item4             item5          item6 dulce miel chocolate cookie
1 1 {dulce2_verde miel21_amarillo chocolate2l_amarillo chocolate32_violeta cookie32_violeta} >{dulce1_rojo}     2    1         2      1

我被这个问题卡住了,如能得到任何帮助将不胜感激。

1 个答案:

答案 0 :(得分:0)

您可以使用apply函数为初始数据帧逐行应用grepl函数。然后,您使用sapply遍历所指示的四个元素。然后使用cbind将初始数据帧和带有成分的数据帧合并为一个。请参见下面的代码:

# Example data: an integer id column `x` plus six factor columns of item
# labels (iteml, item2, ..., item6).
df <- structure(list(x = 1:15, iteml = structure(c(2L, 1L, 1L, 1L, 
5L, 4L, 4L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("{dulce1_rojo", 
"{dulce2_verde", "{dulce7_plata", "{miel21_amarillo", "{miel30_azul"
), class = "factor"), item2 = structure(c(4L, 2L, 2L, 2L, 1L, 
5L, 5L, 4L, 3L, 3L, 4L, 1L, 4L, 4L, 1L), .Label = c("chocolate2l_amarillo", 
"dulce2_verde", "dulce7_plata", "miel21_amarillo", "miel30_azul"
), class = "factor"), item3 = structure(c(1L, 1L, 3L, 3L, 2L, 
2L, 1L, 2L, 2L, 3L, 2L, 2L, 2L, 1L, 2L), .Label = c("chocolate2l_amarillo", 
"chocolate30_azul", "miel21_amarillo"), class = "factor"), item4 = structure(c(2L, 
2L, 2L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("chocolate2l_amarillo", 
"chocolate32_violeta", "cookie30_azul"), class = "factor"), item5 = structure(c(2L, 
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("cookie2l_amarillo}", 
"cookie32_violeta}"), class = "factor"), item6 = structure(c(4L, 
6L, 1L, 3L, 6L, 1L, 2L, 4L, 6L, 2L, 5L, 6L, 1L, 2L, 4L), .Label = c(">{chocolate2l_amarillo}", 
">{chocolate30_azul}", ">{chocolate32_violeta}", ">{dulce1_rojo}", 
">{dulce7_plata}", ">{miel21_amarillo}"), class = "factor")), class = "data.frame", row.names = c(NA, 
-15L))

# Count, per row, how many item cells mention each ingredient.
ingredients <- c("dulce", "miel", "chocolate", "cookie")

# Only text-like columns are searched; a numeric column such as the id `x`
# cannot contain an ingredient name, so it is left out of the match.
is_text_col <- vapply(
  df,
  function(col) is.factor(col) || is.character(col),
  logical(1)
)

# Character matrix of item labels. The conversion is done once, and the
# matrix keeps its two dimensions even when `df` has a single row.
items <- as.matrix(df[is_text_col])

# Number of cells in each row of `items` whose label matches `ingredient`
# (interpreted as a regular expression by grepl()).
# Returns an integer vector with one element per row of `items`.
count_ingredient <- function(ingredient) {
  hits <- grepl(ingredient, items)
  # grepl() returns a flat vector; restore the row/column layout so the
  # matches can be summed row by row.
  dim(hits) <- dim(items)
  as.integer(rowSums(hits))
}

# A named list of equal-length vectors converts to a data frame with one
# column per ingredient for any number of rows. sapply() would simplify a
# one-row result to a plain vector and make the cbind() below fail.
counts <- lapply(ingredients, count_ingredient)
names(counts) <- ingredients

df_res <- cbind(df, as.data.frame(counts))
head(df_res)

输出:

  x            iteml                item2                item3                item4              item5                   item6 dulce miel chocolate cookie
1 1    {dulce2_verde      miel21_amarillo chocolate2l_amarillo  chocolate32_violeta  cookie32_violeta}          >{dulce1_rojo}     2    1         2      1
2 2     {dulce1_rojo         dulce2_verde chocolate2l_amarillo  chocolate32_violeta  cookie32_violeta}      >{miel21_amarillo}     2    1         2      1
3 3     {dulce1_rojo         dulce2_verde      miel21_amarillo  chocolate32_violeta  cookie32_violeta} >{chocolate2l_amarillo}     2    1         2      1
4 4     {dulce1_rojo         dulce2_verde      miel21_amarillo chocolate2l_amarillo  cookie32_violeta}  >{chocolate32_violeta}     2    1         2      1
5 5     {miel30_azul chocolate2l_amarillo     chocolate30_azul        cookie30_azul cookie2l_amarillo}      >{miel21_amarillo}     0    2         2      2
6 6 {miel21_amarillo          miel30_azul     chocolate30_azul        cookie30_azul cookie2l_amarillo} >{chocolate2l_amarillo}     0    2         2      2