在R中用数据框(查找表)代替冗长的嵌套ifelse语句

时间:2018-10-15 19:27:34

标签: r if-statement dplyr grepl

要创建属性,我可以这样做

# Derive `clas` from substrings found in `dat$stuff`. The checks are nested,
# so the first matching pattern (top to bottom) wins; rows matching none of
# the patterns get "no".
dat$clas <- ifelse(
  grepl("den", dat$stuff), "bak",
  ifelse(
    grepl("kro", dat$stuff), "bak1",
    ifelse(
      grepl("ris", dat$stuff), "bak3",
      ifelse(
        grepl("muka", dat$stuff), "rty",
        ifelse(
          grepl("chlo", dat$stuff), "cos",
          ifelse(grepl("prokl", dat$stuff), "gig", "no")
        )
      )
    )
  )
)

但是我有很多属性,假设有200个。像这样逐个编写ifelse语句既费时,代码也会非常冗长。能否改用一个数据框(查找表)来完成这种映射?

templatedata<-prod  clas
den               bak
kro              bak1
ris               bak3
muka            rty
chlo               cos
prokl                gig

# Lookup table: `prod` holds the pattern to search for, `class` the label to
# assign. The patterns must not carry trailing tab characters (a copy-paste
# artefact); otherwise e.g. "kro\t" never matches "kro serdgt".
templatedata <- data.frame(
  prod = factor(
    c("den", "kro", "ris", "muka", "chlo", "prokl"),
    levels = c("chlo", "den", "kro", "muka", "prokl", "ris")
  ),
  class = factor(
    c("bak", "bak1", "bak3", "rty", "cos", "gig"),
    levels = c("bak", "bak1", "bak3", "cos", "gig", "rty")
  )
)

如此

workingdataset<-(
prod
den sg
kro serdgt
ris szdg
muka aszgt
chlo sdgt
prokl zfdsgr
den zdasfh)


# Working data: a single factor column `prod` with seven rows.
# The trailing ")" in "den zdasfh)" is part of the data as given.
workingdataset <- data.frame(
  prod = factor(
    c(
      "den sg", "kro serdgt", "ris szdg", "muka aszgt",
      "chlo sdgt", "prokl zfdsgr", "den zdasfh)"
    ),
    levels = c(
      "chlo sdgt", "den sg", "den zdasfh)", "kro serdgt",
      "muka aszgt", "prokl zfdsgr", "ris szdg"
    )
  )
)

我想使用templatedata为workingdataset中的每一行获取对应的属性。

输出workingdataset看起来像

prod           clas
den   sg        bak
kro  serdgt      bak1
ris szdg        bak3
muka aszgt      rty
chlo sdgt       cos
prokl   zfdsgr  gig
den zdasfh      bak

该如何实现?

1 个答案:

答案 0 :(得分:3)

您可以使用David Robinson创建的精美的fuzzyjoin软件包来实现这一目标。它允许您使用模糊逻辑(包括字符串距离或正则表达式)连接两个表。在这里,我们将使用正则表达式。

library(fuzzyjoin)
library(magrittr)

# Working data: a single factor column `prod` with seven rows.
# The trailing ")" in "den zdasfh)" is part of the data as given.
workingdataset <- data.frame(
  prod = factor(
    c(
      "den sg", "kro serdgt", "ris szdg", "muka aszgt",
      "chlo sdgt", "prokl zfdsgr", "den zdasfh)"
    ),
    levels = c(
      "chlo sdgt", "den sg", "den zdasfh)", "kro serdgt",
      "muka aszgt", "prokl zfdsgr", "ris szdg"
    )
  )
)

# Lookup table: `prod` holds the regular expression to search for and
# `class` the label assigned to rows that match it.
templatedata <- data.frame(
  prod = factor(
    c("den", "kro", "ris", "muka", "chlo", "prokl"),
    levels = c("chlo", "den", "kro", "muka", "prokl", "ris")
  ),
  class = factor(
    c("bak", "bak1", "bak3", "rty", "cos", "gig"),
    levels = c("bak", "bak1", "bak3", "cos", "gig", "rty")
  )
)

# Match each `workingdataset$prod` value against the regular expressions in
# `templatedata$prod`. A left join keeps every row of `workingdataset`, even
# when no pattern matches it (its `class` is then NA); an inner join would
# silently drop such rows.
workingdataset %>%
  regex_left_join(templatedata, by = "prod")

        prod.x prod.y class
1       den sg    den   bak
2   kro serdgt    kro  bak1
3     ris szdg    ris  bak3
4   muka aszgt   muka   rty
5    chlo sdgt   chlo   cos
6 prokl zfdsgr  prokl   gig
7  den zdasfh)    den   bak