要创建属性,我可以这样做
# Derive dat$clas from dat$stuff with a chain of nested ifelse() calls.
# Each grepl() tests every element of dat$stuff against one pattern; the
# patterns are tried in the order written, so the first one that matches
# decides the label. Elements matching none of the patterns get "no".
# Every additional attribute needs one more nesting level.
dat$clas <- ifelse(grepl("den", dat$stuff), "bak",
ifelse(grepl("kro", dat$stuff), "bak1",
ifelse(grepl("ris", dat$stuff), "bak3",
ifelse(grepl("muka", dat$stuff), "rty",
ifelse(grepl("chlo", dat$stuff), "cos",
ifelse(grepl("prokl", dat$stuff), "gig", "no"))))))
但是我有很多属性,假设有200个。使用这样嵌套的ifelse语句写起来很费时间,代码也会非常长。我能否改用一个数据框作为查找表来完成这种映射?
templatedata<-prod clas
den bak
kro bak1
ris bak3
muka rty
chlo cos
prokl gig
)
# Lookup table: each `prod` entry is a pattern to search for, and `class` is
# the label to assign when that pattern is found. Both columns are factors.
# The keys deliberately carry no trailing tab characters: a stray "\t" would
# become part of the pattern and stop e.g. "kro" from matching "kro serdgt".
templatedata <- data.frame(
  prod = factor(c("den", "kro", "ris", "muka", "chlo", "prokl")),
  class = factor(c("bak", "bak1", "bak3", "rty", "cos", "gig"))
)
工作数据集如下:
workingdataset<-(
prod
den sg
kro serdgt
ris szdg
muka aszgt
chlo sdgt
prokl zfdsgr
den zdasfh)
# Working data set: a single factor column `prod` holding the seven raw
# strings that need to be labelled.
workingdataset <- data.frame(
  prod = factor(c(
    "den sg", "kro serdgt", "ris szdg", "muka aszgt",
    "chlo sdgt", "prokl zfdsgr", "den zdasfh)"
  ))
)
我想使用templatedata为workingdataset中的每一行获取对应的属性。
期望得到的workingdataset如下所示:
prod clas
den sg bak
kro serdgt bak1
ris szdg bak3
muka aszgt rty
chlo sdgt cos
prokl zfdsgr gig
den zdasfh) bak
该怎么做?
答案 0 :(得分:3)
您可以使用David Robinson创建的精美的fuzzyjoin软件包来实现这一目标。它允许您使用模糊逻辑(包括字符串距离或正则表达式)连接两个表。在这里,我们将使用正则表达式。
library(fuzzyjoin)
library(magrittr)
# Input table to be labelled: one factor column, `prod`, with seven rows.
working_values <- c(
  "den sg", "kro serdgt", "ris szdg", "muka aszgt",
  "chlo sdgt", "prokl zfdsgr", "den zdasfh)"
)
workingdataset <- data.frame(prod = factor(working_values))
rm(working_values)  # keep only the data frame in the workspace
# Lookup table: `prod` holds the pattern to look for and `class` the label
# that goes with it. Both columns are factors.
templatedata <- data.frame(
  prod = factor(c("den", "kro", "ris", "muka", "chlo", "prokl")),
  class = factor(c("bak", "bak1", "bak3", "rty", "cos", "gig"))
)
# Join the two tables by treating each templatedata$prod value as a regular
# expression and matching it against workingdataset$prod; only rows with a
# match are kept. The result is printed because it is not assigned.
regex_inner_join(workingdataset, templatedata, by = "prod")
prod.x prod.y class
1 den sg den bak
2 kro serdgt kro bak1
3 ris szdg ris bak3
4 muka aszgt muka rty
5 chlo sdgt chlo cos
6 prokl zfdsgr prokl gig
7 den zdasfh) den bak