下面的程序通过在另一个列表中查找,返回每个名称最接近的匹配项。我想要的是最接近的 3 个匹配项,而不仅仅是最接近的那一个。
# Device names from the first source; each one is matched against source2.
source1.devices <- data.frame(
  name = c(
    "Galaxy S5",
    "Samsung Galaxy S4",
    "Samsung Galaxy S4 schwarz",
    "Samsung GALAXY Note 4",
    "Samsung GALAXY Tab T535 schwarz",
    "Samsung Galaxy S4 mini weiß",
    "iPhone 5S 16GB Gold",
    "iPad Air 2 Gold Tastatur Schwarz",
    "iPad Air 2 Gold & Kensington "
  ),
  stringsAsFactors = FALSE
)

# Candidate device names from the second source.
source2.devices <- data.frame(
  name = c(
    "Galaxy S5 & Galaxy Tab 3 7.0 Lite",
    "Samsung Galaxy S4 Black Edition",
    "Samsung Galaxy S4 mini schwarz",
    "Samsung GALAXY Note 4",
    "Samsung Galaxy S4 mini weiß",
    "iPhone 5S 16GB Gold",
    "iPad Air 2 Gold & Kensington Tastatur Schwarz",
    "Samsung Galaxy S5 weiß",
    "iPhone 6 64 GB Gold",
    "iPhone 6 16 GB Silber"
  ),
  stringsAsFactors = FALSE
)
library(stringdist)
# Build one distance matrix per string-distance method.
# Each matrix has one row per source1 name and one column per source2 name,
# computed on lower-cased names so the comparison is case-insensitive.
distance.methods <- c("jw")
dist.methods <- list()

# Lower-case once instead of on every pairwise comparison.
s1.lower <- tolower(source1.devices$name)
s2.lower <- tolower(source2.devices$name)

for (method.name in distance.methods) {
  # stringdistmatrix(a, b) puts `a` on the rows. The original code computed
  # stringdist(source2, source1) and stored it at [source1, source2], so the
  # same argument order is used here and the result is transposed.
  dist.methods[[method.name]] <- t(
    stringdistmatrix(s2.lower, s1.lower, method = method.name)
  )
}
# For every distance method, find the closest source2 name for each source1
# name and collect the results in one data frame (one row per source1 name
# and method).
match.s1.s2.enh <- NULL
method.results <- vector("list", length(dist.methods))

for (m in seq_along(dist.methods)) {
  dist.matrix <- as.matrix(dist.methods[[distance.methods[m]]])
  # Smallest distance in each row, i.e. the distance to the closest match.
  min.name.enh <- apply(dist.matrix, 1, base::min)

  row.results <- lapply(seq_len(nrow(dist.matrix)), function(s1.i) {
    # The minimum is taken from this very row, so an exact match() always
    # finds it; the first column wins when several are tied.
    s2.i <- match(min.name.enh[s1.i], dist.matrix[s1.i, ])
    data.frame(
      s2.i = s2.i,
      s1.i = s1.i,
      s2name = source2.devices[s2.i, "name"],
      s1name = source1.devices[s1.i, "name"],
      adist = min.name.enh[s1.i],
      method = distance.methods[m]
    )
  })

  # The original code prepended each new row, so the last source1 name came
  # first; rev() keeps that ordering while binding only once.
  method.results[[m]] <- do.call(rbind, rev(row.results))
}

# Later methods were likewise stacked on top of earlier ones.
match.s1.s2.enh <- do.call(rbind, rev(method.results))
match.s1.s2.enh
上面的代码使用 min() 函数在 dist.matrix 的每一行中找到最小距离(即最接近的匹配)。我可以用下面的代码得到每一行中距离最小的 3 个值 -
# Rank the columns of the distance matrix for every row and keep the labels
# of the three smallest distances (the result holds column labels, not
# device names).
dist.matrix <- data.frame(dist.matrix)

top.three <- apply(dist.matrix, 1, function(row.dist) {
  # na.last = NA drops missing distances before ranking.
  ranked <- order(row.dist, na.last = NA)
  names(row.dist)[ranked][1:3]
})

# apply() returns one column per input row; transpose to one row per name.
df2 <- data.frame(t(top.three))
colnames(df2) <- paste0("Ranked", 1:3)
df2
我发现在主程序中实现此代码有困难。我希望输出看起来如下 -
Name Rank1 Rank2 Rank3
Samsung Galaxy S4 Samsung Galaxy S4 mini weiß Samsung Galaxy Note4 Samsung Galaxy S4 mini schwarz
答案 0 :(得分:1)
library(tidyverse)
library(stringdist)
构建字符串距离矩阵(行 = source1.devices,列 = source2.devices)
# Pairwise Jaro-Winkler distances between the two name lists
# (source1 names on the rows, source2 names on the columns).
M <- stringdistmatrix(
  source1.devices$name,
  source2.devices$name,
  method = "jw"
) %>%
  as.data.frame()
使用 map_df 为 source1 中的每个名称找出距离最近的 3 个匹配项
# For each source1 name (one row of M), keep the three closest source2 names
# in long format: one row per (name, rank) pair with columns name, key, val.
n.matches <- 3

nearest <- map_df(seq_len(nrow(M)), function(row.idx) {
  # unlist() turns the one-row slice into a plain numeric vector; calling
  # order() directly on a data frame is not reliable across R versions.
  ranked <- head(order(unlist(M[row.idx, ])), n.matches)
  data.frame(
    name = rep(source1.devices$name[row.idx], length(ranked)),
    key = paste0("Rank", seq_along(ranked)),
    val = source2.devices$name[ranked],
    stringsAsFactors = FALSE
  )
})
将长格式的结果展开(spread)为宽格式,每个 Rank 占一列
# Reshape the long (name, key, val) table to one row per source1 name with
# one column per rank (Rank1, Rank2, Rank3).
ans <- nearest %>%
  pivot_wider(names_from = key, values_from = val)

# Look the row up by name: a positional index such as ans[6, ] depends on
# how the rows happen to be ordered.
ans %>%
  filter(name == "Samsung Galaxy S4") # Samsung Galaxy S4
name Rank1 Rank2
1 Samsung Galaxy S4 "Samsung Galaxy S5 weiß" "Samsung Galaxy S4 mini weiß"
# ... with 1 more variables: Rank3 <chr>