我有一个功能,我正在尝试对无线电格式进行分类,但是,它无法正常工作。
findFormat <- function(format) {
currentFormat <- c(strsplit(tolower(format), " "))
if ("christian" %in% currentFormat || "gospel" %in% currentFormat || "religious" %in% currentFormat || "religion" %in% currentFormat) {
return("Religious")
}
if ("pop" %in% currentFormat || "contemporary" %in% currentFormat || "mainstream" %in% currentFormat || "top" %in% currentFormat || "hot" %in% currentFormat || "hit" %in% currentFormat) {
return("Pop or Contemporary")
}
if ("rock" %in% currentFormat || "alternative" %in% currentFormat || "indie" %in% currentFormat) {
return("Rock, Alternative, or Indie")
}
if ("country" %in% currentFormat || "southern" %in% currentFormat) {
return("Country")
}
if ("urban" %in% currentFormat || "hip" %in% currentFormat || "rap" %in% currentFormat || "hip-hop" %in% currentFormat) {
return("Hip-hop")
}
if ("jazz" %in% currentFormat || "blues" %in% currentFormat) {
return("Jazz or Blues")
}
if ("latin" %in% currentFormat || "mexican" %in% currentFormat || "international" %in% currentFormat) {
return("International")
}
if ("oldies" %in% currentFormat) {
return("Oldies")
}
if ("news/talk" %in% currentFormat || "news" %in% currentFormat || "talk" %in% currentFormat || "public" %in% currentFormat) {
return("News and Talk")
}
# Default
return("Other")
}
现在,如果我运行findFormat("rap")
,它将按预期返回[1] "Hip-hop"
,但如果我运行findFormat("rap and rhythm")
,则会获得[1] Other
。我不知道为什么,因为currentFormat应该只是一个字符串向量而"rap" %in% c("rap", "and", "rhythm")
返回[1] TRUE
。任何帮助将不胜感激!
答案 0 :(得分:0)
我认为通过查找表解决这个问题可以更好地服务,或许是这样:
findFormat = function(fmt) {
matchDT =
data.table(keyword = c('christian', 'gospel', 'religious', 'religion',
'pop', 'contemporary', 'mainstream', 'top',
'hot', 'hit', 'rock', 'alternative',' indie',
'country', 'southern', 'urban', 'hip',
'hop', 'rap', 'jazz', 'blues', 'latin',
'mexican', 'international', 'oldies',
'news', 'talk'),
format = c(rep('Religious', 4L),
rep('Pop or Contemporary', 6L),
rep('Rock, Alternative, or Indie', 3L),
rep('Country', 2L), rep('Hip-Hop', 4L),
rep('Jazz or Blues', 2L),
rep('International', 3L), 'Oldies',
rep('News and Talk', 2L)),
key = 'keyword')
out = matchDT[strsplit(fmt, '[ [:punct:]]')[[1L]], nomatch = 0L]
if (!length(out)) return('Other') else return(out$format)
}
findFormat('rap and rhythm')
# [1] "Hip-Hop"