我有超过1000个字符串和一个固定的子字符串数组。我想知道哪些字符串包含其中任意一个子字符串。(再说明一次,这些子字符串是固定不变的。)我还想确保按完整单词匹配,而不是按任意子串匹配。
最高效的方法是什么?有没有比针对所有子串执行1000次indexOf()更好的做法?
// Sample sentences and the fixed list of phrases to look for.
let str1 = "During the winter holiday I'll go skiing."
let str2 = "Do knock on the door or chime the bell"
let fixedSearchStrings = ["ring the", "chime the bell", "knock on the door", "knock on the window"]
// The two calls below illustrate the behavior being asked for: the result is the
// position of the matching phrase inside fixedSearchStrings, or nil when no phrase
// matches on whole words.
str1.indexOf(fixedSearchStrings) // returns nil. "During" is not the word "ring".
str2.indexOf(fixedSearchStrings) // returns 2. "knock on the door" substring found, no need to check further in the sentence.
答案 0 :(得分:2)
可以考虑下面这个方案。它的优点是:fixedSearchStrings 会被预先处理,索引只需建立一次,之后即可高效地重复使用。
/// One node of a word-level trie (prefix tree) over phrases.
///
/// Each edge is labelled with a whole word; a node whose `terminated` flag is set
/// marks the end of a phrase that was inserted with `add`.
class Index
{
    /// Child nodes, keyed by the next word of a phrase.
    var indexes: [String: Index]
    /// `true` when the words leading to this node form a complete inserted phrase.
    var terminated: Bool = false

    init() {
        indexes = [:]
    }

    /// Looks for an inserted phrase that begins exactly at the first element of `keywords`.
    ///
    /// The walk stops at the first terminated node it reaches, so when several
    /// inserted phrases share a prefix the shortest one is reported.
    ///
    /// - Parameter keywords: The words to match, in order, starting at index 0.
    /// - Returns: The matched words joined by single spaces, or `nil` when the
    ///   words leave the trie or run out before a terminated node is reached.
    func searchFor(keywords: [String]) -> String? {
        var node = self
        var phrase = ""
        var isFirstWord = true
        for word in keywords {
            guard let child = node.indexes[word] else {
                // The next word is not a continuation of any inserted phrase.
                return nil
            }
            if isFirstWord {
                phrase = word
                isFirstWord = false
            } else {
                phrase = "\(phrase) \(word)"
            }
            if child.terminated {
                return phrase
            }
            node = child
        }
        // Input exhausted (or empty) without completing a phrase.
        return nil
    }

    /// Inserts the phrase spelled by `words`, creating missing nodes along the way.
    ///
    /// An empty array is ignored. Nodes that were already terminated stay terminated.
    ///
    /// - Parameter words: The phrase to insert, one element per word.
    func add(words: [String]) {
        var node = self
        let lastPosition = words.count - 1
        for position in 0..<words.count {
            let word = words[position]
            let child: Index
            if let existing = node.indexes[word] {
                child = existing
            } else {
                child = Index()
                node.indexes[word] = child
            }
            if position == lastPosition {
                // Only the node for the final word marks the end of this phrase.
                child.terminated = true
            }
            node = child
        }
    }
}
/// Finds the first phrase from a fixed keyword list that occurs, word for word, in a sentence.
///
/// Phrases and sentences are split on the space character only, so punctuation
/// attached to a word (for example "skiing.") is part of that word.
class SearchEngine {
    /// Root of the word trie. Remains `nil` until `buildIndex` has been called.
    var index: Index!

    /// Replaces the current index with one built from `keywords`.
    ///
    /// - Parameter keywords: The phrases to search for; each is split into words on spaces.
    func buildIndex(keywords: [String]) {
        let root = Index()
        for keyword in keywords {
            let words = keyword.characters.split(" ").map(String.init)
            root.add(words)
        }
        index = root
    }

    /// Returns the first indexed phrase found in `string`, scanning word by word from the left.
    ///
    /// - Parameter string: The sentence to search.
    /// - Returns: The matched phrase, or `nil` when nothing matches or when no
    ///   index has been built yet.
    func firstEntryIn(string: String) -> String? {
        // `index` is implicitly unwrapped; bind it explicitly so that a call made
        // before `buildIndex` yields nil instead of trapping on a nil unwrap.
        guard let root = index else {
            return nil
        }
        let words = string.characters.split(" ").map(String.init)
        for start in 0..<words.count {
            // Try to match a phrase beginning at each word position in turn.
            if let phrase = root.searchFor(Array(words[start..<words.count])) {
                return phrase
            }
        }
        return nil
    }
}
// Sample sentences and the fixed list of phrases to look for.
let str1 = "During the winter holiday I'll go skiing."
let str2 = "Do knock on the door or chime the bell"
let fixedSearchStrings = ["ring the", "chime the bell", "knock on the door", "knock on the window"]
// Build the index once from the fixed phrases, then query it once per sentence.
let se = SearchEngine()
se.buildIndex(fixedSearchStrings)
se.firstEntryIn(str1) // nil: "During" is a different word from "ring"
se.firstEntryIn(str2) // "knock on the door"
结果
nil
"knock on the door"
答案 1 :(得分:0)
/// Reports whether `str` contains at least one element of `array`, ignoring case.
///
/// This is a plain substring test, not a whole-word test. Progress is printed
/// to standard output for every candidate that is examined.
///
/// - Parameters:
///   - str: The text to search in.
///   - array: The candidate substrings; may be empty.
/// - Returns: `true` as soon as one candidate is found in `str`, otherwise `false`.
func foundSubString(str:String,array:[String]) -> Bool {
    // Lowercase the haystack once instead of once per candidate.
    let haystack = str.lowercaseString
    // A range loop (rather than repeat-while) never touches array[0] when the
    // array is empty, which previously crashed with an out-of-range index.
    for count in 0..<array.count {
        print("count : \(count)")
        if haystack.rangeOfString(array[count].lowercaseString) != nil {
            print("founded")
            return true
        }
    }
    return false
}
<strong>用法</strong>:
// Sample sentences and the fixed list of phrases to look for.
let str1 = "During the winter holiday I'll go skiing."
let str2 = "Do knock on the door or chime the bell"
let fixedSearchStrings = ["ring the", "chime the bell", "knock on the door", "knock on the window"]
// Case-insensitive substring check of str2 against the phrases;
// true here because str2 contains "chime the bell".
let exist: Bool = foundSubString(str2,array: fixedSearchStrings)
<strong>结果</strong>:
如果您想获得更详细的搜索结果,例如在找到匹配项时还需要知道匹配的是哪个短语以及它在数组中的位置:
/// Collects every element of `array` that occurs in `str`, ignoring case.
///
/// This is a plain substring test, not a whole-word test.
///
/// - Parameters:
///   - str: The text to search in.
///   - array: The candidate substrings; may be empty.
/// - Returns: A pair whose first member tells whether anything matched. On success
///   the second member lists each matching candidate together with its index in
///   `array` (not its position inside `str`). When nothing matches, the second
///   member is the single placeholder entry `("", 0)`.
func foundSubString2(str:String,array:[String]) -> (Bool,[(String,Int)]) {
    // Lowercase the haystack once instead of once per candidate.
    let haystack = str.lowercaseString
    var matched = [(String,Int)]()
    // A range loop (rather than repeat-while) never touches array[0] when the
    // array is empty, which previously crashed with an out-of-range index.
    for position in 0..<array.count {
        if haystack.rangeOfString(array[position].lowercaseString) != nil {
            matched.append((array[position],position))
        }
    }
    if matched.isEmpty {
        // Placeholder kept for compatibility with callers of the original function.
        return (false,[("",0)])
    }
    return (true,matched)
}
<strong>用法</strong>:
// Sample sentences and the phrases to look for.
let str1 = "During the winter holiday I'll go skiing."
let str2 = "Do knock on the door or chime the bell"
// "knock on the door" is listed twice (indices 2 and 4), so it can be reported twice.
let fixedSearchStrings = ["ring the", "chime the bell", "knock on the door", "knock on the window", "knock on the door"]
// matched holds (phrase, index of that phrase in fixedSearchStrings) pairs.
let (exist,matched) = foundSubString2(str2,array: fixedSearchStrings)
if exist { print (matched) }
<strong>结果</strong>:
答案 2 :(得分:0)
使用正则表达式。它会比indexOf或类似方法快约1000倍。正则表达式引擎会在内部构建一个状态机,从而能够在一次遍历中匹配所有待查找的字符串。