如何检查某个 Character 是否位于单词边界(按 Unicode TR#29 的定义)?
我有一个使用正则表达式的解决方案,但我认为这很难看。
答案 0 :(得分:0)
仅凭单个字符,无法判断它是否是单词边界。边界位于两个相邻字符(或两个相邻的字形簇)之间,因此只能针对这样的一对来判断。
这段代码可能对您有用。它为给定的字符串创建一个单词边界数组。它建立在 NSString.enumerateSubstrings(in:options:using:) 之上。
您可以在 Playground 中运行它:
import Foundation
// Sample sentence fed to the word-boundary demonstration; it mixes spaces,
// an underscore, a slash and full stops.
let str = "The quick brown_fox jumps over / the lazy dog. Flag Emoji ."
extension String {
    /// The indices at which this string is cut into word and non-word segments.
    ///
    /// Words are located with `enumerateSubstrings(in:options:using:)` and the
    /// `.byWords` option. The result always begins with `startIndex` and always
    /// ends with `endIndex`; for an empty string those coincide and a single
    /// index is returned. Each consecutive pair of indices delimits one segment:
    /// either a word reported by the enumeration or the text lying between words.
    ///
    /// NOTE(review): this relies on the enumeration reporting words in ascending,
    /// non-overlapping order — confirm against the Foundation documentation.
    public var wordBoundaries: [String.Index] {
        // Seeding with `startIndex` guarantees `cuts` is never empty.
        var cuts: [String.Index] = [startIndex]

        enumerateSubstrings(in: startIndex ..< endIndex, options: .byWords) { _, wordRange, _, _ in
            // Record the start of the word only when it does not coincide with
            // the previously recorded cut (i.e. there is a gap before the word).
            if cuts.last != wordRange.lowerBound {
                cuts.append(wordRange.lowerBound)
            }
            cuts.append(wordRange.upperBound)
        }

        // Close the trailing non-word segment, if the last word stops short of the end.
        if cuts.last != endIndex {
            cuts.append(endIndex)
        }
        return cuts
    }
}
// Print every segment of `str` that lies between two neighbouring boundaries,
// one segment per line. Pairing the boundary list with itself shifted by one
// yields each (lower, upper) pair in order.
let segmentEdges = str.wordBoundaries
for (lower, upper) in zip(segmentEdges, segmentEdges.dropFirst()) {
    print(str[lower ..< upper])
}