我需要检测从 PDF 文件读取的字符串的语言。该文本基本上是英语,但 `NLLanguageRecognizer` 返回的却是“罗马尼亚语”。
/// Returns the writing direction of the dominant language detected in `string`.
///
/// Surrounding whitespace and line breaks are stripped before detection so that
/// padding (common in text extracted from PDFs) does not influence the result.
///
/// - Parameter string: The text to analyse.
/// - Returns: `"ltr"` when the dominant language is written left-to-right;
///   `"rtl"` in every other case, including when no language can be determined
///   or when `NLLanguageRecognizer` is unavailable (before iOS 12).
class func detectedLangaugeFormat(for string: String) -> String {
    guard #available(iOS 12.0, *) else {
        // Fallback on earlier versions: no recognizer, keep the default direction.
        return "rtl"
    }

    let text = string.trimmingCharacters(in: .whitespacesAndNewlines)
    let recognizer = NLLanguageRecognizer()
    // The recognizer must be fed the text first; without this call
    // `dominantLanguage` is always nil and the default would be returned.
    recognizer.processString(text)

    guard let languageCode = recognizer.dominantLanguage?.rawValue else { return "rtl" }

    // Only an explicit left-to-right result maps to "ltr"; right-to-left,
    // unknown and vertical directions all fall back to "rtl".
    switch NSLocale.characterDirection(forLanguage: languageCode) {
    case .leftToRight:
        return "ltr"
    default:
        return "rtl"
    }
}
"\r\n A Simple PDF File \r\n This is a small demonstration .pdf file - \r\n just for use in the Virtual Mechanics tutorials. More text. And more \r\n text. And more text. And more text. And more text. \r\n And more text. And more text. And more text. And more text. And more \r\n text. And more text. Boring, zzzzz. And more text. And more text. And \r\n more text. And more text. And more text. And more text. And more text. \r\n And more text. And more text. \r\n And more text. And more text. And more text. And more text. And more \r\n text. And more text. And more text. Even more. Continued on page 2 ...\r\n Simple PDF File 2 \r\n ...continued from page 1. Yet more text. And more text. And more text. \r\n And more text. And more text. And more text. And more text. And more \r\n text. Oh, how boring typing this stuff. But not as boring as watching \r\n paint dry. And more text. And more text. And more text. And more text. \r\n Boring. More, a little more text. The end, and just as well. "
答案 0 :(得分:1)
// Collapse every run of consecutive spaces in `str` into a single space.
// The String-level regex replacement covers the whole string, so no NSRange has
// to be sized by hand: NSRange lengths are UTF-16 counts, whereas `str.count`
// counts Characters and falls short on text containing "\r\n" or emoji.
str = str.replacingOccurrences(of: " +", with: " ", options: .regularExpression)
答案 1 :(得分:1)
// Sample text as extracted from the PDF, including its leading and trailing padding.
let givenString = "\r\n A Simple PDF File \r\n This is a small demonstration .pdf file - \r\n just for use in the Virtual Mechanics tutorials. More text. And more \r\n text. And more text. And more text. And more text. \r\n And more text. And more text. And more text. And more text. And more \r\n text. And more text. Boring, zzzzz. And more text. And more text. And \r\n more text. And more text. And more text. And more text. And more text. \r\n And more text. And more text. \r\n And more text. And more text. And more text. And more text. And more \r\n text. And more text. And more text. Even more. Continued on page 2 ...\r\n Simple PDF File 2 \r\n ...continued from page 1. Yet more text. And more text. And more text. \r\n And more text. And more text. And more text. And more text. And more \r\n text. Oh, how boring typing this stuff. But not as boring as watching \r\n paint dry. And more text. And more text. And more text. And more text. \r\n Boring. More, a little more text. The end, and just as well. "

// Strip the surrounding whitespace and line breaks, then run the detection.
let trimmedString = givenString.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines)
let result = detectedLangaugeFormat(for: trimmedString)
print(result) // ltr
如果打印 `detectedLangauge`(而不是“lan”),则会发现它是“英文”。
// Look up the display name of the detected language code in the current locale.
// The lookup is optional, so the value is printed in its Optional form.
let detectedLangauge: String? = Locale.current.localizedString(forIdentifier: languageCode)
print(String(describing: detectedLangauge)) // Optional("English")
答案 2 :(得分:0)
先删除字符串开头的非字母数字字符(空白、!、@、# 等),然后再尝试检测语言。
extension String {
    /// Returns the index of the first letter or digit in the string, or `nil`
    /// when the string contains none.
    ///
    /// Letters and digits of every script are recognised (not only ASCII), so
    /// text in non-Latin scripts is not mistaken for an all-punctuation string.
    func findFirstAlphabetic() -> String.Index? {
        return firstIndex(where: { character in
            character.unicodeScalars.contains(where: { CharacterSet.alphanumerics.contains($0) })
        })
    }

    /// `true` when the string is non-empty and consists solely of the ASCII
    /// letters `a-z`, `A-Z` and digits `0-9`.
    var isAlphanumeric: Bool {
        return !isEmpty && range(of: "[^a-zA-Z0-9]", options: .regularExpression) == nil
    }

    /// Returns the string with everything before its first letter or digit
    /// removed, or `nil` when the string contains no letter or digit at all.
    func alphabetic_Leading_SubString() -> String? {
        guard let startIndex = findFirstAlphabetic() else { return nil }
        return String(self[startIndex..<endIndex])
    }
}
// Sample text as extracted from the PDF, including its leading and trailing padding.
let string = "\r\n A Simple PDF File \r\n This is a small demonstration .pdf file - \r\n just for use in the Virtual Mechanics tutorials. More text. And more \r\n text. And more text. And more text. And more text. \r\n And more text. And more text. And more text. And more text. And more \r\n text. And more text. Boring, zzzzz. And more text. And more text. And \r\n more text. And more text. And more text. And more text. And more text. \r\n And more text. And more text. \r\n And more text. And more text. And more text. And more text. And more \r\n text. And more text. And more text. Even more. Continued on page 2 ...\r\n Simple PDF File 2 \r\n ...continued from page 1. Yet more text. And more text. And more text. \r\n And more text. And more text. And more text. And more text. And more \r\n text. Oh, how boring typing this stuff. But not as boring as watching \r\n paint dry. And more text. And more text. And more text. And more text. \r\n Boring. More, a little more text. The end, and just as well. "

// The helper returns nil when the text holds no letter or digit, so bind the
// result instead of force-unwrapping it, and print the detected direction
// rather than discarding it.
if let cleaned = string.alphabetic_Leading_SubString() {
    let direction = detectedLangaugeFormat(for: cleaned)
    print(direction)
}