我们知道可以打印出每个字符的UTF-8代码单元。那么,如果我们已经有了这些字符的代码单元,如何用它们创建一个String?
答案 0 :(得分:13)
可以使用Swift的UTF8类,以惯用的方式将UTF-8代码单元转换为Swift String。不过,从String转换为UTF-8要容易得多!
import Foundation
/// Converts between raw UTF-8 code units and Swift strings.
public class UTF8Encoding {
    /// Builds a `String` from UTF-8 code units by feeding them to the `UTF8` codec.
    ///
    /// Decoding stops at the end of `bytes`, or at the first result that is not a
    /// decoded scalar; the text decoded up to that point is returned.
    public static func encode(bytes: Array<UInt8>) -> String {
        var text = ""
        var codec = UTF8()
        var source = bytes.generate()
        var done = false
        while !done {
            switch codec.decode(&source) {
            case .Result(let scalar):
                text.append(scalar)
            default:
                /* end of input, errors and unexpected values all end the loop */
                done = true
            }
        }
        return text
    }

    /// Returns the UTF-8 code units of `str`.
    public static func decode(str: String) -> Array<UInt8> {
        return Array(str.utf8)
    }
}
/// Round-trips a string through `UTF8Encoding` and checks that nothing is lost.
func testUTF8Encoding() {
    // Unicode escapes keep the multi-byte characters intact however this file is
    // stored or copied: U+00E9 (2 bytes), U+20AC (3 bytes), U+1F600 (4 bytes).
    let testString = "A UTF8 String With Special Characters: \u{E9} \u{20AC} \u{1F600}"
    let decodedArray = UTF8Encoding.decode(testString)
    let encodedString = UTF8Encoding.encode(decodedArray)
    XCTAssertEqual(encodedString, testString, "UTF8Encoding round trip is not lossless")
}
建议的其他替代方案:
使用NSString
调用Objective-C桥接器;
使用UnicodeScalar
容易出错,因为它会将UnicodeScalars直接转换为Characters,忽略复杂的字形集群;以及
使用String.fromCString
可能不安全,因为它使用指针。
答案 1 :(得分:5)
改进Martin R的答案
import AppKit
// UTF-8 code units for "ABC". No trailing NUL: the initializer is given an
// explicit length, so a terminator would be decoded as an extra U+0000 character.
let utf8 : CChar[] = [65, 66, 67]
let str = NSString(bytes: utf8, length: utf8.count, encoding: NSUTF8StringEncoding)
println(str) // Output: ABC
import AppKit
// UTF-8 code units for the euro sign (U+20AC). No trailing NUL: the initializer
// is given an explicit length, so a terminator would be decoded as an extra
// U+0000 character.
let utf8 : UInt8[] = [0xE2, 0x82, 0xAC]
let str = NSString(bytes: utf8, length: utf8.count, encoding: NSUTF8StringEncoding)
println(str) // Output: €
这里发生的事情是:Array可以自动转换为CConstVoidPointer,后者可用于通过NSString(bytes: CConstVoidPointer, length len: Int, encoding: UInt)创建字符串。
答案 2 :(得分:4)
Swift 3
// Failable Foundation initializer, so `s` is an optional String.
let s: String? = String(bytes: arr, encoding: String.Encoding.utf8)
答案 3 :(得分:2)
我一直在寻找关于Swift中字符串操作的全面答案。依赖与NSString之间的强制转换以及其他不安全的指针魔法,并不能让我满意。这是一个安全的选择:
首先,我们要扩展UInt8
。这是CodeUnit
背后的原始类型。
extension UInt8 {
    /// The character whose Unicode scalar value equals this byte.
    ///
    /// The byte is used directly as a scalar value (U+0000...U+00FF); it is not
    /// decoded as part of a multi-byte UTF-8 sequence.
    var character: Character {
        let scalar = UnicodeScalar(self)
        return Character(scalar)
    }
}
这将允许我们做这样的事情:
// Code units 72, 69, 76, 76, 79, mapped one by one to characters.
let codeUnits: [UInt8] = [72, 69, 76, 76, 79]
let characters = codeUnits.map({ unit in unit.character })
let string = String(characters)
// string prints "HELLO"
配备此扩展程序,我们现在可以修改字符串。
let string = "ABCDEFGHIJKLMONP"
let separator: UInt8 = 45 // "-" in ASCII
var modifiedCharacters = [Character]()
for (offset, unit) in string.utf8.enumerate() {
    // Put a "-" in front of every 4th code unit, except at the very start
    if offset != 0 && offset % 4 == 0 {
        modifiedCharacters.append(separator.character)
    }
    modifiedCharacters.append(unit.character)
}
let modifiedString = String(modifiedCharacters)
// modified string == "ABCD-EFGH-IJKL-MONP"
答案 4 :(得分:1)
这是一种可能的解决方案(现已针对 Swift 2 进行了更新):
// NUL-terminated C string holding the code units 65, 66, 67.
let utf8: [CChar] = [65, 66, 67, 0]
// Hand the array's base address to String.fromCString
let decoded = utf8.withUnsafeBufferPointer { buffer in
    String.fromCString(buffer.baseAddress)
}
if let str = decoded {
    print(str) // Output: ABC
} else {
    print("Not a valid UTF-8 string")
}
在闭包中,$0
是UnsafeBufferPointer<CChar>
指向数组的连续存储。从那里可以创建一个Swift String
。
或者,如果您希望输入为 unsigned 字节:
// NUL-terminated unsigned bytes 0xE2 0x82 0xAC.
let utf8: [UInt8] = [0xE2, 0x82, 0xAC, 0]
// The base address is converted with UnsafePointer(...) before being passed
// to String.fromCString
let decoded = utf8.withUnsafeBufferPointer { buffer in
    String.fromCString(UnsafePointer(buffer.baseAddress))
}
if let str = decoded {
    print(str) // Output: €
} else {
    print("Not a valid UTF-8 string")
}
答案 5 :(得分:1)
我会这样做,它可能不如使用指针那么优雅,但能很好地完成工作。下面基本上就是为String定义的一组新的+=运算符,例如:
/// Appends to `lhs` the character whose Unicode scalar value is the byte `rhs.unit1`.
@infix func += (inout lhs: String, rhs: (unit1: UInt8)) {
    let scalarValue = UInt32(rhs.unit1)
    lhs += Character(UnicodeScalar(scalarValue))
}
/// Appends to `lhs` the character whose Unicode scalar value is the 16-bit
/// number formed from `rhs.unit1` (high byte) and `rhs.unit2` (low byte).
@infix func += (inout lhs: String, rhs: (unit1: UInt8, unit2: UInt8)) {
    let highByte = UInt32(rhs.unit1) << 8
    let lowByte = UInt32(rhs.unit2)
    lhs += Character(UnicodeScalar(highByte | lowByte))
}
/// Appends to `lhs` the character whose Unicode scalar value is the 32-bit
/// number formed from the four bytes, `rhs.unit1` being the most significant.
@infix func += (inout lhs: String, rhs: (unit1: UInt8, unit2: UInt8, unit3: UInt8, unit4: UInt8)) {
    let upperHalf = UInt32(rhs.unit1) << 24 | UInt32(rhs.unit2) << 16
    let lowerHalf = UInt32(rhs.unit3) << 8 | UInt32(rhs.unit4)
    lhs += Character(UnicodeScalar(upperHalf | lowerHalf))
}
注意:您也可以通过重载 + 运算符来扩展所支持的运算符列表,为String定义一组完全可交换的运算符。
现在您可以把unicode(UTF-8、UTF-16或UTF-32)字符追加到String,例如:
String
答案 6 :(得分:1)
如果您从一个原始缓冲区开始,例如从文件句柄返回的Data对象(在这种情况下,从Pipe对象中获取):
let data = pipe.fileHandleForReading.readDataToEndOfFile()
// Decode straight from the Data buffer. Copying into a manually allocated
// pointer and calling String(cString:) leaked that allocation and read past its
// end, because the copied bytes carry no NUL terminator.
// Unlike String(cString:), this does not stop at an embedded NUL byte; invalid
// UTF-8 sequences are replaced with U+FFFD.
let output = String(decoding: data, as: UTF8.self)
答案 7 :(得分:1)
// Swift4
// Collect the UTF-8 code units here...
var units: [UTF8.CodeUnit] = []
//
// update units
//
// ...then decode them into a String.
let str = String(decoding: units, as: Unicode.UTF8.self)
答案 8 :(得分:0)
有Martin R回答的Swift 3.0版本
/// Converts between raw UTF-8 code units and Swift strings.
public class UTF8Encoding {
    /// Builds a `String` from UTF-8 code units using the standard-library `UTF8` codec.
    ///
    /// Decoding stops at the end of `bytes` or at the first decoding error; the
    /// text decoded up to that point is returned.
    public static func encode(bytes: Array<UInt8>) -> String {
        var text = ""
        var codec = UTF8()
        var source = bytes.makeIterator()
        decoding: while true {
            switch codec.decode(&source) {
            case .scalarValue(let scalar):
                text.unicodeScalars.append(scalar)
            case .emptyInput, .error:
                // nothing left to read, or malformed input: keep what we have
                break decoding
            }
        }
        return text
    }

    /// Returns the UTF-8 code units of `str`.
    public static func decode(str: String) -> Array<UInt8> {
        return Array(str.utf8)
    }
}
如果您想用UTF-8字符串显示表情符号,请使用下面的 convertEmojiCodesToString 方法。它适用于 "U+1F52B"(表情符号)或 "U+1F1E6 U+1F1F1"(国家/地区国旗表情符号)这样的字符串。
/// Turns textual code point lists such as "U+1F1E6 U+1F1F1" into the string
/// those code points spell.
class EmojiConverter {
    /// Converts a space-separated list of "U+XXXX" code points into a `String`.
    ///
    /// - Parameter emojiCodesString: Hexadecimal code points, each optionally
    ///   prefixed with "U+" (or "u+"), separated by spaces.
    /// - Returns: The concatenation of all code points that parsed as valid
    ///   Unicode scalar values. Tokens that are not valid hexadecimal numbers or
    ///   not valid scalars (for example surrogates) are skipped.
    static func convertEmojiCodesToString(_ emojiCodesString: String) -> String {
        var resultString = ""
        for token in emojiCodesString.split(separator: " ") {
            // Strip the "U+" marker with standard-library calls only.
            let hasMarker = token.hasPrefix("U+") || token.hasPrefix("u+")
            let hexDigits = hasMarker ? token.dropFirst(2) : token
            // radix-16 parsing accepts both upper- and lower-case digits
            guard let charCode = UInt32(hexDigits, radix: 16),
                  let unicode = UnicodeScalar(charCode) else {
                continue
            }
            resultString.unicodeScalars.append(unicode)
        }
        return resultString
    }
}
答案 9 :(得分:0)
使用Swift 5,您可以选择以下方式之一,以将一组UTF-8代码单元转换为字符串。
String 的 init(_:) 初始化程序
如果您有一个 String.UTF8View 实例(即UTF-8代码单元的集合)并想将其转换为字符串,可以使用 init(_:) 初始化程序。init(_:) 具有以下声明:
init(_ utf8: String.UTF8View)
创建与给定的UTF-8代码单元序列相对应的字符串。
下面的Playground示例代码显示了如何使用init(_:)
:
// Take the UTF-8 view of a string and build a new String from that view.
let string = "Café "
let utf8View = string.utf8 // a String.UTF8View
let newString: String = String(utf8View)
print(newString) // prints: Café
String 的 init(decoding:as:) 初始化程序
init(decoding:as:) 使用指定的编码从给定的Unicode代码单元集合创建一个字符串:
// Copy the string's UTF-8 code units into an array, then decode the array.
let string = "Café "
let codeUnits = Array(string.utf8) // [Unicode.UTF8.CodeUnit]
let newString = String(decoding: codeUnits, as: Unicode.UTF8.self)
print(newString) // prints: Café
请注意,init(decoding:as:)
也可以与String.UTF8View
参数一起使用:
// Decode directly from the UTF-8 view, without copying it into an array first.
let string = "Café "
let utf8View = string.utf8 // a String.UTF8View
let newString = String(decoding: utf8View, as: Unicode.UTF8.self)
print(newString) // prints: Café
transcode(_:from:to:stoppingOnError:into:) 函数
以下示例将初始字符串的UTF-8表示形式转码为Unicode标量值(UTF-32代码单元),可用于构建新字符串:
let string = "Café "
let bytes = Array(string.utf8)
var newString = ""
// Transcode UTF-8 to UTF-32 and append each resulting code unit as a scalar.
// The Bool returned by transcode is deliberately discarded.
_ = transcode(bytes.makeIterator(), from: UTF8.self, to: UTF32.self, stoppingOnError: true) { codeUnit in
    // NOTE(review): the force unwrap assumes every emitted UTF-32 code unit is
    // a valid Unicode scalar value
    let scalar = Unicode.Scalar(codeUnit)!
    newString.append(String(scalar))
}
print(newString) // prints: Café
Array 的 withUnsafeBufferPointer(_:) 方法和 String 的 init(cString:) 初始化程序
init(cString:) 具有以下声明:
init(cString: UnsafePointer<CChar>)
通过复制给定指针引用的以空字符结尾的UTF-8数据来创建新字符串。
下面的示例演示如何将init(cString:)
与指向CChar
数组(即格式良好的UTF-8代码单元序列)的内容的指针一起使用,以便从中创建字符串:
// NUL-terminated UTF-8 code units, written as signed CChar values. They encode
// "Café " followed by the two regional-indicator scalars U+1F1EB U+1F1F7.
let bytes: [CChar] = [67, 97, 102, -61, -87, 32, -16, -97, -121, -85, -16, -97, -121, -73, 0]
let newString = bytes.withUnsafeBufferPointer { buffer -> String in
    // The array literal is non-empty, so baseAddress is non-nil here.
    let start: UnsafePointer<CChar> = buffer.baseAddress!
    return String(cString: start)
}
print(newString) // prints: Café, a space, then the flag emoji formed by U+1F1EB U+1F1F7
Unicode.UTF8 的 decode(_:) 方法
要解码代码单元序列,请反复调用 decode(_:),直到返回 UnicodeDecodingResult.emptyInput:
let string = "Café "
let codeUnits = Array(string.utf8)
var codeUnitIterator = codeUnits.makeIterator()
var utf8Decoder = Unicode.UTF8()
var newString = ""
// Pull one scalar at a time out of the decoder until the input runs out or a
// decoding error is reported.
var isDecoding = true
while isDecoding {
    switch utf8Decoder.decode(&codeUnitIterator) {
    case .scalarValue(let scalar):
        newString.unicodeScalars.append(scalar)
    case .emptyInput:
        isDecoding = false
    case .error:
        print("Decoding error")
        isDecoding = false
    }
}
print(newString) // prints: Café
String 的 init(bytes:encoding:) 初始化程序
Foundation为String提供了一个 init(bytes:encoding:) 初始化程序,您可以按照以下Playground示例代码中的说明使用它:
import Foundation
let string = "Café "
let bytes = Array(string.utf8) // [Unicode.UTF8.CodeUnit]
// Failable Foundation initializer: the result is an optional String.
let newString: String? = String(bytes: bytes, encoding: .utf8)
print(String(describing: newString)) // prints: Optional("Café ")