在我的应用程序中,tcp客户端正在处理来自远程tcp服务器的数据流。一切正常,而收到的字符是1字节字符。当tcp服务器发送特殊字符“ü”(十六进制“c3b5” - >一个2字节字符)时,我开始遇到问题。
这是Swift 3代码行,只要收到的数据包含一些超过1个字节的UTF8字符,就会得到一个nil字符串:
let convertedString = String(bytes: data, encoding: String.Encoding.utf8)
我怎么能解决这个问题呢?基本上,传入流可以包括编码为UTF8的1字节或2字节字符,我需要将数据流转换为字符串而不会出现问题。
以下是我遇到问题的代码的全部内容:
func startRead(for task: URLSessionStreamTask) {
task.readData(ofMinLength: 1, maxLength: 65535, timeout: 300) { (data, eof, error) in
if let data = data {
NSLog("stream task read %@", data as NSData)
let convertedString1 = String(data: data, encoding: String.Encoding(rawValue: String.Encoding.utf8.rawValue))
if let convertedString = String(bytes: data, encoding: String.Encoding.utf8) {
self.partialMessage = self.partialMessage + convertedString
NSLog(convertedString)
// Assign lengths (delimiter, MD5 digest, minimum expected length, message length)
let delimiterLength = Constants.END_OF_MESSAGE_DELIMITER.lengthOfBytes(using: String.Encoding.utf8)
let MD5Length = 32 // 32 characters -> hex representation of 16 bytes
// 3 = CR+LF+1 char at least
let minimumExpectedMessageLength = MD5Length + delimiterLength + 3
let messageLength = self.partialMessage.lengthOfBytes(using: String.Encoding.utf8)
// Check for delimiter and minimum expected message length (2 char msg + MD5 digest + delimiter)
if (self.partialMessage.contains(Constants.END_OF_MESSAGE_DELIMITER)) &&
(messageLength >= minimumExpectedMessageLength) {
var message = self.partialMessage
// Get rid of optional CR+LF
var lowBound = message.index(message.endIndex, offsetBy: -1)
var hiBound = message.index(message.endIndex, offsetBy: 0)
var midRange = lowBound ..< hiBound
let optionalCRLF = message.substring(with: midRange)
if (optionalCRLF == "\r\n") || (optionalCRLF == "\0") { // Remove CR+LF if present
lowBound = message.index(message.endIndex, offsetBy: -1)
hiBound = message.index(message.endIndex, offsetBy: 0)
midRange = lowBound ..< hiBound
message.removeSubrange(midRange)
}
// Check for delimiter proper position (has to be at the end)
lowBound = message.index(message.endIndex, offsetBy: -delimiterLength)
hiBound = message.index(message.endIndex, offsetBy: 0)
midRange = lowBound ..< hiBound
let delimiter = message.substring(with: midRange)
if (delimiter == Constants.END_OF_MESSAGE_DELIMITER) // Delimiter in proper position?
{
// Acquire the MD digest
lowBound = message.index(message.endIndex, offsetBy: -(MD5Length+delimiterLength))
hiBound = message.index(message.endIndex, offsetBy: -(delimiterLength))
midRange = lowBound ..< hiBound
let receivedMD5 = message.substring(with: midRange)
// Acquire the deframed message (normalized message)
lowBound = message.index(message.startIndex, offsetBy: 0)
hiBound = message.index(message.endIndex, offsetBy: -(MD5Length+delimiterLength))
midRange = lowBound ..< hiBound
let normalizedMessage = message.substring(with: midRange)
// Calculate the MD5 digest on the normalized message
let calculatedMD5Digest = normalizedMessage.md5()
// Debug
print(delimiter)
print(normalizedMessage)
print(receivedMD5)
print(calculatedMD5Digest!)
// Check for the integrity of the data
if (receivedMD5.lowercased() == calculatedMD5Digest?.lowercased()) || self.noMD5Check // TEMPORARY
{
if (normalizedMessage == "Unauthorized Access")
{
// Update the authorization status
self.authorized = false
// Stop the refresh control
if let refreshControl = self.refreshControl {
if refreshControl.isRefreshing {
refreshControl.endRefreshing()
}
}
// Stop the stream
NSLog("stream task stop")
self.stop(task: task)
// Shows an alert
self.showAlert(title: NSLocalizedString("Unauthorized Access", comment: "Unauthorized Access Title"), message: NSLocalizedString("Please login with the proper Username and Password before to send any command!", comment: "Unauthorized Access Message"))
}
else if (normalizedMessage == "System Busy")
{
// Stop the refresh control
if let refreshControl = self.refreshControl {
if refreshControl.isRefreshing {
refreshControl.endRefreshing()
}
}
// Stop the stream
NSLog("stream task stop")
self.stop(task: task)
// Shows an alert
self.showAlert(title: NSLocalizedString("System Busy", comment: "System Busy Title"), message: NSLocalizedString("The system is busy at the moment. Only one connection at a time is allowed!", comment: "System Busy Message"))
}
else if (normalizedMessage == "Error")
{
// Stop the refresh control
if let refreshControl = self.refreshControl {
if refreshControl.isRefreshing {
refreshControl.endRefreshing()
}
}
// Stop the stream
NSLog("stream task stop")
self.stop(task: task)
// Shows an alert
self.showAlert(title: NSLocalizedString("Error", comment: "Error Title"), message: NSLocalizedString("An error occurred during the execution of the command!", comment: "Command Error Message"))
}
else if (normalizedMessage == "ErrorMachineRunning")
{
// Stop the refresh control
if let refreshControl = self.refreshControl {
if refreshControl.isRefreshing {
refreshControl.endRefreshing()
}
}
// Stop the stream
NSLog("stream task stop")
self.stop(task: task)
// Shows an alert
self.showAlert(title: NSLocalizedString("Error", comment: "Error Title"), message: NSLocalizedString("The command cannot be executed while the machine is running", comment: "Machine Running Message 1")+"!\r\n\n "+NSLocalizedString("Trying to execute any command in this state could be dangerous for both people and machinery", comment: "Machine Running Message 2")+".\r\n\n "+NSLocalizedString("Please stop the machine and leave the automatic or semi-automatic modes before to provide any command", comment: "Machine Running Message 3")+".")
}
else if (normalizedMessage == "Command Not Recognized")
{
// Stop the refresh control
if let refreshControl = self.refreshControl {
if refreshControl.isRefreshing {
refreshControl.endRefreshing()
}
}
// Stop the stream
NSLog("stream task stop")
self.stop(task: task)
// Shows an alert
self.showAlert(title: NSLocalizedString("Error", comment: "Error Title"), message: NSLocalizedString("Command not recognized!", comment: "Command Unrecognized Message"))
}
else
{
// Stop the refresh control
if let refreshControl = self.refreshControl {
if refreshControl.isRefreshing {
refreshControl.endRefreshing()
}
}
// Stop the stream
NSLog("stream task stop")
self.stop(task: task)
//let testMessage = "test\r\nf3ea0b9bff4a2c79e60acf6873f4a1ce</EOM>\r\n"
//normalizedMessage = testMessage
// Process the received csv file
self.processCsvData(file: normalizedMessage)
}
}
else
{
// Stop the refresh control
if let refreshControl = self.refreshControl {
if refreshControl.isRefreshing {
refreshControl.endRefreshing()
}
}
// Stop the stream
NSLog("stream task stop")
self.stop(task: task)
// Shows an alert
self.showAlert(title: NSLocalizedString("Data Error", comment: "Data Error Title"), message: NSLocalizedString("The received data cannot be read since it's corrupted or incomplete!", comment: "Data Error Message"))
}
}
else
{
// Stop the refresh control
if let refreshControl = self.refreshControl {
if refreshControl.isRefreshing {
refreshControl.endRefreshing()
}
}
// Stop the stream
NSLog("stream task stop")
self.stop(task: task)
// Shows an alert
self.showAlert(title: NSLocalizedString("Data Error", comment: "Data Error Title"), message: NSLocalizedString("The received data cannot be read since it's corrupted or incomplete!", comment: "Data Error Message"))
}
}
}
}
if eof {
// Stop the refresh control
if let refreshControl = self.refreshControl {
if refreshControl.isRefreshing {
refreshControl.endRefreshing()
}
}
// Refresh the tableview content
self.tableView.reloadData()
// Stop the stream
NSLog("stream task end")
self.stop(task: task)
} else if error == nil {
self.startRead(for: task)
} else {
// We ignore the error because we'll see it again in `didCompleteWithError`.
NSLog("stream task read error")
}
}
}
答案 0 :(得分:1)
data
表示整个字符串的数据,而不仅仅是子字符串,这一点至关重要。如果您尝试从整个字符串的部分数据转换子字符串,则在许多情况下它将失败。
它适用于1字节字符,因为无论您在何处切断数据流,部分数据仍然代表有效字符串。但是一旦开始处理多字节字符,部分数据流很容易导致数据的第一个或最后一个字节只是多字节字符的一部分。这可以防止正确解释数据。
因此,在尝试将数据转换为字符串之前,必须确保使用给定字符串的所有字节构建data
对象。
通常,您应该使用字节数来启动数据。假设前4个字节代表一些商定的“字节序”中的32位整数。你读取这4个字节来获得长度。然后你读取数据,直到你得到更多的字节。然后你知道你在信息的最后。
尝试在数据末尾使用“消息结束”标记的问题是“消息结束”标记可以在读取之间拆分。无论哪种方式,您都需要重构代码以在数据级别进行处理,并且在读取所有字符串数据之前不要尝试将数据转换为字符串。
答案 1 :(得分:0)
如您所知,单个UTF-8字符为1,2,3或4个字节。 对于您的情况,您需要处理1或2个字节的字符。并且您的接收字节序列可能未与“字符边界”对齐。 但是,正如rmaddy指出的那样,String.Encoding.utf8的字节序列必须以右边界开始和结束。
现在,有两种方法可以处理这种情况。 正如rmaddy建议的那样,首先发送长度并计算输入数据字节。 这样做的缺点是你必须修改传输(服务器)端,这可能是不可能的。
另一种选择是逐字节扫描输入序列并跟踪字符边界,然后构建合法的UTF-8字节序列。 幸运的是,UTF-8的设计使您可以轻松识别字符边界的位置 通过查看字节流中的任何字节。具体来说,1,2,3和4字节UTF-8字符的第一个字节分别以0xxxxxxx,110xxxxx,1110xxxx和11110xxx开头,第二个......第四个字节 在位表示中都是10xxxxxx。这会让你的生活更轻松。
如果您从一个字节的UTF-8字符中选择一个“消息结束”标记, 您可以轻松成功地检测EOM而不考虑字节序列,因为它是单个字节,并且不会出现在2..4字节字符中的任何位置。