为什么这个文件读取代码会在文件末尾给出垃圾?

时间:2016-11-27 02:29:53

标签: swift readfile

我写了这个Swift代码,它使用Glibc将文本文件加载到字符串中,但有时它会将\U{7F}之类的垃圾字符附加到字符串的末尾(在同一个文件中)和我不知道为什么。来自Glibc函数的错误检查都不会引发任何警告。即使在同一个文件上使用它也会随机失败。

public typealias Cpath    = String
public typealias Unixpath = String
public typealias CString = UnsafeMutablePointer<CChar>

public func unix_path(_ path:Cpath) -> Unixpath
{
    guard path.characters.count > 1 
    else {
        return path
    }
    let path_i0 = path.startIndex
    let path_i2 = path.index(path_i0, offsetBy: 2)
    var expanded_path:Unixpath = path
    if path[path.startIndex..<path_i2] == "~/" {
        expanded_path = String(cString: getenv("HOME")) + 
                        path[path.index(path_i0, offsetBy: 1)..<path.endIndex]
    }
    return expanded_path
}

public func open_text_file(_ path:Cpath) -> String?
{
    let path = unix_path(path)

    guard let f:UnsafeMutablePointer<FILE> = fopen(path, "rb") 
    else {
        print("Error, could not open file '\(path)'")
        return nil
    }
    defer { fclose(f) }

    let fseek_status = fseeko(f, 0, SEEK_END)
    guard fseek_status == 0 
    else {
        print("Error, fseeko() failed with error code \(fseek_status)")
        return nil
    }

    let n = ftello(f)
    guard 0..<CLong.max ~= n
    else {
        print("Error, ftello() returned file size outsize of allowed range")
        return nil
    }
    rewind(f)

    guard let raw_buffer:UnsafeMutableRawPointer = malloc(n*MemoryLayout<CChar>.size)
    else {
        print("Error, could not allocate memory buffer")
        return nil
    }
    defer { free(raw_buffer) }

    let n_read = fread(raw_buffer, MemoryLayout<CChar>.size, n, f)
    guard n_read == n
    else {
        print("Error, fread() read \(n_read) characters out of \(n)")
        return nil
    }
    let cchar_buffer:CString = raw_buffer.assumingMemoryBound(to: CChar.self)
    return String(cString: cchar_buffer)
}

1 个答案:

答案 0 :(得分:0)

我发现了问题 - 我省略了应该在C字符串末尾出现的标记(\U{00})。没有它,String(cString:)构造函数只是抽空,直到它在系统内存中找到一个自然发生的x00字节。这是正确的形式:

...

// n + 1 to leave room for sentinel
let cchar_buffer:CString = CString.allocate(capacity: n + 1) 
defer { cchar_buffer.deallocate(capacity: n + 1) }

let n_read = fread(cchar_buffer, MemoryLayout<CChar>.size, n, f)
guard n_read == n
else {
    print("Error, fread() read \(n_read) characters out of \(n)")
    return nil
}
cchar_buffer[n] = 0 // cap with sentinel
return String(cString: cchar_buffer)