我正在尝试读取NSURL
中给出的文件并将其加载到数组中,其中的项目由换行符\n
分隔。
到目前为止,我已经采用了这种方式:
var possList: NSString? = NSString.stringWithContentsOfURL(filePath.URL) as? NSString
if var list = possList {
list = list.componentsSeparatedByString("\n") as NSString[]
return list
}
else {
//return empty list
}
由于几个原因,我对此并不满意。一,我正在处理从几千字节到几百MB的文件。你可以想象,使用这么大的字符串是缓慢而笨拙的。其次,当它执行时,这会冻结用户界面 - 再次,不好。
我已经考虑在另一个线程中运行此代码,但我一直遇到问题,此外,它仍然没有解决处理大字符串的问题。 / p>
我想做的事情与以下伪代码类似:
var aStreamReader = new StreamReader(from_file_or_url)
while aStreamReader.hasNextLine == true {
currentline = aStreamReader.nextLine()
list.addItem(currentline)
}
我如何在Swift中完成此任务?
关于我正在阅读的文件的一些注意事项:所有文件都包含由\n
或\r\n
分隔的短(<255个字符)字符串。文件的长度范围从~100行到超过5000万行。它们可能包含欧洲字符和/或带重音的字符。
答案 0 :(得分:135)
(代码现在适用于Swift 2.2 / Xcode 7.3。如果有人需要,可以在编辑历史中找到旧版本。最后提供Swift 3的更新版本。) < / p>
以下Swift代码深受各种答案的启发 How to read data from NSFileHandle line by line?。它以块的形式从文件中读取,并将完整的行转换为字符串。
默认行分隔符(\n
),字符串编码(UTF-8)和块大小(4096)
可以使用可选参数进行设置。
class StreamReader {
let encoding : UInt
let chunkSize : Int
var fileHandle : NSFileHandle!
let buffer : NSMutableData!
let delimData : NSData!
var atEof : Bool = false
init?(path: String, delimiter: String = "\n", encoding : UInt = NSUTF8StringEncoding, chunkSize : Int = 4096) {
self.chunkSize = chunkSize
self.encoding = encoding
if let fileHandle = NSFileHandle(forReadingAtPath: path),
delimData = delimiter.dataUsingEncoding(encoding),
buffer = NSMutableData(capacity: chunkSize)
{
self.fileHandle = fileHandle
self.delimData = delimData
self.buffer = buffer
} else {
self.fileHandle = nil
self.delimData = nil
self.buffer = nil
return nil
}
}
deinit {
self.close()
}
/// Return next line, or nil on EOF.
func nextLine() -> String? {
precondition(fileHandle != nil, "Attempt to read from closed file")
if atEof {
return nil
}
// Read data chunks from file until a line delimiter is found:
var range = buffer.rangeOfData(delimData, options: [], range: NSMakeRange(0, buffer.length))
while range.location == NSNotFound {
let tmpData = fileHandle.readDataOfLength(chunkSize)
if tmpData.length == 0 {
// EOF or read error.
atEof = true
if buffer.length > 0 {
// Buffer contains last line in file (not terminated by delimiter).
let line = NSString(data: buffer, encoding: encoding)
buffer.length = 0
return line as String?
}
// No more lines.
return nil
}
buffer.appendData(tmpData)
range = buffer.rangeOfData(delimData, options: [], range: NSMakeRange(0, buffer.length))
}
// Convert complete line (excluding the delimiter) to a string:
let line = NSString(data: buffer.subdataWithRange(NSMakeRange(0, range.location)),
encoding: encoding)
// Remove line (and the delimiter) from the buffer:
buffer.replaceBytesInRange(NSMakeRange(0, range.location + range.length), withBytes: nil, length: 0)
return line as String?
}
/// Start reading from the beginning of file.
func rewind() -> Void {
fileHandle.seekToFileOffset(0)
buffer.length = 0
atEof = false
}
/// Close the underlying file. No reading must be done after calling this method.
func close() -> Void {
fileHandle?.closeFile()
fileHandle = nil
}
}
用法:
if let aStreamReader = StreamReader(path: "/path/to/file") {
defer {
aStreamReader.close()
}
while let line = aStreamReader.nextLine() {
print(line)
}
}
您甚至可以使用带有for-in循环的阅读器
for line in aStreamReader {
print(line)
}
通过实施SequenceType
协议(比较http://robots.thoughtbot.com/swift-sequences):
extension StreamReader : SequenceType {
func generate() -> AnyGenerator<String> {
return AnyGenerator {
return self.nextLine()
}
}
}
更新Swift 3 / Xcode 8 beta 6:也“现代化”
使用guard
和新的Data
值类型:
class StreamReader {
let encoding : String.Encoding
let chunkSize : Int
var fileHandle : FileHandle!
let delimData : Data
var buffer : Data
var atEof : Bool
init?(path: String, delimiter: String = "\n", encoding: String.Encoding = .utf8,
chunkSize: Int = 4096) {
guard let fileHandle = FileHandle(forReadingAtPath: path),
let delimData = delimiter.data(using: encoding) else {
return nil
}
self.encoding = encoding
self.chunkSize = chunkSize
self.fileHandle = fileHandle
self.delimData = delimData
self.buffer = Data(capacity: chunkSize)
self.atEof = false
}
deinit {
self.close()
}
/// Return next line, or nil on EOF.
func nextLine() -> String? {
precondition(fileHandle != nil, "Attempt to read from closed file")
// Read data chunks from file until a line delimiter is found:
while !atEof {
if let range = buffer.range(of: delimData) {
// Convert complete line (excluding the delimiter) to a string:
let line = String(data: buffer.subdata(in: 0..<range.lowerBound), encoding: encoding)
// Remove line (and the delimiter) from the buffer:
buffer.removeSubrange(0..<range.upperBound)
return line
}
let tmpData = fileHandle.readData(ofLength: chunkSize)
if tmpData.count > 0 {
buffer.append(tmpData)
} else {
// EOF or read error.
atEof = true
if buffer.count > 0 {
// Buffer contains last line in file (not terminated by delimiter).
let line = String(data: buffer as Data, encoding: encoding)
buffer.count = 0
return line
}
}
}
return nil
}
/// Start reading from the beginning of file.
func rewind() -> Void {
fileHandle.seek(toFileOffset: 0)
buffer.count = 0
atEof = false
}
/// Close the underlying file. No reading must be done after calling this method.
func close() -> Void {
fileHandle?.closeFile()
fileHandle = nil
}
}
extension StreamReader : Sequence {
func makeIterator() -> AnyIterator<String> {
return AnyIterator {
return self.nextLine()
}
}
}
答案 1 :(得分:18)
我已将藻类的代码包裹在方便的课程中(Swift 4.0)
UPD:此代码与平台无关(macOS,iOS,ubuntu)
import Foundation
/// Read text file line by line
public class LineReader {
public let path: String
fileprivate let file: UnsafeMutablePointer<FILE>!
init?(path: String) {
self.path = path
file = fopen(path, "r")
guard file != nil else { return nil }
}
public var nextLine: String? {
var line:UnsafeMutablePointer<CChar>? = nil
var linecap:Int = 0
defer { free(line) }
return getline(&line, &linecap, file) > 0 ? String(cString: line!) : nil
}
deinit {
fclose(file)
}
}
extension LineReader: Sequence {
public func makeIterator() -> AnyIterator<String> {
return AnyIterator<String> {
return self.nextLine
}
}
}
用法:
guard let reader = LineReader(path: "/Path/to/file.txt") else {
return; // cannot open file
}
for line in reader {
print(">" + line.trimmingCharacters(in: .whitespacesAndNewlines))
}
答案 2 :(得分:4)
我已经迟到了,但这是我为此目的写的小班。经过一些不同的尝试(尝试子类NSInputStream
)后,我发现这是一种合理而简单的方法。
请记住您的桥接标题中的#import <stdio.h>
。
// Use is like this:
let readLine = ReadLine(somePath)
while let line = readLine.readLine() {
// do something...
}
class ReadLine {
private var buf = UnsafeMutablePointer<Int8>.alloc(1024)
private var n: Int = 1024
let path: String
let mode: String = "r"
private lazy var filepointer: UnsafeMutablePointer<FILE> = {
let csmode = self.mode.withCString { cs in return cs }
let cspath = self.path.withCString { cs in return cs }
return fopen(cspath, csmode)
}()
init(path: String) {
self.path = path
}
func readline() -> String? {
// unsafe for unknown input
if getline(&buf, &n, filepointer) > 0 {
return String.fromCString(UnsafePointer<CChar>(buf))
}
return nil
}
deinit {
buf.dealloc(n)
fclose(filepointer)
}
}
答案 3 :(得分:2)
事实证明,一旦你理解了UnsafePointer,好的老式C API在Swift中非常舒服。这是一个简单的cat,它从stdin读取并逐行打印到stdout。你甚至不需要基金会。达尔文就足够了:
import Darwin
let bufsize = 4096
// let stdin = fdopen(STDIN_FILENO, "r") it is now predefined in Darwin
var buf = UnsafePointer<Int8>.alloc(bufsize)
while fgets(buf, Int32(bufsize-1), stdin) {
print(String.fromCString(CString(buf)))
}
buf.destroy()
答案 4 :(得分:2)
此函数接收文件流并返回AnyGenerator
,返回文件的每一行:
func lineGenerator(file:UnsafeMutablePointer<FILE>) -> AnyGenerator<String>
{
return AnyGenerator { () -> String? in
var line:UnsafeMutablePointer<CChar> = nil
var linecap:Int = 0
defer { free(line) }
return getline(&line, &linecap, file) > 0 ? String.fromCString(line) : nil
}
}
因此,例如,您可以使用它来打印应用包中名为“foo”的文件的每一行:
let path = NSBundle.mainBundle().pathForResource("foo", ofType: nil)!
let file = fopen(path,"r") // open the file stream
for line in lineGenerator(file) {
// suppress print's automatically inserted line ending, since
// lineGenerator captures each line's own new line character.
print(line, separator: "", terminator: "")
}
fclose(file) // cleanup the file stream
我通过修改Alex Brown的答案来修改这个答案,以消除Martin R评论中提到的内存泄漏,并更新它以使用Swift 2.2(Xcode 7.3)。
答案 5 :(得分:2)
Swift 4.2 安全语法
class LineReader {
let path: String
init?(path: String) {
self.path = path
guard let file = fopen(path, "r") else {
return nil
}
self.file = file
}
deinit {
fclose(file)
}
var nextLine: String? {
var line: UnsafeMutablePointer<CChar>?
var linecap = 0
defer {
free(line)
}
let status = getline(&line, &linecap, file)
guard status > 0, let unwrappedLine = line else {
return nil
}
return String(cString: unwrappedLine)
}
private let file: UnsafeMutablePointer<FILE>
}
extension LineReader: Sequence {
func makeIterator() -> AnyIterator<String> {
return AnyIterator<String> {
return self.nextLine
}
}
}
用法:
guard let reader = LineReader(path: "/Path/to/file.txt") else {
return
}
reader.forEach { line in
print(line.trimmingCharacters(in: .whitespacesAndNewlines))
}
答案 6 :(得分:1)
尝试this回答,或阅读Mac OS Stream Programming Guide。
您可能会发现使用stringWithContentsOfURL
实际上会更好,因为使用基于内存(或内存映射)的数据比使用基于光盘的数据更快。
在另一个线程上执行它也有详细记录,例如here。
更新
如果您不想一次阅读所有内容,并且您不想使用NSStream,那么您可能不得不使用C级文件I / O.有许多的理由不这样做 - 阻塞,字符编码,处理I / O错误,速度等等 - 这就是Foundation库的用途。我在下面简单回答了只处理ACSII数据的答案:
class StreamReader {
var eofReached = false
let fileHandle: UnsafePointer<FILE>
init (path: String) {
self.fileHandle = fopen(path.bridgeToObjectiveC().UTF8String, "rb".bridgeToObjectiveC().UTF8String)
}
deinit {
fclose(self.fileHandle)
}
func nextLine() -> String {
var nextChar: UInt8 = 0
var stringSoFar = ""
var eolReached = false
while (self.eofReached == false) && (eolReached == false) {
if fread(&nextChar, 1, 1, self.fileHandle) == 1 {
switch nextChar & 0xFF {
case 13, 10 : // CR, LF
eolReached = true
case 0...127 : // Keep it in ASCII
stringSoFar += NSString(bytes:&nextChar, length:1, encoding: NSASCIIStringEncoding)
default :
stringSoFar += "<\(nextChar)>"
}
} else { // EOF or error
self.eofReached = true
}
}
return stringSoFar
}
}
// OP's original request follows:
var aStreamReader = StreamReader(path: "~/Desktop/Test.text".stringByStandardizingPath)
while aStreamReader.eofReached == false { // Changed property name for more accurate meaning
let currentline = aStreamReader.nextLine()
//list.addItem(currentline)
println(currentline)
}
答案 7 :(得分:1)
(注意:我在Xcode 8.2.1上使用Swift 3.0.1和macOS Sierra 10.12.3)
我在这里看到的所有答案都错过了他可能正在寻找LF或CRLF。如果一切顺利,他/她可以匹配LF并在最后检查返回的字符串以获得额外的CR。但是一般查询涉及多个搜索字符串。换句话说,分隔符必须是Set<String>
,其中集合既不是空的也不包含空字符串,而不是单个字符串。
在去年的第一次尝试中,我尝试做“正确的事情”并搜索一组通用字符串。太难了;你需要一个完整的解析器和状态机等。我放弃了它和它所属的项目。
现在我又在做这个项目,并再次面临同样的挑战。现在我要对CR和LF进行硬编码搜索。我不认为任何人都需要在CR / LF解析之外搜索两个半独立和半依赖的字符。
我正在使用Data
提供的搜索方法,因此我不会在这里进行字符串编码。只是原始二进制处理。假设我有一个ASCII超集,如ISO Latin-1或UTF-8,这里。您可以在下一个更高层处理字符串编码,并且您可以确定附加了辅助代码点的CR / LF是否仍然计为CR或LF。
算法:只需从当前字节偏移量中继续搜索下一个CR 和下一个LF。
以下是一些代码:
struct DataInternetLineIterator: IteratorProtocol {
/// Descriptor of the location of a line
typealias LineLocation = (offset: Int, length: Int, terminatorLength: Int)
/// Carriage return.
static let cr: UInt8 = 13
/// Carriage return as data.
static let crData = Data(repeating: cr, count: 1)
/// Line feed.
static let lf: UInt8 = 10
/// Line feed as data.
static let lfData = Data(repeating: lf, count: 1)
/// The data to traverse.
let data: Data
/// The byte offset to search from for the next line.
private var lineStartOffset: Int = 0
/// Initialize with the data to read over.
init(data: Data) {
self.data = data
}
mutating func next() -> LineLocation? {
guard self.data.count - self.lineStartOffset > 0 else { return nil }
let nextCR = self.data.range(of: DataInternetLineIterator.crData, options: [], in: lineStartOffset..<self.data.count)?.lowerBound
let nextLF = self.data.range(of: DataInternetLineIterator.lfData, options: [], in: lineStartOffset..<self.data.count)?.lowerBound
var location: LineLocation = (self.lineStartOffset, -self.lineStartOffset, 0)
let lineEndOffset: Int
switch (nextCR, nextLF) {
case (nil, nil):
lineEndOffset = self.data.count
case (nil, let offsetLf):
lineEndOffset = offsetLf!
location.terminatorLength = 1
case (let offsetCr, nil):
lineEndOffset = offsetCr!
location.terminatorLength = 1
default:
lineEndOffset = min(nextLF!, nextCR!)
if nextLF! < nextCR! {
location.terminatorLength = 1
} else {
switch nextLF! - nextCR! {
case 2 where self.data[nextCR! + 1] == DataInternetLineIterator.cr:
location.terminatorLength += 1 // CR-CRLF
fallthrough
case 1:
location.terminatorLength += 1 // CRLF
fallthrough
default:
location.terminatorLength += 1 // CR-only
}
}
}
self.lineStartOffset = lineEndOffset + location.terminatorLength
location.length += self.lineStartOffset
return location
}
}
当然,如果你的Data
块长度至少是千兆字节的一小部分,那么只要当前字节偏移不存在CR或LF,你就会受到攻击;在每次迭代期间总是毫无结果地搜索到最后。以块的形式读取数据会有所帮助:
struct DataBlockIterator: IteratorProtocol {
/// The data to traverse.
let data: Data
/// The offset into the data to read the next block from.
private(set) var blockOffset = 0
/// The number of bytes remaining. Kept so the last block is the right size if it's short.
private(set) var bytesRemaining: Int
/// The size of each block (except possibly the last).
let blockSize: Int
/// Initialize with the data to read over and the chunk size.
init(data: Data, blockSize: Int) {
precondition(blockSize > 0)
self.data = data
self.bytesRemaining = data.count
self.blockSize = blockSize
}
mutating func next() -> Data? {
guard bytesRemaining > 0 else { return nil }
defer { blockOffset += blockSize ; bytesRemaining -= blockSize }
return data.subdata(in: blockOffset..<(blockOffset + min(bytesRemaining, blockSize)))
}
}
你必须自己将这些想法混合在一起,因为我还没有完成它。考虑:
答案 8 :(得分:1)
在@dankogai的answer之后,我对Swift 4+进行了一些修改,
let bufsize = 4096
let fp = fopen(jsonURL.path, "r");
var buf = UnsafeMutablePointer<Int8>.allocate(capacity: bufsize)
while (fgets(buf, Int32(bufsize-1), fp) != nil) {
print( String(cString: buf) )
}
buf.deallocate()
这对我有用。
谢谢
答案 9 :(得分:0)
或者你可以简单地使用Generator
:
let stdinByLine = GeneratorOf({ () -> String? in
var input = UnsafeMutablePointer<Int8>(), lim = 0
return getline(&input, &lim, stdin) > 0 ? String.fromCString(input) : nil
})
让我们尝试一下
for line in stdinByLine {
println(">>> \(line)")
}
它简单,懒惰,易于与其他快速的东西链接,如枚举器和仿函数,如map,reduce,filter;使用lazy()
包装器。
它概括为所有FILE
:
let byLine = { (file:UnsafeMutablePointer<FILE>) in
GeneratorOf({ () -> String? in
var input = UnsafeMutablePointer<Int8>(), lim = 0
return getline(&input, &lim, file) > 0 ? String.fromCString(input) : nil
})
}
称为
for line in byLine(stdin) { ... }
答案 10 :(得分:0)
我想要一个不会不断修改缓冲区或重复代码的版本,因为它们都是低效的,并且允许任何大小的缓冲区(包括1个字节)和任何分隔符。它有一个公共方法:readline()
。调用此方法将返回下一行的String值或EOF处的nil。
import Foundation
// LineStream(): path: String, [buffSize: Int], [delim: String] -> nil | String
// ============= --------------------------------------------------------------
// path: the path to a text file to be parsed
// buffSize: an optional buffer size, (1...); default is 4096
// delim: an optional delimiter String; default is "\n"
// ***************************************************************************
class LineStream {
let path: String
let handle: NSFileHandle!
let delim: NSData!
let encoding: NSStringEncoding
var buffer = NSData()
var buffSize: Int
var buffIndex = 0
var buffEndIndex = 0
init?(path: String,
buffSize: Int = 4096,
delim: String = "\n",
encoding: NSStringEncoding = NSUTF8StringEncoding)
{
self.handle = NSFileHandle(forReadingAtPath: path)
self.path = path
self.buffSize = buffSize < 1 ? 1 : buffSize
self.encoding = encoding
self.delim = delim.dataUsingEncoding(encoding)
if handle == nil || self.delim == nil {
print("ERROR initializing LineStream") /* TODO use STDERR */
return nil
}
}
// PRIVATE
// fillBuffer(): _ -> Int [0...buffSize]
// ============= -------- ..............
// Fill the buffer with new data; return with the buffer size, or zero
// upon reaching end-of-file
// *********************************************************************
private func fillBuffer() -> Int {
buffer = handle.readDataOfLength(buffSize)
buffIndex = 0
buffEndIndex = buffer.length
return buffEndIndex
}
// PRIVATE
// delimLocation(): _ -> Int? nil | [1...buffSize]
// ================ --------- ....................
// Search the remaining buffer for a delimiter; return with the location
// of a delimiter in the buffer, or nil if one is not found.
// ***********************************************************************
private func delimLocation() -> Int? {
let searchRange = NSMakeRange(buffIndex, buffEndIndex - buffIndex)
let rangeToDelim = buffer.rangeOfData(delim,
options: [], range: searchRange)
return rangeToDelim.location == NSNotFound
? nil
: rangeToDelim.location
}
// PRIVATE
// dataStrValue(): NSData -> String ("" | String)
// =============== ---------------- .............
// Attempt to convert data into a String value using the supplied encoding;
// return the String value or empty string if the conversion fails.
// ***********************************************************************
private func dataStrValue(data: NSData) -> String? {
if let strVal = NSString(data: data, encoding: encoding) as? String {
return strVal
} else { return "" }
}
// PUBLIC
// readLine(): _ -> String? nil | String
// =========== ____________ ............
// Read the next line of the file, i.e., up to the next delimiter or end-of-
// file, whichever occurs first; return the String value of the data found,
// or nil upon reaching end-of-file.
// *************************************************************************
func readLine() -> String? {
guard let line = NSMutableData(capacity: buffSize) else {
print("ERROR setting line")
exit(EXIT_FAILURE)
}
// Loop until a delimiter is found, or end-of-file is reached
var delimFound = false
while !delimFound {
// buffIndex will equal buffEndIndex in three situations, resulting
// in a (re)filling of the buffer:
// 1. Upon the initial call;
// 2. If a search for a delimiter has failed
// 3. If a delimiter is found at the end of the buffer
if buffIndex == buffEndIndex {
if fillBuffer() == 0 {
return nil
}
}
var lengthToDelim: Int
let startIndex = buffIndex
// Find a length of data to place into the line buffer to be
// returned; reset buffIndex
if let delim = delimLocation() {
// SOME VALUE when a delimiter is found; append that amount of
// data onto the line buffer,and then return the line buffer
delimFound = true
lengthToDelim = delim - buffIndex
buffIndex = delim + 1 // will trigger a refill if at the end
// of the buffer on the next call, but
// first the line will be returned
} else {
// NIL if no delimiter left in the buffer; append the rest of
// the buffer onto the line buffer, refill the buffer, and
// continue looking
lengthToDelim = buffEndIndex - buffIndex
buffIndex = buffEndIndex // will trigger a refill of buffer
// on the next loop
}
line.appendData(buffer.subdataWithRange(
NSMakeRange(startIndex, lengthToDelim)))
}
return dataStrValue(line)
}
}
如下所示:
guard let myStream = LineStream(path: "/path/to/file.txt")
else { exit(EXIT_FAILURE) }
while let s = myStream.readLine() {
print(s)
}