如何逐行读取NSFileHandle的数据?

时间:2010-09-14 09:04:28

标签: objective-c cocoa nsfilehandle

我有一个文本文件,其中包含数据

e.g。

PUFGUjVRallYZDNaazFtVjVObU1zWm5ZcUJUYU5ORk4zbGthNHNDVUdSMlFVQmpSVEoxUUNSallYaFhkanBITXBGR1NTQnpZRTltZE1OalVzSkdXQ0Z6WXR0V2RpTmpTdXgwTWs5V1lZSkZiWjFXT29OV2JSVlhaSTUwYUpwR040UUZXTzVHVXFoWFVRcFdWNHdVTUJ0Q1VHSmxXVlJVTlJCMVE1VTFWV
PUFGUjVRallYZDNaazFtVjVObU1zWm5ZcUJUYU5ORk4zbGthNHNDVUdSMlFVQmpSVEoxUUNSallYaFhkanBITXBGR1NTQnpZRTltZE1OalVzSkdXQ0Z6WXR0V2RpTmpTdXgwTWs5V1lZSkZiWjFXT29OV2JSVlhaSTUwYUpwR040UUZXTzVHVXFoWFVRcFdWNHdVTUJ0Q1VHSmxXVlJVTlJCMVE1VTFWV

现在我想逐行读取数据。这意味着首先我要阅读

PUFGUjVRallYZDNaazFtVjVObU1zWm5ZcUJUYU5ORk4zbGthNHNDVUdSMlFVQmpSVEoxUUNSallYaFhkanBITXBGR1NTQnpZRTltZE1OalVzSkdXQ0Z6WXR0V2RpTmpTdXgwTWs5V1lZSkZiWjFXT29OV2JSVlhaSTUwYUpwR040UUZXTzVHVXFoWFVRcFdWNHdVTUJ0Q1VHSmxXVlJVTlJCMVE1VTFWV

然后下一个剩下的。 任何人都有任何想法??

11 个答案:

答案 0 :(得分:147)

如果您的文件很小,那么@ mipadi的方法可能会很好。但是,如果您的文件很大(可能是> 1MB?),那么您可能需要考虑逐行读取文件。我曾写过一次课程,我将在此处粘贴:

//DDFileReader.h

@interface DDFileReader : NSObject {
    NSString * filePath;

    NSFileHandle * fileHandle;
    unsigned long long currentOffset;
    unsigned long long totalFileLength;

    NSString * lineDelimiter;
    NSUInteger chunkSize;
}

@property (nonatomic, copy) NSString * lineDelimiter;
@property (nonatomic) NSUInteger chunkSize;

- (id) initWithFilePath:(NSString *)aPath;

- (NSString *) readLine;
- (NSString *) readTrimmedLine;

#if NS_BLOCKS_AVAILABLE
- (void) enumerateLinesUsingBlock:(void(^)(NSString*, BOOL *))block;
#endif

@end


//DDFileReader.m

#import "DDFileReader.h"

@interface NSData (DDAdditions)

- (NSRange) rangeOfData_dd:(NSData *)dataToFind;

@end

@implementation NSData (DDAdditions)

- (NSRange) rangeOfData_dd:(NSData *)dataToFind {

    const void * bytes = [self bytes];
    NSUInteger length = [self length];

    const void * searchBytes = [dataToFind bytes];
    NSUInteger searchLength = [dataToFind length];
    NSUInteger searchIndex = 0;

    NSRange foundRange = {NSNotFound, searchLength};
    for (NSUInteger index = 0; index < length; index++) {
        if (((char *)bytes)[index] == ((char *)searchBytes)[searchIndex]) {
            //the current character matches
            if (foundRange.location == NSNotFound) {
                foundRange.location = index;
            }
            searchIndex++;
            if (searchIndex >= searchLength) { return foundRange; }
        } else {
            searchIndex = 0;
            foundRange.location = NSNotFound;
        }
    }
    return foundRange;
}

@end

@implementation DDFileReader
@synthesize lineDelimiter, chunkSize;

- (id) initWithFilePath:(NSString *)aPath {
    if (self = [super init]) {
        fileHandle = [NSFileHandle fileHandleForReadingAtPath:aPath];
        if (fileHandle == nil) {
            [self release]; return nil;
        }

        lineDelimiter = [[NSString alloc] initWithString:@"\n"];
        [fileHandle retain];
        filePath = [aPath retain];
        currentOffset = 0ULL;
        chunkSize = 10;
        [fileHandle seekToEndOfFile];
        totalFileLength = [fileHandle offsetInFile];
        //we don't need to seek back, since readLine will do that.
    }
    return self;
}

- (void) dealloc {
    [fileHandle closeFile];
    [fileHandle release], fileHandle = nil;
    [filePath release], filePath = nil;
    [lineDelimiter release], lineDelimiter = nil;
    currentOffset = 0ULL;
    [super dealloc];
}

- (NSString *) readLine {
    if (currentOffset >= totalFileLength) { return nil; }

    NSData * newLineData = [lineDelimiter dataUsingEncoding:NSUTF8StringEncoding];
    [fileHandle seekToFileOffset:currentOffset];
    NSMutableData * currentData = [[NSMutableData alloc] init];
    BOOL shouldReadMore = YES;

    NSAutoreleasePool * readPool = [[NSAutoreleasePool alloc] init];
    while (shouldReadMore) {
        if (currentOffset >= totalFileLength) { break; }
        NSData * chunk = [fileHandle readDataOfLength:chunkSize];
        NSRange newLineRange = [chunk rangeOfData_dd:newLineData];
        if (newLineRange.location != NSNotFound) {

            //include the length so we can include the delimiter in the string
            chunk = [chunk subdataWithRange:NSMakeRange(0, newLineRange.location+[newLineData length])];
            shouldReadMore = NO;
        }
        [currentData appendData:chunk];
        currentOffset += [chunk length];
    }
    [readPool release];

    NSString * line = [[NSString alloc] initWithData:currentData encoding:NSUTF8StringEncoding];
    [currentData release];
    return [line autorelease];
}

- (NSString *) readTrimmedLine {
    return [[self readLine] stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];
}

#if NS_BLOCKS_AVAILABLE
- (void) enumerateLinesUsingBlock:(void(^)(NSString*, BOOL*))block {
  NSString * line = nil;
  BOOL stop = NO;
  while (stop == NO && (line = [self readLine])) {
    block(line, &stop);
  }
}
#endif

@end

然后使用它,你会做:

DDFileReader * reader = [[DDFileReader alloc] initWithFilePath:pathToMyFile];
NSString * line = nil;
while ((line = [reader readLine])) {
  NSLog(@"read line: %@", line);
}
[reader release];

或(适用于10.6+和iOS 4 +):

DDFileReader * reader = [[DDFileReader alloc] initWithFilePath:pathToMyFile];
[reader enumerateLinesUsingBlock:^(NSString * line, BOOL * stop) {
  NSLog(@"read line: %@", line);
}];
[reader release];
圣诞快乐。 :)

答案 1 :(得分:21)

我将此重写为符合ARC标准:

//
//  DDFileReader.m
//  PBX2OPML
//
//  Created by michael isbell on 11/6/11.
//  Copyright (c) 2011 BlueSwitch. All rights reserved.
//

//DDFileReader.m

#import "DDFileReader.h"

@interface NSData (DDAdditions)

- (NSRange) rangeOfData_dd:(NSData *)dataToFind;

@end

@implementation NSData (DDAdditions)

- (NSRange) rangeOfData_dd:(NSData *)dataToFind {

    const void * bytes = [self bytes];
    NSUInteger length = [self length];

    const void * searchBytes = [dataToFind bytes];
    NSUInteger searchLength = [dataToFind length];
    NSUInteger searchIndex = 0;

    NSRange foundRange = {NSNotFound, searchLength};
    for (NSUInteger index = 0; index < length; index++) {
        if (((char *)bytes)[index] == ((char *)searchBytes)[searchIndex]) {
            //the current character matches
            if (foundRange.location == NSNotFound) {
                foundRange.location = index;
            }
            searchIndex++;
            if (searchIndex >= searchLength) { return foundRange; }
        } else {
            searchIndex = 0;
            foundRange.location = NSNotFound;
        }
    }
    return foundRange;
}

@end

@implementation DDFileReader
@synthesize lineDelimiter, chunkSize;

- (id) initWithFilePath:(NSString *)aPath {
    if (self = [super init]) {
        fileHandle = [NSFileHandle fileHandleForReadingAtPath:aPath];
        if (fileHandle == nil) {
            return nil;
        }

        lineDelimiter = @"\n";
        currentOffset = 0ULL; // ???
        chunkSize = 10;
        [fileHandle seekToEndOfFile];
        totalFileLength = [fileHandle offsetInFile];
        //we don't need to seek back, since readLine will do that.
    }
    return self;
}

- (void) dealloc {
    [fileHandle closeFile];
    currentOffset = 0ULL;

}

- (NSString *) readLine {
    if (currentOffset >= totalFileLength) { return nil; }

    NSData * newLineData = [lineDelimiter dataUsingEncoding:NSUTF8StringEncoding];
    [fileHandle seekToFileOffset:currentOffset];
    NSMutableData * currentData = [[NSMutableData alloc] init];
    BOOL shouldReadMore = YES;

    @autoreleasepool {

    while (shouldReadMore) {
        if (currentOffset >= totalFileLength) { break; }
        NSData * chunk = [fileHandle readDataOfLength:chunkSize];
        NSRange newLineRange = [chunk rangeOfData_dd:newLineData];
        if (newLineRange.location != NSNotFound) {

            //include the length so we can include the delimiter in the string
            chunk = [chunk subdataWithRange:NSMakeRange(0, newLineRange.location+[newLineData length])];
            shouldReadMore = NO;
        }
        [currentData appendData:chunk];
        currentOffset += [chunk length];
    }
    }

    NSString * line = [[NSString alloc] initWithData:currentData encoding:NSUTF8StringEncoding];
    return line;  
}

- (NSString *) readTrimmedLine {
    return [[self readLine] stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];
}

#if NS_BLOCKS_AVAILABLE
- (void) enumerateLinesUsingBlock:(void(^)(NSString*, BOOL*))block {
    NSString * line = nil;
    BOOL stop = NO;
    while (stop == NO && (line = [self readLine])) {
        block(line, &stop);
    }
}
#endif

@end

答案 2 :(得分:17)

我根据GitHub project的源代码开始Dave DeLong。欢迎您改进代码。到现在为止,我可以向前和向后阅读文件。

答案 3 :(得分:12)

NSString *fh = [NSString stringWithContentsOfFile:filePath encoding:fileEncoding error:NULL];
for (NSString *line in [fh componentsSeparatedByString:@"\n"]) {
    // Do something with the line
}

在Cocoa或内置语言结构中没有用于逐行读取文件的API。

答案 4 :(得分:4)

LARGE 文本文件的这个问题的答案不需要自定义函数。 Objective-C是c的超集,因此有c方法可以做到这一点。

FILE* file = fopen("path to my file", "r");

size_t length;
char *cLine = fgetln(file,&length);

while (length>0) {
    char str[length+1];
    strncpy(str, cLine, length);
    str[length] = '\0';

    NSString *line = [NSString stringWithFormat:@"%s",str];        
    % Do what you want here.

    cLine = fgetln(file,&length);
}

请注意,fgetln不会保留换行符。另外,我们+1为str​​的长度,因为我们想为NULL终止腾出空间。

答案 5 :(得分:3)

这是一个用于从NSInputStream中读取单个行的方法。请注意,它针对可读性而非速度进行了优化。 ; - )

- (NSString*) readLine: (NSInputStream*) inputStream {
    NSMutableData* data = [NSMutableData data];
    uint8_t oneByte;
    do {
        int actuallyRead = [inputStream read: &oneByte maxLength: 1];
        if (actuallyRead == 1) {
            [data appendBytes: &oneByte length: 1];
        }        
    } while (oneByte != '\n');

    return [[NSString alloc] initWithData: data encoding: NSUTF8StringEncoding];

答案 6 :(得分:2)

我发现GitX也使用了线路阅读器 结帐brotherbard's repository on GitHubwebsite of the Michael Stapelberg

@Joe Yang
太好了!接下来的几天我会仔细看看 如果您想 fork my repository on GitHub向我发送拉取请求,我会很高兴。

答案 7 :(得分:1)

我已将FileReader修改为NSFileHandle类别,希望它可以帮助其他人

@interface NSFileHandle (Readline)
- (NSString*)readLine;
- (NSString*)readLineBackwards;
@end

#import "NSFileHandle+Readline.h"
#import "NSDataExtensions.h"

@implementation NSFileHandle (Readline)

- (NSString*)readLine {

    NSString * _lineDelimiter = @"\n";

    NSData* newLineData = [_lineDelimiter dataUsingEncoding:NSUTF8StringEncoding];
    NSMutableData* currentData = [[NSMutableData alloc] init];
    BOOL shouldReadMore = YES;

    NSUInteger _chunkSize = 10;

    while (shouldReadMore) {
        NSData* chunk = [self readDataOfLength:_chunkSize]; // always length = 10

        if ([chunk length] == 0) {
            break;
        }

        // Find the location and length of the next line delimiter.
        NSRange newLineRange = [chunk rangeOfData:newLineData];
        if (newLineRange.location != NSNotFound) {
            // Include the length so we can include the delimiter in the string.
            NSRange subDataRange = NSMakeRange(0, newLineRange.location + [newLineData length]);
            unsigned long long newOffset = [self offsetInFile] - [chunk length] + newLineRange.location + [newLineData length];
            [self seekToFileOffset:newOffset];
            chunk = [chunk subdataWithRange:subDataRange];
            shouldReadMore = NO;
        }
        [currentData appendData:chunk];
    }

    NSString* line = [currentData stringValueWithEncoding:NSASCIIStringEncoding];
    return line;
}

- (NSString*)readLineBackwards {

    NSString * _lineDelimiter = @"\n";

    NSData* newLineData = [_lineDelimiter dataUsingEncoding:NSUTF8StringEncoding];
    NSUInteger _chunkSize = 10;

    NSMutableData* currentData = [[NSMutableData alloc] init];
    BOOL shouldReadMore = YES;

    while (shouldReadMore) {

        unsigned long long offset;

        NSUInteger currentChunkSize = _chunkSize;

        if ([self offsetInFile] <= _chunkSize) {
            offset = 0;
            currentChunkSize = [self offsetInFile];
            shouldReadMore = NO;
        } else {
            offset = [self offsetInFile] - _chunkSize;
        }

        NSLog(@"seek to offset %qu, offset in file is %qu", offset, [self offsetInFile]);

        [self seekToFileOffset:offset];

        NSData* chunk = [self readDataOfLength:currentChunkSize];

        NSRange newLineRange = [chunk rangeOfDataBackwardsSearch:newLineData];

        if (newLineRange.location == NSNotFound) {
            [self seekToFileOffset:offset];
        }

        if (newLineRange.location != NSNotFound) {
            NSUInteger subDataLoc = newLineRange.location;
            NSUInteger subDataLen = currentChunkSize - subDataLoc;
            chunk = [chunk subdataWithRange:NSMakeRange(subDataLoc, subDataLen)];
            NSLog(@"got chunk data %@", [chunk stringValueWithEncoding:NSASCIIStringEncoding]);
            shouldReadMore = NO;
            [self seekToFileOffset:offset + newLineRange.location];
        }
        [currentData prepend:chunk];
    }

    NSString* line = [[NSString alloc] initWithData:currentData encoding:NSASCIIStringEncoding];
    return [line autorelease];
}

@end





//
//  NSDataExtensions.m
//  LineReader
//
//  Created by Tobias Preuss on 08.10.10.
//  Copyright 2010 Tobias Preuss. All rights reserved.
//

#import "NSDataExtensions.h"



// -----------------------------------------------------------------------------
// NSData additions.
// -----------------------------------------------------------------------------


/**
 Extension of the NSData class. 
 Data can be found forwards or backwards. Further the extension supplies a function 
 to convert the contents to string for debugging purposes.
 @param Additions Category labeled Additions.
 @returns An initialized NSData object or nil if the object could not be created.
 */
@implementation NSData (Additions)




/**
 Returns a range of data.
 @param dataToFind Data object specifying the delimiter and encoding.
 @returns A range.
 */
- (NSRange)rangeOfData:(NSData*)dataToFind {

    const void* bytes = [self bytes];
    NSUInteger length = [self length];
    const void* searchBytes = [dataToFind bytes];
    NSUInteger searchLength = [dataToFind length];
    NSUInteger searchIndex = 0;

    NSRange foundRange = {NSNotFound, searchLength};
    for (NSUInteger index = 0; index < length; index++) {
        // The current character matches.
        if (((char*)bytes)[index] == ((char*)searchBytes)[searchIndex]) {
            // Store found location if not done earlier.
            if (foundRange.location == NSNotFound) {
                foundRange.location = index;
            }
            // Increment search character index to check for match.
            searchIndex++;
            // All search character match.
            // Break search routine and return found position.
            if (searchIndex >= searchLength) {
                return foundRange;
            }
        }
        // Match does not continue.
        // Return to the first search character.
        // Discard former found location.
        else {
            searchIndex = 0;
            foundRange.location = NSNotFound;
        }
    }
    return foundRange;
}


- (NSRange)rangeOfDataBackwardsSearch:(NSData*)dataToFind {

    const void* bytes = [self bytes];
    NSUInteger length = [self length];
    const void* searchBytes = [dataToFind bytes];
    NSUInteger searchLength = [dataToFind length];
    NSUInteger searchIndex = 0;

    NSRange foundRange = {NSNotFound, searchLength};
    if (length < searchLength) {
        return foundRange;
    }
    for (NSUInteger index = length - searchLength; index >= 0;) {
//      NSLog(@"%c == %c", ((char*)bytes)[index], ((char*)searchBytes)[searchIndex]); /* DEBUG LOG */
        if (((char*)bytes)[index] == ((char*)searchBytes)[searchIndex]) {
            // The current character matches.
            if (foundRange.location == NSNotFound) {
                foundRange.location = index;
            }
            index++;
            searchIndex++;
            if (searchIndex >= searchLength) {
                return foundRange;
            }
        }
        else {
            // Decrement to search backwards.
            if (foundRange.location == NSNotFound) {
                // Skip if first byte has been reached.
                if (index == 0) {
                    foundRange.location = NSNotFound;
                    return foundRange;
                }
                index--;
            }
            // Jump over the former found location
            // to avoid endless loop.
            else {
                index = index - 2;
            }
            searchIndex = 0;
            foundRange.location = NSNotFound;
        }
    }
    return foundRange;
}

- (NSString*)stringValueWithEncoding:(NSStringEncoding)encoding {
    return [[NSString alloc] initWithData:self encoding:encoding];
}

@end




// -----------------------------------------------------------------------------
// NSMutableData additions.
// -----------------------------------------------------------------------------


/**
 Extension of the NSMutableData class. 
 Data can be prepended in addition to the append function of the framework.
 @param Additions Category labeled Additions.
 @returns An initialized NSMutableData object or nil if the object could not be created.
 */
@implementation NSMutableData (Additions)

/**
    Inserts the data before the data of the object.
    @param data Data to be prepended.
 */
- (void)prepend:(NSData*)data {


    NSMutableData* concat = [NSMutableData dataWithData:data];
    [concat appendData:self];
    [self setData:concat];
}

@end

答案 8 :(得分:1)

您还可以在https://github.com/xcvista/ohttpd2/tree/master/CGIStream查看我为HTTP服务器项目创建的CGIStream库。此代码不是文件描述符,而是在NSInputStream上运行。它本质上是来自Microsoft的.net框架的System.IO.StreamReaderSystem.IO.StreamWriter的Objective-C克隆。

它不仅适用于文件,还适用于网络套接字。我用它来处理HTTP协议,这是CGI前缀的同名。

答案 9 :(得分:1)

我遇到了与其他情况类似的情况,这是我在Swift 3中的解决方案。假设文本文件为utf8。

extension FileHandle {

    func enumerateLines(_ block: @escaping (String, UnsafeMutablePointer<Bool>) -> Void) {

        // find the end of file
        var offset = self.offsetInFile
        let eof = self.seekToEndOfFile()
        self.seek(toFileOffset: offset)
        let blockSize = 1024
        var buffer = Data()

        // process to the end of file
        while offset + UInt64(buffer.count) < eof {
            var found = false

            // make sure buffer contains at least one CR, LF or null
            while !found && offset + UInt64(buffer.count) < eof {
                let block = self.readData(ofLength: blockSize)
                buffer.append(block)
                for byte in block {
                    if [0x0d, 0x0a, 0x00].contains(byte) {
                        found = true ; break
                    }
                }
            }

            // retrieve lines within the buffer
            var index = 0
            var head = 0 // head of line
            var done = false
            buffer.enumerateBytes({ (pointer, count, stop) in
                while index < count {
                    // find a line terminator
                    if [0x0d, 0x0a, 0x00].contains(pointer[index]) {
                        let lineData = Data(pointer[head ..< index])
                        if let line = String(bytes: lineData, encoding: .utf8) {
                            block(line, &stop) // stop requested
                            if pointer[index] == 0x0d && index+1 < count && pointer[index+1] == 0x0a {
                                index += 2 ; head = index
                            }
                            else { index += 1 ; head = index }
                            if stop { done = true ; return } // end of enumerateLines
                        }
                        else { return } // end of enumerateLines
                    }
                    else { index += 1 }
                }
            })

            offset += UInt64(head)
            buffer.replaceSubrange(0 ..< head, with: Data())
            if done { // stop requested
                self.seek(toFileOffset: offset)
                return
            }
        }
    }

以下是用法:

    let fileURL = Bundle.main.url(forResource: "huge_file", withExtension: "txt")!
    let fileHandle = try! FileHandle(forReadingFrom: fileURL)

    fileHandle.enumerateLines { (line, stop) in
        if someCondition { stop.pointee = true }
        print(line)
    }
    /* let remaining = fileHandle.readDataToEndOfFile() */

https://gist.github.com/codelynx/c1de603a85e7503fe9597d027e93f4de

答案 10 :(得分:0)