Swift:解析10k行数据的最有效方式?

时间:2019-02-18 07:25:25

标签: json swift protocol-buffers flatbuffers

我的应用程序目前先把 1 万行数据读入变量,然后使用 SwiftyJSON 解析,并把结果写入 Realm 数据库。

来源https://github.com/skishore/makemeahanzi/blob/master/graphics.txt https://github.com/skishore/makemeahanzi/blob/master/dictionary.txt

问题:此过程花费的时间太长:2 分 28 秒,并且还需要 400 MB 的内存。

问题:如何使其更快?你们当中有人有使用 FlatBuffers 或 Protobuf 的经验吗?

非常欢迎您提供帮助!

干杯,Dom


这是代码:

/// Parses the bundled graphics and dictionary files and writes the resulting
/// objects to Realm in batches.
///
/// Both files are loaded with `readFile_Graphics()` and `readFile_Dict()`.
/// Line `i` of the graphics file is paired with line `i` of the dictionary
/// file. If the two files differ in line count, or contain at most one line,
/// a message is printed and nothing is written.
///
/// Objects are handed to `realmActions.writeCharsToRealm(characterArr:)` in
/// batches of 150, and the final partial batch is always flushed.
func parseToRealm() {

    // each of these files have 9500+ lines of data
    // (basically dictionaries with word definitions)
    let graphicsFileContents = readFile_Graphics()
    let dictFileContents = readFile_Dict()

    // check if counts of two source files match
    guard graphicsFileContents.count == dictFileContents.count,
          graphicsFileContents.count > 1 else {
        print ("two files have different counts of lines. aborting...")
        return
    }

    // Number of objects collected before each write to the DB.
    let batchSize = 150

    // Start with a truly empty array. `[Characters()]` would seed every batch
    // with one blank object that then gets written alongside the real ones.
    var characterArr = [Characters]()
    characterArr.reserveCapacity(batchSize)

    // Writes the pending batch (if any) and empties the array to save memory.
    // `lastIndex` is the source-line index of the last object in the batch.
    func flushBatch(endingAt lastIndex: Int) {
        guard !characterArr.isEmpty else { return }
        realmActions.writeCharsToRealm(characterArr: characterArr)
        print("Writing \(lastIndex + 1 - characterArr.count)-\(lastIndex)")
        characterArr.removeAll(keepingCapacity: true)
    }

    // loop through two files to get all chars
    for (i, jsonString) in graphicsFileContents.enumerated() {
        // parse data from string into json
        // (`Data(_:)` over the UTF-8 view cannot fail, so no force-unwrap is needed)
        let dataFromString = Data(jsonString.utf8)
        let singleCharJson = try? JSON(data: dataFromString)


        // parse stuff from file1
        // ... deleted lines for legal reasons


        // DICT information
        let dictDataFromString = Data(dictFileContents[i].utf8)
        let singleDictJson = try? JSON(data: dictDataFromString)

        // parse stuff from that dictionary
        // ... deleted lines for legal reasons

        // NOTE(review): `Character` is presumably the object built in the
        // elided lines above — confirm against the full source.
        characterArr.append(Character)

        // Every `batchSize` characters, write them into DB
        if characterArr.count >= batchSize {
            flushBatch(endingAt: i)
        }
    } // end loop file contents

    // Flush whatever is left. The previous in-loop test
    // `i == graphicsFileContents.count` could never be true, so the final
    // partial batch was silently dropped.
    flushBatch(endingAt: graphicsFileContents.count - 1)
}


// read graphics file and return all contents as array of strings
        // return Array of Strings
        /// Loads `graphics.txt` from the main bundle and splits it into lines.
        ///
        /// - Returns: The file contents separated at newline characters, or an
        ///   empty array when the resource is missing or cannot be read as UTF-8
        ///   (the read error is printed).
        func readFile_Graphics() -> [String] {
            guard let resourcePath = Bundle.main.path(forResource: "graphics", ofType: "txt") else {
                return [String]()
            }
            do {
                let contents = try String(contentsOfFile: resourcePath, encoding: .utf8)
                return contents.components(separatedBy: .newlines)
            } catch {
                print("cannot get file graphics.txt. Error message:")
                print(error)
                return [String]()
            }
        }



// read dictionary file and return all contents as array of strings
    /// Loads `dictionary.txt` from the main bundle and splits it into lines.
    ///
    /// - Returns: The file contents separated at newline characters, or an
    ///   empty array when the resource is missing or cannot be read as UTF-8
    ///   (the read error is printed). Failure used to yield `[""]`, a phantom
    ///   one-line result; it now matches `readFile_Graphics()`.
    func readFile_Dict() -> [String] {
        guard let path = Bundle.main.path(forResource: "dictionary", ofType: "txt") else {
            return []
        }
        do {
            let data = try String(contentsOfFile: path, encoding: .utf8)
            return data.components(separatedBy: .newlines)
        } catch {
            print("cannot get file dictionary.txt. Error message:")
            print(error)
            return []
        }
    }

1 个答案:

答案 0 :(得分:1)

DispatchQueue.global(qos: .background).async {
            // Streams graphics.txt line by line on a background queue, parsing
            // each line as JSON, and prints the elapsed time in whole minutes.
            guard let path = Bundle.main.path(forResource: "graphics", ofType: "txt") else {
                print("Dang! File wasn't found!")
                return
            }
            let cal = Calendar.current
            let d1 = Date()

            guard let streamReader = StreamReader(path: path) else {
                print("Dang! StreamReader couldn't be created!")
                return
            }
            var counter = 0
            // `nextLine()` returns nil at EOF, so `while let` ends the loop
            // cleanly. The previous `while !atEof` + `guard` combination
            // printed the "Oops!" message for every file ending in a newline.
            while let nextLine = streamReader.nextLine() {
                // Parsed value for this line; build the model object from it here.
                let json = JSON(parseJSON: nextLine)

                counter += 1
                print("\(counter): \(nextLine)")
            }
            if !streamReader.atEof {
                // nil before EOF means a line could not be decoded as text.
                print("Oops! Reached the end before printing!")
            }
            let d2 = Date()
            // Measure from start (d1) to end (d2); the reversed order gave a
            // negative duration.
            let components = cal.dateComponents([.minute], from: d1, to: d2)
            print("Diff: \(components.minute ?? 0)")
        }
    }

Stream Reader class

import Foundation

/// Reads a text file one delimiter-separated line at a time, pulling data
/// from disk in fixed-size chunks instead of loading the whole file.
class StreamReader {

    /// Text encoding used to decode each line.
    let encoding: String.Encoding
    /// Number of bytes requested from the file per read.
    let chunkSize: Int
    /// Underlying file handle; `nil` once `close()` has been called.
    var fileHandle: FileHandle!
    /// The line delimiter encoded with `encoding`.
    let delimData: Data
    /// Bytes read from the file that have not yet been returned as a line.
    var buffer: Data
    /// `true` once a read has returned no more data.
    var atEof: Bool

    /// Opens the file at `path` for reading.
    ///
    /// - Parameters:
    ///   - path: Path of the file to read.
    ///   - delimiter: Line separator, `"\n"` by default.
    ///   - encoding: Text encoding of the file, UTF-8 by default.
    ///   - chunkSize: Bytes to read per file access, 4096 by default.
    /// - Returns: `nil` if the file cannot be opened or the delimiter cannot
    ///   be represented in `encoding`.
    init?(path: String, delimiter: String = "\n", encoding: String.Encoding = .utf8,
          chunkSize: Int = 4096) {

        guard let handle = FileHandle(forReadingAtPath: path) else {
            return nil
        }
        guard let delimiterBytes = delimiter.data(using: encoding) else {
            return nil
        }
        self.encoding = encoding
        self.chunkSize = chunkSize
        self.fileHandle = handle
        self.delimData = delimiterBytes
        self.buffer = Data(capacity: chunkSize)
        self.atEof = false
    }

    deinit {
        close()
    }

    /// Return next line, or nil on EOF.
    func nextLine() -> String? {
        precondition(fileHandle != nil, "Attempt to read from closed file")

        while !atEof {
            guard let delimiterRange = buffer.range(of: delimData) else {
                // No complete line buffered yet: pull another chunk from the file.
                let chunk = fileHandle.readData(ofLength: chunkSize)
                if !chunk.isEmpty {
                    buffer.append(chunk)
                    continue
                }
                // EOF or read error.
                atEof = true
                guard !buffer.isEmpty else {
                    return nil
                }
                // Whatever remains is the last line (not terminated by delimiter).
                let trailingLine = String(data: buffer, encoding: encoding)
                buffer.count = 0
                return trailingLine
            }

            // Decode the bytes before the delimiter, then drop the line and
            // its delimiter from the buffer.
            let lineBytes = buffer.subdata(in: 0..<delimiterRange.lowerBound)
            let decodedLine = String(data: lineBytes, encoding: encoding)
            buffer.removeSubrange(0..<delimiterRange.upperBound)
            return decodedLine
        }
        return nil
    }

    /// Start reading from the beginning of file.
    func rewind() -> Void {
        fileHandle.seek(toFileOffset: 0)
        buffer.count = 0
        atEof = false
    }

    /// Close the underlying file. No reading must be done after calling this method.
    func close() -> Void {
        if let handle = fileHandle {
            handle.closeFile()
        }
        fileHandle = nil
    }
}

// Lets a StreamReader be used directly in `for line in reader` loops.
extension StreamReader: Sequence {
    /// Returns an iterator that yields successive results of `nextLine()`
    /// until it returns nil.
    func makeIterator() -> AnyIterator<String> {
        let readNext: () -> String? = { self.nextLine() }
        return AnyIterator(readNext)
    }
}

StreamReader 类逐行读取文本文件,因此无需一次性把整个文件读入内存。上面的第一段代码负责读取文件内容。试试上面的代码,应该可以解决您的问题。请注意,我使用了后台线程,而据我所知(AFAIK),Realm 不适用于后台线程。请告诉我这是否有帮助。