我的应用程序目前会把约 1 万行数据读入变量,然后使用 SwiftyJSON 解析,并写入 Realm 数据库。
来源: https://github.com/skishore/makemeahanzi/blob/master/graphics.txt https://github.com/skishore/makemeahanzi/blob/master/dictionary.txt
问题:此过程耗时太长,需要 2 分 28 秒,并且还要占用 400 MB 内存!
问题:如何使其更快? 你们中有Flatbuffers或Protobuf的经验吗?
非常欢迎您提供帮助!
干杯,Dom
这是代码:
/// Parses the bundled graphics and dictionary files line by line and writes
/// the resulting character objects to Realm in fixed-size batches.
///
/// Both files are expected to hold one JSON document per line, with line `i`
/// of each file describing the same character. Nothing is written when the
/// two files differ in line count or contain at most one line each.
func parseToRealm() {
    // each of these files have 9500+ lines of data
    // (basically dictionaries with word definitions)
    let graphicsFileContents = readFile_Graphics()
    let dictFileContents = readFile_Dict()

    // check if counts of two source files match
    guard graphicsFileContents.count == dictFileContents.count,
          graphicsFileContents.count > 1 else {
        print ("two files have different counts of lines. aborting...")
        return
    }

    // Number of characters accumulated before each write to the DB.
    let batchSize = 150

    // Start with a truly empty batch; seeding it with `Characters()` would
    // persist one blank object per batch.
    var characterArr = [Characters]()
    characterArr.reserveCapacity(batchSize)

    // loop through two files to get all chars
    for (i, jsonString) in graphicsFileContents.enumerated() {
        // parse data from string into json
        guard let dataFromString = jsonString.data(using: .utf8),
              let dictDataFromString = dictFileContents[i].data(using: .utf8) else {
            print("skipping line \(i): cannot encode as UTF-8")
            continue
        }
        // Kept optional (via `try?`) so the elided parsing code below can keep
        // treating the parsed documents as `JSON?`.
        let singleCharJson = try? JSON(data: dataFromString)
        // parse stuff from file1
        // ... deleted lines for legal reasons

        // DICT information
        let singleDictJson = try? JSON(data: dictDataFromString)
        // parse stuff from that dictionary
        // ... deleted lines for legal reasons

        // `Character` is the object built by the elided code above.
        characterArr.append(Character)

        // Every `batchSize` characters, write them into DB
        if characterArr.count >= batchSize {
            realmActions.writeCharsToRealm(characterArr: characterArr)
            print("Writing \(i + 1 - characterArr.count)-\(i)")
            // reset array to save memory
            characterArr.removeAll(keepingCapacity: true)
        }
    } // end loop file contents

    // Flush the final partial batch; the in-loop check only fires on full
    // batches, so the tail would otherwise be lost.
    if !characterArr.isEmpty {
        let lastIndex = graphicsFileContents.count - 1
        realmActions.writeCharsToRealm(characterArr: characterArr)
        print("Writing \(lastIndex + 1 - characterArr.count)-\(lastIndex)")
    }
}
/// Loads the bundled `graphics.txt` resource and splits it into lines.
///
/// - Returns: The file contents separated on newline characters, or an empty
///   array when the resource is not in the main bundle or cannot be read as
///   UTF-8 (read errors are printed to the console).
func readFile_Graphics () -> [String] {
    guard let resourcePath = Bundle.main.path(forResource: "graphics", ofType: "txt") else {
        // Resource missing from the bundle: nothing to read.
        return []
    }
    do {
        let contents = try String(contentsOfFile: resourcePath, encoding: .utf8)
        return contents.components(separatedBy: .newlines)
    } catch {
        print("cannot get file graphics.txt. Error message:")
        print(error)
        return []
    }
}
/// Reads the bundled `dictionary.txt` resource and returns its contents as an
/// array of lines.
///
/// - Returns: The file contents separated on newline characters. An empty
///   array is returned when the resource is not in the main bundle or cannot
///   be read as UTF-8 (read errors are printed to the console), so a failed
///   read is never mistaken for a file containing one blank line.
func readFile_Dict () -> [String]{
    // Start empty rather than with [""], which would report a phantom line on failure.
    var myStrings = [String]()
    if let path = Bundle.main.path(forResource: "dictionary", ofType: "txt") {
        do {
            let data = try String(contentsOfFile: path, encoding: .utf8)
            myStrings = data.components(separatedBy: .newlines)
        } catch {
            print("cannot get file dictionary.txt. Error message:")
            print(error)
        }
    }
    return myStrings
}
答案 0 :(得分:1)
// Streams graphics.txt line by line on a background queue, parses each line
// as JSON, and prints how many whole minutes the pass took.
DispatchQueue.global(qos: .background).async {
    guard let path = Bundle.main.path(forResource: "graphics", ofType: "txt") else {
        print("Dang! File wasn't found!")
        return
    }
    let cal = Calendar.current
    let d1 = Date()
    guard let streamReader = StreamReader(path: path) else {
        print("Dang! StreamReader couldn't be created!")
        return
    }
    var counter = 0
    // `nextLine()` returns nil exactly once the file is exhausted, so looping
    // on its result avoids a spurious failure message when the file ends with
    // a trailing newline.
    while let nextLine = streamReader.nextLine() {
        // The parsed value is discarded here; hand it to the model-building
        // code when adapting this snippet.
        _ = JSON(parseJSON: nextLine)
        counter += 1
        print("\(counter): \(nextLine)")
    }
    let d2 = Date()
    // Measure from start (d1) to end (d2); the reverse order yields a
    // zero or negative duration.
    let components = cal.dateComponents([.minute], from: d1, to: d2)
    print("Diff: \(components.minute ?? 0)")
}
}
import Foundation
/// Reads a text file one delimiter-terminated line at a time, pulling the
/// file in fixed-size chunks so the whole file never has to be in memory.
class StreamReader {
    /// Text encoding used to decode each line.
    let encoding : String.Encoding
    /// Number of bytes requested from the file per read.
    let chunkSize : Int
    /// Handle of the file being read; nil once `close()` has been called.
    var fileHandle : FileHandle!
    /// The line delimiter encoded with `encoding`.
    let delimData : Data
    /// Bytes read from the file that have not yet been returned as a line.
    var buffer : Data
    /// True once a read has returned no more data.
    var atEof : Bool

    /// Opens the file at `path` for line-wise reading.
    ///
    /// - Parameters:
    ///   - path: Path of the file to read.
    ///   - delimiter: Line separator; defaults to `"\n"`.
    ///   - encoding: Text encoding of the file; defaults to UTF-8.
    ///   - chunkSize: Bytes to read per file access; defaults to 4096.
    /// - Returns: nil when the file cannot be opened or the delimiter cannot
    ///   be represented in `encoding`.
    init?(path: String, delimiter: String = "\n", encoding: String.Encoding = .utf8,
          chunkSize: Int = 4096) {
        guard let openedHandle = FileHandle(forReadingAtPath: path) else {
            return nil
        }
        guard let encodedDelimiter = delimiter.data(using: encoding) else {
            return nil
        }
        self.encoding = encoding
        self.chunkSize = chunkSize
        self.fileHandle = openedHandle
        self.delimData = encodedDelimiter
        self.buffer = Data(capacity: chunkSize)
        self.atEof = false
    }

    deinit {
        close()
    }

    /// Return next line, or nil on EOF.
    func nextLine() -> String? {
        precondition(fileHandle != nil, "Attempt to read from closed file")

        // Keep pulling chunks until the buffer holds a complete line.
        while !atEof {
            if let delimiterRange = buffer.range(of: delimData) {
                // Decode everything before the delimiter ...
                let lineBytes = buffer.subdata(in: 0..<delimiterRange.lowerBound)
                let decodedLine = String(data: lineBytes, encoding: encoding)
                // ... then drop the line and its delimiter from the buffer.
                buffer.removeSubrange(0..<delimiterRange.upperBound)
                return decodedLine
            }

            let chunk = fileHandle.readData(ofLength: chunkSize)
            guard chunk.isEmpty else {
                buffer.append(chunk)
                continue
            }

            // EOF or read error.
            atEof = true
            guard !buffer.isEmpty else {
                break
            }
            // Whatever is left is the final line, which had no trailing delimiter.
            let trailingLine = String(data: buffer as Data, encoding: encoding)
            buffer.count = 0
            return trailingLine
        }
        return nil
    }

    /// Start reading from the beginning of file.
    func rewind() -> Void {
        fileHandle.seek(toFileOffset: 0)
        atEof = false
        buffer.count = 0
    }

    /// Close the underlying file. No reading must be done after calling this method.
    func close() -> Void {
        fileHandle?.closeFile()
        fileHandle = nil
    }
}
/// Lets a `StreamReader` be used in `for line in reader` loops.
extension StreamReader : Sequence {
    /// Returns an iterator that yields successive lines by calling
    /// `nextLine()` on this reader until it returns nil.
    func makeIterator() -> AnyIterator<String> {
        let pullLine: () -> String? = { self.nextLine() }
        return AnyIterator(pullLine)
    }
}
StreamReader 类逐行读取文本文件,因此无需一次性读入整个文件。上面的第一段代码负责读取文件内容。请试试上面的代码,这应该可以解决您的问题。请注意,我使用了后台线程,而据我所知(AFAIK)Realm 不适用于后台线程。如果有帮助,请告诉我。