我试图读取这种格式的大文件:
a string key, 200 values separated by comma
并将其写入一个 map(映射)中。
我写了这段代码:
package main
import (
"bufio"
"unsafe"
"fmt"
"log"
"os"
"runtime"
"strings"
)
// main reads file_address.txt line by line, stores the first comma-separated
// field of every line as a key in a map, and then prints runtime memory
// statistics.
func main() {
file, err := os.Open("file_address.txt")
if err != nil {
log.Fatal(err)
}
defer file.Close()
mp := make(map[string]float32)
// NOTE(review): total_size is declared here and printed below, but nothing in
// this block ever adds to it, so as written the "Took ... Mb" figure is 0.
var total_size int64 = 0
scanner := bufio.NewScanner(file)
// counter tracks the number of lines read.
var counter int64 = 0
for scanner.Scan() {
counter++
// scanner.Text() allocates a new string holding the whole line, and
// strings.Split returns substrings that share that string's memory.
sliced := strings.Split(scanner.Text(), ",")
// Using sliced[0] directly as the map key keeps the entire line reachable
// for as long as the map entry exists, not just the key's own bytes.
mp[sliced[0]] = 2.2
}
// Scan returns false both at end of input and on error; Err tells them apart.
if err := scanner.Err(); err != nil {
log.Fatal(err)
}
// No trailing newline here, so the next Println continues on the same line.
fmt.Printf("loaded: %d. Took %d Mb of memory.", counter, total_size/1024.0/1024.0)
fmt.Println("Loading finished. Now waiting...")
// Snapshot the runtime's allocator statistics and report them in MB.
var ms runtime.MemStats
runtime.ReadMemStats(&ms)
fmt.Printf("\n")
fmt.Printf("Alloc: %d MB, TotalAlloc: %d MB, Sys: %d MB\n",
ms.Alloc/1024/1024, ms.TotalAlloc/1024/1024, ms.Sys/1024/1024)
fmt.Printf("Mallocs: %d, Frees: %d\n",
ms.Mallocs, ms.Frees)
fmt.Printf("HeapAlloc: %d MB, HeapSys: %d MB, HeapIdle: %d MB\n",
ms.HeapAlloc/1024/1024, ms.HeapSys/1024/1024, ms.HeapIdle/1024/1024)
fmt.Printf("HeapObjects: %d\n", ms.HeapObjects)
fmt.Printf("\n")
}
以下是输出:
loaded: 544594. Took 8 Mb of memory.Loading finished. Now waiting...
Alloc: 2667 MB, TotalAlloc: 3973 MB, Sys: 2831 MB
Mallocs: 1108463, Frees: 401665
HeapAlloc: 2667 MB, HeapSys: 2687 MB, HeapIdle: 11 MB
HeapObjects: 706798
Done!
尽管键本身仅占用约 8 MB,但该程序却占用了大约 2.7 GB 的内存!看来 sliced 从未从堆中释放。我尝试在 for 循环的末尾设置 sliced = nil,但这无济于事。我读到过,如果先将整个文件加载到内存中再进行拆分,就可以避免此问题;但我必须逐行读取文件,因为我没有足够的内存来加载某些较大的文件。
为什么内存被占用?处理完每一行后如何释放它?
答案 0 :(得分:5)
为了更高效地使用 CPU 和内存,可以只取第一个逗号之前的字节,并将其复制为键:
key := string(bytes.SplitN(scanner.Bytes(), []byte(","), 2)[0])
mp[key] = 2.2
答案 1 :(得分:1)
我想我找到了问题所在!我用 strings.Split 拆分了大文件的每一行。返回的 []string 是一个切片,其中的元素是原始字符串(文件行)的子字符串。问题在于,每个子字符串都不是新分配的字符串,它只是一个保留了对未拆分的原始字符串(即整行)引用的片段。我为每一行都保留了 sliced[0],因此也就保留了对文件每一行的引用。垃圾收集器不会回收已读取的行,因为我仍然持有对它的引用。从技术上讲,我相当于读取并保存了文件的所有行。
解决方案是将我需要的部分(sliced[0])复制到一个新字符串中,从而真正丢弃对整行的引用。我的做法如下:
sliced := strings.Split(scanner.Text(), ",")
// Copy the key byte-for-byte into a fresh string. Going through []byte (rather
// than []rune) preserves the exact bytes even when they are not valid UTF-8
// and avoids a temporary 4-bytes-per-rune buffer. On Go 1.18+ the equivalent
// is strings.Clone(sliced[0]).
key := string([]byte(sliced[0])) // now key is a copy of sliced[0] without reference to line
mp[key] = 2.2 // instead of mp[sliced[0]] = 2.2
程序现在变为:
package main
import (
"bufio"
"unsafe"
"fmt"
"log"
"os"
"runtime"
"strings"
)
func main() {
file, err := os.Open("file_address.txt")
if err != nil {
log.Fatal(err)
}
defer file.Close()
mp := make(map[string]float32)
var total_size int64 = 0
scanner := bufio.NewScanner(file)
var counter int64 = 0
for scanner.Scan() {
counter++
sliced := strings.Split(scanner.Text(), ",")
key_rune_arr := []rune(sliced[0])
key := string(key_rune_arr) // now key is a copy of sliced[0] without reference to line
mp[key] = 2.2 //instead of mp[sliced[0]] = 2.2
}
if err := scanner.Err(); err != nil {
log.Fatal(err)
}
fmt.Printf("loaded: %d. Took %d Mb of memory.", counter, total_size/1024.0/1024.0)
fmt.Println("Loading finished. Now waiting...")
var ms runtime.MemStats
runtime.ReadMemStats(&ms)
fmt.Printf("\n")
fmt.Printf("Alloc: %d MB, TotalAlloc: %d MB, Sys: %d MB\n",
ms.Alloc/1024/1024, ms.TotalAlloc/1024/1024, ms.Sys/1024/1024)
fmt.Printf("Mallocs: %d, Frees: %d\n",
ms.Mallocs, ms.Frees)
fmt.Printf("HeapAlloc: %d MB, HeapSys: %d MB, HeapIdle: %d MB\n",
ms.HeapAlloc/1024/1024, ms.HeapSys/1024/1024, ms.HeapIdle/1024/1024)
fmt.Printf("HeapObjects: %d\n", ms.HeapObjects)
fmt.Printf("\n")
}
结果如我所愿:
loaded: 544594. Took 8 Mb of memory.Loading finished. Now waiting...
Alloc: 94 MB, TotalAlloc: 3986 MB, Sys: 135 MB
Mallocs: 1653590, Frees: 1108129
HeapAlloc: 94 MB, HeapSys: 127 MB, HeapIdle: 32 MB
HeapObjects: 545461
Done!