我试图在计算行时捕获GNU的wc -l速度。我会满意的是慢1.5-2倍。我能做的最好的是慢4倍。使用syscall.Mmap的代码:
main.go
package main
import (
"flag"
"fmt"
"log"
"os"
"runtime/pprof"
"syscall"
)
const (
DATA_READ int64 = 4096 * 4096
)
var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file")
func main() {
flag.Usage = func() {
fmt.Printf("Usage: %s [options] <file>\n\n", os.Args[0])
flag.PrintDefaults()
}
flag.Parse()
if len(flag.Args()) != 1 {
flag.Usage()
os.Exit(1)
}
file_path := flag.Args()[0]
if *cpuprofile != "" {
profile, err := os.Create(*cpuprofile)
if err != nil {
log.Fatal(err)
}
pprof.StartCPUProfile(profile)
defer pprof.StopCPUProfile()
}
// Open file for reading
file, err := os.OpenFile(file_path, os.O_RDONLY, 0)
if err != nil {
log.Fatal(err)
}
fi, err := file.Stat()
if err != nil {
log.Fatal(err)
}
file_size := fi.Size()
fd := int(file.Fd())
defer file.Close()
line_counter := 0
read_from := int64(0)
for {
read_size := DATA_READ
if (file_size - read_from) < read_size {
read_size = file_size - read_from
}
if read_size == 0 {
break
}
data, err := syscall.Mmap(
fd, read_from, int(read_size),
syscall.PROT_READ, syscall.MAP_PRIVATE)
if err != nil {
log.Fatal(err)
}
for _, char := range data {
if char == '\n' {
line_counter += 1
}
}
err = syscall.Munmap(data)
if err != nil {
log.Fatal(err)
}
read_from += int64(len(data))
}
fmt.Println(line_counter)
}
直接的方法,将大块读入缓冲区,直接.go
package main
import (
"fmt"
"io"
"log"
"os"
)
const BUF_SIZE = 4096 * 4096
func main() {
if len(os.Args) != 2 {
fmt.Printf("Usage: %s <file>\n\n", os.Args[0])
os.Exit(-1)
}
file, err := os.Open(os.Args[1])
if err != nil {
log.Fatal(err)
}
defer file.Close()
buf := make([]byte, BUF_SIZE)
line_counter := 0
for {
bytes_read, err := file.Read(buf)
if err != nil {
if err == io.EOF {
break
} else {
log.Fatal(err)
}
}
for i := 0; i < bytes_read; i++ {
if buf[i] == '\n' {
line_counter += 1
}
}
}
fmt.Println(line_counter)
}
使用python脚本生成测试文件:
import os
data = ' '*80 + '\n'
data *= 1000000
with open(os.environ['HOME'] + '/data', 'w') as dest:
for i in range(70):
dest.write(data)
dest.flush()
时序:
$ time ./main ~/data
70000000
real 0m3.965s
user 0m3.768s
sys 0m0.196s
$ time ./straight ~/data
70000000
real 0m5.180s
user 0m4.432s
sys 0m0.748s
$ time wc -l ~/data
70000000 /home/<user>/data
real 0m0.947s
user 0m0.440s
sys 0m0.504s
我试图介绍我的main.go程序:
./main -cpuprofile main.prof ~/data
go tool pprof ./main main.prof
结果对我没有帮助。它也很有趣main.go在用户和系统部门都比direct.go更快。看起来像index,value:= range不进行范围检查(用户),mmap确实比普通Read(sys和user)更快。尽管如此,我所做的最快的程序比“wc -l”慢4倍,后者只有getc(fp)。
我想知道还有什么方法可以接近旧的好wc的速度吗?
upd:关于getc的错误,来自coreutils的真正的wc使用了safe_read,这本质上是普通读取的包装。