Question

我在这个网站上看到了一个关于字符串连接速度的问题。在那个主题中，人们写了一些简短的基准测试，得出的数字很奇怪。参见：How to efficiently concatenate strings in Go?

我决定亲自检验这些速度，于是编写了测试。我的测试得出了不同的结果：在数据量很大时，"+" 运算符比其他方法更快。是这样吗？

这是我的代码。

package main

import (
    "bytes"
    "fmt"
    "runtime/debug"
    "time"
)

// variations is the largest chunk-size exponent that main benchmarks; it is
// also the length of every per-step result array below.
const variations = 30

// Benchmark state. Everything lives at package level and is read and written
// by main.
var (
    // time1 and time2 are the start and end timestamps of the section
    // currently being timed.
    time1, time2 time.Time

    // delta is the chunk appended on every operation; catcher is the string
    // that accumulates the result.
    delta, catcher string

    // x is the current chunk-size exponent, deltaSize the chunk size in bytes,
    // operations the number of appends in the current step and dataSize the
    // total bytes appended in that step. k is not used by the code shown here.
    x, deltaSize, k, dataSize, operations uint64

    // i and j are loop counters; x_min is the smallest exponent benchmarked.
    i, j, x_min uint64

    // l is the write offset into tmp for the pre-sized copy method.
    l int

    // delta_byte is the scratch slice used to build delta.
    delta_byte []byte

    // Measured speed of each method, stored at index x-1 for step x.
    method1Speed, method2Speed, method3Speed, method3ASpeed, method2ASpeed [variations]uint64

    // dataTotal holds the total bytes appended in step x, stored at index x-1.
    dataTotal [variations]uint64

    // tmp is the pre-sized destination slice for the pre-sized copy method.
    tmp []byte
)

// main benchmarks five ways of growing the string catcher by repeatedly
// appending the chunk delta to it, then prints one row of speeds per step.
//
// Step x uses a chunk of 2^x bytes. x walks from variations down to x_min in
// steps of 2, and the number of appends doubles after every step.
//
// Methods:
//
//	#1  catcher += delta
//	#2  stringsJoinViaCopy  (fresh byte slice on every append)
//	#3  stringsJoinViaBuffer (fresh bytes.Buffer on every append)
//	#3A a single bytes.Buffer written to for the whole step
//	#2A copy into one byte slice pre-sized for the whole step
func main() {

    x_min = 2
    operations = 1

    for x = variations; x >= x_min; x = x - 2 {
        deltaSize = 1 << x // 2^x
        dataSize = operations * deltaSize
        dataTotal[x-1] = dataSize

        fmt.Println("Step #", x, "delta=", deltaSize, "op.=", operations, "data=", dataSize)
        fmt.Println("Preparing Data...")
        delta_byte = make([]byte, deltaSize)
        for i = 0; i < deltaSize; i++ {
            delta_byte[i] = 255
        }
        delta = string(delta_byte)

        // Drop everything that is no longer needed before each timed section
        // so leftovers from the previous one do not add memory pressure.
        delta_byte = nil
        catcher = ""
        debug.FreeOSMemory()

        fmt.Println("Testing Method #1...")
        time1 = time.Now()
        for j = 1; j <= operations; j++ {
            //----------------------------
            catcher += delta
            //----------------------------
        }
        time2 = time.Now()
        method1Speed[x-1] = throughput(dataSize, time2.Sub(time1))

        catcher = ""
        debug.FreeOSMemory()

        fmt.Println("Testing Method #2...")
        time1 = time.Now()
        for j = 1; j <= operations; j++ {
            //----------------------------
            stringsJoinViaCopy(&catcher, &catcher, &delta)
            //----------------------------
        }
        time2 = time.Now()
        method2Speed[x-1] = throughput(dataSize, time2.Sub(time1))

        catcher = ""
        debug.FreeOSMemory()

        fmt.Println("Testing Method #3...")
        time1 = time.Now()
        for j = 1; j <= operations; j++ {
            //----------------------------
            stringsJoinViaBuffer(&catcher, &catcher, &delta)
            //----------------------------
        }
        time2 = time.Now()
        method3Speed[x-1] = throughput(dataSize, time2.Sub(time1))

        catcher = ""
        debug.FreeOSMemory()

        fmt.Println("Testing Method #3A...")
        time1 = time.Now()
        buffer := bytes.NewBuffer(nil)
        for j = 1; j <= operations; j++ {
            //----------------------------
            buffer.WriteString(delta)
            //----------------------------
        }
        catcher = buffer.String()
        time2 = time.Now()
        method3ASpeed[x-1] = throughput(dataSize, time2.Sub(time1))

        catcher = ""
        debug.FreeOSMemory()

        fmt.Println("Testing Method #2A...")
        time1 = time.Now()
        tmp = make([]byte, int(operations)*len(delta)) // Cheating (guessing) with size
        l = 0
        for j = 1; j <= operations; j++ {
            //----------------------------
            l += copy(tmp[l:], delta)
            //----------------------------
        }
        catcher = string(tmp)
        time2 = time.Now()
        method2ASpeed[x-1] = throughput(dataSize, time2.Sub(time1))

        catcher = ""
        delta = ""
        // tmp is a package-level slice: without this it would keep the whole
        // pre-sized buffer alive while the next step is being timed.
        tmp = nil
        debug.FreeOSMemory()

        ///
        operations *= 2
    }

    // Show Results
    fmt.Println("#. ops. Total Data, B. Speed (KiB/sec) M1 M2 M3 M3A M2A")
    for x = x_min; x <= variations; x = x + 2 {
        // operations doubled once for every drop of 2 in x, so step x ran
        // 2^((variations-x)/2) appends.
        operations = 1 << ((variations - x) / 2)
        fmt.Println(x, operations, dataTotal[x-1], method1Speed[x-1], method2Speed[x-1], method3Speed[x-1],
            method3ASpeed[x-1], method2ASpeed[x-1])
    }
}

// throughput converts a byte count and the time it took to move those bytes
// into the speed figure shown in the results table.
//
// The factor 1000000 turns bytes per nanosecond into units of 1000 bytes per
// second; the table header labels this column "KiB/sec".
//
// It returns 0 when elapsed is not positive (the clock did not advance
// between the two readings), because a float-to-integer conversion of the
// resulting infinity is not well defined.
func throughput(bytesMoved uint64, elapsed time.Duration) uint64 {
    ns := elapsed.Nanoseconds()
    if ns <= 0 {
        return 0
    }
    return uint64((1000000 * float64(bytesMoved)) / float64(ns))
}

//------------------------------------------------------------------------------

// stringsJoinViaBuffer concatenates *a and *b through a freshly created
// bytes.Buffer and stores the result in *dest.
//
// Both inputs are read before *dest is written, so dest may point to the same
// string as a or b.
func stringsJoinViaBuffer(dest, a, b *string) {
    left, right := *a, *b

    var joined bytes.Buffer
    joined.WriteString(left)
    joined.WriteString(right)

    *dest = joined.String()
}

//------------------------------------------------------------------------------

// stringsJoinViaCopy concatenates *a and *b by copying both into a newly
// allocated byte slice of exactly the combined length and stores the result
// in *dest.
//
// Both inputs are read before *dest is written, so dest may point to the same
// string as a or b.
func stringsJoinViaCopy(dest, a, b *string) {
    left, right := *a, *b

    joined := make([]byte, len(left)+len(right))
    off := copy(joined, left)
    copy(joined[off:], right)

    *dest = string(joined)
}

以下是结果

#. ops. Total Data, B. Speed (KiB/sec) M1 M2 M3 M3A M2A
2 268435456 65536 236 109 57 108413 301653
4 67108864 131072 464 227 113 251519 576660
6 16777216 262144 895 410 202 225300 626165
8 4194304 524288 1514 672 351 205068 552088
10 1048576 1048576 3187 1412 756 207588 532239
12 262144 2097152 7980 3238 1727 209447 592230
14 65536 4194304 16361 6553 3641 230521 536320
16 16384 8388608 29568 12170 6835 241752 604050
18 4096 16777216 55158 23950 13549 238039 563997
20 1024 33554432 98348 43400 25958 216947 521189
22 256 67108864 168906 80442 48725 231806 534722
24 64 134217728 299127 129035 89686 254403 519534
26 16 268435456 529730 207405 153894 284578 506730
28 4 536870912 1167316 353510 268546 359990 523471
30 1 1073741824 909950698305 503703 581848 572763 579852

看起来，当有大量数据持续写入，或者可以通过预先猜测大小来“作弊”时，缓冲区方法才比较有效……这样理解正确吗？如果只是偶尔（occasional）连接字符串，是不是简单的 "+" 更好？不知为何，在前面提到的那个问题中，人们测量的只是脱离实际任务的字节传输。

在第 26 步，"+" 运算符甚至比预先猜测大小的“作弊”方法还要快！

Answer 1

这是Go基准入门套件。

concat_test.go：

package main

import (
    "bytes"
    "strconv"
    "strings"
    "testing"
)

// BenchmarkConcat compares three ways of joining two strings of equal length:
// the + operator, copy into a pre-sized byte slice, and bytes.Buffer.
//
// Lengths run over 1, 8, 64, 512 and 4096 bytes. Each combination is a
// sub-benchmark named <Method>L<length>, and every sub-benchmark reports
// allocations.
func BenchmarkConcat(b *testing.B) {
    // sink receives every result so the concatenations are not dead code.
    var sink string
    for n := 1; n <= 1<<12; n <<= 3 {
        left := strings.Repeat("a", n)
        right := strings.Repeat("b", n)
        suffix := strconv.Itoa(n)

        // Plain + operator.
        b.Run("PlusL"+suffix, func(b *testing.B) {
            b.ReportAllocs()
            b.ResetTimer()
            for iter := 0; iter < b.N; iter++ {
                sink = left + right
            }
            b.StopTimer()
        })

        // copy into a byte slice sized for both operands.
        b.Run("CopyL"+suffix, func(b *testing.B) {
            b.ReportAllocs()
            b.ResetTimer()
            for iter := 0; iter < b.N; iter++ {
                joined := make([]byte, len(left)+len(right))
                off := copy(joined, left)
                copy(joined[off:], right)
                sink = string(joined)
            }
            b.StopTimer()
        })

        // A fresh bytes.Buffer per concatenation.
        b.Run("BufferL"+suffix, func(b *testing.B) {
            b.ReportAllocs()
            b.ResetTimer()
            for iter := 0; iter < b.N; iter++ {
                var joined bytes.Buffer
                joined.WriteString(left)
                joined.WriteString(right)
                sink = joined.String()
            }
            b.StopTimer()
        })
    }
    _ = sink
}

输出：

$ go test -bench=.
goos: linux
goarch: amd64
pkg: so/concat
BenchmarkConcat/PlusL1-4       30000000     55.9 ns/op     2 B/op      1 allocs/op
BenchmarkConcat/CopyL1-4       30000000     63.0 ns/op     4 B/op      2 allocs/op
BenchmarkConcat/BufferL1-4     10000000    115 ns/op     114 B/op      2 allocs/op
BenchmarkConcat/PlusL8-4       20000000     78.1 ns/op    16 B/op      1 allocs/op
BenchmarkConcat/CopyL8-4       20000000     99.2 ns/op    32 B/op      2 allocs/op
BenchmarkConcat/BufferL8-4     10000000    131 ns/op     128 B/op      2 allocs/op
BenchmarkConcat/PlusL64-4      20000000     85.3 ns/op   128 B/op      1 allocs/op
BenchmarkConcat/CopyL64-4      10000000    125 ns/op     256 B/op      2 allocs/op
BenchmarkConcat/BufferL64-4     5000000    328 ns/op     432 B/op      3 allocs/op
BenchmarkConcat/PlusL512-4      5000000    249 ns/op    1024 B/op      1 allocs/op
BenchmarkConcat/CopyL512-4      3000000    457 ns/op    2048 B/op      2 allocs/op
BenchmarkConcat/BufferL512-4    1000000   1012 ns/op    3184 B/op      4 allocs/op
BenchmarkConcat/PlusL4096-4     1000000   1527 ns/op    8192 B/op      1 allocs/op
BenchmarkConcat/CopyL4096-4      500000   3132 ns/op   16384 B/op      2 allocs/op
BenchmarkConcat/BufferL4096-4    300000   4863 ns/op   24688 B/op      4 allocs/op
PASS
ok      so/concat   24.308s
$

Answer 2

你做错的一件事是对缓冲区版本进行基准测试的方式。你在每次迭代时都分配了一个新的缓冲区，而正确的做法是只创建一个缓冲区，持续向其中写入，直到全部完成后再取出结果。这也正是使用缓冲区的原因。

// Create the buffer once, seeded with catcher's current contents, and write
// every chunk into that same buffer.
buf := bytes.NewBuffer([]byte(catcher))
for j = 1; j <= operations; j++ {
    //----------------------------
    buf.WriteString(delta)
    //----------------------------
}
// Convert to a string a single time, after all writes are done.
catcher = buf.String()

您的 stringsJoinViaCopy 每次也都会不必要地分配一个新的字节切片。只有在预先知道字符串大小的情况下，copy 才会更高效；bytes.Buffer 内部已经使用了 copy，并且对底层字节切片采用了一些增长启发式策略。

Answer 3

在非常大的尺寸上，Plus运算符（“+”）比其他方法更快。

Go中字符串变量的连接速度

3 个答案: