Question

我正在寻找一种最有效的方法来判断字节切片是否为浮点数。

这是在大型数据集上完成的，因此性能至关重要。

尝试过的方法：

strconv.ParseFloat
regexp.Match

CheckNumber - 自己编写的函数：使用 unicode.IsNumber 逐个检查字符，并检查字节切片中是否至多包含一个 `.`。

// CheckNumber reports whether p is a plain unsigned decimal number: ASCII
// digits with at most one '.' separator and at least one digit.
//
// Only ASCII '0'-'9' count as digits. unicode.IsNumber also accepts runes such
// as '½' or '٣', which are not valid float syntax, so it is not used here.
// Inputs without any digit ("" and ".") are rejected for the same reason.
// Signs and exponents are not supported.
func CheckNumber(p []byte) bool {
	digits, sep := 0, 0
	// Ranging over the bytes directly avoids the string/rune conversion; every
	// byte of a multi-byte UTF-8 sequence is >= 0x80, so it is neither a digit
	// nor '.' and falls through to the default case.
	for _, b := range p {
		switch {
		case '0' <= b && b <= '9':
			digits++
		case b == '.':
			if sep > 0 {
				return false
			}
			sep++
		default:
			return false
		}
	}
	return digits > 0
}

基准代码：

// BenchmarkFloatStrconv measures validating a byte slice by parsing it with
// strconv.ParseFloat. The string(p) conversion stays inside the loop on
// purpose: ParseFloat only accepts a string, so the conversion is part of the
// cost of this approach when the input arrives as []byte.
func BenchmarkFloatStrconv(b *testing.B) {
	p := []byte("15.34234234234")

	for i := 0; i < b.N; i++ {
		if _, err := strconv.ParseFloat(string(p), 64); err != nil {
			// b.Fatal fails only this benchmark; log.Fatalf would exit the
			// whole test binary and skip the remaining benchmarks.
			b.Fatal("NaN")
		}
	}
}

// floatPattern matches an optionally signed decimal number with at least one
// digit, e.g. "15", "15.", ".34" or "-15.34". It is anchored at both ends:
// without anchors Match succeeds on any input that merely contains a digit
// (such as "abc1"), so the benchmark would not measure real validation.
// MustCompile is used so that a broken pattern cannot be silently ignored.
var floatPattern = regexp.MustCompile(`^[-+]?(?:[0-9]+\.?[0-9]*|\.[0-9]+)$`)

// BenchmarkFloatRegex measures validating a byte slice against floatPattern.
func BenchmarkFloatRegex(b *testing.B) {
	p := []byte("15.34234234234")

	for i := 0; i < b.N; i++ {
		if !floatPattern.Match(p) {
			// b.Fatal fails only this benchmark; log.Fatalf would exit the
			// whole test binary and skip the remaining benchmarks.
			b.Fatal("NaN")
		}
	}
}

// BenchmarkCheckNumber measures validating a byte slice with the hand-written
// CheckNumber function.
func BenchmarkCheckNumber(b *testing.B) {
	p := []byte("15.34234234234")

	for i := 0; i < b.N; i++ {
		if !CheckNumber(p) {
			// b.Fatal fails only this benchmark; log.Fatalf would exit the
			// whole test binary and skip the remaining benchmarks.
			b.Fatal("NaN")
		}
	}
}

基准测试结果：

BenchmarkFloatStrconv-8     20000000            85.8 ns/op        16 B/op          1 allocs/op
BenchmarkFloatRegex-8        5000000           252 ns/op           0 B/op          0 allocs/op
BenchmarkCheckNumber-8      20000000            64.3 ns/op         0 B/op          0 allocs/op

我对这些不同方案的比较公平吗？
有更好的解决方案吗？

编辑：感谢 Adrian 和 icza 的提示，下面的版本避免了转换为 string / rune：

// CheckNumberNoStringConvert reports whether r is a plain unsigned decimal
// number: ASCII digits with at most one '.' separator and at least one digit.
// It inspects the bytes directly, so no string or rune conversion is needed.
//
// Inputs that contain no digit at all ("" and ".") are rejected; they are not
// numbers even though they contain no forbidden byte. Signs and exponents are
// not supported.
func CheckNumberNoStringConvert(r []byte) bool {
	sawDigit, sawSep := false, false

	for _, c := range r {
		switch {
		case '0' <= c && c <= '9':
			sawDigit = true
		case c == '.':
			// A second separator makes the input invalid.
			if sawSep {
				return false
			}
			sawSep = true
		default:
			return false
		}
	}

	return sawDigit
}

性能非常好 ;-)

BenchmarkCheckNumberNoStringConvert-8       200000000            8.55 ns/op        0 B/op          0 allocs/op

Answer 1

对于一个简单的实数（浮点）数字（没有科学或工程浮点格式，没有组分隔符），

例如：

基准：

// IsReal reports whether n is a simple real number: an optional leading '-',
// followed by ASCII digits with at most one '.'. The part after the sign must
// be non-empty, and a lone "." is not accepted. Scientific notation and group
// separators are not supported.
func IsReal(n []byte) bool {
	digits := n
	if len(digits) != 0 && digits[0] == '-' {
		digits = digits[1:]
	}
	if len(digits) == 0 {
		return false
	}

	seenPoint := false
	for i := 0; i < len(digits); i++ {
		switch ch := digits[i]; {
		case ch >= '0' && ch <= '9':
			// Digits are always fine.
		case ch == '.' && !seenPoint && len(digits) > 1:
			// First point, and it is not the only character left.
			seenPoint = true
		default:
			return false
		}
	}
	return true
}

$ go test -run=! -bench=. -benchmem -cpu=1 real_test.go
goos: linux
goarch: amd64
BenchmarkIsReal           100000000     20.8 ns/op     0 B/op    0 allocs/op
BenchmarkFloatStrconv      20000000    101 ns/op      16 B/op    1 allocs/op
BenchmarkFloatRegex         5000000    284 ns/op       0 B/op    0 allocs/op
BenchmarkCheckNumber       20000000     73.0 ns/op     0 B/op    0 allocs/op
PASS
ok      command-line-arguments  7.380s

real_test.go

Answer 2

我把这当作对自己的一个挑战：综合大家的意见，把它重写成某种状态机 :)

<!--
  Markup fragment: loads jQuery 2.1.1 from the Google CDN and declares a text
  input (class "entry", at most 19 characters, placeholder showing four
  colon-separated groups), an empty "result" div, and four labelled spans
  (value-a through value-d).
  NOTE(review): no script that reads the input or fills these elements is
  visible here, and the fragment looks unrelated to the surrounding Go
  discussion. Confirm whether it belongs on this page.
-->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<body>
<input class="entry" placeholder="000F:000F:000F:000F" maxlength="19">

<div class="result"></div>
<div class="block">Value A:<span class="value-a"></span></div>
<div class="block">Value B:<span class="value-b"></span></div>
<div class="block">Value C:<span class="value-c"></span></div>
<div class="block">Value D:<span class="value-d"></span></div>

</body>

它似乎适用于几种不同的数字格式：

// Validate reports whether b is a decimal floating-point literal:
//
//	['-'] digits ['.' digits] [('e' | 'E') ['+' | '-'] digits]
//
// The mantissa must contain at least one digit on either side of the optional
// '.', so "", "-", "." and "-." are rejected while "123." and ".123" are
// accepted. An exponent may follow an integer mantissa ("1e5") or at least
// one fraction digit ("12.1e-12"); it may not directly follow the '.'.
// A leading '+' on the mantissa is not accepted.
func Validate(b []byte) bool {
	// Index of the first mantissa character, after an optional leading '-'.
	start := 0
	if len(b) > 0 && b[0] == '-' {
		start = 1
	}

	for i := start; i < len(b); i++ {
		switch c := b[i]; {
		case c >= '0' && c <= '9':
			// Integer-part digit: keep scanning.
		case c == '.':
			rest := b[i+1:]
			// With no integer digits, the fraction must open with a digit;
			// otherwise inputs such as "." or "-." would contain no digit.
			if i == start && (len(rest) == 0 || rest[0] < '0' || rest[0] > '9') {
				return false
			}
			return fractional(rest)
		case (c == 'e' || c == 'E') && i > start:
			// Exponent directly after an integer mantissa, e.g. "1e5".
			return scientific(b[i+1:])
		default:
			return false
		}
	}

	// Only digits were seen; require at least one of them.
	return len(b) > start
}

// fractional validates what follows the decimal point: zero or more digits,
// optionally followed by an exponent. The exponent marker must come after at
// least one fraction digit.
func fractional(b []byte) bool {
	for i, c := range b {
		switch {
		case c >= '0' && c <= '9':
			// Fraction digit: keep scanning.
		case c == 'e' || c == 'E':
			if i == 0 {
				return false
			}
			return scientific(b[i+1:])
		default:
			return false
		}
	}

	return true
}

// scientific validates what follows the exponent marker: an optional '+' or
// '-' sign followed by at least one digit. The '+' sign is accepted because
// Go's own float formatting produces exponents such as "1e+10".
func scientific(b []byte) bool {
	if len(b) > 0 && (b[0] == '-' || b[0] == '+') {
		b = b[1:]
	}
	if len(b) == 0 {
		return false
	}
	for _, c := range b {
		if c < '0' || c > '9' {
			return false
		}
	}

	return true
}

在原始基准测试中表现良好：

// v is a single table-driven test case for Validate.
type v struct {
    // Input is the byte slice handed to Validate.
    Input    []byte
    // Expected is the result Validate must return for Input.
    Expected bool
}

// TestPermutations checks Validate against a table of plain, fractional,
// signed and scientific-notation inputs, plus inputs that must be rejected.
func TestPermutations(t *testing.T) {
	cases := []v{
		{[]byte("123.456"), true},
		{[]byte("123"), true},
		{[]byte("123."), true},
		{[]byte(".123"), true},
		{[]byte("12.1e12"), true},
		{[]byte("12.1e-12"), true},
		{[]byte("-123.456"), true},
		{[]byte("-123"), true},
		{[]byte("-123."), true},
		{[]byte("-.123"), true},
		{[]byte("-12.1e12"), true},
		{[]byte("-12.1e-12"), true},
		{[]byte(".1e-12"), true},
		{[]byte(".e-12"), false},
		{[]byte(".e"), false},
		{[]byte("e"), false},
		{[]byte("abcdef"), false},
		{[]byte("-"), false},
		{[]byte("."), false},
	}

	for _, tc := range cases {
		// Report both the actual and the expected result so a failure can be
		// diagnosed from the message alone.
		if got := Validate(tc.Input); got != tc.Expected {
			t.Errorf("Validate(%q) = %t, want %t", tc.Input, got, tc.Expected)
		}
	}
}

基准测试结果：

BenchmarkValidate-8     100000000           13.0 ns/op         0 B/op          0 allocs/op

检查字节切片是否为数字的最有效方法

2 个答案: