我正在寻找一种最有效的方法来判断字节切片是否为浮点数。
这是在大型数据集上完成的,因此性能至关重要。
尝试过的方法:
strconv.ParseFloat
regexp.Match
CheckNumber —— 自己手写的函数:使用 IsNumber 判断数字,并检查字节切片中是否包含 `.`。
// CheckNumber reports whether p is a plain unsigned decimal number: one or
// more ASCII digits with at most one '.' separator anywhere in the input.
//
// Only the ASCII digits '0'-'9' count. unicode.IsNumber would also accept
// characters such as '½', 'Ⅷ' or Arabic-Indic digits, none of which are valid
// in a float literal. Input without any digit ("" or ".") is rejected.
// Signs and exponents are not supported.
func CheckNumber(p []byte) bool {
	r := string(p)
	sep := 0
	digits := 0
	for _, b := range r {
		// Restrict to ASCII before asking unicode, so that digits from other
		// scripts are treated as invalid characters.
		if b <= unicode.MaxASCII && unicode.IsDigit(b) {
			digits++
			continue
		}
		if b == '.' {
			if sep > 0 {
				// A second separator can never be part of a number.
				return false
			}
			sep++
			continue
		}
		return false
	}
	// A separator on its own (or empty input) is not a number.
	return digits > 0
}
基准代码:
// BenchmarkFloatStrconv times float detection via strconv.ParseFloat. The
// byte slice is converted to a string on every iteration, so the conversion
// cost is part of the measurement.
func BenchmarkFloatStrconv(b *testing.B) {
	input := []byte("15.34234234234")
	for n := 0; n < b.N; n++ {
		if _, err := strconv.ParseFloat(string(input), 64); err != nil {
			log.Fatalf("NaN")
		}
	}
}
// floatRegexp matches a whole input that is a signed decimal number: an
// optional '+' or '-', then either digits with an optional '.' and optional
// fraction digits, or a '.' followed by at least one digit.
//
// The pattern is anchored with ^ and $; without anchors Match succeeds on any
// input that merely contains a digit (for example "abc1" or "1.2.3").
// It is compiled once at package scope so compilation is never timed and an
// invalid pattern fails loudly instead of being ignored.
var floatRegexp = regexp.MustCompile(`^[-+]?(?:[0-9]+\.?[0-9]*|\.[0-9]+)$`)

// BenchmarkFloatRegex times float detection via a precompiled regular
// expression applied directly to the byte slice.
func BenchmarkFloatRegex(b *testing.B) {
	p := []byte("15.34234234234")
	for i := 0; i < b.N; i++ {
		if !floatRegexp.Match(p) {
			log.Fatalf("NaN")
		}
	}
}
// BenchmarkCheckNumber times the hand-rolled CheckNumber validator on a
// fixed, valid input and aborts the run if the input is ever rejected.
func BenchmarkCheckNumber(b *testing.B) {
	input := []byte("15.34234234234")
	for n := 0; n < b.N; n++ {
		if valid := CheckNumber(input); !valid {
			log.Fatalf("NaN")
		}
	}
}
基准测试结果:
BenchmarkFloatStrconv-8 20000000 85.8 ns/op 16 B/op 1 allocs/op
BenchmarkFloatRegex-8 5000000 252 ns/op 0 B/op 0 allocs/op
BenchmarkCheckNumber-8 20000000 64.3 ns/op 0 B/op 0 allocs/op
修改:感谢 Adrian 和 icza 的提示,下面的版本避免了转换为 string / rune:
// CheckNumberNoStringConvert reports whether r is a plain unsigned decimal
// number: one or more ASCII digits with at most one '.' separator. It works
// directly on the bytes, with no string or rune conversion.
//
// Input without any digit ("" or ".") is rejected. Signs and exponents are
// not supported.
func CheckNumberNoStringConvert(r []byte) bool {
	var sawDigit, sawSep bool
	for _, c := range r {
		switch {
		case '0' <= c && c <= '9':
			sawDigit = true
		case c == '.' && !sawSep:
			sawSep = true
		default:
			// Any other byte, or a second '.', disqualifies the input.
			return false
		}
	}
	// A separator on its own (or empty input) is not a number.
	return sawDigit
}
性能非常好 ;-)
BenchmarkCheckNumberNoStringConvert-8 200000000 8.55 ns/op 0 B/op 0 allocs/op
答案 0 :(得分:3)
对于简单的实数(浮点数)(不含科学计数法或工程计数法格式,也不含千位分隔符),可以使用下面的函数,其后附基准测试结果:
// IsReal reports whether n is a simple real number: an optional leading '-',
// followed by ASCII digits containing at most one '.'. Scientific notation,
// a leading '+', and group separators are not accepted, and neither is a
// sign or a decimal point with nothing else after the sign.
func IsReal(n []byte) bool {
	body := n
	if len(body) != 0 && body[0] == '-' {
		body = body[1:]
	}
	if len(body) == 0 {
		return false
	}
	seenPoint := false
	for i := 0; i < len(body); i++ {
		switch ch := body[i]; {
		case ch >= '0' && ch <= '9':
			// Digits are always fine.
		case ch == '.' && len(body) > 1 && !seenPoint:
			// First decimal point, and it is not the only character.
			seenPoint = true
		default:
			return false
		}
	}
	return true
}
$ go test -run=! -bench=. -benchmem -cpu=1 real_test.go
goos: linux
goarch: amd64
BenchmarkIsReal 100000000 20.8 ns/op 0 B/op 0 allocs/op
BenchmarkFloatStrconv 20000000 101 ns/op 16 B/op 1 allocs/op
BenchmarkFloatRegex 5000000 284 ns/op 0 B/op 0 allocs/op
BenchmarkCheckNumber 20000000 73.0 ns/op 0 B/op 0 allocs/op
PASS
ok command-line-arguments 7.380s
:
real_test.go
答案 1 :(得分:1)
我把这当作对自己的一个挑战:综合大家的意见,把它重写成某种状态机 :)
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<body>
<input class="entry" placeholder="000F:000F:000F:000F" maxlength="19">
<div class="result"></div>
<div class="block">Value A:<span class="value-a"></span></div>
<div class="block">Value B:<span class="value-b"></span></div>
<div class="block">Value C:<span class="value-c"></span></div>
<div class="block">Value D:<span class="value-d"></span></div>
</body>
它似乎适用于几种不同的数字格式:
// Validate reports whether b holds a decimal floating-point literal: an
// optional leading '-', integer digits, and optionally a '.' followed by
// fraction digits and an exponent.
//
// At least one mantissa digit is required, so "", "-", "." and "-." are all
// rejected. An exponent is only recognised after a decimal point
// ("12.1e12"); "1e5" is rejected.
func Validate(b []byte) bool {
	if len(b) == 0 {
		return false
	}
	intDigits := false
	for i := range b {
		switch c := b[i]; {
		case c >= '0' && c <= '9':
			intDigits = true
		case c == '.':
			rest := b[i+1:]
			// With no integer digits the fraction has to supply the first
			// digit; this rejects ".", "-." and ".e5".
			if !intDigits && (len(rest) == 0 || rest[0] < '0' || rest[0] > '9') {
				return false
			}
			return fractional(rest)
		case i == 0 && c == '-':
			if len(b) == 1 {
				// A sign on its own is not a number.
				return false
			}
		default:
			return false
		}
	}
	// Only digits (after an optional sign) were seen.
	return true
}

// fractional validates the bytes that follow the decimal point: zero or more
// digits, optionally followed by 'e' or 'E' and an exponent. The exponent
// marker must come after at least one fraction digit and must be followed by
// at least one more byte.
func fractional(b []byte) bool {
	for i := range b {
		switch c := b[i]; {
		case c >= '0' && c <= '9':
			// Fraction digit.
		case c == 'e' || c == 'E':
			if i == 0 {
				// No fraction digit precedes the exponent marker.
				return false
			}
			if len(b) > i+1 {
				return scientific(b[i+1:])
			}
			// The exponent marker is the last byte.
			return false
		default:
			return false
		}
	}
	return true
}

// scientific validates an exponent: an optional leading '-' or '+' followed
// by one or more digits. The '+' sign is accepted because
// strconv.FormatFloat writes exponents as "e+NN".
func scientific(b []byte) bool {
	if len(b) == 0 {
		return false
	}
	for i := range b {
		switch c := b[i]; {
		case c >= '0' && c <= '9':
			// Exponent digit.
		case i == 0 && (c == '-' || c == '+'):
			if len(b) == 1 {
				// A sign with no digits is not an exponent.
				return false
			}
		default:
			return false
		}
	}
	return true
}
它在原始基准测试中表现良好;以下是用于验证的测试用例:
// v is a single table entry for TestPermutations: the raw input bytes and
// the result Validate is expected to return for them.
type v struct {
	Input    []byte
	Expected bool
}

// TestPermutations runs Validate over a table of well-formed and malformed
// number literals and reports every entry whose result differs from the
// expectation.
func TestPermutations(t *testing.T) {
	cases := []v{
		{Input: []byte("123.456"), Expected: true},
		{Input: []byte("123"), Expected: true},
		{Input: []byte("123."), Expected: true},
		{Input: []byte(".123"), Expected: true},
		{Input: []byte("12.1e12"), Expected: true},
		{Input: []byte("12.1e-12"), Expected: true},
		{Input: []byte("-123.456"), Expected: true},
		{Input: []byte("-123"), Expected: true},
		{Input: []byte("-123."), Expected: true},
		{Input: []byte("-.123"), Expected: true},
		{Input: []byte("-12.1e12"), Expected: true},
		{Input: []byte("-12.1e-12"), Expected: true},
		{Input: []byte(".1e-12"), Expected: true},
		{Input: []byte(".e-12"), Expected: false},
		{Input: []byte(".e"), Expected: false},
		{Input: []byte("e"), Expected: false},
		{Input: []byte("abcdef"), Expected: false},
		{Input: []byte("-"), Expected: false},
		{Input: []byte("."), Expected: false},
	}
	for i := range cases {
		tc := cases[i]
		if got := Validate(tc.Input); got != tc.Expected {
			t.Errorf("could not handle case %s", tc.Input)
		}
	}
}
基准测试结果:
BenchmarkValidate-8 100000000 13.0 ns/op 0 B/op 0 allocs/op