在 []byte 切片中查找字符串的所有出现位置

时间:2018-10-06 09:15:00

标签: string go byte

我想找出一组字符串在一个字节切片中每一次出现的索引。

// findAllOccurrences reports, for each string in searches, every byte offset
// in data at which that string begins.
//
// Offsets are listed in ascending order and overlapping occurrences are
// included (searching "aaa" for "aa" yields [0 1]). A search string that never
// occurs has no entry in the returned map. Empty search strings are skipped.
//
// data is only re-sliced, never copied; the only allocations are the map, the
// offset slices, and one []byte conversion per search string.
func findAllOccurrences(data []byte, searches []string) map[string][]int {
    // The map must be made before use: assigning into a nil map panics.
    results := make(map[string][]int)

    for _, search := range searches {
        if search == "" {
            // bytes.Index returns 0 for an empty separator, which carries no
            // positional information.
            continue
        }
        term := []byte(search)

        // offset is where the next scan starts, measured from the start of data.
        offset := 0
        for {
            i := bytes.Index(data[offset:], term)
            if i < 0 {
                break
            }
            results[search] = append(results[search], offset+i)
            // Resume one byte past the start of this match so that
            // overlapping occurrences are still found.
            offset += i + 1
        }
    }

    return results
}

用 bytes.Index() 找到第一个匹配很简单,但是如何以符合 Go 惯用法的方式找到全部匹配,同时又不消耗不必要的内存?

2 个答案:

答案 0 :(得分:2)

好的,这就是我在评论中提到的解决方案:改用 LastIndex 从后往前查找,而不是从前往后查找。我不确定它是否高效,但它确实可行;只是得到的索引是逆序的,而这一点在读取结果时随时可以调整过来。

package main

import (
    "fmt"
    "bytes"
)

// main runs findAllOccurrences over a sample string with two search terms
// and prints the resulting map.
func main() {
    const sample = "foobarfoobarfoobarfoobarfoofoobar"
    terms := []string{"foo", "bar"}
    fmt.Println(findAllOccurrences([]byte(sample), terms))
}


// findAllOccurrences reports, for each string in searches, the byte offsets
// in data at which that string begins, scanning from the end of data towards
// the start with bytes.LastIndex.
//
// Because of the backwards scan, each offset slice is in descending order
// (last occurrence first). Every new match must lie entirely before the
// previous one, so overlapping occurrences are not reported (searching "aaa"
// for "aa" yields [1]). A search string that never occurs has no entry in the
// returned map. Empty search strings are skipped.
//
// data is only re-sliced, never copied.
func findAllOccurrences(data []byte, searches []string) map[string][]int {
    results := make(map[string][]int)

    for _, search := range searches {
        if search == "" {
            // bytes.LastIndex returns len(s) for an empty separator, so the
            // window below would never shrink and the loop would never end.
            continue
        }
        // Convert once per term instead of once per LastIndex call.
        term := []byte(search)

        // end is the exclusive upper bound of the region still to be searched.
        end := len(data)
        for {
            match := bytes.LastIndex(data[:end], term)
            if match == -1 {
                break
            }
            results[search] = append(results[search], match)
            // Continue strictly to the left of the match just recorded.
            end = match
        }
    }

    return results
}

希望这会有所帮助! :)

答案 1 :(得分:0)

如 ishaan 的答案所示,您可以在每次搜索时把 data 赋值给另一个切片变量,然后在每次匹配之后对该变量重新切片。这个赋值只复制长度、容量和指针。重新切片只会改变切片变量的长度和指针:它不影响底层数组,也不会产生新的内存分配。我添加这个答案是为了说明它的内存效率,并演示您仍然可以使用 bytes.Index,而且可以把它同时用作传统 for 循环的初始化语句和递增语句:

package main

import (
    "bytes"
    "fmt"
)

// findAllOccurrences reports, for each string in searches, every byte offset
// in data at which that string begins.
//
// Offsets are listed in ascending order and overlapping occurrences are
// included, because the scan resumes one byte after the start of each match.
// A search string that never occurs has no entry in the returned map. Empty
// search strings are skipped.
//
// data is only re-sliced, never copied.
func findAllOccurrences(data []byte, searches []string) map[string][]int {
    results := make(map[string][]int)
    for _, search := range searches {
        if search == "" {
            // bytes.Index returns 0 for an empty separator, so the loop below
            // would keep matching until searchData[x+1:] sliced past the end
            // of an empty window and panicked.
            continue
        }
        searchData := data
        term := []byte(search)
        // x is the match offset inside the current window; d is how many bytes
        // have already been sliced off the front of data, so x+d is the offset
        // in data. In the post statement the right-hand side is evaluated
        // before assignment, so d+x+1 uses the x of the match just recorded.
        for x, d := bytes.Index(searchData, term), 0; x > -1; x, d = bytes.Index(searchData, term), d+x+1 {
            results[search] = append(results[search], x+d)
            searchData = searchData[x+1:]
        }
    }
    return results
}

// main runs findAllOccurrences on a short sample input with three search
// terms and prints the resulting map.
func main() {
    input := []byte(`foo foo hey foo`)
    terms := []string{`foo`, `hey`, ` `}
    hits := findAllOccurrences(input, terms)
    fmt.Println(hits)
}

打印

map[foo:[0 4 12] hey:[8]  :[3 7 11]]