如何将 gzip 压缩的 JSON 文件解组(unmarshal)到结构体中

时间:2019-05-24 04:07:40

标签: go gzip

我正在编写一个 trie 数据结构:先把它序列化为 JSON,用 gzip 压缩后写入文件 trieSample.json.gz,然后再从文件读回到结构体中。奇怪的是,解组没有报错,但结构体并没有被填充。

我尝试了 json.Unmarshal 和 json.Decoder,都没有成功。希望有人能帮我找出这里遗漏了什么。读取时没有报告任何错误,只是结构体中没有任何键。如果不使用 gzip,只做普通的 json.Marshal -> 写入文件 -> 从文件读取 -> json.Unmarshal,则一切正常。

// charSet lists the characters accepted in trie keys; it is the charset
// handed to NewTrieSlice by the test in this file.
var charSet = "0123456789bcdefghjkmnopqrstuvwxyz"
// logTagSlice is the tag passed as the first argument of every logger call
// made by trieSlice.
const logTagSlice = "trie.log"

// trieSlice is the root of a trie whose nodes keep their children in
// fixed-position slices. Only the exported fields take part in JSON
// encoding; logger and capacity are unexported and are never serialized.
type trieSlice struct {
    // Children holds the first-level nodes, indexed by the position that
    // Charset assigns to the first rune of a key. Unused slots are nil.
    Children []*tNode          `json:"c"`
    // Charset is the legend of which characters may appear in keys and
    // which slot of a Children slice each of them occupies.
    Charset  map[int32]int8    `json:"l"`
    // logger receives the informational messages emitted by Add.
    logger   loggingapi.Logger `json:"-"`
    // capacity is the slice length chosen by NewTrieSlice so that every
    // character of the charset has a slot.
    capacity int               `json:"-"`
}

// tNode is a single trie node. The path of slot indices leading from the
// root to a node spells the key prefix that the node represents.
type tNode struct {
    // Children holds the nodes for the runes that may follow the current
    // one, indexed by charset position. Unused slots are nil.
    Children []*tNode `json:"c"`
    // IsLeaf reports whether this node ends a complete key. It is omitted
    // from the JSON output when false.
    IsLeaf   bool     `json:"e,omitempty"`
    // Value is the value stored for the key that ends at this node. It is
    // omitted from the JSON output when zero.
    Value    int16    `json:"v,omitempty"`
}

// NewTrieSlice returns an empty trie whose keys may only use the characters
// in charset. The position of a character within charset, counted in runes,
// is the slot that character occupies in every Children slice.
//
// Positions are stored as int8, so charset should contain at most 128
// characters. If a character is repeated, its last position wins.
//
// logger receives the informational messages emitted by Add.
func NewTrieSlice(charset string, logger loggingapi.Logger) *trieSlice {
    // Ranging over a string yields byte offsets, which are sparse for
    // multi-byte characters. Convert to runes first so that slots are dense
    // and the root slice has exactly one slot per character.
    runes := []rune(charset)
    m := make(map[int32]int8, len(runes))
    for index, r := range runes {
        m[r] = int8(index)
    }
    return &trieSlice{
        Charset:  m,
        Children: make([]*tNode, len(runes)),
        logger:   logger,
        capacity: len(runes),
    }
}

// newNode allocates a trie node that has capacity child slots, all nil.
func newNode(capacity int) *tNode {
    node := new(tNode)
    node.Children = make([]*tNode, capacity)
    return node
}

// getPosition looks up the Children slot assigned to rune r.
// It returns (-1, false) when r is not part of the trie's charset.
func (t *trieSlice) getPosition(r int32) (index int8, found bool) {
    slot, known := t.Charset[r]
    if !known {
        return -1, false
    }
    return slot, true
}

// Add inserts key into the trie and stores val on the node that ends the key.
// Adding a key that is already present overwrites its value.
//
// An empty key, or a key containing a character that is not in the trie's
// charset, is logged and ignored; in that case the trie is left untouched.
func (t *trieSlice) Add(key string, val int16) {
    if len(key) == 0 {
        t.logger.Info(logTagSlice, "trying to add empty key, return with no action")
        return
    }

    // Resolve every rune to its slot before touching the trie, so a key with
    // an unsupported character never leaves partially built branches behind.
    // Working on runes throughout (never on len(key), which counts bytes)
    // keeps multi-byte characters from running past the end of the key.
    runes := []rune(key)
    path := make([]int8, len(runes))
    for i, r := range runes {
        index, ok := t.getPosition(r)
        if !ok {
            // The rune is converted to a string so that it matches the %s
            // verb in the message.
            t.logger.Info(logTagSlice, "key is not present in the charset %s,  cannot add to trieSlice", string(r))
            return
        }
        path[i] = index
    }

    // New nodes get as many slots as the root, so any slot index that is
    // valid at the root is valid at every depth.
    width := len(t.Children)
    children := t.Children
    var node *tNode
    for _, index := range path {
        node = children[index]
        if node == nil {
            node = newNode(width)
            children[index] = node
        }
        children = node.Children
    }

    // node is the last node on the path, i.e. the one that ends the key.
    node.IsLeaf = true
    node.Value = val
}

// TestSample builds a small trie, writes it to disk as gzip-compressed JSON,
// then tries to read it back in two different ways and dumps both results.
//
// NOTE(review): this is the failing reproduction from the question and is
// kept as posted. The comments below mark the steps that leave both decoded
// tries unpopulated.
func TestSample(t *testing.T) {
    // Create trie and add a few keys
    trie := NewTrieSlice(charSet, loggingapi.NewStdOut())
    trie.Add("test", 10)
    trie.Add("test1", 20)
    trie.Add("test2", 30)
    trie.Add("test3", 40)
    trie.Add("test4", 50)

    // Write gzipped json to file
    var network bytes.Buffer
    b, err := json.Marshal(trie)
    if err != nil {
        fmt.Println("error in marshal ... ", err.Error())
        // t.Fail marks the test as failed but execution continues.
        t.Fail()
    }
    w := gzip.NewWriter(&network)
    // NOTE(review): the error returned by Write is discarded.
    w.Write(b)
    // NOTE(review): the buffer is written to disk before w.Close() runs.
    // gzip.Writer may hold data back until Close, which also writes the gzip
    // footer, so the file can be an incomplete gzip stream. The WriteFile
    // error is discarded as well.
    ioutil.WriteFile("../resources/trieSample.json.gz", []byte(network.String()), 0644)
    w.Close()

    // Read gzipped json from file into struct
    trieUnmarshal := NewTrieSlice(charSet, loggingapi.NewStdOut())
    trieDecoder := NewTrieSlice(charSet, loggingapi.NewStdOut())

    // attempt via json Unmarshal
    file, err := os.Open("../resources/trieSample.json.gz")
    if err != nil {
        fmt.Println(err.Error())
        t.Fail()
    }
    r, err := gzip.NewReader(file)
    if err != nil {
        fmt.Println(err.Error())
        t.Fail()
    }
    // NOTE(review): sc.Scan() is never called, so sc.Bytes() has no token to
    // return and Unmarshal receives no data. The Unmarshal error is
    // discarded, which is why no failure is reported.
    sc := bufio.NewScanner(r)
    json.Unmarshal(sc.Bytes(), trieUnmarshal)

    // attempt via json Decoder
    b, err = ioutil.ReadFile("../resources/trieSample.json.gz")
    if err != nil {
        fmt.Println(err.Error())
        t.Fail()
    }
    // NOTE(review): b holds the raw, still-compressed file contents; they
    // are handed to the JSON decoder without going through gzip.NewReader,
    // and the Decode error is discarded.
    bReader := bytes.NewReader(b)
    json.NewDecoder(bReader).Decode(trieDecoder)

    // spew.Dump shows that object is not populated
    spew.Dump(trieUnmarshal)
    spew.Dump(trieDecoder)
}

spew.Dump 显示 trieSlice 的 Children 切片中所有元素都是 nil。

1 个答案:

答案 0 :(得分:1)

在使用压缩后的数据之前,先关闭压缩器(gzip.Writer),这样缓冲的数据和 gzip 尾部才会全部写出。在解析 JSON 之前,先对数据进行解压缩。不要不恰当地使用 bufio.Scanner 把数据切碎。

// Serialize the trie to JSON, gzip it in memory, and write the compressed
// bytes to disk.
var network bytes.Buffer
b, err := json.Marshal(trie)
if err != nil {
    fmt.Println("error in marshal ... ", err.Error())
    t.Fail()
}
w := gzip.NewWriter(&network)
if _, err := w.Write(b); err != nil {
    log.Fatal(err)
}
// Close must run before the buffer is used: it flushes any data the writer
// is still holding and appends the gzip footer.
if err := w.Close(); err != nil {
    log.Fatal(err)
}
err = ioutil.WriteFile("trieSample.json.gz", network.Bytes(), 0644)
if err != nil {
    log.Fatal(err)
}

// The constructor takes the charset and a logger.
trieDecoder := NewTrieSlice(charSet, loggingapi.NewStdOut())

// Read the file back: decompress with gzip.NewReader and stream the
// decompressed bytes straight into a JSON decoder.
file, err := os.Open("trieSample.json.gz")
if err != nil {
    log.Fatal(err)
}
defer file.Close()
r, err := gzip.NewReader(file)
if err != nil {
    log.Fatal(err)
}
defer r.Close()
err = json.NewDecoder(r).Decode(trieDecoder)
if err != nil {
    log.Fatal(err)
}
spew.Dump(trieDecoder)

https://play.golang.org/p/pYup3v8-f4c