在Golang中过滤字节流的正确方法?

时间:2017-02-13 14:43:16

标签: go stream byte

我想从命令中过滤STDOUT,这样我只保留\ r \ n终止行的任何连续块的第一行和最后一行(基本上忽略进度指示符)。

这是我的尝试(orig代码更多,这是一个简化版本,但基本上过滤必须在输入进入时发生,而不是在结束时):

package main

import (
    "bytes"
    "fmt"
    "os/exec"
)

var cr = []byte("\r")
var lf = []byte("\n")

func main() {
    input1 := []byte("a\nb\n\nprogress 98%\r")
    input2 := []byte("progress 99%\r")
    input3 := []byte("progress 100%\r")
    input4 := []byte("\n\nc\n")

    var stream []byte
    stream = append(stream, input1...)
    stream = append(stream, input2...)
    stream = append(stream, input3...)
    stream = append(stream, input4...)

    fmt.Printf("stream:\n%s\n", stream)

    streamer := &myFilter{}
    streamer.Write(input1)
    streamer.Write(input2)
    streamer.Write(input3)
    streamer.Write(input4)
    final := streamer.Bytes()

    fmt.Printf("streamer:\n%s\n\n", final)

    cmd := exec.Command("bash", "-c", "perl -e '$|++; print qq[a\nb\n\nprogress: 98%\r]; for (99..100) { print qq[progess: $_%\r]; sleep(1); } print qq[\n\nc\n]'")
    cmd.Stdout = &myFilter{}
    cmd.Start()
    cmd.Wait()
    fromCmd := cmd.Stdout.(*myFilter).Bytes()

    fmt.Printf("fromCmd:\n%s\n", fromCmd)
}

type myFilter struct {
    partialLine []byte
    storage     []byte
}

func (w *myFilter) Write(p []byte) (n int, err error) {
    // in order to filter out all but the first and last line of a set of \r
    // terminated lines (a progress bar), we need to collect whole \n terminated
    // lines
    lines := bytes.SplitAfter(p, lf)

    if len(w.partialLine) > 0 || (len(lines) == 1 && !bytes.HasSuffix(p, lf)) {
        w.partialLine = append(w.partialLine, lines[0]...)

        partialComplete := false
        if len(lines) > 1 {
            lines = lines[1:]
            partialComplete = true

        } else {
            lines = nil
            if bytes.HasSuffix(p, lf) {
                partialComplete = true
            }
        }

        if partialComplete {
            w.filterCR(w.partialLine)
            w.partialLine = nil
        }
    }

    lastLineIndex := len(lines) - 1
    if lastLineIndex > -1 && !bytes.HasSuffix(p, lf) {
        w.partialLine, lines = lines[lastLineIndex], lines[:lastLineIndex]
    }

    for _, line := range lines {
        w.filterCR(line)
    }

    return len(p), nil
}

func (w *myFilter) filterCR(p []byte) {
    if bytes.Contains(p, cr) {
        lines := bytes.Split(p, cr)
        w.store(lines[0])
        w.store(lf)

        if len(lines) > 2 {
            w.store(lines[len(lines)-2])
            w.store(lf)
        }
    } else {
        w.store(p)
    }
}

func (w *myFilter) store(p []byte) {
    w.storage = append(w.storage, p...)
}

func (w *myFilter) Bytes() []byte {
    if len(w.partialLine) > 0 {
        w.filterCR(w.partialLine)
    }
    return w.storage
}

我的输出是:

stream:
a
b

progress 100%

c

streamer:
a
b

progress 98%
progress 100%

c


fromCmd:
a
b

ss: 100%
progess: 100%

c

我想要的是你从" fromCmd"匹配我从"流光"。

得到的输出

我做错了什么,为什么我的实际输出似乎"损坏",为什么真正的命令运行与我的"流媒体"测试,以及过滤STDOUT的更好方法是什么?

1 个答案:

答案 0 :(得分:2)

您的部分线算法对所有输入都不正确。

您可以将myFilter替换为bufio.Scanner,它将为您正确处理部分线路缓冲,并[]bytebytes.Buffer来累积输出。

var out bytes.Buffer
scanner := bufio.NewScanner(stdout)
for scanner.Scan() {
    p := scanner.Bytes()
    lines := bytes.Split(p, cr)
    out.Write(lines[0])
    out.Write(lf)
    if len(lines) > 1 {
        out.Write(lines[len(lines)-1])
        out.Write(lf)
    }
}