我试图找出一个(multiline.pattern)或两个(multiline.pattern&exclude_line)正则表达式,以便将日志信息从filebeat传送到logstash。 写入日志的系统具有标准的日志格式,如下所示
[2019-08-28 10:38:57 +0200][0000000000][Info][User][OLS][201][Some Logging Information]
为此,我建立了正则表达式(也许还需要一些改进:-))
^\[(\d{4})-(\d{2})-(\d{2})\s(\d{2}):(\d{2}):(\d{2})\s\+(\d{4})\]\[\d{10}\]\[[^\]]*\]\[[^\]]*\]\[[^\]]*\]\[[\d]*\]\[[^\]]*\]$
不幸的是,当系统在调试模式下运行时,日志结构会发生变化
[2019-05-24 09:58:39 +0200][0000000000][Debug][External][RESTLM][HTDOC_REQUEST][Some Debug Loginformation]
[2019-05-24 09:58:39 +0200][0000000000][Debug][External][RESTLM][HTDOC_REQUEST][Some Debug Loginformation]
[2019-05-24 09:58:34 +0200][0000000026][Debug][External][RESTLM][REST_RESPONSE][[45][HTTP/1.0 201 Created
Server: Test/2019.3
Pragma: no-cache
Cache-control: no-cache
Content-Type: text/xml
Content-Length: 255
<?xml version="1.0" encoding="utf-8"?>
<Status><Repository><Path>D:/repository/tabfiles</Path><Version>4_0</Version><Fingerprint>p12uqocQM0gtaRieBldCix/CSSs=</Fingerprint></Repository><System>Running</System></Status>]]
[2019-05-24 09:58:34 +0200][0000000000][Debug][External][RESTLM][REST_REQUEST][[45][POST / HTTP/1.1
Content-Type: text/xml; charset=utf-8
Cache-Control: no-cache
Pragma: no-cache
User-Agent: Java/11.0.2
Host: serverxyz:24821
Accept: text/html, image/gif, image/jpeg, *; q=.2, */*; q=.2
Connection: keep-alive
Content-Length: 10
<Status />]]
我想排除第三个字段为“Debug”的那些日志条目(它们可能跨多行)。 在我看来,普通日志和调试日志之间的主要区别在于:调试日志的第6个字段不匹配 \[\d*\]。 此外,在某些情况下(我认为这正是我的问题所在),最后一个日志字段(Loginformation)内部还嵌套着方括号,看起来像 [[45][some text][other text]]
我要寻找的是一个正则表达式,无论日志是调试条目还是普通条目,它都能匹配一条完整的日志条目。 或者使用两个表达式:第一个匹配普通日志,第二个匹配调试日志(并将其排除)。
答案 0 :(得分:0)
由于您要做的就是与日志条目匹配,并且不捕获任何信息,因此请使用以下方法:
^\[\d{4}-\d{2}-\d{2}[\s\S]+?\]\]?$ /gm
思路是使用惰性量词(即 +? 中的 ?)进行非贪婪匹配,直到在行尾遇到一个或两个 ] 为止。
答案 1 :(得分:0)
您的某些分组是可选的,但我不确定具体是哪些;不过可以先从下面这个表达式入手:
^\[(\d{4})-(\d{2})-(\d{2})\s(\d{2}):(\d{2}):(\d{2})\s\+(\d{4})\]\[\d{10}\](\[[^\]]*\])?\[[^\]]*\]\[[^\]]*\]\[[^\]]*\]\[[^\]]*\](\[[\s\S]*?\])?$
或者也许
^\[(\d{4})-(\d{2})-(\d{2})\s(\d{2}):(\d{2}):(\d{2})\s\+(\d{4})\]\[(\d{10})\](\[([^\]]*)\])?\[([^\]]*)\]\[([^\]]*)\]\[([^\]]*)\]\[([^\]]*)\](\[([\s\S]*?)\])?$
如果您想捕获括号中的那些数据。
如果您希望简化/修改/探索该表达式,regex101.com 页面右上角的面板中有对它的逐项解释。如果愿意,您还可以在该链接中查看它如何匹配一些示例输入。
答案 2 :(得分:0)
如果要解析数据,可以选用类似 bufio.Scanner 的接口:
package main
import (
"bufio"
"io"
"log"
"strings"
)
// main feeds a sample of bracketed log records through the scanner and logs
// every record whose third field (the log level) is not "Debug".
func main() {
	input := `
[2019-05-24 09:58:39 +0200][0000000000][Debug][External][RESTLM][HTDOC_REQUEST][Some Debug Loginformation]
[2019-05-24 09:58:40 +0200][0000000000][Debug][External][RESTLM][HTDOC_REQUEST][Some Debug Loginformation]
[2019-05-24 09:58:41 +0200][0000000026][Debug][External][RESTLM][REST_RESPONSE][[45][HTTP/1.0 201 Created
Server: Test/2019.3
Pragma: no-cache
Cache-control: no-cache
Content-Type: text/xml
Content-Length: 255
<?xml version="1.0" encoding="utf-8"?>
<Status><Repository><Path>D:/repository/tabfiles</Path><Version>4_0</Version><Fingerprint>p12uqocQM0gtaRieBldCix/CSSs=</Fingerprint></Repository><System>Running</System></Status>]]
[2019-05-24 09:58:42 +0200][0000000000][Debug][External][RESTLM][REST_REQUEST][[45][POST / HTTP/1.1
Content-Type: text/xml; charset=utf-8
Cache-Control: no-cache
Pragma: no-cache
User-Agent: Java/11.0.2
Host: serverxyz:24821
Accept: text/html, image/gif, image/jpeg, *; q=.2, */*; q=.2
Connection: keep-alive
Content-Length: 10\]
<Status />]]
`
	// input = `[2019-05-24 09:58:39 +0200][0000000000][Debug][External][RESTLM][HTDOC_REQUEST][Some Debug Loginformation]`
	src := strings.NewReader(input)
	parser := newScanner(src)
	for parser.Scan() {
		line := parser.Items()
		// The log writes the level as "Debug"; compare case-insensitively so
		// the filter does not depend on the exact casing used by the producer.
		if len(line) > 2 && strings.EqualFold(line[2], "debug") {
			continue
		}
		log.Printf("line %#v\n", line)
	}
	// A false Scan can mean either end of input or a scanning error; only
	// Err distinguishes the two.
	if err := parser.Err(); err != nil {
		log.Fatalf("scanning log input: %v", err)
	}
	log.Println("done")
}
// Structural bytes of the bracketed log format.
const (
	eol          byte = '\n'
	bracketClose byte = ']'
	bracketOpen  byte = '['
	backslash    byte = '\\'
)

// Buffer sizing for the underlying bufio.Scanner. A record must fit in the
// buffer as a whole, so maxRecordSize is the largest (possibly multi-line)
// record that can be scanned before Scan fails with bufio.ErrTooLong.
const (
	initialBufSize = 500
	maxRecordSize  = 1 << 20
)

// scanner splits a stream of log records of the form
//
//	[col1][col2]...[colN]
//
// into their columns. A record ends at the first newline that is outside all
// brackets, so a column may span several lines and may itself contain nested
// bracket pairs. A bracket directly preceded by a backslash is treated as
// ordinary text; the backslash is kept in the column value.
//
// The embedded bufio.Scanner supplies Err, Bytes and Text; Bytes and Text
// return the raw text of the current record without its trailing newline.
type scanner struct {
	*bufio.Scanner
	buf            []byte   // initial buffer handed to the bufio.Scanner
	openedBrackets int      // bracket nesting depth while parsing a record
	currentCols    []string // columns completed so far in the record being parsed
	currentCol     []byte   // bytes of the column currently being read
	items          []string // columns of the record returned by the last Scan
}

// newScanner returns a scanner that reads bracketed log records from r.
func newScanner(r io.Reader) *scanner {
	s := &scanner{
		Scanner: bufio.NewScanner(r),
		buf:     make([]byte, initialBufSize),
	}
	s.Scanner.Buffer(s.buf, maxRecordSize)
	s.Scanner.Split(s.parse)
	return s
}

// Scan advances to the next record and reports whether one was found. It
// returns false at end of input or on error; Err tells the two apart.
func (s *scanner) Scan() bool {
	return s.Scanner.Scan()
}

// Items returns the columns of the record found by the most recent successful
// Scan, or nil if no record has been scanned yet. The returned slice is not
// overwritten by later calls to Scan.
func (s *scanner) Items() []string {
	return s.items
}

// takeRecord stores a private copy of the columns parsed so far as the
// current record, so callers may keep the slice across Scan calls.
func (s *scanner) takeRecord() {
	rec := make([]string, len(s.currentCols))
	copy(rec, s.currentCols)
	s.items = rec
}

// parse is the bufio.SplitFunc behind scanner. data always begins at a record
// boundary: a call either consumes whole records or asks for more input, so
// the parsing state is rebuilt from scratch on every call.
//
// Lines that contain no bracketed column (blank lines in particular) are
// skipped. At end of input a final record without a trailing newline is still
// returned, made up of the columns that were closed; an unterminated trailing
// column is dropped.
func (s *scanner) parse(data []byte, atEOF bool) (advance int, token []byte, err error) {
	s.openedBrackets = 0
	s.currentCols = s.currentCols[:0]
	s.currentCol = s.currentCol[:0]

	start := 0 // offset in data where the record being parsed begins
	for i, d := range data {
		escaped := i > 0 && data[i-1] == backslash
		switch {
		case d == bracketOpen && !escaped:
			// Only the outermost bracket is structural; nested ones belong
			// to the column text.
			if s.openedBrackets > 0 {
				s.currentCol = append(s.currentCol, d)
			}
			s.openedBrackets++
		case d == bracketClose && !escaped && s.openedBrackets > 0:
			s.openedBrackets--
			if s.openedBrackets > 0 {
				s.currentCol = append(s.currentCol, d)
			} else {
				s.currentCols = append(s.currentCols, string(s.currentCol))
				s.currentCol = s.currentCol[:0]
			}
		case s.openedBrackets > 0:
			// Any other byte inside a column, including newlines and escaped
			// brackets, is column text.
			s.currentCol = append(s.currentCol, d)
		case d == eol:
			if len(s.currentCols) == 0 {
				// Skip the empty line here instead of returning a nil token:
				// once the reader hit EOF, bufio.Scanner stops on a nil token
				// even if unread data remains.
				start = i + 1
				continue
			}
			s.takeRecord()
			return i + 1, data[start:i], nil
		}
		// Remaining case: text outside any bracket, which is ignored.
	}

	if !atEOF {
		// Drop any skipped lines and wait for the rest of the record.
		return start, nil, nil
	}
	if len(s.currentCols) == 0 {
		return len(data), nil, nil
	}
	s.takeRecord()
	return len(data), data[start:], nil
}