Apache pig,使用正则表达式解析组合日志

时间:2017-01-31 12:24:29

标签: regex parsing logging apache-pig quote

我正在编写 Pig Latin 脚本,尝试用正则表达式解析日志,但在匹配双引号 " 时返回错误,例如:错误 1200:意外的字符 '"'。日志格式如下:



118.102.255.50 - - [17/Oct/2014:00:00:29 -0400] "GET /favicon.ico HTTP/1.1" 200 20 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.101 Safari/537.36"




而我写的脚本:



-- Load the raw log file; every input line becomes one record with a single chararray field.
test = LOAD '/pigdata/log' as (line:chararray);
-- Match the whole line against one regex with 11 capture groups and flatten the
-- resulting tuple into 11 named, typed columns.
-- NOTE(review): the \" sequences inside this single-quoted pattern are what the
-- question reports as failing with error 1200 (unexpected character '"').
log = FOREACH test GENERATE FLATTEN(REGEX_EXTRACT_ALL(line,'^(\\S+)\\s+(\\S+)\\s+(\\S+)\\s+.(\\S+\\s+\\S+).\\s+\"(\\S+)\\s+(.+?)\\s+(HTTP[^\"]+)\"\\s+(\\S+)\\s+(\\S+)\\s+\"([^\"]*)\"\\s+\"(.*)\"$')) AS (address_ip: chararray, logname: chararray, user: chararray, timestamp: chararray, method: chararray, uri: chararray, proto: chararray, status: int, bytes: int, referer: chararray, userAgent: chararray);

-- Print the parsed relation to the console.
dump log; 




1 个答案:

答案 0 :(得分:0)

因为 Pig 使用 Java 正则表达式,所以双引号需要用 \\ 来转义(即写成 \\"),例如:

log = FOREACH test GENERATE FLATTEN(REGEX_EXTRACT_ALL(line,'^(\\S+)\\s+(\\S+)\\s+(\\S+)\\s+.(\\S+\\s+\\S+).\\s+\\"(\\S+)\\s+(.+?)\\s+(HTTP[^"]+)\\"\\s+(\\S+)\\s+(\\S+)\\s+\\"([^"]*)\\"\\s+\\"(.*)\\"$')) AS (address_ip: chararray, logname: chararray, user: chararray, timestamp: chararray, method: chararray, uri: chararray, proto: chararray, status: int, bytes: int, referer: chararray, userAgent: chararray);

' Worksheet change handler.
'   * When O6 is edited and is non-empty, schedules macro "kko" to run in 10 seconds.
'   * When O9 is edited and is non-empty, schedules macro "kko2" to run in 10 seconds.
'   * When F4 is edited and equals G4, copies the value of G9 into F6 and the
'     value of G11 into G6.
' Target: the range of cells that was just changed (may span several cells).
Private Sub Worksheet_Change(ByVal Target As Range)

    If Not Intersect(Target, Me.Range("O6")) Is Nothing Then
        ' Cells(6, 15) is O6.
        If Len(Me.Cells(6, 15)) >= 1 Then
            Application.OnTime Now + TimeValue("00:00:10"), "kko"
        End If
    End If

    If Not Intersect(Target, Me.Range("O9")) Is Nothing Then
        ' Cells(9, 15) is O9.
        If Len(Me.Cells(9, 15)) >= 1 Then
            Application.OnTime Now + TimeValue("00:00:10"), "kko2"
        End If
    End If

    If Not Intersect(Target, Me.Range("F4")) Is Nothing Then
        If Me.Range("F4") = Me.Range("G4") Then
            ' Assign the values directly instead of Select/Copy/PasteSpecial:
            ' this keeps the user's selection and clipboard intact and does not
            ' fail with error 1004 when this sheet is not the active one.
            ' Value2 transfers the raw value only, like xlPasteValues, without
            ' touching the destination's number format.
            Me.Range("F6").Value2 = Me.Range("G9").Value2
            Me.Range("G6").Value2 = Me.Range("G11").Value2
        End If
    End If

End Sub
相关问题