当它包含=时,如何找到以=结尾的整个字符串

时间:2014-02-26 11:43:36

标签: scala parsing

我对scala完全陌生,想尝试一下。我想解析由另一个工具生成的日志文件并提取有价值的信息。 它是一个包含度量列表的大行,名称后跟=和值列表,每个值都是一个名称后面跟着=和一个值在“。”之间的值。对于每个kpi重复计数器,同样适用于每个kpi

我的问题:当名称包含=时,我无法正确解析一行,因为我将其用作结束标记

input : 20:53:19.503 Dbg 29999 [er-thread-1] KPI reporter: [sql.pstmt.executeQuery.select top 200 Interaction.Id, Interaction.MediaTypeId, Interaction.TypeId, Interaction.SubtypeId, Interaction.TenantId, Interaction.StartDate, Interaction.ParentId, Interaction.AllAttributes, Interaction.QueueName from Interaction where ((Interaction.TypeId = {0}) and (Interaction.MediaTypeId = {1}) and (Interaction.Status = {2}) and ((Interaction.CreatorAppId = {3}) or (Interaction.CreatorAppId = {4}))) order by Interaction.StartDate asc .tmr=[min="0.0"|m5_rate="0.099"|max="249.0"|count="1241"|p999="248.565"|p99="73.65"|mean_rate="0.099"|type="TIMER"|m15_rate="0.099"|duration_unit="MILLISECONDS"|m1_rate="0.096"|p50="0.0"|p75="15.0"|p95="16.0"|mean="6.533"|rate_unit="SECOND"|stddev="17.611"|p98="16.0"]]

[1.336]失败:[' expected but'找到了

20:53:19.503 Dbg 29999 [er-thread-1] KPI reporter: [sql.pstmt.executeQuery.select top 200 Interaction.Id, Interaction.MediaTypeId, Interaction.TypeId, Interaction.SubtypeId, Interaction.TenantId, Interaction.StartDate, Interaction.ParentId, Interaction.AllAttributes, Interaction.QueueName from Interaction where ((Interaction.TypeId = {0}) and (Interaction.MediaTypeId = {1}) and (Interaction.Status = {2}) and ((Interaction.CreatorAppId = {3}) or (Interaction.CreatorAppId = {4}))) order by Interaction.StartDate asc .tmr=[min="0.0"|m5_rate="0.099"|max="249.0"|count="1241"|p999="248.565"|p99="73.65"|mean_rate="0.099"|type="TIMER"|m15_rate="0.099"|duration_unit="MILLISECONDS"|m1_rate="0.096"|p50="0.0"|p75="15.0"|p95="16.0"|mean="6.533"|rate_unit="SECOND"|stddev="17.611"|p98="16.0"]]
                                                                                                                                                                                                                                                                                                                                               ^

这是我当前的解析器:

import scala.util.parsing.combinator.RegexParsers
import scala.util.matching.Regex
import scala.util.parsing.combinator._

class kpiParser extends JavaTokenParsers {

  override val whiteSpace = "".r

  def name: Parser[String] = """[\w\. -\\,\\(\\){}]+?=""".r

  def counter: Parser[Any] = name~value

  def timer: Parser[Any] = """tmr""".r
  def gauge: Parser[Any] = """gge""".r
  def count: Parser[Any] = """cnt""".r
  def meter: Parser[Any] = """mtr""".r

  def value: Parser[Any] = """""""~(decimalNumber|intNumber|stringValue)~"""""""
//  def value: Parser[Any] = """\w+""".r

  def stringValue: Parser[Any] = """\w+""".r
  def intNumber: Parser[String] = """\d+""".r

  def logLine: Parser[Any] = timeStamp~kpiReporter
  def timeStamp: Parser[Any] = """([0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]{3})""".r
  def kpiReporter: Parser[Any] = """.* KPI reporter: \[""".r~rep(kpi|"|"~kpi)~"]"
  def kpi: Parser[Any] = name~"["~rep(counter|"|"~counter)~"]"

}

object ParseExpr extends kpiParser {

  def main(args: Array[String]) {

    val simple2 = """20:53:19.503 Dbg 29999 [er-thread-1] KPI reporter: [evt.logic-check-listeners.tmr=[min="0.023"|m5_rate="0.018"|max="0.492"|count="211"|p999="0.492"|p99="0.328"|mean_rate="0.017"|type="TIMER"|m15_rate="0.017"|duration_unit="MILLISECONDS"|m1_rate="0.025"|p50="0.055"|p75="0.061"|p95="0.134"|mean="0.064"|rate_unit="SECOND"|stddev="0.045"|p98="0.199"]|svc.OMResponse.AddStdRespUsage.success.cnt=[count="20000"|type="COUNTER"]]"""      
    val problem = """20:53:19.503 Dbg 29999 [er-thread-1] KPI reporter: [sql.pstmt.executeQuery.select top 200 Interaction.Id, Interaction.MediaTypeId, Interaction.TypeId, Interaction.SubtypeId, Interaction.TenantId, Interaction.StartDate, Interaction.ParentId, Interaction.AllAttributes, Interaction.QueueName from Interaction where ((Interaction.TypeId = {0}) and (Interaction.MediaTypeId = {1}) and (Interaction.Status = {2}) and ((Interaction.CreatorAppId = {3}) or (Interaction.CreatorAppId = {4}))) order by Interaction.StartDate asc .tmr=[min="0.0"|m5_rate="0.099"|max="249.0"|count="1241"|p999="248.565"|p99="73.65"|mean_rate="0.099"|type="TIMER"|m15_rate="0.099"|duration_unit="MILLISECONDS"|m1_rate="0.096"|p50="0.0"|p75="15.0"|p95="16.0"|mean="6.533"|rate_unit="SECOND"|stddev="17.611"|p98="16.0"]|evt.logic-check-listeners.tmr=[min="0.023"|m5_rate="0.018"|max="0.492"|count="211"|p999="0.492"|p99="0.328"|mean_rate="0.017"|type="TIMER"|m15_rate="0.017"|duration_unit="MILLISECONDS"|m1_rate="0.025"|p50="0.055"|p75="0.061"|p95="0.134"|mean="0.064"|rate_unit="SECOND"|stddev="0.045"|p98="0.199"]|svc.OMResponse.AddStdRespUsage.success.cnt=[count="20000"|type="COUNTER"]]"""      
    // println("input : " + args(0))
    println("input : " + simple2)
    println(parseAll(logLine, simple2))
    println("input : " + complex)
    println(parseAll(logLine, complex))
    println("input : " + problem)
    println(parseAll(logLine, problem))
  }
}

1 个答案:

答案 0 :(得分:0)

我结束了两个名字的定义,一个用于我的kpi,一个用于我的计数器,如下:

def kpiName: Parser[String] = """[\w\. -\\,\\(\\){}=]*?=\[""".r
def counterName: Parser[Any] = """[\w\. -]*?=""".r

然后我能够使用=中解析名称