将spark决策树模型调试字符串转换为scala中的嵌套JSON

时间:2017-02-02 08:46:07

标签: json scala apache-spark-mllib decision-tree

与此处(here)引用的决策树 JSON 解析方法类似,我想在 Scala 中实现决策树的简单可视化,其效果与 Databricks 笔记本中提供的 display 方法完全相同。

我是 Scala 新手,一直没能把逻辑写对。我知道必须通过递归调用来构建子节点,并在遇到最终的预测值(Predict)时结束递归。我使用的输入是下面代码中给出的模型调试字符串(debug string)。

我尝试的代码如下:
  /**
   * Classifies one line of a decision-tree debug string.
   *
   * The line is tagged "If" when it contains an `If` keyword, otherwise
   * "Else" when it contains an `Else` keyword, otherwise "None".
   *
   * @param x a single line of the debug string
   * @return a pair of (tag, text): for "If"/"Else" the text is the line with
   *         every occurrence of that keyword removed; for "None" it is the
   *         line with all round brackets removed
   */
  def getStatmentType(x: String): (String, String) = {
    val ifKeyword = "If+".r
    val elseKeyword = "Else+".r
    // The "If" check comes first, so a line containing both keywords is tagged "If".
    if (ifKeyword.findFirstIn(x).isDefined) ("If", x.replace("If", ""))
    else if (elseKeyword.findFirstIn(x).isDefined) ("Else", x.replace("Else", ""))
    else ("None", x.replace("(", "").replace(")", ""))
  }
  /**
   * Returns a copy of `test` without the element at index `i`.
   *
   * An index outside the list (negative, or at/after the end) leaves the
   * list unchanged.
   */
  def delete[A](test: List[A])(i: Int) = {
    val before = test.take(i)
    val after = test.drop(i + 1)
    before ::: after
  }
  /**
   * Builds a nested list-of-maps representation of a decision tree from the
   * lines of its debug string.
   *
   * Each line is classified with `getStatmentType`:
   *  - an "If" line becomes `Map("if-name" -> condition, "children" -> subtree)`;
   *    when the line following that subtree is an "Else" line, it becomes a
   *    sibling `Map("else-name" -> condition, "children" -> subtree)`;
   *  - a "None" line (a prediction) becomes `Map("predict" -> text)`;
   *  - an "Else" line with no preceding "If" on the same level ends the
   *    current level, so the enclosing "If" can pair with it. At the top
   *    level such a line ends parsing and the nodes built so far are returned.
   *
   * Nesting is derived purely from the If/Else pairing, not from indentation.
   *
   * @param tree the debug-string lines, in order
   * @return the top-level nodes, each carrying its own nested children
   */
  def BuildJson(tree: List[String]): List[Map[String, Any]] = {
    // Parses one level of sibling nodes and returns them together with the
    // lines that were NOT consumed. Handing the remainder back to the caller
    // is what lets an "If" find its matching "Else" after its subtree.
    def parseLevel(lines: List[String]): (List[Map[String, Any]], List[String]) =
      lines match {
        case Nil => (Nil, Nil)
        case current :: rest =>
          getStatmentType(current) match {
            case ("If", ifName) =>
              val (ifChildren, afterIf) = parseLevel(rest)
              val ifNode = Map[String, Any]("if-name" -> ifName, "children" -> ifChildren)
              // headOption keeps an If that ends the input from failing on an empty remainder.
              val (branchNodes, afterBranches) =
                afterIf.headOption.map(getStatmentType) match {
                  case Some(("Else", elseName)) =>
                    val (elseChildren, afterElse) = parseLevel(afterIf.tail)
                    val elseNode =
                      Map[String, Any]("else-name" -> elseName, "children" -> elseChildren)
                    (List(ifNode, elseNode), afterElse)
                  case _ =>
                    (List(ifNode), afterIf)
                }
              val (siblings, remaining) = parseLevel(afterBranches)
              (branchNodes ::: siblings, remaining)
            case ("None", text) =>
              val (siblings, remaining) = parseLevel(rest)
              (Map[String, Any]("predict" -> text) :: siblings, remaining)
            case _ =>
              // An Else belongs to an If on an outer level: leave it unconsumed.
              (Nil, lines)
          }
      }

    parseLevel(tree)._1
  }

  /**
   * Parses a decision-tree debug string and prints its nested JSON form.
   *
   * Example input (the model debug string this code was developed against):
   * {{{
   * If (feature 0 in {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,10.0,11.0,12.0,13.0})
   *    If (feature 0 in {6.0})
   *       Predict: 17.0
   *     Else (feature 0 not in {6.0})
   *       Predict: 6.0
   *   Else (feature 0 not in {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,10.0,11.0,12.0,13.0})
   *    Predict: 20.0
   * }}}
   *
   * @param str the debug string, one tree node per line (separated by "\n")
   * @return the JSON encoding of the empty string; the tree itself is only
   *         printed to standard output
   */
  def treeJson1(str: String):JsValue = {
    // Removes every space, which drops the indentation but also the spaces
    // inside conditions (e.g. "feature 0 in" becomes "feature0in").
    val compact = str.replace(" ", "")
    val nodeLines = compact.split("\n").toList
    val nested = BuildJson(nodeLines)
    println(MapReader.mapToJson(nested))
    // NOTE(review): the parsed tree is printed but not returned; presumably the
    // result of MapReader.mapToJson should be returned instead — confirm its type.
    Json.toJson("")
  }

预期产出:

[
  {
    'name': 'Root',
    'children': [
      {
        'name': 'feature 0 in {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,10.0,11.0,12.0,13.0}',
        'children': [
          {
            'name': 'feature 0 in {6.0}',
            'children': [
              {
                'name': 'Predict: 17.0'
              }
            ]
          },
          {
            'name': 'feature 0 not in {6.0}',
            'children': [
              {
                'name': 'Predict: 6.0'
              }
            ]
          }
        ]
      },
      {
        'name': 'feature 0 not in {1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,10.0,11.0,12.0,13.0}',
        'children': [
          {
            'name': 'Predict: 20.0'
          }
        ]
      }
    ]
  }
]

1 个答案:

答案 0 :(得分:0)

您不必解析调试字符串,而是可以直接从模型的根节点开始遍历整棵树来生成 JSON。参考此链接(原文的链接地址已缺失)。