如何从spark应用程序运行yarn命令

时间:2017-09-14 21:56:41

标签: java scala apache-spark spark-streaming yarn

您好，我正在尝试从 Spark 应用程序中运行这样的 yarn 命令：

yarn logs

yarn application -list

yarn application -kill

我正在尝试使用yarnclient但面临问题。

 package com.ankush
 import java.io.Serializable

 import org.apache.hadoop.yarn.api.records.ApplicationId
 import org.apache.hadoop.yarn.client.api.YarnClient
 import org.apache.hadoop.yarn.conf._
 import org.apache.spark._
 import org.apache.spark.streaming._

 import scala.util.control.NonFatal

 object SparkFiller extends Serializable{
  private var appId: ApplicationId = null
   def main(args: Array[String]): Unit ={

     try {

  var counter =0

  val sparkConf = new SparkConf().setAppName("SparkFiller")//.setMaster("local[*]")
  val conf: YarnConfiguration = new YarnConfiguration()

  // println("test")
  sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
  sparkConf.set("spark.streaming.stopGracefullyOnShutdown", "true")
  val ssc = new StreamingContext(sparkConf, Seconds(1))
  val lines = ssc.socketTextStream("localhost", 9999)
  lines.foreachRDD(
    rdd => {
      if(rdd.isEmpty())
      {
        counter+=1
        if(counter>4)
        {
          println("Testing")
          val conf: YarnConfiguration = new YarnConfiguration()
          val client = YarnClient.createYarnClient()
          client.init(conf)
          client.start()
          val newApp = client.createApplication()
          client.getApplications()

          val test = client.getApplications()
          println(test)

          counter=0
          //        val result = "sh /home/ankush/test.sh".!
          //              ssc.stop()
        }
      }

      rdd.foreachPartition(part => {
        // val dbsender = new PushToDocumentDB();
        part.foreach(msg =>{
          println(msg)
        })

      })
    })



  ssc.start()

  ssc.awaitTermination()


}
catch {

  case runtime: RuntimeException=> {
    1
  }
}


   }

 }

然后我遇到了这样的错误：
  Exception in thread "streaming-job-executor-13" java.lang.NoClassDefFoundError: com/sun/jersey/api/client/config/ClientConfig
    at org.apache.hadoop.yarn.client.api.TimelineClient.createTimelineClient(TimelineClient.java:59)
    at org.apache.hadoop.yarn.client.api.impl.YarnClientImpl.createTimelineClient(YarnClientImpl.java:187)
    at org.apache.hadoop.yarn.client.api.impl.YarnClientImpl.serviceInit(YarnClientImpl.java:162)
    at org.apache.hadoop.service.AbstractService.init(AbstractService.java:163)

我想用这个应用程序实现：如果一段时间内没有数据流过套接字，就列出 YARN 上正在运行的应用程序，并杀死另一个向该套接字发送数据的应用程序。

谢谢,

Ankush Reddy。

1 个答案:

答案 0 :(得分:0)

类路径中缺少 Jersey 的 jar 包，请通过 spark-submit 把 jersey-bundle jar 添加到类路径中。

spark-submit --jars jersey-bundle-x.y.z.jar \
...........

spark-submit --conf "spark.executor.extraClassPath=\\<path>jersey-bundle-x.y.z.jar" \
.......