我试图运行这段 Spark Streaming 应用程序代码(来自我正在阅读的一本书),但没有得到预期的结果。有一个 Java 类,我在其中打开一个套接字并等待输入。我先运行套接字程序,并将它与 Spark 作业正确连接。然后我提交下面的作业,并收到一条表明已成功连接的消息。当我在套接字中键入内容时,我期望在终端中看到 wordcount 结果,但我只收到下面这条消息:
INFO BlockManagerInfo: Added input-0-1480077969600 in memory on 192.168.1.4:38818 (size: 7.0 B, free: 265.1 MB)
问题出在哪里?提前感谢。代码见下文。
import org.apache.spark.SparkConf
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming._
import org.apache.spark.storage.StorageLevel._
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.dstream.ForEachDStream
object ScalaFirstStreamingExample {

  /**
   * Entry point: builds a StreamingContext, consumes a TCP text stream from
   * localhost:9087, and prints per-batch word counts to the driver's stdout.
   *
   * @param args unused command-line arguments
   */
  def main(args: Array[String]): Unit = {
    println("Creating Spark Configuration")
    val conf = new SparkConf()
    conf.setAppName("My First Spark Streaming Application")

    // FIX: a socket receiver permanently occupies one core. With fewer than
    // two cores (master "local" / "local[1]") the receiver keeps storing
    // blocks ("Added input-0-... in memory") but no core is left to run the
    // batch jobs, so no word counts are ever printed. Default to local[2]
    // when no master is configured; spark-submit --master still takes
    // precedence because it sets "spark.master" before this code runs.
    if (!conf.contains("spark.master")) {
      conf.setMaster("local[2]")
    }

    println("Retreiving Streaming Context from Spark Conf")
    // Second argument: incoming data is grouped into 2-second batches.
    val streamCtx = new StreamingContext(conf, Seconds(2))

    // Watch for incoming text lines on localhost:9087. Received blocks are
    // kept serialized in memory, spill to disk if memory is insufficient,
    // and are replicated on two nodes (MEMORY_AND_DISK_SER_2).
    val lines = streamCtx.socketTextStream("localhost", 9087, MEMORY_AND_DISK_SER_2)

    // Classic word count: split each line into words, pair each word with 1,
    // then sum the counts per word within each batch.
    val words = lines.flatMap(x => x.split(" "))
    val pairs = words.map(word => (word, 1))
    val wordCounts = pairs.reduceByKey(_ + _)

    //wordCounts.print(20)
    myPrint(wordCounts, streamCtx)

    // Start the streaming computation and block until it terminates.
    streamCtx.start()
    streamCtx.awaitTermination()
  }

  /**
   * Prints every (word, count) pair of each batch on the driver.
   *
   * NOTE(review): `collect()` pulls the entire RDD to the driver — fine for
   * this small demo, unsuitable for high-volume streams.
   *
   * @param stream    DStream of (word, count) pairs to print
   * @param streamCtx the owning StreamingContext (unused; parameter kept for
   *                  source compatibility with existing callers)
   */
  def myPrint(stream: DStream[(String, Int)], streamCtx: StreamingContext): Unit = {
    stream.foreachRDD { (rdd: RDD[(String, Int)]) =>
      val results = rdd.collect()
      println("---------Start Printing Results----------")
      for (res <- results) {
        println(res)
      }
      println("---------Finished Printing Results----------")
    }
  }
}