我正在尝试运行下面这个 Spark 程序,它会统计当前 Twitter 上流行的主题标签(hashtag),并且只显示排名前 10 的主题标签。
我通过一个文本文件提供了 Twitter 的访问令牌(access token)、令牌密钥(secret)以及消费者密钥(consumer key),以便对这些凭据保密。
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.twitter.TwitterUtils
/**
 * Spark Streaming job that counts hashtags from a Twitter stream over a sliding
 * 300-second window (recomputed every second) and prints the counts sorted in
 * descending order.
 *
 * NOTE(review): a `java.lang.AbstractMethodError` raised from
 * `Logging.initializeLogIfNecessary` inside `TwitterReceiver` usually means the
 * spark-streaming-twitter jar was compiled against a different Spark release than
 * the one on the runtime classpath. That cannot be fixed in this file -- confirm
 * that the connector version in the build matches the Spark version in use.
 */
object PopularHashtags {

  /** Sets the root log4j logger level to ERROR so that lower-severity messages are suppressed. */
  def setupLogging(): Unit = {
    import org.apache.log4j.{ Level, Logger }
    Logger.getRootLogger().setLevel(Level.ERROR)
  }

  /**
   * Reads `C:\twitter.txt` and, for every line made of exactly two space-separated
   * fields, sets the system property `twitter4j.oauth.<first field>` to the second
   * field. Lines with any other number of fields are ignored.
   *
   * The file handle is always closed, even if reading fails part-way through.
   */
  def setupTwitter(): Unit = {
    import scala.io.Source
    val credentials = Source.fromFile("C:\\twitter.txt")
    try {
      for (line <- credentials.getLines()) {
        line.split(" ") match {
          case Array(key, value) => System.setProperty(s"twitter4j.oauth.$key", value)
          case _                 => () // not a "key value" pair: skip the line
        }
      }
    } finally {
      // Source.fromFile keeps the file open until it is explicitly closed.
      credentials.close()
    }
  }

  /**
   * Entry point: configures Twitter credentials, builds the streaming pipeline and
   * blocks until the streaming context terminates.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Credentials must be in the system properties before the Twitter stream is created.
    setupTwitter()

    // Local master with two threads, one-second batches.
    val ssc = new StreamingContext("local[2]", "PopularHashtags", Seconds(1))

    // Kept after context creation, as in the original ordering.
    // NOTE(review): presumably so Spark's own log4j setup does not override the level -- confirm.
    setupLogging()

    // `None` means no explicit Authorization object is passed to the connector.
    val tweets = TwitterUtils.createStream(ssc, None)

    // Tweet text -> space-separated words -> words starting with '#' -> (hashtag, 1) pairs.
    val hashtagKeyValues = tweets
      .map(status => status.getText())
      .flatMap(tweetText => tweetText.split(" "))
      .filter(word => word.startsWith("#"))
      .map(hashtag => (hashtag, 1))

    // Incremental windowed count: add the values entering the 300-second window and
    // subtract the ones leaving it, sliding every second.
    val hashtagCounts = hashtagKeyValues.reduceByKeyAndWindow(
      (x, y) => x + y,
      (x, y) => x - y,
      Seconds(300),
      Seconds(1))

    // Sort each batch by count, highest first.
    val sortedResults = hashtagCounts.transform(rdd => rdd.sortBy(_._2, ascending = false))

    // print() only shows the leading elements of every batch (ten by default per the Spark docs).
    sortedResults.print()

    // Checkpoint directory must be set before start(); the inverse-reduce window above relies on it.
    ssc.checkpoint("C:/checkpoint/")
    ssc.start()
    ssc.awaitTermination()
  }
}
我得到的错误如下所示。错误出现之后,程序每秒都会打印一次时间戳,并且一直持续下去,但始终没有显示任何数据。有人能帮我理解这个错误实际上意味着什么吗?
输出:
2018-04-10 01:04:00 INFO SparkContext:54 - Running Spark version 2.3.0
2018-04-10 01:04:00 WARN NativeCodeLoader:62 - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2018-04-10 01:04:00 INFO SparkContext:54 - Submitted application: PopularHashtags
2018-04-10 01:04:00 INFO SecurityManager:54 - Changing view acls to: Anantanuj
2018-04-10 01:04:00 INFO SecurityManager:54 - Changing modify acls to: Anantanuj
2018-04-10 01:04:00 INFO SecurityManager:54 - Changing view acls groups to:
2018-04-10 01:04:00 INFO SecurityManager:54 - Changing modify acls groups to:
2018-04-10 01:04:00 INFO SecurityManager:54 - SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(Anantanuj); groups with view permissions: Set(); users with modify permissions: Set(Anantanuj); groups with modify permissions: Set()
2018-04-10 01:04:01 INFO Utils:54 - Successfully started service 'sparkDriver' on port 56843.
2018-04-10 01:04:01 INFO SparkEnv:54 - Registering MapOutputTracker
2018-04-10 01:04:01 INFO SparkEnv:54 - Registering BlockManagerMaster
2018-04-10 01:04:01 INFO BlockManagerMasterEndpoint:54 - Using org.apache.spark.storage.DefaultTopologyMapper for getting topology information
2018-04-10 01:04:01 INFO BlockManagerMasterEndpoint:54 - BlockManagerMasterEndpoint up
2018-04-10 01:04:01 INFO DiskBlockManager:54 - Created local directory at C:\Users\Anantanuj\AppData\Local\Temp\blockmgr-cd6b548a-5acb-44e9-adf5-7e13f458d900
2018-04-10 01:04:01 INFO MemoryStore:54 - MemoryStore started with capacity 904.8 MB
2018-04-10 01:04:01 INFO SparkEnv:54 - Registering OutputCommitCoordinator
2018-04-10 01:04:02 INFO log:192 - Logging initialized @7536ms
2018-04-10 01:04:02 INFO Server:346 - jetty-9.3.z-SNAPSHOT
2018-04-10 01:04:02 INFO Server:414 - Started @7634ms
2018-04-10 01:04:02 INFO AbstractConnector:278 - Started ServerConnector@1f3f02ee{HTTP/1.1,[http/1.1]}{0.0.0.0:4040}
2018-04-10 01:04:02 INFO Utils:54 - Successfully started service 'SparkUI' on port 4040.
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@71c27ee8{/jobs,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@1a6c1270{/jobs/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@18a136ac{/jobs/job,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@6dee4f1b{/jobs/job/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@176b3f44{/stages,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@6ee6f53{/stages/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@421bba99{/stages/stage,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@66ac5762{/stages/stage/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@797cf65c{/stages/pool,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@31bcf236{/stages/pool/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@4b3ed2f0{/storage,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@4fad9bb2{/storage/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@517d4a0d{/storage/rdd,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@7862f56{/storage/rdd/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@3a12c404{/environment,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@1941a8ff{/environment/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@22d7b4f8{/executors,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@149dd36b{/executors/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@38831718{/executors/threadDump,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@2c1156a7{/executors/threadDump/json,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@33fe57a9{/static,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@142269f2{/,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@331acdad{/api,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@6aa3a905{/jobs/job/kill,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@a22cb6a{/stages/stage/kill,null,AVAILABLE,@Spark}
2018-04-10 01:04:02 INFO SparkUI:54 - Bound SparkUI to 0.0.0.0, and started at http://DESKTOP-AEDI0R6:4040
2018-04-10 01:04:02 INFO Executor:54 - Starting executor ID driver on host localhost
2018-04-10 01:04:02 INFO Utils:54 - Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 56856.
2018-04-10 01:04:02 INFO NettyBlockTransferService:54 - Server created on DESKTOP-AEDI0R6:56856
2018-04-10 01:04:02 INFO BlockManager:54 - Using org.apache.spark.storage.RandomBlockReplicationPolicy for block replication policy
2018-04-10 01:04:02 INFO BlockManagerMaster:54 - Registering BlockManager BlockManagerId(driver, DESKTOP-AEDI0R6, 56856, None)
2018-04-10 01:04:02 INFO BlockManagerMasterEndpoint:54 - Registering block manager DESKTOP-AEDI0R6:56856 with 904.8 MB RAM, BlockManagerId(driver, DESKTOP-AEDI0R6, 56856, None)
2018-04-10 01:04:02 INFO BlockManagerMaster:54 - Registered BlockManager BlockManagerId(driver, DESKTOP-AEDI0R6, 56856, None)
2018-04-10 01:04:02 INFO BlockManager:54 - Initialized BlockManager: BlockManagerId(driver, DESKTOP-AEDI0R6, 56856, None)
2018-04-10 01:04:02 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@2dbf4cbd{/metrics/json,null,AVAILABLE,@Spark}
ERROR:
Exception in thread "receiver-supervisor-future-0" java.lang.AbstractMethodError
at org.apache.spark.internal.Logging$class.initializeLogIfNecessary(Logging.scala:99)
at org.apache.spark.streaming.twitter.TwitterReceiver.initializeLogIfNecessary(TwitterInputDStream.scala:60)
at org.apache.spark.internal.Logging$class.log(Logging.scala:46)
at org.apache.spark.streaming.twitter.TwitterReceiver.log(TwitterInputDStream.scala:60)
at org.apache.spark.internal.Logging$class.logInfo(Logging.scala:54)
at org.apache.spark.streaming.twitter.TwitterReceiver.logInfo(TwitterInputDStream.scala:60)
at org.apache.spark.streaming.twitter.TwitterReceiver.onStop(TwitterInputDStream.scala:106)
at org.apache.spark.streaming.receiver.ReceiverSupervisor.stopReceiver(ReceiverSupervisor.scala:170)
at org.apache.spark.streaming.receiver.ReceiverSupervisor$$anonfun$restartReceiver$1.apply$mcV$sp(ReceiverSupervisor.scala:194)
at org.apache.spark.streaming.receiver.ReceiverSupervisor$$anonfun$restartReceiver$1.apply(ReceiverSupervisor.scala:189)
at org.apache.spark.streaming.receiver.ReceiverSupervisor$$anonfun$restartReceiver$1.apply(ReceiverSupervisor.scala:189)
at scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24)
at scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24)
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
at java.lang.Thread.run(Unknown Source)
-------------------------------------------
Time: 1523302444000 ms
-------------------------------------------
-------------------------------------------
Time: 1523302445000 ms
-------------------------------------------
-------------------------------------------
Time: 1523302446000 ms
-------------------------------------------