Spark Streaming Twitter createStream问题

时间:2017-06-15 03:07:18

标签: scala spark-streaming

我试图使用Spark Streaming从Twitter流式传输数据,但是遇到了下面的问题。

   /// Button action: builds a document menu in `.import` mode restricted to the
   /// "public.composite-content" document type, makes `self` its delegate and
   /// presents it with animation.
   @IBAction func PdfBtn(_ sender: Any) {
    let menu = UIDocumentMenuViewController(
        documentTypes: ["public.composite-content"],
        in: .import
    )
    menu.delegate = self
    self.present(menu, animated: true, completion: nil)
}


/// Receives the URL of the picked document and prints it to the console.
/// Nothing else is done with the URL here.
@available(iOS 8.0, *)
public func documentPicker(_ controller: UIDocumentPickerViewController, didPickDocumentAt url: URL) {
    // `url` is already a URL; bind it under the name used in the log message.
    let link: URL = url
    print("The Url is : \(link)")
}

/// Receives the document picker chosen from the document menu, makes `self`
/// its delegate and presents it with animation.
@available(iOS 8.0, *)
public func documentMenu(_ documentMenu: UIDocumentMenuViewController, didPickDocumentPicker documentPicker: UIDocumentPickerViewController) {
    let picker = documentPicker
    picker.delegate = self
    self.present(picker, animated: true, completion: nil)
}





/// Logs that the picker was cancelled, then dismisses the currently
/// presented view controller with animation.
func documentPickerWasCancelled(_ controller: UIDocumentPickerViewController) {
    print("we cancelled")
    self.dismiss(animated: true, completion: nil)
}

错误屏幕:

import org.apache.spark.streaming.twitter._
import twitter4j.auth._
import twitter4j.conf._
import org.apache.spark.streaming.{Seconds,StreamingContext}
import org.apache.spark._
import org.apache.spark.streaming._
import org.apache.spark.streaming.StreamingContext._
// Streaming context created from `sc` with a Seconds(10) duration.
// `sc` is not defined in this snippet (presumably the spark-shell SparkContext).
val ssc = new StreamingContext(sc, Seconds(10))
// Build the twitter4j configuration; the four OAuth values are left empty here.
val cb = new ConfigurationBuilder
cb.setDebugEnabled(true).setOAuthConsumerKey("").setOAuthConsumerSecret("").setOAuthAccessToken("").setOAuthAccessTokenSecret("")
val auth = new OAuthAuthorization(cb.build)
// This is the call the question is about: `auth` is passed directly as the
// second argument, so it is intentionally left as originally posted.
val tweets = TwitterUtils.createStream(ssc,auth)

2 个答案:

答案 0 :(得分:1)

问题中的方法有这个签名:

// Signature of the createStream method used in the question, as quoted by this
// answer. The return type and body are not shown here.
def createStream(
  // Required: no default value.
  ssc: StreamingContext,
  // Required: no default value. Note that the type is Option[Authorization].
  twitterAuth: Option[Authorization],
  // Optional: defaults to Nil.
  filters: Seq[String] = Nil,
  // Optional: defaults to StorageLevel.MEMORY_AND_DISK_SER_2.
  storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_SER_2
)

我们可以看到ssc: StreamingContext和twitterAuth: Option[Authorization]是强制性的,另外两个是可选的。

在您的情况下,传入的twitterAuth参数类型不正确:它应该是Option[Authorization]。因此,调用应如下所示:

// Wrap the authorization in Some(...) so the second argument is an Option.
val tweets = TwitterUtils.createStream(ssc, Some(auth))

答案 1 :(得分:0)

import org.apache.spark._
import org.apache.spark.SparkContext._
import org.apache.spark.streaming._
import org.apache.spark.streaming.twitter._
import org.apache.spark.streaming.StreamingContext._


/** Reads tweets through Spark Streaming, keeps the English ones, extracts
  * hashtags, counts them per window, and both saves and prints the counts
  * sorted in descending order.
  */
object TwitterStream {

  /** Sets the root log4j logger level to ERROR. */
  def setupLogging(): Unit = {
    import org.apache.log4j.{Level, Logger}
    Logger.getRootLogger().setLevel(Level.ERROR)
  }

  /** Configures Twitter service credentials using twitter.txt.
    *
    * Every line of the file that consists of exactly two space-separated
    * fields `key value` is published as the system property
    * `twitter4j.oauth.<key>`; all other lines are ignored.
    */
  def setupTwitter(): Unit = {
    import scala.io.Source

    val credentials = Source.fromFile("/Users/sampy/twitter.txt")
    // Close the file handle even if reading or parsing throws.
    try {
      for (line <- credentials.getLines()) {
        line.split(" ") match {
          case Array(key, value) => System.setProperty("twitter4j.oauth." + key, value)
          case _                 => () // not a "key value" pair: skip
        }
      }
    } finally {
      credentials.close()
    }
  }

  /** Our main function where the action happens.
    *
    * @param args command-line arguments (not used)
    */
  def main(args: Array[String]): Unit = {

    // Credentials are set as system properties before the stream is created.
    setupTwitter()

    val ssc = new StreamingContext("local[*]", "PopularHashtags", Seconds(5))

    setupLogging()

    // No explicit authorization is passed here (None).
    val tweets = TwitterUtils.createStream(ssc, None)
    val engTweets = tweets.filter(_.getLang() == "en")

    val statuses = engTweets.map(_.getText)

    val tweetwords = statuses.flatMap(_.split(" "))

    val hashtags = tweetwords.filter(_.startsWith("#"))

    // Pair every hashtag with 1 so the occurrences can be summed per key.
    val hashtagKeyValues = hashtags.map(hashtag => (hashtag, 1))

    // NOTE(review): the two durations are passed as Seconds(5) then Seconds(20).
    // If the API order is (window, slide), this counts a 5-second window every
    // 20 seconds; confirm that this is intended rather than the reverse.
    val hashtagCounts =
      hashtagKeyValues.reduceByKeyAndWindow((x: Int, y: Int) => x + y, Seconds(5), Seconds(20))

    // Sort each RDD by count, highest first.
    val sortedResults = hashtagCounts.transform(rdd => rdd.sortBy(_._2, ascending = false))
    sortedResults.saveAsTextFiles("/Users/sampy/tweetsTwitter", "txt")

    sortedResults.print()

    ssc.checkpoint("/Users/sampy/checkpointTwitter")
    ssc.start()
    ssc.awaitTermination()
  }
}