I have Scala code that reads data from Twitter with the help of streaming, and I want to do the same thing in Java. I am trying to serialize the data with the help of Jackson. But I get an error on this call:

MongoSpark.save(dataFrame, writeConfig);

It is underlined with "Cannot resolve method save(org.apache.spark.sql.DataFrame, com.mongodb.spark.config.WriteConfig)". Can the same thing be done in another way?

I am also confused by this line; can I do the same in Java?

MongoSpark.save(rawTweetsDF.coalesce(1).write.format("org.apache.spark.sql.json").option("forensicdb", "LiveRawTweets").mode("append"), writeConfig)

P.S. I am using Spark version 1.6.2.
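If that two-argument overload really does not exist in my version, would something like the following be the right way to write the DataFrame instead? This is only a rough sketch based on the MongoDB Spark connector documentation; the "com.mongodb.spark.sql" format name, the option keys and the URI are my assumptions and I have not tested any of it against Spark 1.6.2.

import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.SaveMode;

public class MongoWriteSketch {
    // Untested sketch: write the tweets DataFrame to MongoDB through the data source API
    // instead of MongoSpark.save(dataFrame, writeConfig). The format name and option keys
    // are taken from the connector docs and may differ for my connector version.
    static void saveToMongo(DataFrame dataFrame) {
        dataFrame.write()
                 .format("com.mongodb.spark.sql")                  // MongoDB data source
                 .option("uri", "mongodb://127.0.0.1/forensicdb")  // placeholder connection string
                 .option("database", "forensicdb")
                 .option("collection", "LiveRawTweets")
                 .mode(SaveMode.Append)                            // append new tweets
                 .save();
    }
}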
object tweetstreamingmodel {

  //***********************************************************************************
  @transient
  @volatile private var spark_SparkSession: SparkSession = _ // Equivalent of SQLContext
  val naivemodelpth = "/home/smulwa/data/naiveBayesModel"
  case class SummaryStats(Recall: Double, Precision: Double, F1measure: Double, Accuracy: Double)
  var tweetcategory: String = _
  //***********************************************************************************
  def main(args: Array[String]) {
    try {
      var totalTweets: Long = 0
      if (spark_SparkSession == null) {
        spark_SparkSession = SentUtilities.getSparkSession() // Get SparkSession object
      }
      val spark_streamcontext = SentUtilities.getSparkStreamingContext(spark_SparkSession.sparkContext)
      spark_streamcontext.checkpoint("hdfs://KENBO-SPK08.forensics.net:54310/checkpoint/")

      // Load Naive Bayes model from local drive.
      val sqlcontext = spark_SparkSession.sqlContext // Create SQLContext from SparkSession object
      import sqlcontext.implicits._

      val twitteroAuth: Some[OAuthAuthorization] = OAuthUtilities.getTwitterOAuth()
      val tweetfilters = MongoScalaUtil.getTweetFilters(spark_SparkSession)
      val Twitterstream: DStream[Status] = TwitterUtils.createStream(spark_streamcontext, twitteroAuth, tweetfilters,
        StorageLevel.MEMORY_AND_DISK_SER).filter(_.getLang() == "en")
      Twitterstream.foreachRDD { rdd =>
        if (rdd != null && !rdd.isEmpty() && !rdd.partitions.isEmpty) {
          saveRawTweetsToMongoDB(rdd)
          rdd.foreachPartition { partitionOfRecords =>
            if (!partitionOfRecords.isEmpty) {
              partitionOfRecords.foreach(record =>
                MongoScalaUtil.SaveRawtweetstoMongodb(record.toString, record.getUser.getId, record.getId,
                  SentUtilities.getStreamDate(), SentUtilities.getStreamTime())) // mongo_utilities.save(record.toString, spark_SparkSession.sparkContext)
            }
          }
        }
      }

      spark_streamcontext.start()
      spark_streamcontext.awaitTermination()
    } catch {
      case e: Exception => println("Error in tweet streaming job: " + e)
    }
  }
  val jacksonObjectMapper: ObjectMapper = new ObjectMapper()

  // @param rdd -- RDD of Status objects to save.
  def saveRawTweetsToMongoDB(rdd: RDD[Status]): Unit = {
    try {
      val sqlContext = spark_SparkSession.sqlContext
      val tweet = rdd.map(status => jacksonObjectMapper.writeValueAsString(status))
      val rawTweetsDF = sqlContext.read.json(tweet)
      val readConfig: ReadConfig = ReadConfig(Map("uri" ->
        "mongodb://10.0.10.100:27017/forensicdb.LiveRawTweets?readPreference=primaryPreferred"))
      val writeConfig: WriteConfig = WriteConfig(Map("uri" ->
        "mongodb://10.0.10.100:27017/forensicdb.LiveRawTweets"))
      MongoSpark.save(rawTweetsDF.coalesce(1).write.format("org.apache.spark.sql.json").option("forensicdb",
        "LiveRawTweets").mode("append"), writeConfig)
    } catch {
      case e: Exception => println("Error Saving tweets to Mongodb: " + e)
    }
  }
}
And the Java analog:
public class Main {

    // Set system credentials for access to Twitter
    private static void setTwitterOAuth() {
        System.setProperty("twitter4j.oauth.consumerKey", TwitterCredentials.consumerKey);
        System.setProperty("twitter4j.oauth.consumerSecret", TwitterCredentials.consumerSecret);
        System.setProperty("twitter4j.oauth.accessToken", TwitterCredentials.accessToken);
        System.setProperty("twitter4j.oauth.accessTokenSecret", TwitterCredentials.accessTokenSecret);
    }

    public static void main(String[] args) {
        setTwitterOAuth();

        SparkConf conf = new SparkConf().setMaster("local[2]")
                                        .setAppName("SparkTwitter");

        // Spark contexts
        JavaSparkContext sparkContext = new JavaSparkContext(conf);
        JavaStreamingContext jssc = new JavaStreamingContext(sparkContext, new Duration(1000));
        JavaReceiverInputDStream<Status> twitterStream = TwitterUtils.createStream(jssc);

        // Stream that contains just tweets in English
        JavaDStream<Status> enTweetsDStream = twitterStream.filter(status -> "en".equalsIgnoreCase(status.getLang()));
        enTweetsDStream.persist(StorageLevel.MEMORY_AND_DISK());

        enTweetsDStream.foreachRDD(rdd -> {
            if (rdd != null && !rdd.isEmpty() && !rdd.partitions().isEmpty()) {
                saveRawTweetsToMondoDb(rdd, sparkContext);
            }
        });

        enTweetsDStream.print();
        jssc.start();
        jssc.awaitTermination();
    }
    static void saveRawTweetsToMondoDb(JavaRDD<Status> rdd, JavaSparkContext sparkContext) {
        try {
            ObjectMapper objectMapper = new ObjectMapper();
            Function<Status, String> toJsonString = status -> objectMapper.writeValueAsString(status);
            SQLContext sqlContext = new SQLContext(sparkContext);
            JavaRDD<String> tweet = rdd.map(toJsonString);
            DataFrame dataFrame = sqlContext.read().json(tweet);

            // Settings for reading
            Map<String, String> readOverrides = new HashMap<>();
            readOverrides.put("uri", "mongodb://127.0.0.1/forensicdb.LiveRawTweets");
            readOverrides.put("readPreference", "primaryPreferred");
            ReadConfig readConfig = ReadConfig.create(sparkContext).withJavaOptions(readOverrides);

            // Settings for writing
            Map<String, String> writeOverrides = new HashMap<>();
            writeOverrides.put("uri", "mongodb://127.0.0.1/forensicdb.LiveRawTweets");
            WriteConfig writeConfig = WriteConfig.create(sparkContext).withJavaOptions(writeOverrides);

            MongoSpark.write(dataFrame).option("collection", "LiveRawTweets").mode("append").save();
            MongoSpark.save(dataFrame, writeConfig); // this is the call that does not resolve
        } catch (Exception e) {
            System.out.println("Error saving to database: " + e);
        }
    }
}
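Alternatively, would converting each tweet to a Document and using the RDD-based save be a workaround? Again this is just a sketch modelled on the connector's Java examples; I have not verified that this overload exists in the connector version that matches Spark 1.6.2, and the URI is a placeholder.

import java.util.HashMap;
import java.util.Map;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.bson.Document;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.mongodb.spark.MongoSpark;
import com.mongodb.spark.config.WriteConfig;

import twitter4j.Status;

public class MongoRddSaveSketch {
    // Untested sketch: serialize each Status to JSON with Jackson, parse it into a BSON
    // Document, and save the resulting RDD with the RDD-based overload of MongoSpark.save
    // instead of the DataFrame overload that does not resolve for me.
    static void saveRawTweets(JavaRDD<Status> rdd, JavaSparkContext sparkContext) {
        ObjectMapper objectMapper = new ObjectMapper();

        Map<String, String> writeOverrides = new HashMap<>();
        writeOverrides.put("uri", "mongodb://127.0.0.1/forensicdb.LiveRawTweets");
        WriteConfig writeConfig = WriteConfig.create(sparkContext).withJavaOptions(writeOverrides);

        // Status -> JSON string -> org.bson.Document
        JavaRDD<Document> documents = rdd.map(status -> Document.parse(objectMapper.writeValueAsString(status)));

        MongoSpark.save(documents, writeConfig); // RDD-based overload instead of the DataFrame one
    }
}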