Spark:文档更改后如何将其持久化保存到数据库

时间:2018-04-25 15:46:10

标签: mongodb scala apache-spark persist

我是spark的新手,无法找到将更改后的文档保存到数据库的方法:

import com.mongodb.spark._
import com.mongodb.spark.config.{ReadConfig, WriteConfig}
import com.typesafe.scalalogging.slf4j.LazyLogging
import org.apache.spark.{SparkConf, SparkContext}
import org.bson.Document


// Question snippet (intentionally incomplete): reads the `customers` collection of the
// `test` database into an RDD, edits each document's `address.street` in memory, and
// prepares a WriteConfig for the same collection. No statement in this object writes
// the edited documents back to MongoDB; that missing step is marked with `// ??`.
object Test extends App with LazyLogging {

// Local Spark configuration: run in-process using all available cores.
val conf = new SparkConf()
.setAppName("test")
.setMaster("local[*]")

val sc = new SparkContext(conf)
// Source collection: test.customers on the local MongoDB instance.
val readConfig = ReadConfig(Map("uri" -> "mongodb://127.0.0.1/", "database" -> "test", "collection" -> "customers"))
val rdd = sc.loadFromMongoDB(readConfig) //.toDF()


// `foreach` is run purely for its side effect and yields Unit, so `logs` carries no data.
// The `street` value is changed only on the in-memory Document handed to the closure;
// nothing below persists that change.
val logs = rdd.foreach {
  document => {
    // The nested `address` value is assumed to be a sub-document (unchecked cast).
    val mongoDatabaseConnectionDetails = 
    document.get("address").asInstanceOf[Document]
    mongoDatabaseConnectionDetails.put("street", "azerty")
  }
}


// Target for the write-back: the same collection, acknowledged by a majority of nodes.
val writeConfig = WriteConfig(
  Map(
    "uri" -> "mongodb://127.0.0.1/",
    "database" -> "test",
    "collection" -> "customers",
    "writeConcern.w" -> "majority"
  ))

// Open question: how to save the modified documents using `writeConfig`.
// ??

}

文档更改后,我想在数据库中替换它。

提前致谢

1 个答案:

答案 0 :(得分:0)

好的,最后我自己解决了这个问题。我认为这不是最好的方法,但它可行:

import com.mongodb.client.MongoCollection
import com.mongodb.spark._
import com.mongodb.spark.config.{ReadConfig, WriteConfig}
import com.typesafe.scalalogging.slf4j.LazyLogging
import org.apache.spark.{SparkConf, SparkContext}
import org.bson.Document


/**
 * Loads every document of the `test.customers` collection, sets the `street` field of its
 * `mongoDatabaseConnectionDetails` sub-document to "azerty", and replaces the stored
 * document (matched by `_id`) with the edited version.
 *
 * A plain `main` method is used instead of `scala.App`: the Spark documentation warns that
 * subclasses of `scala.App` may not work correctly as driver programs.
 */
object Test extends LazyLogging {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("test")
      .setMaster("local[*]")

    val sc = new SparkContext(conf)
    try {
      // The same collection is both the source and the target of the update.
      val mongoOptions = Map(
        "uri" -> "mongodb://127.0.0.1/",
        "database" -> "test",
        "collection" -> "customers")
      val readConfig = ReadConfig(mongoOptions)
      val writeConfig = WriteConfig(mongoOptions)

      val rdd = sc.loadFromMongoDB(readConfig)

      // `foreachPartition` is an action meant for side effects: nothing is shipped back to
      // the driver, and the collection handle is acquired once per partition rather than
      // once per document.
      rdd.foreachPartition { documents =>
        if (documents.nonEmpty) {
          val mongoConnector = MongoConnector(writeConfig.asOptions)
          mongoConnector.withCollectionDo(writeConfig, { collection: MongoCollection[Document] =>
            documents.foreach { document =>
              // Only documents that were actually edited are written back.
              if (updateStreet(document)) replaceById(collection, document)
            }
          })
        }
      }
    } finally {
      // Release Spark resources even when the job fails.
      sc.stop()
    }
  }

  /**
   * Replaces the stored document whose `_id` equals `document`'s `_id` with `document`.
   *
   * @param document    the edited document; its `_id` selects the stored document to replace
   * @param writeConfig connection, database and collection settings of the target collection
   */
  def save(document: Document, writeConfig: WriteConfig): Unit = {
    val mongoConnector = MongoConnector(writeConfig.asOptions)
    mongoConnector.withCollectionDo(writeConfig, { collection: MongoCollection[Document] =>
      replaceById(collection, document)
    })
  }

  /**
   * Sets `street` to "azerty" inside the `mongoDatabaseConnectionDetails` sub-document.
   * The sub-document is mutated in place, so the parent needs no further update.
   * `replace` only overwrites an existing `street` key; it never adds one.
   *
   * @return `true` when the sub-document exists, `false` when it is missing or not a document
   */
  private def updateStreet(document: Document): Boolean =
    document.get("mongoDatabaseConnectionDetails") match {
      case details: Document =>
        details.replace("street", "azerty")
        true
      case _ =>
        false
    }

  /**
   * Issues a `replaceOne` filtered on `_id`. The `_id` value is passed through untouched so
   * that any id type (ObjectId, String, number, ...) works, not only String ids.
   */
  private def replaceById(collection: MongoCollection[Document], document: Document): Unit = {
    val idFilter = new Document("_id", document.get("_id"))
    collection.replaceOne(idFilter, document)
    ()
  }
}