我遇到了以下情况:我有一批目录,每个目录中都包含大量文件。我正在使用 Akka 处理它们,但不知为何只有最后一个序列被处理了。下面是我现有方法的代码,如果你发现其中有什么问题,请告诉我。
/**
 * Runs the whole pipeline: generates the dates between the configured `startDate` and `endDate`,
 * turns them into URLs, reads the files behind each URL, parses every record and folds the
 * parsed batches through `persistDataSeq`.
 *
 * Blocks the calling thread until the stream has completed, then prints the final fold value.
 * If the stream fails, the failure is rethrown by `Await.result`. The actor system created here
 * is always terminated before the method returns, so its threads do not outlive the run.
 */
def read(): Unit = {
  implicit val system = ActorSystem("LiveS3Parser")
  implicit val materializer = ActorMaterializer()
  try {
    val reader = new LiveSequenceFileReader(conf.getString("s3url"))
    val dateList = generateDates(conf.getString("startDate"), conf.getString("endDate"))
    // NOTE(review): the result of this call is discarded; it is kept only in case the reader
    // relies on it as a side effect (warm-up / listing). Confirm and remove if it is not needed.
    reader.readAllFilesFromPath(conf.getString("s3url"))
    val seqElements = generateURLS(dateList, conf).via(readDataFromS3(reader)).via(parseJsonSeq())
    val sinkseq = Sink.fold(0)(persistDataSeq)
    val dataCounter = seqElements.toMat(sinkseq)(Keep.right)
    val sum: Future[Int] = dataCounter.run()
    // Print after the blocking wait instead of from an asynchronous callback, so the message
    // cannot be lost or reordered relative to the method returning.
    val total = Await.result(sum, Duration.Inf)
    println(s"Total records Loaded: $total")
  } finally {
    // Release the actor system's threads whether the stream succeeded or failed.
    system.terminate()
  }
}
/**
 * Builds a stream source of URLs, one per timestamp in `data`.
 *
 * Each URL is the configured `s3url` followed by `dt=` and the timestamp rendered with the
 * pattern `yyyy-MM-dd`.
 *
 * @param data timestamps in epoch milliseconds (each one is passed to `new Date(...)`)
 * @param conf configuration supplying the `s3url` prefix
 * @return a source emitting the URLs in the same order as `data`
 */
def generateURLS(data: Seq[Long], conf: Config): Source[String, NotUsed] = {
  val prefix = conf.getString("s3url")

  // NOTE(review): the time zone DateUtils.formatDate renders in is not visible here —
  // confirm it agrees with the zone in which the timestamps were produced.
  def partitionUrl(millis: Long): String = {
    val day = DateUtils.formatDate(new Date(millis), "yyyy-MM-dd")
    prefix.concat("dt=").concat(day)
  }

  val urls = for (millis <- data) yield partitionUrl(millis)
  Source(urls.to[scala.collection.immutable.Seq])
}
/**
 * Flow stage that replaces each incoming URL with the records `readFiles` loads from it.
 *
 * Every read runs inside a `Future` on the implicit execution context, with at most one
 * in-flight read per available processor. Because `mapAsyncUnordered` is used, batches are
 * emitted as they complete, not necessarily in the order the URLs arrived.
 *
 * @param lv reader handed to `readFiles` for every URL
 */
def readDataFromS3(lv: LiveSequenceFileReader)(implicit ec: ExecutionContext): Flow[String, Seq[KeyValue], NotUsed] = {
  val parallelism = Runtime.getRuntime().availableProcessors()
  Flow[String].mapAsyncUnordered(parallelism) { url =>
    Future(readFiles(url, lv))
  }
}
/**
 * Flow stage that parses each incoming batch of records with `parseAllItems`.
 *
 * Every batch is parsed inside a `Future` on the implicit execution context, with at most one
 * in-flight batch per available processor. Because `mapAsyncUnordered` is used, parsed batches
 * are emitted as they complete, not necessarily in arrival order.
 */
def parseJsonSeq()(implicit ec: ExecutionContext): Flow[Seq[KeyValue], Seq[Try[OptimizedSearchQueryEventMessage]], NotUsed] = {
  val parallelism = Runtime.getRuntime().availableProcessors()
  Flow[Seq[KeyValue]].mapAsyncUnordered(parallelism) { batch =>
    Future(parseAllItems(batch))
  }
}
/**
 * Loads every record the reader finds under `url`, logging the URL before the read and the
 * number of records after it.
 *
 * @param url location passed to the reader
 * @param lv  reader used to load the records
 * @return the records returned by `lv.readAllFilesFromPath(url)`
 */
def readFiles(url: String, lv: LiveSequenceFileReader): Seq[KeyValue] = {
  println(s"Reading Files from $url")
  val loaded = lv.readAllFilesFromPath(url)
  println(s"Records to process${loaded.size()}")
  loaded
}
/** Parses the value of every record with `parseItem`, yielding one result per record in input order. */
def parseAllItems(seq: Seq[KeyValue]) = {
  for (record <- seq) yield parseItem(record.getValue)
}
/**
 * Deserializes one payload into an `OptimizedSearchQueryEventMessage` using `mapper`.
 *
 * Non-fatal exceptions thrown by the mapper are captured as a `Failure` rather than propagated.
 *
 * @param data raw payload text (presumably JSON, given the caller `parseJsonSeq` — confirm)
 */
def parseItem(data: String): Try[OptimizedSearchQueryEventMessage] =
  Try {
    mapper.readValue(data, classOf[OptimizedSearchQueryEventMessage])
  }
/**
 * Lists every calendar day from `startingDate` to `endDate` (both inclusive) as epoch
 * milliseconds at the start of that day in the JVM's default time zone.
 *
 * Days are advanced on the calendar rather than by adding a fixed 24 hours of milliseconds:
 * across a daylight-saving change a fixed step drifts off midnight, which can drop the end date
 * or yield the same day twice.
 *
 * @param startingDate first day, in `yyyy-MM-dd` form
 * @param endDate      last day, in `yyyy-MM-dd` form
 * @return one timestamp per day in ascending order; empty when `endDate` is before `startingDate`
 * @throws java.text.ParseException if either argument cannot be parsed
 */
def generateDates(startingDate: String, endDate: String): Seq[Long] = {
  import java.time.{Instant, LocalDate, ZoneId}
  import java.time.temporal.ChronoUnit

  // Parsing stays on SimpleDateFormat so accepted inputs and thrown exceptions are unchanged.
  val fmt = new SimpleDateFormat("yyyy-MM-dd")
  val zone = ZoneId.systemDefault()

  def toLocalDate(text: String): LocalDate =
    Instant.ofEpochMilli(fmt.parse(text).getTime).atZone(zone).toLocalDate

  val firstDay = toLocalDate(startingDate)
  val lastDay = toLocalDate(endDate)
  val dayCount = ChronoUnit.DAYS.between(firstDay, lastDay)

  // A negative dayCount produces an empty range, matching the previous behaviour.
  (0L to dayCount).map { offset =>
    firstDay.plusDays(offset).atStartOfDay(zone).toInstant.toEpochMilli
  }
}