Akka Streams只处理一个Collection

时间:2017-03-21 17:03:06

标签: scala akka

我遇到了以下情况:我有一批目录,每个目录中都包含大量文件。我正在使用 Akka Streams 处理它们,但不知什么原因,只有最后一个序列被处理了。下面是我所用方法的代码,如果你发现哪里有问题,请告诉我。

/**
  * Runs the S3 ingestion pipeline once and blocks until the stream has completed.
  *
  * One URL is generated per date between the configured `startDate` and `endDate`; the URLs
  * flow through `readDataFromS3` and `parseJsonSeq`, and the parsed batches are folded with
  * `persistDataSeq` starting from 0. The final fold value is printed as the total.
  *
  * The actor system created here is terminated in a `finally` block, so it is released both
  * when the stream succeeds and when any step (including `Await.result`) throws.
  */
def read(): Unit = {

    implicit val system = ActorSystem("LiveS3Parser")

    implicit val materializer = ActorMaterializer()

    try {
      val reader = new LiveSequenceFileReader(conf.getString("s3url"))

      val dateList = generateDates(conf.getString("startDate"), conf.getString("endDate"))

      // NOTE(review): the result of this call is discarded. It is kept in case the reader relies
      // on it as a side effect; if it does not, it is a redundant read and should be removed.
      reader.readAllFilesFromPath(conf.getString("s3url"))

      val seqElements = generateURLS(dateList, conf).via(readDataFromS3(reader)).via(parseJsonSeq())

      val sinkseq = Sink.fold(0)(persistDataSeq)
      val dataCounter = seqElements.toMat(sinkseq)(Keep.right)

      val sum: Future[Int] = dataCounter.run()

      // Print from the awaited value rather than from an asynchronous callback: a callback may
      // run after this method has already returned, so the message could be lost.
      val total = Await.result(sum, Duration.Inf)
      println(s"Total records Loaded: $total")
    } finally {
      // Without this the actor system's threads outlive the method.
      system.terminate()
    }
  }

/**
  * Builds a stream source of one URL per timestamp in `data`.
  *
  * Each URL is the configured `s3url` followed by `dt=` and the timestamp formatted as
  * `yyyy-MM-dd` by `DateUtils.formatDate`.
  *
  * @param data timestamps in epoch milliseconds (each is passed to `new Date(...)`)
  * @param conf configuration providing the `s3url` key
  * @return a source emitting the URLs in the same order as `data`
  */
def generateURLS(data: Seq[Long], conf: Config): Source[String, NotUsed] = {

    val partitionPrefix = conf.getString("s3url").concat("dt=")

    val urls =
      for (millis <- data)
        yield partitionPrefix.concat(DateUtils.formatDate(new Date(millis), "yyyy-MM-dd"))

    Source(urls.to[scala.collection.immutable.Seq])
  }


  /**
    * Builds the stage that turns each incoming URL into the records read from it.
    *
    * Every URL is passed to `readFiles` inside a `Future` running on `ec`. At most
    * `availableProcessors` futures are in flight at once, and because the stage is
    * `mapAsyncUnordered`, results are emitted as they complete rather than in input order.
    *
    * NOTE(review): the same `lv` instance is used by all concurrent futures; confirm that
    * LiveSequenceFileReader can safely be shared across threads.
    *
    * @param lv reader handed to `readFiles` for every URL
    * @param ec execution context on which the reads are scheduled
    */
  def readDataFromS3(lv: LiveSequenceFileReader)(implicit ec: ExecutionContext): Flow[String, Seq[KeyValue], NotUsed] = {
    val parallelism = Runtime.getRuntime.availableProcessors()

    Flow[String].mapAsyncUnordered(parallelism) { url =>
      Future {
        readFiles(url, lv)
      }
    }
  }


  /**
    * Builds the stage that parses each batch of records with `parseAllItems`.
    *
    * Each batch is parsed inside a `Future` running on `ec`, with at most `availableProcessors`
    * batches in flight at once. Because the stage is `mapAsyncUnordered`, parsed batches are
    * emitted as they complete rather than in input order.
    *
    * @param ec execution context on which the parsing is scheduled
    */
  def parseJsonSeq()(implicit ec: ExecutionContext): Flow[Seq[KeyValue], Seq[Try[OptimizedSearchQueryEventMessage]], NotUsed] = {
    val parallelism = Runtime.getRuntime.availableProcessors()

    Flow[Seq[KeyValue]].mapAsyncUnordered(parallelism) { batch =>
      Future {
        parseAllItems(batch)
      }
    }
  }


  /**
    * Reads all records under `url` using `lv`, logging the URL before the read and the
    * number of records after it.
    *
    * @param url location passed to `lv.readAllFilesFromPath`
    * @param lv  reader that performs the actual read
    * @return the records returned by the reader
    */
  def readFiles(url: String, lv: LiveSequenceFileReader): Seq[KeyValue] = {
    println(s"Reading Files from $url")

    val records = lv.readAllFilesFromPath(url)
    val recordCount = records.size()
    println(s"Records to process$recordCount")

    records
  }






  /** Parses the value of every record in `seq` with `parseItem`, preserving order. */
  def parseAllItems(seq: Seq[KeyValue]) = {
    for (record <- seq) yield parseItem(record.getValue)
  }

  /**
    * Deserializes `data` into an `OptimizedSearchQueryEventMessage` via `mapper.readValue`.
    *
    * @param data the serialized message
    * @return the parsed message, or a `Failure` holding the exception thrown by the mapper
    */
  def parseItem(data: String): Try[OptimizedSearchQueryEventMessage] =
    Try {
      mapper.readValue(data, classOf[OptimizedSearchQueryEventMessage])
    }

/**
  * Lists every calendar day from `startingDate` to `endDate`, both inclusive.
  *
  * Both arguments are parsed as `yyyy-MM-dd` in the JVM default time zone. Each returned value
  * is the epoch-millisecond timestamp of the start of one day in that zone.
  *
  * Days are advanced on the calendar rather than by adding a fixed 24 hours, because a day that
  * contains a daylight-saving change is 23 or 25 hours long. Fixed 24-hour steps would then
  * skip the end date or yield the same calendar day twice.
  *
  * @param startingDate first day, formatted `yyyy-MM-dd`
  * @param endDate      last day, formatted `yyyy-MM-dd`
  * @return one timestamp per day in ascending order; empty when `startingDate` is after `endDate`
  * @throws java.text.ParseException if either argument cannot be parsed
  */
def generateDates(startingDate: String, endDate: String): Seq[Long] = {
    import java.time.{Instant, LocalDate, ZoneId}

    val fmt = new SimpleDateFormat("yyyy-MM-dd")
    val zone = ZoneId.systemDefault()

    // Parsing still goes through SimpleDateFormat so the accepted input syntax is unchanged.
    def toLocalDate(text: String): LocalDate =
      Instant.ofEpochMilli(fmt.parse(text).getTime).atZone(zone).toLocalDate

    val firstDay = toLocalDate(startingDate)
    val lastDay = toLocalDate(endDate)

    Iterator
      .iterate(firstDay)(_.plusDays(1))
      .takeWhile(day => !day.isAfter(lastDay))
      .map(day => day.atStartOfDay(zone).toInstant.toEpochMilli)
      .toVector
  }

0 个答案:

没有答案