如何将Akka Stream与Akka-Http结合使用以流式传输响应

时间:2019-03-17 12:07:12

标签: scala akka

我是Akka Stream的新手。我将以下代码用于CSV解析。

class CsvParser(config: Config)(implicit system: ActorSystem) extends LazyLogging with NumberValidation {

  import system.dispatcher
  import scala.util.control.NonFatal

  // Importer settings. NOTE(review): `linesToSkip` is read but never applied
  // anywhere below — wire it into `parseFile` (e.g. `.drop(linesToSkip)`) or remove it.
  private val importDirectory = Paths.get(config.getString("importer.import-directory")).toFile
  private val linesToSkip = config.getInt("importer.lines-to-skip")
  private val concurrentFiles = config.getInt("importer.concurrent-files")
  private val concurrentWrites = config.getInt("importer.concurrent-writes")
  private val nonIOParallelism = config.getInt("importer.non-io-parallelism")

  /**
   * Persists a single valid reading. Currently a no-op stub.
   *
   * Uses `Future.successful(())` rather than `Future(())`, which would
   * needlessly schedule an empty task on the dispatcher.
   */
  def save(r: ValidReading): Future[Unit] =
    Future.successful(())

  /**
   * Parses one semicolon-separated CSV line into a [[Reading]].
   *
   * A malformed value field is logged and yields an [[InvalidReading]] carrying
   * the id. A malformed id field still fails the returned Future: the id is
   * parsed outside the try because without an id there is nothing to attach an
   * InvalidReading to.
   */
  def parseLine(filePath: String)(line: String): Future[Reading] = Future {
    val fields = line.split(";")
    val id = fields(0).toInt
    try {
      val value = fields(1).toDouble
      ValidReading(id, value)
    } catch {
      // NonFatal instead of Throwable: never swallow fatal errors (OOM, etc.).
      case NonFatal(t) =>
        logger.error(s"Unable to parse line in $filePath:\n$line: ${t.getMessage}")
        InvalidReading(id)
    }
  }

  // Splits the incoming byte stream on '\n'. 128 bytes is the maximum line
  // length (longer lines fail the stream); allowTruncation lets a final line
  // without a trailing newline through.
  val lineDelimiter: Flow[ByteString, ByteString, NotUsed] =
    Framing.delimiter(ByteString("\n"), 128, allowTruncation = true)

  /**
   * Streams a file's lines and parses each one into a [[Reading]].
   *
   * Uses Akka's non-blocking `FileIO` plus the `lineDelimiter` framing stage
   * (previously defined but unused) instead of the original
   * `scala.io.Source.fromFile(...).getLines()`, a blocking iterator that was
   * never closed — a file-handle leak.
   */
  val parseFile: Flow[File, Reading, NotUsed] =
    Flow[File].flatMapConcat { file =>
      import akka.stream.scaladsl.FileIO
      FileIO.fromPath(file.toPath)
        .via(lineDelimiter)
        .map(_.utf8String)
        .mapAsync(parallelism = nonIOParallelism)(parseLine(file.getPath))
    }

  /**
   * Averages readings in pairs. Invalid readings are excluded from the average;
   * if both readings of a pair are invalid, the sentinel value -1 is emitted.
   * The id of the first reading of the pair (valid or not) labels the result.
   */
  val computeAverage: Flow[Reading, ValidReading, NotUsed] =
    Flow[Reading].grouped(2).mapAsyncUnordered(parallelism = nonIOParallelism) { readings =>
      Future {
        val validReadings = readings.collect { case r: ValidReading => r }
        val average = if (validReadings.nonEmpty) validReadings.map(_.value).sum / validReadings.size else -1
        ValidReading(readings.head.id, average)
      }
    }

  /** Persists readings with bounded write concurrency; materializes a completion Future. */
  val storeReadings: Sink[ValidReading, Future[Done]] =
    Flow[ValidReading]
      .mapAsyncUnordered(concurrentWrites)(save)
      .toMat(Sink.ignore)(Keep.right)

  /** Full per-file pipeline: parse every line, then average the readings in pairs. */
  val processSingleFile: Flow[File, ValidReading, NotUsed] =
    Flow[File]
      .via(parseFile)
      .via(computeAverage)

  /**
   * Imports every file in the configured directory, fanning work out over
   * `concurrentFiles` parallel copies of the per-file pipeline, and stores the
   * resulting readings.
   *
   * @return a Future that completes when every reading has been stored
   */
  def importFromFiles = {
    implicit val materializer = ActorMaterializer()

    // `File.listFiles` returns null when the directory is missing or
    // unreadable — guard with Option so we log "0 files" instead of an NPE.
    val files = Option(importDirectory.listFiles).map(_.toList).getOrElse(Nil)
    logger.info(s"Starting import of ${files.size} files from ${importDirectory.getPath}")

    val startTime = System.currentTimeMillis()

    // Balance/Merge fan-out: up to `concurrentFiles` files are processed at once.
    val balancer = GraphDSL.create() { implicit builder =>
      import GraphDSL.Implicits._

      val balance = builder.add(Balance[File](concurrentFiles))
      val merge = builder.add(Merge[ValidReading](concurrentFiles))

      (1 to concurrentFiles).foreach { _ =>
        balance ~> processSingleFile ~> merge
      }

      FlowShape(balance.in, merge.out)
    }

    Source(files)
      .via(balancer)
      // Resume on element failure: log the exception and keep the stream alive.
      .withAttributes(ActorAttributes.supervisionStrategy { e =>
        logger.error("Exception thrown during stream processing", e)
        Supervision.Resume
      })
      .runWith(storeReadings)
      .andThen {
        case Success(_) =>
          val elapsedTime = (System.currentTimeMillis() - startTime) / 1000.0
          logger.info(s"Import finished in ${elapsedTime}s")
        case Failure(e) => logger.error("Import failed", e)
      }
  }
}

我想使用Akka HTTP,它将提供从CSV解析的所有ValidReading实体,但是我不知道该怎么做。

上面的代码从服务器获取文件并解析每行以生成ValidReading

如何通过akka-http传递/上传CSV,解析文件并将结果响应流回端点?

1 个答案:

答案 0（得分：0）

解决方案的“本质”是这样的:

import akka.http.scaladsl.server.Directives._

// Accept a multipart/form-data upload whose part is named "csv" and stream the
// (optionally transformed) bytes straight back as a text/csv response.
val route = fileUpload("csv") {
  case (metadata, byteSource) =>
    // Plug parsing/transformation logic in here; identity pass-through for now.
    val transformed = byteSource.map(identity)
    complete(HttpResponse(entity = HttpEntity(ContentTypes.`text/csv(UTF-8)`, transformed)))
}

您检测到上传的内容是包含名为“csv”的部分的multipart/form-data。您从中得到byteSource。进行计算（将逻辑插入.map(x => x)部分）。将您的数据转换回ByteString。使用新的来源完成请求。这会使您的端点（endpoint）像代理一样工作。