JSON string gets truncated in Apache Flink

Asked: 2018-03-11 19:46:22

Tags: scala apache-kafka apache-flink

I am using Kafka as the source in Apache Flink. My Kafka producer sends a large JSON string, but it arrives truncated to 4095 characters and my JSON parsing fails. I cannot control the size of the JSON string. Can you explain why this happens and help me fix it?
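For reference (my actual job is under "Code:" below), here is a minimal sketch of a separate diagnostic job; the object name, job name and log text are assumptions, not part of my actual code. It prints only the length of each record taken straight off the Kafka source, so the output itself cannot be what truncates: if the lengths already cap at 4095 here, the message arrives truncated from Kafka; otherwise the cut happens further downstream (for example in how the output is displayed).

import java.util.Properties

import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
import org.apache.flink.streaming.util.serialization.SimpleStringSchema

object MessageLengthCheck {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Same broker/topic settings as the job below (assumed)
    val properties = new Properties()
    properties.setProperty("bootstrap.servers", "localhost:9092")
    properties.setProperty("group.id", "test")
    val consumer = new FlinkKafkaConsumer010[String]("first_topic", new SimpleStringSchema(), properties)

    env.addSource(consumer)
      // Print only the record length, never the payload itself
      .map(msg => s"received ${msg.length} characters")
      .print()

    env.execute("message-length-check")
  }
}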

Code:

import java.sql.Timestamp
import java.util.Properties

import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
import org.apache.flink.streaming.util.serialization.SimpleStringSchema
import models._
import org.apache.flink.types.Row
import net.liftweb.json._

case class ParsedPage(data: String, domain:String, url:String, text: String)

object HelloWorld {
  def main(args: Array[String]): Unit = {

    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Kafka consumer configuration
    val properties = new Properties()
    properties.setProperty("bootstrap.servers", "localhost:9092")
    properties.setProperty("group.id", "test")
    val kafkaConsumer010 = new FlinkKafkaConsumer010[String]("first_topic", new SimpleStringSchema(), properties)
    val stream: DataStream[String] = env.addSource[String](kafkaConsumer010)
    // Parse each non-empty record as JSON; records that fail to parse are dropped
    val result: DataStream[Row] = stream.filter(_.nonEmpty).flatMap{ p =>
      try {
        implicit val formats = DefaultFormats
        Seq(parse(p).extract[ParsedPage])
      } catch {
        case e: Exception =>
          println(s"Parsing failed for message in txn: $p")
          Seq()
      }}.map { p =>
      // Build a Row from the parsed page (placeholder values for now)
      val bookJson = ""
      val cityId = ""
      Row.of(bookJson, cityId)
    }
    // Note: this prints the raw Kafka records; `result` has no sink attached
    stream.print()
    env.execute()
  }
}

Flink version: 1.4.2, Scala version: 2.11.8

0 Answers:

No answers yet.