如何在Flink中对指标(metrics)进行单元测试

时间:2018-08-03 15:12:31

标签: scala apache-flink

我在Flink中有一条数据处理流水线(pipeline),并在ProcessFunction中使用gauge生成了自定义指标。
由于这些指标对我的业务很重要,因此我希望在作业执行完成后对它们进行单元测试。
不幸的是,我没有找到实现合适的测试用报告器(reporter)的方法。 下面是说明我的问题的简单代码。
此代码有两个问题:

  1. 我如何触发gauge(使其被读取)
  2. 我如何获取由env.execute实例化的报告器(reporter)

这是样本

import java.util.concurrent.atomic.AtomicInteger

import org.apache.flink.api.scala.metrics.ScalaGauge
import org.apache.flink.configuration.{ConfigConstants, Configuration}
import org.apache.flink.metrics.reporter.AbstractReporter
import org.apache.flink.metrics.{Gauge, Metric, MetricConfig}
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.functions.sink.SinkFunction
import org.apache.flink.streaming.api.scala.{StreamExecutionEnvironment, _}
import org.apache.flink.util.Collector
import org.scalatest.FunSuite
import org.scalatest.Matchers._
import org.scalatest.PartialFunctionValues._

import scala.collection.JavaConverters._
import scala.collection.mutable

/* Test based on Flink test example https://ci.apache.org/projects/flink/flink-docs-master/dev/stream/testing.html */

/**
 * Process function that emits every incoming element multiplied by two and
 * counts how many elements it has processed.
 *
 * The count is exposed through a gauge registered in `open` under the user
 * metric group "counter" with the name "call"; the gauge reads `nbrCalls`
 * each time its value is requested.
 */
class MultiplyByTwo extends ProcessFunction[Long, Long] {
  /**
   * Counts the call and forwards `data * 2` downstream.
   *
   * @param data      the incoming element
   * @param context   the process function context (unused)
   * @param collector receives the doubled value
   */
  override def processElement(data: Long, context: ProcessFunction[Long, Long]#Context, collector: Collector[Long]): Unit = {
    // Without this increment the gauge would always report 0, whereas the
    // test expects it to equal the number of processed elements.
    nbrCalls.incrementAndGet()
    collector.collect(data * 2L)
  }

  /** Number of elements handled by `processElement`; backs the "call" gauge. */
  val nbrCalls = new AtomicInteger(0)

  /** Registers the "call" gauge in the "counter" group of this function's metric group. */
  override def open(parameters: Configuration): Unit = {
    getRuntimeContext.getMetricGroup
      .addGroup("counter")
      .gauge[Int, ScalaGauge[Int]]("call" , ScalaGauge[Int]( () => nbrCalls.get()))
  }
}

// create a testing sink
/**
 * Test sink that appends every received value to the shared
 * `CollectSink.values` list so that the test can inspect the job output.
 */
class CollectSink extends SinkFunction[Long] {
  override def invoke(value: Long): Unit = {
    // Lock on the shared list rather than on this sink instance: all sink
    // instances write to the same list, so a per-instance monitor would not
    // serialize writers coming from different instances.
    CollectSink.values.synchronized {
      CollectSink.values.add(value)
    }
  }
}

/** Holds the list shared by all `CollectSink` instances. */
object CollectSink {
  /** Values received by the sinks, in arrival order. */
  val values:  java.util.ArrayList[Long] = new  java.util.ArrayList[Long]()
}

/**
 * Test reporter that snapshots the gauges tracked by `AbstractReporter`
 * into `gaugesMetrics` so a test can assert on them.
 */
class StackOverflowTestReporter extends AbstractReporter {
  /** Latest snapshot taken by `report()`: metric name -> gauge value rendered as a string. */
  var gaugesMetrics :  mutable.Map[String, String] = mutable.Map[String, String]()

  override def open(metricConfig: MetricConfig): Unit = {}

  override def close(): Unit = {}

  // Identity filter: metric names are kept exactly as given.
  override def filterCharacters(s: String): String = s

  /**
   * Rebuilds `gaugesMetrics` from the gauges currently held by the reporter.
   *
   * The inherited `gauges` map is keyed by the gauge object with the metric
   * name as its value, so each entry is flipped to obtain name -> value
   * pairs (the previous code produced value -> name pairs).
   *
   * NOTE(review): nothing in this file invokes `report()`; presumably the
   * reporter must also be registered as a scheduled reporter for Flink to
   * call it -- confirm.
   */
  def report(): Unit = {
    gaugesMetrics = this.gauges.asScala.map(t => (t._2, metricValue(t._1)))
  }

  /** Renders a gauge's current value as a string; any other metric yields "". */
  private def metricValue(m: Metric): String = {
    m match {
      case g: Gauge[_] => g.getValue.toString
      case _ => ""
    }
  }
}

/**
 * Test from the question: runs a small pipeline in a local environment whose
 * configuration names `StackOverflowTestReporter` as a metrics reporter, then
 * checks the sink output.
 *
 * The gauge verification at the end is pseudo-code (see the comments there):
 * the test has no way yet to reach the reporter instance created by Flink.
 */
class StackOverflowTest extends FunSuite with StreamingMultipleProgramsTestBase{

  /**
   * Builds a configuration whose reporter-class entry for `reporterName`
   * points at `StackOverflowTestReporter`.
   *
   * @param reporterName name under which the reporter is configured
   * @return the configuration containing that single entry
   */
  def createConfigForReporter(reporterName : String) : Configuration = {
    val cfg : Configuration = new Configuration()
    cfg.setString(ConfigConstants.METRICS_REPORTER_PREFIX + reporterName + "." + ConfigConstants.METRICS_REPORTER_CLASS_SUFFIX, classOf[StackOverflowTestReporter].getName)
    cfg
  }

  test("test_metrics") {

    // local environment created with the reporter configuration
    val env = StreamExecutionEnvironment.createLocalEnvironment(
      StreamExecutionEnvironment.getDefaultLocalParallelism,
      createConfigForReporter("reporter"))

    // configure your test environment
    env.setParallelism(1)

    env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime)

    // values are collected in a static variable
    CollectSink.values.clear()

    // create a stream of custom elements and apply transformations
    env.fromElements[Long](1L, 21L, 22L)
      .process(new MultiplyByTwo())
      .addSink(new CollectSink())

    // execute
    env.execute()

    // verify your results
    CollectSink.values should have length 3
    CollectSink.values should contain (2L)
    CollectSink.values should contain (42L)
    CollectSink.values should contain (44L)

    // verify gauge counter
    // pseudo code: the `_` below is only a placeholder, the open question is
    // how to obtain the reporter instance that the environment instantiated
    // NOTE(review): MultiplyByTwo registers the gauge as "call" in group
    // "counter", so the "count.call" key used below may not match -- confirm.
    val testReporter : StackOverflowTestReporter = _ // how to get testReporter instantiate in env
    testReporter.gaugesMetrics should have size 1
    testReporter.gaugesMetrics should contain key "count.call"
    testReporter.gaugesMetrics.valueAt("count.call") should be equals("3")
  }
}

感谢Chesnay Schepler,以下是根据他的回答得到的解决方案:

import java.util.concurrent.atomic.AtomicInteger

import org.apache.flink.api.common.time.Time
import org.apache.flink.api.scala.metrics.ScalaGauge
import org.apache.flink.configuration.{ConfigConstants, Configuration}
import org.apache.flink.metrics.reporter.MetricReporter
import org.apache.flink.metrics.{Metric, MetricConfig, MetricGroup}
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.functions.sink.SinkFunction
import org.apache.flink.streaming.api.scala.{StreamExecutionEnvironment, _}
import org.apache.flink.test.util.MiniClusterResource
import org.apache.flink.util.Collector
import org.scalatest.Matchers._
import org.scalatest.PartialFunctionValues._
import org.scalatest.{BeforeAndAfterAll, FunSuite}

import scala.collection.mutable

/* Test based on Flink test example https://ci.apache.org/projects/flink/flink-docs-master/dev/stream/testing.html */

/**
 * Doubles each incoming element and keeps track of how many elements were
 * processed. The count is published as the gauge "call" inside the user
 * metric group "counter".
 */
class MultiplyByTwo extends ProcessFunction[Long, Long] {

  /** Running count of `processElement` invocations; read by the gauge. */
  val nbrCalls = new AtomicInteger(0)

  /** Registers the gauge that exposes the current value of `nbrCalls`. */
  override def open(parameters: Configuration): Unit = {
    val counterGroup = getRuntimeContext.getMetricGroup.addGroup("counter")
    val callGauge = ScalaGauge[Int](() => nbrCalls.get())
    counterGroup.gauge[Int, ScalaGauge[Int]]("call", callGauge)
  }

  /** Records the call, then emits twice the input value. */
  override def processElement(
      data: Long,
      context: ProcessFunction[Long, Long]#Context,
      collector: Collector[Long]): Unit = {
    nbrCalls.incrementAndGet()
    val doubled = data * 2L
    collector.collect(doubled)
  }
}

// create a testing sink
/**
 * Test sink that appends every received value to the shared
 * `CollectSink.values` list so that the test can inspect the job output.
 */
class CollectSink extends SinkFunction[Long] {
  import CollectSink._
  override def invoke(value: Long): Unit = {
    // Lock on the shared list rather than on this sink instance: all sink
    // instances write to the same list, so a per-instance monitor would not
    // serialize writers coming from different instances.
    values.synchronized {
      values.add(value)
    }
  }
}

/** Holds the list shared by all `CollectSink` instances. */
object CollectSink {
  /** Values received by the sinks, in arrival order. */
  val values:  java.util.ArrayList[Long] = new  java.util.ArrayList[Long]()
}

/**
 * Metric reporter used by the test: every `ScalaGauge` it is notified about
 * is stored in the companion's `gaugesMetrics` map, where the test can read it.
 */
class StackOverflowTestReporter extends MetricReporter {

  override def open(metricConfig: MetricConfig): Unit = ()

  override def close(): Unit = ()

  /**
   * Stores the metric when it is a `ScalaGauge`; every other metric is ignored.
   *
   * The key is the group's scope components minus the first six, joined with
   * ".", followed by "." and the metric name. The first six components appear
   * to be scope information that is irrelevant for the test.
   */
  override def notifyOfAddedMetric(metric: Metric, metricName: String, group: MetricGroup): Unit =
    metric match {
      case scalaGauge: ScalaGauge[_] =>
        val userScope = group.getScopeComponents.toSeq.drop(6).mkString(".")
        val gaugeKey = userScope + "." + metricName
        StackOverflowTestReporter.gaugesMetrics.update(gaugeKey, scalaGauge.asInstanceOf[ScalaGauge[Int]])
      case _ => ()
    }

  // Removed metrics are intentionally left in the map.
  override def notifyOfRemovedMetric(metric: Metric, metricName: String, group: MetricGroup): Unit = ()
}

/** Static holder through which the test reaches the gauges seen by the reporter. */
object StackOverflowTestReporter {
  /** Gauges recorded so far, keyed as described on `notifyOfAddedMetric`. */
  var gaugesMetrics: mutable.Map[String, ScalaGauge[Int]] = mutable.Map.empty[String, ScalaGauge[Int]]
}

/**
 * Runs the doubling pipeline against a `MiniClusterResource` whose
 * configuration names `StackOverflowTestReporter` as a metrics reporter,
 * then verifies both the sink output and the value of the gauge.
 */
class StackOverflowTest extends FunSuite with BeforeAndAfterAll {

  val miniClusterResource: MiniClusterResource = buildMiniClusterResource()

  /** Resets the shared sink and reporter state, then calls `before()` on the cluster resource. */
  override def beforeAll(): Unit = {
    CollectSink.values.clear()
    StackOverflowTestReporter.gaugesMetrics.clear()
    miniClusterResource.before()
  }

  /** Calls `after()` on the cluster resource once all tests have run. */
  override def afterAll(): Unit = miniClusterResource.after()

  /** Builds a configuration whose "reporter" class entry points at `StackOverflowTestReporter`. */
  def createConfigForReporter(): Configuration = {
    val reporterClassKey =
      ConfigConstants.METRICS_REPORTER_PREFIX + "reporter" + "." + ConfigConstants.METRICS_REPORTER_CLASS_SUFFIX
    val reporterConfig: Configuration = new Configuration()
    reporterConfig.setString(reporterClassKey, classOf[StackOverflowTestReporter].getName)
    reporterConfig
  }

  /** Creates the cluster resource from the reporter configuration. */
  def buildMiniClusterResource(): MiniClusterResource = {
    val resourceConfig = new MiniClusterResource.MiniClusterResourceConfiguration(
      createConfigForReporter(), 1, 1, Time.milliseconds(50L))
    new MiniClusterResource(resourceConfig)
  }

  test("test_metrics") {
    val streamEnv = StreamExecutionEnvironment.getExecutionEnvironment

    // source -> doubling process function -> collecting sink
    val doubledStream = streamEnv
      .fromElements[Long](1L, 21L, 22L)
      .process(new MultiplyByTwo())
    doubledStream.addSink(new CollectSink())

    streamEnv.execute()

    // the sink must have received each input multiplied by two
    val collected = CollectSink.values
    collected should have length 3
    collected should contain (2L)
    collected should contain (42L)
    collected should contain (44L)

    // exactly one gauge was recorded and it counted the three processed elements
    val observedGauges = StackOverflowTestReporter.gaugesMetrics.map(entry => (entry._1, entry._2.getValue()))
    observedGauges should have size 1
    observedGauges should contain ("counter.call" -> 3)
  }
}

1 个答案:

答案 0 :(得分:0)

您最好的选择是在作业(job)运行之前使用MiniClusterResource显式启动集群,并配置一个报告器(reporter),由它检查特定的指标并通过静态字段将其公开。

// Cluster resource managed as a JUnit rule; it is created with the
// configuration from getConfig(), which registers the test reporter.
@Rule
public final MiniClusterResource clusterResource = new MiniClusterResource(
    new MiniClusterResourceConfiguration.Builder()
        .setConfiguration(getConfig())
        // build() turns the builder into the MiniClusterResourceConfiguration
        // instance that the MiniClusterResource constructor expects.
        .build());

/**
 * Builds the cluster configuration whose reporter-class entry for
 * "myTestReporter" points at {@code MyTestReporter}.
 *
 * @return a configuration containing that single entry
 */
private static Configuration getConfig() {
    final String reporterClassKey = ConfigConstants.METRICS_REPORTER_PREFIX
        + "myTestReporter."
        + ConfigConstants.METRICS_REPORTER_CLASS_SUFFIX;

    final Configuration reporterConfig = new Configuration();
    reporterConfig.setString(reporterClassKey, MyTestReporter.class.getName());
    return reporterConfig;
}

/**
 * Test reporter that captures the gauge named "myMetric" and exposes it
 * through the static {@code myGauge} field so a test can read its value.
 */
public static class MyTestReporter implements MetricReporter {
    // Set by notifyOfAddedMetric and read by the test; volatile because it
    // is written and read through a static field shared between callers.
    static volatile Gauge<?> myGauge = null;

    @Override
    public void open(MetricConfig metricConfig) {
    }

    @Override
    public void close() {
    }

    /**
     * Remembers the metric when it is a gauge named "myMetric".
     *
     * @param metric      the metric that was added
     * @param name        the metric's name
     * @param metricGroup the group the metric belongs to (unused)
     */
    @Override
    public void notifyOfAddedMetric(Metric metric, String name, MetricGroup metricGroup) {
        // The instanceof guard keeps a non-gauge metric that happens to share
        // the name from triggering a ClassCastException.
        if ("myMetric".equals(name) && metric instanceof Gauge) {
            myGauge = (Gauge<?>) metric;
        }
    }

    @Override
    public void notifyOfRemovedMetric(Metric metric, String s, MetricGroup metricGroup) {
    }
}