Heavy Spark SQL job runs for a long time with heavy GC

Time: 2017-04-15 06:37:24

Tags: java hadoop apache-spark garbage-collection jvm

I'm running a heavy Spark SQL job (attached at the end), and it has been running for a very long time. I checked the executor monitoring page, and it shows a large amount of GC time consumed during execution:

[screenshot: Executors page showing high GC time]

Storage Memory is very low. Then I went to the stage monitoring page, where all running tasks show high GC time:

[screenshots: stage page with all running tasks showing high GC time]
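For reference, the GC lines in the executor stdout quoted at the end of this post come from verbose GC logging. A minimal sketch of how to enable it (spark.executor.extraJavaOptions is the standard Spark conf key; the exact flags assume a HotSpot JVM and are my guess at how it was wired up):

import org.apache.spark.sql.SparkSession

// Sketch: make each executor JVM print GC pauses to its stdout
// (flags assume a HotSpot JVM).
val spark = SparkSession.builder()
  .appName("etaWithSpeed")
  .config("spark.executor.extraJavaOptions",
          "-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps")
  .enableHiveSupport()
  .getOrCreate()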

The code is below. Can anyone give me an approach/direction to debug, profile, or tune this situation? Thanks!

import org.apache.spark.sql.SparkSession

object etaWithSpeed {

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
                            .appName("etaWithSpeed")
                            .config("spark.executor.instances", "15")
                            .config("spark.executor.memory", "4g")
                            .enableHiveSupport()
                            .getOrCreate()
    import spark.implicits._
    spark.sparkContext.setLogLevel("WARN")

    import ch.hsr.geohash.{WGS84Point, GeoHash, BoundingBox}
    import math._
    import scala.annotation.tailrec

    // Great-circle distance in km between two lat/lon points (haversine formula).
    def haversine(lat1: Double, lon1: Double, lat2: Double, lon2: Double) = {
      val dLat = (lat2 - lat1).toRadians
      val dLon = (lon2 - lon1).toRadians

      val a = pow(sin(dLat / 2), 2) + pow(sin(dLon / 2), 2) * cos(lat1.toRadians) * cos(lat2.toRadians)
      val c = 2 * asin(sqrt(a))
      6372.8 * c  // mean Earth radius in km
    }

    // 30-bit (6-character) geohash cell id for a point.
    def geohash(lat: Double, lon: Double) = {
      GeoHash.withBitPrecision(lat, lon, 30).toBase32
    }

    // A cell plus its 8 adjacent cells, as base32 strings.
    def geohash_grid_expand(geohash: String) = {
      GeoHash.fromGeohashString(geohash).getAdjacent().map(_.toBase32) :+ geohash
    }

    def geohash_envelope_between_two_grid(lat1: Double, lon1: Double, lat2: Double, lon2: Double): List[String] = {

      val gh1 = GeoHash.withBitPrecision(lat1, lon1, 30)
      val gh2 = GeoHash.withBitPrecision(lat2, lon2, 30)

      // MARK: decide the envelope border, handling the antimeridian (±180°)
      val sortedLons = List(gh1.getBoundingBox.getMaxLon, gh1.getBoundingBox.getMinLon,
                            gh2.getBoundingBox.getMaxLon, gh2.getBoundingBox.getMinLon).sorted
      // kind 0: ordinary envelope; kind 1: the envelope wraps around ±180°
      val (lonMin, lonMax, kind) =
        if (abs(gh1.getBoundingBoxCenterPoint.getLongitude - gh2.getBoundingBoxCenterPoint.getLongitude) <= 180.0)
          (sortedLons(0), sortedLons(3), 0)
        else
          (sortedLons(1), sortedLons(2), 1)
      val sortedLats = List(gh1.getBoundingBox.getMinLat, gh1.getBoundingBox.getMaxLat,
                            gh2.getBoundingBox.getMinLat, gh2.getBoundingBox.getMaxLat).sorted
      val latMin = sortedLats.head
      val latMax = sortedLats.last

      // True when the centre of ghCur lies inside the envelope.
      def withinEnvelope(ghCur: GeoHash): Boolean = {
        val ghCurLon = ghCur.getBoundingBoxCenterPoint.getLongitude
        val ghCurLat = ghCur.getBoundingBoxCenterPoint.getLatitude
        ghCurLat >= latMin && ghCurLat <= latMax && (
          if (kind == 0) ghCurLon >= lonMin && ghCurLon <= lonMax
          else           ghCurLon <= lonMin || ghCurLon >= lonMax
        )
      }

      // Flood fill starting from gh1: visit each cell's four neighbours that
      // fall inside the envelope, accumulating every visited cell in `buffer`.
      @tailrec
      def expand_neighbors_impl(toGoThrough: List[GeoHash], buffer: Set[GeoHash] = Set()): Set[GeoHash] = {
        toGoThrough.headOption match {
          case None => buffer
          case Some(ghCur) =>
            if (buffer contains ghCur) {
              expand_neighbors_impl(toGoThrough.tail, buffer)
            }
            else {
              val neighbors = get4GeoHashAround(ghCur).filter(withinEnvelope(_))
              expand_neighbors_impl(neighbors ++: toGoThrough, buffer + ghCur)
            }
        }
      }

      def expand_neighbors(): Set[GeoHash] = expand_neighbors_impl(List(gh1))

      def get4GeoHashAround(gh: GeoHash): Array[GeoHash] = {
        Array(gh.getNorthernNeighbour, gh.getSouthernNeighbour, gh.getWesternNeighbour, gh.getEasternNeighbour)
      }

      expand_neighbors().toList.map(_.toBase32)
    }
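    // NOTE: each call materialises at least one GeoHash (with its BoundingBox
    // and WGS84Point) per cell in the envelope; see the jmap histogram below.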

    spark.sqlContext.udf.register("haversine", haversine _)
    spark.sqlContext.udf.register("geohash", geohash _)
    spark.sqlContext.udf.register("geohash_grid_expand", geohash_grid_expand _)
    spark.sqlContext.udf.register("geohash_envelope_between_two_grid", geohash_envelope_between_two_grid _)

    spark.sql("""with tmp1 as(
select
    ied.*,
    explode(geohash_envelope_between_two_grid(ied.latitude1, ied.longitude1, ied.latitude2, ied.longitude2)) as geohash_value
from dev_oussama.inhouse_eta_data ied
)
select
    tmp1.time,
    tmp1.latitude1,
    tmp1.longitude1,
    tmp1.latitude2,
    tmp1.longitude2,
    tmp1.google_eta,
    tmp1.booking_id,
    tmp1.user_id,
    tmp1.real_eta,
    tmp1.osm_eta,
    tmp1.osm_distance,
    tmp1.osm_api_result,
    sum(gr.avg_speed_mps*gr.no_of_driver_ping)/sum(gr.no_of_driver_ping) as avg_speed_mps
from tmp1
join dev_jason_zhu.geohash_region_metrics gr on tmp1.geohash_value = gr.geohash_value
group by
    tmp1.time,
    tmp1.latitude1,
    tmp1.longitude1,
    tmp1.latitude2,
    tmp1.longitude2,
    tmp1.google_eta,
    tmp1.booking_id,
    tmp1.user_id,
    tmp1.real_eta,
    tmp1.osm_eta,
    tmp1.osm_distance,
    tmp1.osm_api_result
limit 1000""").show()
  }

}
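For scale: at 30 bits a geohash cell is only about 1.2 km × 0.6 km, so geohash_envelope_between_two_grid can return thousands of cells for a long trip, and explode() then turns every one of them into a row to be joined and shuffled. A self-contained sketch of the cell size (the coordinates are made up for illustration):

import ch.hsr.geohash.GeoHash

object GeohashScale {
  def main(args: Array[String]): Unit = {
    // 30 bits = 6 base32 characters; near the equator the cell is ~1.2 km x 0.6 km.
    val gh = GeoHash.withBitPrecision(1.30, 103.80, 30)
    println(gh.toBase32)        // 6-character cell id
    println(gh.getBoundingBox)  // cell extent in degrees

    // Back-of-the-envelope: a 30 km x 30 km envelope covers roughly
    // (30 / 1.2) * (30 / 0.6) = 1,250 cells, and the flood fill materialises
    // a GeoHash + BoundingBox + WGS84Point for every one of them.
  }
}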

P.S. When I use jmap to monitor live instances in the heap of one of the executor Java processes, the following is printed. Clearly there are a lot of GeoHash-related objects; is there some way to deal with that?

bash-4.2$ jmap -histo:live 19808 | head

 num     #instances         #bytes  class name
----------------------------------------------
   1:          6199      155188800  [B
   2:           591      101908160  [J
   3:        766706       36801888  ch.hsr.geohash.BoundingBox
   4:        766706       24534592  ch.hsr.geohash.GeoHash
   5:        766706       24534592  ch.hsr.geohash.WGS84Point
   6:        766876       18405024  scala.collection.immutable.$colon$colon
   7:        219583        5269992  scala.collection.immutable.HashSet$HashSet1
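Doing the arithmetic on rows 3-5: 766,706 instances each of BoundingBox (~48 B), GeoHash (~32 B) and WGS84Point (~32 B) come to roughly 86 MB of live geohash objects in this one snapshot, plus another ~18 MB of list cells (the scala.collection.immutable.$colon$colon row) presumably holding them.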

Executor stdout log:

2017-04-15T07:41:06.702+0000: [GC (Allocation Failure) 2017-04-15T07:41:06.702+0000: [ParNew: 306688K->34048K(306688K), 0.1675930 secs] 2717803K->2474880K(6257408K), 0.1676444 secs] [Times: user=0.65 sys=0.00, real=0.17 secs] 
2017-04-15T07:41:06.991+0000: [GC (Allocation Failure) 2017-04-15T07:41:06.991+0000: [ParNew: 306688K->34048K(306688K), 0.2082425 secs] 2747520K->2511759K(6257408K), 0.2082937 secs] [Times: user=0.80 sys=0.00, real=0.21 secs] 
2017-04-15T07:41:07.318+0000: [GC (Allocation Failure) 2017-04-15T07:41:07.318+0000: [ParNew: 306688K->34048K(306688K), 0.1989878 secs] 2784399K->2560107K(6257408K), 0.1990396 secs] [Times: user=0.76 sys=0.00, real=0.20 secs] 
2017-04-15T07:41:07.636+0000: [GC (Allocation Failure) 2017-04-15T07:41:07.636+0000: [ParNew: 306688K->34048K(306688K), 0.2265302 secs] 2832747K->2614298K(6257408K), 0.2265810 secs] [Times: user=0.87 sys=0.00, real=0.22 secs] 
2017-04-15T07:41:07.981+0000: [GC (Allocation Failure) 2017-04-15T07:41:07.981+0000: [ParNew: 306688K->34048K(306688K), 0.1750708 secs] 2886938K->2657702K(6257408K), 0.1751232 secs] [Times: user=0.66 sys=0.00, real=0.18 secs] 
2017-04-15T07:41:08.273+0000: [GC (Allocation Failure) 2017-04-15T07:41:08.273+0000: [ParNew: 306688K->34048K(306688K), 0.2805720 secs] 2930342K->2710186K(6257408K), 0.2806221 secs] [Times: user=1.09 sys=0.00, real=0.28 secs] 
2017-04-15T07:41:08.672+0000: [GC (Allocation Failure) 2017-04-15T07:41:08.672+0000: [ParNew: 306688K->34046K(306688K), 0.1939371 secs] 2982826K->2759971K(6257408K), 0.1940055 secs] [Times: user=0.74 sys=0.00, real=0.20 secs] 
2017-04-15T07:41:08.985+0000: [GC (Allocation Failure) 2017-04-15T07:41:08.985+0000: [ParNew: 306686K->34046K(306688K), 0.1824374 secs] 3032611K->2803546K(6257408K), 0.1824884 secs] [Times: user=0.70 sys=0.00, real=0.18 secs] 
2017-04-15T07:41:09.286+0000: [GC (Allocation Failure) 2017-04-15T07:41:09.286+0000: [ParNew: 306686K->34046K(306688K), 0.2459598 secs] 3076186K->2862064K(6257408K), 0.2460109 secs] [Times: user=0.95 sys=0.00, real=0.24 secs] 
2017-04-15T07:41:09.650+0000: [GC (Allocation Failure) 2017-04-15T07:41:09.650+0000: [ParNew: 306686K->34048K(306688K), 0.2248917 secs] 3134704K->2916068K(6257408K), 0.2249424 secs] [Times: user=0.87 sys=0.00, real=0.23 secs] 
2017-04-15T07:41:09.994+0000: [GC (Allocation Failure) 2017-04-15T07:41:09.994+0000: [ParNew: 306688K->34048K(306688K), 0.1995261 secs] 3188708K->2963537K(6257408K), 0.1995778 secs] [Times: user=0.77 sys=0.00, real=0.20 secs] 
2017-04-15T07:41:10.312+0000: [GC (Allocation Failure) 2017-04-15T07:41:10.312+0000: [ParNew: 306688K->34048K(306688K), 0.2394581 secs] 3236177K->3013025K(6257408K), 0.2395090 secs] [Times: user=0.91 sys=0.01, real=0.24 secs] 
2017-04-15T07:41:10.675+0000: [GC (Allocation Failure) 2017-04-15T07:41:10.675+0000: [ParNew: 306688K->34048K(306688K), 0.2054829 secs] 3285665K->3065302K(6257408K), 0.2055344 secs] [Times: user=0.78 sys=0.00, real=0.20 secs] 
2017-04-15T07:41:10.999+0000: [GC (Allocation Failure) 2017-04-15T07:41:10.999+0000: [ParNew: 306688K->34046K(306688K), 0.2380438 secs] 3337942K->3122984K(6257408K), 0.2380944 secs] [Times: user=0.91 sys=0.00, real=0.24 secs] 
2017-04-15T07:41:11.360+0000: [GC (Allocation Failure) 2017-04-15T07:41:11.360+0000: [ParNew: 306686K->34046K(306688K), 0.2232903 secs] 3395624K->3177798K(6257408K), 0.2233435 secs] [Times: user=0.85 sys=0.00, real=0.23 secs] 
2017-04-15T07:41:11.703+0000: [GC (Allocation Failure) 2017-04-15T07:41:11.703+0000: [ParNew: 306686K->34046K(306688K), 0.1517400 secs] 3450438K->3215043K(6257408K), 0.1517907 secs] [Times: user=0.58 sys=0.00, real=0.14 secs] 
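Reading these lines: every ParNew collection empties the young generation (306,688K down to ~34,048K), yet the heap occupancy after each GC climbs steadily from about 2.4 GB to 3.1 GB over these five seconds, i.e. roughly 45-55 MB survives and is promoted every ~0.3 s. That is consistent with the large population of GeoHash/BoundingBox/WGS84Point objects in the histogram above living long enough to escape the young generation.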

0 Answers:

No answers yet.