我使用spark(Java)来读写DB。我使用Spark的内置集群管理器。该应用程序捆绑为胖jar并通过spark-submit命令运行:
"./spark-submit --class com.tte.site.sector.daily.main.Driver --master spark://ip-xxx-xx-xx-xx:7077 --deploy-mode client /home/ec2-user/jars-dir/site-sector-daily-1.0-jar-with-dependencies.jar
任务运行正常,应用程序日志以及执行spark-submit的命令行上都没有出现异常。下面是运行spark-submit时的完整stdout输出:
./spark-submit --class com.tte.site.sector.daily.main.Driver --master spark://ip-xxx-xx-xx-xx:7077 --deploy-mode client /home/ec2-user/jars-dir/site-sector-daily-1.0-jar-with-dependencies.jar
15/12/17 06:42:06 INFO TaskSetManager: Finished task 0.0 in stage 0.0 (TID 0) in 861449 ms on xxx.xx.8.62 (1/2)
15/12/17 06:46:01 INFO DAGScheduler: ResultStage 0 (foreach at Driver.java:143) finished in 1095.509 s
15/12/17 06:46:01 INFO TaskSetManager: Finished task 1.0 in stage 0.0 (TID 1) in 1095438 ms on xxx.xx.17.222 (2/2)
15/12/17 06:46:01 INFO TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool
15/12/17 06:46:01 INFO DAGScheduler: Job 0 finished: foreach at Driver.java:143, took 1095.768515 s
由于相应的数据库更新都已正确执行,应用程序逻辑似乎已经完成,但Spark UI显示它仍在运行,而且spark-submit命令也没有返回到提示符,说明仍有某些东西在占用资源。我尝试过调用JavaSparkContext.close(),它确实能让Spark UI显示作业已完成,但spark-submit命令仍然没有返回。我在这里做错了什么?
以下是代码:
public class Driver {
private static final Logger logger = LoggerFactory.getLogger(Driver.class);
public static void main(String[] args) {
Cluster cluster = Cluster.builder().addContactPoint("xxx.xx.xx.xx").build();// aws local
Session dbSession = cluster.connect("syringa");
SparkConf conf = new SparkConf()
.setAppName("sector_site_hourly_daily_job")
.setMaster("spark://ip-172-31-29-81:7077");
JavaSparkContext sc = new JavaSparkContext(conf);
// share with workers
final Broadcast<List<KpiEntity>> kpiFormulaEntityBroadcastVar = sc.broadcast(kpiFormulaEntity);
final Broadcast<Set<String>> inputCountersBroadcastVar = sc.broadcast(inputCountersInKPIFormulas);
final Broadcast<Map<Integer, List<Date>>> fromTohoursBroadcastVar = sc.broadcast(generateFromToHour2());
final Broadcast<Map<Integer, List<DateTime>>> fromTohoursSelectBroadcastVar = sc.broadcast(generateFromToHours());
final Broadcast<Map<Integer, List<DateTime>>> noneUTCDatesBroadcastVar = sc.broadcast(generateFromToHoursForSelecting());
// Alternate approach to cassandraTable - Sites-Sectors RDD
ResultSet siteSectorQueryResult = dbSession.execute("select * from kpi.site_sectors_zone_area");
List<Row> rows = siteSectorQueryResult.all();
List<SiteSectorsEntity> siteSectorsEntities = new ArrayList<>();
for(Row row: rows) {
SiteSectorsEntity siteSectorsEntity = new SiteSectorsEntity();
siteSectorsEntity.setSiteName(row.getString("site_name"));
siteSectorsEntity.setArea(row.getString("area"));
siteSectorsEntity.setLatitude(row.getString("latitude"));
siteSectorsEntity.setLongitude(row.getString("longitude"));
siteSectorsEntity.setSectorAzimuth(row.getMap("sector_azimuth", String.class, String.class));
siteSectorsEntity.setSectors(row.getList("sectors", String.class));
siteSectorsEntity.setZone(row.getString("zone"));
siteSectorsEntities.add(siteSectorsEntity);
}
logger.info("*** Number of SiteSectorEntities {}", siteSectorsEntities.size());
JavaRDD<SiteSectorsEntity> siteSectorsEntityRDD = sc.parallelize(siteSectorsEntities);
doWork(siteSectorsEntityRDD, kpiFormulaEntityBroadcastVar, inputCountersBroadcastVar,
fromTohoursBroadcastVar, fromTohoursSelectBroadcastVar, noneUTCDatesBroadcastVar);
} finally {
dbSession.close();
}
}
public static void doWork(JavaRDD<SiteSectorsEntity> siteSectorsEntityRDD, Broadcast<List<KpiEntity>> kpiFormulaEntityBroadcastVar,
Broadcast<Set<String>> inputCountersBroadcastVar, Broadcast<Map<Integer, List<Date>>> fromTohoursBroadcastVar,
Broadcast<Map<Integer, List<DateTime>>> fromTohoursSelectBroadcastVar,
Broadcast<Map<Integer, List<DateTime>>> noneUTCBroadvaseVar) {
// Distribute to workers
siteSectorsEntityRDD.foreach(new VoidFunction<SiteSectorsEntity>() {
private static final long serialVersionUID = 5219326359281542043L;
public void call(SiteSectorsEntity siteSectorsEntity) throws Exception { // remove throws
Cluster cluster = Cluster.builder().addContactPoint("xxx.xx.xx.xx").build();//aws local
Session dbSession = cluster.connect("syringa");
Map<Integer, List<Date>> fromTohours = fromTohoursBroadcastVar.value();
Map<Integer, List<DateTime>> fromTohoursSelect = fromTohoursSelectBroadcastVar.value();
Map<Integer, List<DateTime>> noneUTCDates = noneUTCBroadvaseVar.value();
DateTime now = new DateTime();
//omitting long line of code here...
logger.info("site-daily-counter-aggregation: compeleted for: {}", siteName);
logger.info("site_daily_kpi: starting daily kpi generation for {}", siteName);
String siteDailyAggregateValue;
Map<String, String> mapOfSiteToDailyKpiInputCountersTotal = new HashMap<>();
for(KpiEntity kpiEntity: kpiEntityList) {
for(String kpiInputCounter : kpiEntity.getFormulaCounterNames()) {
String Zfrom = fromTimestamp.toString();
String[] noZfrom = Zfrom.split("Z");
String from = noZfrom[0]+"-0800";
String Zto = toTimestamp.toString();
String[] noZto = Zto.split("Z");
String to = noZto[0]+"-0800";
List<Row> siteDailyAggregate =
DBUtil.selectSiteDailyCounterAggregate(siteName, from, to, kpiInputCounter, dbSession);
siteDailyAggregateValue = siteDailyAggregate.get(0).getString("counter_agg_value");
mapOfSiteToDailyKpiInputCountersTotal.put(kpiInputCounter, siteDailyAggregateValue);
}
String kpiFormula = kpiEntity.getKpiFormula();
for(String counter:kpiEntity.getFormulaCounterNames()) {
kpiFormula = kpiFormula.replaceAll("\\b"+counter+"\\b", mapOfSectorsToKpiInputCountersTotal.get(counter));
}
System.out.println("site_daily_kpi: KPI FORMULA TO BE EVAL'd :: "+kpiFormula +" for hour::" +fromTimestamp);
// create a script engine manager
ScriptEngineManager factory = new ScriptEngineManager();
// create a Nashorn script engine
ScriptEngine engine = factory.getEngineByName("nashorn");
// evaluate KPI formula as a JavaScript statement
try {
String red = kpiEntity.getKpiStatusRed();
String green = kpiEntity.getKpiStatusGreen();
String yellow = kpiEntity.getKpiStatusYellow();
Map<String, String> thresholdMap = new HashMap<>();
thresholdMap.put("red", red);
thresholdMap.put("yellow", yellow);
thresholdMap.put("green", green);
String[] yellowRange = yellow.split("-"); // assuming that only yellow carries a multi-value (range)
BigDecimal dailyKpiValue = evaluateExpression(kpiFormula, engine);
if(compareIfLessThanOneAndGreaterThanZero(dailyKpiValue)) {
dailyKpiValue = dailyKpiValue.setScale(1, RoundingMode.UP);
} else {
dailyKpiValue = dailyKpiValue.setScale(0, RoundingMode.DOWN);
}
System.out.println("site_hourly_kpi: site Hourly "+kpiEntity.getKpiName()+"="+dailyKpiValue.setScale(0, RoundingMode.DOWN) +" "+kpiEntity.getMeasurementUnit());
String kpiStatusColor = determineKpiStatusColor(dailyKpiValue, red, green, yellowRange, engine);
// populate sector counter aggregation table for a counter
Insert insert = QueryBuilder.insertInto("kpi", "site_daily_kpi")
.value("site_name", siteName)
.value("area", siteSectorsEntity.getArea())
.value("id", UUID.randomUUID())
.value("kpi_name", kpiEntity.getKpiName())
.value("kpi_status", kpiStatusColor)
.value("kpi_value", dailyKpiValue.toString())
.value("measurement", kpiEntity.getMeasurementUnit())
.value("thresholds", thresholdMap)
.value("time_stamp", isoFormat.parse(fromTimestamp.toString()))
.value("category", kpiEntity.getCategory())
.value("zone", siteSectorsEntity.getZone())
;
ResultSet results = dbSession.execute(insert);
} catch (NumberFormatException nfe) {
logger.info("site_daily_kpi: site hourly "+kpiEntity.getKpiName()+"="+0+" "+kpiEntity.getMeasurementUnit());
} catch (ClassCastException cce) {
logger.info("site_daily_kpi: ClassCastException site hourly: "+ siteName );
}
}// ends site hourly kpi gen.
logger.info("site_daily_kpi: completed site daily kpi genertion for: {}",siteName);
} finally {
try {
logger.info("in finally - closing DB session.");
dbSession.close();
} catch (Exception e) {
logger.error("Error during db session close", e.getMessage());
}
}
}
});
}
}
完成任务后添加jstack输出:
jstack 14073
2015-12-18 06:30:46
Full thread dump Java HotSpot(TM) 64-Bit Server VM (25.65-b01 mixed mode):
"ForkJoinPool-3-worker-3" #73 daemon prio=5 os_prio=0 tid=0x00007f59801c6000 nid=0x37fd waiting on condition [0x00007f59741f8000]
java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x00000000cead31b8> (a scala.concurrent.forkjoin.ForkJoinPool)
at scala.concurrent.forkjoin.ForkJoinPool.scan(ForkJoinPool.java:2075)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
"DestroyJavaVM" #72 prio=5 os_prio=0 tid=0x00007f59a8008800 nid=0x3718 waiting on condition [0x0000000000000000]
java.lang.Thread.State: RUNNABLE
"Attach Listener" #69 daemon prio=9 os_prio=0 tid=0x00007f5980220800 nid=0x376e waiting on condition [0x0000000000000000]
java.lang.Thread.State: RUNNABLE
"cluster1-nio-worker-1" #10 prio=5 os_prio=0 tid=0x00007f59a85a9000 nid=0x3727 runnable [0x00007f5984625000]
java.lang.Thread.State: RUNNABLE
at io.netty.channel.epoll.Native.epollWait0(Native Method)
at io.netty.channel.epoll.Native.epollWait(Native.java:153)
at io.netty.channel.epoll.EpollEventLoop.epollWait(EpollEventLoop.java:184)
at io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:209)
at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:111)
at java.lang.Thread.run(Thread.java:745)
"threadDeathWatcher-2-1" #15 daemon prio=1 os_prio=0 tid=0x00007f59780ca000 nid=0x3726 waiting on condition [0x00007f5984926000]
java.lang.Thread.State: TIMED_WAITING (sleeping)
at java.lang.Thread.sleep(Native Method)
at io.netty.util.ThreadDeathWatcher$Watcher.run(ThreadDeathWatcher.java:137)
at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:137)
at java.lang.Thread.run(Thread.java:745)
"cluster1-timeouter-0" #11 prio=5 os_prio=0 tid=0x00007f597806b800 nid=0x3725 waiting on condition [0x00007f5984a27000]
java.lang.Thread.State: TIMED_WAITING (sleeping)
at java.lang.Thread.sleep(Native Method)
at io.netty.util.HashedWheelTimer$Worker.waitForNextTick(HashedWheelTimer.java:461)
at io.netty.util.HashedWheelTimer$Worker.run(HashedWheelTimer.java:360)
at java.lang.Thread.run(Thread.java:745)
"cluster1-nio-worker-0" #9 prio=5 os_prio=0 tid=0x00007f59a8584000 nid=0x3724 runnable [0x00007f5984b28000]
java.lang.Thread.State: RUNNABLE
at io.netty.channel.epoll.Native.epollWait0(Native Method)
at io.netty.channel.epoll.Native.epollWait(Native.java:153)
at io.netty.channel.epoll.EpollEventLoop.epollWait(EpollEventLoop.java:184)
at io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:209)
at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:111)
at java.lang.Thread.run(Thread.java:745)
"cluster1-scheduled-task-worker-0" #13 prio=5 os_prio=0 tid=0x00007f59a854e800 nid=0x3722 waiting on condition [0x00007f59ac103000]
java.lang.Thread.State: TIMED_WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x00000000d59c70b8> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
at java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215)
at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(AbstractQueuedSynchronizer.java:2078)
at java.util.concurrent.ScheduledThreadPoolExecutor$DelayedWorkQueue.take(ScheduledThreadPoolExecutor.java:1093)
at java.util.concurrent.ScheduledThreadPoolExecutor$DelayedWorkQueue.take(ScheduledThreadPoolExecutor.java:809)
at java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1067)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1127)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
"cluster1-connection-reaper-0" #8 prio=5 os_prio=0 tid=0x00007f59a8483800 nid=0x3721 waiting on condition [0x00007f59ac408000]
java.lang.Thread.State: TIMED_WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x00000000d59c8f20> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
at java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215)
at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(AbstractQueuedSynchronizer.java:2078)
at java.util.concurrent.ScheduledThreadPoolExecutor$DelayedWorkQueue.take(ScheduledThreadPoolExecutor.java:1093)
at java.util.concurrent.ScheduledThreadPoolExecutor$DelayedWorkQueue.take(ScheduledThreadPoolExecutor.java:809)
at java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1067)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1127)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
"Service Thread" #7 daemon prio=9 os_prio=0 tid=0x00007f59a80bc800 nid=0x371f runnable [0x0000000000000000]
java.lang.Thread.State: RUNNABLE
"C1 CompilerThread1" #6 daemon prio=9 os_prio=0 tid=0x00007f59a80af800 nid=0x371e waiting on condition [0x0000000000000000]
java.lang.Thread.State: RUNNABLE
"C2 CompilerThread0" #5 daemon prio=9 os_prio=0 tid=0x00007f59a80ad800 nid=0x371d waiting on condition [0x0000000000000000]
java.lang.Thread.State: RUNNABLE
"Signal Dispatcher" #4 daemon prio=9 os_prio=0 tid=0x00007f59a80ac000 nid=0x371c runnable [0x0000000000000000]
java.lang.Thread.State: RUNNABLE
"Finalizer" #3 daemon prio=8 os_prio=0 tid=0x00007f59a8074000 nid=0x371b in Object.wait() [0x00007f59ace39000]
java.lang.Thread.State: WAITING (on object monitor)
at java.lang.Object.wait(Native Method)
- waiting on <0x00000000d5556950> (a java.lang.ref.ReferenceQueue$Lock)
at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:143)
- locked <0x00000000d5556950> (a java.lang.ref.ReferenceQueue$Lock)
at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:164)
at java.lang.ref.Finalizer$FinalizerThread.run(Finalizer.java:209)
"Reference Handler" #2 daemon prio=10 os_prio=0 tid=0x00007f59a8072000 nid=0x371a in Object.wait() [0x00007f59acf3a000]
java.lang.Thread.State: WAITING (on object monitor)
at java.lang.Object.wait(Native Method)
- waiting on <0x00000000d55563d0> (a java.lang.ref.Reference$Lock)
at java.lang.Object.wait(Object.java:502)
at java.lang.ref.Reference$ReferenceHandler.run(Reference.java:157)
- locked <0x00000000d55563d0> (a java.lang.ref.Reference$Lock)
"VM Thread" os_prio=0 tid=0x00007f59a806d000 nid=0x3719 runnable
"VM Periodic Task Thread" os_prio=0 tid=0x00007f59a80c0000 nid=0x3720 waiting on condition
JNI global references: 278
答案 0 :(得分:0)
获取运行java进程(worker / executors)的线程转储并检查哪些线程仍在运行。可能是你有一些逻辑创建了一些仍在运行的线程。没有看到完整的代码很难说。