Do I need to build xgboost4j-spark, or can I use the jar files from the Maven repo?

Time: 2018-06-18 21:06:28

Tags: scala apache-spark hadoop xgboost

I am trying to use the xgboost jar files from Maven Central, but when I run this example, model training fails with the error below. It does seem to work, however, when I build the library from source and add the resulting jars via spark.conf.
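For context, a minimal sbt-style sketch of what "using the jars from Maven Central" means here; the 0.72 version is a placeholder rather than the exact release on my cluster, and the comment describes the from-source setup that does work:

    // build.sbt sketch: pull the prebuilt xgboost4j artifacts from Maven Central
    // (version 0.72 is a placeholder; substitute whichever release you target)
    libraryDependencies ++= Seq(
      "ml.dmlc" % "xgboost4j"       % "0.72",
      "ml.dmlc" % "xgboost4j-spark" % "0.72"
    )

    // The alternative that works for me: build xgboost4j / xgboost4j-spark from source
    // on the cluster and point spark.jars (in spark-defaults.conf) at the locally built jars.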

scala> val xgbModel = cv.fit(trainDF)
Tracker started, with env={DMLC_NUM_SERVER=0, DMLC_TRACKER_URI=172.31.24.44, 
DMLC_TRACKER_PORT=9091, DMLC_NUM_WORKER=1}
[Stage 40:======================================>                   (4 + 2) / 6]18/06/18 20:37:48 WARN TaskSetManager: Lost task 0.0 in stage 41.0 (TID 838, ip-172-31-22-11.ec2.internal, executor 2): java.lang.UnsatisfiedLinkError: /mnt/yarn/usercache/hadoop/appcache/application_1529329423475_0031/container_1529329423475_0031_01_000003/tmp/libxgboost4j2427279768220324654.so: /usr/lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by /mnt/yarn/usercache/hadoop/appcache/application_1529329423475_0031/container_1529329423475_0031_01_000003/tmp/libxgboost4j2427279768220324654.so)
        at java.lang.ClassLoader$NativeLibrary.load(Native Method)
        at java.lang.ClassLoader.loadLibrary0(ClassLoader.java:1941)
        at java.lang.ClassLoader.loadLibrary(ClassLoader.java:1824)
        at java.lang.Runtime.load0(Runtime.java:809)
        at java.lang.System.load(System.java:1086)
        at ml.dmlc.xgboost4j.java.NativeLibLoader.loadLibraryFromJar(NativeLibLoader.java:66)
        at ml.dmlc.xgboost4j.java.NativeLibLoader.smartLoad(NativeLibLoader.java:152)
        at ml.dmlc.xgboost4j.java.NativeLibLoader.initXGBoost(NativeLibLoader.java:40)
        at ml.dmlc.xgboost4j.java.XGBoostJNI.<clinit>(XGBoostJNI.java:34)
        at ml.dmlc.xgboost4j.java.Rabit.init(Rabit.java:65)
        at ml.dmlc.xgboost4j.scala.spark.XGBoost$$anonfun$buildDistributedBoosters$1.apply(XGBoost.scala:131)
        at ml.dmlc.xgboost4j.scala.spark.XGBoost$$anonfun$buildDistributedBoosters$1.apply(XGBoost.scala:118)
        at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:89)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
        at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:336)
        at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:334)
        at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1038)
        at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1029)
        at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:969)
        at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1029)
        at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:760)
        at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:334)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:285)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
        at org.apache.spark.scheduler.Task.run(Task.scala:108)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)

18/06/18 20:37:48 ERROR Utils: uncaught error in thread SparkListenerBus, stopping SparkContext
java.lang.InterruptedException: ExecutorLost during XGBoost Training: java.lang.UnsatisfiedLinkError: /mnt/yarn/usercache/hadoop/appcache/application_1529329423475_0031/container_1529329423475_0031_01_000003/tmp/libxgboost4j2427279768220324654.so: /usr/lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by /mnt/yarn/usercache/hadoop/appcache/application_1529329423475_0031/container_1529329423475_0031_01_000003/tmp/libxgboost4j2427279768220324654.so)
        at java.lang.ClassLoader$NativeLibrary.load(Native Method)
        at java.lang.ClassLoader.loadLibrary0(ClassLoader.java:1941)
        at java.lang.ClassLoader.loadLibrary(ClassLoader.java:1824)
        at java.lang.Runtime.load0(Runtime.java:809)
        at java.lang.System.load(System.java:1086)
        at ml.dmlc.xgboost4j.java.NativeLibLoader.loadLibraryFromJar(NativeLibLoader.java:66)
        at ml.dmlc.xgboost4j.java.NativeLibLoader.smartLoad(NativeLibLoader.java:152)
        at ml.dmlc.xgboost4j.java.NativeLibLoader.initXGBoost(NativeLibLoader.java:40)
        at ml.dmlc.xgboost4j.java.XGBoostJNI.<clinit>(XGBoostJNI.java:34)
        at ml.dmlc.xgboost4j.java.Rabit.init(Rabit.java:65)
        at ml.dmlc.xgboost4j.scala.spark.XGBoost$$anonfun$buildDistributedBoosters$1.apply(XGBoost.scala:131)
        at ml.dmlc.xgboost4j.scala.spark.XGBoost$$anonfun$buildDistributedBoosters$1.apply(XGBoost.scala:118)
        at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:89)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
        at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:336)
        at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:334)
        at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1038)
        at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1029)
        at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:969)
        at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1029)
        at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:760)
        at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:334)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:285)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
        at org.apache.spark.scheduler.Task.run(Task.scala:108)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)

        at org.apache.spark.TaskFailedListener.onTaskEnd(SparkParallelismTracker.scala:111)
        at org.apache.spark.scheduler.SparkListenerBus$class.doPostEvent(SparkListenerBus.scala:45)
        at org.apache.spark.scheduler.LiveListenerBus.doPostEvent(LiveListenerBus.scala:36)
        at org.apache.spark.scheduler.LiveListenerBus.doPostEvent(LiveListenerBus.scala:36)
        at org.apache.spark.util.ListenerBus$class.postToAll(ListenerBus.scala:63)
        at org.apache.spark.scheduler.LiveListenerBus.postToAll(LiveListenerBus.scala:36)
        at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(LiveListenerBus.scala:94)
        at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply(LiveListenerBus.scala:79)
        at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply(LiveListenerBus.scala:79)
        at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
        at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1.apply$mcV$sp(LiveListenerBus.scala:78)
        at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1279)
        at org.apache.spark.scheduler.LiveListenerBus$$anon$1.run(LiveListenerBus.scala:77)
18/06/18 20:37:48 ERROR Utils: throw uncaught fatal error in thread SparkListenerBus
java.lang.InterruptedException: ExecutorLost during XGBoost Training: java.lang.UnsatisfiedLinkError: /mnt/yarn/usercache/hadoop/appcache/application_1529329423475_0031/container_1529329423475_0031_01_000003/tmp/libxgboost4j2427279768220324654.so: /usr/lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by /mnt/yarn/usercache/hadoop/appcache/application_1529329423475_0031/container_1529329423475_0031_01_000003/tmp/libxgboost4j2427279768220324654.so)
        at java.lang.ClassLoader$NativeLibrary.load(Native Method)
        at java.lang.ClassLoader.loadLibrary0(ClassLoader.java:1941)
        at java.lang.ClassLoader.loadLibrary(ClassLoader.java:1824)
        at java.lang.Runtime.load0(Runtime.java:809)
        at java.lang.System.load(System.java:1086)
        at ml.dmlc.xgboost4j.java.NativeLibLoader.loadLibraryFromJar(NativeLibLoader.java:66)
        at ml.dmlc.xgboost4j.java.NativeLibLoader.smartLoad(NativeLibLoader.java:152)
        at ml.dmlc.xgboost4j.java.NativeLibLoader.initXGBoost(NativeLibLoader.java:40)
        at ml.dmlc.xgboost4j.java.XGBoostJNI.<clinit>(XGBoostJNI.java:34)
        at ml.dmlc.xgboost4j.java.Rabit.init(Rabit.java:65)
        at ml.dmlc.xgboost4j.scala.spark.XGBoost$$anonfun$buildDistributedBoosters$1.apply(XGBoost.scala:131)
        at ml.dmlc.xgboost4j.scala.spark.XGBoost$$anonfun$buildDistributedBoosters$1.apply(XGBoost.scala:118)
        at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:89)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
        at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:336)
        at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:334)
        at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1038)
        at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1029)
        at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:969)
        at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1029)
        at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:760)
        at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:334)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:285)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
        at org.apache.spark.scheduler.Task.run(Task.scala:108)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)

        at org.apache.spark.TaskFailedListener.onTaskEnd(SparkParallelismTracker.scala:111)
        at org.apache.spark.scheduler.SparkListenerBus$class.doPostEvent(SparkListenerBus.scala:45)
        at org.apache.spark.scheduler.LiveListenerBus.doPostEvent(LiveListenerBus.scala:36)
        at org.apache.spark.scheduler.LiveListenerBus.doPostEvent(LiveListenerBus.scala:36)
        at org.apache.spark.util.ListenerBus$class.postToAll(ListenerBus.scala:63)
        at org.apache.spark.scheduler.LiveListenerBus.postToAll(LiveListenerBus.scala:36)
        at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(LiveListenerBus.scala:94)
        at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply(LiveListenerBus.scala:79)
        at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply(LiveListenerBus.scala:79)
        at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
        at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1.apply$mcV$sp(LiveListenerBus.scala:78)
        at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1279)
        at org.apache.spark.scheduler.LiveListenerBus$$anon$1.run(LiveListenerBus.scala:77)
Exception in thread "SparkListenerBus" java.lang.InterruptedException: ExecutorLost during XGBoost Training: java.lang.UnsatisfiedLinkError: /mnt/yarn/usercache/hadoop/appcache/application_1529329423475_0031/container_1529329423475_0031_01_000003/tmp/libxgboost4j2427279768220324654.so: /usr/lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by /mnt/yarn/usercache/hadoop/appcache/application_1529329423475_0031/container_1529329423475_0031_01_000003/tmp/libxgboost4j2427279768220324654.so)
        at java.lang.ClassLoader$NativeLibrary.load(Native Method)
        at java.lang.ClassLoader.loadLibrary0(ClassLoader.java:1941)
        at java.lang.ClassLoader.loadLibrary(ClassLoader.java:1824)
        at java.lang.Runtime.load0(Runtime.java:809)
        at java.lang.System.load(System.java:1086)
        at ml.dmlc.xgboost4j.java.NativeLibLoader.loadLibraryFromJar(NativeLibLoader.java:66)
        at ml.dmlc.xgboost4j.java.NativeLibLoader.smartLoad(NativeLibLoader.java:152)
        at ml.dmlc.xgboost4j.java.NativeLibLoader.initXGBoost(NativeLibLoader.java:40)
        at ml.dmlc.xgboost4j.java.XGBoostJNI.<clinit>(XGBoostJNI.java:34)
        at ml.dmlc.xgboost4j.java.Rabit.init(Rabit.java:65)
        at ml.dmlc.xgboost4j.scala.spark.XGBoost$$anonfun$buildDistributedBoosters$1.apply(XGBoost.scala:131)
        at ml.dmlc.xgboost4j.scala.spark.XGBoost$$anonfun$buildDistributedBoosters$1.apply(XGBoost.scala:118)
        at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:89)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
        at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:336)
        at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:334)
        at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1038)
        at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1029)
        at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:969)
        at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1029)
        at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:760)
        at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:334)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:285)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
        at org.apache.spark.scheduler.Task.run(Task.scala:108)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)

        at org.apache.spark.TaskFailedListener.onTaskEnd(SparkParallelismTracker.scala:111)
        at org.apache.spark.scheduler.SparkListenerBus$class.doPostEvent(SparkListenerBus.scala:45)
        at org.apache.spark.scheduler.LiveListenerBus.doPostEvent(LiveListenerBus.scala:36)
        at org.apache.spark.scheduler.LiveListenerBus.doPostEvent(LiveListenerBus.scala:36)
        at org.apache.spark.util.ListenerBus$class.postToAll(ListenerBus.scala:63)
        at org.apache.spark.scheduler.LiveListenerBus.postToAll(LiveListenerBus.scala:36)
        at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(LiveListenerBus.scala:94)
        at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply(LiveListenerBus.scala:79)
        at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply(LiveListenerBus.scala:79)
        at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
        at org.apache.spark.scheduler.LiveListenerBus$$anon$1$$anonfun$run$1.apply$mcV$sp(LiveListenerBus.scala:78)
        at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1279)
        at org.apache.spark.scheduler.LiveListenerBus$$anon$1.run(LiveListenerBus.scala:77)
18/06/18 20:37:48 WARN TaskSetManager: Lost task 0.1 in stage 41.0 (TID 839, ip-172-31-22-11.ec2.internal, executor 2): java.lang.NoClassDefFoundError: Could not initialize class ml.dmlc.xgboost4j.java.XGBoostJNI
        at ml.dmlc.xgboost4j.java.Rabit.init(Rabit.java:65)
        at ml.dmlc.xgboost4j.scala.spark.XGBoost$$anonfun$buildDistributedBoosters$1.apply(XGBoost.scala:131)
        at ml.dmlc.xgboost4j.scala.spark.XGBoost$$anonfun$buildDistributedBoosters$1.apply(XGBoost.scala:118)
        at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:89)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
        at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:336)
        at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:334)
        at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1038)
        at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1029)
        at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:969)
        at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1029)
        at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:760)
        at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:334)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:285)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
        at org.apache.spark.scheduler.Task.run(Task.scala:108)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)

18/06/18 20:37:48 ERROR LiveListenerBus: SparkListenerBus has already stopped! Dropping event SparkListenerTaskEnd(41,0,ResultTask,ExceptionFailure(java.lang.NoClassDefFoundError,Could not initialize class ml.dmlc.xgboost4j.java.XGBoostJNI,[Ljava.lang.StackTraceElement;@138eadab,java.lang.NoClassDefFoundError: Could not initialize class ml.dmlc.xgboost4j.java.XGBoostJNI
        at ml.dmlc.xgboost4j.java.Rabit.init(Rabit.java:65)
        at ml.dmlc.xgboost4j.scala.spark.XGBoost$$anonfun$buildDistributedBoosters$1.apply(XGBoost.scala:131)
        at ml.dmlc.xgboost4j.scala.spark.XGBoost$$anonfun$buildDistributedBoosters$1.apply(XGBoost.scala:118)
        at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:89)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
        at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:336)
        at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:334)
        at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1038)
        at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1029)
        at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:969)
        at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1029)
        at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:760)
        at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:334)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:285)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
        at org.apache.spark.scheduler.Task.run(Task.scala:108)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
,Some(org.apache.spark.ThrowableSerializationWrapper@1ff6a2b9),Vector(AccumulableInfo(964,None,Some(34),None,false,true,None), AccumulableInfo(966,None,Some(0),None,false,true,None), AccumulableInfo(972,None,Some([(rdd_134_0,BlockStatus(StorageLevel(1 replicas),0,0))]),None,false,true,None)),Vector(LongAccumulator(id: 964, name: Some(internal.metrics.executorRunTime), value: 34), LongAccumulator(id: 966, name: Some(internal.metrics.resultSize), value: 0), CollectionAccumulator(id: 972, name: Some(internal.metrics.updatedBlockStatuses), value: [(rdd_134_0,BlockStatus(StorageLevel(1 replicas),0,0))]))),org.apache.spark.scheduler.TaskInfo@5b362aa0,org.apache.spark.executor.TaskMetrics@2cad9e74)
18/06/18 20:37:48 ERROR LiveListenerBus: SparkListenerBus has already stopped! Dropping event SparkListenerTaskStart(41,0,org.apache.spark.scheduler.TaskInfo@4aa88dbf)
18/06/18 20:37:48 ERROR LiveListenerBus: SparkListenerBus has already stopped! Dropping event SparkListenerBlockUpdated(BlockUpdatedInfo(BlockManagerId(2, ip-172-31-22-11.ec2.internal, 33823, None),rdd_134_0,StorageLevel(1 replicas),0,0))
18/06/18 20:37:48 ERROR LiveListenerBus: SparkListenerBus has already stopped! Dropping event SparkListenerTaskEnd(41,0,ResultTask,ExceptionFailure(java.lang.NoClassDefFoundError,Could not initialize class ml.dmlc.xgboost4j.java.XGBoostJNI,[Ljava.lang.StackTraceElement;@60611d39,java.lang.NoClassDefFoundError: Could not initialize class ml.dmlc.xgboost4j.java.XGBoostJNI
        at ml.dmlc.xgboost4j.java.Rabit.init(Rabit.java:65)
        at ml.dmlc.xgboost4j.scala.spark.XGBoost$$anonfun$buildDistributedBoosters$1.apply(XGBoost.scala:131)
        at ml.dmlc.xgboost4j.scala.spark.XGBoost$$anonfun$buildDistributedBoosters$1.apply(XGBoost.scala:118)
        at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:89)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
        at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:336)
        at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:334)
        at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1038)
        at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1029)
        at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:969)
        at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1029)
        at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:760)
        at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:334)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:285)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
        at org.apache.spark.scheduler.Task.run(Task.scala:108)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
,Some(org.apache.spark.ThrowableSerializationWrapper@ae848bf),Vector(AccumulableInfo(964,None,Some(7),None,false,true,None), AccumulableInfo(966,None,Some(0),None,false,true,None), AccumulableInfo(972,None,Some([(rdd_134_0,BlockStatus(StorageLevel(1 replicas),0,0))]),None,false,true,None)),Vector(LongAccumulator(id: 964, name: Some(internal.metrics.executorRunTime), value: 7), LongAccumulator(id: 966, name: Some(internal.metrics.resultSize), value: 0), CollectionAccumulator(id: 972, name: Some(internal.metrics.updatedBlockStatuses), value: [(rdd_134_0,BlockStatus(StorageLevel(1 replicas),0,0))]))),org.apache.spark.scheduler.TaskInfo@4aa88dbf,org.apache.spark.executor.TaskMetrics@4af629a8)
18/06/18 20:37:48 ERROR TaskSetManager: Task 0 in stage 41.0 failed 4 times; aborting job
18/06/18 20:37:48 ERROR LiveListenerBus: SparkListenerBus has already stopped! Dropping event SparkListenerStageCompleted(org.apache.spark.scheduler.StageInfo@23fb4466)
18/06/18 20:37:48 ERROR LiveListenerBus: SparkListenerBus has already stopped! Dropping event SparkListenerJobEnd(22,1529354268953,JobFailed(org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 41.0 failed 4 times, most recent failure: Lost task 0.3 in stage 41.0 (TID 841, ip-172-31-22-11.ec2.internal, executor 2): java.lang.NoClassDefFoundError: Could not initialize class ml.dmlc.xgboost4j.java.XGBoostJNI
        at ml.dmlc.xgboost4j.java.Rabit.init(Rabit.java:65)
        at ml.dmlc.xgboost4j.scala.spark.XGBoost$$anonfun$buildDistributedBoosters$1.apply(XGBoost.scala:131)
        at ml.dmlc.xgboost4j.scala.spark.XGBoost$$anonfun$buildDistributedBoosters$1.apply(XGBoost.scala:118)
        at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:89)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
        at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:336)
        at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:334)
        at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1038)
        at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1029)
        at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:969)
        at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1029)
        at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:760)
        at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:334)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:285)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
        at org.apache.spark.scheduler.Task.run(Task.scala:108)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)

...
...
...
Caused by: java.lang.NoClassDefFoundError: Could not initialize class ml.dmlc.xgboost4j.java.XGBoostJNI
    at ml.dmlc.xgboost4j.java.Rabit.init(Rabit.java:65)
    at ml.dmlc.xgboost4j.scala.spark.XGBoost$$anonfun$buildDistributedBoosters$1.apply(XGBoost.scala:131)
    at ml.dmlc.xgboost4j.scala.spark.XGBoost$$anonfun$buildDistributedBoosters$1.apply(XGBoost.scala:118)
    at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:89)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
    at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:336)
    at org.apache.spark.rdd.RDD$$anonfun$8.apply(RDD.scala:334)
    at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1038)
    at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1029)
    at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:969)
    at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1029)
    at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:760)
    at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:334)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:285)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
    at org.apache.spark.scheduler.Task.run(Task.scala:108)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
ml.dmlc.xgboost4j.java.XGBoostError: XGBoostModel training failed
  at ml.dmlc.xgboost4j.scala.spark.XGBoost$.postTrackerReturnProcessing(XGBoost.scala:376)
  at ml.dmlc.xgboost4j.scala.spark.XGBoost$.trainDistributed(XGBoost.scala:345)
  at ml.dmlc.xgboost4j.scala.spark.XGBoostEstimator.train(XGBoostEstimator.scala:139)
  at ml.dmlc.xgboost4j.scala.spark.XGBoostEstimator.train(XGBoostEstimator.scala:36)
  at org.apache.spark.ml.Predictor.fit(Predictor.scala:118)
  at org.apache.spark.ml.Predictor.fit(Predictor.scala:82)
  at org.apache.spark.ml.Pipeline$$anonfun$fit$2.apply(Pipeline.scala:153)
  at org.apache.spark.ml.Pipeline$$anonfun$fit$2.apply(Pipeline.scala:149)
  at scala.collection.Iterator$class.foreach(Iterator.scala:893)
  at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
  at scala.collection.IterableViewLike$Transformed$class.foreach(IterableViewLike.scala:44)
  at scala.collection.SeqViewLike$AbstractTransformed.foreach(SeqViewLike.scala:37)
  at org.apache.spark.ml.Pipeline.fit(Pipeline.scala:149)
  at org.apache.spark.ml.Pipeline.fit(Pipeline.scala:96)
  at org.apache.spark.ml.Estimator.fit(Estimator.scala:61)
  at org.apache.spark.ml.Estimator$$anonfun$fit$1.apply(Estimator.scala:82)
  at org.apache.spark.ml.Estimator$$anonfun$fit$1.apply(Estimator.scala:82)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
  at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
  at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
  at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
  at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:186)
  at org.apache.spark.ml.Estimator.fit(Estimator.scala:82)
  at org.apache.spark.ml.tuning.CrossValidator$$anonfun$fit$1.apply(CrossValidator.scala:115)
  at org.apache.spark.ml.tuning.CrossValidator$$anonfun$fit$1.apply(CrossValidator.scala:110)
  at org.apache.spark.ml.tuning.CrossValidator.fit(CrossValidator.scala:110)
  ... 52 elided

0 Answers:

No answers yet