I am trying to create a generalized linear model using SparkR's "glm" method (spark.glm).
df <- as.DataFrame(iris)
linear_model <- spark.glm(df, Sepal_Length ~ Sepal_Width, family = "gaussian")
write.ml(linear_model, '/tmp/linmodel.ml', TRUE)
## Is this possible?
converted_linear_model <- convert_spark_to_R_glm(linear_model)
saveRDS(converted_linear_model, '/tmp/linmodel.rds')
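(convert_spark_to_R_glm above is hypothetical; it only illustrates what I am looking for.) One fallback I am considering is pulling the estimates out of the Spark model and applying the gaussian/identity fit by hand. This is an untested sketch, assuming the coefficient matrix is laid out exactly as summary() prints it:
coefs <- summary(linear_model)$coefficients        # matrix with an "Estimate" column
manual_predict <- function(newdata) {
  # gaussian family with identity link: intercept + slope * Sepal_Width
  coefs["(Intercept)", "Estimate"] + coefs["Sepal_Width", "Estimate"] * newdata$Sepal_Width
}
saveRDS(coefs, '/tmp/linmodel_coefs.rds')          # a plain R matrix, safe to readRDS() on a worker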
Although I can feed the model created this way directly into the "predict" function of the Spark ML library, I want to use the model inside native R functions.
outDf <- dapplyCollect(df, function(x) {
# converted_linear_model <- readRDS('/tmp/linmodel.rds') # this can help to access the model inside the R worker
result <- predict(converted_linear_model, x)
return(result)
})
head(outDf)
Is there a way to convert the Spark linear model (linear_model) into a native R linear model (converted_linear_model)? And how can it then be passed into the R function running on the workers?
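To be clear about what I mean by a "native R linear model": something that behaves like the result of plain stats::glm, so that base predict() works on it without any Spark objects involved. A minimal sketch of the target object, fitted locally on iris just for illustration:
local_model <- glm(Sepal.Length ~ Sepal.Width, family = gaussian, data = iris)
predict(local_model, newdata = head(iris))  # this is the predict() I want to be able to call inside the worker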
EDIT: Here is what I tried and the error it produced:
CODE
# Include spark path
if (nchar(Sys.getenv("SPARK_HOME")) < 1) {
Sys.setenv(SPARK_HOME = "/home/dev/spark")
}
library(SparkR)
library(httr)
# Connect to spark master
sc = sparkR.session(master = "spark://dev-Inspiron-7560:7077", sparkConfig = list(spark.app.name="TEST ZIP GROUPING"))
# Load the default dataframe
df <- as.DataFrame(iris)
# Create the linear model
linear_model <- spark.glm(df, Sepal_Length ~ Sepal_Width, family = "gaussian")
summary(linear_model)
# fitted values on training data
fitted <- dapplyCollect(df, function(x) {
r <- predict(linear_model, x)
return(r)
})
# Print output
head(select(fitted, "Sepal_Length", "prediction"))
# Stop the SparkSession now
sparkR.session.stop()
ERROR
> # Include spark path
> if (nchar(Sys.getenv("SPARK_HOME")) < 1) {
+ Sys.setenv(SPARK_HOME = "/home/dev/spark")
+ }
> library(SparkR)
> library(httr)
>
> # Connect to spark master
> sc = sparkR.session(master = "spark://dev-Inspiron-7560:7077", sparkConfig = list(spark.app.name="TEST ZIP GROUPING"))
Spark package found in SPARK_HOME: /home/dev/spark
Launching java with spark-submit command /home/dev/spark/bin/spark-submit sparkr-shell /tmp/RtmpSzkyW9/backend_port378063436025
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
18/01/16 17:21:37 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
18/01/16 17:21:37 WARN util.Utils: Your hostname, dev-Inspiron-7560 resolves to a loopback address: 127.0.1.1; using 192.168.1.181 instead (on interface wlan0)
18/01/16 17:21:37 WARN util.Utils: Set SPARK_LOCAL_IP if you need to bind to another address
18/01/16 17:21:44 WARN metastore.ObjectStore: Failed to get database global_temp, returning NoSuchObjectException
>
> # Load the default dataframe
> df <- as.DataFrame(iris)
Warning messages:
1: In FUN(X[[i]], ...) :
Use Sepal_Length instead of Sepal.Length as column name
2: In FUN(X[[i]], ...) :
Use Sepal_Width instead of Sepal.Width as column name
3: In FUN(X[[i]], ...) :
Use Petal_Length instead of Petal.Length as column name
4: In FUN(X[[i]], ...) :
Use Petal_Width instead of Petal.Width as column name
>
> # Create the linear model
> linear_model <- spark.glm(df, Sepal_Length ~ Sepal_Width, family = "gaussian")
18/01/16 17:21:49 WARN optim.WeightedLeastSquares: regParam is zero, which might cause numerical instability and overfitting.
18/01/16 17:21:49 WARN netlib.BLAS: Failed to load implementation from: com.github.fommil.netlib.NativeSystemBLAS
18/01/16 17:21:49 WARN netlib.BLAS: Failed to load implementation from: com.github.fommil.netlib.NativeRefBLAS
18/01/16 17:21:49 WARN netlib.LAPACK: Failed to load implementation from: com.github.fommil.netlib.NativeSystemLAPACK
18/01/16 17:21:49 WARN netlib.LAPACK: Failed to load implementation from: com.github.fommil.netlib.NativeRefLAPACK
> summary(linear_model)
Deviance Residuals:
(Note: These are approximate quantiles with relative error <= 0.01)
Min 1Q Median 3Q Max
-1.55614 -0.64446 -0.12315 0.53321 2.22255
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.52622 0.47890 13.6276 0.0000
Sepal_Width -0.22336 0.15508 -1.4403 0.1519
(Dispersion parameter for gaussian family taken to be 0.6807844)
Null deviance: 102.17 on 149 degrees of freedom
Residual deviance: 100.76 on 148 degrees of freedom
AIC: 372
Number of Fisher Scoring iterations: 1
>
> # fitted values on training data
> fitted <- dapplyCollect(df, function(x) {
+ r <- predict(linear_model, x)
+ return(r)
+ })
18/01/16 17:21:53 WARN scheduler.TaskSetManager: Lost task 0.0 in stage 15.0 (TID 15, 192.168.1.181, executor 0): org.apache.spark.SparkException: R computation failed with
Error in callJMethod(object@jobj, "transform", newData@sdf) :
Invalid jobj 18. If SparkR was restarted, Spark operations need to be re-executed.
at org.apache.spark.api.r.RRunner.compute(RRunner.scala:108)
at org.apache.spark.sql.execution.r.MapPartitionsRWrapper.apply(MapPartitionsRWrapper.scala:59)
at org.apache.spark.sql.execution.r.MapPartitionsRWrapper.apply(MapPartitionsRWrapper.scala:29)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$6.apply(objects.scala:196)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$6.apply(objects.scala:193)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
18/01/16 17:21:53 WARN scheduler.TaskSetManager: Lost task 0.1 in stage 15.0 (TID 16, 192.168.1.181, executor 0): org.apache.spark.SparkException: R computation failed with
Error in callJMethod(object@jobj, "transform", newData@sdf) :
Invalid jobj 18. If SparkR was restarted, Spark operations need to be re-executed.
Error in callJMethod(object@jobj, "transform", newData@sdf) :
Invalid jobj 18. If SparkR was restarted, Spark operations need to be re-executed.
at org.apache.spark.api.r.RRunner.compute(RRunner.scala:108)
at org.apache.spark.sql.execution.r.MapPartitionsRWrapper.apply(MapPartitionsRWrapper.scala:59)
at org.apache.spark.sql.execution.r.MapPartitionsRWrapper.apply(MapPartitionsRWrapper.scala:29)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$6.apply(objects.scala:196)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$6.apply(objects.scala:193)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
18/01/16 17:21:54 WARN scheduler.TaskSetManager: Lost task 0.2 in stage 15.0 (TID 17, 192.168.1.181, executor 0): org.apache.spark.SparkException: R computation failed with
Error in callJMethod(object@jobj, "transform", newData@sdf) :
Invalid jobj 18. If SparkR was restarted, Spark operations need to be re-executed.
Error in callJMethod(object@jobj, "transform", newData@sdf) :
Invalid jobj 18. If SparkR was restarted, Spark operations need to be re-executed.
Error in callJMethod(object@jobj, "transform", newData@sdf) :
Invalid jobj 18. If SparkR was restarted, Spark operations need to be re-executed.
at org.apache.spark.api.r.RRunner.compute(RRunner.scala:108)
at org.apache.spark.sql.execution.r.MapPartitionsRWrapper.apply(MapPartitionsRWrapper.scala:59)
at org.apache.spark.sql.execution.r.MapPartitionsRWrapper.apply(MapPartitionsRWrapper.scala:29)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$6.apply(objects.scala:196)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$6.apply(objects.scala:193)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
18/01/16 17:21:54 WARN scheduler.TaskSetManager: Lost task 0.3 in stage 15.0 (TID 18, 192.168.1.181, executor 0): org.apache.spark.SparkException: R computation failed with
Error in callJMethod(object@jobj, "transform", newData@sdf) :
Invalid jobj 18. If SparkR was restarted, Spark operations need to be re-executed.
Error in callJMethod(object@jobj, "transform", newData@sdf) :
Invalid jobj 18. If SparkR was restarted, Spark operations need to be re-executed.
Error in callJMethod(object@jobj, "transform", newData@sdf) :
Invalid jobj 18. If SparkR was restarted, Spark operations need to be re-executed.
Error in callJMethod(object@jobj, "transform", newData@sdf) :
Invalid jobj 18. If SparkR was restarted, Spark operations need to be re-executed.
at org.apache.spark.api.r.RRunner.compute(RRunner.scala:108)
at org.apache.spark.sql.execution.r.MapPartitionsRWrapper.apply(MapPartitionsRWrapper.scala:59)
at org.apache.spark.sql.execution.r.MapPartitionsRWrapper.apply(MapPartitionsRWrapper.scala:29)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$6.apply(objects.scala:196)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$6.apply(objects.scala:193)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
18/01/16 17:21:54 ERROR scheduler.TaskSetManager: Task 0 in stage 15.0 failed 4 times; aborting job
18/01/16 17:21:54 ERROR r.RBackendHandler: collect on 21 failed
java.lang.reflect.InvocationTargetException
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.api.r.RBackendHandler.handleMethodCall(RBackendHandler.scala:167)
at org.apache.spark.api.r.RBackendHandler.channelRead0(RBackendHandler.scala:108)
at org.apache.spark.api.r.RBackendHandler.channelRead0(RBackendHandler.scala:40)
at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:336)
at io.netty.handler.timeout.IdleStateHandler.channelRead(IdleStateHandler.java:287)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:336)
at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:336)
at io.netty.handler.codec.ByteToMessageDecoder.fireChannelRead(ByteToMessageDecoder.java:293)
at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:267)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:336)
at io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1294)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)
at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:911)
at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:131)
at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:643)
at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:566)
at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:480)
at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:442)
at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:131)
at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:144)
at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 15.0 failed 4 times, most recent failure: Lost task 0.3 in stage 15.0 (TID 18, 192.168.1.181, executor 0): org.apache.spark.SparkException: R computation failed with
Error in callJMethod(object@jobj, "transform", newData@sdf) :
Invalid jobj 18. If SparkR was restarted, Spark operations need to be re-executed.
Error in callJMethod(object@jobj, "transform", newData@sdf) :
Invalid jobj 18. If SparkR was restarted, Spark operations need to be re-executed.
Error in callJMethod(object@jobj, "transform", newData@sdf) :
Invalid jobj 18. If SparkR was restarted, Spark operations need to be re-executed.
Error in callJMethod(object@jobj, "transform", newData@sdf) :
Invalid jobj 18. If SparkR was restarted, Spark operations need to be re-executed.
at org.apache.spark.api.r.RRunner.compute(RRunner.scala:108)
at org.apache.spark.sql.execution.r.MapPartitionsRWrapper.apply(MapPartitionsRWrapper.scala:59)
at org.apache.spark.sql.execution.r.MapPartitionsRWrapper.apply(MapPartitionsRWrapper.scala:29)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$6.apply(objects.scala:196)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$6.apply(objects.scala:193)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1499)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1487)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1486)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1486)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1714)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1669)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1658)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:630)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2022)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2043)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2062)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2087)
at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:936)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
at org.apache.spark.rdd.RDD.collect(RDD.scala:935)
at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:278)
at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:2853)
at org.apache.spark.sql.Dataset$$anonfun$collect$1.apply(Dataset.scala:2390)
at org.apache.spark.sql.Dataset$$anonfun$collect$1.apply(Dataset.scala:2390)
at org.apache.spark.sql.Dataset$$anonfun$55.apply(Dataset.scala:2837)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:2836)
at org.apache.spark.sql.Dataset.collect(Dataset.scala:2390)
... 36 more
Caused by: org.apache.spark.SparkException: R computation failed with
Error in callJMethod(object@jobj, "transform", newData@sdf) :
Invalid jobj 18. If SparkR was restarted, Spark operations need to be re-executed.
Error in callJMethod(object@jobj, "transform", newData@sdf) :
Invalid jobj 18. If SparkR was restarted, Spark operations need to be re-executed.
Error in callJMethod(object@jobj, "transform", newData@sdf) :
Invalid jobj 18. If SparkR was restarted, Spark operations need to be re-executed.
Error in callJMethod(object@jobj, "transform", newData@sdf) :
Invalid jobj 18. If SparkR was restarted, Spark operations need to be re-executed.
at org.apache.spark.api.r.RRunner.compute(RRunner.scala:108)
at org.apache.spark.sql.execution.r.MapPartitionsRWrapper.apply(MapPartitionsRWrapper.scala:59)
at org.apache.spark.sql.execution.r.MapPartitionsRWrapper.apply(MapPartitionsRWrapper.scala:29)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$6.apply(objects.scala:196)
at org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$6.apply(objects.scala:193)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
... 1 more
Error in handleErrors(returnStatus, conn) :
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 15.0 failed 4 times, most recent failure: Lost task 0.3 in stage 15.0 (TID 18, 192.168.1.181, executor 0): org.apache.spark.SparkException: R computation failed with
Error in callJMethod(object@jobj, "transform", newData@sdf) :
Invalid jobj 18. If SparkR was restarted, Spark operations need to be re-executed.
Error in callJMethod(object@jobj, "transform", newData@sdf) :
Invalid jobj 18. If SparkR was restarted, Spark operations need to be re-executed.
Error in callJMethod(object@jobj, "transform", newData@sdf) :
Invalid jobj 18. If SparkR was restarted, Spark operations need to be re-executed.
Error in callJMethod(object@jobj, "transform", newData@sdf) :
Invalid jobj 18. If SparkR was restarted, Spark operations need to be re-executed.
at org.apache.spark.api.r.RRunner.compute(RRunner.scala:108)
at org.apache.spark.sql.execution.r.MapPartitionsRWrapper.apply(MapPartitionsRWrapper
>
> # Print output
> head(select(fitted, "Sepal_Length", "prediction"))
Error in (function (classes, fdef, mtable) :
unable to find an inherited method for function ‘select’ for signature ‘"standardGeneric", "character"’
>
> # Stop the SparkSession now
> sparkR.session.stop()
Any help is appreciated.