I am trying to use the neo4j-spark connector and I am running into a problem.
Here is the code:
import org.apache.spark.graphx.{Edge, Graph}
import org.apache.spark.{SparkConf, SparkContext}
import org.neo4j.spark.Neo4jGraph

object AereplanesNeo4jPush {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setAppName("Neo4j Aeroplanes push test").setMaster("local")
    conf.set("spark.neo4j.bolt.url", "bolt://localhost:7687")
    conf.set("spark.neo4j.bolt.user", "neo4j")
    conf.set("spark.neo4j.bolt.password", "admin")
    val sc: SparkContext = new SparkContext(conf)

    // Airport vertices: (vertexId, airport code)
    val vertices = Array((1L, "SFO"), (2L, "ORD"), (3L, "DFW"), (4L, "DEL"))
    val vRDD = sc.parallelize(vertices)

    // Routes between airports; the edge attribute is the distance
    val edges = Array(Edge(1L, 2L, 1800), Edge(2L, 3L, 800), Edge(3L, 1L, 1400), Edge(4L, 1L, 25000))
    val eRDD = sc.parallelize(edges)

    val graph = Graph(vRDD, eRDD)
    graph.triplets.foreach(trip => println(trip))

    // This is the call that fails (AereplanesNeo4jPush.scala:38 in the trace below)
    Neo4jGraph.saveGraph(sc, graph, "Place", "Distance")
    println("saved")
    sc.stop()
  }
}
Please find the exception stack trace below:
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1431)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1419)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1418)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1418)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
at scala.Option.foreach(Option.scala:236)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:799)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1640)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1832)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1952)
at org.apache.spark.rdd.RDD$$anonfun$fold$1.apply(RDD.scala:1081)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
at org.apache.spark.rdd.RDD.fold(RDD.scala:1075)
at org.apache.spark.rdd.DoubleRDDFunctions$$anonfun$sum$1.apply$mcD$sp(DoubleRDDFunctions.scala:34)
at org.apache.spark.rdd.DoubleRDDFunctions$$anonfun$sum$1.apply(DoubleRDDFunctions.scala:34)
at org.apache.spark.rdd.DoubleRDDFunctions$$anonfun$sum$1.apply(DoubleRDDFunctions.scala:34)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
at org.apache.spark.rdd.DoubleRDDFunctions.sum(DoubleRDDFunctions.scala:33)
at org.neo4j.spark.Neo4jGraph$.saveGraph(Neo4jGraph.scala:93)
at com.collectivei.bigdata.graphanalytics.AereplanesNeo4jPush$.main(AereplanesNeo4jPush.scala:38)
at com.collectivei.bigdata.graphanalytics.AereplanesNeo4jPush.main(AereplanesNeo4jPush.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:144)
Caused by: org.neo4j.driver.v1.exceptions.ClientException: Unable to convert scala.collection.immutable.$colon$colon to Neo4j Value.
at org.neo4j.driver.v1.Values.value(Values.java:96)
at org.neo4j.driver.v1.Values.value(Values.java:182)
at org.neo4j.driver.v1.Values.value(Values.java:82)
at org.neo4j.driver.v1.Values.value(Values.java:232)
at org.neo4j.driver.internal.InternalSession.run(InternalSession.java:72)
at org.neo4j.spark.Neo4jGraph$.execute(Neo4jGraph.scala:105)
at org.neo4j.spark.Neo4jGraph$$anonfun$8.apply(Neo4jGraph.scala:89)
at org.neo4j.spark.Neo4jGraph$$anonfun$8.apply(Neo4jGraph.scala:87)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$20.apply(RDD.scala:710)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$20.apply(RDD.scala:710)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
at org.apache.spark.scheduler.Task.run(Task.scala:89)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
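From the "Caused by" above, my reading is that the bolt driver cannot serialize a Scala List (scala.collection.immutable.$colon$colon is List's cons cell) when the connector passes one as a query parameter. A minimal sketch of what I believe is going on, talking to the 1.x Java driver directly rather than through the connector (same bolt URL and credentials as above):

import org.neo4j.driver.v1.{AuthTokens, GraphDatabase, Values}
import scala.collection.JavaConverters._

object DriverConversionRepro {
  def main(args: Array[String]): Unit = {
    val driver = GraphDatabase.driver("bolt://localhost:7687", AuthTokens.basic("neo4j", "admin"))
    val session = driver.session()

    val xs = List(1, 2, 3) // a scala.collection.immutable.$colon$colon at runtime

    // Presumably fails the same way: Values.value() only understands Java
    // collections, so a Scala List yields "Unable to convert ... to Neo4j Value."
    // session.run("UNWIND {xs} AS x RETURN x", Values.parameters("xs", xs))

    // Converting to a java.util.List first is accepted by the driver:
    session.run("UNWIND {xs} AS x RETURN x", Values.parameters("xs", xs.asJava))

    session.close()
    driver.close()
  }
}

If that reading is right, the conversion happens inside the connector's saveGraph batching rather than in my code, but I may simply have an incompatible version combination.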
However, I can retrieve existing data from Neo4j into DataFrames or a Graph using Neo4jDataFrame.withDataType and Neo4jGraph.loadGraph.
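For reference, the read path that does work for me looks roughly like this (a sketch from memory; the exact withDataType signature may differ across connector versions, and the Place label and name property are placeholders matching my data):

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.types.{LongType, StringType}
import org.neo4j.spark.{Neo4jDataFrame, Neo4jGraph}

val sqlContext = new SQLContext(sc)

// DataFrame from a Cypher query; each (name, type) pair declares a result column
val placesDF = Neo4jDataFrame.withDataType(sqlContext,
  "MATCH (p:Place) RETURN id(p) AS id, p.name AS name", Seq.empty,
  ("id", LongType), ("name", StringType))

// GraphX graph of Place nodes connected by Distance relationships
val loaded = Neo4jGraph.loadGraph(sc, "Place", Seq("Distance"), "Place")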
I am using
Any pointers would be really helpful.