Neo4j-spark连接器问题

时间:2016-08-31 09:41:55

标签: apache-spark neo4j spark-graphx

我正在尝试使用neo4j-spark连接器,我遇到了问题。

下面是代码

import org.apache.spark.graphx.{Edge, Graph}
import org.apache.spark.{SparkConf, SparkContext}
import org.neo4j.spark.Neo4jGraph

/**
 * Builds a four-vertex, four-edge GraphX graph in a local Spark context,
 * prints its triplets, and hands it to `Neo4jGraph.saveGraph`.
 *
 * The Bolt URL and credentials are supplied to the connector through the
 * `spark.neo4j.bolt.*` entries of the Spark configuration.
 */
object AereplanesNeo4jPush {

  /**
   * Program entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {

    // Every SparkConf setter returns the conf itself, so the whole
    // configuration can be expressed as a single chain.
    val conf: SparkConf = new SparkConf()
      .setAppName("Neo4j Aeroplanes push test")
      .setMaster("local")
      .set("spark.neo4j.bolt.url", "bolt://localhost:7687")
      .set("spark.neo4j.bolt.user", "neo4j")
      .set("spark.neo4j.bolt.password", "admin")
    val sc: SparkContext = new SparkContext(conf)

    // The context must be stopped even when the save fails; otherwise an
    // exception thrown by saveGraph skips sc.stop() and leaves it running.
    try {
      // Vertices are (id, label) pairs.
      val vertices = Array((1L, "SFO"), (2L, "ORD"), (3L, "DFW"), (4L, "DEL"))
      val vRDD = sc.parallelize(vertices)

      // Edges are (source id, destination id, Int attribute).
      val edges = Array(Edge(1L, 2L, 1800), Edge(2L, 3L, 800), Edge(3L, 1L, 1400), Edge(4L, 1L, 25000))
      val eRDD = sc.parallelize(edges)

      val graph = Graph(vRDD, eRDD)

      graph.triplets.foreach(trip => println(trip))

      // NOTE(review): "Place" and "Distance" are presumably the node and
      // relationship property names the connector writes to - confirm
      // against the connector version in use.
      Neo4jGraph.saveGraph(sc, graph, "Place", "Distance")

      println("saved")
    } finally {
      sc.stop()
    }
  }
}

请在下面找到异常堆栈跟踪

Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1431)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1419)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1418)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1418)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
at scala.Option.foreach(Option.scala:236)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:799)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1640)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1832)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1952)
at org.apache.spark.rdd.RDD$$anonfun$fold$1.apply(RDD.scala:1081)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
at org.apache.spark.rdd.RDD.fold(RDD.scala:1075)
at org.apache.spark.rdd.DoubleRDDFunctions$$anonfun$sum$1.apply$mcD$sp(DoubleRDDFunctions.scala:34)
at org.apache.spark.rdd.DoubleRDDFunctions$$anonfun$sum$1.apply(DoubleRDDFunctions.scala:34)
at org.apache.spark.rdd.DoubleRDDFunctions$$anonfun$sum$1.apply(DoubleRDDFunctions.scala:34)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
at org.apache.spark.rdd.DoubleRDDFunctions.sum(DoubleRDDFunctions.scala:33)
at org.neo4j.spark.Neo4jGraph$.saveGraph(Neo4jGraph.scala:93)
at com.collectivei.bigdata.graphanalytics.AereplanesNeo4jPush$.main(AereplanesNeo4jPush.scala:38)
at com.collectivei.bigdata.graphanalytics.AereplanesNeo4jPush.main(AereplanesNeo4jPush.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:144)

Caused by: org.neo4j.driver.v1.exceptions.ClientException: Unable to convert scala.collection.immutable.$colon$colon to Neo4j Value.
        at org.neo4j.driver.v1.Values.value(Values.java:96)
        at org.neo4j.driver.v1.Values.value(Values.java:182)
        at org.neo4j.driver.v1.Values.value(Values.java:82)
        at org.neo4j.driver.v1.Values.value(Values.java:232)
        at org.neo4j.driver.internal.InternalSession.run(InternalSession.java:72)
        at org.neo4j.spark.Neo4jGraph$.execute(Neo4jGraph.scala:105)
        at org.neo4j.spark.Neo4jGraph$$anonfun$8.apply(Neo4jGraph.scala:89)
        at org.neo4j.spark.Neo4jGraph$$anonfun$8.apply(Neo4jGraph.scala:87)
        at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$20.apply(RDD.scala:710)
        at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$20.apply(RDD.scala:710)
        at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
        at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
        at org.apache.spark.scheduler.Task.run(Task.scala:89)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:745)

但是,我可以使用 Neo4jDataFrame.withDataType 和 Neo4jGraph.loadGraph 将 Neo4j 上的现有数据检索到 DataFrame 或 Graph 中。

我正在使用

  • Neo4j 社区版 3.0.4
  • spark 1.6.0
  • neo4j-spark连接器版本1.0.0-RC1
  • scala版本是2.10

任何提示或建议都会非常有帮助。

0 个答案:

没有答案