我在spark中安装了GraphFrames包,我已按照此链接中的说明操作:https://www.datareply.co.uk/blog/2016/9/20/running-graph-analytics-with-spark-graphframes-a-simple-example
当我尝试执行以下代码时,收到错误消息:
from pyspark import SparkContext
from pyspark import SparkConf
from pyspark.sql import SQLContext
from pyspark.graphframes import graphframe as gf
import pandas as pd
import os
import sys
os.environ["PYSPARK_SUBMIT_ARGS"] = ( "--packages graphframes:graphframes:0.2.0-spark2.0-s_2.11 pyspark-shell" )
SPARK_HOME="/usr/local/spark/" sys.path.append(SPARK_HOME + "/python")
sys.path.append(SPARK_HOME + "/python" + "/lib/py4j-0.10.1-src.zip")
conf = SparkConf()
SC = SparkContext(conf=conf)
sqlcontext = SQLContext(SC)
v = sqlcontext.createDataFrame([ ("a", "Alice", 34), ("b", "Bob", 36), ("c", "Charlie", 30), ], ["id", "name", "age"])
e = sqlcontext.createDataFrame([ ("a", "b", "friend"), ("b", "c", "follow"), ("c", "b", "follow"), ], ["src", "dst", "relationship"])
g = gf.GraphFrame(v, e)
g.inDegrees.show()
g.edges.filter("relationship = 'follow'").count()
results = g.pageRank(resetProbability=0.01, maxIter=20)
results.vertices.select("id", "pagerank").show()
以下是错误消息:
Traceback (most recent call last):
File "TP2.py", line 35, in <module>
g = gf.GraphFrame(v, e)
File "/usr/local/spark/python/pyspark/graphframes/graphframe.py", line 63, in __init__
self._jvm_graph = self._jvm_gf_api.createGraph(v._jdf, e._jdf)
File "/Users/Khaled/anaconda2/lib/python2.7/site-packages/py4j/java_gateway.py", line 1160, in __call__
answer, self.gateway_client, self.target_id, self.name)
File "/usr/local/spark/python/pyspark/sql/utils.py", line 63, in deco
return f(*a, **kw)
File "/Users/Khaled/anaconda2/lib/python2.7/site-packages/py4j/protocol.py", line 320, in get_return_value
format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o54.createGraph.
: java.lang.IncompatibleClassChangeError: Implementing class
at java.lang.ClassLoader.defineClass1(Native Method)
at java.lang.ClassLoader.defineClass(ClassLoader.java:763)
at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
at java.net.URLClassLoader.defineClass(URLClassLoader.java:467)
at java.net.URLClassLoader.access$100(URLClassLoader.java:73)
at java.net.URLClassLoader$1.run(URLClassLoader.java:368)
at java.net.URLClassLoader$1.run(URLClassLoader.java:362)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:361)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at java.lang.ClassLoader.defineClass1(Native Method)
at java.lang.ClassLoader.defineClass(ClassLoader.java:763)
at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
at java.net.URLClassLoader.defineClass(URLClassLoader.java:467)
at java.net.URLClassLoader.access$100(URLClassLoader.java:73)
at java.net.URLClassLoader$1.run(URLClassLoader.java:368)
at java.net.URLClassLoader$1.run(URLClassLoader.java:362)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:361)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at org.graphframes.GraphFrame$.apply(GraphFrame.scala:556)
at org.graphframes.GraphFramePythonAPI.createGraph(GraphFramePythonAPI.scala:9)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:280)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Thread.java:745)
答案 0 :(得分:0)
每当遇到java.lang.IncompatibleClassChangeError
时,您就会知道依赖项之间存在冲突。在这种情况下,您有两个用于GraphFrame功能的软件包-pyspark.graphframes
和graphframes
。修改您的通话和导入可以解决此问题:
from graphframes import GraphFrame
....
g = GraphFrame(v, e)