我无法让 TDigestFI 在 PySpark 中正常工作。它来自一个第三方库:我从 GitHub 下载了该库的 zip 文件,用 addPyFile 将其添加到 SparkContext,然后导入了相应的模块:
sc.addPyFile("/home/jovyan/isarnproject.zip")
from isarnproject.pipelines.fi import *
但是在创建 TDigestFI 对象时报错,提示 'JavaPackage' 对象不可调用(object is not callable):
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-20-aefd6fabdcc5> in <module>
----> 1 fi = TDigestFI() \
2 .setDelta(0.75) \
3 .setMaxDiscrete(35)
4
5 fiModel = fi.fit(trainFV) \
/usr/local/spark/python/pyspark/__init__.py in wrapper(self, *args, **kwargs)
108 raise TypeError("Method %s forces keyword arguments." % func.__name__)
109 self._input_kwargs = kwargs
--> 110 return func(self, **kwargs)
111 return wrapper
112
/tmp/spark-752eb5ef-d984-4f79-a7c9-cb82221f1445/userFiles-fd28c425-1770-4d1e-ad50-4229f1540f2e/isarn.zip/isarnproject/pipelines/fi.py in __init__(self, delta, maxDiscrete, featuresCol)
93 def __init__(self, delta = 0.5, maxDiscrete = 0, featuresCol = "features"):
94 super(TDigestFI, self).__init__()
---> 95 self._java_obj = self._new_java_obj("org.isarnproject.pipelines.TDigestFI", self.uid)
96 self._setDefault(delta = 0.5, maxDiscrete = 0, featuresCol = "features")
97 kwargs = self._input_kwargs
/usr/local/spark/python/pyspark/ml/wrapper.py in _new_java_obj(java_class, *args)
65 java_obj = getattr(java_obj, name)
66 java_args = [_py2java(sc, arg) for arg in args]
---> 67 return java_obj(*java_args)
68
69 @staticmethod
TypeError: 'JavaPackage' object is not callable
我还尝试用 sbt 将源代码打包成 jar,并通过 PYSPARK_SUBMIT_ARGS 的 --jars 选项加载它,但问题依旧:
import os
os.environ['PYSPARK_SUBMIT_ARGS']='--jars /home/jovyan/sisarn-sketches-spark_2.11-0.3.1-sp2.4-py3.6.jar pyspark-shell'