在执行以下代码时,我将 Python 类对象传递给 PySpark RDD 的映射函数,并得到以下错误: PicklingError: Could not serialize object: TypeError: can't pickle _thread.RLock objects
我也尝试将其转换为静态方法,但问题仍然相同。如何使用 Python 类来处理 RDD?
from pyspark import SparkContext, SparkConf
import logging
class test(object):
    """Log every element of a small RDD from inside a class.

    The function handed to ``foreach`` is serialized and shipped to the
    executors, so it must not close over the driver-side ``Logger`` object:
    a logger reaches handler locks (``_thread.RLock``) that cannot be
    pickled. Only the logger's *name* (a plain string) is captured, and the
    logger is looked up again where the function actually runs.
    """

    def __init__(self, sc, logger):
        """Build the RDD ``[1, 2, 3, 4]`` and log each of its elements.

        Args:
            sc: Object exposing ``parallelize(iterable)`` whose result has a
                ``foreach(func)`` method (a ``SparkContext`` in the script).
            logger: ``logging.Logger`` used on the driver; its name is reused
                to obtain an equivalent logger for the per-element calls.
        """
        logger.info('Ind=side init method.')
        data = sc.parallelize([1, 2, 3, 4])
        # Capture a picklable string instead of the Logger instance itself.
        logger_name = logger.name
        data.foreach(lambda x: test.log_data(x, logger_name))

    @staticmethod
    def log_data(data, logger):
        """Log ``data`` at INFO level.

        Args:
            data: Value to log.
            logger: Either a ``logging.Logger`` instance or the name of a
                logger; a name is resolved with ``logging.getLogger``.
        """
        if isinstance(logger, str):
            logger = logging.getLogger(logger)
        logger.info(data)
# Driver script: configure logging, start Spark, run the job, and always
# release the SparkContext afterwards.
logging.basicConfig(level=logging.INFO)  # configure before the first message
logger = logging.getLogger(__name__)

conf = SparkConf().setAppName('test')
sc = SparkContext(conf=conf)
try:
    test(sc, logger)
finally:
    # Stop the context even if the job raises, so it is not left running.
    sc.stop()