有人为使用 PySpark 构建的推荐系统实现过 Recall@k(前 k 项召回率)评估吗?我已经参照下面这篇文章中的做法(使用内置的 RankingMetrics 类)实现了 Precision@k(前 k 项精确率):Build a recommender system with Spark: Implicit ALS。
class RankingEvaluator(Evaluator):
    """Evaluator that scores recommendations by precision at ``k``.

    The input DataFrame passed to ``evaluate`` must contain the columns
    ``user_id``, ``article_id``, ``prediction`` and ``article_count``.
    For every user the top ``k`` articles by ``prediction`` are compared
    with the top ``k`` articles by ``article_count`` (presumably the observed
    interaction count -- confirm against the caller), and the result is
    ``RankingMetrics.precisionAt(k)`` over all users.
    """

    @keyword_only
    def __init__(self, k=None):
        """Create the evaluator.

        :param k: number of top-ranked items per user to evaluate. When
            omitted, the default of 20 is used.
        """
        super(RankingEvaluator, self).__init__()
        self.k = Param(self, 'k', 'Top K')
        self._setDefault(k=20)
        kwargs = self._input_kwargs
        self.setParams(**kwargs)

    @keyword_only
    def setParams(self, k=None):
        """Set the evaluator params from keyword arguments and return ``self``."""
        kwargs = self._input_kwargs
        return self._set(**kwargs)

    def isLargerBetter(self):
        """Return True: a higher precision means a better model."""
        return True

    def setK(self, value):
        """Set ``k`` and return ``self`` so calls can be chained."""
        # Go through _set() like setParams() does instead of writing to the
        # private _paramMap directly.
        return self._set(k=value)

    def getK(self):
        """Return the configured ``k``, or its default when unset."""
        return self.getOrDefault(self.k)

    def _evaluate(self, predictedDF):
        """Compute precision at ``k`` for ``predictedDF``.

        :param predictedDF: DataFrame with ``user_id``, ``article_id``,
            ``prediction`` and ``article_count`` columns.
        :return: precision at ``k`` as a float, or ``0.0`` when no user has
            both predicted and actual items.
        """
        k = self.getK()

        # row_number() (not rank()) so that tied prediction scores can never
        # let more than k items through: collect_list gives no ordering
        # guarantee and precisionAt(k) only looks at the first k entries, so
        # an over-long list would make the metric depend on shuffle order.
        # article_id is a tie-breaker that keeps the selection deterministic.
        predictedWindow = Window.partitionBy('user_id').orderBy(
            col('prediction').desc(), col('article_id'))
        perUserPredictedItemsDF = (
            predictedDF
            .select('user_id', 'article_id',
                    F.row_number().over(predictedWindow).alias('rank'))
            .where(col('rank') <= k)
            .groupBy('user_id')
            .agg(F.collect_list('article_id').alias('predicted_items'))
        )

        # Ground truth keeps rank(): every article tied at the cut-off stays
        # in the relevant set, which is only used for membership tests.
        actualWindow = Window.partitionBy('user_id').orderBy(
            col('article_count').desc())
        perUserActualItemsDF = (
            predictedDF
            .select('user_id', 'article_id',
                    F.rank().over(actualWindow).alias('rank'))
            .where(col('rank') <= k)
            .groupBy('user_id')
            .agg(F.collect_list('article_id').alias('actual_items'))
        )

        # RankingMetrics expects an RDD of (predicted items, actual items).
        perUserItemsRDD = (
            perUserPredictedItemsDF
            .join(F.broadcast(perUserActualItemsDF), 'user_id', 'inner')
            .select('predicted_items', 'actual_items')
            .rdd
            .map(lambda row: (row[0], row[1]))
        )

        # Nothing to score: report 0.0 rather than handing RankingMetrics an
        # empty RDD.
        if perUserItemsRDD.isEmpty():
            return 0.0

        return RankingMetrics(perUserItemsRDD).precisionAt(k)
但是,我找不到任何现成的、可以用来计算 Recall@k 的评估类。
谢谢。