/**
 * Finds every occurrence of the sub-array `needle` inside `haystack`.
 *
 * @param {Array} needle - the sequence of elements to search for
 * @param {Array} haystack - the array to search in
 * @returns {Array[]} one copy of the matched slice per occurrence, so the
 *   caller can count occurrences via `.length`
 */
function findNeedles(needle, haystack) {
  // Candidate window starting at index i, same length as the needle.
  const getPotentialNeedle = (part, i) => haystack.slice(i, i + needle.length);
  const needlePartMatches = (part, index) => part === needle[index];
  const isNeedle = (potentialNeedle) => potentialNeedle.every(needlePartMatches);
  return haystack
    // we only need to check as far as there is room for the needle
    .slice(0, haystack.length - needle.length + 1)
    .map(getPotentialNeedle)
    .filter(isNeedle);
}

const haystack = ["a", "b", "c", "d", "e", "a", "b", "c", "d", "e", "a"];
const needle = ["a", "b"];
console.log("Found matches: ", findNeedles(needle, haystack).length);
在pyspark中是否有类似于pandas的idxmax()函数的功能?
示例:
我有如下数据,df.show(10)的输出为:
+---------+-----+-----+------+-----+
|group_col| col1| col2| col3| col4|
+---------+-----+-----+------+-----+
|121052570|21273| 4236| 39496|64555|
|121650551|14678|10626| 3334| 1865|
|121684345|40416| null| null| null|
|122320107| 8585| null| 3524|30231|
|122707777|12168|61157|126115|15113|
|122798622|22172|11352| 60039| 5548|
|122995995|56064|26811| 12582|85412|
|123036128| 8539|27938| 27691|32692|
|123048398|14613| 3973| 10273| 4449|
|123141852|15819| 9526| 10659| 7288|
+---------+-----+-----+------+-----+
我想为每个group_col查找具有最大值的列名。在pandas中,我可以使用idxmax()
方法,但是如何在pyspark中实现呢?