我需要得到一个数组中所有元素的两两组合(如下面的代码所示)。由于输入数据规模很大,我无法将其缓存在内存中。有谁知道如何加速它?提前谢谢。
val sc = new SparkContext(conf)

// For every non-empty input line, emit the original line followed by one extra
// column per unordered pair (i, j), i < j, of the fields at index >= startIndex.
// Each extra column is the product of the two parsed fields, or `missingValue`
// when either field could not be parsed.
//
// The work is done line by line inside `map`, so nothing is cached or collected
// on the driver. The number of output columns is inherently quadratic in the
// number of fields; the changes below only remove the avoidable overhead:
//   * each field is parsed once instead of once per pair it takes part in;
//   * the output is accumulated in a StringBuilder instead of repeated
//     `str = str + ...`, which re-copies the whole string on every append.
sc.textFile(subInputPath)
  .filter(_.nonEmpty)
  .map { line =>
    // Rendered once per line; string interpolation formats it the same way
    // `str + missingValue` would.
    val missing = s"$missingValue"

    // Only fields from startIndex onwards take part in the pairing.
    // NOTE(review): assumes safeStringToDouble is side-effect free and returns
    // an Option of a numeric value (None when parsing fails) -- confirm.
    val values = line
      .split(separator)
      .drop(startIndex)
      .map(field => DistributedLRFunctions.safeStringToDouble(field))

    val out = new StringBuilder(line)
    for (i <- values.indices; j <- (i + 1) until values.length) {
      // Writing the separator *before* each column means there is no trailing
      // separator to strip afterwards, which also stays correct when the
      // separator is longer than one character.
      out.append(separator)
      (values(i), values(j)) match {
        case (Some(a), Some(b)) => out.append(a * b)
        case _                  => out.append(missing)
      }
    }
    out.toString
  }
  .saveAsTextFile(outputPath)