有人能指点一下,如何把这段 Scala 代码(Java 式的命令式写法)改写成更函数式的风格吗?
需求:将总计约 4GB 的目录/文件划分为 4 组,每组大小上限为 1GB,并将每个组添加到一个列表中。
/** Demo that packs paths into consecutive groups whose cumulative size stays within a limit. */
object MyMapper {

  /** Maximum cumulative size allowed inside a single group. */
  private val Limit: Long = 10L

  /** Builds a small sample map of path -> size and prints its grouping. */
  def main(args: Array[String]): Unit = {
    val sizes = Map[String, Long](
      "a" -> 5L,
      "b" -> 5L,
      "c" -> 3L,
      "d" -> 2L,
      "e" -> 4L,
      "f" -> 2L,
      "g" -> 1L
    )
    limitMapper(sizes)
  }

  /** Prints every (path, size) entry in the map's iteration order, then the
    * resulting groups, then the limit that was applied.
    *
    * @param map path -> size; groups follow the iteration order of this map
    */
  def limitMapper(map: Map[String, Long]): Unit = {
    val entries = map.toVector
    entries.foreach(println)
    println(groupByLimit(entries, Limit))
    println(s"LIMIT = $Limit")
  }

  /** Splits `entries` into consecutive groups; a new group is started as soon
    * as adding the next entry would push the running total above `limit`.
    *
    * An entry that is larger than `limit` on its own still gets a group of its
    * own; no empty group is ever emitted.
    *
    * @param entries (path, size) pairs in the order they should be packed
    * @param limit   maximum cumulative size per group
    * @return the groups of paths, in input order
    */
  private def groupByLimit(entries: Seq[(String, Long)], limit: Long): Vector[Vector[String]] = {
    // Fold state: (finished groups, group being filled, size already in that group).
    val seed = (Vector.empty[Vector[String]], Vector.empty[String], 0L)
    val (finished, current, _) = entries.foldLeft(seed) {
      case ((done, open, used), (path, size)) =>
        // Only close a group that actually holds something; otherwise an
        // oversized first entry would produce an empty leading group.
        if (open.nonEmpty && used + size > limit) (done :+ open, Vector(path), size)
        else (done, open :+ path, used + size)
    }
    if (current.nonEmpty) finished :+ current else finished
  }
}
Output:
(e,4)
(f,2)
(a,5)
(b,5)
(g,1)
(c,3)
(d,2)
Vector(Vector(e, f), Vector(a, b), Vector(g, c, d))
答案 0 :(得分:2)
我不确定这是不是最好的做法。但我会做类似的事情:
val map = ...
// Size cap per group: 1073741824 bytes = 1 GiB.
val limit = 1073741824L
// Fold state: (finished groups, group being filled, total size of that group).
// Carrying the running size avoids re-summing the open group on every step.
val (closedGroups, openGroup, _) =
  map.foldLeft((Vector.empty[Vector[String]], Vector.empty[String], 0L)) {
    case ((closed, open, used), (path, size)) =>
      // Only close a non-empty group, so an oversized first entry does not
      // leave an empty group at the front of the result.
      if (open.nonEmpty && used + size > limit) (closed :+ open, Vector(path), size)
      else (closed, open :+ path, used + size)
  }
// Append the last, still-open group (absent when the map is empty).
val res = if (openGroup.nonEmpty) closedGroups :+ openGroup else closedGroups
答案 1 :(得分:1)
以下是一种可能的解决方案:
// Fold state: (size of the open group, open group in reverse order,
// closed groups in reverse order). Prepending keeps each step O(1).
val (_, lastList, otherLists) =
  map.foldLeft((0L, List.empty[String], List.empty[List[String]])) {
    case ((soFar, newList, oldLists), (path, size)) =>
      // Close the open group only if it holds something, so an oversized
      // first entry does not produce an empty group.
      if (newList.nonEmpty && soFar + size > limit)
        (size, List(path), newList.reverse :: oldLists)
      else
        (soFar + size, path :: newList, oldLists)
  }
// Add the final open group (if any) and restore input order of the groups.
(if (lastList.isEmpty) otherLists else lastList.reverse :: otherLists).reverse