我在Spark上执行矩阵乘法,对小矩阵运行时没有问题,但对于大型矩阵有错误:
java.lang.IllegalArgumentException: requirement failed: Row index out of range [0, 840): 840
怎么可能?
object SPMMultiply {
def main(args: Array[String]): Unit = {
val conf = new SparkConf().setAppName("SparseMatrixStorage").setMaster("spark://master:7077").setJars(List("E:\\Jasmine\\gSmat2.0\\SPMMultiply\\out\\artifacts\\SPMMultiply_jar\\SPMMultiply.jar"))
val sc = new SparkContext(conf)
val m1 = sc.textFile("hdfs://master:9000/input/9.txt").map(line=>line.split(' ')).map(_.map(_.toDouble)).map(m => (m(0).toLong,m(1).toLong,1)).map(k => new MatrixEntry(k._1,k._2,k._3))
val mat1 = new CoordinateMatrix(m1,26440,26440)
val m2 = sc.textFile("hdfs://master:9000/input/1.txt").map(line=>line.split(' ')).map(_.map(_.toDouble)).map(m => (m(0).toLong,m(1).toLong,1)).map(k => new MatrixEntry(k._1,k._2,k._3))
val mat2 = new CoordinateMatrix(m2,26440,26440)
val m3 = sc.textFile("hdfs://master:9000/input/lubm10/1.txt").map(line=>line.split(' ')).map(_.map(_.toDouble)).map(m => (m(0).toLong,m(1).toLong,1)).map(k => new MatrixEntry(k._1,k._2,k._3))
var middle_mat=mat1.toBlockMatrix()
middle_mat=mat1.toBlockMatrix().multiply(mat2.toBlockMatrix())
middle_mat.toCoordinateMatrix().entries.map{ entry=> (entry.i,entry.j)}
.saveAsTextFile("hdfs://master:9000/output/lubm10/test_SPMmultiply222/")
}
}
错误:
18/06/14 12:58:17 WARN TaskSetManager: Lost task 1.0 in stage 2.0 (TID 9, 192.168.49.128): java.lang.IllegalArgumentException: requirement failed: Row index out of range [0, 840): 840.
at scala.Predef$.require(Predef.scala:233)
at org.apache.spark.mllib.linalg.SparseMatrix$$anonfun$fromCOO$2.apply(Matrices.scala:697)
at org.apache.spark.mllib.linalg.SparseMatrix$$anonfun$fromCOO$2.apply(Matrices.scala:691)
at scala.collection.immutable.Stream.foreach(Stream.scala:547)
at scala.collection.GenTraversableViewLike$Appended$class.foreach(GenTraversableViewLike.scala:100)
at scala.collection.SeqViewLike$$anon$2.foreach(SeqViewLike.scala:77)
at org.apache.spark.mllib.linalg.SparseMatrix$.fromCOO(Matrices.scala:691)
at org.apache.spark.mllib.linalg.distributed.CoordinateMatrix$$anonfun$4.apply(CoordinateMatrix.scala:145)
at org.apache.spark.mllib.linalg.distributed.CoordinateMatrix$$anonfun$4.apply(CoordinateMatrix.scala:142)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.insertAll(BypassMergeSortShuffleWriter.java:118)
at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)