I am trying to do matrix multiplication using tasks from Intel TBB; the algorithm I am using is Strassen's algorithm...
Here is the code for main():
#include "Matrix.h"
#include "tbb/tick_count.h"
using namespace tbb;
using namespace std;
//Here is how I call MatTask class
Matrica callParallel(Matrx& A, Matrix& B, Matrix& C, int n){
MatTask& t = *new (task::allocate_root ()) MatTask (A, B, &C, n);
task::spawn_root_and_wait (t);
return C;
}
int main(){
    int rows, columns;
    Matrix serialC;
    cout << "*******************\n" << "If rows and columns are < 6 you will enter the matrix manually\n" << "********************\n" << endl;
    cout << "Enter rows for matrix A: ";
    cin >> rows;
    cout << "Enter columns for matrix A: ";
    cin >> columns;
    Matrix A(rows, columns);
    if(rows > 5 && columns > 5){
        A.createMatrixAutomatic();
    } else {
        A.createMatricManualy();
    }
    cout << "Enter rows for matrix B: ";
    cin >> rows;
    cout << "Enter columns for matrix B: ";
    cin >> columns;
    Matrix B(rows, columns);
    if(rows > 5 && columns > 5){
        B.createMatrixAutomatic();
    } else {
        B.createMatricManualy();
    }
    cout << "Matrix A: " << endl;
    A.printMatrix();
    cout << "Matrix B: " << endl;
    B.printMatrix();
    cout << "Matrix C: " << endl;
    tick_count start_time = tick_count::now();
    serialC.MultSerial(A, B);
    tick_count end_time = tick_count::now();
    cout << "\nTime for serial: " << (end_time - start_time).seconds() * 1000 << " ms" << endl;
    serialC.printMatrix();
    // Creating the matrix for the result and calling the parallel algorithm
    Matrix parallelC(rows, columns);
    parallelC = callParallel(A, B, parallelC, rows);
    // This prints the result matrix
    parallelC.printMatrix();
    system("PAUSE");
}
Here is my Matrix.cpp code:
#include "Matrix.h"
Matrix::Matrix(){}
Matrix::Matrix(int rows, int columns){
vr = rows;
kol = columns;
}
void Matrix::createMatrixAutomatic(){
for(int i = 0; i < vr; i++){
for (int j = 0; j < kol; j++){
int number = rand() % 5 + 1;
matr[i][j] = number;
}
}
}
void Matrix::createMatricManualy(){
cout << "Enter the elements: " << endl;
for(int i = 0; i < vr; i++){
for (int j = 0; j < kol; j++){
cout << "Enter [" << i << "]" << "[" << j << "] element: ";
cin >> matr[i][j];
}
}
}
void Matrix::printMatrix(){
for (int i = 0; i < vr; i++){
for (int j = 0; j < kol; j++){
cout << matr[i][j] << " ";
}
cout << endl << endl;
}
}
void Matrix::MultSerial(Matrix& A, Matrix& B){
for(int i = 0; i < A.vr; i++){
for(int j = 0; j < B.kol; j++){
matr[i][j] = 0;
for(int k = 0; k < B.vr; k++){
matr[i][j] += (A.matr[i][k] * B.matr[k][j]);
vr = A.vr;
kol = B.kol;
}
}
}
}
void Matrix::substract(Matrix& A, Matrix& B, int dim){
for (int i = 0; i < dim; i++) {
for (int j = 0; j < dim; j++) {
matr[i][j] = A.matr[i][j] - B.matr[i][j];
}
}
}
void Matrix::Add(Matrix& A, Matrix& B, int dim){
for (int i = 0; i < dim; i++) {
for (int j = 0; j < dim; j++) {
matr[i][j] = A.matr[i][j] + B.matr[i][j];
}
}
}
Here are my MatTask class and Matrix.h:
#pragma once
#include <iostream>
#include <tbb/task.h>
using namespace tbb;
using namespace std;

class Matrix{
public:
    int vr, kol;
    int matr[100][100];
    Matrix();
    Matrix(int rows, int columns);
    void createMatrixAutomatic();
    void createMatricManualy();
    void printMatrix();
    void MultSerial(Matrix&, Matrix&);
    void Add(Matrix& A, Matrix& B, int dim);
    void substract(Matrix& A, Matrix& B, int dim);
};
class MatTask: public task{
public:
    Matrix A, B;
    Matrix* C;
    int dimension;
    MatTask(Matrix& _A, Matrix& _B, Matrix* _C, int dim):
        A(_A), B(_B), C(_C), dimension(dim){}
    task* execute(){
        if(dimension == 1){
            C->MultSerial(A, B);
        } else {
            int newDimension = dimension/2;
            task_list list;
            int count = 1;
            Matrix a11(newDimension, newDimension), a12(newDimension, newDimension), a21(newDimension, newDimension), a22(newDimension, newDimension),
                   b11(newDimension, newDimension), b12(newDimension, newDimension), b21(newDimension, newDimension), b22(newDimension, newDimension),
                   *c11, *c12, *c21, *c22,
                   p1(newDimension, newDimension), *p2, *p3, *p4, *p5, *p6, *p7,
                   aResult(newDimension, newDimension), bResult(newDimension, newDimension);
            // Split the matrices into 4 submatrices
            for(int i = 0; i < newDimension; i++){
                for(int j = 0; j < newDimension; j++){
                    a11.matr[i][j] = A.matr[i][j];
                    a12.matr[i][j] = A.matr[i][j + newDimension];
                    a21.matr[i][j] = A.matr[i + newDimension][j];
                    a22.matr[i][j] = A.matr[i + newDimension][j + newDimension];
                    b11.matr[i][j] = B.matr[i][j];
                    b12.matr[i][j] = B.matr[i][j + newDimension];
                    b21.matr[i][j] = B.matr[i + newDimension][j];
                    b22.matr[i][j] = B.matr[i + newDimension][j + newDimension];
                }
            }
            // Compute p1...p7
            // p1 = (a11 + a22) * (b11 + b22)
            aResult.Add(a11, a22, newDimension); // a11 + a22
            bResult.Add(b11, b22, newDimension); // b11 + b22
            count++;
            //MatTask& a = *new( allocate_child() ) MatTask(aResult, bResult, &p1, newDimension);
            //list.push_back(a);
            list.push_back(*new (allocate_child()) MatTask(aResult, bResult, &p1, newDimension));
            // p2 = (a21 + a22) * b11
            //aResult.Add(a21, a22, newDimension); // a21 + a22
            //count++;
            ////list.push_back(*new (allocate_child()) MatTask(aResult, b11, p2, newDimension));
            //// p3 = a11 * (b12 - b22)
            //bResult.substract(b12, b22, newDimension); // b12 - b22
            //count++;
            ////list.push_back(*new (allocate_child()) MatTask(a11, bResult, p3, newDimension));
            //// p4 = a22 * (b21 - b11)
            //bResult.substract(b21, b11, newDimension); // b21 - b11
            //count++;
            ////list.push_back(*new (allocate_child()) MatTask(a22, bResult, p4, newDimension));
            //// p5 = (a11 + a12) * b22
            //aResult.Add(a11, a12, newDimension); // a11 + a12
            //count++;
            ////list.push_back(*new (allocate_child()) MatTask(aResult, b22, p5, newDimension));
            //// p6 = (a21 - a11) * (b11 + b12)
            //bResult.Add(b11, b12, newDimension); // b11 + b12
            //aResult.substract(a21, a11, newDimension); // a21 - a11
            //count++;
            ////list.push_back(*new (allocate_child()) MatTask(aResult, bResult, p6, newDimension));
            //// p7 = (a12 - a22) * (b21 + b22)
            //bResult.Add(b21, b22, newDimension); // b21 + b22
            //aResult.substract(a12, a22, newDimension); // a12 - a22
            //count++;
            ////list.push_back(*new (allocate_child()) MatTask(aResult, bResult, p7, newDimension));
            set_ref_count(count);
            //spawn(a);
            spawn_and_wait_for_all(list);
            //spawn_and_wait_for_all(a);
            // Compute c11, c12, c21, c22
            // c11 = p1 + p4 - p5 + p7
            //aResult.Add(p1, p4, newDimension); // p1 + p4
            //bResult.Add(aResult, p7, newDimension); // p1 + p4 + p7
            //c11.substract(bResult, p5, newDimension); // c11 = p1 + p4 + p7 - p5
            //// c12 = p3 + p5
            //c12.Add(p3, p5, newDimension);
            //// c21 = p2 + p4
            //c21.Add(p2, p4, newDimension);
            //// c22 = p1 + p3 - p2 + p6
            //aResult.Add(p1, p3, newDimension); // p1 + p3
            //bResult.Add(aResult, p6, newDimension); // p1 + p3 + p6
            //c22.substract(bResult, p2, newDimension); // c22 = p1 + p3 + p6 - p2
            // Grouping the results into a single matrix:
            //for(int i = 0; i < newDimension; i++){
            //    for(int j = 0; j < newDimension; j++){
            //        C->matr[i][j] = c11.matr[i][j];
            //        C->matr[i][j + newDimension] = c12.matr[i][j];
            //        C->matr[i + newDimension][j] = c21.matr[i][j];
            //        C->matr[i + newDimension][j + newDimension] = c22.matr[i][j];
            //    }
            //}
        }
        return NULL;
    }
};
As you can see, some of the function and class names are not in English, but I don't think that will be a problem, because the code is very simple.
I am getting this error:
Unhandled exception at 0x01193787 in MnozenjeMatrica.exe: 0xC00000FD: Stack overflow.
I think the error occurs at the line spawn_and_wait_for_all(list), but I am not sure.
Could you please look at my code and help me fix the problem? Maybe I am not calling these functions correctly; I honestly don't know. Please help. Thank you.
Answer 0 (score: 0)
Blocking-style parallelism, combined with matrices that take up a lot of stack space, causes the stack overflow. Each task reserves some stack for its data and then calls spawn_and_wait_for_all,
which executes another instance of the same task on top of it, so the stack keeps growing recursively.
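To see the scale of the problem, here is a rough back-of-the-envelope sketch (assuming a 4-byte int and the fixed 100x100 backing array from the question) of how much stack a single MatTask::execute() frame holds across the blocking wait:

#include <cstdio>

// Same layout as the Matrix in the question: two ints plus a 100x100 array.
struct Matrix { int vr, kol; int matr[100][100]; };

int main() {
    // MatTask::execute() keeps roughly 11 Matrix locals alive across the
    // blocking wait: a11..a22, b11..b22, p1, aResult and bResult.
    std::printf("one Matrix:  %zu bytes\n", sizeof(Matrix));          // ~40 KB
    std::printf("one frame: ~%zu KB\n", 11 * sizeof(Matrix) / 1024);  // ~430 KB
    // Because spawn_and_wait_for_all() blocks inside execute(), the child's
    // execute() frame is pushed on top of the parent's, so a default 1 MB
    // thread stack is exhausted after only a couple of recursion levels.
    return 0;
}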
Use continuation-style programming, and avoid allocating large amounts of data on the stack (and, if possible, inside tasks, since that reduces the efficiency of the task allocator).
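For illustration, here is a minimal continuation-passing sketch with the same tbb::task API, following the classic Fibonacci pattern from the TBB reference manual rather than the Strassen code itself, to keep it short. The key difference from the code above: the parent allocates a continuation, hands its children to it, and returns immediately instead of blocking, so execute() frames never pile up on the stack:

#include <cstdio>
#include "tbb/task.h"

// Runs only after both children have finished; combines their results.
struct FibContinuation : public tbb::task {
    long* const sum;
    long x, y;
    FibContinuation(long* sum_) : sum(sum_), x(0), y(0) {}
    tbb::task* execute() {
        *sum = x + y;
        return NULL;
    }
};

struct FibTask : public tbb::task {
    const long n;
    long* const sum;
    FibTask(long n_, long* sum_) : n(n_), sum(sum_) {}
    tbb::task* execute() {
        if (n < 2) {
            *sum = n;
            return NULL;
        }
        // Hand the children to a continuation instead of waiting for them.
        FibContinuation& c = *new (allocate_continuation()) FibContinuation(sum);
        FibTask& a = *new (c.allocate_child()) FibTask(n - 1, &c.x);
        FibTask& b = *new (c.allocate_child()) FibTask(n - 2, &c.y);
        c.set_ref_count(2);
        spawn(b);
        return &a; // scheduler bypass: 'a' runs next, but THIS frame returns
                   // first, so the stack does not grow with recursion depth.
    }
};

int main() {
    long sum = 0;
    FibTask& root = *new (tbb::task::allocate_root()) FibTask(20, &sum);
    tbb::task::spawn_root_and_wait(root);
    std::printf("fib(20) = %ld\n", sum); // prints 6765
    return 0;
}

Applied to the Strassen code, one way to do this would be to make the seven child MatTasks children of a continuation that performs the c11..c22 recombination, and to move the submatrix storage to the heap (for example, a std::vector<int> inside Matrix instead of int matr[100][100]) rather than keeping it in locals of execute().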