Question

我有一个相当大的N * N整数矩阵Matrix2D（假设有足够的内存），

1. 对每一行/每一列，我需要记录其值与右侧/下方相邻元素不同的那些元素的列/行索引。

2. 我想找到一个可以并行化的最优算法，最好能用 OpenMP（OMP）实现。

所以，最后我会有一些数据结构，比如

// One inner vector per row: the column indices at which an element of that
// row differs from its right-hand neighbour.
std::vector<std::vector<int>>   RowWiseDiscontinuity(N);// N= #of rows
// One inner vector per column: the row indices at which an element of that
// column differs from the neighbour below it.
std::vector<std::vector<int>>   ColWiseDiscontinuity(N);// N= #of cols

其中内部std::vector<int>记录行/列索引。

我把我的串行版本放在这里，但发现很难进行OMP并行化...有人可以提供一些想法如何使用omp实现遍历这个2D矩阵？

代码段，

// Serial reference implementation.
// RowWiseDiscontinuity[y] receives every column index x for which
// Matrix2D(x, y) differs from its right-hand neighbour Matrix2D(x + 1, y).
// ColWiseDiscontinuity[x] receives every row index y for which
// Matrix2D(x, y) differs from the neighbour below it, Matrix2D(x, y + 1).
std::vector<std::vector<int>>   RowWiseDiscontinuity(N);// N= #of rows
std::vector<std::vector<int>>   ColWiseDiscontinuity(N);// N= #of cols
std::vector<int> TempX1; // values of the row currently being scanned
std::vector<int> TempX2; // values of the previous row (empty while y == 0)
for (int y=0; y<N; ++y)
{
    // Read each matrix element exactly once into the current-row buffer.
    TempX1.clear();
    for (int x =0; x<N; ++x)
    {
        TempX1.push_back(Matrix2D(x,y));
    }

    for (int x =0; x<N; ++x)
    {
        // Right-hand neighbour: the last column has none, so it is skipped
        // instead of reading one past the end of the buffer. Row 0 is scanned
        // as well; only the column test below needs a previous row.
        if (x+1 < N && TempX1[x] != TempX1[x+1])
        {
            RowWiseDiscontinuity[y].push_back(x); //Critical for OMP
        }
        // Lower neighbour: compare the previous row with the current one at
        // the same column and record the upper row's index, i.e. the element
        // whose lower neighbour differs.
        if (y>0 && TempX2[x] != TempX1[x])
        {
            ColWiseDiscontinuity[x].push_back(y-1); //Critical for OMP
        }
    }

    TempX2.swap(TempX1); // proceed to next row, remember previous

}

Answer 1

在矩阵上执行两次传递（可以在不同的线程上执行），一次用于行式不连续，另一次用于列式不连续。

其中一次遍历如下所示（此处以填充 ColWiseDiscontinuity 为例）：

// One pass over the matrix that fills ColWiseDiscontinuity: for every outer
// index x, append each y at which Matrix(x, y) and Matrix(x, y + 1) differ.
// An outer iteration only appends to its own ColWiseDiscontinuity[x].
for (int x = 0; x < N; ++x)    // Can be parallelized
{
    int y = 0;
    // Stop one short of the end: the last element has no (x, y + 1) partner.
    while (y + 1 < N)
    {
        const int next = y + 1;
        if (Matrix(x, y) != Matrix(x, next))
        {
            ColWiseDiscontinuity[x].push_back(y);
        }
        ++y;
    }
}

另一次遍历与此类似，只是填充的是 RowWiseDiscontinuity。

两种情况下的外层循环都可以并行化。外层循环的每次迭代修改的是 RowWiseDiscontinuity / ColWiseDiscontinuity 的不同元素，因此不会产生数据竞争。两次遍历本身也可以在不同的线程上执行。

顺便一提，您还可以以内存为代价，同时按行主序和列主序各存储一份矩阵，并在每次遍历时使用布局合适的那一份，以减少缓存未命中，从而进一步优化该算法。在行主序中，元素 (x + 1, y) 始终紧邻 (x, y)；在列主序中，元素 (x, y + 1) 同样紧邻 (x, y)。

Answer 2

我会另外创建一个数组，用来保存每个元素在列方向和行方向上的最近邻居。显然，这必须作为第一遍处理来完成。我建议创建一个保存所需索引对（pair）的二维数组：与其使用两个向量，不如使用一个由 pair 组成的向量。pair 便于并行处理，也易于排序。

// N inner vectors of (int, int) index pairs, proposed in place of the two
// separate vector-of-vector structures.
vector<vector<pair<int, int>>> elements(N);

Answer 3

这是一种算法，它对对角线方向的相邻元素执行基本的比较测试，并用一个 4x4 单位矩阵来演示结果。其中没有使用 OMP 或任何并行计算。不过，它是一个适用于 MxN 矩阵的通用类模板，足够简单易用。我没有把内容存储在向量的向量中，而是把数据展平为单个一维向量，并且在模板实例化时就已经预留了所需的内存。我使用一个函数模板来比较矩阵内的元素，并传回索引 (M,N) 或 (x,y) 以及比较结果是真还是假。这里我用一个结构体来保存 x-y 索引以及布尔结果。检查邻居时会跳过矩阵的最后一列和最后一行，因为它们的右侧和下方已经没有更远的元素：这一点可以在 main 函数中看到。这可能对您有所帮助，您可以尝试把这个类、结构体和函数与 OMP 库结合使用。

/// Fixed-size integer matrix whose elements live in one flat, contiguous
/// std::vector<int>, zero-initialised on construction.
///
/// @tparam Col number of columns (elements per row; also the stride)
/// @tparam Row number of rows
///
/// Element (x, y) is stored at flat index x * col_size + y. No bounds
/// checking is performed; callers must keep indices inside the matrix.
template<unsigned Col, unsigned Row>
class Matrix2D {
public:
    // The dimensions depend only on the template arguments, so they are
    // shared compile-time constants. Per-object const members would also make
    // the class non-assignable.
    static constexpr unsigned col_size = Col;
    static constexpr unsigned row_size = Row;
    static constexpr unsigned stride_ = col_size;
    static constexpr unsigned matrix_size = col_size * row_size;

private:
    std::vector<int> data_;

public:
    /// Allocates matrix_size elements, all set to 0.
    Matrix2D() : data_( matrix_size ) {}

    /// Stores val at position (x, y).
    void addElement( unsigned x, unsigned y, int val ) {
        data_[(x * stride_ + y)] = val;
    }

    /// Returns the value stored at position (x, y).
    int getElement( unsigned x, unsigned y ) const {
        return data_[(x * stride_ + y)];
    }

    /// Returns the value stored at flat index idx (0 .. matrix_size - 1).
    int getElement( unsigned idx ) const {
        return data_[idx];
    }
};

/// Result of one neighbour comparison: the position that was tested and
/// whether the two compared elements differed.
/// Members are default-initialised so that a plain `Neighbor n;` never holds
/// indeterminate values.
struct Neighbor {
    unsigned indexCol = 0;     // column index the comparison was run for
    unsigned indexRow = 0;     // row index the comparison was run for
    bool     notSame  = false; // true when the compared elements differ
};


// Forward declaration (the definition follows main): compares the element
// addressed by (colIdx, rowIdx) with the one addressed by
// (colIdx + 1, rowIdx + 1) and stores both indices plus the "differs" flag
// in n.
template<unsigned Col, unsigned Row>
void compareMatrixDiagonals( Matrix2D<Col, Row>& mat, Neighbor& n, unsigned colIdx, unsigned rowIdx );

int main() {

    Matrix2D<4, 4> mat4x4;
    mat4x4.addElement( 0, 0, 1 );
    mat4x4.addElement( 0, 1, 0 );
    mat4x4.addElement( 0, 2, 0 );
    mat4x4.addElement( 0, 3, 0 );

    mat4x4.addElement( 1, 0, 0 );
    mat4x4.addElement( 1, 1, 1 );
    mat4x4.addElement( 1, 2, 0 );
    mat4x4.addElement( 1, 3, 0 );

    mat4x4.addElement( 2, 0, 0 );
    mat4x4.addElement( 2, 1, 0 );
    mat4x4.addElement( 2, 2, 1 );
    mat4x4.addElement( 2, 3, 0 );

    mat4x4.addElement( 3, 0, 0 );
    mat4x4.addElement( 3, 1, 0 );
    mat4x4.addElement( 3, 2, 0 );
    mat4x4.addElement( 3, 3, 1 );

    unsigned idx = 0;
    for ( unsigned i = 0; i < mat4x4.matrix_size; i++ ) {
        std::cout << mat4x4.getElement( i ) << " ";
        idx++;

        if ( idx == 4 ) {
            std::cout << "\n";
            idx = 0;
        }
    }
    std::cout << "\n";    

    unsigned colIdx = 0;
    unsigned rowIdx = 0;
    std::vector<Neighbor> neighbors;
    Neighbor n;

    // If we are in the last col or row we can ignore
    // (0,3),(1,3),(2,3),(3,3),(3,0),(3,1),(3,2), {*(3,3)* already excluded}
    // This is with a 4x4 matrix: we can substitute and use LastCol - LastRow 
    // for any size MxN Matrix.
    const unsigned LastCol = mat4x4.col_size - 1;
    const unsigned LastRow = mat4x4.row_size - 1;

    for ( unsigned i = 0; i < LastCol; i++ ) {
        for ( unsigned j = 0; j < LastRow; j++ ) {
            compareMatrixDiagonals( mat4x4, n, i, j );
            neighbors.push_back( n );
        }
    }

    for ( unsigned i = 0; i < neighbors.size(); i++ ) {
        std::cout << "(" << neighbors[i].indexCol
            << "," << neighbors[i].indexRow
            << ") " << neighbors[i].notSame
            << "\n";
    }

    std::cout << "\nPress any key & enter to quit." << std::endl;
    char c;
    std::cin >> c;

    return 0;
}

/// Compares the element at column colIdx / row rowIdx with its diagonal
/// neighbour one column to the right and one row down, and reports the
/// outcome through N.
///
/// @param mat     matrix to read from
/// @param N       output: receives colIdx, rowIdx and the notSame flag
/// @param colIdx  column of the first element; must be < mat.col_size - 1
/// @param rowIdx  row of the first element; must be < mat.row_size - 1
template<unsigned Col, unsigned Row>
void compareMatrixDiagonals( Matrix2D<Col, Row>& mat, Neighbor& N, unsigned colIdx, unsigned rowIdx ) {
    // Storage is row-major with col_size elements per row (addElement uses
    // x * col_size + y), so the row index is the one scaled by the stride.
    // Scaling the column index instead reads the wrong cell, and can run past
    // the buffer, as soon as Col != Row.
    const unsigned firstIdx = rowIdx * mat.col_size + colIdx;
    const unsigned nextIdx  = (rowIdx + 1) * mat.col_size + (colIdx + 1);

    N.indexCol = colIdx;
    N.indexRow = rowIdx;
    N.notSame  = mat.getElement( firstIdx ) != mat.getElement( nextIdx );
}

可平行算法，用于遍历知道col / row-wise邻域的2D矩阵

3 个答案: