Question

我正在尝试使用行向量进行列向量乘法。我可以使用dgemm吗？

换句话说，D = A * B. 其中D是矩阵，A是列向量，B是行向量。

我按照https://software.intel.com/en-us/node/520775中的文档进行操作。我似乎无法获得适合cblas_dgemm

的参数

这是我的尝试。在我的情况下，m = nRows，n = nCols，k = 1

问题似乎是lda，ldb和ldc。我已将它们分别定义为nCols，k，nRows。

#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>

#include <math.h>
#include <mkl.h>

#define nCols 5
#define nRows 20
#define k 1

void PrintMatrix(double* pMatrix, const size_t nR, const size_t nC, const CBLAS_ORDER Order) {
    unsigned int i, j;
    if (Order == CblasRowMajor)
    {
        for (i = 0; i < nR; i++)
        {
            for (j = 0; j < nC; j++)
            {
                printf("%f \t ", pMatrix[i * nC + j]); // !!!
            }
            printf("\n"); // !!!
        }

    }

    else

    {
        for (i = 0; i < nR; i++) {
            for (j = 0; j < nC; j++) {
                printf("%f \t ", pMatrix[i + j* nR ]); // !!!
            }
            printf("\n"); // !!!
        }

    }
    printf("\n"); // !!!

}

int main(void) {

    double A[] = { 8, 4, 7, 3, 5, 1, 1, 3, 2, 1, 2, 3, 2, 0, 1, 1 , 2, 3, 4, 1};

    double B[] = { -1, 2, -1, 1, 2 };


    double alpha = 1.0, beta = 0.0;
    int i, lda, ldb, ldc;
    double *C, *D;
    D = (double*) malloc(nRows * nCols * sizeof(double));
    C = (double*) malloc(nRows * nCols * sizeof(double));

    for (i = 0; i < nRows*nCols; i++)
        D[i] = 0.0;
    for (i = 0; i < nRows*nCols; i++)
        C[i] = 0.0;

    lda = nCols;
    ldb = k;
    ldc = nRows;

    cblas_dger(CblasRowMajor, nRows, nCols, alpha, A, 1, B, 1, C, nCols);

    PrintMatrix(C, nRows, nCols,CblasRowMajor);
    cblas_dgemm (CblasRowMajor, CblasNoTrans, CblasNoTrans, nRows,  nCols, k, alpha, A, lda, B, ldb, beta, D, ldc);

    PrintMatrix(D, nRows, nCols, CblasRowMajor);

    free(D);
    free(C);

    return 0;
}

Answer 1

简短回答是，是的，您可以使用dgemm进行排名1更新。当然建议使用dger，因为预计它会针对此操作进行更好的优化。

至于cblas_dgemm的使用。如您所知，领导维度的定义是：

lda：矩阵A的第一维的大小

您要执行的操作是： D（20x5）= A（20x1）* B（1x5）

您正在使用CblasRowMajor所以前导维度为所有矩阵的列数（有关解释，请参阅https://stackoverflow.com/a/30208420/2707697）。含义：

lda = 1;
ldb = 5;
ldc = 5;

Answer 2

以下代码有效。我切换到专栏专业，因为它更容易理解Fortran BLAS文档中的领先维度问题。我很抱歉我无法正确地解决你的问题

#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>

#include <math.h>
#include <mkl.h>

void PrintMatrix(double* pMatrix, const size_t nR, const size_t nC, const CBLAS_ORDER Order) {
    unsigned int i, j;
    if (Order == CblasRowMajor) {
        for (i = 0; i < nR; i++) {
            for (j = 0; j < nC; j++) {
                printf("%f \t ", pMatrix[i * nC + j]); // !!!
            }
            printf("\n"); // !!!
        }
    } else {
        for (i = 0; i < nR; i++) {
            for (j = 0; j < nC; j++) {
                printf("%f \t ", pMatrix[i + j* nR ]); // !!!
            }
            printf("\n"); // !!!
        }
    }
    printf("\n"); // !!!
}

int main(void)
{
    const int m = 20;
    const int n = 5;
    const int k = 1;

    double A[] = { 8, 4, 7, 3, 5, 1, 1, 3, 2, 1, 2, 3, 2, 0, 1, 1, 2, 3, 4, 1};
    double B[] = { -1, 2, -1, 1, 2 };

    double alpha = 1.0, beta = 0.0;

    double * C = (double*) malloc(m * n * sizeof(double));
    double * D = (double*) malloc(m * n * sizeof(double));

    for (int i = 0; i < m*n; i++) C[i] = 0.0;
    for (int i = 0; i < m*n; i++) D[i] = 0.0;

    int lda = 20;
    int ldb = 1;
    int ldc = 20;

    cblas_dger(CblasColMajor, m, n, alpha, A, 1, B, 1, C, ldc);

    cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, 
                m, n, k, 
                alpha, A, lda, 
                B, ldb, beta, 
                D, ldc);

    PrintMatrix(C, m, n, CblasRowMajor);
    PrintMatrix(D, m, n, CblasRowMajor);

    free(D);
    free(C);

    return 0;
}

你如何使用cblas_dgemm来做一个矢量外积？

2 个答案: