谁能告诉我,为什么输出(Anew)是:
Anew = 0.000000
Anew = 2.000000
Anew = 4.000000
Anew = 6.000000
Anew = 16.000000
Anew = 20.000000
Anew = 24.000000
Anew = 28.000000
而不是:
Anew = 0.000000
Anew = 2.000000
Anew = 4.000000
Anew = 6.000000
Anew = 8.000000
Anew = 10.000000
Anew = 12.000000
Anew = 14.000000
代码:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mkl.h>
// Demo program: fills a rows x cols x Layers float buffer with its own flat
// indices, prints it, scales it with cblas_sscal once per layer, and prints
// the result again. argc and argv are not used.
int main(int argc, const char* argv[]) {
int rows = 2, cols = 2, Layers = 2;
// Number of elements in one rows x cols layer.
int PerLayerElmts = rows * cols;
// Single contiguous buffer holding all layers back to back.
// NOTE(review): the malloc result is not checked for NULL before it is used.
float* A = malloc(PerLayerElmts * Layers * sizeof(*A));
// Fill A so that every element equals its own flat index (0, 1, 2, ...).
int ImagIdx;
for (int n = 0; n < Layers; n++) {
// ImagIdx is the running element index inside the current layer.
ImagIdx = 0;
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
A[n * PerLayerElmts + ImagIdx] = n * PerLayerElmts + ImagIdx;
ImagIdx++;
}
}
}
// Print the initial contents of A, one element per line, layer by layer.
for (int n = 0; n < Layers; n++) {
ImagIdx = 0;
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
printf("\nA = %f", A[n * PerLayerElmts + ImagIdx]);
ImagIdx++;
}
}
}
float scalar = 2.0;
// Element offset of the current layer from the start of A; the loop below
// advances it by one layer (PerLayerElmts) per iteration.
size_t AddressOffset = 0;
for (int i = 0; i < Layers; i++, AddressOffset += PerLayerElmts) {
// Scale the elements starting at A + AddressOffset by scalar (stride 1).
// NOTE(review): the count passed here is PerLayerElmts * Layers (the whole
// buffer), not PerLayerElmts (one layer). The first iteration therefore
// already scales every element, and each later iteration scales the
// remaining layers again -- with two layers, the second layer ends up
// multiplied by 4 instead of 2. Those later calls also start AddressOffset
// elements into A while still covering the full buffer length, so they run
// past the end of the allocation.
cblas_sscal(PerLayerElmts * Layers, scalar, A + AddressOffset, 1);
}
// Print the contents of A after scaling, layer by layer.
for (int n = 0; n < Layers; n++) {
ImagIdx = 0;
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
printf("\n\nAnew = %f", A[n * PerLayerElmts + ImagIdx]);
ImagIdx++;
}
}
}
printf("\n");
free(A);
return 0;
}
我只是创建了一个矩阵,然后调用 cblas_sscal 将每个元素乘以标量(2)。
我按层数循环执行这一操作,每次用 AddressOffset 作为当前层的地址偏移。
问题出在第二层:其中的元素被乘以了 4,而不是 2!
答案 0 :(得分:1)
你调用 cblas_sscal 的方式似乎不对。不应该是
cblas_sscal( PerLayerElmts * Layers , scalar , A + AddressOffset , 1 );
而应该是
cblas_sscal( PerLayerElmts , scalar , A + AddressOffset , 1 );
因为你为每一层都调用了一次。按原来的写法,第一次调用就已经把全部元素乘以 2,第二次调用又从第二层开始再乘一次,所以第二层被乘以了 4(并且还越界访问了缓冲区之外的内存)。