我正在尝试编写一个非常简单的 C 程序,对整数数据类型实现向量乘加(multiply-add,即 "axpy")算法。程序输出执行时间,用于衡量机器的性能。矩阵由随机数填充。
/*
 * Reads a matrix size N from standard input, fills two N x N int matrices
 * with rand() values (x is pre-multiplied by a random scale factor a), adds
 * them element-wise into a third matrix and prints the elapsed clock() time
 * for the fill + add loops together with N * N.
 *
 * Returns 0 on the "size too large" error path.
 * NOTE(review): the normal path reaches the closing brace without a return
 * statement even though the function is declared to return int.
 */
int benchmark(void) {
int N; /* The matrix size, controlled by user input */
int r, c; /* Row and Column number */
int random; /* Random number to fill the matrix */
int a = rand() % 20; /* Scale number to multiply x matrix */
printf("Enter the size(N*N) of the matrices(Maximum 1,000,000)\n");
/* NOTE(review): the scanf result is not checked, so N stays uninitialized
   when the input is not a number; only the upper bound is validated below */
scanf("%d", &N);
if (N > 1000000) {
fprintf(stderr, "Size of matrix is too large!\n");
return 0;
}
/* Declare the matrices x, y and result as variable-length arrays.
   They have automatic storage and together hold 3 * N * N ints, so the
   required space grows quadratically with the user-supplied N. */
int xMatrix[N][N], yMatrix[N][N], resultMatrix[N][N];
/* Start timing: everything up to the second clock() call is measured,
   including the rand() calls used to fill the matrices */
clock_t t;
t = clock();
/* Fill x with a * (random value in 0..99) */
for (r = 0; r < N; r++) {
for (c = 0; c < N; c++) {
random = rand() % 100;
xMatrix[r][c] = a * random; /* Multiply matrix x with random value a */
}
}
/* Fill y with random values in 0..99 */
for (r = 0; r < N; r++) {
for (c = 0; c < N; c++) {
int random = rand() % 100; /* shadows the function-level 'random' */
yMatrix[r][c] = random;
}
}
/* Add two matrix together */
for (r = 0; r < N; r++) {
for (c = 0; c < N; c++) {
resultMatrix[r][c] = xMatrix[r][c] + yMatrix[r][c];
}
}
/* Stop timing and convert clock ticks to seconds */
t = clock() - t;
double timeTaken = ((double)t) / CLOCKS_PER_SEC;
printf("\n -> Total time : %f seconds\n", timeTaken);
/* N * N is computed in int arithmetic and printed with %d */
printf("\n -> Vector length : %d", N * N);
}
矩阵的大小由用户控制。当 N 的值小于 800 时,该程序可以正常工作。
答案 0 :(得分:3)
以自动存储方式(即在栈上)分配的对象太大时,程序会出现未定义行为,更具体地说是栈溢出。
您应该从堆中分配对象:
/* Allocate the matrices x, y and result with malloc() instead of as local
   arrays. Each variable is a pointer to rows of N ints, so matrix[r][c]
   indexing works on the allocated block. */
int (*xMatrix)[N] = malloc(N * sizeof(*xMatrix));
int (*yMatrix)[N] = malloc(N * sizeof(*yMatrix));
int (*resultMatrix)[N] = malloc(N * sizeof(*resultMatrix));
并检查 malloc() 返回的指针都不是 NULL。
以下是修改后的代码:
/*
 * Integer "axpy"-style benchmark on heap-allocated N x N matrices.
 *
 * Reads the matrix size N from standard input, allocates three N x N int
 * matrices with malloc(), fills x with a * rand() values and y with rand()
 * values, adds them element-wise into the result matrix, and prints the
 * elapsed clock() time for the fill + add loops together with N * N.
 *
 * Returns 0 in every case (success, invalid input, or allocation failure);
 * errors are reported on stderr. All allocated memory is released before
 * returning.
 */
int benchmark(void) {
    int N; /* The matrix size, controlled by user input */
    int r, c; /* Row and Column number */
    int random; /* Random number to fill the matrix */
    int a = rand() % 20; /* Scale number to multiply x matrix */

    printf("Enter the size(N*N) of the matrices (Maximum 1,000,000)\n");
    if (scanf("%d", &N) != 1) {
        fprintf(stderr, "Input error!\n");
        return 0;
    }
    /* A variably modified type such as int (*)[N] requires N > 0, so
       non-positive sizes must be rejected before the declarations below */
    if (N < 1) {
        fprintf(stderr, "Matrix size must be positive!\n");
        return 0;
    }
    if (N > 1000000) {
        fprintf(stderr, "Matrix size is too large!\n");
        return 0;
    }
    /* Guard the N * (row size) multiplication against size_t wrap-around,
       which would otherwise make malloc() return an undersized block */
    if ((size_t)N > (size_t)-1 / ((size_t)N * sizeof(int))) {
        fprintf(stderr, "Matrix size is too large!\n");
        return 0;
    }

    /* Allocate the matrices x, y and result; each is a pointer to rows of
       N ints so that matrix[r][c] indexing works */
    int (*xMatrix)[N] = malloc(N * sizeof(*xMatrix));
    int (*yMatrix)[N] = malloc(N * sizeof(*yMatrix));
    int (*resultMatrix)[N] = malloc(N * sizeof(*resultMatrix));
    if (xMatrix == NULL || yMatrix == NULL || resultMatrix == NULL) {
        fprintf(stderr, "Memory allocation failed!\n");
        /* free(NULL) is a no-op, so no need to test each pointer */
        free(xMatrix);
        free(yMatrix);
        free(resultMatrix);
        return 0;
    }

    /* Start timing: the measured interval includes the rand() fills */
    clock_t t = clock();
    for (r = 0; r < N; r++) {
        for (c = 0; c < N; c++) {
            random = rand() % 100;
            xMatrix[r][c] = a * random; /* Multiply matrix x with random value a */
        }
    }
    for (r = 0; r < N; r++) {
        for (c = 0; c < N; c++) {
            random = rand() % 100;
            yMatrix[r][c] = random;
        }
    }
    /* Add two matrix together */
    for (r = 0; r < N; r++) {
        for (c = 0; c < N; c++) {
            resultMatrix[r][c] = xMatrix[r][c] + yMatrix[r][c];
        }
    }
    t = clock() - t;

    double timeTaken = ((double)t) / CLOCKS_PER_SEC;
    printf("\n -> Total time : %f seconds\n", timeTaken);
    /* Widen before multiplying: N * N can exceed the range of int */
    printf("\n -> Vector length : %lld", (long long)N * N);

    free(xMatrix);
    free(yMatrix);
    free(resultMatrix);
    return 0;
}
但请注意,您的计算非常简单,大部分时间很可能都花在 rand() 函数上。
答案 1 :(得分:1)
您需要动态分配内存,我建议您使用 stdlib.h 中的 malloc,如下所示。
另外,请查看以下SO帖子:memory allocation in Stack and Heap和What and where are the stack and heap?
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
/*
 * Integer "axpy"-style benchmark on N x N matrices stored as arrays of
 * separately allocated rows.
 *
 * Reads the matrix size N from standard input, allocates three matrices
 * (an array of N row pointers plus N rows of N ints each), fills x with
 * a * rand() values and y with rand() values, adds them element-wise into
 * the result matrix, and prints the elapsed clock() time for the fill + add
 * loops together with N * N.
 *
 * Returns 0 in every case (success, invalid input, or allocation failure);
 * errors are reported on stderr. All allocated memory is released before
 * returning.
 */
int benchmark(void) {
    int N = 0; /* The matrix size, controlled by user input */
    int r, c; /* Row and Column number */
    int rows = 0; /* Number of rows whose buffers were requested (for cleanup) */
    int random; /* Random number to fill the matrix */
    int a = rand() % 20; /* Scale number to multiply x matrix */
    int **xMatrix = NULL;
    int **yMatrix = NULL;
    int **resultMatrix = NULL;
    clock_t t;
    double timeTaken;

    printf("Enter the size(N*N) of the matrixs(Maximum 1,000,000)\n");
    if (scanf("%d", &N) != 1) {
        fprintf(stderr, "Input error!\n");
        return 0;
    }
    if (N < 1) {
        fprintf(stderr, "Size of matrix must be positive!\n");
        return 0;
    }
    if (N > 1000000) {
        fprintf(stderr, "Size of matrix is too large!\n");
        return 0;
    }

    /* Using the heap memory allocation instead of the stack:
       first the three arrays of row pointers ... */
    xMatrix = malloc((size_t)N * sizeof *xMatrix);
    yMatrix = malloc((size_t)N * sizeof *yMatrix);
    resultMatrix = malloc((size_t)N * sizeof *resultMatrix);
    if (xMatrix == NULL || yMatrix == NULL || resultMatrix == NULL) {
        fprintf(stderr, "Memory allocation failed!\n");
        goto cleanup;
    }
    /* ... then every row. 'rows' is advanced before the check so that the
       cleanup loop also releases the partially allocated failing row. */
    for (r = 0; r < N; r++) {
        xMatrix[r] = malloc((size_t)N * sizeof *xMatrix[r]);
        yMatrix[r] = malloc((size_t)N * sizeof *yMatrix[r]);
        resultMatrix[r] = malloc((size_t)N * sizeof *resultMatrix[r]);
        rows = r + 1;
        if (xMatrix[r] == NULL || yMatrix[r] == NULL || resultMatrix[r] == NULL) {
            fprintf(stderr, "Memory allocation failed!\n");
            goto cleanup;
        }
    }

    /* Start timing: the measured interval includes the rand() fills */
    t = clock();
    for (r = 0; r < N; r++) {
        for (c = 0; c < N; c++) {
            random = rand() % 100;
            xMatrix[r][c] = a * random; /* Multiply matrix x with random value a */
        }
    }
    for (r = 0; r < N; r++) {
        for (c = 0; c < N; c++) {
            random = rand() % 100;
            yMatrix[r][c] = random;
        }
    }
    /* Add two matrix together */
    for (r = 0; r < N; r++) {
        for (c = 0; c < N; c++) {
            resultMatrix[r][c] = xMatrix[r][c] + yMatrix[r][c];
        }
    }
    t = clock() - t;

    timeTaken = ((double)t) / CLOCKS_PER_SEC;
    printf("\n -> Total time : %f seconds\n", timeTaken);
    /* Widen before multiplying: N * N can exceed the range of int */
    printf("\n -> Vector length : %lld", (long long)N * N);

cleanup:
    /* rows > 0 implies all three pointer arrays were allocated;
       free(NULL) is a no-op, so missing rows/arrays need no special case */
    for (r = 0; r < rows; r++) {
        free(xMatrix[r]);
        free(yMatrix[r]);
        free(resultMatrix[r]);
    }
    free(xMatrix);
    free(yMatrix);
    free(resultMatrix);
    return 0;
}
/* Program entry point: runs the benchmark once and exits with status 0. */
int main(void) {
    (void)benchmark(); /* the benchmark's return value is not used */
    return 0;
}