Question

有谁能告诉我，使用不同大小的操作数时，GMP 的运算耗时会有怎样的差异？例如下面这段代码：

#include <stdio.h>
#include <gmp.h>
#include <stdlib.h>
#include <time.h>
#define REPEAT 10000

/*
 * Benchmark kernel: stores a*b in r, then computes (a+b)*(a-b) into a
 * scratch value that is discarded.
 *
 * r: receives the product a*b.
 * a: input operand; overwritten with a-b as a side effect.
 * b: input operand; not modified.
 */
void full_mult(mpz_t r,mpz_t a,mpz_t b)
{
    mpz_t temp;
    mpz_init(temp);

    mpz_mul(r,a,b);
    mpz_add(temp,a,b);
    mpz_sub(a,a,b);
    /* a now holds a-b, so this multiplies (a+b) by (a-b). */
    mpz_mul(temp,temp,a);
    /*the above code 10 more times*/

    /* Release the scratch value; without this every call leaks it. */
    mpz_clear(temp);
}

/*
 * Benchmark kernel: stores a*b in r, then computes (a+b)*(a-b) into a
 * scratch value that is discarded.
 *
 * r: receives the product a*b.
 * a: input operand; overwritten with a-b as a side effect.
 * b: input operand; not modified.
 */
void half_mult(mpz_t r,mpz_t a,mpz_t b)
{
    mpz_t temp;
    mpz_init(temp);

    mpz_mul(r,a,b);
    mpz_add(temp,a,b);
    mpz_sub(a,a,b);
    /* a now holds a-b, so this multiplies (a+b) by (a-b). */
    mpz_mul(temp,temp,a);
    /*the above code ten more times*/

    /* Release the scratch value; without this every call leaks it. */
    mpz_clear(temp);
}


/*
 * Times REPEAT calls of half_mult on 128-bit random operands, then REPEAT
 * calls of full_mult on 256-bit random operands, printing the CPU time of
 * each batch as measured by clock().
 *
 * Returns 0 on completion.
 */
int main(void)
{

    long int i;
    clock_t start, end;

    gmp_randstate_t state;
    gmp_randinit_mt(state);

    /* Static storage keeps these large arrays off the stack, so raising
       REPEAT does not risk a stack overflow. */
    static mpz_t a[REPEAT];
    static mpz_t b[REPEAT];
    static mpz_t a1[REPEAT];
    static mpz_t b1[REPEAT];
    static mpz_t r[REPEAT];
    static mpz_t r1[REPEAT];

    for(i=0;i<REPEAT;i++)
    {
        mpz_init(a[i]);mpz_init(b[i]);
        mpz_init(a1[i]);mpz_init(b1[i]);
        mpz_init(r[i]);mpz_init(r1[i]);
    }

    /* 128-bit operands for the first batch. */
    for(i=0;i<REPEAT;i++)
    {
        mpz_urandomb(a[i],state,128);
        mpz_urandomb(b[i],state,128);

    }

    start=clock();

    for(i=0;i<REPEAT;i++)
        half_mult(r[i],a[i],b[i]);

    end=clock();
    printf( "Number of seconds: %f\n", (end-start)/(double)CLOCKS_PER_SEC );


    /* 256-bit operands for the second batch. */
    for(i=0;i<REPEAT;i++)
    {
        mpz_urandomb(a1[i],state,256);
        mpz_urandomb(b1[i],state,256);

    }

    start=clock();

    for(i=0;i<REPEAT;i++)
        full_mult(r1[i],a1[i],b1[i]);

    end=clock();

    printf( "Number of seconds: %f\n", (end-start)/(double)CLOCKS_PER_SEC );

    /* Release every integer and the random state before exiting. */
    for(i=0;i<REPEAT;i++)
    {
        mpz_clear(a[i]);mpz_clear(b[i]);
        mpz_clear(a1[i]);mpz_clear(b1[i]);
        mpz_clear(r[i]);mpz_clear(r1[i]);
    }
    gmp_randclear(state);

    return 0;
}

如您所见，我想测量两种大小的整数的运算耗时：一种是 256 位，另一种是 128 位。但这段代码没有给出任何确定性的结论：有时 128 位运算耗时更长，有时 256 位运算耗时更长。

Answer 1

根据 GMP 文档第 15.1 节，该库会针对不同大小的操作数使用不同的乘法算法。请参见下面的阈值表：

| Algorithm | Threshold            |
|-----------|----------------------|
| Basecase  | (none)               |
| Karatsuba | MUL_TOOM22_THRESHOLD |
| Toom-3    | MUL_TOOM33_THRESHOLD |
| Toom-4    | MUL_TOOM44_THRESHOLD |
| Toom-6.5  | MUL_TOOM6H_THRESHOLD |
| Toom-8.5  | MUL_TOOM8H_THRESHOLD |
| FFT       | MUL_FFT_THRESHOLD    |

因此，由于算法不同，时间也可能不同。

Answer 2

请尝试使用更大的样本量。我把 REPEAT 改成了 10000000，并把下面这些声明

mpz_t a[REPEAT];
mpz_t b[REPEAT];
mpz_t a1[REPEAT];
mpz_t b1[REPEAT];
mpz_t r[REPEAT];
mpz_t r1[REPEAT];

改为

static mpz_t a[REPEAT];
static mpz_t b[REPEAT];
static mpz_t a1[REPEAT];
static mpz_t b1[REPEAT];
static mpz_t r[REPEAT];
static mpz_t r1[REPEAT];

运行 3 次之后，我得到了如下结果：

$ gcc -O2 gmp_bench.c -lgmp
$ time ./a.out             
Number of seconds: 12.689352
Number of seconds: 18.295134
./a.out  34.54s user 1.27s system 99% cpu 35.820 total
$ time ./a.out
Number of seconds: 12.647052
Number of seconds: 17.918326
./a.out  34.08s user 1.35s system 99% cpu 35.426 total
$ time ./a.out
Number of seconds: 12.647854
Number of seconds: 18.106714
./a.out  34.29s user 1.28s system 99% cpu 35.581 total
$

通过监控程序的执行情况，我发现已分配的内存在不断增加，因此内存分配的开销可能比算法本身的性能差异影响更大。

不同整数大小下 GMP 的计时差异

2 个答案: