这段代码还能更快吗?希望有人能告诉我如何让它运行得更快

时间:2018-10-02 18:06:06

标签: c arrays algorithm performance

问题:

给出两个固定大小的数组:

// d_D: 50 rows of 6 unsigned ints in one contiguous buffer (row i starts at index i * 6).
d_D = (unsigned int*)malloc(50 * 6 * sizeof(unsigned int));

// C: a single row of 6 unsigned ints.
C = (unsigned int*)malloc(1 * 6 * sizeof(unsigned int));

目标:

仅当数组d_D的每一行与数组C至少有一个共同的数字时,才打印数组C的所有数字

例如:d_D 的第一行为 {1,8,10,15,20,30},数组 C 为 {1,2,3,4,5,6},二者有共同的数字 1,因此该行通过测试。此检查要执行五十次(每行一次),并且五十次必须全部通过,才会打印数组 C 的数字。

寻找一种比提供的代码更快的方法。

提供的代码计算第一个数组的每一行与第二个数组唯一的一行之间的交集。求交集要做五十次;只有当五十次求交都找到一个公共数字时,才会打印第二个数组的六个数字。整个程序的核心是那个循环:它把第一个数组每一行的每个数字与第二个数组那一行的每个数字逐一比较。因此,即使去掉打印六个数字的 printf,并用 break 代替 goto notfound,循环仍然会执行求交。这段代码还会被调用五千万次。

我尝试了单个数组和多维数组的组合,结果证明多维数组更快。

这是我能想到的最快的方法。完整的基准测试程序见此链接:https://onlinegdb.com/B159y4vcQ

您可以转到上面的链接并添加您的版本,然后对其进行计时,以查看是否可以做得更好。

        // Check every row of d_D (rows 0..49) against the six numbers stored in C.
        // C is printed only if each row shares at least one number with C; the
        // first row with nothing in common jumps to `notfound`, skipping the printf.
        // The comparisons are unrolled by hand and use goto instead of continue.
        for (i = 0; i < 50; i++)
        {
             // Is C[0] anywhere in row i of d_D?
             if ((d_D[i * 6 + 0] == C[0 * 6 + 0]) || (d_D[i * 6 + 1] == C[0 * 6 + 0]) || (d_D[i * 6 + 2] == C[0 * 6 + 0]) || (d_D[i * 6 + 3] == C[0 * 6 + 0]) || (d_D[i * 6 + 4] == C[0 * 6 + 0]) || (d_D[i * 6 + 5] == C[0 * 6 + 0]))
             {
              goto NextFilter;
             }
             // Is C[1] anywhere in row i of d_D?
             if ((d_D[i * 6 + 0] == C[0 * 6 + 1]) || (d_D[i * 6 + 1] == C[0 * 6 + 1]) || (d_D[i * 6 + 2] == C[0 * 6 + 1]) || (d_D[i * 6 + 3] == C[0 * 6 + 1]) || (d_D[i * 6 + 4] == C[0 * 6 + 1]) || (d_D[i * 6 + 5] == C[0 * 6 + 1]))
             {
              goto NextFilter;
             }
             // Is C[2] anywhere in row i of d_D?
             if ((d_D[i * 6 + 0] == C[0 * 6 + 2]) || (d_D[i * 6 + 1] == C[0 * 6 + 2]) || (d_D[i * 6 + 2] == C[0 * 6 + 2]) || (d_D[i * 6 + 3] == C[0 * 6 + 2]) || (d_D[i * 6 + 4] == C[0 * 6 + 2]) || (d_D[i * 6 + 5] == C[0 * 6 + 2]))
             {
              goto NextFilter;
             }
             // Is C[3] anywhere in row i of d_D?
             if ((d_D[i * 6 + 0] == C[0 * 6 + 3]) || (d_D[i * 6 + 1] == C[0 * 6 + 3]) || (d_D[i * 6 + 2] == C[0 * 6 + 3]) || (d_D[i * 6 + 3] == C[0 * 6 + 3]) || (d_D[i * 6 + 4] == C[0 * 6 + 3]) || (d_D[i * 6 + 5] == C[0 * 6 + 3]))
             {
              goto NextFilter;
             }
             // Is C[4] anywhere in row i of d_D?
             if ((d_D[i * 6 + 0] == C[0 * 6 + 4]) || (d_D[i * 6 + 1] == C[0 * 6 + 4]) || (d_D[i * 6 + 2] == C[0 * 6 + 4]) || (d_D[i * 6 + 3] == C[0 * 6 + 4]) || (d_D[i * 6 + 4] == C[0 * 6 + 4]) || (d_D[i * 6 + 5] == C[0 * 6 + 4]))
             {
               goto NextFilter;
             }
             // Is C[5] anywhere in row i of d_D?
             if ((d_D[i * 6 + 0] == C[0 * 6 + 5]) || (d_D[i * 6 + 1] == C[0 * 6 + 5]) || (d_D[i * 6 + 2] == C[0 * 6 + 5]) || (d_D[i * 6 + 3] == C[0 * 6 + 5]) || (d_D[i * 6 + 4] == C[0 * 6 + 5]) || (d_D[i * 6 + 5] == C[0 * 6 + 5]))
             {
               goto NextFilter;
             }
             // None of the six numbers of C occurs in row i: stop without printing.
             goto notfound;
        NextFilter:
           ;
        }
        // Reached only when all 50 rows matched. d_D and C hold unsigned int,
        // so the matching printf conversion is %u rather than %d.
        printf("%u %u %u %u %u %u\n", C[0 * 6 + 0], C[0 * 6 + 1], C[0 * 6 + 2], C[0 * 6 + 3], C[0 * 6 + 4], C[0 * 6 + 5]);
 notfound:
 ;

这是做同一件事的另一种方法,但是需要花费两倍的时间: https://onlinegdb.com/rk97kq_9Q

// Alternative scan: for each of the 50 rows of d_D, walk the row and C in
// lock-step with two cursors, always advancing the one pointing at the smaller
// value, until an equal pair is found or either cursor runs off the end.
// NOTE(review): this walk only finds a common value reliably when both the row
// and C are sorted in ascending order - confirm the data guarantees that.
for (unsigned int i = 0; i < 50; i++) 
{

   unsigned int i0 = 0, j0 = 0;   // i0: cursor into row i of d_D, j0: cursor into C
   while (i0 < 6 && j0 < 6) 
   { 
     if (d_D[i * 6 + i0] < C[0 * 6 + j0]) 
        i0++; 
     else if (C[0 * 6 + j0] < d_D[i * 6 + i0]) 
        j0++; 
     else // equal: row i shares this value with C
     { 
        goto NextFilter;  
     } 
   } 
   // One side was exhausted without an equal pair: stop without printing.
   goto notfound;
 NextFilter:
 ;
}
// Reached only when all 50 rows matched. d_D and C hold unsigned int,
// so the matching printf conversion is %u rather than %d.
printf("%u %u %u %u %u %u\n", C[0 * 6 + 0], C[0 * 6 + 1], C[0 * 6 + 2], C[0 * 6 + 3], C[0 * 6 + 4], C[0 * 6 + 5]);
notfound:
;

P.S. 使用 goto 的原因是:在 CUDA 实现中我把循环展开了,因此无法使用 continue;但在这里我必须保留循环,否则代码会太长。

4 个答案:

答案 0 :(得分:2)

  

寻找最快的方法。

仅仅通过阅读 C 代码几乎无法判断其性能。现代编译器非常擅长优化代码,使其快速运行。除了编译器优化之外,硬件层面也有各种机制在起作用(例如分支预测、缓存命中与未命中的影响等),这些都使性能难以预测。

一种查看方式是,如果您以最大的优化程度编译代码,然后尝试单步执行(或查看生成的机器代码)。您很可能甚至无法理解它,因为优化使它变成了您无法识别的某种东西(不仅是您自己,而是我们所有人...)。变成了“奇怪的代码”,实际上可以有效地执行您想要的操作...

所以最好的建议是:编写代码,使其易于理解,然后让编译器进行优化。然后分析代码以查看是否存在性能问题。

答案 1 :(得分:1)

您的代码不起作用:for 循环没有任何可观察的效果,因此优秀的编译器会把它整个删除,最终被基准测试到的只有初始化和 printf。

这是完整代码的重新格式化版本,具有单独的时间支持我的观点:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define ROWS  5000000

/*
 * Benchmark driver that reproduces the code from the question with separate
 * timings for the initialization phase and the scan phase.
 *
 * It fills d_D with ROWS rows of 6 ints, runs the unrolled scan against the
 * six values in C, prints C, and then prints the total, init and scan times
 * in milliseconds. The scan loop stores nothing that is read afterwards and
 * the print below is unconditional.
 */
int main(int argc, char *argv[]) {
    clock_t start = clock();
    int *d_D, *C;

    // d_D holds ROWS rows of 6 ints (row i starts at index i * 6); C holds one row of 6.
    d_D = (int*)malloc(ROWS * 6 * sizeof(int));
    C = (int*)malloc(1 * 6 * sizeof(int));

    C[0 * 6 + 0] = 1;
    C[0 * 6 + 1] = 2;
    C[0 * 6 + 2] = 3;
    C[0 * 6 + 3] = 4;
    C[0 * 6 + 4] = 5;
    C[0 * 6 + 5] = 6;

    // make sure that there is a 1 to 6 in each row
    // (always stored in the first element of the row)
    for (unsigned int i = 0; i < ROWS; i++)
        d_D[i * 6 + 0] = (rand() % 6) + 1;

    // Put other random number in the rest of the array d_D
    for (unsigned int i = 0; i < ROWS; i++)
        for (unsigned int j = 1; j < 6; j++)
            d_D[i * 6 + j] = (rand() % ROWS) + 1;

    // End of the initialization phase.
    clock_t stop1 = clock();

    /* Check if all rows in d_D contain at least one number from array C.
       loop index `i` should have function body scope to allow testing
       after the loop exits.
     */
    for (unsigned int i = 0; i < ROWS; i++) {
        // Is C[0] anywhere in row i?
        if ((d_D[i * 6 + 0] == C[0 * 6 + 0]) || (d_D[i * 6 + 1] == C[0 * 6 + 0]) ||
            (d_D[i * 6 + 2] == C[0 * 6 + 0]) || (d_D[i * 6 + 3] == C[0 * 6 + 0]) ||
            (d_D[i * 6 + 4] == C[0 * 6 + 0]) || (d_D[i * 6 + 5] == C[0 * 6 + 0])) {
            goto NextFilter;
        }
        // Is C[1] anywhere in row i?
        if ((d_D[i * 6 + 0] == C[0 * 6 + 1]) || (d_D[i * 6 + 1] == C[0 * 6 + 1]) ||
            (d_D[i * 6 + 2] == C[0 * 6 + 1]) || (d_D[i * 6 + 3] == C[0 * 6 + 1]) ||
            (d_D[i * 6 + 4] == C[0 * 6 + 1]) || (d_D[i * 6 + 5] == C[0 * 6 + 1])) {
            goto NextFilter;
        }
        // Is C[2] anywhere in row i?
        if ((d_D[i * 6 + 0] == C[0 * 6 + 2]) || (d_D[i * 6 + 1] == C[0 * 6 + 2]) ||
            (d_D[i * 6 + 2] == C[0 * 6 + 2]) || (d_D[i * 6 + 3] == C[0 * 6 + 2]) ||
            (d_D[i * 6 + 4] == C[0 * 6 + 2]) || (d_D[i * 6 + 5] == C[0 * 6 + 2])) {
            goto NextFilter;
        }
        // Is C[3] anywhere in row i?
        if ((d_D[i * 6 + 0] == C[0 * 6 + 3]) || (d_D[i * 6 + 1] == C[0 * 6 + 3]) ||
            (d_D[i * 6 + 2] == C[0 * 6 + 3]) || (d_D[i * 6 + 3] == C[0 * 6 + 3]) ||
            (d_D[i * 6 + 4] == C[0 * 6 + 3]) || (d_D[i * 6 + 5] == C[0 * 6 + 3])) {
            goto NextFilter;
        }
        // Is C[4] anywhere in row i?
        if ((d_D[i * 6 + 0] == C[0 * 6 + 4]) || (d_D[i * 6 + 1] == C[0 * 6 + 4]) ||
            (d_D[i * 6 + 2] == C[0 * 6 + 4]) || (d_D[i * 6 + 3] == C[0 * 6 + 4]) ||
            (d_D[i * 6 + 4] == C[0 * 6 + 4]) || (d_D[i * 6 + 5] == C[0 * 6 + 4])) {
            goto NextFilter;
        }
        // Is C[5] anywhere in row i?
        if ((d_D[i * 6 + 0] == C[0 * 6 + 5]) || (d_D[i * 6 + 1] == C[0 * 6 + 5]) ||
            (d_D[i * 6 + 2] == C[0 * 6 + 5]) || (d_D[i * 6 + 3] == C[0 * 6 + 5]) ||
            (d_D[i * 6 + 4] == C[0 * 6 + 5]) || (d_D[i * 6 + 5] == C[0 * 6 + 5])) {
            goto NextFilter;
        }
        // Row i has nothing in common with C: leave the scan loop.
        break;
    NextFilter:
        ;
    }
    // End of the scan phase.
    clock_t stop2 = clock();

    /* In the code posted, this line is always executed.
       Probably not the intended behavior.
       There is a missing test `if (i == ROWS)`
     */
    printf("%d %d %d %d %d %d\n",
           C[0 * 6 + 0], C[0 * 6 + 1], C[0 * 6 + 2],
           C[0 * 6 + 3], C[0 * 6 + 4], C[0 * 6 + 5]);

    clock_t stop = clock();
    // Convert clock ticks to milliseconds for each phase.
    printf("Time elapsed in ms: %f\n", (double)(stop - start) * 1000.0 / CLOCKS_PER_SEC);
    printf("Init time in ms: %f\n", (double)(stop1 - start) * 1000.0 / CLOCKS_PER_SEC);
    printf("Scan time in ms: %f\n", (double)(stop2 - stop1) * 1000.0 / CLOCKS_PER_SEC);
    return 0;
}

输出(在OS / X上与clang -O3编译):

1 2 3 4 5 6
Time elapsed in ms: 218.076000
Init time in ms: 218.010000
Scan time in ms: 0.003000

如果问题陈述是“仅当数组 d_D 的每一行与数组 C 至少有一个共同的数字时,才打印数组 C 的所有数字”,那么您的代码与该陈述不一致,因为它无条件地打印 C 的内容。

此外,由于主循环没有副作用,因此编译器可能对其进行了优化,并且计时表明它完全不影响经过的时间(Scan time in ms: 0.003000)。

答案 2 :(得分:1)

对更新后的代码分别统计初始化时间和扫描时间,在 OS X 上得到如下结果:

1 2 3 4 5 6
Time elapsed in ms: 382.522000
Init time in ms: 317.204000
Scan time in ms: 65.237000

请注意,由于多次调用rand(),因此初始化阶段大部分时间都是如此。

调换测试顺序,即:先把 d_D 每一行中的一个元素与 C 的 6 个元素逐一比较,然后再测试该行的下一个元素,可以极大地缩短扫描时间,约为原来的五分之一:

1 2 3 4 5 6
Time elapsed in ms: 327.496000
Init time in ms: 315.463000
Scan time in ms: 11.970000

可惜,这只是 d_D 数组初始化方式带来的副作用:每行的第一个元素就是匹配的那个元素,因此先拿它与 C 的所有元素比较会更高效。如果这种强烈的偏向确实代表了您的实际数据,那么这种方法无疑更好。

为消除这种偏见,我更改了初始化以随机设置匹配元素,计时如下:

1 2 3 4 5 6
Time elapsed in ms: 447.353000
Init time in ms: 389.708000
Scan time in ms: 57.530000

初始化需要更长的时间,但扫描速度比参考速度快10%。

为完整起见,以下是使用原始代码的这组新数据的时间安排:

1 2 3 4 5 6
Time elapsed in ms: 427.428000
Init time in ms: 356.916000
Scan time in ms: 70.434000

当 d_D 每行上匹配元素的位置随机选取时,原始代码的扫描阶段比使用有偏数据时慢约 10%。因此,使用下面的代码,总体改进约为 20%:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define ROWS  5000000

/*
 * Benchmark driver for the "element-first" scan.
 *
 * Fills d_D with ROWS rows of 6 ints so that every row holds a value in 1..6
 * at a randomly chosen position, then checks that each row shares at least one
 * value with C = {1,2,3,4,5,6}. Each element of the row is compared against
 * all six values of C before moving to the next element. C is printed only if
 * every row matched. Finally the total, init and scan times are printed in ms.
 *
 * Returns 0 on success, EXIT_FAILURE if the buffers cannot be allocated.
 */
int main(int argc, char *argv[]) {
    clock_t start = clock();
    clock_t stop, stop1, stop2;

    (void)argc;
    (void)argv;

    int *d_D = (int*)malloc((size_t)ROWS * 6 * sizeof(int));
    int *C = (int*)malloc(1 * 6 * sizeof(int));

    // The d_D buffer is large; do not write through a failed allocation.
    if (d_D == NULL || C == NULL) {
        fprintf(stderr, "out of memory\n");
        free(d_D);
        free(C);
        return EXIT_FAILURE;
    }

    C[0] = 1; C[1] = 2; C[2] = 3; C[3] = 4; C[4] = 5; C[5] = 6;

    // Put random numbers in the array d_D, but make sure
    // at least one element matches
    for (unsigned int i = 0; i < ROWS; i++) {
        unsigned int index = rand() % 6;
        for (unsigned int j = 0; j < 6; j++) {
            d_D[i * 6 + j] = (j == index) ?
                (rand() % 6) + 1 : (rand() % ROWS) + 1;
        }
    }

    stop1 = clock();

#define OP ||  /* you can try changing the operator to | or + */

    // allfound stays 1 only if no row fails the test below.
    int allfound = 1;
    for (unsigned int i = 0; i < ROWS; i++) {
        // Element 0 of row i against every value of C.
        if ((d_D[i * 6 + 0] == C[0]) OP (d_D[i * 6 + 0] == C[1]) OP
            (d_D[i * 6 + 0] == C[2]) OP (d_D[i * 6 + 0] == C[3]) OP
            (d_D[i * 6 + 0] == C[4]) OP (d_D[i * 6 + 0] == C[5])) {
            goto NextFilter;
        }
        // Element 1.
        if ((d_D[i * 6 + 1] == C[0]) OP (d_D[i * 6 + 1] == C[1]) OP
            (d_D[i * 6 + 1] == C[2]) OP (d_D[i * 6 + 1] == C[3]) OP
            (d_D[i * 6 + 1] == C[4]) OP (d_D[i * 6 + 1] == C[5])) {
            goto NextFilter;
        }
        // Element 2 (tested once; the copy-pasted second test was removed).
        if ((d_D[i * 6 + 2] == C[0]) OP (d_D[i * 6 + 2] == C[1]) OP
            (d_D[i * 6 + 2] == C[2]) OP (d_D[i * 6 + 2] == C[3]) OP
            (d_D[i * 6 + 2] == C[4]) OP (d_D[i * 6 + 2] == C[5])) {
            goto NextFilter;
        }
        // Element 3.
        if ((d_D[i * 6 + 3] == C[0]) OP (d_D[i * 6 + 3] == C[1]) OP
            (d_D[i * 6 + 3] == C[2]) OP (d_D[i * 6 + 3] == C[3]) OP
            (d_D[i * 6 + 3] == C[4]) OP (d_D[i * 6 + 3] == C[5])) {
            goto NextFilter;
        }
        // Element 4.
        if ((d_D[i * 6 + 4] == C[0]) OP (d_D[i * 6 + 4] == C[1]) OP
            (d_D[i * 6 + 4] == C[2]) OP (d_D[i * 6 + 4] == C[3]) OP
            (d_D[i * 6 + 4] == C[4]) OP (d_D[i * 6 + 4] == C[5])) {
            goto NextFilter;
        }
        // Element 5.
        if ((d_D[i * 6 + 5] == C[0]) OP (d_D[i * 6 + 5] == C[1]) OP
            (d_D[i * 6 + 5] == C[2]) OP (d_D[i * 6 + 5] == C[3]) OP
            (d_D[i * 6 + 5] == C[4]) OP (d_D[i * 6 + 5] == C[5])) {
            goto NextFilter;
        }
        // No element of row i is in C: record the failure and stop scanning.
        allfound = 0;
        break;
    NextFilter:
        ;
    }
    stop2 = clock();
    if (allfound) {
        printf("%d %d %d %d %d %d\n",
               C[0 * 6 + 0], C[0 * 6 + 1], C[0 * 6 + 2],
               C[0 * 6 + 3], C[0 * 6 + 4], C[0 * 6 + 5]);
    }
    stop = clock();
    printf("Time elapsed in ms: %f\n", (double)(stop - start) * 1000.0 / CLOCKS_PER_SEC);
    printf("Init time in ms: %f\n", (double)(stop1 - start) * 1000.0 / CLOCKS_PER_SEC);
    printf("Scan time in ms: %f\n", (double)(stop2 - stop1) * 1000.0 / CLOCKS_PER_SEC);

    // Release the buffers after all timestamps have been taken.
    free(d_D);
    free(C);
    return 0;
}

您可能希望在目标系统上运行此基准测试,以查看它是否显示出相同的改进。请注意,这种微优化类型高度依赖于编译器版本,设置和目标系统硬件。

答案 3 :(得分:0)

您的代码并没有实现您在问题中描述的功能。只需迭代到第一个不包含公共数字的行即可,其后的行根本不需要再检查。

您需要自行做基准测试,看下面哪个版本更快:

/*
 * Returns 1 if each of the 50 six-element rows of d_D contains at least one
 * value that also appears in the six-element array C, otherwise 0.
 *
 * For each row, one value of C at a time is compared against all six
 * elements of the row. Scanning stops at the first row without a match.
 */
int foo1(int *d_D, int *C)
{
    int every_row_hit = 1;

    for (size_t row = 0; row < 50 && every_row_hit; row++)
    {
        const int *r = &d_D[row * 6];
        int row_hit = 0;

        /* Stop trying further values of C once one is found in this row. */
        for (size_t k = 0; k < 6 && !row_hit; k++)
        {
            const int want = C[k];

            row_hit = r[0] == want || r[1] == want || r[2] == want ||
                      r[3] == want || r[4] == want || r[5] == want;
        }

        if (!row_hit)
        {
            every_row_hit = 0;
        }
    }
    return every_row_hit;
}

/*
 * Returns 1 if each of the 50 six-element rows of d_D contains at least one
 * value that also appears in the six-element array C, otherwise 0.
 *
 * For each row, one element of the row at a time is compared against all six
 * values of C. Scanning stops at the first row without a match.
 */
int foo(int *d_D, int *C)
{
    int ok = 1;
    size_t row = 0;

    while (ok && row < 50)
    {
        int hit = 0;

        for (size_t col = 0; col < 6 && !hit; col++)
        {
            const int value = d_D[row * 6 + col];

            /* Look this row element up in C; leave at the first equal value. */
            for (size_t k = 0; k < 6; k++)
            {
                if (value == C[k])
                {
                    hit = 1;
                    break;
                }
            }
        }

        ok = hit;
        row++;
    }
    return ok;
}

/*
 * Returns 1 if each of the 50 six-element rows of d_D contains at least one
 * value that also appears in the six-element array C, otherwise 0.
 *
 * d_D must point to 50 * 6 ints (row i starts at index i * 6) and C to 6 ints.
 *
 * Fixes over the previous version:
 *  - C is indexed 0..5; the old code skipped C[0] and read C[6], which is
 *    past the end of the array.
 *  - the per-row match flag is reset for every row; the old flag stayed set
 *    after the first matching row, so later rows were never really checked.
 */
int AtLeastOneInAllRows(int *d_D, int *C)
{
    for (size_t i = 0; i < 50; i++)
    {
        const int *row = d_D + i * 6;
        int found = 0;

        /* Try each value of C against the whole row; stop at the first hit. */
        for (size_t j = 0; !found && j < 6; j++)
        {
            found = (row[0] == C[j]) || (row[1] == C[j]) || (row[2] == C[j]) ||
                    (row[3] == C[j]) || (row[4] == C[j]) || (row[5] == C[j]);
        }

        /* One row without a common value settles the answer. */
        if (!found)
        {
            return 0;
        }
    }
    return 1;
}

this calculator