Question

我正在尝试使用调度队列（dispatch queue），在 iPhone 5 的两个核心上异步填充一个数组。我正在测试以下代码：

// Goal: fill res[] concurrently, one contiguous slice of the index range per
// active processor, then wait for all slices to complete.
float res[20000]; // an array to fill

// A high-priority global queue to run the slices on, and a group used to wait
// for all of them.
dispatch_queue_t aQueue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_HIGH, 0);
dispatch_group_t group = dispatch_group_create();
// The processor count is stored in a float, so the slice bounds below are
// computed in floating point.
float coresNumber=[[NSProcessInfo processInfo] activeProcessorCount];
// The for statement has no braces: its body is the single
// dispatch_group_async call, i.e. one block is submitted per core.
for (float i=0;i<coresNumber;i++)
dispatch_group_async(group, aQueue, ^{
    // Slice i covers [i*20000/coresNumber, (i+1)*20000/coresNumber). The start
    // is truncated to int when it initializes k; the loop test compares k
    // against the float upper bound.
    // NOTE(review): res is referenced directly inside the block; if res is a
    // local array this may not compile -- confirm where res is declared.
    for (int k = i*20000/coresNumber; k < (i+1)*20000/coresNumber; k++) {
        float acc=0;
        // Placeholder workload: sum sinf(j) for j = 0..9999.
        for (int j=0;j<10000;j++){
            acc+=sinf(j);
        }
        res[k]=acc; // fill an array using some function (sum of sines is an example)
    }
});
// Block the calling thread until every block submitted to the group has finished.
dispatch_group_wait(group, DISPATCH_TIME_FOREVER);

这里我实际上是把数组分成两部分，并异步填充这两部分。但它的性能与在单个循环中直接填充整个数组差不多。可能是什么原因？

Answer 1

以下是您的代码的一个版本，它分别使用不同数量的并发块来运行：

// Benchmark: fill a float array with a CPU-bound kernel, splitting the index
// range into 1..coresNumber equal slices and logging the elapsed time of each
// configuration.
const int kArraySize = 20000;
const int kSineIterations = 10000;

float res[kArraySize];
float *r = res;         // use ptr to access array from block

dispatch_queue_t aQueue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_HIGH, 0);
dispatch_group_t group = dispatch_group_create();

// activeProcessorCount returns an NSUInteger; convert explicitly instead of
// relying on a silent narrowing conversion.
int coresNumber = (int)[[NSProcessInfo processInfo] activeProcessorCount];

// The kernel shared by the serial and concurrent paths: fills r[begin, end).
// Keeping a single copy guarantees both paths measure exactly the same work.
void (^fillRange)(int, int) = ^(int begin, int end) {
    for (int k = begin; k < end; k++) {
        float acc = 0;
        for (int j = 0; j < kSineIterations; j++) {
            acc += sinf(j);
        }
        r[k] = acc; // fill an array using some function (sum of sines is an example)
    }
};

for (int coresToUse = 1; coresToUse <= coresNumber; coresToUse++) {
    // systemUptime is not affected by wall-clock adjustments, unlike
    // [NSDate date], so the measured interval cannot jump.
    NSTimeInterval fillStart = [[NSProcessInfo processInfo] systemUptime];

    if (coresToUse == 1) {
        // Baseline: run the whole range inline on the calling thread.
        fillRange(0, kArraySize);
    } else {
        // Submit one block per slice; slice i covers
        // [i*kArraySize/coresToUse, (i+1)*kArraySize/coresToUse).
        for (int i = 0; i < coresToUse; i++) {
            dispatch_group_async(group, aQueue, ^{
                fillRange(i * kArraySize / coresToUse, (i + 1) * kArraySize / coresToUse);
            });
        }
        // Block until every slice submitted to the group has finished.
        dispatch_group_wait(group, DISPATCH_TIME_FOREVER);
    }

    NSTimeInterval executionTime = [[NSProcessInfo processInfo] systemUptime] - fillStart;
    NSLog(@"coresToUse = %d executionTime = %f", coresToUse, executionTime);
}

这是一个使用 dispatch_apply() 的实现，它尝试了多种不同的步幅（stride，如旧版 man page 中所示）：

// Benchmark: fill a float array with dispatch_apply(), handing each iteration
// a chunk of `stride` consecutive elements, for stride = 1, 2, 4, ... kMaxStride.
const int kArraySize = 20000;
const int kSineIterations = 10000;
const int kMaxStride = 32;

float res[kArraySize];
float *r = res;         // use ptr to access array from block

dispatch_queue_t aQueue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_HIGH, 0);

for (int stride = 1; stride <= kMaxStride; stride *= 2) {
    // systemUptime is not affected by wall-clock adjustments, unlike
    // [NSDate date], so the measured interval cannot jump.
    NSTimeInterval fillStart = [[NSProcessInfo processInfo] systemUptime];

    // Round up so that a stride which does not divide kArraySize evenly still
    // gets a final, shorter chunk instead of leaving the tail unfilled.
    size_t chunkCount = ((size_t)kArraySize + (size_t)stride - 1) / (size_t)stride;

    // dispatch_apply returns only after all iterations have completed, so no
    // group or explicit wait is needed.
    dispatch_apply(chunkCount, aQueue, ^(size_t idx) {
        size_t begin = idx * (size_t)stride;
        size_t end = begin + (size_t)stride;
        if (end > (size_t)kArraySize) {
            end = (size_t)kArraySize;   // clamp the last chunk to the array bounds
        }
        for (size_t k = begin; k < end; k++) {
            float acc = 0;
            for (int j = 0; j < kSineIterations; j++) {
                acc += sinf(j);
            }
            r[k] = acc; // fill an array using some function (sum of sines is an example)
        }
    });

    NSTimeInterval executionTime = [[NSProcessInfo processInfo] systemUptime] - fillStart;
    NSLog(@"stride = %d executionTime = %f", stride, executionTime);
}

我的测试结果每次运行都会有所不同，但一般来说 dispatch_apply() 方法更简单，并且性能良好：

coresToUse = 1 executionTime = 7.866005
coresToUse = 2 executionTime = 4.457676
coresToUse = 3 executionTime = 3.347830
coresToUse = 4 executionTime = 2.550073
coresToUse = 5 executionTime = 2.150453
coresToUse = 6 executionTime = 1.814090
coresToUse = 7 executionTime = 1.637852
coresToUse = 8 executionTime = 1.810749
stride = 1 executionTime = 1.634940
stride = 2 executionTime = 1.990378
stride = 4 executionTime = 2.199857
stride = 8 executionTime = 2.157229
stride = 16 executionTime = 2.010102
stride = 32 executionTime = 2.451976

dispatch_group_async性能

1 个答案: