Question

我正在研究Project Euler #22，并在大约9.6毫秒内得到了我的解决方案。这就是我所拥有的：

#import <Foundation/Foundation.h>

NSUInteger valueOfName(NSString *name) {
    NSUInteger sum = 0;
    for (int i = 0; i < [name length]; i++) {
        unichar character = [name characterAtIndex:i];
        sum += (character - 64);
    }
    return sum;
}

int main(int argc, const char * argv[]) {
    @autoreleasepool {
        CFAbsoluteTime currentTime = CFAbsoluteTimeGetCurrent();
        NSMutableString *names = [NSMutableString stringWithContentsOfFile:[@"~/Documents/Developer/Project Euler/Problem22/names.txt" stringByExpandingTildeInPath] encoding:NSASCIIStringEncoding error:nil];
        CFAbsoluteTime diskIOTime = CFAbsoluteTimeGetCurrent();
        [names replaceOccurrencesOfString:@"\"" withString:@"" options:NSLiteralSearch range:NSMakeRange(0, [names length])];
        NSArray *namesArray = [names componentsSeparatedByString:@","];
        namesArray = [namesArray sortedArrayUsingSelector:@selector(compare:)];
        // Marker 1
            int totalScore = 0;
        for (int i = 0; i < [namesArray count]; i++) {
            NSString *name = namesArray[i];
            NSUInteger sum = valueOfName(name);
            NSUInteger position = i + 1;
            totalScore += (sum * position);
        }
        // Marker 2
        CFAbsoluteTime endTime = CFAbsoluteTimeGetCurrent();
        double timeDiff = (endTime - currentTime) * 1000;
        printf("Total score: %d\n", totalScore);
        printf("Disk IO Time: %fms\tTime: %fms\n", ((diskIOTime - currentTime) * 1000), timeDiff);
    }
    return 0;
}

这是一段美好的时光，但我开始考虑如何通过使用多个线程来加快速度。使用四核CPU，理论上我应该能够在不同的线程上处理四分之一的名称，然后从那里获得总数。这是我尝试的（替换上面标记之间的代码）：

__block int totalScore = 0;
        int quarterArray = [namesArray count] /4 ;
        typedef void(^WordScoreBlock)(void);
        WordScoreBlock block1 = ^{
            for (int i = 0; i < quarterArray; i++) {
                NSString *name = namesArray[i];
                NSUInteger sum = valueOfName(name);
                NSUInteger position = i + 1;
                totalScore += (sum * position);
            }
            printf("Total score block 1: %d\n", totalScore);
        };
        WordScoreBlock block2 = ^{
            for (int i = quarterArray; i < (quarterArray * 2); i++) {
                NSString *name = namesArray[i];
                NSUInteger sum = valueOfName(name);
                NSUInteger position = i + 1;
                totalScore += (sum * position);
            }
        };
        WordScoreBlock block3 = ^{
            for (int i = (quarterArray * 2); i < (quarterArray * 3); i++) {
                NSString *name = namesArray[i];
                NSUInteger sum = valueOfName(name);
                NSUInteger position = i + 1;
                totalScore += (sum * position);
            }
        };
        WordScoreBlock block4 = ^{
            for (int i = (quarterArray * 3); i < [namesArray count]; i++) {
                NSString *name = namesArray[i];
                NSUInteger sum = valueOfName(name);
                NSUInteger position = i + 1;
                totalScore += (sum * position);
            }
        };
        dispatch_queue_t processQueue = dispatch_queue_create("Euler22", NULL);
        dispatch_async(processQueue, block1);
        dispatch_async(processQueue, block2);
        dispatch_async(processQueue, block3);
        dispatch_async(processQueue, block4);

然而，我得到0的结果，但我的时间快了大约一毫秒。

这种多线程方法是否可行？
如果是，我将如何实施？

Answer 1

首先创建一个并发队列，以便您的块并行执行：

dispatch_queue_t processQueue = dispatch_queue_create("Euler22", DISPATCH_QUEUE_CONCURRENT);

然后创建一个调度组，将所有块添加到该组，并等待该组完成：

dispatch_group_t group = dispatch_group_create();
dispatch_group_async(group, processQueue, block1);
dispatch_group_async(group, processQueue, block2);
dispatch_group_async(group, processQueue, block3);
dispatch_group_async(group, processQueue, block4);
dispatch_group_wait(group, DISPATCH_TIME_FOREVER);

最后：添加到totalScore不是原子操作，因此当所有线程执行并行时，您将得到错误的结果。您必须使用原子增量操作，或让所有线程计算自己的分数，并在完成后添加所有线程的值。

Answer 2

你真的想加载文件作为时间的一部分吗？

此外，如果要同时执行这些操作，则需要使用并发队列。您正在创建一个串行队列，因此所有块将一个接一个地执行。

// Create a concurrent queue
dispatch_queue_t processQueue = dispatch_queue_create("Euler22", DISPATCH_QUEUE_CONCURRENT);

或者，您可以调用* dispatch_get_global_queue *，并要求并发队列。

现在，当您添加任务时，GCD会将它们分配给可用的工作线程。

既然任务已经完成，你需要等待它们完成。这可以通过几种方式实现。如果您使用多个队列，则调度组可能是最佳方法。

使用相同的队列，在所有* dispatch_sync *（）调用之后，您可以放置一个屏障块，它将等待所有前面的块完成，然后运行...

dispatch_barrier_async(processQueue, ^{
    // We know that all previously enqueued blocks have finished, even if running
    // concurrently.  So, we can process the final results of those computations.
});

但是，在这种情况下，我们使用一个队列（虽然是并发的，但它会同时执行多个任务......虽然它按照它们入队的顺序拉出队列）。

可能最简单的方法是使用* dispatch_apply *，因为它是为了这个目的而设计的。您多次调用同一个块，传入索引。该块获取索引，您可以使用它来对数据数组进行分区。

修改

好的，尝试在您的特定问题上使用apply（使用您的块代码作为示例......我认为它可以满足您的需求）。注意，我只是键入它（这里也没有语法高亮显示），所以你可能需要稍微使用它来进行编译......但它应该给你一般的想法。

// You need to separate both source and destination data. size_t const numChunks = 4; // number of concurrent chunks to execute __block int scores[numChunks]; size_t dataLen = [namesArray count]; size_t chunkSize = dataLen / numChunks; // amount of data to process in each chunk dispatch_queue_t queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_HIGH, 0); dispatch_apply(numChunks, queue, ^(size_t index) { // GCD will schedule these tasks concurrently as best as possible. // You know the current iteration index from the parameter. size_t beginIndex = index * chunkSize; // beginning of chunk size_t endIndex = beginIndex + chunkSize; // one past end of chunk if (endIndex > dataLen) endIndex = dataLen; int score = 0; for (size_t i = beginIndex; i < endIndex; ++i) { NSString *name = namesArray[i]; NSUInteger sum = valueOfName(name); NSUInteger position = i + 1; score += (sum * position); } scores[index] = score; }); // Since dispatch_apply waits for all bucks to complete, by the time you // get here you know that all the blocks are done. If your result is just // a sum of all the individual answers, sum them up now. int totalScore = 0; for (size_t i = 0; i < numChunks; ++i) { totalScore += scores[i]; }

希望这是有道理的。如果你让它工作，请告诉我。

现在，如果你遇到了真正需要数学表现的情况，你应该研究一下Accelerate框架。一个词。真棒。

多线程程序算法

2 个答案: