我有 2 个未排序的数组,以及这两个数组各自的副本。我用两个不同的线程分别对这两个数组排序,然后再逐个(顺序地)对另外两个副本数组排序。我原以为使用线程会更快,但事实并非如此。为什么使用线程反而花费了更多时间?
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>
#include <pthread.h>
/*
 * Work item handed to one sorting thread: the buffer to sort and the number
 * of elements it holds. thread_data_array provides one slot per worker
 * thread; main() fills a slot in before starting the matching thread.
 */
struct thread_data
{
    int count;          /* number of elements in arr */
    unsigned int *arr;  /* array the thread sorts in place */
} thread_data_array[2];
/*
 * Sort the first n elements of arr into ascending order, in place, using
 * insertion sort.
 *
 * arr: array holding at least n elements; it is not accessed when n <= 1.
 * n:   number of elements to sort; values <= 1 leave the array untouched.
 */
void insertionSort(unsigned int arr[], int n)
{
    for (int i = 1; i < n; i++)
    {
        /*
         * Keep the element in the array's own type: storing it in an int
         * would push values above INT_MAX through an implementation-defined
         * conversion before they are compared and written back.
         */
        unsigned int key = arr[i];
        int j = i - 1;

        /* Shift every larger element of the sorted prefix one slot right. */
        while (j >= 0 && arr[j] > key)
        {
            arr[j + 1] = arr[j];
            j = j - 1;
        }
        arr[j + 1] = key;
    }
}
/*
 * Thread start routine: sorts the array described by the struct thread_data
 * that threadarg points to, then terminates the calling thread via
 * pthread_exit(NULL).
 */
void *sortAndMergeArrays(void *threadarg)
{
    struct thread_data *job = (struct thread_data *) threadarg;

    insertionSort(job->arr, job->count);
    pthread_exit(NULL);
}
/*
 * Benchmark: sort two random arrays with one thread per array, then sort
 * copies of the same data one after the other, and print the number of
 * clock() ticks each phase consumed.
 *
 * Usage: prog [count]   (count defaults to 10000 elements per array)
 *
 * Note: clock() reports processor time used by the process, not elapsed
 * time, so running the two sorts in parallel threads is not expected to make
 * the first figure smaller than the second.
 *
 * Returns 0 on success; exits with EXIT_FAILURE on a bad argument, an
 * allocation failure or a pthread error.
 */
int main(int argc, char *argv[])
{
    /* MAX_COUNT keeps the parsed value inside int range and the byte sizes
     * below far away from size_t overflow. */
    enum { DEFAULT_COUNT = 10000, MAX_COUNT = 10000000 };
    int count = DEFAULT_COUNT;
    pthread_t threads[2];

    /* Get the element count; fall back to the default when none is given. */
    if (argc == 2)
    {
        char *end;
        long requested = strtol(argv[1], &end, 10);

        /* Reject empty/non-numeric input, trailing junk and out-of-range
         * values (strtol saturates on overflow, which the range test
         * catches). */
        if (end == argv[1] || *end != '\0' || requested < 1 || requested > MAX_COUNT)
        {
            fprintf(stderr, "ERROR; invalid element count '%s' (expected 1..%d)\n",
                    argv[1], (int)MAX_COUNT);
            return EXIT_FAILURE;
        }
        count = (int)requested;
    }

    /* Heap storage: four user-sized arrays do not belong on the stack. */
    size_t n = (size_t)count;
    unsigned int *arr1 = malloc(n * sizeof *arr1);
    unsigned int *arr2 = malloc(n * sizeof *arr2);
    unsigned int *copyArr1 = malloc(n * sizeof *copyArr1);
    unsigned int *copyArr2 = malloc(n * sizeof *copyArr2);

    if (arr1 == NULL || arr2 == NULL || copyArr1 == NULL || copyArr2 == NULL)
    {
        fprintf(stderr, "ERROR; out of memory allocating 4 arrays of %d elements\n", count);
        free(arr1);
        free(arr2);
        free(copyArr1);
        free(copyArr2);
        return EXIT_FAILURE;
    }

    /* Both phases must sort identical data, so keep a copy of each array. */
    srand((unsigned int)time(NULL));
    for (int i = 0; i < count; i++)
    {
        arr1[i] = (unsigned int)rand();
        arr2[i] = (unsigned int)rand();
        copyArr1[i] = arr1[i];
        copyArr2[i] = arr2[i];
    }

    /* Phase 1: one thread per array. */
    clock_t start = clock();
    for (int t = 0; t < 2; t++)
    {
        thread_data_array[t].count = count;
        thread_data_array[t].arr = (t == 0) ? arr1 : arr2;

        int rc = pthread_create(&threads[t], NULL, sortAndMergeArrays, (void *) &thread_data_array[t]);
        if (rc)
        {
            /* A worker may already be running on arr1, so leave the buffers
             * alone and let process exit reclaim them. */
            fprintf(stderr, "ERROR; return code from pthread_create() is %d\n", rc);
            exit(EXIT_FAILURE);
        }
    }
    for (int t = 0; t < 2; t++)
    {
        int rc = pthread_join(threads[t], NULL);
        if (rc)
        {
            fprintf(stderr, "ERROR; return code from pthread_join() is %d\n", rc);
            exit(EXIT_FAILURE);
        }
    }
    clock_t end = clock();
    /* clock_t's exact type is implementation-defined; convert explicitly so
     * the argument matches the %ld conversion. */
    printf("Total time taken by CPU to sort using threads: %ld\n", (long)(end - start));

    /* Phase 2: the same work, one array after the other. */
    start = clock();
    insertionSort(copyArr1, count);
    insertionSort(copyArr2, count);
    end = clock();
    printf("Total time taken by CPU to sort sequentially: %ld\n", (long)(end - start));

    free(arr1);
    free(arr2);
    free(copyArr1);
    free(copyArr2);
    return 0;
}
我在 Linux 服务器上执行这段代码。首先,我用随机数填充两个数组,并把它们分别复制到另外两个数组中。对于前两个数组,我使用 pthread 创建两个线程,把两个数组分别传给它们,线程使用插入排序对数组进行排序。而对于另外两个数组,我只是逐个顺序排序。
我原本预计使用线程会减少执行时间,但实际上花费的时间反而更多。
答案 0 :(得分:0)
你得到的时间几乎相同(线程版本比顺序版本稍多一点),原因是 clock() 测量的是 CPU 时间,而两种排序方式消耗的 CPU 时间几乎相同,因为它们做的是同样的工作(由于创建和销毁线程需要时间,线程版本的数值可能略大)。
POSIX 对 clock() 的说明:
clock() 函数应返回实现对进程所用处理器时间的最佳近似值,该时间从一个仅与进程调用相关的、由实现定义的时间起点开始计算。
BSD(macOS)手册页:
clock() 函数确定自调用进程启动以来所使用的处理器时间量,以 1/CLOCKS_PER_SEC 秒为单位计量。
对两个数组进行排序所需的CPU时间基本相同;差异是线程的开销(或多或少)。
我有一组可以使用 clock_gettime() 的函数(代码在 GitHub 上的 timer.c 和 timer.h 中)。它们测量的是挂钟时间——即经过的时间,而不是 CPU 时间。
下面是对您的代码稍作调整后的版本。实质性的改动有两处:把排序函数中 key 的类型从 int 改为 unsigned int,以便与数组中的数据类型一致;把转换说明符从 %d 改为 %ld,以匹配 GCC 为 clock_t 确定的类型。我还略微调整了参数处理和计时输出信息,使各条信息的长度保持一致,并添加了测量经过时间的代码:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>
#include <pthread.h>
#include "timer.h"
/* Arguments for one sorting thread. */
struct thread_data
{
    int count;          /* element count of arr */
    unsigned int *arr;  /* buffer the thread sorts */
};

/* Argument slots for the two worker threads; empty until main() fills them in. */
struct thread_data thread_data_array[2] = { { 0, NULL }, { 0, NULL } };
/*
 * In-place ascending insertion sort of the first n elements of arr.
 * Nothing is read or written when n <= 1.
 */
static void insertionSort(unsigned int arr[], int n)
{
    for (int next = 1; next < n; ++next) {
        const unsigned int pending = arr[next];
        int slot = next;

        /* Walk left through the sorted prefix, moving larger values up. */
        for (; slot > 0 && arr[slot - 1] > pending; --slot) {
            arr[slot] = arr[slot - 1];
        }
        arr[slot] = pending;
    }
}
static
void *sortAndMergeArrays(void *threadarg)
{
int count;
unsigned int *arr;
struct thread_data *my_data;
my_data = (struct thread_data *)threadarg;
count = my_data->count;
arr = my_data->arr;
insertionSort(arr, count);
pthread_exit(NULL);
}
/*
 * Benchmark: sort two random arrays with one thread per array, then sort
 * copies of the same data one after the other. For each phase, print both
 * the elapsed time reported by the Clock helper from "timer.h" and the
 * number of clock() ticks (processor time) consumed.
 *
 * Usage: prog [count]   (count defaults to 10000 elements per array)
 *
 * Returns 0 on success; exits with EXIT_FAILURE on a bad argument, an
 * allocation failure or a pthread error.
 */
int main(int argc, char *argv[])
{
    /* MAX_COUNT keeps the parsed value inside int range and the byte sizes
     * below far away from size_t overflow. */
    enum { DEFAULT_COUNT = 10000, MAX_COUNT = 10000000 };
    int count = DEFAULT_COUNT;
    pthread_t threads[2];

    /* Get the element count; fall back to the default when none is given. */
    if (argc == 2)
    {
        char *end;
        long requested = strtol(argv[1], &end, 10);

        /* Reject empty/non-numeric input, trailing junk and out-of-range
         * values (strtol saturates on overflow, which the range test
         * catches). */
        if (end == argv[1] || *end != '\0' || requested < 1 || requested > MAX_COUNT)
        {
            fprintf(stderr, "ERROR; invalid element count '%s' (expected 1..%d)\n",
                    argv[1], (int)MAX_COUNT);
            return EXIT_FAILURE;
        }
        count = (int)requested;
    }

    /* Heap storage: four user-sized arrays do not belong on the stack. */
    size_t n = (size_t)count;
    unsigned int *arr1 = malloc(n * sizeof *arr1);
    unsigned int *arr2 = malloc(n * sizeof *arr2);
    unsigned int *copyArr1 = malloc(n * sizeof *copyArr1);
    unsigned int *copyArr2 = malloc(n * sizeof *copyArr2);

    if (arr1 == NULL || arr2 == NULL || copyArr1 == NULL || copyArr2 == NULL)
    {
        fprintf(stderr, "ERROR; out of memory allocating 4 arrays of %d elements\n", count);
        free(arr1);
        free(arr2);
        free(copyArr1);
        free(copyArr2);
        return EXIT_FAILURE;
    }

    /* Both phases must sort identical data, so keep a copy of each array. */
    srand((unsigned int)time(NULL));
    for (int i = 0; i < count; i++)
    {
        arr1[i] = (unsigned int)rand();
        arr2[i] = (unsigned int)rand();
        copyArr1[i] = arr1[i];
        copyArr2[i] = arr2[i];
    }

    /* Clock and the clk_* calls come from "timer.h" (not shown here). */
    Clock clk;
    clk_init(&clk);

    /* Phase 1: one thread per array. */
    clock_t start = clock();
    clk_start(&clk);
    for (int t = 0; t < 2; t++)
    {
        thread_data_array[t].count = count;
        thread_data_array[t].arr = (t == 0) ? arr1 : arr2;

        int rc = pthread_create(&threads[t], NULL, sortAndMergeArrays, (void *)&thread_data_array[t]);
        if (rc)
        {
            /* A worker may already be running on arr1, so leave the buffers
             * alone and let process exit reclaim them. */
            fprintf(stderr, "ERROR; return code from pthread_create() is %d\n", rc);
            exit(EXIT_FAILURE);
        }
    }
    for (int t = 0; t < 2; t++)
    {
        int rc = pthread_join(threads[t], NULL);
        if (rc)
        {
            fprintf(stderr, "ERROR; return code from pthread_join() is %d\n", rc);
            exit(EXIT_FAILURE);
        }
    }
    clk_stop(&clk);
    clock_t end = clock();

    char buffer[32];
    printf("Elapsed using threads: %s s\n", clk_elapsed_us(&clk, buffer, sizeof(buffer)));
    /* clock_t's exact type is implementation-defined; convert explicitly so
     * the argument matches the %ld conversion. */
    printf("CPU time using threads: %ld\n", (long)(end - start));

    /* Phase 2: the same work, one array after the other. */
    start = clock();
    clk_start(&clk);
    insertionSort(copyArr1, count);
    insertionSort(copyArr2, count);
    clk_stop(&clk);
    end = clock();
    printf("Elapsed sequentially: %s s\n", clk_elapsed_us(&clk, buffer, sizeof(buffer)));
    printf("CPU time sequentially: %ld\n", (long)(end - start));

    free(arr1);
    free(arr2);
    free(copyArr1);
    free(copyArr2);
    return 0;
}
示例运行(程序名为 inssortthread23)——在一台配备 16 GiB 内存和 2.7 GHz Intel Core i7 CPU 的 MacBook Pro(15 英寸,2016 款)上运行,系统为 macOS High Sierra 10.13,使用 GCC 7.2.0 编译。
运行时只有常规的后台程序——例如浏览器没有在主动使用,没有播放音乐或视频,也没有正在进行的下载等。(这些因素对基准测试非常重要。)
$ inssortthread23 100000
Elapsed using threads: 1.060299 s
CPU time using threads: 2099441
Elapsed sequentially: 2.146059 s
CPU time sequentially: 2138465
$ inssortthread23 200000
Elapsed using threads: 4.332935 s
CPU time using threads: 8616953
Elapsed sequentially: 8.496348 s
CPU time sequentially: 8469327
$ inssortthread23 300000
Elapsed using threads: 9.984021 s
CPU time using threads: 19880539
Elapsed sequentially: 20.000900 s
CPU time sequentially: 19959341
$
在这里,您可以清楚地看到:
- 非线程(顺序)代码的经过时间大约是线程代码的两倍。
- 线程代码和非线程代码的 CPU 时间几乎相同。
- 总时间随排序元素个数呈二次方增长。
所有这些都非常符合预期——一旦你意识到 clock() 测量的是 CPU 时间,而不是经过时间。
您还可以看到,在某些情况下,线程版本的 CPU 时间略小于顺序排序的 CPU 时间。对此我没有解释——我认为这属于噪声范围,尽管这种现象持续出现:
$ inssortthread23 100000
Elapsed using threads: 1.051229 s
CPU time using threads: 2081847
Elapsed sequentially: 2.138538 s
CPU time sequentially: 2132083
$ inssortthread23 100000
Elapsed using threads: 1.053656 s
CPU time using threads: 2089886
Elapsed sequentially: 2.128908 s
CPU time sequentially: 2122983
$ inssortthread23 100000
Elapsed using threads: 1.058283 s
CPU time using threads: 2093644
Elapsed sequentially: 2.126402 s
CPU time sequentially: 2120625
$
$ inssortthread23 200000
Elapsed using threads: 4.259660 s
CPU time using threads: 8479978
Elapsed sequentially: 8.872929 s
CPU time sequentially: 8843207
$ inssortthread23 200000
Elapsed using threads: 4.463954 s
CPU time using threads: 8883267
Elapsed sequentially: 8.603401 s
CPU time sequentially: 8580240
$ inssortthread23 200000
Elapsed using threads: 4.227154 s
CPU time using threads: 8411582
Elapsed sequentially: 8.816412 s
CPU time sequentially: 8797965
$