我正在试图对用于字符串搜索的天真算法进行并行化。我创建了简单版本,然后尝试使用多个线程来加速它。
但是下面的代码会让它慢得多:
template<typename T> long unsigned int simple_paralel(const T * str1, unsigned long int str1_length, const T * str2, unsigned long int str2_length) {
unsigned int long result = ~0;
unsigned int long count = 0;
unsigned long int in;
unsigned int long top;
#pragma omp parallel
#pragma for ordered shared(result, count) private(in, top) firstprivate(str1, str2, str1_length, str2_length)
for (top = 0; top < str1_length; top++) {
in = 0;
// & top + in < str1_length
while (in < str2_length ) {
if ( top + in >= str1_length)
break;
if ( str1[top+in] != str2[in] ) {
break;
}
++in;
if( in == str2_length ) {
// shared and we want to have the smallest index
if( result >= top + 1 ) {
result = top + 1;
}
count++;
}
}
}
return count;
}
我做错了什么?
答案 0 :(得分:0)
所以我退后一步尝试了另一种方法:
template<typename T> long unsigned int simple_paralel(const T * str1, unsigned long int str1_length, const T * str2, unsigned long int str2_length, int threads) {
count = 0;
unsigned long int block = str1_length/threads;
unsigned long int total = 0;
unsigned long int result = 0;
unsigned long int smallest = ~0;
#pragma omp parallel for shared(count)
for(int i=0; i<threads; i++)
{
unsigned long int res;
unsigned long int start = i * block;
if(i != threads -1 )
{
total += block;
res = simple_p(str1 + start, block, str2, str2_length, false);
} else {
res = simple_p(str1 + start, str1_length - total, str2, str2_length);
}
if(res < result && res != 0 ){
result = res;
}
}
return count;
}
它按预期工作。但我仍然无法相信,我无法使用OpenMP 执行此操作。