我正在编写一个简单的堆化算法。它需要处理大数据大小,大小为100,000,数字范围为0到10 ^ 9。
该算法的大小约为25,000。但是,如果数据集大于此值,则会开始抛出分段错误。
我一直使用unsigned long long int,所以我不知道问题出在哪里。我使用向量来存储我的数据,所有数据都是long long int类型。
以前有没有遇到过这样的问题?
以下是我使用的堆化程序:
vector<unsigned long long> array;
vector<unsigned long long> orig_index;
vector<unsigned long long> new_index;
unsigned long long heapify (unsigned long long count) {
unsigned long long temp, left, right, min;
long long j;
unsigned long long n_swaps = 0;
for(long long i=(count%2 ? (count-2)/2 : (count-1)/2 ); i>=0; --i) {
left= (2*i)+1 ;
right= (2*i)+2 ;
j= i;
while( ( (array[j] > array[left]) && left<count) || ( (array[j] > array[right]) && (right<count) ) ){
//Swap
//Find lesser of left or right
if(right>= count) {
min= array[left];
} else {
min= array[left] > array[right] ? array[right] : array[left];
}
if(array[j] > array[left] && (min== array[left])) {
temp= array[j];
array[j]= array[left] ;
array[left]= temp;
//Update indexes
orig_index.push_back(j);
new_index.push_back(left);
j= left;
right= (2*left)+2 ;
left= (2*left)+1 ;
++n_swaps;
}
else if ( (right < count) && (array[j] > array[right]) ) {
temp= array[j];
array[j]= array[right] ;
array[right]= temp;
//Update indexes
orig_index.push_back(j);
new_index.push_back(right);
j= right;
left= (2*right)+1 ;
right= (2*right)+2 ;
++n_swaps;
}
}
}
return n_swaps;
}
这是我使用的随机数据生成器功能。 Count是这里的数据大小(如20k或30k),max是范围。
void generate(unsigned long long count, unsigned long long max) {
srand( time(NULL) );
//Dummy array of max size
vector<unsigned long long> dummy;
//Populate the dummy
for(unsigned long long i=0; i<max; ++i) {
dummy.push_back(i);
}
//Select random number from dummy, swap with last and pop
unsigned long long temp;
unsigned long long swap;
unsigned long long dummy_size= max-1;
cout<<"****************Indices************"<<endl;
for(unsigned long long i=0; i<count; ++i) {
temp= rand() % dummy_size ;
cout<<temp<<endl;
array.push_back( dummy[temp] );
//Swap and pop
swap= dummy[temp];
dummy[temp] = dummy[dummy_size];
dummy[dummy_size] = swap;
--dummy_size;
}
cout<<"*************End*****************"<<endl;
dummy.clear();
}
主要功能
int main(void) {
unsigned long long count= 25000;
unsigned long long max= 1000000 ;
//Generate random numbers and push on array
generate(count, max);
//Print array
for(unsigned long long i=0; i<array.size(); ++i) {
cout<<array[i]<<" ";
}
cout<<endl;
//Build heap
unsigned long long n_swaps = heapify(count);
cout<<n_swaps<<"\n";
for(unsigned long long i=0; i<orig_index.size(); ++i) {
cout<<orig_index[i]<<" "<<new_index[i]<<endl;
}
return 0;
}
我希望算法是正确的,但是无法找到为什么大数据集发生分段错误,而不是小数据集。
答案 0 :(得分:2)
&amp;&amp;从左到右进行评估(作为写入顺序),所以我改了一行,没有&#34;崩溃&#34;,(segmentation-fault)。
while( ( (array[j] > array[left]) && left<count) || ( (array[j] > array[right]) && (right<count) ) ){
替换为:
while( ( left<count && (array[j] > array[left])) || ( (right<count) && (array[j] > array[right]) ) ){
完整代码:
using namespace std;
vector<unsigned long long> array;
vector<unsigned long long> orig_index;
vector<unsigned long long> new_index;
unsigned long long heapify(unsigned long long count) {
unsigned long long temp, left, right, min;
long long j;
unsigned long long n_swaps = 0;
for (long long i = (count % 2 ? (count - 2) / 2 : (count - 1) / 2); i >= 0;
--i) {
left = (2 * i) + 1;
right = (2 * i) + 2;
j = i;
while ((left < count && (array[j] > array[left])) ||
((right < count) && (array[j] > array[right]))) {
// Swap
// Find lesser of left or right
if (right >= count) {
min = array[left];
} else {
min = array[left] > array[right] ? array[right] : array[left];
}
if (array[j] > array[left] && (min == array[left])) {
temp = array[j];
array[j] = array[left];
array[left] = temp;
// Update indexes
orig_index.push_back(j);
new_index.push_back(left);
j = left;
right = (2 * left) + 2;
left = (2 * left) + 1;
++n_swaps;
} else if ((right < count) && (array[j] > array[right])) {
temp = array[j];
array[j] = array[right];
array[right] = temp;
// Update indexes
orig_index.push_back(j);
new_index.push_back(right);
j = right;
left = (2 * right) + 1;
right = (2 * right) + 2;
++n_swaps;
}
}
}
return n_swaps;
}
void generate(unsigned long long count, unsigned long long max) {
srand(time(NULL));
// Dummy array of max size
vector<unsigned long long> dummy;
// Populate the dummy
for (unsigned long long i = 0; i < max; ++i) {
dummy.push_back(i);
}
// Select random number from dummy, swap with last and pop
unsigned long long temp;
unsigned long long swap;
unsigned long long dummy_size = max - 1;
cout << "****************Indices************" << endl;
for (unsigned long long i = 0; i < count; ++i) {
temp = rand() % dummy_size;
cout << temp << endl;
array.push_back(dummy[temp]);
// Swap and pop
swap = dummy[temp];
dummy[temp] = dummy[dummy_size];
dummy[dummy_size] = swap;
--dummy_size;
}
cout << "*************End*****************" << endl;
dummy.clear();
}
int main(void) {
unsigned long long count = 25000;
unsigned long long max = 1000000;
// Generate random numbers and push on array
generate(count, max);
// Print array
for (unsigned long long i = 0; i < array.size(); ++i) {
cout << array[i] << " ";
}
cout << endl;
// Build heap
unsigned long long n_swaps = heapify(count);
cout << n_swaps << "\n";
for (unsigned long long i = 0; i < orig_index.size(); ++i) {
cout << orig_index[i] << " " << new_index[i] << endl;
}
return 0;
}