用 C++ 对 200 万个数字进行排序

时间:2015-12-20 11:37:22

标签: c++ arrays sorting

我试着编写一段代码来比较 C++ 中各种排序算法的耗时。当整数的数量为 100000 和 500000 时,我的代码工作正常。但是,当我把数量增加到 2000000 时,程序崩溃了。在谷歌上搜索之后,我尝试像下面这样声明数组,把这些数字放到堆上:

int * array = new int [N];

我已经测试过,这个数组可以容纳 200 万个整数,但是当我把它传给排序算法时,程序仍然崩溃。代码如下:

    #include <string>
    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>
    #include <iostream>
    #include <fstream>
    #include <iomanip>

    #define N  2000000

 using namespace std;

// Sift-down step for a 1-based binary max-heap stored in a[1..N1].
//   a  - heap storage; index 0 is not part of the heap
//   k  - 1-based index of the node to push down
//   N1 - index of the last element that belongs to the heap
// The node at k is repeatedly exchanged with its larger child until it is
// not smaller than either child or it has no children left.
void HeapDown(int a[] , int k, int N1)
{
    for (int child = 2*k; child <= N1; child = 2*k)
    {
        // Of the two children, follow the larger one (right child is child+1).
        if (child < N1 && a[child] < a[child+1])
            ++child;

        // Heap property already holds at k: nothing more to do.
        if (a[k] >= a[child])
            return;

        const int held = a[k];
        a[k] = a[child];
        a[child] = held;

        k = child;
    }
}

// pq(A) addresses the A-th element (1-based) of the sub-array that starts at
// a[l]; it relies on the names `a` and `l` being in scope where it is used.
#define pq(A) a[l-1+A] 
// Heap sort of the inclusive range a[l..r], in place, into ascending order.
// A max-heap is built over the range, then the current maximum is swapped to
// the end of the shrinking heap until one element remains.
// Writes a completion message to standard output when done.
void heapsort(int a[], int l, int r) 
{ 
    // 1-based view of the range: heap[1] is a[l], heap[count] is a[r].
    int* const heap = &pq(0);
    int count = r - l + 1;

    // Build phase: sift down every node that has at least one child.
    for (int node = count / 2; node >= 1; --node)
        HeapDown(heap, node, count);

    // Extraction phase: move the maximum behind the heap, then restore the
    // heap property over the remaining count-1 elements.
    while (count > 1)
    {
        const int top = heap[1];
        heap[1] = heap[count];
        heap[count] = top;

        --count;
        HeapDown(heap, 1, count);
    }

    std::cout << "The sequence was sorted by heap sort" << std::endl;
} 

// Benchmark driver.
// Reads at most N integers (one per line) from "2000000.txt", sorts the values
// that were actually read with the selected algorithm, prints the sorted
// sequence, and finally reports the time spent in the sort as a raw clock()
// tick count (divide by CLOCKS_PER_SEC to obtain seconds).
// Returns 0 on success and 1 when the input file cannot be opened.
int main(){

    int i;
    int count = 0;              // number of values actually read from the file
    clock_t start;
    clock_t end;
    int* array = new int[N];    // dynamically allocated; throws std::bad_alloc on failure

/* Generate random numbers and put them in the text file  */
//  ofstream myfile;
//      myfile.open ("2000000.txt");
//      
//  for (i=0; i < N; i++){
//      array[i] = 1000*(1.0*rand()/RAND_MAX);
//  //  printf("%3d ",array[i]);
//      myfile << array[i] << endl;
//      
//      }
//      cout << "done!" << endl;
//  myfile.close();
/*                                                      */


/******************* Open file and add the numbers into array **************************/   
    std::string line;
    std::ifstream myfile2 ("2000000.txt");
    if (!myfile2.is_open())
    {
        // Without input there is nothing meaningful to sort or time.
        std::cout << "Unable to open file" << std::endl;
        delete[] array;
        return 1;
    }

    // Stop at N values so a file with extra lines cannot overrun the buffer.
    while (count < N && std::getline(myfile2, line))
    {
        array[count] = atoi(line.c_str());
        count++;
    }
    myfile2.close();
/*                                                                                      */  

//for (i=0; i< count; i++){
//      printf("%3d ",array[i]);
//  }

/* Chose the sorting algorithms and calculate the time */

    start = clock();

//  insertionSort(array, 0, count-1);
//  selectionSort(array, 0 , count-1);  
//  bubbleSort (array, count);
//  shellSort (array, count);
//  quicksort (array , 0, count-1);
//  usequicksort (array , 0, count-1);
    // Sort only the elements that were read; the rest of the buffer is uninitialised.
    heapsort (array , 0 , count-1);
//  radixsort (array , count);

    end = clock();
    double rs = static_cast<double>(end - start);

// print out the sorted sequence and time consuming

//      printf("\n The sequence after sorted \n");  
    // Print the sorted data itself, and do so before the buffer is released.
    for (i=0; i< count; i++){
        printf("%3d ",array[i]);
    }

    delete[] array;

    std::cout << "Time consuming: " <<  rs << std::endl;

    return 0;
}

我认为问题出在把数组传入排序函数的时候。不幸的是,我找不到解决办法。如果你们能帮助我,那就太好了,非常感谢。

2 个答案:

答案 0 :(得分:1)

你的代码没有问题(我没有检查算法本身,只是就"是否会崩溃"而言),除了一点:你没有检查 new 是否成功。正如 @jonathan-potter 所说,如果没有足够的内存,它会抛出异常。

答案 1 :(得分:0)

以下是一些可能有助于您前进的提示。

首先,尽可能使用标准容器和算法。这会大大降低出现 bug 的几率。

其次,无需将随机数存储在文件中。如果每次为生成器提供相同的种子,您将获得相同的伪随机序列。

以下是我如何处理它的一个例子。

您可能会发现它是比较各种排序算法的有用框架:

#include <iostream>
#include <random>
#include <vector>
#include <algorithm>
#include <chrono>

// Benchmark harness: fills a large vector with a reproducible pseudo-random
// sequence, sorts it with std::sort, and prints how long the generation and
// the sort each took, in milliseconds. Returns 0.
auto main() -> int
{
    using namespace std;

    // make the test space big enough to defeat artificial cacheing gains.
    vector<int> v(20000000);

    auto genstart = chrono::high_resolution_clock::now();

    // use a known seed to force the same random sequence for each test
    default_random_engine engine(0);
    uniform_int_distribution<> dist(0, 999999);

    // A lambda is used instead of std::bind so the code does not depend on
    // <functional>, which this snippet does not include.
    generate(begin(v), end(v), [&] { return dist(engine); });


    auto start = chrono::high_resolution_clock::now();
    sort(begin(v), end(v));
    auto finish = chrono::high_resolution_clock::now();

    auto genduration = chrono::duration_cast<chrono::milliseconds>(start - genstart).count();
    auto duration = chrono::duration_cast<chrono::milliseconds>(finish - start).count();

    // sort time should be in the order of twice the generation time
    cout << "generation took " << genduration << "ms" << '\n';
    cout << "sort took " << duration << "ms" << '\n';

    return 0;
}

示例输出:

generation took 801ms
sort took 1556ms