优化程序处理大数据的结构化输入

时间:2018-10-28 12:20:56

标签: c optimization large-data

我有一项任务。为了更加清楚,我将使用以下图片作为示例。输入和输出用虚线分隔。输入的第一行是数字 N -套数。对于每一组,它的第一行是2个数字-第一个声明要处理的数字,第二个是间隔数。第二行指定要处理的数字,第三行包含两个数字 X Y ,它们创建并间隔。对于每个间隔,我必须输出3个数字-间隔中的最小数字,间隔中最大数字的索引和所有数字的XOR。一切运行良好,但大数据的运行速度确实很慢,而且我不知道如何提高工作速度。我还附加了代码和大数据输入。

input.txt

#include <stdio.h>
#include <stdlib.h>

typedef struct {
  int id;
  int index;
} Censor;


int Xor(const int x, const int y, const Censor array[]) {
  int xor = array[x].id;
  if (x == y) {
    return xor;
  }
  for (int i = x + 1; i <= y; i++) {
    xor ^= array[i].id;
  }
  return xor;
}

int int_cmp(const void *a, const void *b) { 
    const Censor *ia = (const Censor *)a; 
    const Censor *ib = (const Censor *)b;
    return (ia->id  - ib->id); 
} 

int LowestId(const int x, const int y, Censor array[]) {
  int id = array[x].id;
  if (x == y) {
    return id;
  }
  qsort(array, y - x + 1, sizeof(Censor), int_cmp);
  return array[0].id;
}

int HighestIdIndex(const int x, const int y, Censor array[]) {
  int index = array[x].index;
  if (x == y) {
    return index;
  }
  qsort(array, y - x + 1, sizeof(Censor), int_cmp);
  return array[y].index;
}

int main() {
  int t, n, q, b, e;
  int max = 100;
  int count = 0;
  int *output = (int *)malloc(max * sizeof(output));
  scanf("%d", &t); //number of sets
  for (int i = 0; i < t; i++) {
    scanf("%d %d", &n, &q);
    //I am making 3 separate arrays for numbers, because some of them are being sorted and some of them not
    Censor lowest_id[n];
    Censor highest_id_index[n];
    Censor xor[n];
    //This loop fills arrays with the numbers to be processed
    for (int j = 0; j < n; j++) {
      scanf("%d", &(lowest_id[j].id));
      lowest_id[j].index = j;
      highest_id_index[j].id = lowest_id[j].id;
      highest_id_index[j].index = j;
      xor[j].id = lowest_id[j].id;
      xor[j].index = j;
    }
    // Now I am scanning intervals and creating output. Output is being stored in one dynamically allocated array.
    for (int k = 0; k < q; k++) {
      scanf("%d %d", &b, &e);
      if (count + 3 >= max) {
        max *=2;
        int *tmp = (int *)realloc(output, max * sizeof(tmp));
        if (tmp == NULL) {
          return 1;
        } else {
          output = tmp;
        }
      }
      output[count++] = LowestId(b, e, lowest_id);
      output[count++] = HighestIdIndex(b, e, highest_id_index);  
      output[count++] = Xor(b, e, xor);
    }
  }
  printf("---------------------\n");
  for (int i = 0; i < count; i++) {
    printf("%d\n", output[i]);
  }
  free(output);
  return 0;
}

1 个答案:

答案 0 :(得分:0)

感谢@DanMašek和@Alex Lop。在这种情况下,无需对子数组进行排序。更容易的是以线性复杂度遍历子数组。