线程内的错误数组读取

时间:2017-11-29 14:37:12

标签: c arrays multithreading

我有一个多线程 C 程序,其中 4 个线程使用一些全局数组进行算术计算。下面是示例代码。

// Global tables read by the worker threads; allocated in Init_arrays().
// Each pointer addresses __m256 elements, so array_N[k] selects the k-th
// whole vector, not the k-th float.
__m256 *array_1;
__m256 *array_2;
__m256 *array_3;
// Byte alignment passed to posix_memalign for the tables above.
#define ALIGNMENT 32
// Loop increment used by the fill loops in Init_arrays().
#define SIMD_STEP 8

// Allocates the three global __m256 tables with ALIGNMENT-byte alignment.
// The bodies of the fill loops are elided in this excerpt.
// NOTE(review): the return values of posix_memalign are not checked here.
void Init_arrays()
{
   int i;
   // Element counts: 32, 4 and 2 __m256 values respectively.
   posix_memalign((void**) &array_1, ALIGNMENT, 32*sizeof(__m256));
   posix_memalign((void**) &array_2, ALIGNMENT, 4 *sizeof(__m256));
   posix_memalign((void**) &array_3, ALIGNMENT, 2 *sizeof(__m256));

   // i advances by SIMD_STEP: 256 / 8 = 32 iterations.
   for(i=0;i < 256; i+= SIMD_STEP)
   {
       // Filling array for the 1st stage
   }
   // 64 / 8 = 8 iterations.
   // NOTE(review): array_2 was allocated with only 4 __m256 elements;
   // confirm how the elided body indexes it.
   for(i=0;i < 64; i+= SIMD_STEP)
   {
       // Filling array for the 2nd stage
   }
   // 16 / 8 = 2 iterations.
   for(i=0;i < 16; i+= SIMD_STEP)
   {
       // Filling array for the 3rd stage
   }
}

// Thread entry point. thread_info is cast to struct thread_data*, from which
// the [start, stop) range and the input/output float buffers are read.
// The thread ends via pthread_exit(NULL); no value is returned to the joiner.
// NOTE(review): vec_a/vec_b/vec_c and T_fac1/T_fac2/T_fac3 are not declared
// in this excerpt, and ind3 is declared but never used here.
void *routine(void *thread_info)
{
  int n;
  unsigned t_start,t_stop;
  unsigned ind1, ind2, ind3;
  float *arr_in , *arr_out;
  struct thread_data *mydata;

  mydata = (struct thread_data*) thread_info;
  t_start = mydata->start;
  t_stop  = mydata->stop;
  arr_in  = mydata->input;
  arr_out = mydata->output;

  // n counts floats and advances by 8 per iteration.
  // NOTE(review): n is int while t_stop is unsigned, so the comparison is
  // performed in unsigned arithmetic.
  for (n = t_start; n < t_stop; n += 8)
  {  
    ind1 = 256 + n;
    ind2 = 512 + n;

    // Aligned loads from the float buffer at offsets n, 256 + n and 512 + n.
    vec_a = _mm256_load_ps((float *) (&arr_in[n   ]) );
    vec_b = _mm256_load_ps((float *) (&arr_in[ind1]) );
    vec_c = _mm256_load_ps((float *) (&arr_in[ind2]) );

    // NOTE(review): these tables are __m256*, so [n] indexes whole vectors
    // while n is a float offset stepping by 8. Init_arrays allocates only
    // 32, 4 and 2 elements, so these reads go out of bounds for most n.
    // The intended index is not shown; confirm (n / SIMD_STEP is one guess).
    T_fac1 = array_1[n];
    T_fac2 = array_2[n];
    T_fac3 = array_3[n];
    // print data 'printf()'

    // further computations

    // Aligned stores to the output buffer at the same three offsets.
    _mm256_store_ps((float *) (&arr_out[n   ]), (vec_a) );
    _mm256_store_ps((float *) (&arr_out[ind1]), (vec_b) );
    _mm256_store_ps((float *) (&arr_out[ind2]), (vec_c) );
  }   
  pthread_exit(NULL);
}

// Launches one worker thread per slice of the index range and then joins
// them. Every thread receives the same in/out buffers and its own
// [start, stop) range.
// thread_data_array, threads, QUARTER and NUM_THREADS are defined outside
// this excerpt.
// NOTE(review): the return value of pthread_create is not checked.
void foo(float* in,float* out)
{
  unsigned t,i=0;
  // t = 0, 64, 128, 192: four iterations, one thread created per iteration.
  for(t=0;t<256;t+=64)
  {
     thread_data_array[i].start    = t;
     // NOTE(review): presumably QUARTER == 64 so the slices tile [0, 256);
     // confirm.
     thread_data_array[i].stop = t+QUARTER;
     thread_data_array[i].input    = in;
     thread_data_array[i].output   = out;
     pthread_create(&threads[i],NULL,routine,(void*)&thread_data_array[i]);
     i++;
  }
    // Wait for the workers; a failed join is reported on stderr and the
    // loop continues.
    // NOTE(review): assumes NUM_THREADS matches the four threads created
    // above; confirm.
    for(i=0; i<NUM_THREADS; i++)
    {
       int rc = pthread_join(threads[i], NULL);
       if (rc)
       {
           fprintf(stderr, "failed to join thread #%u - %s\n",i, strerror(rc));
       }
    }
}

// Program entry: allocates two 32-byte-aligned buffers of 1024 floats,
// loads the inputs, builds the global tables and runs the threaded
// computation through foo().
// NOTE(review): the posix_memalign results are not checked, and the buffers
// are not freed in this excerpt.
int main()
{
  float *data1;
  float *data2;

  posix_memalign((void**)&data1, 32, 1024 * sizeof(float));
  posix_memalign((void**)&data2, 32, 1024 * sizeof(float));

  // NOTE(review): reals and imags are not declared in this excerpt, and
  // data1/data2 are not visibly filled before foo() reads them; confirm
  // against the full program.
  Load_inputs(reals,imags);//load data into the two arrays
  Init_arrays();
  // print data 'printf()'
  foo(data1,data2);
  return EXIT_SUCCESS;
 }

出于某种原因,在线程中读取数组(例如 array_1)时得到的结果与预期不符,我不知道背后的原因。下面显示了 array_1 应有的内容(左列是在 main 中打印的结果,右列是在线程中打印的结果):

     Display from the main                Display from the thread
RE = 1.000000    IM = -0.000000     RE = 1.000000    IM = -0.000000 
RE = 0.999981    IM = -0.006136     RE = 0.399624    IM = 0.671559 
RE = 0.999925    IM = -0.012272     RE = 0.416430    IM = 0.634393 
RE = 0.999831    IM = -0.018407     RE = 0.433094    IM = 0.595699 
RE = 0.999699    IM = -0.024541     RE = 0.449612    IM = 0.555570 
RE = 0.999529    IM = -0.030675     RE = 0.465977    IM = 0.514103 
RE = 0.999322    IM = -0.036807     RE = 0.482184    IM = 0.471397 
RE = 0.999078    IM = -0.042938     RE = 0.498228    IM = 0.427555 
RE = 0.998795    IM = -0.049068     // the same 
RE = 0.998476    IM = -0.055195     // the same 
RE = 0.998118    IM = -0.061321     // the same 
RE = 0.997723    IM = -0.067444     // the same 
RE = 0.997290    IM = -0.073565     // the same 
RE = 0.996820    IM = -0.079682     // the same 
RE = 0.996313    IM = -0.085797     // the same 
RE = 0.995767    IM = -0.091909     // the same 

有人知道产生这个错误结果的原因是什么吗?

1 个答案:

答案 0 :(得分:3)

给出

__m256 *array_1;
__m256 *array_2;
__m256 *array_3;
#define ALIGNMENT 32
#define SIMD_STEP 8

void Init_arrays()
{
   int i;
   posix_memalign((void**) &array_1, ALIGNMENT, 32*sizeof(__m256));
   posix_memalign((void**) &array_2, ALIGNMENT, 4 *sizeof(__m256));
   posix_memalign((void**) &array_3, ALIGNMENT, 2 *sizeof(__m256));
       .
       .
       .

此循环访问的数组元素远远超出了数组的范围:

for (n = t_start; n < t_stop; n += 8)
{
    .
    .
    .
   T_fac1 = array_1[n];
   T_fac2 = array_2[n];
   T_fac3 = array_3[n];

array_3 总共只有两个元素(分配大小为 2 * sizeof(__m256)),但索引 n 每次递增 8,因此除 n = 0 之外的访问都会越界。