我想使用AVX2指令将浮点缓冲区转换为int8缓冲区。
当我测量转换函数的时间时,我可以看到它非常稳定,但有时一种测量方法确实要比另一种测量方法高。
这是我的代码
#include <immintrin.h>
#include <stdio.h>
#include <time.h>
static __m64 func (const __m256* coef_8xFP32_,
float** input_1xFP32_ptr_)
{
__m256 input_8xFP32 = {};
__m256 coeffed_8xFP32 = {};
__m256i convert_8xINT32;
__m128i* convertLow_4xINT8_ptr = nullptr;
__m128i* convertHigh_4xINT8_ptr = nullptr;
__m128i packed_8xINT16 = {};
__m64* packedLow_8xINT8_ptr = nullptr;
__m64* packedHigh_8xINT8_ptr = nullptr;
__m64 packed_8xINT8 = {};
// init 8xFP32 with input data
input_8xFP32 = _mm256_set_ps((*input_1xFP32_ptr_)[7], (*input_1xFP32_ptr_)[6], (*input_1xFP32_ptr_)[5], (*input_1xFP32_ptr_)[4],
(*input_1xFP32_ptr_)[3], (*input_1xFP32_ptr_)[2], (*input_1xFP32_ptr_)[1], (*input_1xFP32_ptr_)[0]);
(*input_1xFP32_ptr_) += 8;
// multiple 8xFP32 per coef
coeffed_8xFP32 = _mm256_mul_ps(input_8xFP32, *coef_8xFP32_);
// convert 8xFP32 into 8xINT32
convert_8xINT32 = _mm256_cvtps_epi32 (coeffed_8xFP32);
// pack 8xINT32 into 8xINT16
convertLow_4xINT8_ptr = (__m128i*)&convert_8xINT32;
convertHigh_4xINT8_ptr = (__m128i*)&convert_8xINT32 + 1;
packed_8xINT16 = _mm_packs_epi32 (*convertLow_4xINT8_ptr, *convertHigh_4xINT8_ptr);
// pack 8xINT16 into 8xINT8
packedLow_8xINT8_ptr = (__m64*)&packed_8xINT16;
packedHigh_8xINT8_ptr = (__m64*)&packed_8xINT16 + 1;
packed_8xINT8 = _mm_packs_pi16 (*packedLow_8xINT8_ptr, *packedHigh_8xINT8_ptr);
return (packed_8xINT8);
}
int main(int argc, char** argv)
{
const int bufferSize = 8 * 10000;
float input[bufferSize] = {};
int8_t output[bufferSize] = {};
float coef = 1.;
const int statSize = 1000;
struct timespec start;
struct timespec end;
double val[statSize] = {};
double min = 1;
double max = 0;
double accu = 0;
double mean = 0;
for (int i = 0; i < statSize; i++)
{
srand(time(NULL));
for (int j = 0; j < bufferSize; j += 1)
input[j] = (rand() % 2560) / 10. - 128;
__m256 coef_8xFP32 = _mm256_set1_ps(coef);
float* input_1xFP32_ptr = (float*)input;
__m64* output_ptr_8xINT8 = (__m64*)output;
clock_gettime(CLOCK_REALTIME, &start);
for (int j = 0; j < bufferSize; j += 8)
{
*output_ptr_8xINT8 = func (&coef_8xFP32, &input_1xFP32_ptr);
++output_ptr_8xINT8;
}
clock_gettime(CLOCK_REALTIME, &end);
val[i] = (end.tv_sec - start.tv_sec) / 1e-3 + (end.tv_nsec - start.tv_nsec) / 1e6;
if (val[i] > max)
max = val[i];
if (val[i] < min)
min = val[i];
accu += val[i];
}
// for (int j = 0; j < bufferSize; j += 1)
// printf ("%d %.3f %d\n", j, input[j], output[j]);
mean = accu / statSize;
for (int i = 0; i < statSize; i++)
if (val[i] > ((max - mean) / 2 + mean))
printf ("%3d %f\n", i, val[i]);
printf ("min %f\n", min);
printf ("max %f\n", max);
printf ("mean %f\n", mean);
printf ("DELTA %f\n", max / mean);
return (0);
}
还有编译命令
g++ -Wall -Werror -O3 -std=c++11 -mavx2 -c floatToInt8.cpp -o floatToInt8.o
g++ floatToInt8.o -o floatToInt8
我期望最小值,最大值和平均值非常相似。 这是我的结果(我在几台Linux主机上进行了尝试,结果也相似):
0 0.060360
34 0.068733
92 0.067925
369 0.058768
565 0.059178
604 0.055466
621 0.065434
635 0.068510
min 0.036119
max 0.068733
mean 0.040500
DELTA 1.697113
如何解释这几个太高的值?