我正在研究一种需要大量计算的算法,最高可达e + 30。我正在使用一个32位系统,编译器支持32位长/浮点/双精度。到目前为止,通过在线搜索,我了解到单精度浮点(FP)可以用于双精度FP。
从前面某人(Emulate “double” using 2 “float”s)提出的这个问题中,我发现这篇论文的算法与GPU中的双精度FP一起使用。我在C中实现太困惑了。我只需要四个基本的数学运算。有什么方法可以帮我更好地理解它吗?
提前致谢。
这是我正在制作的准则。它可能有我无法看到的错误,任何建议都将被赞赏以纠正错误,但这是我试图实现的安全。在算法中,POLYNOMIAL_ORDER应该能够按顺序上升(如果标准偏差较小,则可以在三阶处稳定)。我不确定的事情很少1)程序make_float()和make_float()是否正确,2)在程序中使用make_float()。
#define POLYNOMIAL_ORDER (3)
#define TC_TABLE_SIZE (14)
typedef struct vector_float2{
float x;
float y;
}float2;
typedef struct
{
float tc0;
float tc1;
float tc2;
float tc3;
}POLYNOMIALS;
typedef struct {
int16_t Temp;
int16_t Comp;
} TempCompPair;
volatile TempCompPair TCtable[TC_TABLE_SIZE] = {{22452,1651},
{25318,1444},
{28268,1133},
{31120,822},
{34027,511},
{36932,185},
{39770,-81},
{42685,-288},
{45531,-407},
{48425,-632},
{51401,-703},
{54460,-1143},
{57202,-1420},
{60027,-1652}};
POLYNOMIALS polynomials;
float matrix[TC_TABLE_SIZE][TC_TABLE_SIZE] = {0};
float average[TC_TABLE_SIZE] = {0};
float make_float(float x, float y)
{
return x+y;
}
float2 make_float2(float a, float b)
{
float2 f2 = {a,b};
return f2;
}
float2 quickTwoSum(float a, float b)
{
float s = a+b;
float e = b - (s - a);
float2 result = {s, e};
return result;
}
float2 twoSum(float a, float b)
{
volatile float s = a + b;
float v = s - a;
float e = (a - (s - v)) + (b - v);
float2 result = {s , e};
return result;
}
float2 df64_add(float2 a, float2 b)
{
float2 s,t;
s = twoSum(a.x, b.x);
t = twoSum(a.y, b.y);
s.y += t.x;
s = quickTwoSum(s.x, s.y);
s.y += t.y;
s = quickTwoSum(s.x, s.y);
return s;
}
float2 split(float a)
{
const float split = 4097; //(1<<12) + 1
float t = a *split;
float a_hi = t - (t - a);
float a_lo = a - a_hi;
float2 result = {a_hi, a_lo};
return result;
}
float2 twoProd(float a, float b)
{
float p = a*b;
float2 aS = split(a);
float2 bS = split(b);
float err = ((aS.x * bS.x - p)
+ aS.x * bS.y + aS.y * bS.x)
+ aS.y * bS.y;
float2 result = {p, err};
return result;
}
float2 df64_mult(float2 a, float2 b)
{
float2 p;
p = twoProd(a.x,b.x);
p.y += a.x * b.y;
p.y += a.y * b.x;
p = quickTwoSum(p.x,p.y);
return p;
}
float2 calculate_power(float base, int pow)
{
int i = 0;
float2 base_f2 = make_float2(base,0);
float2 result_f2 = {1,0};
if(pow == 0)
{
return result_f2;
}
if(pow > 0)
{
if(pow == 1)
{
return base_f2;
}
else
{
for(i = 0; i < pow; i++)
{
result_f2 = df64_mult(result_f2,base_f2);
}
return result_f2;
}
}
else
{
return result_f2;
//Mechanism for negative powers
}
}
void TComp_Polynomial()
{
int i;
int j;
int k;
int size;
float temp;
float2 sum = {0,0};
float2 result0 = {0,0};
float2 result1 = {0,0};
float x[TC_TABLE_SIZE];
float y[TC_TABLE_SIZE];
for(i = 0; i < TC_TABLE_SIZE; i++)
{
x[i] = (float) TCtable[i].Temp;
y[i] = (float) TCtable[i].Comp;
}
size = i;
for(i = 0; i <= POLYNOMIAL_ORDER; i++)
{
for(j = 0; j <= POLYNOMIAL_ORDER; j++)
{
sum.x = 0;
sum.y = 0;
for(k = 0; k < size; k++)
{
// Expression simplified below: **sum += pow(x[k],i+j)**
result0 = calculate_power(x[k], i+j);
sum = df64_add(result0,sum);
}
matrix[i][j] = make_float(sum.x,sum.y);
}
}
for(i = 0; i <= POLYNOMIAL_ORDER; i++)
{
sum.x = 0;
sum.y = 0;
for(j = 0; j < size; j++)
{
// Expression simplified below: **sum += y[j] * pow(x[j],i)**
result0 = calculate_power(x[j], i);
result1 = df64_mult( result0 , make_float2(y[j],0) );
sum = df64_add(result1,sum);
}
average[i] = make_float(sum.x,sum.y);
}
for(i = 0; i <= POLYNOMIAL_ORDER; i++)
{
for(j = 0; j <= POLYNOMIAL_ORDER; j++)
{
if(j != i)
{
if(matrix[i][i]!= 0)
{
temp = matrix[j][i]/matrix[i][i];
}
for(k = i; k < POLYNOMIAL_ORDER; k++)
{
matrix[j][k] -= temp*matrix[i][k];
}
average[j] -= temp*average[i];
}
}
}
if(matrix[0][0] != 0)
{
polynomials.tc0 = average[0]/matrix[0][0];
}
if(matrix[1][1] != 0)
{
polynomials.tc1 = average[1]/matrix[1][1];
}
if(matrix[2][2] != 0)
{
polynomials.tc2 = average[2]/matrix[2][2];
}
if(matrix[3][3] != 0)
{
polynomials.tc3 = average[3]/matrix[3][3];
}
}
然后在下面的表达式中使用struct polynomials.tc0 / 1/2/3
// Y = T^3 * X3 + T^2 * X2 + T^1 * X1 + X0 ;
double calculate_equation(uint16_t TEMP)
{
double Y;
if(POLYNOMIAL_ORDER == 1)
{
Y = polynomials.tc1*(double)TEMP + polynomials.tc0;
}
else if(POLYNOMIAL_ORDER == 2)
{
Y = (polynomials.tc2 * (double)TEMP + polynomials.tc1)*(double)TEMP + polynomials.tc0;
}
else if(POLYNOMIAL_ORDER == 3)
{
Y = ((polynomials.tc3 * (double)TEMP + polynomials.tc2)*(double)TEMP + polynomials.tc1)*(double)TEMP + polynomials.tc0;
}
else if(POLYNOMIAL_ORDER == 4)
{
Y = (((polynomials.tc4 * (double)TEMP + polynomials.tc3)*(double)TEMP + polynomials.tc2)*(double)TEMP + polynomials.tc1)*(double)TEMP + polynomials.tc0;
}
return Y;
}
标准偏差计算如下:
//sqrt(sigma(error^2))
for(i = 0; i < TC_TABLE_SIZE; i++)
{
actual_comp[i] =(int) calculate_equation(TCtable[i].Temp);
error[i] = TCtable[i].Comp - actual_comp[i] ;
error_sqr += error[i]*error[i];
printf("%u\t%d\t\t%e\n", TCtable[i].Temp, TCtable[i].Comp, actual_comp[i] );
}
error_sqrt = sqrt(error_sqr);
参考: http://hal.archives-ouvertes.fr/docs/00/06/33/56/PDF/float-float.pdf GuillaumeDaGraça,David Defour在图形硬件上实现浮动浮动运算符,第7届实数和计算机会议,RNC7。
答案 0 :(得分:0)
我能够在不使用双精度的情况下实现此代码,因为计算在Float范围内。 这是我的实施,请告诉我是否可以更好地优化它。
typedef struct
{ int64_t tc0;
int64_t tc1;
int64_t tc2;
int64_t tc3;
int64_t tc4;
}POLYNOMIALS;
POLYNOMIALS polynomials = {0,0,0,0,0};
int16_t TempCompIndex;
int64_t x[TC_TABLE_SIZE];
int64_t y[TC_TABLE_SIZE];
float matrix[POLYNOMIAL_ORDER+1][POLYNOMIAL_ORDER+1] = {0};
float average[POLYNOMIAL_ORDER+1] = {0};
void TComp_Polynomial()
{
int i;
int j;
int k;
int size;
float temp;
float sum = 0;
float powr = 0;
float prod;
int64_t x[TC_TABLE_SIZE];
int64_t y[TC_TABLE_SIZE];
for(i = 0; i < TC_TABLE_SIZE; i++)
{
x[i] = (int64_t) TCtable[i].Temp;
y[i] = (int64_t) TCtable[i].Comp<<PRECISION;
printf("x: %lld, y:%lld\n",x[i],y[i]);
}
size = i;
for(i = 0; i <= POLYNOMIAL_ORDER; i++)
{
for(j = 0; j <= POLYNOMIAL_ORDER; j++)
{
sum = 0;
powr = 0;
for(k = 0; k < size; k++)
{
//printf("x[%d]: %ld, i: %d ,j: %d ", k, x[k],i,j);
powr = pow(x[k],i+j);
//printf("Power: %f, sum: %f\n ",powr,sum);
sum += powr;
//printf("%f\r\n",powr);
//printf("sum: %lf\n",sum );
}
matrix[i][j] = sum;
printf("sum: %g\n",sum);
}
}
for(i = 0; i <= POLYNOMIAL_ORDER; i++)
{
sum = 0;
powr = 0;
for(j = 0; j < size; j++)
{
//sum += y[j] * pow(x[j],i)
//printf("sum: %lf, y[%d]: %lf, x[%d]: %lf^%d ",sum,j,y[j], i, x[j],i);
//printf("x[%d]:%lld ^ %d\t",j,x[j],i);
powr = (float) pow(x[j],i);
printf("powr: %f\t",powr);
prod = (float) y[j] * powr;
printf("prod:%f \t %lld \t", prod,y[j]);
sum += (float) prod;
printf("sum: %f \n",sum);
}
average[i] = sum;
//printf("#Avg: %f\n",average[i]);
}
printf("\n\n");
for(i = 0; i <= POLYNOMIAL_ORDER; i++)
{
for(j = 0; j <= POLYNOMIAL_ORDER; j++)
{
if(j != i)
{
if(matrix[i][i]!= 0)
{
//printf("matrix%d%d: %g / matrix%d%d: %g =\t ",j,i,matrix[j][i],i,i,matrix[i][i]);
temp = matrix[j][i]/matrix[i][i];
//printf("Temp: %g\n",temp);
}
for(k = i; k < POLYNOMIAL_ORDER; k++)
{
matrix[j][k] -= temp*matrix[i][k];
//printf("matrix[%d][%d]:%g, %g, matrix[%d][%d]:%g\n",j,k,matrix[j][k], temp,i,k,matrix[i][k]);
}
//printf("\n\n");
//print_matrix();
printf("\n\n");
//printf("avg%d: %g\ttemp: %g\tavg%d: %g\n\n",j,average[j],temp,i,average[i]);
average[j] -= temp*average[i];
printf("#Avg%d:%g\n",j,average[j]);
//print_average();
}
}
}
print_matrix();
print_average();
/* Calculate polynomial Coefficients (n+1) based on the POLYNOMIAL_ORDER (n) */
#ifndef POLYNOMIAL_ORDER
#elif POLYNOMIAL_ORDER == 0
if(matrix[0][0] != 0)
{
polynomials.tc0 = (int64_t) (average[0]/matrix[0][0]);
}
#elif POLYNOMIAL_ORDER == 1
if(matrix[1][1] != 0)
{
polynomials.tc0 = (int64_t) (average[0]/matrix[0][0]);
polynomials.tc1 = (int64_t) (average[1]/matrix[1][1]);
}
#elif POLYNOMIAL_ORDER == 2
if(matrix[2][2] != 0)
{
polynomials.tc0 = (int64_t) (average[0]/matrix[0][0]);
polynomials.tc1 = (int64_t) (average[1]/matrix[1][1]);
polynomials.tc2 = (int64_t) (average[2]/matrix[2][2]);
}
#elif POLYNOMIAL_ORDER == 3
if(matrix[3][3] != 0)
{
polynomials.tc0 = (int64_t) (average[0]/matrix[0][0]);
polynomials.tc1 = (int64_t) (average[1]/matrix[1][1]);
polynomials.tc2 = (int64_t) (average[2]/matrix[2][2]);
polynomials.tc3 = (int64_t) (average[3]/matrix[3][3]);
}
#elif POLYNOMIAL_ORDER == 4
if(matrix[4][4] != 0)
{
polynomials.tc0 = (int64_t) (average[0]/matrix[0][0]);
polynomials.tc1 = (int64_t) (average[1]/matrix[1][1]);
polynomials.tc2 = (int64_t) (average[2]/matrix[2][2]);
polynomials.tc3 = (int64_t) (average[3]/matrix[3][3]);
polynomials.tc4 = (int64_t) (average[4]/matrix[4][4]);
}
#endif
}
int16_t calculate_equation(uint16_t TEMP)
{
int64_t Y = 0;
int16_t TempComp = 0;
#ifndef POLYNOMIAL_ORDER
#elif POLYNOMIAL_ORDER == 0
Y = polynomials.tc0;
#elif POLYNOMIAL_ORDER == 1
Y = polynomials.tc1* ((int64_t)TEMP) + polynomials.tc0;
#elif POLYNOMIAL_ORDER == 2
Y = (polynomials.tc2 * ((int64_t)TEMP) + polynomials.tc1)*(int64_t)TEMP + polynomials.tc0;
#elif POLYNOMIAL_ORDER == 3
Y = ((polynomials.tc3 * ((int64_t)TEMP) + polynomials.tc2)*((int64_t)TEMP) + polynomials.tc1)*((int64_t)TEMP) + polynomials.tc0;
#elif POLYNOMIAL_ORDER == 4
Y = (((polynomials.tc4 * (int64_t)TEMP + polynomials.tc3)*(int64_t)TEMP + polynomials.tc2)*(int64_t)TEMP + polynomials.tc1)*(int64_t)TEMP + polynomials.tc0;
#endif
TempComp = (int16_t) (Y>>PRECISION_BITS);
return TempComp;
}
void main(){
int16_t TempComp = 0;
TempCompValue = (int16_t) calculate_equation(Mon_Temp);
}
注意:Calculate_Equation()每秒调用一次,并且为了避免浮点运算,需要不使用float,因此我在该函数中使用非浮点变量。
它适合我,并且在初步测试后没有发现任何错误。 感谢每一个人对我的帖子感兴趣,如果不是答案,就要学习一些新技术。谢谢@chux。