Question

这是简单的代码，可重现我得到的错误：

#include <math.h> 
#include <iostream>
//#include <omp.h>
//handling Not a number exception:
#include <fenv.h>
#include <signal.h>
#include "unistd.h"

// SIGFPE handler: reports the floating-point trap on stdout and terminates
// the process.
//
// Only async-signal-safe calls are used here: printf() and exit() are not on
// the POSIX list of functions that may be called from a signal handler,
// whereas write() and _exit() are. The process exits with a non-zero status
// so that a trapped floating-point exception is not reported as success.
//
// sig: the delivered signal number (unused; the handler is installed for
//      SIGFPE only).
void handler(int sig)
{
  (void)sig;
  static const char msg[] = "Floating Point Exception\n";
  // The result of write() is deliberately ignored: nothing useful can be
  // done about a failed write while the process is being torn down.
  const ssize_t written = write(STDOUT_FILENO, msg, sizeof(msg) - 1);
  (void)written;
  _exit(1);
}
// EKCOR is defined here but not referenced anywhere in this listing.
#define EKCOR
// Coefficient of the pair potential alpha/r summed in init() and function().
const float alpha=200.0/137;
// Number of particles (size of the global array p).
const int N=4096;//4096;//8192;//16384;
// Mass assigned to every particle in init().
const float md=940;
// Initial per-particle kinetic energy: init() sets each speed to sqrt(2*Ep/md).
const float Ep=0.1f;
// Total energy K1+P1 computed by init(); read by function().
float E1;
// Not referenced anywhere in this listing.
int STEP=1;
// Aggregate of three single-precision components, in the order x, y, z.
// RandomDirection() brace-initialises it, so the member order matters.
struct float3
{
  float x;
  float y;
  float z;
};
// Total momentum (sum of m*v over all particles); written by init() and function().
float3 Pi;
// One particle record: eight floats in the order x, y, z, t, vx, vy, vz, m.
struct Particle
{
  float x, y, z;     // position components
  float t;           // not read or written elsewhere in this listing
  float vx, vy, vz;  // velocity components
  float m;           // mass
};
// Global array of the N particles; the attribute requests 64-byte alignment of the array.
Particle p[N] __attribute__((aligned(64)));
// Returns a random unit vector built from two rand() draws: the first picks
// the z component in [-1, 1], the second picks the angle around the z axis.
// The intermediate expressions are evaluated in double (because of the
// double literals) and then stored as float.
inline float3 RandomDirection()
{
  const float firstDraw = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
  const float zc = 2.0 * firstDraw - 1.;
  // Length of the projection onto the xy plane: sqrt(1 - zc^2).
  const float radial = sqrtf((1. + zc) * (1. - zc));
  const float secondDraw = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
  const float azimuth = M_PI * 2.0 * secondDraw;

  float3 dir;
  dir.x = radial * cosf(azimuth);
  dir.y = radial * sinf(azimuth);
  dir.z = zc;
  return dir;
}
// Recomputes, from the global particle array p, the total momentum (stored in
// Pi), the kinetic energy K and the pair potential energy, and prints them
// together with r=(E1-P3)/K.
//
// The potential energy is accumulated in two ways over the same terms:
//   P3 - per-particle partial sums (Penergy) that are then added together;
//   P4 - every term added directly into one float accumulator.
// Both are halved because each unordered pair is visited twice (i,j and j,i).
//
// NOTE(review): P3, P4, Penergy and K are single-precision accumulators. The
// two orders of summation round differently, which is the discrepancy the
// surrounding question asks about.
void function()
{
  float K=0.0;
  Pi={0.0, 0.0, 0.0};
  // Momentum components are accumulated in double.
  double Px=0.0;
  double Py=0.0;
  double Pz=0.0;
  float P3=0.0;
  float P4=0.0;
  //#1
  for(int i=0; i<N; ++i)
  {
    Px+=p[i].vx*p[i].m;
    Py+=p[i].vy*p[i].m;
    Pz+=p[i].vz*p[i].m;
    // Potential energy of particle i with respect to all other particles.
    float Penergy=0.0;
  #pragma novector
    for(int j=0; j<N; ++j) if(i!=j)
    {
      // Euclidean distance between particles i and j.
      float rdist1=sqrt((p[i].x-p[j].x)*(p[i].x-p[j].x)+(p[i].y-p[j].y)*(p[i].y-p[j].y)+(p[i].z-p[j].z)*(p[i].z-p[j].z));
      Penergy+=alpha/rdist1;
      P4+=alpha/rdist1;
    }
    P3+=Penergy;
    float v2=p[i].vx*p[i].vx+p[i].vy*p[i].vy+p[i].vz*p[i].vz;
    K+=p[i].m*v2/2;
  }
  // Each pair was counted twice.
  P4/=2;
  Pi.x=Px;
  Pi.y=Py;
  Pi.z=Pz;
  P3/=2;
  // E2 is not read after this line; the same sum is recomputed in the print below.
  float E2=K+P3;
  // Ratio of (initial total energy - current potential energy) to current kinetic energy.
  float r=(E1-P3)/K;
  std::cout<<"r="<<r<<",E1="<<E1<<",P3="<<P3<<",K="<<K<<std::endl;
  // rc is never read. With FE_INVALID trapping enabled in main(), a negative r
  // raises SIGFPE here.
  float rc=sqrt(r);
  std::cout<<"E2="<<K+P3<<",K="<<K<<",P3="<<P3<<",P4="<<P4<<",Px="<<Pi.x<<",Py="<<Pi.y<<",Pz="<<Pi.z<<std::endl;
}
// Fills the global particle array p and computes the initial total energy E1.
//
// Steps:
//   1. Each particle gets a random position (theta, phi and rr are each drawn
//      uniformly from [0,pi], [0,2*pi] and [0,RADIUS]); for i>0 the position
//      is redrawn until the nearest earlier particle is farther than 1.0.
//   2. Each particle gets mass md and a velocity of magnitude sqrt(2*Ep/md)
//      in a direction returned by RandomDirection().
//   3. The mean momentum is subtracted from every particle.
//   4. Momentum (Pi), kinetic energy K1 and potential energy are computed and
//      printed; E1=K1+P1 is stored for function().
//
// The potential energy is accumulated in two ways over the same terms:
//   P1 - every term added directly into one float accumulator;
//   P2 - per-particle partial sums (pp) that are then added together.
//
// NOTE(review): K1, P1, P2 and pp are single-precision accumulators. The two
// orders of summation round differently, which is the discrepancy the
// surrounding question asks about.
void init()
{
  const double pi=3.1415926536;   
  // Radius of the sphere the positions are drawn in: cube root of 50*N.
  float RADIUS=pow(50.0*N,1.0/3.0);
  Pi={0.0, 0.0, 0.0};
  double Px=0.0, Py=0.0, Pz=0.0;
  // NOTE(review): no enclosing omp parallel region is visible in this listing,
  // and <omp.h> is commented out at the top of the file - confirm this pragma
  // is intended.
#pragma omp single
  for(int i=0; i<N; ++i)
  {
    // Distance from particle i to the nearest earlier particle; 0 forces the
    // first pass through the rejection loop.
    float DISTANCE=0.0f;
    if(i>0)
    {
      // Redraw the position while some earlier particle is within distance 1.0.
      while(DISTANCE<=1.0f)
      {
        float theta=pi*static_cast <float> (rand())/(static_cast <float> (RAND_MAX));
        float phi=2*pi*static_cast <float> (rand())/(static_cast <float> (RAND_MAX));
        float rr=RADIUS*static_cast <float> (rand())/(static_cast <float> (RAND_MAX));       
        // Spherical-to-Cartesian conversion.
        p[i].x =rr*sin(theta)*cos(phi);
        p[i].y =rr*sin(theta)*sin(phi);
        p[i].z =rr*cos(theta);
        // Starting value for the minimum search below.
        DISTANCE=10000.0f;
        // The pragma requests a vectorized min-reduction over DISTANCE.
      #pragma simd reduction(min:DISTANCE)     
        for(int j=0; j<i; ++j)
        {
          float dij=sqrt((p[i].x-p[j].x)*(p[i].x-p[j].x)+(p[i].y-p[j].y)*(p[i].y-p[j].y)+(p[i].z-p[j].z)*(p[i].z-p[j].z));
          if(dij<DISTANCE) DISTANCE=dij;
        }
      }
    }
    else
    {
      // First particle: no earlier particles to keep a distance from.
      float theta=pi*static_cast <float> (rand())/(static_cast <float> (RAND_MAX));
      float phi=2*pi*static_cast <float> (rand())/(static_cast <float> (RAND_MAX));
      float rr=RADIUS*static_cast <float> (rand())/(static_cast <float> (RAND_MAX));       
      p[i].x =rr*sin(theta)*cos(phi);
      p[i].y =rr*sin(theta)*sin(phi);
      p[i].z =rr*cos(theta);
    }
    // Speed for which the kinetic energy md*v^2/2 equals Ep.
    float modv=sqrt(2.0*Ep/md);
    float3 res=RandomDirection();
    float3 v;
    v.x=modv*res.x;
    v.y=modv*res.y;
    v.z=modv*res.z; 
    p[i].vx =v.x;
    p[i].vy =v.y;
    p[i].vz =v.z;
    p[i].m=md;
    Px+=p[i].vx*p[i].m;
    Py+=p[i].vy*p[i].m;
    Pz+=p[i].vz*p[i].m;   
  }
  // Mean momentum per particle.
  Px/=N;
  Py/=N;
  Pz/=N;
  // Subtract the mean momentum from every particle.
#pragma novector
  for(int i=0; i<N; ++i)
  {
    p[i].vx-=Px/p[i].m;
    p[i].vy-=Py/p[i].m;
    p[i].vz-=Pz/p[i].m;
  }
  Px=0.0, Py=0.0, Pz=0.0;
  float K1=0.0;
  float P1=0.0;
  float P2=0.0;
  //#2
#pragma novector
  for(int i=0; i<N; ++i)
  {
    Px+=p[i].vx*p[i].m;
    Py+=p[i].vy*p[i].m;
    Pz+=p[i].vz*p[i].m;
    // Accumulates v^2 only; the common factor md/2 is applied after the loop.
    K1+=p[i].vx*p[i].vx+p[i].vy*p[i].vy+p[i].vz*p[i].vz;
    // Potential energy of particle i with respect to all other particles.
    float pp=0.0;
  #pragma novector
    for(int j=0; j<N; ++j) if(i!=j)
    {
       float rd=sqrt((p[i].x-p[j].x)*(p[i].x-p[j].x)+(p[i].y-p[j].y)*(p[i].y-p[j].y)+(p[i].z-p[j].z)*(p[i].z-p[j].z));
       P1+=alpha/rd;
       pp+=alpha/rd;
    }
    P2+=pp;
  }
  Pi.x=Px;
  Pi.y=Py;
  Pi.z=Pz;
  // Uses md for every particle (all masses were set to md above).
  K1*=md/2;
  // Each pair was counted twice.
  P1/=2;
  P2/=2;
  E1=K1+P1;
  std::cout<<"INIT Px="<<Pi.x<<" Py="<<Pi.y<<" Pz="<<Pi.z<<" K1="<<K1<<" P1="<<P1<<" P2="<<P2<<" E1="<<E1<<std::endl;
}

// Entry point: turns the selected floating-point exceptions into SIGFPE,
// installs the SIGFPE handler, then runs init() followed by function().
int main(int argc, char **argv)
{
  // Exceptions that should trap instead of silently producing NaN/Inf.
  const int trappedExceptions = FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW;
  feenableexcept(trappedExceptions);
  signal(SIGFPE, handler);

  init();
  function();
  return 0;
}

在N <1024时P1 = P2和P3 = P4。只有在N = 256时，才有很小的差异：

N = 256 P1 = 3492.48 P2 = 3492.5 P3 = 3492.5 P4 = 3492.48

但是在N = 1024和N> 1024时，差异变得越来越大：

N = 1024 P1 = 34968.6 P2 = 34969.7 P3 = 34969.7 P4 = 34968.6
  N = 2048 P1 = 114493 P2 = 114482 P3 = 114482 P4 = 114493
  N = 4096 P1 = 357880 P2 = 362032   r = -9.14142

在这里程序崩溃，因为r = -9.14142和sqrt（r）引发浮点异常。

我的操作系统是Fedora 23，处理器是Intel Core i7-3770，我使用的编译器是 gcc 5.3.1 版和 Intel C++ 编译器 icpc 17.0.1 版（如果这些信息有必要的话）。即使不使用OpenMP，两者也都会给出上述错误结果。

在代码下面给出了故障的描述。我的问题是：

为什么P1与P2不同、P3与P4不同，而且在N >= 1024时差异如此之大（代码可以用Intel（icpc）或gcc（g++）编译器不带任何参数进行编译）？该程序在1个线程中运行。它们必须具有相同的值！
我需要编写代码，使嵌套的 #1 和 #2 循环能够用如下方式并行化：

#pragma omp parallel for reduction(+:P)
for(int i=0; i<N; ++i)
{
  float PP=0.0;
  for(int j=0; j<N; ++j) if(i!=j)
  {
    float r=sqrt((p[i].x-p[j].x)*(p[i].x-p[j].x)+(p[i].y-p[j].y)*(p[i].y-p[j].y)+(p[i].z-p[j].z)*(p[i].z-p[j].z));
    PP+=alpha/r;
  }
  P+=PP;
}
P/=2;

并使用所有优化标志（对于Intel编译器，我使用了 -DCMAKE_CXX_FLAGS="-march=native -mtune=native -ipo16 -fp-model fast=2 -O3 -qopt-report=5 -mcmodel=large"）。但我做不到（即使只用 "-O0" 也是如此），可能是因为第 1 点中的问题导致得到了错误的值。

Answer 1

您可能还会对why floating point arithmetic don't usually do what you think it should do系列的randomascii感兴趣。这只是一篇文章的引文，探讨了为什么计算机在浮点（类似数学）的计算中不精确：

浮点运算并不精确。0.1之类的简单值无法用二进制浮点数精确表示，并且浮点数的有限精度意味着运算顺序或中间结果精度的细微变化都可能改变结果。这意味着，比较两个浮点数是否相等通常并不是您真正想要的。

（...）

以下是可能出现的不精确现象的一个例子：
// Start from the float nearest to 0.1.
float f = 0.1f;
float sum;
sum = 0;

// Ten successive float additions; each one may round.
for (int i = 0; i < 10; ++i)
    sum += f;
// A single multiplication whose result is stored in a float variable.
float product = f * 10;
// The last argument is the expression f * 10 passed to printf directly,
// without first being stored in a float variable.
printf("sum = %1.15f, mul = %1.15f, mul2 = %1.15f\n",
        sum, product, f * 10);
此代码尝试以三种不同的方式计算“一个”：重复加法和乘法的两个轻微变体。自然地，我们得到三个不同的结果，只有一个是1.0：
sum=1.000000119209290, mul=1.000000000000000,  mul2=1.000000014901161
（...）

以下是0.1，float（0.1）和double（0.1）的确切值：

==========================================================================
| Number     | Value                                                     |
|------------|-----------------------------------------------------------|
| 0.1        | 0.1 (of course)                                           |
| float 0.1  | 0.100000001490116119384765625                             |
| double 0.1 | 0.1000000000000000055511151231257827021181583404541015625 |
==========================================================================

解决之后，让我们看一下上面代码的结果：


sum = 1.000000119209290：此计算从一个舍入后的值开始，然后将其累加十次，每次加法都可能发生舍入，因此有很大的误差空间。最终结果不是1.0，也不是10 * float(0.1)。不过，它是1.0之上的下一个可表示的float值，因此非常接近。

mul = 1.000000000000000：此计算从舍入后的值开始，然后乘以10，因此出现误差的机会较少。事实证明，从0.1到float(0.1)的转换是向上舍入的，而在这种情况下乘以10恰好是向下舍入的，有时两次舍入会相互抵消。因此，我们出于错误的原因得到了正确的答案。也可以说这是错误的答案，因为它实际上并不是float(0.1)的十倍！

mul2 = 1.000000014901161：此计算从舍入后的值开始，然后以 double 精度乘以10，从而避免了任何后续的舍入误差。因此，我们得到了另一个不同的正确答案——10 * float(0.1) 的精确值（它可以存储在 double 中，但不能存储在 float 中）。


因此，答案一是不正确的，但它与正确值只相差一个 float（即一个最小可表示间隔）。答案二是正确的（但不精确），而答案三是完全正确的（但看起来是错误的）。

强调和标记是我的。关于randomascii的帖子甚至提出了解决该 inexactness 问题的一些可能解决方案，但它们并不能解决问题（他们只是将不精确性移至浮点数行的不同部分）。

因此，在处理浮点算术时，您永远不会得到 exact 的结果。但是您可以采取一些措施来提高计算的准确性：

增加浮点数的有效位数。C++的 float 具有24位二进制有效位（大约7位十进制有效数字），而 double 具有53位二进制有效位（大约15–17位十进制有效数字）
减少涉及的计算量（因此4.0*c比c+c+c+c更精确）
尝试确保您将按照完全相同的顺序进行完全相同的计算（只有这样，您才能== / !=两个值并得出合理的结果）

因此，例如，如果将代码float s（精度为7位）更改为double s（精度为17位），您将看到结果得到更准确的和显示更多数字。如果您尝试在代码中使用并行化，则您的计算可能（也可能不会，具体取决于实现方式）在不同的线程/核心以不同的顺序发生，从而导致所涉及的每个数字的浮点精度都大不相同。

例如，下面是使用double而不是float s的randomascii代码：

  // Same experiment as the float version, with every variable widened to double.
  double f = 0.1;
  double sum;
  sum = 0;

  // Ten successive double additions; each one may round.
  for (int i = 0; i < 10; ++i)
      sum += f;
  // A single multiplication.
  double product = f * 10;
  printf("sum = %1.15f, mul = %1.15f, mul2 = %1.15f\n",
          sum, product, f * 10);

哪个输出：

  sum = 1.000000000000000, mul = 1.000000000000000, mul2 = 1.000000000000000

这看似正确，但是当您将printf的精度从1.15f提高到1.17f时：

  sum = 0.99999999999999989, mul = 1.00000000000000000, mul2 = 1.00000000000000000

同样，您会看到不精确性逐渐蔓延。sum进行了+的10次操作，而mul和mul2进行了一次*的一次操作，因此这就是sum的不准确性大于其他两个不准确性的原因。

如果什至17位精度对您来说还不够，那么您可能对C ++ 任意精度 的解决方案感兴趣。

Definition of BigNum from wikipedia：

在计算机科学中，任意精度算术（也称为 bignum 算术、多精度算术，有时也称为无限精度算术）表示所执行计算的数字的精度位数仅受主机系统可用内存的限制。

（...）

任意精度用于算术速度不是限制因素的应用，或者要求对非常大的数得到精确结果的应用。

再次强调我的。

Here's a related answer suggesting a BigNum library for C++：

GNU多精度算法库可以满足您的需求http://gmplib.org/

这是以前使用GMP实现的代码（使用64位精度或大约21位有效数字）：

 // Compile like: g++ question.cpp -o out.tmp -lgmpxx -lgmp
 #include <stdio.h>
 #include <gmpxx.h>

 // Repeats the sum-versus-product experiment with GMP floats, then converts
 // each result to double for printing.
 int main(){
      // The second constructor argument (64) is the requested precision.
      mpf_class f("0.1", 64);
      mpf_class sum("0", 64);

      // Ten successive additions of f.
      for (int i = 0; i < 10; ++i)
          sum += f;
      mpf_class product = f * 10;
      // get_d() converts to double, so the printed digits are limited to
      // double precision.
      printf("sum = %1.17f, mul = %1.17f, mul2 = %1.17f\n",
             sum.get_d(), product.get_d(), ((mpf_class) (f * 10)).get_d());
 }

哪个输出：

  sum = 0.99999999999999989, mul = 0.99999999999999989, mul2 = 0.99999999999999989

这是以64位精度进行计算，然后舍入为53位有效位（C++的double）并打印出来的结果。

但是，您可以直接从GMP打印值：

 // Compile like: g++ question.cpp -o out.tmp -lgmpxx -lgmp
 #include <stdio.h>
 #include <gmpxx.h>
 #include <string>

 // Same GMP experiment, but the results are printed through get_str()
 // instead of being converted to double first.
 int main(){
      // The second constructor argument (64) is the requested precision.
      mpf_class f("0.1", 64);
      mpf_class sum("0", 64);

      // Ten successive additions of f.
      for (int i = 0; i < 10; ++i)
          sum += f;
      mpf_class product = f * 10;
      // NOTE(review): per the GMP C++ interface, get_str() takes the exponent
      // by reference as an output, so the initial value 10 is presumably
      // overwritten - confirm against the GMP manual.
      long exp = 10;
      int base = 10;
      int digits = 21;
      // NOTE(review): the returned strings presumably hold mantissa digits
      // only (the decimal point position is reported through exp) - confirm.
      printf("sum = %s, mul = %s, mul2 = %s\n",
             sum.get_str(exp, base, digits).c_str(),
             product.get_str(exp, base, digits).c_str(),
             ((mpf_class) (f * 10)).get_str(exp, base, digits).c_str());
 }

哪个输出：

      sum = 1, mul = 1, mul2 = 1

哪个结果比double表示更精确。您可以检查GMP C ++接口here和here。 但是请注意，任意精度库通常比内置float或double慢。好处是，为了提高精度，您只需要更改mpf_class variable(expression, precision);行。

也不要忘记查看PaulMcKenzie的建议Stack Overflow: Is floating point math broken?：

问题：

考虑以下代码：

0.1 + 0.2 == 0.3 -> false

0.1 + 0.2 -> 0.30000000000000004

为什么会出现这些错误？

答案：

二进制浮点数学就是这样。在大多数编程语言中，它基于IEEE 754标准。（...）问题的症结在于数字以这种格式表示为一个整数乘以2的幂；分母不是2的幂的有理数（例如0.1，即1/10）无法被精确表示。

程序中的常量0.2和0.3也将是其真实值的近似值。恰好与0.2最接近的 double 大于有理数0.2，而与0.3最接近的 double 小于有理数0.3。0.1与0.2之和最终大于有理数0.3，因此与代码中的常量不一致。

强调和标记是我的。

Answer 2

请注意，即使从理论上讲，P1应该等于P2，P3应该等于P4，这些都是浮点变量。更重要的是，它们是单精度浮点变量。根据计算顺序，您肯定会得到不同的结果。由于浮点表示的不精确性，每次计算都会累积误差。

请查看并运行以下代码（tst_float.cpp）：

/* g++ -Wall tst_float.cpp -o tst_float && ./tst_float */

#include <stdio.h>

int main()
{
    // Add 0.1 ten times; every += rounds the double sum back to float.
    float total = 0.0;
    for (int step = 0; step != 10; ++step) {
        total += 0.1;
    }

    // Exact comparison against 1.0, which the accumulated rounding defeats.
    const bool exact = (total == 1.0);
    if (!exact) {
        printf("uh-uh?\n");
    } else {
        printf("ok!\n");
    }
    printf("x == %10.9f\n", total);

    return 0;
}

我得到：

$ g++ -Wall tst_float.cpp -o tst_float && ./tst_float
uh-uh?
x == 1.000000119

总而言之，不要将浮点变量视为具有整数变量的精度。

Answer 3

您可能需要做更多分析，但是我的第一个猜测是您的求和循环正在引起问题。

减少精度损失的三种技术：

按数值从小到大的顺序对各项排序后再求和——如果数据尚未排序，这样做的代价可能太高。
Pairwise summation
Kahan summation

嵌套for循环的for（int i = 0; ...）{for（int j = 0; ...）{summation}}中的势能求和不起作用

3 个答案: