Question

以下代码是一个简化的案例，导致我在代码中看到的错误。当我手动调用A(const A&)的构造函数时，everthing很好，但是当我尝试将一个vector<A>复制到另一个时，我在A(const A& in) : vec(in.vec) {}行得到了一个段错误。

为什么会发生这种情况，我该如何处理我的代码呢？

编辑：所以我添加了@mpromonet的代码，它现在运行完成，但是它慢得多（超过3倍）和2.当我用-pg编译时找出来为什么，我在拨打_mm256_add_pd时会遇到段错误。

#include <immintrin.h>
#include <vector>
using std::vector;


struct A {
    union {
        struct {
            double a, b, c;
        };
        __m256d vec;
    };

    A() : a(0), b(0), c(0) {}
    A(const A& in) : vec(in.vec) {}
};

int main() {
    vector<A> e(10);
    vector<A> b;
    b = e;
}

编辑：此代码在我的程序中占大部分时间。用g++ --std=c++11 -ffast-math -march=native -O3 -fno-inline -g -pg编译。请参阅下面的调用图。我有一组Electron个对象，其中包含两个用Vector声明的__attribute__((aligned(32)))个对象。为了使数组对齐，我做

Electron* data_orig = new Electron[s+3]; 
Electron* data = (Electron*)((void*)data_orig + 64 - (size_t)data_orig % 64);

没有AVX

struct Vector {
    double x;
    double y;
    double z;

    Vector(double a, double b, double c) :
        x(a), y(b), z(c)
        {}

    Vector(const Vector& u) :
        x(u.x), y(u.y), z(u.z)
        {}


    Vector(const UnitVector& u);


    Vector operator*=(const double m) { x*=m; y*=m; z*=m; return *this; }
    Vector operator+=(const Vector& in) {
        x+=in.x; y+=in.y; z+=in.z;
        return *this;
    }
    ostream& operator<<(ostream& os) {
        os << "x:" << x << " y:" << y << " z:" << z;
        return os;
    }

};
Vector::Vector(const UnitVector& u) :
        x(u.x*u.mag), y(u.y*u.mag), z(u.z*u.mag)
        {}

使用AVX

struct Vector {
    union {
        struct {
            double x;
            double y;
            double z;
        };
        __m256d vec __attribute__ ((aligned (32)));
    };

    Vector(double a, double b, double c) :
        x(a), y(b), z(c)
        {}

    Vector(const Vector& in) : vec(in.vec) {}
    //: x(in.x), y(in.y), z(in.z) {}


    Vector(const UnitVector& u);

    Vector operator*=(const double m) {
        vec = _mm256_mul_pd(vec, _mm256_set1_pd(m));
        return *this;
    }

    Vector operator+=(const Vector& in) {
        vec = _mm256_add_pd(vec, in.vec);
        return *this;
    }
    ostream& operator<<(ostream& os) {
        os << "x:" << x << " y:" << y << " z:" << z;
        return os;
    }

};
Vector::Vector(const UnitVector& u) { vec = _mm256_mul_pd(u.vec, _mm256_set1_pd(u.mag));}

Call Graph without AVX

Answer 1

麻烦的是系统的分配器不保证32B对齐，编译器假定为__m256d类型。

听起来你的目标是铿锵？如果是这样，请执行以下操作：

typedef double __attribute__((vector_size(32),aligned(16))) m256d_unaligned;

然后在你的结构定义中使用：

m256d_unaligned vec;

告诉编译器不要假设向量是对齐的。（如果你的系统不能保证甚至16B对齐 - 不太可能 - 你可能需要进一步减少对齐的参数。）

更具侵入性（但可能性能更高）的解决方案是使用自定义分配器，保证32B对齐需要它的类型。

在构造函数中复制__m256d会导致segfault

1 个答案: