我正在尝试编写一些常用的实用函数,例如将gcc向量中的所有元素相加。
// Horizontal sum of an 8-byte GCC vector of float.
// in: the vector whose elements 0 and 1 are added.
// Returns in[0] + in[1].
inline float add_all(float const in __attribute__((vector_size(8))))
{
  float const first = in[0];
  float const second = in[1];
  return first + second;
}
// Horizontal sum of a 16-byte GCC vector of float.
// in: the vector whose elements 0..3 are added.
// Returns ((in[0] + in[1]) + in[2]) + in[3]; elements are accumulated
// left to right so the floating-point rounding order is fixed.
inline float add_all(float const in __attribute__((vector_size(16))))
{
  float total = in[0];
  for (int lane = 1; lane < 4; ++lane) {
    total += in[lane];
  }
  return total;
}
// Horizontal sum of a 16-byte GCC vector of double.
// in: the vector whose elements 0 and 1 are added.
// Returns in[0] + in[1].
inline double add_all(double const in __attribute__((vector_size(16))))
{
  double const first = in[0];
  double const second = in[1];
  return first + second;
}
// Horizontal sum of a 32-byte GCC vector of double.
// in: the vector whose elements 0..3 are added.
// Returns ((in[0] + in[1]) + in[2]) + in[3]; elements are accumulated
// left to right so the floating-point rounding order is fixed.
inline double add_all(double const in __attribute__((vector_size(32))))
{
  double total = in[0];
  for (int lane = 1; lane < 4; ++lane) {
    total += in[lane];
  }
  return total;
}
但是,在编译时,gcc报错:
In file included from matrix.hpp:5:0,
from matrix.cpp:3:
vector.hpp:22:1: error: 'float vxl::add_all(__vector(4) float)' conflicts with a previous declaration
}
^
vector.hpp:14:14: note: previous declaration 'float vxl::add_all(__vector(2) float)'
inline float add_all(float const in __attribute__((vector_size(8))))
^
vector.hpp:19:14: note: -fabi-version=6 (or =0) avoids this error with a change in mangling
inline float add_all(float const in __attribute__((vector_size(16))))
^
vector.hpp:32:1: error: 'double vxl::add_all(__vector(4) double)' conflicts with a previous declaration
}
^
vector.hpp:24:15: note: previous declaration 'double vxl::add_all(__vector(2) double)'
inline double add_all(double const in __attribute__((vector_size(16))))
^
vector.hpp:29:15: note: -fabi-version=6 (or =0) avoids this error with a change in mangling
inline double add_all(double const in __attribute__((vector_size(32))))
除了gcc建议的解决方法之外,是否还有其他解决方法?
答案 0 :(得分:4)
提供一个额外的默认参数,使这些函数具有不同的修饰名称(mangled name):
// Aliases for GCC vector-extension types. The letter gives the element
// type (f = float, d = double) and the number is the vector_size argument,
// i.e. the total size of the vector in bytes.
typedef float vector_f8 __attribute__((vector_size(8)));
typedef float vector_f16 __attribute__((vector_size(16)));
typedef double vector_d16 __attribute__((vector_size(16)));
typedef double vector_d32 __attribute__((vector_size(32)));
// Tag type: each distinct integer argument yields a distinct type, and the
// argument is exposed as the static constant `len`.
template <int Length>
struct vector_len {
  static const int len = Length;
};
// Sums elements 0 and 1 of a vector_f8.
// in: the vector to sum.
// _p: never read; it is an extra defaulted parameter whose only job is to
//     make this overload's signature (and so its mangled name) distinct.
// Returns in[0] + in[1].
float
add_all(vector_f8 in, vector_len<8> *_p = NULL) {
  float const first = in[0];
  float const second = in[1];
  return first + second;
}
// Sums elements 0..3 of a vector_f16, accumulating left to right.
// in: the vector to sum.
// _p: never read; it is an extra defaulted parameter whose only job is to
//     make this overload's signature (and so its mangled name) distinct.
// Returns ((in[0] + in[1]) + in[2]) + in[3].
float
add_all(vector_f16 in, vector_len<16> *_p = NULL) {
  float total = in[0];
  for (int lane = 1; lane < 4; ++lane) {
    total += in[lane];
  }
  return total;
}
// Sums elements 0 and 1 of a vector_d16.
// in: the vector to sum.
// _p: never read; it is an extra defaulted parameter whose only job is to
//     make this overload's signature (and so its mangled name) distinct.
// Returns in[0] + in[1] as a double. The return type is double rather than
// float so the sum of the double elements is not narrowed on return.
double
add_all(vector_d16 in, vector_len<16> *_p = NULL) {
  return in[0] + in[1];
}
// Sums elements 0..3 of a vector_d32, left to right.
// in: the vector to sum.
// _p: never read; it is an extra defaulted parameter whose only job is to
//     make this overload's signature (and so its mangled name) distinct.
// Returns in[0] + in[1] + in[2] + in[3] as a double. The return type is
// double rather than float so the sum of the double elements is not
// narrowed on return.
double
add_all(vector_d32 in, vector_len<32> *_p = NULL) {
  return in[0] + in[1] + in[2] + in[3];
}