我正在探索:在C99中,简单循环的不同实现会如何随函数签名的不同而被自动向量化。
这是我的代码:
/* #define PRAGMA_SIMD _Pragma("simd") */
#define PRAGMA_SIMD
#ifdef __INTEL_COMPILER
#define ASSUME_ALIGNED(a) __assume_aligned(a,64)
#else
#define ASSUME_ALIGNED(a)
#endif
#ifndef ARRAY_RESTRICT
#define ARRAY_RESTRICT
#endif
/*
 * Fixed-length masked copy over 2048 elements, pointer parameters.
 * For each index, a[] receives b[] when c[] is strictly greater than zero
 * and 0.0 otherwise.  The selection is written as an if/else statement.
 */
void foo1(double * restrict a, const double * restrict b, const double * restrict c)
{
    ASSUME_ALIGNED(a);
    ASSUME_ALIGNED(b);
    ASSUME_ALIGNED(c);
    PRAGMA_SIMD
    for (int k = 0; k < 2048; ++k) {
        if (!(c[k] > 0)) {
            /* mask rejects this element */
            a[k] = 0.0;
        } else {
            a[k] = b[k];
        }
    }
}
/*
 * Fixed-length masked copy over 2048 elements, pointer parameters.
 * Same result as the if/else form, but the selection is a single
 * conditional expression: b[] where c[] > 0, else 0.0.
 */
void foo2(double * restrict a, const double * restrict b, const double * restrict c)
{
    ASSUME_ALIGNED(a);
    ASSUME_ALIGNED(b);
    ASSUME_ALIGNED(c);
    PRAGMA_SIMD
    for (int k = 0; k < 2048; ++k) {
        a[k] = (0 < c[k]) ? b[k] : 0.0;
    }
}
/* Undetermined size version */
/*
 * Masked copy over the first n elements, pointer parameters.
 * a[] receives b[] when c[] is strictly greater than zero and 0.0
 * otherwise; nothing is written when n <= 0.  if/else form.
 */
void foo3(int n, double * restrict a, const double * restrict b, const double * restrict c)
{
    ASSUME_ALIGNED(a);
    ASSUME_ALIGNED(b);
    ASSUME_ALIGNED(c);
    PRAGMA_SIMD
    for (int k = 0; k < n; ++k) {
        if (!(c[k] > 0)) {
            /* mask rejects this element */
            a[k] = 0.0;
        } else {
            a[k] = b[k];
        }
    }
}
/*
 * Masked copy over the first n elements, pointer parameters.
 * Conditional-expression form: b[] where c[] > 0, else 0.0.
 * Nothing is written when n <= 0.
 */
void foo4(int n, double * restrict a, const double * restrict b, const double * restrict c)
{
    ASSUME_ALIGNED(a);
    ASSUME_ALIGNED(b);
    ASSUME_ALIGNED(c);
    PRAGMA_SIMD
    for (int k = 0; k < n; ++k) {
        a[k] = (0 < c[k]) ? b[k] : 0.0;
    }
}
/* Static array versions */
/*
 * Fixed-length masked copy over 2048 elements, array-style parameters.
 * a[] receives b[] when c[] is strictly greater than zero and 0.0
 * otherwise.  if/else form.
 *
 * NOTE: ARRAY_RESTRICT sits before the declarator, so it qualifies the
 * element type.  That is harmless while the macro is empty, but the
 * compiler rejects it once the macro is defined as restrict.
 */
void foo5(double ARRAY_RESTRICT a[2048], const double ARRAY_RESTRICT b[2048], const double ARRAY_RESTRICT c[2048])
{
    ASSUME_ALIGNED(a);
    ASSUME_ALIGNED(b);
    ASSUME_ALIGNED(c);
    PRAGMA_SIMD
    for (int k = 0; k < 2048; ++k) {
        if (!(c[k] > 0)) {
            /* mask rejects this element */
            a[k] = 0.0;
        } else {
            a[k] = b[k];
        }
    }
}
/*
 * Fixed-length masked copy over 2048 elements, array-style parameters.
 * Conditional-expression form: b[] where c[] > 0, else 0.0.
 *
 * NOTE: ARRAY_RESTRICT sits before the declarator, so it qualifies the
 * element type.  That is harmless while the macro is empty, but the
 * compiler rejects it once the macro is defined as restrict.
 */
void foo6(double ARRAY_RESTRICT a[2048], const double ARRAY_RESTRICT b[2048], const double ARRAY_RESTRICT c[2048])
{
    ASSUME_ALIGNED(a);
    ASSUME_ALIGNED(b);
    ASSUME_ALIGNED(c);
    PRAGMA_SIMD
    for (int k = 0; k < 2048; ++k) {
        a[k] = (0 < c[k]) ? b[k] : 0.0;
    }
}
/* VLA versions */
/*
 * Masked copy over the first n elements, parameters spelled with a
 * run-time array bound.  a[] receives b[] when c[] is strictly greater
 * than zero and 0.0 otherwise; nothing is written when n <= 0.
 * if/else form.
 *
 * NOTE: ARRAY_RESTRICT sits before the declarator, so it qualifies the
 * element type.  That is harmless while the macro is empty, but the
 * compiler rejects it once the macro is defined as restrict.
 */
void foo7(int n, double ARRAY_RESTRICT a[n], const double ARRAY_RESTRICT b[n], const double ARRAY_RESTRICT c[n])
{
    ASSUME_ALIGNED(a);
    ASSUME_ALIGNED(b);
    ASSUME_ALIGNED(c);
    PRAGMA_SIMD
    for (int k = 0; k < n; ++k) {
        if (!(c[k] > 0)) {
            /* mask rejects this element */
            a[k] = 0.0;
        } else {
            a[k] = b[k];
        }
    }
}
/*
 * Masked copy over the first n elements, parameters spelled with a
 * run-time array bound.  Conditional-expression form: b[] where
 * c[] > 0, else 0.0.  Nothing is written when n <= 0.
 *
 * NOTE: ARRAY_RESTRICT sits before the declarator, so it qualifies the
 * element type.  That is harmless while the macro is empty, but the
 * compiler rejects it once the macro is defined as restrict.
 */
void foo8(int n, double ARRAY_RESTRICT a[n], const double ARRAY_RESTRICT b[n], const double ARRAY_RESTRICT c[n])
{
    ASSUME_ALIGNED(a);
    ASSUME_ALIGNED(b);
    ASSUME_ALIGNED(c);
    PRAGMA_SIMD
    for (int k = 0; k < n; ++k) {
        a[k] = (0 < c[k]) ? b[k] : 0.0;
    }
}
当我用以下命令编译时:
$ icc -O3 -std=c99 -opt-report5 -mavx -S foo.c
icc: remark #10397: optimization reports are generated in *.optrpt files in the output location
我看到VLA版本没有被自动向量化;但当我添加用于断言不存在别名的标志 -fno-alias 时,它们就被向量化了。因此我得出结论:应该在源代码中声明这一点,于是我尝试用以下命令编译:
$ icc -O3 -std=c99 -opt-report5 -mavx -DARRAY_RESTRICT=restrict -S foo.c
icc: remark #10397: optimization reports are generated in *.optrpt files in the output location
编译器错误输出包括
foo.c(98): error: "restrict" is not allowed
void foo7(int n, double ARRAY_RESTRICT a[n], const double ARRAY_RESTRICT b[n],
const double ARRAY_RESTRICT c[n])
^
但正如您所看到的,我的VLA参数上不允许使用 restrict。
所以我的问题是:在ISO C中没有办法断言没有VLA别名吗?
请注意,我可以在源代码中使用编译指示(pragma)来断言不存在别名——例如 simd、omp simd、ivdep 等等——并获得我想要的自动向量化,但这些都不是ISO C。
在这种情况下,ISO C意味着C的最新版本,当然在撰写本文时是C11。
答案 0 :(得分:5)
您的原始代码在我这里同样编译失败,错误信息例如:
void foo7(int n, double ARRAY_RESTRICT a[n], const double ARRAY_RESTRICT b[n], const double ARRAY_RESTRICT c[n])
^
restrict.c:126:1: error: invalid use of ‘restrict’
restrict.c:126:1: error: invalid use of ‘restrict’
restrict.c:145:1: error: invalid use of ‘restrict’
转移评论的选定部分
§6.7.6.3函数声明符(包括原型)有例5,它说下面的函数原型声明符是等价的:
void f(double (* restrict a)[5]);
void f(double a[restrict][5]);
void f(double a[restrict 3][5]);
void f(double a[restrict static 3][5]);
这是标准中唯一一处 restrict 与数组类型直接关联出现的地方。§6.7.6 总体上讲的是声明符,§6.7.6.2 讲的是数组声明符;在我看来,restrict 必须出现在数组维度的第一个组成部分之内。在您的上下文中,应该写成:
void foo7(int n, double a[ARRAY_RESTRICT n],
const double b[ARRAY_RESTRICT n],
const double c[ARRAY_RESTRICT n])
如果不是看到了标准中的例子,又恰好有你提出这个问题,我是不会相信这种写法的!请注意,这既适用于普通数组,也适用于VLA。
下面这份根据上述评论修订的代码,在相同的编译选项下可以干净地通过编译:
gcc -g -O3 -std=c11 -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes \
-Wold-style-definition -Wold-style-declaration -Werror -c new.restrict.c
这些编译选项要求非静态函数必须事先声明,因此文件顶部有这些函数的声明。我还在源代码中强制写入了 #define ARRAY_RESTRICT restrict,而不是把它作为编译选项传入。
编译器是在Ubuntu 14.04衍生产品上运行的GCC 4.9.2。
文件 new.restrict.c:
/* #define PRAGMA_SIMD _Pragma("simd") */
#define PRAGMA_SIMD
#ifdef __INTEL_COMPILER
#define ASSUME_ALIGNED(a) __assume_aligned(a, 64)
#else
#define ASSUME_ALIGNED(a)
#endif
#define ARRAY_RESTRICT restrict
#ifndef ARRAY_RESTRICT
#define ARRAY_RESTRICT
#endif
/*
 * Prototypes for the non-static functions defined below; they are present
 * so the file builds cleanly with -Wmissing-prototypes.
 * For the array-style parameters, ARRAY_RESTRICT is written inside the
 * square brackets, ahead of the bound.
 */
void foo1(double *restrict a, const double *restrict b, const double *restrict c);
void foo2(double *restrict a, const double *restrict b, const double *restrict c);
void foo3(int n, double *restrict a, const double *restrict b, const double *restrict c);
void foo4(int n, double *restrict a, const double *restrict b, const double *restrict c);
void foo5(double a[ARRAY_RESTRICT 2048], const double b[ARRAY_RESTRICT 2048], const double c[ARRAY_RESTRICT 2048]);
void foo6(double a[ARRAY_RESTRICT 2048], const double b[ARRAY_RESTRICT 2048], const double c[ARRAY_RESTRICT 2048]);
void foo7(int n, double a[ARRAY_RESTRICT n], const double b[ARRAY_RESTRICT n], const double c[ARRAY_RESTRICT n]);
void foo8(int n, double a[ARRAY_RESTRICT n], const double b[ARRAY_RESTRICT n], const double c[ARRAY_RESTRICT n]);
/*
 * Fixed-length masked copy over 2048 elements, restrict pointer parameters.
 * Each a[] element becomes the matching b[] element when the matching c[]
 * element is strictly positive, and 0.0 otherwise.  if/else form.
 */
void foo1(double *restrict a, const double *restrict b, const double *restrict c)
{
    ASSUME_ALIGNED(a);
    ASSUME_ALIGNED(b);
    ASSUME_ALIGNED(c);
    PRAGMA_SIMD
    for (int idx = 0; idx < 2048; ++idx)
    {
        if (!(c[idx] > 0))
        {
            /* mask rejects this element */
            a[idx] = 0.0;
        }
        else
        {
            a[idx] = b[idx];
        }
    }
}
/*
 * Fixed-length masked copy over 2048 elements, restrict pointer parameters.
 * Conditional-expression form: b[] where c[] > 0, else 0.0.
 */
void foo2(double *restrict a, const double *restrict b, const double *restrict c)
{
    ASSUME_ALIGNED(a);
    ASSUME_ALIGNED(b);
    ASSUME_ALIGNED(c);
    PRAGMA_SIMD
    for (int idx = 0; idx < 2048; ++idx)
    {
        a[idx] = (0 < c[idx]) ? b[idx] : 0.0;
    }
}
/* Undetermined size version */
/*
 * Masked copy over the first n elements, restrict pointer parameters.
 * Each a[] element becomes the matching b[] element when the matching c[]
 * element is strictly positive, and 0.0 otherwise.  Nothing is written
 * when n <= 0.  if/else form.
 */
void foo3(int n, double *restrict a, const double *restrict b, const double *restrict c)
{
    ASSUME_ALIGNED(a);
    ASSUME_ALIGNED(b);
    ASSUME_ALIGNED(c);
    PRAGMA_SIMD
    for (int idx = 0; idx < n; ++idx)
    {
        if (!(c[idx] > 0))
        {
            /* mask rejects this element */
            a[idx] = 0.0;
        }
        else
        {
            a[idx] = b[idx];
        }
    }
}
/*
 * Masked copy over the first n elements, restrict pointer parameters.
 * Conditional-expression form: b[] where c[] > 0, else 0.0.
 * Nothing is written when n <= 0.
 */
void foo4(int n, double *restrict a, const double *restrict b, const double *restrict c)
{
    ASSUME_ALIGNED(a);
    ASSUME_ALIGNED(b);
    ASSUME_ALIGNED(c);
    PRAGMA_SIMD
    for (int idx = 0; idx < n; ++idx)
    {
        a[idx] = (0 < c[idx]) ? b[idx] : 0.0;
    }
}
/* Static array versions */
/*
 * Fixed-length masked copy over 2048 elements, array-style parameters with
 * ARRAY_RESTRICT written inside the brackets.  Each a[] element becomes the
 * matching b[] element when the matching c[] element is strictly positive,
 * and 0.0 otherwise.  if/else form.
 */
void foo5(double a[ARRAY_RESTRICT 2048], const double b[ARRAY_RESTRICT 2048], const double c[ARRAY_RESTRICT 2048])
{
    ASSUME_ALIGNED(a);
    ASSUME_ALIGNED(b);
    ASSUME_ALIGNED(c);
    PRAGMA_SIMD
    for (int idx = 0; idx < 2048; ++idx)
    {
        if (!(c[idx] > 0))
        {
            /* mask rejects this element */
            a[idx] = 0.0;
        }
        else
        {
            a[idx] = b[idx];
        }
    }
}
/*
 * Fixed-length masked copy over 2048 elements, array-style parameters with
 * ARRAY_RESTRICT written inside the brackets.  Conditional-expression
 * form: b[] where c[] > 0, else 0.0.
 */
void foo6(double a[ARRAY_RESTRICT 2048], const double b[ARRAY_RESTRICT 2048], const double c[ARRAY_RESTRICT 2048])
{
    ASSUME_ALIGNED(a);
    ASSUME_ALIGNED(b);
    ASSUME_ALIGNED(c);
    PRAGMA_SIMD
    for (int idx = 0; idx < 2048; ++idx)
    {
        a[idx] = (0 < c[idx]) ? b[idx] : 0.0;
    }
}
/* VLA versions */
/*
 * Masked copy over the first n elements, parameters spelled with a run-time
 * array bound and ARRAY_RESTRICT written inside the brackets.  Each a[]
 * element becomes the matching b[] element when the matching c[] element is
 * strictly positive, and 0.0 otherwise.  Nothing is written when n <= 0.
 * if/else form.
 */
void foo7(int n, double a[ARRAY_RESTRICT n], const double b[ARRAY_RESTRICT n], const double c[ARRAY_RESTRICT n])
{
    ASSUME_ALIGNED(a);
    ASSUME_ALIGNED(b);
    ASSUME_ALIGNED(c);
    PRAGMA_SIMD
    for (int idx = 0; idx < n; ++idx)
    {
        if (!(c[idx] > 0))
        {
            /* mask rejects this element */
            a[idx] = 0.0;
        }
        else
        {
            a[idx] = b[idx];
        }
    }
}
/*
 * Masked copy over the first n elements, parameters spelled with a run-time
 * array bound and ARRAY_RESTRICT written inside the brackets.
 * Conditional-expression form: b[] where c[] > 0, else 0.0.
 * Nothing is written when n <= 0.
 */
void foo8(int n, double a[ARRAY_RESTRICT n], const double b[ARRAY_RESTRICT n], const double c[ARRAY_RESTRICT n])
{
    ASSUME_ALIGNED(a);
    ASSUME_ALIGNED(b);
    ASSUME_ALIGNED(c);
    PRAGMA_SIMD
    for (int idx = 0; idx < n; ++idx)
    {
        a[idx] = (0 < c[idx]) ? b[idx] : 0.0;
    }
}
答案 1 :(得分:4)
此代码中没有任何参数具有可变修改类型。除了 int n 之外,foo6 和 foo7 的函数签名完全相同。请参阅 Why do C and C++ compilers allow array lengths in function signatures when they're never enforced?。
这些都完全相同:
void foo8(int n, T *a);
void foo8(int n, T a[16]);
void foo8(int n, T a[n]);
void foo8(int n, T a[]); 这种写法几乎相同,但有一个特殊情况:如果 T 是不完整类型,则不允许这样写。
foo8
。
虽然数组声明符本身具有可变修改类型,但参数类型会先经过从“T 的数组”到“指向 T 的指针”的调整。因此,T a[n] 会被调整为 T *a,它不是可变修改类型;但是,void foo9(int n, T a[][n]); 会使 a 具有可变修改类型 T (*)[n]。
将 restrict 与数组声明符结合的最简单方法,其实是直接使用指针形式,如下:
void foo8(int n, T *restrict a ) {
尝试 void foo8(int n, T restrict a[]); 是行不通的,因为它等价于 void foo8(int n, T restrict *a);。restrict 是一个限定符,在语法上与其他限定符(如 const)的行为相同。
如Jonathan Leffler所述,还有另一种语法:
void foo8(int n, T a[restrict]) { // n is optional , as before
在这种情况下,允许用两种不同的方式指定同一件事似乎是多余的,但还存在 static 这种写法,它只能与数组声明符(而不是指针声明符)一起使用。如果您想同时使用 static 形式和 restrict,那么除了把 restrict 放在方括号内之外别无选择:
void foo8(int n, T a[restrict static n]) {
需要说明的是,最后一种情况仍然不是可变修改类型;static 是一个承诺:a 作为指针,指向一个至少含有 n 个元素的数组的首元素。
此外,调用函数时,编译器并不需要在编译期检查 static 所作的承诺(当然,如果编译器能够强制检查,那会很好)。
最后说明:原型中的 restrict 很可能没有任何效果,它只在函数定义中才有意义。