我的数学库需要更快的四元数向量乘法例程。现在我正在使用规范v' = qv(q^-1)
,它产生与将矢量乘以四元数矩阵相同的结果,因此我对它的正确性充满信心。
到目前为止,我已经实施了3种替代“更快”的方法:
#1,我不知道从哪里得到这个:
v' = (q.xyz * 2 * dot(q.xyz, v)) + (v * (q.w*q.w - dot(q.xyz, q.zyx))) + (cross(q.xyz, v) * q.w * w)
实施为:
vec3 rotateVector(const quat& q, const vec3& v)
{
vec3 u(q.x, q.y, q.z);
float s = q.w;
return vec3(u * 2.0f * vec3::dot(u, v))
+ (v * (s*s - vec3::dot(u, u)))
+ (vec3::cross(u, v) * s * 2.0f);
}
#2,this fine blog提供
t = 2 * cross(q.xyz, v);
v' = v + q.w * t + cross(q.xyz, t);
实施为:
__m128 rotateVector(__m128 q, __m128 v)
{
__m128 temp = _mm_mul_ps(vec4::cross(q, v), _mm_set1_ps(2.0f));
return _mm_add_ps(
_mm_add_ps(v, _mm_mul_ps(_mm_shuffle_ps(q, q, _MM_SHUFFLE(3, 3, 3, 3)), temp)),
vec4::cross(q, temp));
}
和#3,来自众多来源,
v' = v + 2.0 * cross(cross(v, q.xyz) + q.w * v, q.xyz);
实施为:
__m128 rotateVector(__m128 q, __m128 v)
{
//return v + 2.0 * cross(cross(v, q.xyz) + q.w * v, q.xyz);
return _mm_add_ps(v,
_mm_mul_ps(_mm_set1_ps(2.0f),
vec4::cross(
_mm_add_ps(
_mm_mul_ps(_mm_shuffle_ps(q, q, _MM_SHUFFLE(3, 3, 3, 3)), v),
vec4::cross(v, q)),
q)));
}
所有这3个产生不正确的结果。但是,我注意到了一些有趣的模式。首先,#1和#2产生相同的结果。如果所述矩阵被转置,那么#3产生的结果与从矢量乘以导出矩阵得到的结果相同(我偶然发现了这个结果,以前我的quat-to-matrix代码假设了行主矩阵,这是不正确的)。
我的四元数的数据存储定义为:
union
{
__m128 data;
struct { float x, y, z, w; };
float f[4];
};
我的实施是否有缺陷,或者我在这里遗漏了什么?
答案 0 :(得分:1)
主要问题,如果要按四元数旋转3d矢量,则需要计算旋转矩阵的所有9个标量。在您的示例中,旋转矩阵的计算是IMPLICIT。计算顺序可能不是最佳的。
如果从四元数和乘法向量生成3x3矩阵,则应该具有相同数量的算术运算(底部为@see代码)。
我的建议。
尝试生成矩阵3x3并乘以矢量,测量速度并与之前进行比较。
分析显式解决方案,并尝试针对自定义架构进行优化。
尝试实现替代四元数乘法,并从方程q * v * q'导出乘法。
// ============ 替代乘法伪代码
/**
alternative way of quaternion multiplication,
can speedup multiplication for some systems (PDA for example)
http://mathforum.org/library/drmath/view/51464.html
http://www.lboro.ac.uk/departments/ma/gallery/quat/src/quat.ps
in provided code by url's many bugs, have to be rewriten.
*/
inline xxquaternion mul_alt( const xxquaternion& q) const {
float t0 = (x-y)*(q.y-q.x);
float t1 = (w+z)*(q.w+q.z);
float t2 = (w-z)*(q.y+q.x);
float t3 = (x+y)*(q.w-q.z);
float t4 = (x-z)*(q.z-q.y);
float t5 = (x+z)*(q.z+q.y);
float t6 = (w+y)*(q.w-q.x);
float t7 = (w-y)*(q.w+q.x);
float t8 = t5 + t6 + t7;
float t9 = (t4 + t8)*0.5;
return xxquaternion ( t3+t9-t6,
t2+t9-t7,
t1+t9-t8,
t0+t9-t5 );
// 9 multiplications 27 addidtions 8 variables
// but of couse we can clean 4 variables
/*
float r = w, i = z, j = y, k =x;
float br = q.w, bi = q.z, bj = q.y, bk =q.x;
float t0 = (k-j)*(bj-bk);
float t1 = (r+i)*(br+bi);
float t2 = (r-i)*(bj+bk);
float t3 = (k+j)*(br-bi);
float t4 = (k-i)*(bi-bj);
float t5 = (k+i)*(bi+bj);
float t6 = (r+j)*(br-bk);
float t7 = (r-j)*(br+bk);
float t8 = t5 + t6 + t7;
float t9 = (t4 + t8)*0.5;
float rr = t0+t9-t5;
float ri = t1+t9-t8;
float rj = t2+t9-t7;
float rk = t3+t9-t6;
return xxquaternion ( rk, rj, ri, rr );
*/
}
// ============ 显式矢量旋转变量
/**
rotate vector by quaternion
*/
inline vector3 rotate(const vector3& v)const{
xxquaternion q(v.x * w + v.z * y - v.y * z,
v.y * w + v.x * z - v.z * x,
v.z * w + v.y * x - v.x * y,
v.x * x + v.y * y + v.z * z);
return vector3(w * q.x + x * q.w + y * q.z - z * q.y,
w * q.y + y * q.w + z * q.x - x * q.z,
w * q.z + z * q.w + x * q.y - y * q.x)*( 1.0f/norm() );
// 29 multiplications, 20 addidtions, 4 variables
// 5
/*
// refrence implementation
xxquaternion r = (*this)*xxquaternion(v.x, v.y, v.z, 0)*this->inverted();
return vector3( r.x, r.y, r.z );
*/
/*
// alternative implementation
float wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2;
x2 = q.x + q.x; y2 = q.y + q.y; z2 = q.z + q.z;
xx = q.x * x2; xy = q.x * y2; xz = q.x * z2;
yy = q.y * y2; yz = q.y * z2; zz = q.z * z2;
wx = q.w * x2; wy = q.w * y2; wz = q.w * z2;
return vector3( v.x - v.x * (yy + zz) + v.y * (xy - wz) + v.z * (xz + wy),
v.y + v.x * (xy + wz) - v.y * (xx + zz) + v.z * (yz - wx),
v.z + v.x * (xz - wy) + v.y * (yz + wx) - v.z * (xx + yy) )*( 1.0f/norm() );
// 18 multiplications, 21 addidtions, 12 variables
*/
};
祝你好运。