有没有办法对这个matlab代码进行矢量化?

时间:2016-03-04 19:12:05

标签: performance matlab vectorization nested-loops mex

我有以下代码:

function [Ps,Pd,Pv] = Arii2010_Modified_1Pixel(C11,C12_imag,C12_real,C13_imag,C13_real,C22,C23_imag,C23_real,C33)
MeanOrientationAngleStep = 1/100;
StandardDeviationStep = 1/100;
NumberOfAngles = floor(2*pi/MeanOrientationAngleStep);
NumberOfStandandardDeviations = 1/StandardDeviationStep;
for i = 0:NumberOfAngles
    for j = 0:NumberOfStandandardDeviations
        theta = -pi+i*MeanOrientationAngleStep;
        sigma = j*StandardDeviationStep;
        Fv = FindFv(sigma,theta,C11,C12_imag,C12_real,C13_imag,C13_real,C22,C23_imag,C23_real,C33);
    end
end
end

function Fv = FindFv(sigma,theta,C11,C12_imag,C12_real,C13_imag,C13_real,C22,C23_imag,C23_real,C33)
coef1 = 1/2-(((1-2*sigma^2)*(1-sigma^2)*cos(2*theta))/(2*(1+sigma^2)));
coef2 = ((1-4*sigma^2)*(1-3*sigma^2)*(1-2*sigma^2)*(1-sigma^2)*cos(4*theta))/(8*(1+sigma^2)*(1+2*sigma^2)*(1+3*sigma^2));
coef3 = ((1-2*sigma^2)*(1-sigma^2)*sin(2*theta))/(2*sqrt(2)*(1+sigma^2));
coef4 = ((1-4*sigma^2)*(1-3*sigma^2)*(1-2*sigma^2)*(1-sigma^2)*sin(4*theta))/(4*sqrt(2)*(1+sigma^2)*(1+2*sigma^2)*(1+3*sigma^2));
Fv1 = C11/(coef1+coef2);
aQuadratic1 = -2*coef2*(coef1+coef2);
aQuadratic2 = (coef3-coef4)^2;
aQuadratic = aQuadratic1 - aQuadratic2;
bQuadratic1 = 2*C11*coef2-C22*(coef1+coef2);
bQuadratic2 = -2*C12_real*(coef3-coef4);
bQuadratic = bQuadratic1 - bQuadratic2;
cQuadratic1 = C11*C22;
cQuadratic2 = C12_real^2+C12_imag^2;
cQuadratic = cQuadratic1 - cQuadratic2;
rQuadratic = roots([aQuadratic bQuadratic cQuadratic]);
sel1 = rQuadratic == real(rQuadratic);
rQuadratic = rQuadratic(sel1);
sel2 = rQuadratic == abs(rQuadratic);
rQuadratic = rQuadratic(sel2);
Fv2 = max(rQuadratic);
aCubic1 = 2*coef2*(1-coef1+coef2)*(coef1+coef2);
aCubic2 = 2*coef2*(coef3-coef4)*(coef3+coef4);
aCubic3 = -(coef1+coef2)*(coef3+coef4)^2;
aCubic4 = 2*coef2^3;
aCubic5 = -(1-coef1+coef2)*(coef3-coef4)^2;
aCubic = aCubic1+aCubic2-aCubic3-aCubic4-aCubic5;
bCubic1 = -2*coef2*(C11*(1-coef1+coef2)+C33*(coef1+coef2))+C22*(1-coef1+coef2)*(coef1+coef2);
bCubic2 = -2*coef2*(C23_real*(coef3-coef4)+C12_real*(coef3+coef4))+2*C13_real*(coef3-coef4)*(coef3+coef4);
bCubic3 = 2*C23_real*(coef1+coef2)*(coef3+coef4)+C11*(coef3+coef4)^2;
bCubic4 = (4*C13_real+C22)*coef2^2;
bCubic5 = 2*C12_real*(1-coef1+coef2)*(coef3-coef4)+C33*(coef3-coef4)^2;
bCubic = bCubic1+bCubic2-bCubic3-bCubic4-bCubic5;
cCubic1 = 2*C11*C33*coef2-C11*C22*(1-coef1+coef2)-C22*C33*(coef1+coef2);
cCubic2 = 2*(C12_real*C23_real-C12_imag*C23_imag)*coef2-2*(C13_real*C23_real+C13_imag*C23_imag)*...
    (coef3-coef4)-2*(C12_real*C13_real+C12_imag*C13_imag)*(coef3+coef4);
cCubic3 = -(C23_real^2+C23_imag^2)*(coef1+coef2)-2*C11*C23_real*(coef3+coef4);
cCubic4 = 2*(C13_real^2+C13_imag^2+C13_real*C22)*coef2;
cCubic5 = -(C12_real^2+C12_imag^2)*(1-coef1+coef2)-2*C33*C12_real*(coef3-coef4);
cCubic = cCubic1+cCubic2-cCubic3-cCubic4-cCubic5;
dCubic1 = C11*C22*C33;
dCubic2 = 2*(C12_real*(C13_real*C23_real+C13_imag*C23_imag)+C12_imag*(C13_imag*C23_real-C13_real*C23_imag));
dCubic3 = C11*(C23_real^2+C23_imag^2);
dCubic4 = C22*(C13_real^2+C13_imag^2);
dCubic5 = C33*(C12_real^2+C12_imag^2);
dCubic = dCubic1+dCubic2-dCubic3-dCubic4-dCubic5;
rCubic = roots([aCubic bCubic cCubic dCubic]);
sel3 = rCubic == real(rCubic);
rCubic = rCubic(sel3);
sel4 = rCubic == abs(rCubic);
rCubic = rCubic(sel4);
Fv3 = max(rCubic);
Fv = min([Fv1 Fv2 Fv3]);
end  

我想知道是否有任何函数,例如bsxfunblockprocarrayfun来对这部分代码进行矢量化?

for i = 0:NumberOfAngles
    for j = 0:NumberOfStandandardDeviations
        theta = -pi+i*MeanOrientationAngleStep;
        sigma = j*StandardDeviationStep;
        Fv = FindFv(sigma,theta,C11,C12_imag,C12_real,C13_imag,C13_real,C22,C23_imag,C23_real,C33);
    end
end

1 个答案:

答案 0 :(得分:1)

我评论说你应该将循环放在矢量化的worker函数中,或者/并使用编译的MEX代码。这是我对第一种方法的看法:

function [Ps,Pd,Pv] = Arii2010_Modified_1Pixel(C11,C12_imag,C12_real,C13_imag,C13_real,C22,C23_imag,C23_real,C33)
MeanOrientationAngleStep = 1/100;
StandardDeviationStep = 1/100;
NumberOfAngles = floor(2*pi/MeanOrientationAngleStep);
NumberOfStandandardDeviations = 1/StandardDeviationStep;

%// set up vectorized input
thetav = -pi + (0:NumberOfAngles)*MeanOrientationAngleStep;
sigmav = (0:NumberOfStandandardDeviations)*StandardDeviationStep;

%// grid for parameter pairs
[theta, sigma] = meshgrid(thetav,sigmav);

%// run vectorized function
Fv_vec = FindFv_vec(sigma,theta,C11,C12_imag,C12_real,C13_imag,C13_real,C22,C23_imag,C23_real,C33);
end


function Fv_vec = FindFv_vec(sigma,theta,C11,C12_imag,C12_real,C13_imag,C13_real,C22,C23_imag,C23_real,C33)
coef1 = 1./2-(((1-2.*sigma.^2).*(1-sigma.^2).*cos(2.*theta))./(2.*(1+sigma.^2)));
coef2 = ((1-4.*sigma.^2).*(1-3.*sigma.^2).*(1-2.*sigma.^2).*(1-sigma.^2).*cos(4.*theta))./(8.*(1+sigma.^2).*(1+2.*sigma.^2).*(1+3.*sigma.^2));
coef3 = ((1-2.*sigma.^2).*(1-sigma.^2).*sin(2.*theta))./(2.*sqrt(2).*(1+sigma.^2));
coef4 = ((1-4.*sigma.^2).*(1-3.*sigma.^2).*(1-2.*sigma.^2).*(1-sigma.^2).*sin(4.*theta))./(4.*sqrt(2).*(1+sigma.^2).*(1+2.*sigma.^2).*(1+3.*sigma.^2));
%// this is OK:
Fv1 = C11./(coef1+coef2);

aQuadratic1 = -2.*coef2.*(coef1+coef2);
aQuadratic2 = (coef3-coef4).^2;
aQuadratic = aQuadratic1 - aQuadratic2;
bQuadratic1 = 2.*C11.*coef2-C22.*(coef1+coef2);
bQuadratic2 = -2.*C12_real.*(coef3-coef4);
bQuadratic = bQuadratic1 - bQuadratic2;
cQuadratic1 = C11.*C22;
cQuadratic2 = C12_real.^2+C12_imag.^2;
cQuadratic = cQuadratic1 - cQuadratic2;
%// move this down to one block for looping:
%//rQuadratic = roots([aQuadratic bQuadratic cQuadratic]);
%//sel1 = rQuadratic == real(rQuadratic);
%//rQuadratic = rQuadratic(sel1);
%//sel2 = rQuadratic == abs(rQuadratic);
%//rQuadratic = rQuadratic(sel2);
%//Fv2 = max(rQuadratic);

aCubic1 = 2.*coef2.*(1-coef1+coef2).*(coef1+coef2);
aCubic2 = 2.*coef2.*(coef3-coef4).*(coef3+coef4);
aCubic3 = -(coef1+coef2).*(coef3+coef4).^2;
aCubic4 = 2.*coef2.^3;
aCubic5 = -(1-coef1+coef2).*(coef3-coef4).^2;
aCubic = aCubic1+aCubic2-aCubic3-aCubic4-aCubic5;
bCubic1 = -2.*coef2.*(C11.*(1-coef1+coef2)+C33.*(coef1+coef2))+C22.*(1-coef1+coef2).*(coef1+coef2);
bCubic2 = -2.*coef2.*(C23_real.*(coef3-coef4)+C12_real.*(coef3+coef4))+2.*C13_real.*(coef3-coef4).*(coef3+coef4);
bCubic3 = 2.*C23_real.*(coef1+coef2).*(coef3+coef4)+C11.*(coef3+coef4).^2;
bCubic4 = (4.*C13_real+C22).*coef2.^2;
bCubic5 = 2.*C12_real.*(1-coef1+coef2).*(coef3-coef4)+C33.*(coef3-coef4).^2;
bCubic = bCubic1+bCubic2-bCubic3-bCubic4-bCubic5;
cCubic1 = 2.*C11.*C33.*coef2-C11.*C22.*(1-coef1+coef2)-C22.*C33.*(coef1+coef2);
cCubic2 = 2.*(C12_real.*C23_real-C12_imag.*C23_imag).*coef2-2.*(C13_real.*C23_real+C13_imag.*C23_imag).*...
    (coef3-coef4)-2.*(C12_real.*C13_real+C12_imag.*C13_imag).*(coef3+coef4);
cCubic3 = -(C23_real.^2+C23_imag.^2).*(coef1+coef2)-2.*C11.*C23_real.*(coef3+coef4);
cCubic4 = 2.*(C13_real.^2+C13_imag.^2+C13_real.*C22).*coef2;
cCubic5 = -(C12_real.^2+C12_imag.^2).*(1-coef1+coef2)-2.*C33.*C12_real.*(coef3-coef4);
cCubic = cCubic1+cCubic2-cCubic3-cCubic4-cCubic5;
dCubic1 = C11.*C22.*C33;
dCubic2 = 2.*(C12_real.*(C13_real.*C23_real+C13_imag.*C23_imag)+C12_imag.*(C13_imag.*C23_real-C13_real.*C23_imag));
dCubic3 = C11.*(C23_real.^2+C23_imag.^2);
dCubic4 = C22.*(C13_real.^2+C13_imag.^2);
dCubic5 = C33.*(C12_real.^2+C12_imag.^2);
dCubic = dCubic1+dCubic2-dCubic3-dCubic4-dCubic5;
%// move this down to one block for looping:
%//rCubic = roots([aCubic bCubic cCubic dCubic]);
%//sel3 = rCubic == real(rCubic);
%//rCubic = rCubic(sel3);
%//sel4 = rCubic == abs(rCubic);
%//rCubic = rCubic(sel4);
%//Fv3 = max(rCubic);

Fv_vec = zeros(size(theta));
for k=1:numel(theta)
   rQuadratic = roots([aQuadratic(k) bQuadratic(k) cQuadratic(k)]);
   %//sel1 = rQuadratic == real(rQuadratic);
   %//rQuadratic = rQuadratic(sel1);
   %//sel2 = rQuadratic == abs(rQuadratic);
   %//rQuadratic = rQuadratic(sel2);

   %// suggestion instead: (with optional tol=1e-10 or something else earlier)
   rQuadratic = rQuadratic(abs(rQuadratic-abs(rQuadratic))<1e-10);  % allow some tolerance due to floating-point
   Fv2 = max(rQuadratic);

   rCubic = roots([aCubic(k) bCubic(k) cCubic(k) dCubic(k)]);
   %//sel3 = rCubic == real(rCubic);
   %//rCubic = rCubic(sel3);
   %//sel4 = rCubic == abs(rCubic);
   %//rCubic = rCubic(sel4);
   rCubic = rCubic(abs(rCubic-abs(rCubic))<1e-10);
   Fv3 = max(rCubic);
   Fv_vec(k) = min([Fv1(k); Fv2; Fv3]);  %// Fv1 is a matrix!
end 

end

由于你没有提供任何样本输入,我没有试图提出一个,所以我还没有对它进行测试。

我所做的是将每个算术运算更改为它们的矢量化对(.*./.^),并重构代码以使用单个循环,其中正在计算roots。输入thetasigma现在可以是矩阵,对应于输入向量对(用meshgrid构造)。我的解决方案假设所有输入参数都是标量。我也改变了找到最大绝对值的部分,允许由于浮点运算引起的一些小错误。