向量MATLAB代码比较中的快速查找值

时间:2016-09-03 11:57:47

标签: performance matlab vectorization mex

我正在使用MATLAB,根据两个输入值u、v,在两个向量OCT_EXP和OCT_LOG中查找值,并按如下条件输出val:

% val is 0 when either operand is 0; otherwise it is read from OCT_EXP at
% the position given by the two OCT_LOG entries (+1 for 1-based indexing).
if (( u == 0 )||( v == 0 ))
    val = 0;
else
    val = OCT_EXP( OCT_LOG(u) + OCT_LOG(v) + 1);
end

我尝试了三种方式:普通方式(非矢量化)、矢量化方式和mex方式。我原以为mex方式会是最快的,其次是矢量化方式。然而,测量耗时后发现,第一种方式(非矢量化)最快,而矢量化方式最慢。我的代码出了什么问题?谢谢大家

我希望加速这个函数,因为它会被调用很多次:约300,000次

第一种方式:

function val = gfmult_no_vec( u, v )
%GFMULT_NO_VEC  Scalar table-lookup product of u and v.
%   val = GFMULT_NO_VEC(u, v) returns 0 when u or v is 0. Otherwise it
%   returns OCT_EXP(OCT_LOG(u) + OCT_LOG(v) + 1).
%
%   u, v : scalars (the condition uses the short-circuit ||). Nonzero
%          values are used directly as indices into the 255-entry OCT_LOG
%          table, so they must be integers in 1..255.
%   val  : the looked-up table entry, or 0.
%
%   NOTE(review): the tables look like exp/log tables of an 8-bit finite
%   field (the function is named gfmult) - confirm against the caller.

% Exponent table: 510 entries, i.e. one 255-entry sequence stored twice so
% that the sum of two OCT_LOG values never needs to be wrapped.
OCT_EXP = [ 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38,...
   76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157,...
   39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35,...
   70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222,...
   161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60,...
   120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223, 163,...
   91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26, 52,...
   104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147, 59,...
   118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218,...
   169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85,...
   170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198,...
   145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171,...
   75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25,...
   50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81,...
   162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9,...
   18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11,...
   22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71,...
   142, 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38,...
   76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192,...
   157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159,...
   35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111,...
   222, 161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30,...
   60, 120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223,...
   163, 91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26,...
   52, 104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147,...
   59, 118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218,...
   169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85,...
   170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198,...
   145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171,...
   75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25,...
   50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81,...
   162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9,...
   18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11,...
   22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71,...
   142 ];

% Log table: 255 entries, indexed directly by a nonzero operand (1..255).
% The literal must not contain blank lines: inside [] a line break that is
% not preceded by ... starts a new matrix row.
OCT_LOG = [ 0, 1, 25, 2, 50, 26, 198, 3, 223, 51, 238, 27, 104, 199, 75, 4,...
   100, 224, 14, 52, 141, 239, 129, 28, 193, 105, 248, 200, 8, 76, 113, 5,...
   138, 101, 47, 225, 36, 15, 33, 53, 147, 142, 218, 240, 18, 130, 69,...
   29, 181, 194, 125, 106, 39, 249, 185, 201, 154, 9, 120, 77, 228, 114,...
   166, 6, 191, 139, 98, 102, 221, 48, 253, 226, 152, 37, 179, 16, 145,...
   34, 136, 54, 208, 148, 206, 143, 150, 219, 189, 241, 210, 19, 92,...
   131, 56, 70, 64, 30, 66, 182, 163, 195, 72, 126, 110, 107, 58, 40,...
   84, 250, 133, 186, 61, 202, 94, 155, 159, 10, 21, 121, 43, 78, 212,...
   229, 172, 115, 243, 167, 87, 7, 112, 192, 247, 140, 128, 99, 13, 103,...
   74, 222, 237, 49, 197, 254, 24, 227, 165, 153, 119, 38, 184, 180,...
   124, 17, 68, 146, 217, 35, 32, 137, 46, 55, 63, 209, 91, 149, 188,...
   207, 205, 144, 135, 151, 178, 220, 252, 190, 97, 242, 86, 211, 171,...
   20, 42, 93, 158, 132, 60, 57, 83, 71, 109, 65, 162, 31, 45, 67, 216,...
   183, 123, 164, 118, 196, 23, 73, 236, 127, 12, 111, 246, 108, 161,...
   59, 82, 41, 157, 85, 170, 251, 96, 134, 177, 187, 204, 62, 90, 203,...
   89, 95, 176, 156, 169, 160, 81, 11, 245, 22, 235, 122, 117, 44, 215,...
   79, 174, 213, 233, 230, 231, 173, 232, 116, 214, 244, 234, 168, 80,...
   88, 175 ];

    if (( u == 0 )||( v == 0 ))
        % A zero operand has no OCT_LOG entry (index 0 is invalid).
        val = 0;
    else
        % +1 converts the summed log values to a 1-based OCT_EXP index.
        val = OCT_EXP( OCT_LOG(u) + OCT_LOG(v) + 1);
    end

第二种方式:矢量化方式

function val = gfmult_vec( u, v )
%GFMULT_VEC  Element-wise table-lookup product of u and v.
%   val = GFMULT_VEC(u, v) returns an array the size of u. Positions where
%   u or v is 0 hold 0; every other position holds
%   OCT_EXP(OCT_LOG(u) + OCT_LOG(v) + 1) for the matching elements.
%
%   Nonzero elements are used as indices into the 255-entry OCT_LOG table,
%   so they must be integers in 1..255.

% One 255-entry period of the exponent table.
expCycle = [ 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38,...
   76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157,...
   39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35,...
   70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222,...
   161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60,...
   120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223, 163,...
   91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26, 52,...
   104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147, 59,...
   118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218,...
   169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85,...
   170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198,...
   145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171,...
   75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25,...
   50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81,...
   162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9,...
   18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11,...
   22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71,...
   142 ];

% The full table is the period stored twice back to back (510 entries), so
% the sum of two OCT_LOG values can be used as an index without wrapping.
OCT_EXP = [ expCycle, expCycle ];

% Log table, indexed directly by a nonzero operand.
OCT_LOG = [ 0, 1, 25, 2, 50, 26, 198, 3, 223, 51, 238, 27, 104, 199, 75, 4,...
   100, 224, 14, 52, 141, 239, 129, 28, 193, 105, 248, 200, 8, 76, 113, 5,...
   138, 101, 47, 225, 36, 15, 33, 53, 147, 142, 218, 240, 18, 130, 69,...
   29, 181, 194, 125, 106, 39, 249, 185, 201, 154, 9, 120, 77, 228, 114,...
   166, 6, 191, 139, 98, 102, 221, 48, 253, 226, 152, 37, 179, 16, 145,...
   34, 136, 54, 208, 148, 206, 143, 150, 219, 189, 241, 210, 19, 92,...
   131, 56, 70, 64, 30, 66, 182, 163, 195, 72, 126, 110, 107, 58, 40,...
   84, 250, 133, 186, 61, 202, 94, 155, 159, 10, 21, 121, 43, 78, 212,...
   229, 172, 115, 243, 167, 87, 7, 112, 192, 247, 140, 128, 99, 13, 103,...
   74, 222, 237, 49, 197, 254, 24, 227, 165, 153, 119, 38, 184, 180,...
   124, 17, 68, 146, 217, 35, 32, 137, 46, 55, 63, 209, 91, 149, 188,...
   207, 205, 144, 135, 151, 178, 220, 252, 190, 97, 242, 86, 211, 171,...
   20, 42, 93, 158, 132, 60, 57, 83, 71, 109, 65, 162, 31, 45, 67, 216,...
   183, 123, 164, 118, 196, 23, 73, 236, 127, 12, 111, 246, 108, 161,...
   59, 82, 41, 157, 85, 170, 251, 96, 134, 177, 187, 204, 62, 90, 203,...
   89, 95, 176, 156, 169, 160, 81, 11, 245, 22, 235, 122, 117, 44, 215,...
   79, 174, 213, 233, 230, 231, 173, 232, 116, 214, 244, 234, 168, 80,...
   88, 175 ];

    % Start from all zeros; only positions with two nonzero operands are
    % overwritten below.
    val = zeros(size(u));
    bothNonzero = ( u ~= 0 ) & ( v ~= 0 );

    % +1 converts the summed log values to 1-based OCT_EXP indices.
    logSum = OCT_LOG(u(bothNonzero)) + OCT_LOG(v(bothNonzero));
    val(bothNonzero) = OCT_EXP( logSum + 1 );
end

最后一种方式:mex代码

#include "mex.h"
/*
 * Table-lookup product of u and v.
 *
 * Returns 0 when u or v is 0; otherwise returns
 * OCT_EXP[OCT_LOG[u-1] + OCT_LOG[v-1]] (0-based indexing).
 *
 * u, v: nonzero values are truncated to int and used as indices into the
 *       255-entry OCT_LOG table, so they must lie in [1, 256). No bounds
 *       check is done here; the caller must guarantee the range.
 *
 * NOTE(review): the tables look like exp/log tables of an 8-bit finite
 * field (the MATLAB counterparts are named gfmult_*) - confirm.
 */
double look_up(double u, double v)
{
   /* static const: the tables are built once instead of being copied into
      the stack frame on every call (automatic arrays with an initializer
      are re-initialised each time the function is entered). */
   static const double OCT_EXP [510] = { 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38,
   76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157,
   39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35,
   70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222,
   161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60,
   120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223, 163,
   91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26, 52,
   104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147, 59,
   118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218,
   169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85,
   170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198,
   145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171,
   75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25,
   50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81,
   162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9,
   18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11,
   22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71,
   142, 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38,
   76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192,
   157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159,
   35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111,
   222, 161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30,
   60, 120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223,
   163, 91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26,
   52, 104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147,
   59, 118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218,
   169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85,
   170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198,
   145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171,
   75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25,
   50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81,
   162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9,
   18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11,
   22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71,
   142 };

   static const double OCT_LOG[255] = { 0, 1, 25, 2, 50, 26, 198, 3, 223, 51, 238, 27, 104, 199, 75, 4,
   100, 224, 14, 52, 141, 239, 129, 28, 193, 105, 248, 200, 8, 76, 113, 5,
   138, 101, 47, 225, 36, 15, 33, 53, 147, 142, 218, 240, 18, 130, 69,
   29, 181, 194, 125, 106, 39, 249, 185, 201, 154, 9, 120, 77, 228, 114,
   166, 6, 191, 139, 98, 102, 221, 48, 253, 226, 152, 37, 179, 16, 145,
   34, 136, 54, 208, 148, 206, 143, 150, 219, 189, 241, 210, 19, 92,
   131, 56, 70, 64, 30, 66, 182, 163, 195, 72, 126, 110, 107, 58, 40,
   84, 250, 133, 186, 61, 202, 94, 155, 159, 10, 21, 121, 43, 78, 212,
   229, 172, 115, 243, 167, 87, 7, 112, 192, 247, 140, 128, 99, 13, 103,
   74, 222, 237, 49, 197, 254, 24, 227, 165, 153, 119, 38, 184, 180,
   124, 17, 68, 146, 217, 35, 32, 137, 46, 55, 63, 209, 91, 149, 188,
   207, 205, 144, 135, 151, 178, 220, 252, 190, 97, 242, 86, 211, 171,
   20, 42, 93, 158, 132, 60, 57, 83, 71, 109, 65, 162, 31, 45, 67, 216,
   183, 123, 164, 118, 196, 23, 73, 236, 127, 12, 111, 246, 108, 161,
   59, 82, 41, 157, 85, 170, 251, 96, 134, 177, 187, 204, 62, 90, 203,
   89, 95, 176, 156, 169, 160, 81, 11, 245, 22, 235, 122, 117, 44, 215,
   79, 174, 213, 233, 230, 231, 173, 232, 116, 214, 244, 234, 168, 80,
   88, 175 };

    if (( u == 0 )||( v == 0 ))
        return 0;
    else
    {
        /* (int)(x) instead of the C++-only int(x), so the file builds as
           either C or C++. The -1 maps the 1-based operand to a 0-based
           OCT_LOG slot; the summed logs are already a 0-based OCT_EXP
           index. */
        const int index = (int)(OCT_LOG[(int)(u - 1)] + OCT_LOG[(int)(v - 1)]);
        return OCT_EXP[index];
    }

}


void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
    double *u, *v, *uv;
    int mrows, ncols;
    plhs[0] = mxCreateDoubleMatrix(1,1, mxREAL);
    /* Assign pointers to each input and output. */
     u = mxGetPr(prhs[0]);    
     v = mxGetPr(prhs[1]);
     uv = mxGetPr(plhs[0]);
    *uv = look_up(*u, *v);
}

测量代码

function main
%MAIN  Time the three gfmult implementations with timeit.
%   Each nested helper calls one implementation for every pair of operands
%   in 1..200 x 1..200. The three timings are displayed as t1, t2 and t3
%   because the assignments are deliberately left without a semicolon.

f1 = @() test1();
f2 = @() test2();
f3 = @() test3();

t1 = timeit(f1)

t2 = timeit(f2)

t3 = timeit(f3)

    % Scalar (non-vectorized) implementation.
    function test1()
        for a = 1:200
            for b = 1:200
                gfmult_no_vec(a, b);
            end
        end
    end

    % Vectorized implementation, still called with scalar operands.
    function test2()
        for a = 1:200
            for b = 1:200
                gfmult_vec(a, b);
            end
        end
    end

    % MEX implementation.
    function test3()
        for a = 1:200
            for b = 1:200
                gfmult_mex(a, b);
            end
        end
    end
end

报告时间 t1 = 0.1934, t2 = 1.1739, t3 = 0.3584

1 个答案:

答案 0 :(得分:2)

此解决方案在我的计算机上耗时 0.0006 秒(并且它是矢量化的):

// Registers TestController on the AngularJS module 'test'.
angular
  .module('test',[])
  .controller('TestController',TestController);

/**
 * Controller exposing a boolean `state` flag together with `turnOn` and
 * `turnOff`, which set the flag to true and false respectively.
 */
function TestController() {
  var testVm = this;
  testVm.turnOn = turnOn;
  testVm.turnOff = turnOff;
  testVm.state = false;

  function turnOn() {
    testVm.state = true;
  }

  function turnOff() {
    testVm.state = false;
  }
}

u = randi(5,200,1)-1; % some arbitrary data including zeros
v = randi(5,200,1)-1; % some arbitrary data including zeros
[U,V] = ndgrid(u(u~=0),v(v~=0)); % make all possible combinations of u and v
val = zeros(length(u),length(v)); % initialize the output size, in case the last value in u or v is zero.
f = @(u,v) OCT_EXP(OCT_LOG(u)+OCT_LOG(v)+1);
val((u~=0),(v~=0)) = f(U,V);

现在,如果u和v都不为零,则val(u,v) = OCT_EXP(OCT_LOG(u)+OCT_LOG(v)+1);否则val(u,v) = 0。

如果您希望gfmult接受标量输入,那么您的第一种方法似乎是最快的方法。但是,我会在函数外部定义OCT_EXP和OCT_LOG并将它们传递给函数,而不是一遍又一遍地分配这些值:

OCT_LOG

在我的计算机中,它将运行时间从 0.21444 (使用您的版本)减少到 0.158 秒进行100K迭代,这不是一个很大的改进(0.05644秒),但如果你有数百万,那可能很重要。