Linux中的并行计算 - 效率

时间:2015-07-31 06:19:24

标签: linux multithreading performance matlab parallel-processing

我试图在Linux的Matlab中使用 parfor 运行代码。

有两个问题。

  1. 运行Linux的计算机比我的Windows计算机强大得多(Linux计算机有12个更强的核心,而Windows计算机只有4个核心)。但是,同样的代码在Linux上的运行速度却慢得多(261秒对457秒)。
  2. 你知道为什么会这样吗?在运行我的工作之前,我有什么必须做的,这会降低性能吗?或者,与在Windows中运行的代码相比,在Linux中运行时我需要在代码中修改哪些内容以便更有效地运行?

    1. 我感觉每次parfor循环结束时,parpool会话都会被关闭。屏幕左下角表示并行计算状态的四条竖线,在Linux中会先变为蓝色,然后再变为绿色,如此反复;而在Windows中,它们会一直保持绿色,直到整个作业结束。这是我的错觉,还是确实如此?
    2. 下面是占用大部分运行时间的那部分代码,我在其中使用了parfor:

      % Stress evaluated at the thread mid-points P_th_mid1(:,n) / P_th_mid2(:,n):
      %   sig_th11 / sig_th21 : contribution of every misfit segment i = 1:s_m
      %   sig_th12 / sig_th22 : contribution of every other thread segment (n ~= i)
      % Each 3x3 contribution is computed by segment_disloc6_6 in the segment's
      % local frame (x1,y1,z1) and transformed with Tseg before being stored
      % in slot (:,:,n,i).
      %
      % When some_condition is true, eight additional copies of every source
      % segment, shifted by +/-box_length1 in x and/or y, are accumulated too
      % (presumably the neighbouring periodic images of the box -- confirm).
      %
      % NOTE(review): sig_th12/sig_th22 are never written for n == i, so the
      % final sums rely on those arrays being preallocated before this section.
      if some_condition
          % Offsets added to BOTH end points of a source segment, listed in the
          % order in which the contributions are accumulated. img_shift and
          % i_img are workspace variables introduced by this section.
          img_shift = [ [box_length1;0;0],           -[box_length1;0;0], ...
                        [0;box_length1;0],           -[0;box_length1;0], ...
                        [box_length1;box_length1;0], -[box_length1;box_length1;0], ...
                        [-box_length1;box_length1;0], [box_length1;-box_length1;0] ];

          parfor i = 1:s_m
              burg_i = single(loop_sm(i))*burgers(:,slip_sm(i));
              plane_i = planes(:,slip_sm(i));

              % Local frame of segment i: z1 is the segment direction,
              % x1 = plane_i x z1 (normalised), y1 = z1 x x1.
              z1 = u_sm(:,i);
              x1 = [plane_i(2)*z1(3)-plane_i(3)*z1(2)
                    plane_i(3)*z1(1)-plane_i(1)*z1(3)
                    plane_i(1)*z1(2)-plane_i(2)*z1(1)];
              x1 = x1/sqrt(x1(1)^2+x1(2)^2+x1(3)^2);
              y1 = [z1(2)*x1(3)-z1(3)*x1(2);z1(3)*x1(1)-z1(1)*x1(3);z1(1)*x1(2)-z1(2)*x1(1)];

              x0 = [1; 0; 0];
              y0 = [0; 1; 0];
              z0 = [0; 0; 1];

              Tseg = [x1'; y1'; z1']*[x0 y0 z0];
              bn = burg_i'*[x1 y1 z1];   % Burgers vector components along x1, y1, z1

              %% Segmented Dislocations Stress (misfits)
              for n = 1:s_th
                  % --- field point P_th_mid1(:,n) ---
                  sig1 = zeros(3);

                  % unshifted segment
                  sig1 = segment_disloc6_6(P_th_mid1(1,n),P_th_mid1(2,n),P_th_mid1(3,n), P1_sm(:,i),P2_sm(:,i),l_sm(i),v,start_dis_sm(i),bn,x1,y1,z1)...
                      + sig1;

                  % shifted copies of the segment
                  for i_img = 1:size(img_shift,2)
                      sig1 = segment_disloc6_6(P_th_mid1(1,n),P_th_mid1(2,n),P_th_mid1(3,n), P1_sm(:,i)+img_shift(:,i_img),P2_sm(:,i)+img_shift(:,i_img),l_sm(i),v,start_dis_sm(i),bn,x1,y1,z1)...
                          + sig1;
                  end

                  sig_th11(:,:,n,i) =  Tseg' * sig1 * Tseg ;

                  % --- field point P_th_mid2(:,n) ---
                  sig2 = zeros(3);

                  sig2 = segment_disloc6_6(P_th_mid2(1,n),P_th_mid2(2,n),P_th_mid2(3,n), P1_sm(:,i),P2_sm(:,i),l_sm(i),v,start_dis_sm(i),bn,x1,y1,z1)...
                      + sig2;

                  for i_img = 1:size(img_shift,2)
                      sig2 = segment_disloc6_6(P_th_mid2(1,n),P_th_mid2(2,n),P_th_mid2(3,n), P1_sm(:,i)+img_shift(:,i_img),P2_sm(:,i)+img_shift(:,i_img),l_sm(i),v,start_dis_sm(i),bn,x1,y1,z1)...
                          + sig2;
                  end

                  sig_th21(:,:,n,i) =  Tseg' * sig2 * Tseg ;
              end
          end

          % NOTE(review): this loop is a plain "for" while the matching loop in
          % the else-branch is a "parfor"; left unchanged -- confirm whether
          % that is intentional.
          for i = 1 : s_th
              burg_i = burgers(:,slip_th(i));
              plane_i = planes(:,slip_th(i));

              z1 = u_th(:,i);
              x1 = [plane_i(2)*z1(3)-plane_i(3)*z1(2)
                    plane_i(3)*z1(1)-plane_i(1)*z1(3)
                    plane_i(1)*z1(2)-plane_i(2)*z1(1)];
              x1 = x1/sqrt(x1(1)^2+x1(2)^2+x1(3)^2);
              y1 = [z1(2)*x1(3)-z1(3)*x1(2);z1(3)*x1(1)-z1(1)*x1(3);z1(1)*x1(2)-z1(2)*x1(1)];

              x0 = [1; 0; 0];
              y0 = [0; 1; 0];
              z0 = [0; 0; 1];

              Tseg = [x1'; y1'; z1']*[x0 y0 z0];
              bn = burg_i'*[x1 y1 z1];
              %% Segmented Dislocations Stress (Threads)
              for n = 1:s_th
                  if n ~= i   % a thread segment does not act on its own mid-point
                      % --- field point P_th_mid1(:,n) ---
                      sig1 = zeros(3);

                      sig1 = segment_disloc6_6(P_th_mid1(1,n),P_th_mid1(2,n),P_th_mid1(3,n), P1_sth(:,i),P2_sth(:,i),l_sth(i),v,start_dis_th(i),bn,x1,y1,z1)...
                          + sig1;

                      for i_img = 1:size(img_shift,2)
                          sig1 = segment_disloc6_6(P_th_mid1(1,n),P_th_mid1(2,n),P_th_mid1(3,n), P1_sth(:,i)+img_shift(:,i_img),P2_sth(:,i)+img_shift(:,i_img),l_sth(i),v,start_dis_th(i),bn,x1,y1,z1)...
                              + sig1;
                      end

                      sig_th12(:,:,n,i) =  Tseg' * sig1 * Tseg ;

                      % --- field point P_th_mid2(:,n) ---
                      sig2 = zeros(3);

                      sig2 = segment_disloc6_6(P_th_mid2(1,n),P_th_mid2(2,n),P_th_mid2(3,n), P1_sth(:,i),P2_sth(:,i),l_sth(i),v,start_dis_th(i),bn,x1,y1,z1)...
                          + sig2;

                      for i_img = 1:size(img_shift,2)
                          sig2 = segment_disloc6_6(P_th_mid2(1,n),P_th_mid2(2,n),P_th_mid2(3,n), P1_sth(:,i)+img_shift(:,i_img),P2_sth(:,i)+img_shift(:,i_img),l_sth(i),v,start_dis_th(i),bn,x1,y1,z1)...
                              + sig2;
                      end

                      sig_th22(:,:,n,i) =  Tseg' * sig2 * Tseg ;
                  end
              end
          end

      else
          % Same computation without the shifted copies of the segments.
          parfor i = 1:s_m
              burg_i = single(loop_sm(i))*burgers(:,slip_sm(i));
              plane_i = planes(:,slip_sm(i));

              z1 = u_sm(:,i);
              x1 = [plane_i(2)*z1(3)-plane_i(3)*z1(2)
                  plane_i(3)*z1(1)-plane_i(1)*z1(3)
                  plane_i(1)*z1(2)-plane_i(2)*z1(1)];
              x1 = x1/sqrt(x1(1)^2+x1(2)^2+x1(3)^2);
              y1 = [z1(2)*x1(3)-z1(3)*x1(2);z1(3)*x1(1)-z1(1)*x1(3);z1(1)*x1(2)-z1(2)*x1(1)];

              x0 = [1; 0; 0];
              y0 = [0; 1; 0];
              z0 = [0; 0; 1];

              Tseg = [x1'; y1'; z1']*[x0 y0 z0];
              bn = burg_i'*[x1 y1 z1];

              %% Segmented Dislocations Stress (misfits)
              for n = 1:s_th
                  sig1 = zeros(3);

                  sig1 = segment_disloc6_6(P_th_mid1(1,n),P_th_mid1(2,n),P_th_mid1(3,n), P1_sm(:,i),P2_sm(:,i),l_sm(i),v,start_dis_sm(i),bn,x1,y1,z1)...
                      + sig1;

                  sig_th11(:,:,n,i) =  Tseg' * sig1 * Tseg ;

                  sig2 = zeros(3);

                  sig2 = segment_disloc6_6(P_th_mid2(1,n),P_th_mid2(2,n),P_th_mid2(3,n), P1_sm(:,i),P2_sm(:,i),l_sm(i),v,start_dis_sm(i),bn,x1,y1,z1)...
                      + sig2;

                  sig_th21(:,:,n,i) =  Tseg' * sig2 * Tseg ;
              end
          end
          parfor i = 1 : s_th
              burg_i = burgers(:,slip_th(i));
              plane_i = planes(:,slip_th(i));

              z1 = u_th(:,i);
              x1 = [plane_i(2)*z1(3)-plane_i(3)*z1(2)
                  plane_i(3)*z1(1)-plane_i(1)*z1(3)
                  plane_i(1)*z1(2)-plane_i(2)*z1(1)];
              x1 = x1/sqrt(x1(1)^2+x1(2)^2+x1(3)^2);
              y1 = [z1(2)*x1(3)-z1(3)*x1(2);z1(3)*x1(1)-z1(1)*x1(3);z1(1)*x1(2)-z1(2)*x1(1)];

              x0 = [1; 0; 0];
              y0 = [0; 1; 0];
              z0 = [0; 0; 1];

              Tseg = [x1'; y1'; z1']*[x0 y0 z0];
              bn = burg_i'*[x1 y1 z1];
              %% Segmented Dislocations Stress (Threads)
              for n = 1:s_th
                  if n ~= i   % a thread segment does not act on its own mid-point
                      sig1 = zeros(3);

                      sig1 = segment_disloc6_6(P_th_mid1(1,n),P_th_mid1(2,n),P_th_mid1(3,n), P1_sth(:,i),P2_sth(:,i),l_sth(i),v,start_dis_th(i),bn,x1,y1,z1)...
                          + sig1;

                      sig_th12(:,:,n,i) =  Tseg' * sig1 * Tseg ;

                      sig2 = zeros(3);

                      sig2 = segment_disloc6_6(P_th_mid2(1,n),P_th_mid2(2,n),P_th_mid2(3,n), P1_sth(:,i),P2_sth(:,i),l_sth(i),v,start_dis_th(i),bn,x1,y1,z1)...
                          + sig2;

                      % Fixed: this line referenced the undefined name sig22.
                      sig_th22(:,:,n,i) =  Tseg' * sig2 * Tseg ;
                  end
              end
          end
      end

      % Sum the per-source contributions (4th dimension = source segment index),
      % scale by s0 and add the sig_th_mis1 / sig_th_mis2 terms.
      sig_th1 = s0 * (sum(sig_th11(:,:,1:s_th,:),4)+sum(sig_th12(:,:,1:s_th,:),4)) + sig_th_mis1;
      sig_th2 = s0 * (sum(sig_th21(:,:,1:s_th,:),4)+sum(sig_th22(:,:,1:s_th,:),4)) + sig_th_mis2;
      

      这是被调用的函数:

      function S = segment_disloc6_6(xp,yp,zp, P1,P2,l,v,start,bn,x1,y1,z1)
      %SEGMENT_DISLOC6_6 Symmetric 3x3 stress matrix of one straight segment at a point.
      %   S = SEGMENT_DISLOC6_6(xp,yp,zp,P1,P2,l,v,start,bn,x1,y1,z1)
      %
      %   xp,yp,zp  coordinates of the field point.
      %   P1,P2     3x1 end points of the segment; START selects which of them
      %             is used as the origin of the local coordinates.
      %   l         segment length (measured along z1 from the origin).
      %   v         material constant entering as 2*v and (1-v)
      %             (presumably Poisson's ratio -- confirm).
      %   start     1 -> origin is P1, 2 -> origin is P2; anything else is an error.
      %   bn        3-element vector of coefficients multiplying the stress terms
      %             (presumably the Burgers vector expressed along x1,y1,z1).
      %   x1,y1,z1  3x1 axes of the local frame; z1 is the axis along which
      %             l is measured.
      %
      %   The result is the closed-form expression evaluated at lambda = l - z
      %   minus the same expression evaluated at lambda = -z, with the components
      %   expressed in the local (x1,y1,z1) frame.
      %
      %   Raises segment_disloc6_6:invalidStart when START is neither 1 nor 2.
      %   (Previously only a message was displayed and the function then failed
      %   on the undefined local coordinates.)

      if start == 1
          origin = P1;
      elseif start == 2
          origin = P2;
      else
          error('segment_disloc6_6:invalidStart', ...
              'ERROR! Starting point is indicated as neither 1 nor 2!');
      end

      % Field point relative to the chosen end point, projected on the local axes.
      rel = [xp,yp,zp] - origin';
      x = rel*x1;
      y = rel*y1;
      z = rel*z1;

      % T = [dot(x1,x0),dot(x1,y0),dot(x1,z0)
      %      dot(y1,x0),dot(y1,y0),dot(y1,z0)
      %      dot(z1,x0),dot(z1,y0),dot(z1,z0)];

      % Components are ordered [xx, yy, zz, xy, xz, yz].
      far_end  = half_line_terms(x, y, l-z, bn, v);   % lambda = l - z, l is disl. length
      near_end = half_line_terms(x, y, -z,  bn, v);   % lambda = -z
      d = far_end - near_end;

      S = [d(1),d(4),d(5)
          d(4),d(2),d(6)
          d(5),d(6),d(3)];

      % S = S_l2 - S_l1;
      end

      function s = half_line_terms(x,y,lambda,bn,v)
      %HALF_LINE_TERMS Six stress terms [xx, yy, zz, xy, xz, yz] for one value of lambda.
      R = sqrt(x^2+y^2+(lambda)^2);
      if R < 2.5500e-9
          R = 2.5500e-9; %Notice that this is 10*bval! if bval changed in the main code, this must also be changed!
      end

      sigxx = bn(1) * (y/(R*(R+lambda))) * (1+x^2/R^2+x^2/(R*(R+lambda))) + bn(2) * (x/(R*(R+lambda))) * (1-x^2/R^2-x^2/(R*(R+lambda)));
      sigyy = -bn(1) * (y/(R*(R+lambda))) * (1-y^2/R^2-y^2/(R*(R+lambda))) - bn(2) * (x/(R*(R+lambda))) * (1+y^2/R^2+y^2/(R*(R+lambda)));
      sigzz = bn(1) * (2*v*y/(R*(R+lambda)) + y*lambda/R^3) + bn(2) * (-2*v*x/(R*(R+lambda)) - x*lambda/R^3);
      sigxy = -bn(1) * (x/(R*(R+lambda))) * (1-y^2/R^2-y^2/(R*(R+lambda))) + bn(2) * (y/(R*(R+lambda))) * (1-x^2/R^2-x^2/(R*(R+lambda)));
      sigxz = -bn(1) * x*y/R^3 + bn(2) * (-v/R + x^2/R^3) + bn(3) * y*(1-v)/(R*(R+lambda));
      sigyz = bn(2) * x*y/R^3 + bn(1) * (v/R - y^2/R^3) - bn(3) * x*(1-v)/(R*(R+lambda));

      s = [sigxx, sigyy, sigzz, sigxy, sigxz, sigyz];
      end
      

      谢谢!

0 个答案:

没有答案