我有运行ubuntu的intel i5(NUC)系统,我正在尝试使用点云开发实时应用程序。
我有一个307200(640x480)迭代的for循环,但有一些条件。代码段是:
void pc_segmentation(pcl::PointCloud<pcl::PointXYZRGB>::Ptr input_cloud,Eigen::Vector4f pt_min,Eigen::Vector4f pt_max)
{
pcl::PointCloud<pcl::PointXYZRGB> slice_0;
pcl::PointCloud<pcl::PointXYZRGB> slice_1;
pcl::PointCloud<pcl::PointXYZRGB> slice_2;
pcl::PointCloud<pcl::PointXYZRGB> slice_3;
pcl::PointCloud<pcl::PointXYZRGB> slice_4;
pcl::PointCloud<pcl::PointXYZRGB> slice_5;
pcl::PointCloud<pcl::PointXYZRGB> slice_z;
pcl::PointCloud<pcl::PointXYZRGB> slice_y(input_cloud->width,input_cloud->height);
pcl::PointCloud<pcl::PointXYZRGB> slice_x(input_cloud->width,input_cloud->height);
//clearing pointclouds
slice_0.clear();
slice_1.clear();
slice_2.clear();
slice_3.clear();
slice_4.clear();
slice_5.clear();
slice_z.clear();
float xmin0=100, xmin1=100, xmin2=100, xmin3=100, xmin4=100, xmin5=100;
float ymin0=100, ymin1=100, ymin2=100, ymin3=100, ymin4=100, ymin5=100;
float xmax0=-100, xmax1=-100, xmax2=-100, xmax3=-100, xmax4=-100, xmax5=-100;
float ymax0=-100, ymax1=-100, ymax2=-100, ymax3=-100, ymax4=-100, ymax5=-100;
//Slices for Y direction
Eigen::VectorXf slice_pts_y(NO_OF_SLICES);
Eigen::VectorXf diff_y(NO_OF_SLICES);
Eigen::VectorXf slice_pts_x(NO_OF_SLICES_X);
Eigen::VectorXf diff_x(NO_OF_SLICES_X);
//Slice Vector Y
double dist_btw_slice = (pt_max[1] - pt_min[1])/NO_OF_SLICES;
for(int i=1; i<=NO_OF_SLICES;i++)
slice_pts_y[i-1] = pt_min[1] + (i*dist_btw_slice);
//Slice Vector X
dist_btw_slice = (pt_max[0] - pt_min[0])/NO_OF_SLICES_X;
for(int i=1; i<=NO_OF_SLICES_X;i++)
slice_pts_x[i-1] = pt_min[1] + (i*dist_btw_slice);
ros::Time start_for_loop_time = ros::Time::now();
for(int ii=0;ii<input_cloud->width;ii++)
for(int jj=0;jj<input_cloud->height;jj++)
{
pcl::PointXYZRGB point = input_cloud->at(ii,jj);
if((pcl_isfinite(point.z)))
{
if((point.z > 0.9)&&(point.z < 1))
{
slice_0.push_back(point);
}
else if ((point.z > 1.4)&&(point.z < 1.5))
{
slice_1.push_back(point);
}
else if((point.z > 1.9)&&(point.z < 2))
{
slice_2.push_back(point);
}
else if ((point.z > 2.4)&&(point.z < 2.5))
{
slice_3.push_back(point);
}
else if((point.z > 2.9)&&(point.z < 3))
{
slice_4.push_back(point);
}
else if ((point.z > 3.4)&&(point.z < 3.5))
{
slice_5.push_back(point);
}
}
if((pcl_isfinite(point.y)))
{
diff_y = slice_pts_y - (point.y * Eigen::VectorXf::Ones(NO_OF_SLICES));
diff_y=diff_y.array().abs();
std::sort(diff_y.data(),diff_y.data()+NO_OF_SLICES,std::less<float>());
if(diff_y[0]<0.005)
{
slice_y.at(ii,jj)=point;
}
}
if((pcl_isfinite(point.x)))
{
diff_x = slice_pts_x - (point.x * Eigen::VectorXf::Ones(NO_OF_SLICES_X));
diff_x=diff_x.array().abs();
std::sort(diff_x.data(),diff_x.data()+NO_OF_SLICES_X,std::less<float>());
if(diff_x[0]<0.005)
{
slice_x.at(ii,jj)=point;
}
}
}
ros::Time end_for_loop_time = ros::Time::now();
if(seg_pcl_time.is_open())
seg_pcl_time<<end_for_loop_time.toSec() - start_for_loop_time.toSec()<<std::endl;
else
std::cout<<"Segmented Point Cloud for Z slices "<<end_for_loop_time.toSec() - start_for_loop_time.toSec()<<" seconds"<<std::endl;
ros::Time start_pc_add_time = ros::Time::now();
slice_z = slice_0 + slice_1;
slice_z += slice_2;
slice_z += slice_3;
slice_z += slice_4;
slice_z += slice_5;
pcl::copyPointCloud(slice_z,slices_z_pc); //slices_z_pc is defined globally
ros::Time end_pc_add_time = ros::Time::now();
std::cout<<"Adding and Publishing Point Cloud for Z slices "<<end_pc_add_time.toSec() - start_pc_add_time.toSec()<<" seconds"<<std::endl;
// Publish point clouds
slices_z_pc.header = input_cloud->header;
slice_x.header = input_cloud->header;
slice_y.header = input_cloud->header;
pub_x_slices.publish(slice_x);
pub_y_slices.publish(slice_y);
pub_z_slices.publish(slices_z_pc);
}
NO_OF_SLICES = 20 and NO_OF_SLICES_X = 20
在i5系统上大约需要100ms - 110ms,在运行ubuntu的Odroid(octacore ARM处理器)上需要340ms。我觉得这个处理时间非常长。
我不确定如何优化它以加快运行速度。我还在编译时启用"O3"
优化。任何帮助是极大的赞赏。
PS:我也尝试通过3D数组示例point_cloud[640][480][3]
循环它,但处理时间略短(约20ms)。
编辑:根据@Toby Speight的建议将代码从for循环的片段更改为整个函数