我正在尝试使用Intel MKL的DFT API,以便在CentOS 7上测试其速度。代码可以编译成功,但运行时出现段错误(segmentation fault);而同样的代码在Windows上的Visual Studio 2017中可以正常运行。Windows上的结果如下:(截图:result on windows)
#include <execinfo.h>
#include <stdio.h>
#include <stdlib.h>
#include <complex>
#include <iostream>
#include <vector>
#include <cxxabi.h>
#include <omp.h>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/opencv.hpp>
#include "mkl_dfti.h"
int main() {
MKL_LONG len[2] = { 1080, 961 }, status;
float x_in[1080][1920];
DFTI_DESCRIPTOR_HANDLE fft;
status = DftiCreateDescriptor(&fft, DFTI_SINGLE, DFTI_REAL, 2, len);
status = DftiSetValue(fft, DFTI_PLACEMENT, DFTI_NOT_INPLACE);
status = DftiCommitDescriptor(fft);
//float x[1080* 2000];
std::complex<float> x_out[1080][961];
for (int i = 0; i < 10; i++) {
double totalcputime = (double)cv::getTickCount();
//std::cout << status << std::endl;
status = DftiComputeForward(fft, x_in, x_out);
//std::cout << status << std::endl;
totalcputime = ((double)cv::getTickCount() - totalcputime) / cv::getTickFrequency();
std::cout << "MKL-DFT Time: " << totalcputime << std::endl;
}
cv::Mat sizedimage = cv::Mat::zeros(1080, 1920, CV_32FC1);
cv::Mat opencvtransform = cv::Mat(1080, 1920 / 2 + 1, CV_32FC1);
for (int i = 0; i < 10; i++) {
double totalcputime = (double)cv::getTickCount();
cv::dft(sizedimage, opencvtransform);
totalcputime = ((double)cv::getTickCount() - totalcputime) / cv::getTickFrequency();
std::cout << "opencv-DFT Time: " << totalcputime << std::endl;
}
return 0;
}
我已经使用GDB调试了我的代码,它为我提供了以下信息:
Program received signal SIGSEGV, Segmentation fault.
0x00000000004012b8 in main () at comparison.cpp:25
25 status = DftiCreateDescriptor(&fft, DFTI_SINGLE, DFTI_REAL, 2, len);
以下参数成功编译了文件:
g++ comparison.cpp `pkg-config opencv --cflags --libs` -lmkl_rt -g
有人知道这个错误的原因吗?
答案 0 :(得分:1)
您能检查MKL 2019 u4的问题吗?
我稍微修改了代码:去掉了OpenCV相关的部分,并改为动态分配输入/输出数组,以检查最新的MKL 2019是否存在这个问题。
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
//#include <execinfo.h>
//#include <cxxabi.h>
#include <assert.h>
#include <omp.h>
#include <complex>
#include "mkl.h"
#define N1 1080
#define N2 961
#define N3 1920
int main()
{
// MKL_LONG len[2] = { 1080, 961 }, status;
MKL_LONG status;
MKL_LONG len[2];
len[0] = N1;
len[1] = N2;
//float x_in[1080][1920];
float* x_in = (float*)mkl_malloc(N1*N3*sizeof(float), 64);
assert(NULL != x_in);
DFTI_DESCRIPTOR_HANDLE fft;
status = DftiCreateDescriptor(&fft, DFTI_SINGLE, DFTI_REAL, 2, len);
if (0 != status){
std::cout << "\t DftiCreateDescriptor Error : " << status << std::endl;
}
status = DftiSetValue(fft, DFTI_PLACEMENT, DFTI_NOT_INPLACE);
if (0 != status){
std::cout << "\t DftiSetValue Error : " << status << std::endl;
}
status = DftiCommitDescriptor(fft);
if (0 != status){
std::cout << "\t DftiCommitDescriptor Error : " << status << std::endl;
}
double t1,texec;
// std::complex<float> x_out[1080][961];
MKL_Complex8* x_out = (MKL_Complex8*)mkl_malloc(N1*N2*sizeof(MKL_Complex8), 64);
t1 = dsecnd();
for (int i = 0; i < 10; i++) {
t1 = dsecnd();
status = DftiComputeForward(fft, x_in, x_out);
if (0 != status){std::cout << "\t DftiComputeForward Error : " << status << std::endl;}
texec = dsecnd() - t1;
std::cout << "MKL-DFT Time: " << texec << std::endl;
}
status = DftiFreeDescriptor(&fft);
if (0 != status){
std::cout << "\t DftiFreeDescriptor Error : " << status << std::endl;
}
return 0;
}
这是我看到的输出:
]$ ./a.out
MKL-DFT Time: 0.00725237
MKL-DFT Time: 0.00381843
MKL-DFT Time: 0.00362679
MKL-DFT Time: 0.0021284
MKL-DFT Time: 0.00221884
MKL-DFT Time: 0.00215556
MKL-DFT Time: 0.00211133
MKL-DFT Time: 0.002133
MKL-DFT Time: 0.00212184
MKL-DFT Time: 0.00215306
答案 1 :(得分:0)
您还可以尝试启用MKL_VERBOSE模式,以查看所需的全部运行时详细信息:在shell中执行 export MKL_VERBOSE=1 。以下是FFT调用的MKL详细输出:
./a.out
MKL_VERBOSE Intel(R) MKL 2019.0 Update 4 Product build 20190411 for Intel(R) 64 architecture Intel(R) Advanced Vector Extensions (Intel(R) AVX) enabled processors, Lnx 2.80GHz intel_thread
.........
MKL_VERBOSE FFT(srfo1080:961:961x961:1:1,pack:ccs,tLim:20,desc:0x1b4df40) 3.83ms CNR:OFF Dyn:1 FastMM:1 TID:0 NThr:20
MKL-DFT Time: 0.0038483