我正忙着使用C ++ 11(MS VS 2015)测试SSE4.2字符串指令。 因为MS VC ++不支持内联汇编,所以我使用内部函数。
测试用例很简单:在Huge(12,5 + M行)文本文件中计算行数。 我通过计算'\ n'LF的数量来做到这一点。
我的代码如此票价:
#include "nmmintrin.h"
#include <iostream>
#include <fstream>
#include <string>
#include <chrono>
static inline long long popcnt128(__m128i n)
{
return _mm_popcnt_u64(n.m128i_u64[0])
+_mm_popcnt_u64(n.m128i_u64[1]);
}
static inline size_t sse4_strChrCount(const char* pcStr, size_t iStrLen, const char chr)
{
const __m128i mSet = _mm_set1_epi8(chr);
const int iMode = _SIDD_CMP_EQUAL_EACH;
size_t iResult = 0;
for (size_t i = 0; i < iStrLen; i += 16)
{
const __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pcStr + i));
__m128i ret = _mm_cmpistrm(data, mSet, iMode);
iResult += popcnt128(ret);
}
return iResult;
}
int main(int argc, char** argv)
{
// NOTE: NO CHECKS FOR SSE4.2 SUPPORT! So be carefull!
const int bufSize = 4096 * 128; // +/- 5Mb on Heap
char* buf = new char[bufSize];
if (argc <= 1)
{
std::cerr << "Provide filename to count newlines on!" << std::endl;
exit(0);
}
std::string fileName(argv[1]);
std::cout << "C++ LineCounter for " << fileName << " with bufSize: " << bufSize << std::endl;
std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
size_t lineCount = 0;
std::ifstream inFile;
inFile.open(fileName, std::ios_base::in | std::ios_base::binary);
while (inFile.good())
{
inFile.read(buf, bufSize);
if (inFile || inFile.gcount() > 0)
{
lineCount += sse4_strChrCount(buf, inFile.gcount(), '\n');
}
}
inFile.close();
std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
std::cout << "Find newline char using SSE4.2 intrinsic functions: Counted " << lineCount << " lines in " << std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count() << "(ms) " << std::endl;
return 0;
}
结果是:
C++ LineCounter for ..\HugeLogfile.txt with bufSize: 524288
Find newline char using SSE4.2 intrinsic functions: Counted 12867995 lines in 11568(ms)
我的问题:
在Parallels下使用Windows 10在我的Macbook Pro上进行测试。
CPU规格:Intel(R)Core(TM)i7-4960HQ CPU @ 2.60GHz。
感谢您的任何意见和反馈!