我正在尝试在ARM平台上运行PicoHTTPParser。有一个函数使用SSE指令 _mm_cmpestri 进行快速char comparisson。 NEON还有替代品吗?我似乎不得不使用像 VCGT.U8 这样的东西,但看起来效率不高。
static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found)
{
*found = 0;
#if __SSE4_2__
if (likely(buf_end - buf >= 16)) {
__m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges);
size_t left = (buf_end - buf) & ~15;
do {
__m128i b16 = _mm_loadu_si128((const __m128i*)buf);
int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
if (unlikely(r != 16)) {
buf += r;
*found = 1;
break;
}
buf += 16;
left -= 16;
} while (likely(left != 0));
}
#else
/* suppress unused parameter warning */
(void)buf_end;
(void)ranges;
(void)ranges_size;
#endif
return buf;
}