如何检查CPU是否支持SSE3指令集?

时间:2011-05-25 08:49:44

标签: c++ sse instruction-set avx cpuid

以下代码是否有效以检查CPU是否支持SSE3指令集?

使用IsProcessorFeaturePresent()功能显然不适用于Windows XP(请参阅http://msdn.microsoft.com/en-us/library/ms724482(v=vs.85).aspx)。

bool CheckSSE3()
{
    int CPUInfo[4] = {-1};

    //-- Get number of valid info ids
    __cpuid(CPUInfo, 0);
    int nIds = CPUInfo[0];

    //-- Get info for id "1"
    if (nIds >= 1)
    {
        __cpuid(CPUInfo, 1);
        bool bSSE3NewInstructions = (CPUInfo[2] & 0x1) || false;
        return bSSE3NewInstructions;     
    }

    return false;      
}

6 个答案:

答案 0 :(得分:85)

我已经创建了一个GitHub repro,它将检测所有主要x86 ISA扩展的CPU和操作系统支持:https://github.com/Mysticial/FeatureDetector

这是一个较短的版本:


首先,您需要访问CPUID指令:

#ifdef _WIN32

//  Windows
#define cpuid(info, x)    __cpuidex(info, x, 0)

#else

//  GCC Intrinsics
#include <cpuid.h>
void cpuid(int info[4], int InfoType){
    __cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]);
}

#endif

然后您可以运行以下代码:

//  Misc.
bool HW_MMX;
bool HW_x64;
bool HW_ABM;      // Advanced Bit Manipulation
bool HW_RDRAND;
bool HW_BMI1;
bool HW_BMI2;
bool HW_ADX;
bool HW_PREFETCHWT1;

//  SIMD: 128-bit
bool HW_SSE;
bool HW_SSE2;
bool HW_SSE3;
bool HW_SSSE3;
bool HW_SSE41;
bool HW_SSE42;
bool HW_SSE4a;
bool HW_AES;
bool HW_SHA;

//  SIMD: 256-bit
bool HW_AVX;
bool HW_XOP;
bool HW_FMA3;
bool HW_FMA4;
bool HW_AVX2;

//  SIMD: 512-bit
bool HW_AVX512F;    //  AVX512 Foundation
bool HW_AVX512CD;   //  AVX512 Conflict Detection
bool HW_AVX512PF;   //  AVX512 Prefetch
bool HW_AVX512ER;   //  AVX512 Exponential + Reciprocal
bool HW_AVX512VL;   //  AVX512 Vector Length Extensions
bool HW_AVX512BW;   //  AVX512 Byte + Word
bool HW_AVX512DQ;   //  AVX512 Doubleword + Quadword
bool HW_AVX512IFMA; //  AVX512 Integer 52-bit Fused Multiply-Add
bool HW_AVX512VBMI; //  AVX512 Vector Byte Manipulation Instructions

int info[4];
cpuid(info, 0);
int nIds = info[0];

cpuid(info, 0x80000000);
unsigned nExIds = info[0];

//  Detect Features
if (nIds >= 0x00000001){
    cpuid(info,0x00000001);
    HW_MMX    = (info[3] & ((int)1 << 23)) != 0;
    HW_SSE    = (info[3] & ((int)1 << 25)) != 0;
    HW_SSE2   = (info[3] & ((int)1 << 26)) != 0;
    HW_SSE3   = (info[2] & ((int)1 <<  0)) != 0;

    HW_SSSE3  = (info[2] & ((int)1 <<  9)) != 0;
    HW_SSE41  = (info[2] & ((int)1 << 19)) != 0;
    HW_SSE42  = (info[2] & ((int)1 << 20)) != 0;
    HW_AES    = (info[2] & ((int)1 << 25)) != 0;

    HW_AVX    = (info[2] & ((int)1 << 28)) != 0;
    HW_FMA3   = (info[2] & ((int)1 << 12)) != 0;

    HW_RDRAND = (info[2] & ((int)1 << 30)) != 0;
}
if (nIds >= 0x00000007){
    cpuid(info,0x00000007);
    HW_AVX2   = (info[1] & ((int)1 <<  5)) != 0;

    HW_BMI1        = (info[1] & ((int)1 <<  3)) != 0;
    HW_BMI2        = (info[1] & ((int)1 <<  8)) != 0;
    HW_ADX         = (info[1] & ((int)1 << 19)) != 0;
    HW_SHA         = (info[1] & ((int)1 << 29)) != 0;
    HW_PREFETCHWT1 = (info[2] & ((int)1 <<  0)) != 0;

    HW_AVX512F     = (info[1] & ((int)1 << 16)) != 0;
    HW_AVX512CD    = (info[1] & ((int)1 << 28)) != 0;
    HW_AVX512PF    = (info[1] & ((int)1 << 26)) != 0;
    HW_AVX512ER    = (info[1] & ((int)1 << 27)) != 0;
    HW_AVX512VL    = (info[1] & ((int)1 << 31)) != 0;
    HW_AVX512BW    = (info[1] & ((int)1 << 30)) != 0;
    HW_AVX512DQ    = (info[1] & ((int)1 << 17)) != 0;
    HW_AVX512IFMA  = (info[1] & ((int)1 << 21)) != 0;
    HW_AVX512VBMI  = (info[2] & ((int)1 <<  1)) != 0;
}
if (nExIds >= 0x80000001){
    cpuid(info,0x80000001);
    HW_x64   = (info[3] & ((int)1 << 29)) != 0;
    HW_ABM   = (info[2] & ((int)1 <<  5)) != 0;
    HW_SSE4a = (info[2] & ((int)1 <<  6)) != 0;
    HW_FMA4  = (info[2] & ((int)1 << 16)) != 0;
    HW_XOP   = (info[2] & ((int)1 << 11)) != 0;
}

请注意,这仅检测CPU是否支持该指令。要实际运行它们,您还需要支持操作系统。

具体而言,需要操作系统支持:

  • x64说明。 (您需要64位操作系统。)
  • 使用(AVX)256位ymm寄存器的指令。有关如何检测此问题,请参阅Andy Lutomirski's answer
  • 使用(AVX512)512位zmm和掩码寄存器的指令。检测AVX512的操作系统支持与AVX相同,但使用标记0xe6而不是0x6

答案 1 :(得分:47)

Mysticial的答案有点危险 - 它解释了如何检测CPU支持但不支持OS支持。您需要使用_xgetbv来检查操作系统是否已启用所需的CPU扩展状态。有关其他来源,请参阅hereEven gcc has made the same mistake.代码的内容是:

bool avxSupported = false;

int cpuInfo[4];
__cpuid(cpuInfo, 1);

bool osUsesXSAVE_XRSTORE = cpuInfo[2] & (1 << 27) || false;
bool cpuAVXSuport = cpuInfo[2] & (1 << 28) || false;

if (osUsesXSAVE_XRSTORE && cpuAVXSuport)
{
    unsigned long long xcrFeatureMask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
    avxSupported = (xcrFeatureMask & 0x6) == 0x6;
}

答案 2 :(得分:8)

经过大量的谷歌搜索,我也找到了英特尔的解决方案:

链接:https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family

    void cpuid(uint32_t eax, uint32_t ecx, uint32_t* abcd) {
#if defined(_MSC_VER)
            __cpuidex((int*)abcd, eax, ecx);
#else
            uint32_t ebx, edx;
# if defined( __i386__ ) && defined ( __PIC__ )
            /* in case of PIC under 32-bit EBX cannot be clobbered */
            __asm__("movl %%ebx, %%edi \n\t cpuid \n\t xchgl %%ebx, %%edi" : "=D" (ebx),
# else
            __asm__("cpuid" : "+b" (ebx),
# endif
            "+a" (eax), "+c" (ecx), "=d" (edx));
            abcd[0] = eax; abcd[1] = ebx; abcd[2] = ecx; abcd[3] = edx;
#endif
    }

    int check_xcr0_ymm()
    {
        uint32_t xcr0;
#if defined(_MSC_VER)
        xcr0 = (uint32_t)_xgetbv(0);  /* min VS2010 SP1 compiler is required */
#else
        __asm__("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx");
#endif
        return ((xcr0 & 6) == 6); /* checking if xmm and ymm state are enabled in XCR0 */
    }

另请注意,GCC有一些特殊的内在函数可供您使用(参见:https://gcc.gnu.org/onlinedocs/gcc-4.9.2/gcc/X86-Built-in-Functions.html):

    if (__builtin_cpu_supports("avx2"))
    // ...

如果你把它与上面的信息放在一起,那一切都会很好。

答案 3 :(得分:3)

在Mac OS上,这可行:

sysctl -a | grep machdep.cpu.features

在我的机器中输出:

  

machdep.cpu.features:FPU VME DE PSE TSC MSR PAE MCE CX8 APIC SEP MTRR PGE MCA CMOV PAT PSE36 CLFSH DS ACPI MMX FXSR SSE SSE2 SS HTT TM PBE SSE3 PCLMULQDQ DTES64 MON DSCPL VMX EST TM2 SSSE3 FMA CX16 TPR PDCM SSE4.1 SSE4.2 x2APIC MOVBE POPCNT AES PCID XSAVE OSXSAVE SEGLIM64 TSCTMR AVX1.0 < / strong> RDRAND F16C

正如您所看到的以粗体书写的说明,支持SSE3和一堆其他SIMD指令。

答案 4 :(得分:2)

Linux 或 wsl2 上的替代方案,来自 util-linux 存储库的 lscpu 命令将完成这项工作。

例如:

lscpu | grep sse3

答案 5 :(得分:1)

添加Abhiroop的回答: 在linux上,您可以运行此shell命令来查找CPU支持的功能

cat /proc/cpuinfo | grep flags | uniq

在我的机器上打印

  

国旗:FPU VME德PSE TSC MSR PAE MCE CX8 APIC SEP MTRR PGE MCA CMOV轻拍PSE36 CLFLUSH MMX FXSR SSE SSE2 HT系统调用的nx pdpe1gb rdtscp流明constant_tsc rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu PNI pclmulqdq SSSE3 FMA CX16 PCID sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single retpoline kaiser fsgsbase bmi1 hle avx2 smep bmi2 erms invpcid rtm rdseed adx xsaveopt