_mm_aesimc_si128无法使用MSVC正确编译

时间:2017-04-23 08:08:43

标签: c++ visual-c++ aes aes-ni

我正在阅读AES-NI白皮书,并希望通过编写一个基于英特尔所提供代码的简单演示程序来亲自尝试,但得到了一些奇怪的结果。它在Debug / Release x86和Debug x64模式下都能正常工作,但在Release x64模式下会得到一些随机结果。我也用GCC试过,没有这样的问题。经过一番挖掘,似乎是MSVC把AESIMC指令的源操作数和目标操作数弄反了:它会生成aesimc xmm3,xmmword ptr[rsp+20h]之类的代码,而实际上xmm3才是源,[rsp+20h]才是目标。在x86模式下,它会生成正确的代码,例如aesimc xmm0,xmm5 movaps xmmword ptr[K4],xmm0(需要两条指令,因为我认为aesimc xmmword ptr[K4],xmm5这样的写法是无效的)。

我不确定这确实是编译器的bug,还是我的代码有问题。

Release x64模式下的反汇编:(完整代码见下文)

    K11 = _mm_aesimc_si128(K11);
    K12 = _mm_aesimc_si128(K12);
00007FF6C0A717C6 66 0F 38 DB 5C 24 20 aesimc      xmm3,xmmword ptr [rsp+20h]  
00007FF6C0A717CD 66 0F 6F 1C 24       movdqa      xmm3,xmmword ptr [rsp]  

    auto K14 = AES256_GENKEY_1(K12, K13, 0x40);
00007FF6C0A717D2 66 44 0F EF F9       pxor        xmm15,xmm1  
    K13 = _mm_aesimc_si128(K13);
00007FF6C0A717D7 66 0F 38 DB 54 24 10 aesimc      xmm2,xmmword ptr [rsp+10h]  

    auto blocks = size >> 4;
    auto feedback = _mm_loadu_si128(static_cast<const __m128i *>(iVec));
00007FF6C0A717DE F3 0F 6F 12          movdqu      xmm2,xmmword ptr [rdx]  
00007FF6C0A717E2 66 45 0F 38 DB F6    aesimc      xmm14,xmm14  
00007FF6C0A717E8 66 45 0F 38 DB ED    aesimc      xmm13,xmm13  
00007FF6C0A717EE 66 45 0F 38 DB E4    aesimc      xmm12,xmm12  
00007FF6C0A717F4 66 45 0F 38 DB DB    aesimc      xmm11,xmm11  
00007FF6C0A717FA 66 45 0F 38 DB D2    aesimc      xmm10,xmm10  
00007FF6C0A71800 66 45 0F 38 DB C9    aesimc      xmm9,xmm9  
00007FF6C0A71806 66 45 0F 38 DB C0    aesimc      xmm8,xmm8  
00007FF6C0A7180C 66 0F 38 DB FF       aesimc      xmm7,xmm7  
00007FF6C0A71811 66 0F 38 DB F6       aesimc      xmm6,xmm6  
00007FF6C0A71816 66 0F 38 DB ED       aesimc      xmm5,xmm5  
00007FF6C0A7181B 66 0F 38 DB E4       aesimc      xmm4,xmm4  
    {
        auto lastIn = _mm_loadu_si128(static_cast<const __m128i *>(input) + i);
00007FF6C0A71820 F3 41 0F 6F 0C 00    movdqu      xmm1,xmmword ptr [r8+rax]  
00007FF6C0A71826 48 8D 40 10          lea         rax,[rax+10h]  

        auto m = _mm_xor_si128(lastIn, K14);
00007FF6C0A7182A 66 0F 6F C1          movdqa      xmm0,xmm1  
00007FF6C0A7182E 66 41 0F EF C7       pxor        xmm0,xmm15  
        m = _mm_aesdec_si128(m, K13);
00007FF6C0A71833 66 0F 38 DE 44 24 10 aesdec      xmm0,xmmword ptr [K13]  
        m = _mm_aesdec_si128(m, K12);
00007FF6C0A7183A 66 0F 38 DE 44 24 20 aesdec      xmm0,xmmword ptr [K12]  
        m = _mm_aesdec_si128(m, K11);

完整代码:(应可同时用MSVC和GCC编译)

#include <cstdio>
#include <cstring>
#include <cstdint>
#include <cstddef>
#include <wmmintrin.h>

#if defined(_MSC_VER)
#include <intrin.h>
#elif defined(__GNUC__)
#include <cpuid.h>
#else
#error compiler not supported
#endif

/// Reports whether the CPU advertises the AES-NI instructions.
///
/// Queries CPUID leaf 0x01 and tests bit 25 of ECX (mask 0x2000000).
///
/// @return Non-zero (the masked bit itself) when the bit is set; 0 when it is
///         clear or, on the non-MSVC path, when leaf 0x01 cannot be queried.
static int check_aes_support()
{
#if defined(_MSC_VER)
    int info[4];
    __cpuid(info, 0x01);
    return info[2] & 0x2000000;   // info[2] receives ECX
#else
    unsigned int eax = 0;
    unsigned int ebx = 0;
    unsigned int ecx = 0;
    unsigned int edx = 0;

    // __get_cpuid returns 0 when the requested leaf is unsupported and then
    // does not write the outputs; never test a register it did not fill in.
    if (!__get_cpuid(0x01, &eax, &ebx, &ecx, &edx))
        return 0;

    return ecx & 0x2000000;
#endif
}


/// Combines a previous round key with a key-generation assist word.
///
/// key1 is folded three times with a copy of itself shifted up by 4 bytes, so
/// every 32-bit lane ends up holding the XOR of itself and all lower lanes.
/// The top 32-bit lane of key2 is then broadcast to all four lanes and XORed
/// into the folded value.
///
/// @param key1 Value to fold.
/// @param key2 Value whose highest 32-bit lane is broadcast and mixed in.
/// @return The folded key1 XOR the broadcast lane of key2.
static inline __m128i aes256_key_assist_1(__m128i key1, __m128i key2)
{
    __m128i folded = key1;
    for (int pass = 0; pass < 3; ++pass)
        folded = _mm_xor_si128(folded, _mm_slli_si128(folded, 4));

    const __m128i topLane = _mm_shuffle_epi32(key2, _MM_SHUFFLE(3, 3, 3, 3));
    return _mm_xor_si128(folded, topLane);
}
/// Combines a previous round key with an assist word derived from key1.
///
/// key2 is folded three times with a copy of itself shifted up by 4 bytes.
/// Separately, _mm_aeskeygenassist_si128 is applied to key1 with an immediate
/// of 0x00 and 32-bit lane 2 of that result is broadcast to all four lanes.
/// The two values are XORed together.
///
/// @param key1 Input to the key-generation assist instruction.
/// @param key2 Value to fold.
/// @return The folded key2 XOR the broadcast assist lane.
static inline __m128i aes256_key_assist_2(__m128i key1, __m128i key2)
{
    const __m128i assist = _mm_aeskeygenassist_si128(key1, 0x00);
    const __m128i assistLane = _mm_shuffle_epi32(assist, _MM_SHUFFLE(2, 2, 2, 2));

    __m128i folded = key2;
    for (int pass = 0; pass < 3; ++pass)
        folded = _mm_xor_si128(folded, _mm_slli_si128(folded, 4));

    return _mm_xor_si128(folded, assistLane);
}


// Key-schedule step helpers used by the CBC routines below.
//
// AES256_GENKEY_1(K1, K2, C): runs _mm_aeskeygenassist_si128 on K2 with C as
// its second argument and hands the result, together with K1, to
// aes256_key_assist_1. Every use in this file passes a literal constant for C.
#define AES256_GENKEY_1(K1, K2, C)      aes256_key_assist_1(K1, _mm_aeskeygenassist_si128(K2, C))
// AES256_GENKEY_2(K1, K2): plain forwarder to aes256_key_assist_2(K1, K2).
#define AES256_GENKEY_2(K1, K2)         aes256_key_assist_2(K1, K2)

/// Encrypts a buffer with AES-NI round instructions, chaining each block into
/// the next (the previous output block is XORed into the next input block).
///
/// @param key    Pointer to 32 bytes of key material (read as two unaligned
///               16-byte loads).
/// @param iVec   Pointer to the 16-byte initial chaining value.
/// @param input  Pointer to `size` bytes of plaintext.
/// @param size   Length in bytes; must be non-zero and a multiple of 16.
/// @param output Pointer to `size` bytes that receive the ciphertext.
/// @return 0 on success, 1 when `size` is zero or not a multiple of 16.
static int aes256_cbc_encrypt(const void *key, const void *iVec,
    const void *input, std::size_t size, void *output)
{
    if (size == 0 || (size & 0xF) != 0)
        return 1;

    const __m128i *keyHalves = static_cast<const __m128i *>(key);
    const __m128i *src = static_cast<const __m128i *>(input);
    __m128i *dst = static_cast<__m128i *>(output);

    // Round keys 0..14; the first two are the raw key halves, the rest are
    // derived pairwise from the two preceding keys.
    __m128i rk[15];
    rk[0] = _mm_loadu_si128(keyHalves);
    rk[1] = _mm_loadu_si128(keyHalves + 1);
    rk[2] = AES256_GENKEY_1(rk[0], rk[1], 0x01);
    rk[3] = AES256_GENKEY_2(rk[2], rk[1]);
    rk[4] = AES256_GENKEY_1(rk[2], rk[3], 0x02);
    rk[5] = AES256_GENKEY_2(rk[4], rk[3]);
    rk[6] = AES256_GENKEY_1(rk[4], rk[5], 0x04);
    rk[7] = AES256_GENKEY_2(rk[6], rk[5]);
    rk[8] = AES256_GENKEY_1(rk[6], rk[7], 0x08);
    rk[9] = AES256_GENKEY_2(rk[8], rk[7]);
    rk[10] = AES256_GENKEY_1(rk[8], rk[9], 0x10);
    rk[11] = AES256_GENKEY_2(rk[10], rk[9]);
    rk[12] = AES256_GENKEY_1(rk[10], rk[11], 0x20);
    rk[13] = AES256_GENKEY_2(rk[12], rk[11]);
    rk[14] = AES256_GENKEY_1(rk[12], rk[13], 0x40);

    // `chain` starts as the IV and afterwards always holds the last
    // ciphertext block produced.
    __m128i chain = _mm_loadu_si128(static_cast<const __m128i *>(iVec));
    const std::size_t blockCount = size >> 4;

    for (std::size_t n = 0; n != blockCount; ++n)
    {
        chain = _mm_xor_si128(chain, _mm_loadu_si128(src + n));
        chain = _mm_xor_si128(chain, rk[0]);
        for (int round = 1; round <= 13; ++round)
            chain = _mm_aesenc_si128(chain, rk[round]);
        chain = _mm_aesenclast_si128(chain, rk[14]);
        _mm_storeu_si128(dst + n, chain);
    }

    return 0;
}

/// Decrypts a buffer produced by aes256_cbc_encrypt using AES-NI round
/// instructions; each decrypted block is XORed with the preceding ciphertext
/// block (or with the initial chaining value for the first block).
///
/// @param key    Pointer to 32 bytes of key material (read as two unaligned
///               16-byte loads).
/// @param iVec   Pointer to the 16-byte initial chaining value.
/// @param input  Pointer to `size` bytes of ciphertext.
/// @param size   Length in bytes; must be non-zero and a multiple of 16.
/// @param output Pointer to `size` bytes that receive the plaintext.
/// @return 0 on success, 1 when `size` is zero or not a multiple of 16.
static int aes256_cbc_decrypt(const void *key, const void *iVec,
    const void *input, std::size_t size, void *output)
{
    if (size == 0 || (size & 0xF) != 0)
        return 1;

    const __m128i *keyHalves = static_cast<const __m128i *>(key);
    const __m128i *src = static_cast<const __m128i *>(input);
    __m128i *dst = static_cast<__m128i *>(output);

    // Same forward key schedule as the encrypt routine.
    __m128i rk[15];
    rk[0] = _mm_loadu_si128(keyHalves);
    rk[1] = _mm_loadu_si128(keyHalves + 1);
    rk[2] = AES256_GENKEY_1(rk[0], rk[1], 0x01);
    rk[3] = AES256_GENKEY_2(rk[2], rk[1]);
    rk[4] = AES256_GENKEY_1(rk[2], rk[3], 0x02);
    rk[5] = AES256_GENKEY_2(rk[4], rk[3]);
    rk[6] = AES256_GENKEY_1(rk[4], rk[5], 0x04);
    rk[7] = AES256_GENKEY_2(rk[6], rk[5]);
    rk[8] = AES256_GENKEY_1(rk[6], rk[7], 0x08);
    rk[9] = AES256_GENKEY_2(rk[8], rk[7]);
    rk[10] = AES256_GENKEY_1(rk[8], rk[9], 0x10);
    rk[11] = AES256_GENKEY_2(rk[10], rk[9]);
    rk[12] = AES256_GENKEY_1(rk[10], rk[11], 0x20);
    rk[13] = AES256_GENKEY_2(rk[12], rk[11]);
    rk[14] = AES256_GENKEY_1(rk[12], rk[13], 0x40);

    // The keys fed to _mm_aesdec_si128 (rounds 1..13) are passed through
    // _mm_aesimc_si128 first; keys 0 and 14 are used as generated.
    for (int round = 1; round <= 13; ++round)
        rk[round] = _mm_aesimc_si128(rk[round]);

    __m128i previous = _mm_loadu_si128(static_cast<const __m128i *>(iVec));
    const std::size_t blockCount = size >> 4;

    for (std::size_t n = 0; n != blockCount; ++n)
    {
        // Read the ciphertext block before storing, so the value needed for
        // the next iteration's chaining is kept in a register.
        const __m128i cipherBlock = _mm_loadu_si128(src + n);

        __m128i state = _mm_xor_si128(cipherBlock, rk[14]);
        for (int round = 13; round >= 1; --round)
            state = _mm_aesdec_si128(state, rk[round]);
        state = _mm_aesdeclast_si128(state, rk[0]);

        _mm_storeu_si128(dst + n, _mm_xor_si128(state, previous));
        previous = cipherBlock;
    }

    return 0;
}


/// Demo driver: checks for AES support, encrypts a fixed 32-byte message with
/// a fixed key and IV, prints the ciphertext, decrypts it again and prints the
/// recovered plaintext.
///
/// @return -1 when AES support is not reported or a cipher call fails;
///         otherwise falls off the end of main (exit status 0).
int main()
{
    const int aesSupport = check_aes_support();
    std::printf("AES: %s\n", aesSupport ? "yes" : "no");

    if (!aesSupport)
        return -1;

    std::uint64_t data[] = {0x1122334455667788, 0xAABBCCDDEEFFBBAA, 0xAAAAAAAAAAAAAAAA, 0x4444333333333333};
    const std::uint64_t key[] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x1111111111111111, 0x1111111111111111};
    const std::uint64_t iVec[] = {0x123456789ABCDEF0, 0x0FEDCBA987654321};
    std::uint64_t cipher[4] = {0};

    // std::uint64_t is `unsigned long` on LP64 targets, which does not match
    // the %llX conversion; cast explicitly so the arguments always agree with
    // the format string.
    const auto printWords = [](const std::uint64_t (&words)[4]) {
        std::printf("0x%016llX 0x%016llX 0x%016llX 0x%016llX\n",
            static_cast<unsigned long long>(words[0]),
            static_cast<unsigned long long>(words[1]),
            static_cast<unsigned long long>(words[2]),
            static_cast<unsigned long long>(words[3]));
    };

    if (aes256_cbc_encrypt(key, iVec, data, sizeof(data), cipher) != 0)
    {
        std::fprintf(stderr, "encrypt failed\n");
        return -1;
    }
    printWords(cipher);

    std::memset(data, 0, sizeof(data));
    if (aes256_cbc_decrypt(key, iVec, cipher, sizeof(cipher), data) != 0)
    {
        std::fprintf(stderr, "decrypt failed\n");
        return -1;
    }
    printWords(data);
}

应输出:

0xCF8A4156843F0A3E 0x04D4BB63524324E6 0xAAB88C080DB40B2F 0xCC346B02BA6B16E8
0x1122334455667788 0xAABBCCDDEEFFBBAA 0xAAAAAAAAAAAAAAAA 0x4444333333333333

但在Release x64模式下,解密得到的第二行却是一些随机的内容:

0xCF8A4156843F0A3E 0x04D4BB63524324E6 0xAAB88C080DB40B2F 0xCC346B02BA6B16E8
0xEE64C4650D902107 0x0D03C7FA41AA930B 0x257F65FF49A99474 0xFACB372EDED13BAA

0 个答案:

没有答案