Question

我目前正在为“结构化计算机组织”这门课做一个使用x86处理器的课程项目。我访问的值是一个1字节的char，但我不知道如何将它与大写字母进行比较。他们说要使用十六进制格式的ASCII表，但我不确定该如何比较两者。

void changeCase (char char_array[], int array_size ) {
    __asm {
            // BEGIN YOUR CODE HERE

        mov eax, char_array;        //eax is base image
        mov edi, 0;

    readArray:
        cmp edi, array_size;
        jge  exit;
        mov ebx, edi;           //using ebx as offset
        shl ebx, 2;
        mov cl, [eax + ebx];    //using ecx to be the storage register

    check:
        //working on it
        cmp cl, 0x41;       //check if cl is <= than ASCII value 65 (A)
        jl next_indx;
        cmp cl, 0x7A;       //check if cl is >= than ASCII value 122 (z)
        jg next_indx;
        cmp cl, 'a';
        jl convert_down;
        jge convert_up;


    convert_down:
        or cl, 0x20;        //make it lowercase
        jmp write;

    convert_up:
        and cl, 0x20;       //make it uppercase
        jmp write;

    write:
        mov byte ptr [eax + ebx], cl    //slight funky town issue here,

    next_indx:
        inc edi;

    exit:
        cmp edi, array_size;
        jl readArray;

    mov char_array, eax;
            // END YOUR CODE HERE
    }
}

目前任何帮助我都非常感激。提前感谢您的帮助！

编辑1：

感谢所有建议和明确要点，编辑我的代码以反映变化。现在存在访问冲突的一些问题。

编辑2（+）：

感谢各位帮忙细看代码的人。我现在还在处理所有字母的转换。

Answer 1

这个问题的各种变体经常被问到。这个版本的问题（需要超出if(isalpha(c)) c|=0x20;的条件行为）使问题变得足够复杂，以至于如何高效地完成它并不是一目了然的。

事实证明xor并不难想到，而把这段代码改成无条件转换为大写或小写，只需要把xor 0x20简单地改为and ~0x20或or 0x20。（还可以再简化一点。）

下面是我尝试以最佳效率编写的asm。我甚至包括了一个使用SIMD向量的版本，以及另一个字节循环版本，它使用了我在矢量化过程中想到的无分支思路。

阅读此答案可能只有在您了解了使用未经优化的代码解决此问题所涉及的基本原则后才有用。 OTOH，实际上只需要很少的操作，所以没有太多的代码可以解决。我做了很多评论。 x86标记wiki中有许多有用的链接，从教程到参考指南到性能调整。

在小写和大写ASCII字符之间进行转换只需要设置或清除0x20位，因为在ASCII字符集中这两个范围彼此相距32，并且都不跨越mod32边界。

对于每个字节：

复制一份，并无条件地与0x20进行OR运算
检查它是否在'a'和'z'之间
如果是，请使用xor翻转ASCII字母大小写位并将结果存储回数组。

以这种方式执行ASCII isalpha(3)测试是安全的：设置该位之后落入'a'..'z'范围的唯一源字节是大写字母字符。这只是数学，适用于任何两个不跨越%32边界的等大小范围。（例如，如果相关位是0x40，则为%64边界。）

为了更有效地进行比较，我使用无符号比较技巧，因此循环内只有一个条件分支（除了循环条件本身）。请参阅代码中的注释以获得解释。

0x40

如果把一些“设计文档”式的内容放到代码之外的单独区块中，这段代码可能会更具可读性。它把代码弄得很杂乱，看起来像是有很多代码，但实际上指令很少。（这些内容很难用简短的注释解释清楚。给代码写注释很棘手：过于明显的注释只是杂乱，会占用阅读代码和有用注释的时间。）

矢量化

实际上对于x86，我一次使用SSE或AVX做16B，做同样的算法，但是要与两个/******** Untested. ************/ // ASCII characters are flipped to the opposite case (upper <-> lower) // non-ASCII characters are left unchanged void changeCase (char char_array[], int array_size ) { __asm{ // BEGIN YOUR CODE HERE mov esi, char_array; // MSVC inline asm requires these potentially-redundant copies :( mov ecx, array_size; test ecx,ecx; // return if(size <= 0) jle early_out; next_char: mov al, [esi]; // load the current character mov dl, al; // check if the character is alphabetic or not // there are two equal-size ranges of characters: one with 0x20 set, and one without or al, 0x20; // set 0x20 and then just check that lowercase range // unsigned compare trick: 0 <= n < high can be done with one unsigned compare instead of two signed compares // low < n < high can be done by shifting the range first sub al, 'a'; // if al is less than 'a', it will become a large unsigned number cmp al, 'z'-'a'; ja non_alpha; // conditionally skip the flip & store xor dl, 0x20; // toggle the ASCII case bit mov [esi], dl; // xor [esi], 0x20 // saves the mov earlier, but is otherwise slower non_alpha: inc esi; dec ecx; jz next_char; early_out: // END YOUR CODE HERE } }进行比较。当然，无条件地存储结果，所以所有非字母字符的数组仍然会在缓存中变脏，使用更多的内存带宽。

SSE没有无符号比较指令，但我们仍然可以把要查找的范围平移到最底部。没有小于-128的值，因此在有符号比较中，它的作用与0在无符号比较中的作用相同。

为此，请减去128。(or add, or xor (carryless add); there's nowhere for the carry / borrow to go)。这可以在与减去'a'相同的操作中完成。

然后使用比较结果作为掩码，将0x20的向量中的字节清零，因此只有字母字符与0x20进行异或运算。（0是XOR / add / sub的单位元，对于SIMD条件操作通常非常方便。）

另请参阅一个strtoupper version that has been tested，以及在循环中调用它的代码，包括处理隐式长度C字符串上长度不是16的倍数的输入（在运行过程中搜索终止的0）。

0x20

这个compiles to nice code, even without AVX，只有一个额外的#include <immintrin.h> // Call this function in a loop, with scalar cleanup. (Not implemented, since it's the same as any other vector loop.) // Flip the case of all alphabetic ASCII bytes in src __m128i inline flipcase(__m128i src) { // subtract 'a'+128, so the alphabetic characters range from -128 to -128+25 (-128+'z'-'a') // note that adding 128 and subtracting 128 are the same thing for 8bit integers. // There's nowhere for the carry to go, so it's just xor (carryless add), flipping the high bit __m128i lcase = _mm_or_si128(src, _mm_set1_epi8(0x20)); __m128i rangeshift= _mm_sub_epi8(lcase, _mm_set1_epi8('a'+128)); __m128i non_alpha = _mm_cmpgt_epi8(rangeshift, _mm_set1_epi8(-128 + 25)); // 0:alphabetic -1:non-alphabetic __m128i flip = _mm_andnot_si128(non_alpha, _mm_set1_epi8(0x20)); // 0x20:alpha 0:non-alpha return _mm_xor_si128(src, flip); // just mask the XOR-mask so non-alphabetic elements are XORed with 0 instead of 0x20 // XOR's identity value is 0, same as for addition }来保存寄存器的副本。请参阅两个早期版本的godbolt链接（一个使用两个比较以保持简单，另一个使用movdqa，然后我记得屏蔽pblendvb s的向量而不是结果。）

0x20

使用无分支测试的相同想法也适用于字节循环：

; flipcase: compiler output for the SSE2 case-flip.
; xmm0 is both the 16 input bytes and the result; xmm1 and xmm2 are scratch.
flipcase:
        movdqa  xmm2, XMMWORD PTR .LC0[rip]    ; xmm2 = 16 x 0x20 (the ASCII case bit)
        movdqa  xmm1, xmm0                     ; copy, so the original bytes survive in xmm0
        por     xmm1, xmm2                     ; set the case bit in every byte of the copy
        psubb   xmm1, XMMWORD PTR .LC1[rip]    ; subtract -31, i.e. 'a'+128 as a signed byte
        pcmpgtb xmm1, XMMWORD PTR .LC2[rip]    ; signed compare against -103 (= -128+25): all-ones where greater
        pandn   xmm1, xmm2                     ; xmm1 = ~xmm1 & xmm2: 0x20 where the compare was false, else 0
        pxor    xmm0, xmm1                     ; toggle 0x20 only in the bytes selected by the mask
        ret

section .rodata
    .LC0:   times 16 db  32                    ; 0x20 in every byte
    .LC1:   times 16 db  -31                   ; 'a'+128 wrapped to a signed byte
    .LC2:   times 16 db  -103                  ; -128 + 25

对于64位代码，只需使用mov esi, char_array; mov ecx, array_size; test ecx,ecx; // return if(size <= 0) jle .early_out; ALIGN 16 ; really only need align 8 here, since the next 4 instructions are all 2 bytes each (because op al, imm8 insns have a special encoding) .next_char: mov al, [esi]; // load the current character mov dl, al; // check if the character is alphabetic or not or al, 0x20; sub al, 'a'; cmp al, 'z'-'a'; // unsigned compare trick: 'a' <= al <= 'z' setna al; // 0:non-alpha 1:alpha (not above) shl al, 5; // 0:non-alpha 0x20:alpha xor dl, al; // conditionally toggle the ASCII case bit mov [esi], dl; // unconditionally store inc esi; dec ecx; // for AMD CPUs, or older Intel, it would be better to compare esi against an end pointer, since cmp/jz can fuse but dec can't. This saves an add ecx, esi outside the loop jz .next_char; .early_out:代替rsi。其他一切都是一样的。

显然是MSVC inline asm doesn't allow .label local-symbol names。我为第一个版本更改了它们（使用条件分支），但不是这个。

使用movzx eax, byte [esi]可能稍好一些，可以在某些CPU上避免对函数入口处eax值的错误依赖。另一方面，只有AMD（和Silvermont）有这个问题，但在AMD上movzx并不像普通加载那样便宜。（在Intel上则是如此：它是一个uop，只使用加载端口，而不使用ALU端口。）之后对al进行操作仍然很好，因为它避免了在setcc写入al之后读取eax所导致的partial-register stall（或为避免它而需要的额外指令）。（不幸的是，没有setcc r/m32，只有r/m8。）

我不禁想知道，如果有人为这样的作业交上这样的代码，教授会怎么想。:P 我怀疑即使是聪明的编译器也不会使用这种setcc / shift技巧，除非你引导编译器这样做。（也许用unsigned mask = (tmp>='a' && tmp<='z'); mask <<= 5; a[i] ^= mask;之类的写法。）编译器确实知道无符号比较技巧，但是gcc doesn't use it in some cases for non-compile-time-constant range checks, even when it can prove that the range is small enough。

Answer 2

为了清楚起见，我只使用纯汇编并假设...

char_array是[ebp+8]处的32位指针。
array_size是[ebp+12]处的32位二进制补码（有符号）数字。
对于您的平台（大多数平台都是如此），char的编码是ASCII。

您应该能够自己把它改写成内联汇编。现在，如果你看一下the table everyone is supposed to remember but barely anyone does，你会注意到一些重要的细节......

大写字母A到Z分别映射到代码0x41到0x5A。
小写字母a到z分别映射到代码0x61到0x7A。
其他所有内容都不是字母，因此无需转换大小写。
如果查看大写和小写字母范围的二进制表示，您会注意到它们完全相同，唯一的例外是大写字母的第6位已清除，而小写字母则已设置。

结果，算法将是......

// Walk the array one byte at a time, flipping the case of ASCII letters only.
while array_size != 0
    byte = *char_array
    if byte >= 0x41 and byte <= 0x5A          // 'A'..'Z'
        *char_array |= 0x20 // Turn it lowercase
    else if byte >= 0x61 and byte <= 0x7A     // 'a'..'z'
        *char_array &= 0xDF // Turn it uppercase
    array_size -= 1                           // one fewer byte left to process
    char_array += 1                           // advance to the next byte

现在，让我们将其转化为汇编......

# Flip the case of every ASCII letter in char_array, in place.
#   In:    [ebp+8]  = char_array (pointer), [ebp+12] = array_size (signed 32-bit)
#   Uses:  eax = pointer to the current byte, ecx = bytes remaining, dl = current byte
mov eax, [ebp+8]      # char *eax = char_array
mov ecx, [ebp+12]     # int ecx = array_size

.loop:
    test ecx, ecx     # Set flags from ecx without modifying it
    jle .end_loop     # Done when no bytes remain (also stops immediately on a negative size)
    mov dl, [eax]     # dl = *char_array
    cmp dl, 'A'       # Unsigned compares: bytes >= 0x80 count as "above 'z'"
    jb .continue      # Below 'A': not a letter
    cmp dl, 'Z'
    jbe .is_uppercase # 'A'..'Z'
    cmp dl, 'a'
    jb .continue      # '[' .. '`' sit between the two letter ranges
    cmp dl, 'z'
    ja .continue      # Above 'z': not a letter
                      # Otherwise fall through: 'a'..'z'

    .is_lowercase:
        and dl, 0DFh  # Clear bit 5 (0x20); the leading 0 makes this a number, not a symbol
        mov [eax], dl # Store the uppercase letter back
        jmp .continue

    .is_uppercase:
        or dl, 20h    # Set bit 5 (0x20)
        mov [eax], dl # Store the lowercase letter back

    .continue:
        inc eax       # Advance the pointer to the next byte
        dec ecx       # One fewer byte remaining
        jmp .loop

.end_loop:

代码到达.end_loop后，您就完成了。

我希望这对你有所启发！

Answer 3

在ASCII中，'a'-'z'和'A'-'Z'除了一位（0x20）之外是等价的。

你的朋友在这里是XOR。

如果您有一个字母（'A'-'Z'或'a'-'z'），将其与0x20进行异或运算就会切换大小写；

在XORing之前，进行范围检查是有道理的。（查看该值是否真的是一个字母）
您可以通过对要检查的值先进行OR 0x20运算来简化此范围检查，这会把'A'变成'a'、把'Z'变成'z'，然后只需进行一次范围检查。（如果你只是检查值是否在'A'和'z'之间，就会把中间的字符（'['、']'等）也误判为字母。）

Answer 4

感谢@KemyLand对汇编代码的有用细分，我已经想出如何将大写转换为小写，反之亦然。

void changeCase (char char_array[], int array_size ) {
     //this function is designed to change lowercase letters to uppercase, and vice-versa, from a char-array given the array and its size.
__asm{
        // BEGIN YOUR CODE HERE

    mov eax, [ebp + 8];     //move to register value parameter 1 (the array)
    mov ecx, [ebp + 12];    //likewise parameter 2 (the array size)

    START:

        or ecx, ecx;    //check if pointer is 0
        cmp ecx, 0;
        je endloop;   //go to end loop

        mov dl,byte ptr [eax];  //not sure if needed, but reassurance
        cmp dl, 0x41;   // is char an A?
        jl cont;

        cmp dl, 0x5A;   // is char a Z?
        jle convertUP;

        cmp dl, 0x61;   // is char an a?
        jl cont;

        cmp dl, 0x7A;   // is char a z?
        jle convertDOWN;

        jmp cont;


    convertUP:
        or dl, 0x20;        //Yes! Finally got it working!
        mov byte ptr [eax], dl;

        jmp cont;

    convertDOWN:
        and dl, 0xdf;       //this will work for sure.
        mov[eax], dl;

        jmp cont


    cont:
        inc eax;
        dec ecx;

        jmp START;

    endloop:
}

}

欢迎帮忙指出我可能遗漏的内容！谢谢大家帮助我更好地理解x86汇编。

Answer 5

在ascii表中，所有字母都是连续的：

A=0x41=01000001
a=0x61=01100001

Z=0x5A=01011010
z=0x7A=01111010

所以你可以看到，通过切换第6位，你可以将表格从大写转换为小写。

如何访问char数组并将小写字母更改为大写，反之亦然

编辑1：

编辑2（+）：

5 个答案:

矢量化

使用无分支测试的相同想法也适用于字节循环：