这是一个名为 armkey 的 ARM 程序,它从文件 key.in 中逐行读取 ASCII 文本(包含可打印字符和控制字符,范围 00h-7Fh)到输入字符串中。读取字符串的 ARM SWI 会删除所有行结束符,并用单个二进制 0 代替它们。如果没有更多的行,该 SWI 返回的已读取字节数为零。


    ; File:     armkey.s
    ; Function: This program reads a line of ASCII text
    ;           from a file and formats the output into 
    ;           key.out 
    ; Author:
    ; Changes:  Date        Reason
    ;           -------------------------------------
    ;           04/05/2018  Original Version

    ; SWI (software interrupt) numbers used by this program,
    ; listed in numeric order.
            .equ SWI_Exit,  0x11     ;stop execution
            .equ SWI_Open,  0x66     ;open a file (handle comes back in r0)
            .equ SWI_Close, 0x68     ;close a file
            .equ SWI_PrStr, 0x69     ;write a null-terminated string
            .equ SWI_RdStr, 0x6a     ;read a string, null-terminated on return

             .global   _start

    ; Program entry point: open the input and output files and save
    ; their handles for the rest of the program.
    ;
    ; SWI_Open (66h) calling sequence used here:
    ; - r0 points to the null-terminated file name
    ; - r1 is the mode: 0 for input, 1 for output
    ; - after the open r0 holds the file handle
    ;
    ; NOTE(review): open failures are not checked; confirm how the target
    ; simulator reports them before relying on the saved handles.
_start:                               ;entry symbol exported by .global above

    ; open input file
             ldr  r0, =InFileName     ;r0 points to the file name
             ldr  r1, =0              ;r1 = 0 specifies the file is input
             swi  SWI_Open            ;open the file ... r0 will be the file handle
             ldr  r1, =InFileHandle   ;r1 points to handle location
             str  r0, [r1]            ;store the file handle

    ; open output file
             ldr  r0, =OutFileName    ;r0 points to the file name
             ldr  r1, =1              ;r1 = 1 specifies the file is output
             swi  SWI_Open            ;open the file ... r0 will be the file handle
             ldr  r1, =OutFileHandle  ;r1 points to handle location
             str  r0, [r1]            ;store the file handle

    ; Main loop: read one line from the input file, filter it into
    ; OutString, write the result followed by CR LF, then repeat.
    ;
    ; Read step (SWI_RdStr, 6ah):
    ; - r0 contains the file handle
    ; - r1 points to the input string buffer
    ; - r2 contains the max number of characters to read
    ; - the input string comes back terminated with 0
    ; - r0 returns the byte count; 0 means no more lines
;----------------------------------
_read:
         ldr  r0, =InFileHandle   ;r0 points to the input file handle
         ldr  r0, [r0]            ;r0 has the input file handle
         ldr  r1, =InString       ;r1 points to the input string
         ldr  r2, =80             ;r2 has the max size of the input string
         swi  SWI_RdStr           ;read a string from the input file
         cmp  r0, #0              ;no characters read means EOF
         beq  _exit               ;so close and exit

; Filter step.
; Rule implemented (taken from the compares the original code performed):
;   - a space (20h) is copied unchanged
;   - 'A'..'Z' (41h-5Ah) is copied unchanged
;   - 'a'..'z' (61h-7Ah) is folded to upper case by subtracting 20h
;   - every other character is dropped
;   - the 0 terminator ends the line
; Register roles inside the loop:
;   r0 = source pointer into InString   (post-incremented by the load)
;   r1 = destination pointer, OutString (post-incremented by the store)
;   r2 = the character being examined
         ldr  r0, =InString       ;r0 points to the input  string
         ldr  r1, =OutString      ;r1 points to the output string
_loop:
         ldrb r2, [r0], #1        ;get the next input byte, advance source
         cmp  r2, #0x00           ;end of the input line?
         beq  _finloop            ;yes ... terminate the output string
         cmp  r2, #0x20           ;space?
         beq  _output             ;yes ... keep it
         cmp  r2, #0x41           ;below 'A' cannot be a letter
         blt  _loop               ;drop it
         cmp  r2, #0x5a           ;'A'..'Z'?
         ble  _output             ;yes ... keep it as is
_lower:                           ;here r2 > 'Z': only 'a'..'z' survives
         cmp  r2, #0x61           ;between 'Z' and 'a' is not a letter
         blt  _loop               ;drop it
         cmp  r2, #0x7a           ;above 'z' is not a letter
         bgt  _loop               ;drop it
         sub  r2, r2, #0x20       ;subtracting 20h turns lower case into upper case
_output:
         strb r2, [r1], #1        ;store the byte, advance destination
         b    _loop               ;next input byte
_finloop:
         strb r2, [r1]            ;r2 is 0 here: null-terminate the output string

; Write the output string
; Then write a CR LF pair

         ldr  r0, =OutFileHandle  ;r0 points to the output file handle
         ldr  r0, [r0]            ;r0 has the output file handle
         ldr  r1, =OutString      ;r1 points to the output string
         swi  SWI_PrStr           ;write the null terminated string

         ldr  r0, =OutFileHandle  ;reload the handle so this write does not
         ldr  r0, [r0]            ;depend on the SWI leaving r0 untouched
         ldr  r1, =CRLF           ;r1 points to the CRLF string
         swi  SWI_PrStr           ;write the null terminated string
         bal  _read               ;read the next line

; Shutdown: close the input file, close the output file, stop.
; r1 is used as the pointer to each saved handle; r0 carries the
; handle itself into SWI_Close.
_exit:
         ldr  r1, =InFileHandle   ;r1 -> saved input handle
         ldr  r0, [r1]            ;r0 = input file handle
         swi  SWI_Close           ;close the input file

         ldr  r1, =OutFileHandle  ;r1 -> saved output handle
         ldr  r0, [r1]            ;r0 = output file handle
         swi  SWI_Close           ;close the output file

         swi  SWI_Exit            ;end of program

; Working storage and string constants referenced by the code above.
; The handle slots are filled in after each SWI_Open; the two 128-byte
; buffers hold the line as read and the line as written.
InFileHandle:  .skip 4            ;4 byte slot for the input  file handle
OutFileHandle: .skip 4            ;4 byte slot for the output file handle
InFileName:    .asciz "KEY.IN"   ;input file name, null terminated
InString:      .skip 128         ;128 byte buffer receiving each line from SWI_RdStr
OutString:     .skip 128         ;128 byte buffer holding the line to be written
CRLF:          .byte 13, 10, 0    ;CR, LF, then a 0 so it can be written as a string
OutFileName:   .asciz "KEY.OUT"  ;output file name, null terminated




这段代码的大部分时间都会花在 I/O 上;不过我们可以假设缓冲区非常大,这样与 SWI 的开销相比,内存复制的开销就不可忽略了。

首先,您可以使用 NEON SIMD 指令进行左打包(left-packing,即根据比较结果过滤数组)。但 ARM 缺少一些让 x86 上的左打包借助 SSSE3 或 AVX2 变得高效的特性。(例如 pmovmskb 可以把向量比较结果转换为整数寄存器中的位掩码,您可以把它用作表索引来查找 shuffle 掩码;再用 popcnt 计算输出指针应该前进多少。)我不知道如何用 NEON 高效地实现 strchr :/

如果能一次处理多个字节,这种方式可能带来显著的加速。无分支(branchless)的写法也很好,可以避免分支预测失败。

您不需要单独的输出缓冲区;可以就地过滤数组。搜索第一个' ',然后在同一个缓冲区中同时推进 src 和 dst 指针。src 始终位于 dst 之前,每跳过一个字符,两者的距离就更远一些。而且在存储时,这些数据在高速缓存中仍然是热的,因此可以避免向冷缓存行存储所带来的读取所有权(read-for-ownership)流量。您访问的内存总量大约减半,因此从缓存中逐出的数据也更少。

重新排列循环,使条件分支位于循环底部,而不需要用无条件的 b 跳回顶部。有时这需要对循环进行旋转:您可能必须剥离最后一次迭代的一部分,并在循环之后重复部分循环体;还可能需要在进入第一次迭代之前做一些准备工作,或者直接跳转到循环中间的入口点。

ldr r0, =InFileHandle:使用一个调用保留(call-preserved)的寄存器,在多次 swi 指令之间始终保存指向数据区的指针(在函数的开头/结尾用 push / pop 保存/恢复该寄存器)。这样就可以用不同的偏移量直接加载不同的数据项,而无需单独构造每个指针。例如 ldr r0, [r4, #InFileHandle-Base](如果这是正确的语法的话)。


     ldr  r0, =InString       ;r0 points to the input  string
     ldr  r1, =OutString      ;r1 points to the output string

您可以用 add r1, r0, #128 替换第二条指令,这样开销更小(一条 ALU 指令,而不是从文字池进行 PC 相对加载,或由汇编器决定如何为您构造该常量)。
