我正在尝试优化一个块复制循环的内存占用和执行时间:循环会把每个字的值除以 2,然后饱和到最大值 5。但是我写出的代码包含许多分支指令。我想知道,在一次加载 8 个寄存器之后,是否有办法同时处理这些寄存器以减少多次分支,或者是否有单条指令可以一次性对它们完成这些操作?
; Vector table (armasm syntax, Thumb instruction set).
; Only the first two entries are provided: the initial stack pointer
; value and the address of the reset handler.
        THUMB
        AREA    RESET, CODE, READONLY
        EXPORT  Reset_Handler
        EXPORT  __Vectors

stack_top EQU   0x00180000              ; top of the stack

__Vectors
        DCD     stack_top               ; entry 0: top of the stack
        DCD     Reset_Handler           ; entry 1: reset vector - where the program starts
; Main program: copy num_words words from source to dest, replacing every
; word x with min(x >> 1, 5) on the way (scale by 0.5, saturate to 5).
;
; Register roles
;   r0      read pointer into source  (post-incremented)
;   r1      write pointer into dest   (post-incremented)
;   r2      total number of words to copy
;   r3      loop counter (blocks first, then leftover words)
;   r4      the word being processed in the leftover loop
;   r5-r12  the eight words of the current block
;
; Each loaded register is processed in place by the ScaleSat macro, so the
; block loop needs no MOV shuffling through a scratch register and no
; call/return pair per word: the only branch left in the loop is the
; loop-closing BNE. Words are treated as unsigned, so a word with bit 31
; set also saturates to 5.
        AREA    Task2b_Code, CODE, READONLY

; ScaleSat reg : reg = min(reg >> 1, 5), unsigned. Clobbers the flags.
; The shift is done first and the result clamped afterwards, which needs
; only one conditional instruction (IT requires Thumb-2).
        MACRO
        ScaleSat $reg
        LSRS    $reg,$reg,#1            ; scale by 0.5
        CMP     $reg,#5                 ; above the saturation limit?
        IT      HI
        MOVHI   $reg,#5                 ; yes: saturate to 5
        MEND

Reset_Handler
        ENTRY
num_words EQU   (end_source-source)/4   ; number of words to copy
start
        LDR     r0,=source              ; point to the start of the area to copy from
        LDR     r1,=dest                ; point to the start of the area to copy to
        MOV     r2,#num_words           ; get the number of words to copy

; Words are moved eight at a time with LDMIA/STMIA for as long as whole
; blocks remain; anything left over is handled one word at a time below.
block
        LSRS    r3,r2,#3                ; r3 = number of whole 8-word blocks
        BEQ     individ                 ; no whole block: go straight to single words

block_loop
        LDMIA   r0!,{r5-r12}            ; load 8 words as a block
        ScaleSat r5                     ; process each word where it was loaded
        ScaleSat r6
        ScaleSat r7
        ScaleSat r8
        ScaleSat r9
        ScaleSat r10
        ScaleSat r11
        ScaleSat r12
        STMIA   r1!,{r5-r12}            ; store the 8 processed words
        SUBS    r3,r3,#1                ; one block done
        BNE     block_loop              ; continue until the last block is reached

; Fewer than 8 words may remain: num_words modulo 8.
individ
        ANDS    r3,r2,#7                ; r3 = number of leftover words
        BEQ     exit                    ; nothing left to copy

individ_loop
        LDR     r4,[r0],#4              ; get next word to copy
        ScaleSat r4                     ; process it
        STR     r4,[r1],#4              ; store the processed word
        SUBS    r3,r3,#1                ; one word done
        BNE     individ_loop            ; continue until the last word is reached

; languish in an endless loop once all is done
exit
        B       exit
; subroutine to scale a value by 0.5 and then saturate it to a maximum of 5
;   in : r4 = value to process, treated as an unsigned 32-bit word
;   out: r4 = min(r4 >> 1, 5)
;   clobbers: condition flags only
; The value is halved first and the result is then clamped with an unsigned
; comparison. A signed "less than 10" test followed by a logical shift would
; let a word with bit 31 set through as a huge positive result, breaking the
; maximum of 5; here such a word saturates like any other large value.
; The clamp is a single IT-predicated MOV (Thumb-2), so the routine is
; straight-line code with one return instead of a branch and two returns.
data_processing
        LSRS    r4,r4,#1                ; perform scaling (unsigned divide by 2)
        CMP     r4,#5                   ; check whether saturation is needed
        IT      HI
        MOVHI   r4,#5                   ; saturate to 5
        BX      lr
; Read-only input table. end_source labels the first byte after the last
; word, so (end_source-source) is the size of the table in bytes.
        AREA    Task2b_ROData, DATA, READONLY
source                                  ; some data to copy
        DCD     1,2,3,4,5,6,7,8
        DCD     9,10,11,0,4,6,12,15
        DCD     13,8,5,4,3,2,1,6
        DCD     23,11,9,10
end_source

; Writable output buffer, reserved with the same size as the input table.
        AREA    Task2b_RWData, DATA, READWRITE
dest                                    ; copy to this area of memory
        SPACE   end_source-source
end_dest
        END
答案 0 :(得分:0)
可以使用下面这段简单的代码:
; Plain block copy: moves WORD_COUNT words from SRC_ADDR to DST_ADDR one
; word at a time, then spins forever. The words are copied unchanged.
;   r0 = read pointer, r1 = write pointer, r3 = word in flight,
;   r4 = words still to copy
        AREA    blockCopy, CODE
        ENTRY

SRC_ADDR   EQU  0x40000000              ; address to copy from
DST_ADDR   EQU  0x40000020              ; address to copy to
WORD_COUNT EQU  5                       ; number of words to copy

        LDR     r0,=SRC_ADDR
        LDR     r1,=DST_ADDR
        MOV     r4,#WORD_COUNT
copy_loop
        LDR     r3,[r0],#4              ; fetch a word, advance the read pointer
        STR     r3,[r1],#4              ; store it, advance the write pointer
        SUBS    r4,r4,#1                ; one word fewer to go
        BNE     copy_loop
halt
        B       halt                    ; all words copied: stay here
        END