在汇编中删除字符串里多余的空格

时间:2016-11-10 04:54:09

标签: assembly x86-16

我在一次汇编考试中遇到了这样一道题:给定字符串:

str db "  hello world   hello    world   #"

#符号表示字符串的结尾。操作后,字符串应如下所示:

"hello world hello world#"

任何用于删除额外空白区域的算法或建议都将受到赞赏。

我试过这段代码:

; Asker's attempt: squeeze runs of spaces in the '#'-terminated string `str`
; in place. Phase 1 measures the string; phase 2 deletes the second space of
; every pair of adjacent spaces by shifting the tail one byte to the left.
; Because a run is only ever reduced to ONE space (never to zero), a leading
; run still leaves one space at the start, and a trailing run leaves one
; space in front of '#'.
; NOTE(review): nothing in this snippet loads ds with the data segment, and
; no `code segment` line precedes `start:` -- confirm against the full file.
data segment
; NOTE(review): `data ends` sits on the same line as the db directive,
; presumably a paste artifact.
str db "  hello world   hello    world   #" data ends     


start:
mov si,offset str       ; si = pointer used to scan for the terminator
mov di,0                ; di = count of characters seen before '#'

; Phase 1: walk the string until '#'; afterwards di = index of '#'.
while:
cmp [si],'#'            ; NOTE(review): memory operand size is not stated
jne loopwhile
jmp whileT
loopwhile:
inc di
inc si
jmp while

; Phase 2: si is now an index into str (starts at 0), di = index of '#'.
whileT:
mov si,0
while2:
cmp si,di               ; unsigned compare: stop once si reaches '#'
jae finish

   cmp str[si],32       ; 32 = ASCII space
   je check2
   inc si               ; not a space: keep it, advance
   jmp while2
   check2:
   cmp str[si+1],32     ; is the space followed by another space?
   je inner
   inc si               ; single space: keep it, advance
   jmp while2
      ; Two adjacent spaces at si and si+1: remove the one at si+1 by
      ; moving str[si+2 .. di] (including '#') one position to the left.
      inner:
        mov bx,si
        inc bx          ; bx = index of the space being overwritten
        innerW:
        cmp bx,di
        jae finishInner
        mov al,str[bx+1]
        mov str[bx],al  ; last iteration copies '#' from str[di] to str[di-1]
        inc bx
        jmp innerW 
        finishInner:
        dec di          ; string is one byte shorter; di is again index of '#'
        jmp while2      ; si unchanged: re-test the same position

finish: 
; NOTE(review): this sets ax = 004Ch, i.e. ah = 00h and al = 4Ch; int 21h
; takes its function number from ah, so this does not select function 4Ch.
mov ax,4Ch
int 21h 

code ends

但我仍然在字符串的开头有一个额外的空格。

1 个答案:

答案 0 :(得分:1)

一个更简单(?)(至少肯定更短)的算法:

    ; Squeeze spaces in the '#'-terminated string `str`, in place:
    ; leading and trailing spaces are dropped and every inner run of spaces
    ; becomes a single space. The write pointer never overtakes the read
    ; pointer (at most one stosb per lodsb), so copying in place is safe.
    ;
    ; Register roles:
    ;   ds:si = source pointer, reads the string char by char
    ;   es:di = destination pointer, writes the compacted string
    ;   bx    = start of str, lower bound for the trailing-space scan
    ;   cx    = number of spaces that may still be copied (0 or 1)
    ;   al    = current character
    mov   ax,SEG str
    mov   ds,ax
    mov   es,ax     ; source and destination share one segment
    mov   si,OFFSET str
    mov   di,si
    mov   bx,si
    xor   cx,cx     ; cx = 0: spaces before the first word are skipped
    cld             ; DF = 0 so lodsb/stosb move si/di forward

copyLoop:
    lodsb           ; al = ds:[si++]
    cmp   al,'#'
    je    removeTrailingSpaces
    cmp   al,' '
    jne   notSpace
    jcxz  copyLoop  ; no more spaces allowed to copy, skip this one
    ; copy the space
    dec   cx        ; --allowed
    stosb           ; es:[di++] = al
    jmp   copyLoop

notSpace:
    mov   cx,1      ; one space may be copied after this character
    stosb           ; copy the not-space char
    jmp   copyLoop

    ; al still holds '#' here. Step di back over any space written after
    ; the last word, but never below the start of the string (bx).
removeTrailingSpaces:
    cmp   di,bx
    je    emptyStringResult
    dec   di
    cmp   BYTE PTR [di],' '
    je    removeTrailingSpaces
    inc   di        ; not-space found, '#' goes right after it
emptyStringResult:
    stosb           ; write the '#' at its final position

    ; DOS takes the function number from ah, so load ah = 4Ch (terminate
    ; with return code) and al = 00h (return code) together. The previous
    ; `mov ax,4Ch` left ah = 00h, which selects a different function.
    mov   ax,4C00h
    int   21h

工作原理:

只需把所有内容从 ds:[si] 复制到 es:[di],同时对允许复制的空格进行倒计数,当计数器为零时就跳过空格。非空格字符会把计数器重置为 1(因此单词之后的下一个空格会被复制)。

当找到 '#' 时,它会从已写入内容的末尾向前扫描尾随空格,并把终止符 '#' 写在最后一个非空格字符之后(如果结果字符串为空,则写在字符串的起始位置)。

在评论里解释我是如何构思这个算法、以及如何判断当前单词是否是最后一个单词时,我想到了另一种处理字符串结尾的办法:缓存最后一个已知的单词结束位置,这样在到达源字符串的结尾后,就可以利用缓存的指针直接把终止符写到正确的位置。变体 2:

    ; initial code is identical, only function of bx is different, so updated comment:
    ...
    ; bx = str pointer pointing +1 beyond last non-space character
    ; (for empty input string that means OFFSET str to produce "#" result)
    ...

; Variant 2 main loop: same copy/skip logic, but instead of scanning back
; over trailing spaces at the end, bx is kept pointing one byte past the
; last non-space character written, and '#' is stored there directly.
; Entry state (from the initial code, which is not repeated here):
;   ds:si = es:di = bx = OFFSET str, cx = 0,
;   and lodsb/stosb need DF = 0 so that si/di move forward.
copyLoop:
    lodsb           ; al = ds:[si++]
    cmp   al,'#'
    je    setTerminatorAndExit
    cmp   al,' '
    jne   notSpace
    jcxz  copyLoop  ; no more spaces allowed to copy, skip this one
    ; copy the space
    dec   cx        ; --allowed
    stosb           ; es:[di++] = al
    jmp   copyLoop

notSpace:
    mov   cx,1      ; one space may be copied after this character
    stosb           ; copy the not-space char
    mov   bx,di     ; bx = one past the last non-space char written
    jmp   copyLoop

setTerminatorAndExit:
    ; al still holds '#'. Any space copied after the last word lies at or
    ; beyond bx, so it ends up behind the terminator.
    mov   [bx],al   ; write the '#' to cached position of last non-space+1

    ; DOS takes the function number from ah, so load ah = 4Ch (terminate
    ; with return code) and al = 00h (return code) together. The previous
    ; `mov ax,4Ch` left ah = 00h, which selects a different function.
    mov   ax,4C00h
    int   21h