Hello Hello what is new Hello what is not new


Hello   3
what    2
is      2
not     1
new     2


INCLUDE Irvine32.inc

Get_frequencies PROTO,
    pString:PTR BYTE,   ; points to string
    pTable:PTR DWORD    ; points to frequency table

; The variables below must live in the data segment; without a segment
; directive the assembler has nowhere to place them.
.data

; One DWORD counter per possible byte value (0-255), indexed by character code.
freqTable DWORD 256 DUP(0)
;aString BYTE 1,2,"This is extremely difficult for the experienced",0

; Input buffer: 80 bytes for the text plus one extra zero byte.
aString BYTE 80 DUP(0),0

; Banner and prompt shown by main. The CR/LF pairs keep the prompt from
; being glued onto the end of the banner line.
str1 BYTE "*** Constructing a Frequency Table *** (DEMO)",
    0dh,0ah,0dh,0ah,
    "Enter between 1 and 80 characters: ",0

; Everything that follows (main and the procedures) is code.
.code

main PROC
; Program entry point.
; Prompts for a line of text, builds the character frequency table for
; it with Get_frequencies, prints the non-empty entries, then exits.

    call Clrscr
    mov  edx,OFFSET str1            ; banner + prompt
    call WriteString

    mov  ecx,SIZEOF aString - 1     ; size limit handed to ReadString
    mov  edx,OFFSET aString         ; destination buffer
    call ReadString

    INVOKE Get_frequencies, ADDR aString, ADDR freqTable
    call DisplayTable

    exit                            ; end the process here; without this,
                                    ; execution runs on into the next procedure
main ENDP


  Get_frequencies PROC USES esi edi,
    pString:PTR BYTE,   ; points to zero-terminated string
    pTable:PTR DWORD    ; points to frequency table (256 DWORD counters)

; Constructs a character frequency table. Each array position
; is indexed by its corresponding character code (0-255).
; Receives: pString = address of a zero-terminated byte string
;           pTable  = address of a table of 256 DWORD counters; the
;                     counters are incremented, not cleared, so the
;                     caller supplies a zeroed table for a fresh count
; Returns:  each table entry has been increased by the number of times
;           that character occurred in the string. The terminating
;           zero itself is not counted.
; Clobbers: EAX and flags. ESI and EDI are saved/restored by USES.

    mov   esi,pString
    mov   edi,pTable
    cld                              ; clear Direction flag (forward)
    xor   eax,eax                    ; zero EAX once: lodsb only replaces AL and
                                     ; nothing below modifies the upper bits

next_char:
    lodsb                            ; AL = [ESI], inc ESI
    test  al,al                      ; end of string?
    jz    Exit_proc                  ; yes: exit
    inc   DWORD PTR [edi + eax*4]    ; inc table[AL] (4 bytes per entry)
    jmp   next_char                  ; repeat loop

Exit_proc:
    ret                              ; assembler-generated epilogue restores ESI/EDI
                                     ; and removes the two arguments
  Get_frequencies ENDP


 DisplayTable PROC USES ebx esi

  ; Display the non-empty entries of the frequency table.
  ; This procedure was not required, but it makes it easier
  ; to demonstrate that Get_frequencies works.
  ;
  ; Receives: nothing (reads the global freqTable)
  ; Output:   one "<char>: <count>" line per entry whose count is non-zero
  ; Clobbers: EAX, ECX, EDX, flags. EBX and ESI are saved/restored by USES.
  ; Note:     like the original loop, this relies on the Irvine32 output
  ;           routines leaving ECX, EBX and ESI unchanged.

  ; The separator string is data, so it is placed in the data segment;
  ; left in the instruction stream it would be executed as code.
  .data
  colonStr BYTE ": ",0
  .code

        call Crlf
        mov  ecx,LENGTHOF freqTable     ; entries to show
        mov  esi,OFFSET freqTable       ; ESI -> current table entry
        xor  ebx,ebx                    ; EBX = index = character code

  show_entry:
        mov  eax,[esi]                  ; get frequency count
        test eax,eax                    ; count = 0?
        jz   next_entry                 ; if so, skip to next entry

        mov  eax,ebx                    ; the index is the character itself
        call WriteChar
        mov  edx,OFFSET colonStr        ; display ": "
        call WriteString
        mov  eax,[esi]                  ; show frequency count
        call WriteDec
        call Crlf

  next_entry:
        add  esi,TYPE freqTable         ; point to next table entry
        inc  ebx                        ; increment index
        dec  ecx
        jnz  show_entry                 ; until every entry has been visited

        call Crlf
        ret
 DisplayTable ENDP

      END main


 ; Separate snippet: skip the first space-delimited word of str2 and
 ; print the rest of the string. Needs Irvine32.inc (WriteString, exit)
 ; like the program above.

 .data
 str2 BYTE "one two three",0

 .code
 main PROC
    mov   edi,OFFSET str2       ; EDI = scan pointer
    mov   esi,edi               ; ESI = start of the string
    mov   ecx,LENGTHOF str2     ; never scan past the buffer (terminator included)
    mov   al,' '                ; AL = delimiter to look for
    cld                         ; scan forward
    repne scasb                 ; stop one byte past the first space, or after
                                ; the terminator if the string has no space

    mov   eax,edi
    sub   eax,esi
    dec   eax                   ; EAX = index of the byte that ended the scan
                                ;       (the delimiter, or the terminator if none)

    cmp   BYTE PTR [edi-1],' '  ; did the scan end on a delimiter?
    je    have_rest
    dec   edi                   ; no: step back onto the terminator so the
                                ; remainder is the empty string
 have_rest:
    xchg  esi,edi               ; ESI = text after the first word, EDI = start of string

    mov   edx,esi
    call  WriteString           ; prints "two three" for the string above

    exit                        ; end the process instead of running off the end of main
 main ENDP
 END main


字符和单词之间的区别在于可能的单词数量是无限的(除了内存大小限制),因此您不能直接使用单词作为计数表的索引。即使使用4个字符的单词作为计数表的4字节索引,也会使计数表大小= 2 ^ 32个条目。

将计数表算法从字符调整为单词的最直接方法,是使用某种字典数据结构(dictionary data structure),例如哈希表(hash table)、树(tree),甚至是基数树/字典树(radix tree / trie)。每个条目都将一个单词映射到其计数器。

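另一种选择是先对单词列表进行排序,然后遍历排序后的列表来统计重复项。或者在排序的同时累积重复计数——当输入的单词列表太大、无法一次全部放入内存时,这是一个有趣的问题(an interesting problem when your input word list is too big to fit in memory all at once)。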



;; tighter version of your char-count loop
;; Expects the same setup as Get_frequencies: ESI -> zero-terminated string,
;; EDI -> table of DWORD counters, Direction flag clear.
   xor   eax, eax                 ; clear upper bits of EAX once, before the loop:
                                  ; lodsb only replaces AL and EAX is no longer shifted
L1:
   lodsb                          ; AL = [ESI], inc ESI
   inc   DWORD PTR [edi + eax*4]  ; inc table[AL] (the scaling is done by the addressing mode)
   test  al,al                    ; Set flags based on AL
   jnz   L1                       ; loop while it's not the end of the string
   ;; fall through when done; the terminating zero has been counted in table[0]
   ;;dec   DWORD PTR [edi]          ; undo the count of the zero if you care


由于左移现在是作为寻址模式的一部分完成的(as part of the addressing mode),我们不需要在循环内将 eax 清零。如果您关心性能,请使用(此处原文的链接文字在提取时丢失,可能是指用 movzx 加载再加 inc esi 来代替 lodsb)。




  • 找到第一个单词的结尾
  • 在字符串的其余部分搜索该单词的出现次数。(请记住只匹配完整的单词,而不是像 strstr(3) 那样匹配任意子串。)
  • 每次匹配成功时,将剩余的字符串向左复制,从而把该单词从字符串中删除。(第一个单词本身也要删除。)
  • 重复直到字符串为空

这种做法会花很多时间复制字符。您可以使用 rep movsb 来完成复制,我认为在 dst 与 src 重叠的情况下它仍然适用。



  • 将字符复制到字符串B中。
  • 当您到达某个单词的结尾时,请检查它是否与您要查找的当前单词匹配。
    • 如果是:count++,并且 dst -= length_of_current_countword。也就是说,当您检测到要查找的单词时,将目标指针(edi)倒回到该单词的开头,以便后续的复制将其覆盖。这样一来,删除这个单词基本上不需要额外的复制开销。
  • 重复,直至到达字符串A的结尾
  • 打印当前字词和计数。
  • 如果字符串B不为空(empty),它将成为新的字符串A,依此类推。也许可以在寄存器或内存中保存指向A和B的指针,这样就可以交换它们,而不是直接使用静态存储的地址。或者就用最笨的办法,把B原样复制回A。