我用 Fortran(我首选的语言)编写了以下代码,原以为它会非常快。事实证明它确实相当快,但比 Fortran 用于查找子字符串的内置函数 INDEX 慢得多。举个例子:在一个由 5,000,000 个完全随机的字符组成的字符串中搜索字符串 "The moon's a Balloon",该字符串位于距字符串末尾 1,000 个字符处,我得到以下计时结果(在一台标准配置的英特尔 HM370(Cannon Lake-H)机器上运行 10 次取平均值;系统为 Windows 10,使用 GCC Fortran,仅启用 -O3 和 -fno-check 优化选项):我的代码(见下)约 0.09 秒;内置函数 INDEX 约 0.03 秒。我希望得到任何关于重构代码以进一步提高速度的建议。
INTEGER FUNCTION BOYERMOORE(Text,Pat,Siztext,Sizpat) RESULT(SEARCH)
   !
   ! Locate the first occurrence of Pat(1:Sizpat) inside Text(1:Siztext)
   ! using the Boyer-Moore-Horspool bad-character shift rule.
   !
   ! Arguments
   !   Text    - string to be searched
   !   Pat     - pattern to look for
   !   Siztext - number of leading characters of Text to search;
   !             must not exceed LEN(Text)
   !   Sizpat  - number of leading characters of Pat that form the pattern;
   !             must not exceed LEN(Pat)
   !
   ! Result
   !   1-based position in Text where the pattern starts, or 0 when the
   !   pattern does not occur. An empty pattern (Sizpat == 0) yields 1.
   !   A negative Sizpat, or a pattern longer than the text, yields 0.
   !
   IMPLICIT NONE
   !
   ! Dummy arguments
   !
   CHARACTER(*), INTENT(IN) :: Pat
   INTEGER, INTENT(IN) :: Sizpat
   INTEGER, INTENT(IN) :: Siztext
   CHARACTER(*), INTENT(IN) :: Text
   !
   ! Local variables
   !
   ! Largest character code the shift table handles. The table is indexed
   ! by character code, NOT by text position, so its size is independent
   ! of Siztext (assumes a single-byte default character kind).
   INTEGER, PARAMETER :: maxchar = 255
   INTEGER :: i
   INTEGER :: j
   INTEGER :: k
   INTEGER, DIMENSION(0:maxchar) :: skip
   !Code starts here
   SEARCH = 0
   IF (Sizpat==0) THEN                 ! Nothing to search for
      SEARCH = 1
      RETURN
   ENDIF
   ! An invalid length or a pattern that cannot fit: no match possible.
   IF (Sizpat<0 .OR. Sizpat>Siztext) RETURN
   !
   ! Set up the shift sizes: a character absent from the pattern lets the
   ! window jump a full pattern length; otherwise jump so that the last
   ! occurrence of the character (excluding the final pattern position)
   ! lines up with the text character.
   !
   skip = Sizpat
   DO k = 1, Sizpat - 1
      skip(ICHAR(Pat(k:k))) = Sizpat - k
   ENDDO
   !
   ! Scan: k is the text index aligned with the LAST pattern character.
   !
   k = Sizpat
   DO WHILE (k<=Siztext)
      i = k
      j = Sizpat
      DO WHILE (j>=1)                  ! Match right-to-left
         IF (Text(i:i)/=Pat(j:j)) EXIT
         i = i - 1
         j = j - 1
      ENDDO
      IF (j==0) THEN                   ! Every pattern character matched
         SEARCH = i + 1
         RETURN
      ENDIF
      ! Slide the window right exactly once per alignment, driven by the
      ! text character under the end of the window.
      k = k + skip(ICHAR(Text(k:k)))
   ENDDO
END FUNCTION BOYERMOORE