除了介词词之外,大写字串

时间:2014-05-10 00:06:37

标签: vbscript asp-classic vb6

我正在使用下面的代码从文本框中输入一个字符串,然后转换为大写字母,除了像(the,and,an,as,to或on等)之类的单词等。

问题#1:我希望字符串的第一个单词始终大写,无论单词是什么。

问题#2:当字符串重新组合在一起时,字间距不正确。

xText = queryForHTML    
xTextSplit = split(xText, " ")

for each item in xTextSplit

    xWord = item

    if lcase(item) = "the" or lcase(item) = "and" or lcase(item) = "an" or lcase(item) = "as" or lcase(item) = "to" or lcase(item) = "is" or lcase(item) = "on" then
        xWord = lcase(item)
    end if

    xCompleteWord = xCompleteWord & " " & xWord

next

queryForHTML = xCompleteWord

2 个答案:

答案 0 :(得分:0)

Option Explicit 

Dim originalString    
    originalString = "a saMple of String capiTalization (in some cases, not so obvious)"

Dim convertedString

Dim noiseWords
    noiseWords= "/a/abaft/aboard/about/above/absent/across/afore/after/against/along/alongside/amid" + _ 
                "/amidst/among/amongst/an/anenst/apropos/apud/around/as/aside/astride/at/athwart/atop" + _ 
                "/barring/before/behind/below/beneath/beside/besides/between/beyond/but/by/circa" + _ 
                "/concerning/despite/down/during/except/excluding/failing/following/for/forenenst/from" + _ 
                "/given/in/including/inside/into/like/mid/midst/minus/modulo/near/next/notwithstanding" + _ 
                "/o/of/off/on/onto/opposite/or/out/outside/over/pace/past/per/plus/pro/qua/regarding" + _ 
                "/round/sans/save/since/so/than/through/thru/throughout/thruout/till/times/to/toward" + _ 
                "/towards/under/underneath/unlike/until/unto/up/upon/versus/vs/via/vice/vis/with/within" + _ 
                "/without/worth/this/"

    Function correctCase(matchString,word,position,sourceString)
        word = LCase(word)
        If (position > 0) And (InStr(noiseWords,"/" & word & "/")>0) Then 
            correctCase = word
        Else
            correctCase = UCase(Left(word,1)) & Mid(word,2,Len(word)-1)
        End If
    End Function 

    With New RegExp
        .Pattern = "(\w+)"
        .Global = True 
        .IgnoreCase = True
        convertedString = .Replace(originalString,GetRef("correctCase"))
    End With 

    WScript.Echo originalString
    WScript.Echo convertedString

基本思想是使用匹配任何序列的正则表达式" word"字符([a-zA-Z0-9]),对于每个序列,调用一个函数,它接收字符串匹配的参数,包含单词的捕获组,字符串中找到它的位置以及完整的源字符串。

如果单词位于0位,则为大写。如果这个词是"噪音"单词,它是小写的,否则,这个词是大写的。

答案 1 :(得分:0)

以下代码基于GetStringTypeW() Win32 API函数,该函数提供有关字符串中字符的信息。你只担​​心可以是大写或小写的字符。您的代码的问题在于它仅适用于空格分解单词的最简单情况。但话语可以通过标点符号来分解。并且有许多Unicode字符没有"大写"和"小写"。

我没有编写这个无聊,容易出错的代码来执行此操作,而是利用了GetStringTypeW()。我遍历数组中的每个元素,其中每个元素对应于相同位置的字符串中的字符。我有一个标志bInWord,用于存储当前位置是否在单词内。如果我们点击大写或小写字符,并且没有设置,我们设置它,并将当前位置保存为单词的开头。另外,如果我们遇到一个大写字符,并且我们已经知道我们在一个单词中,那么我们在那里然后通过写入返回的字符串使字符小写。 当我们点击非字母字符或到达字符串的末尾并设置bInWord时,我们会将最后一个字与"非正确字符"的列表进行比较。话。如果我们匹配,并且第一个字符是大写,那么我们用小写字符覆盖该字符。如果我们不匹配,并且第一个字符是小写的,我们会用大写字符覆盖该字符。

Option Explicit

Private Declare Function GetStringTypeW Lib "Kernel32.dll" ( _
    ByVal dwInfoType As Long, _
    ByVal lpSrcStr As Long, _
    ByVal cchSrc As Long, _
    ByRef lpCharType As Integer _
) As Long

Private Const CT_CTYPE1                     As Long = &H1

Private Const C1_UPPER                      As Long = &H1     ' Uppercase
Private Const C1_LOWER                      As Long = &H2     ' Lowercase
Private Const C1_DIGIT                      As Long = &H4     ' Decimal digits
Private Const C1_SPACE                      As Long = &H8     ' Space characters
Private Const C1_PUNCT                      As Long = &H10    ' Punctuation
Private Const C1_CNTRL                      As Long = &H20    ' Control characters
Private Const C1_BLANK                      As Long = &H40    ' Blank characters
Private Const C1_XDIGIT                     As Long = &H80    ' Hexadecimal digits
Private Const C1_ALPHA                      As Long = &H100   ' Any linguistic character: alphabetical, syllabary, or ideographic
Private Const C1_DEFINED                    As Long = &H200   ' A defined character, but not one of the other C1_* types

Private Function ProperCaseWords(ByRef in_sText As String) As String

    Dim lTextLen            As Long
    Dim aiCharType()        As Integer
    Dim lPos                As Long
    Dim lPosStartWord       As Long
    Dim bInWord             As Boolean
    Dim bFirstCharUCase     As Boolean
    Dim sWord               As String

    ' Output buffer contains a copy of the original string.
    ProperCaseWords = in_sText

    lTextLen = Len(in_sText)

    ' Resize the character type buffer to be one more than the string.
    ReDim aiCharType(1 To lTextLen + 1)

    ' Retrieve string type data about this Unicode string into <aiCharType()>.
    ' If it fails, then we just return the original string.
    ' Note that the last element in the array is not filled by this function, and will contain zero.
    ' This is deliberate, so we can handle the corner case where the last word is right at the end of the string.
    If (GetStringTypeW(CT_CTYPE1, StrPtr(ProperCaseWords), lTextLen, aiCharType(1))) = 0 Then
        Exit Function
    End If

    ' We start outside a word.
    bInWord = False

    ' Iterate through the entire array, including the last element which corresponds to no character.
    For lPos = 1 To lTextLen + 1

        If (aiCharType(lPos) And C1_LOWER) = C1_LOWER Then
        ' Lower case characters.
            If Not bInWord Then
                bFirstCharUCase = False
                lPosStartWord = lPos
                bInWord = True
            End If
        ElseIf (aiCharType(lPos) And C1_UPPER) = C1_UPPER Then
        ' Upper case characters.
            If bInWord Then
            ' If we are already in the word, i.e. past the first character, then we know that the character *should* be lower case.
                Mid$(ProperCaseWords, lPos, 1) = LCase$(Mid$(ProperCaseWords, lPos, 1))
            Else
                bFirstCharUCase = True
                lPosStartWord = lPos
                bInWord = True
            End If
        Else
        ' Non lower or upper case characters. Also includes last (zero) element.
            If bInWord Then
            ' If we are in a word, and the latest character is non-alphabetical, then we now check what word it is, and
            ' decide whether to make the first character upper or lower case.
                bInWord = False

                ' Retrieve the word from the string, and deliberately make the first character lower case.
                ' Note that all other characters in the word would have already been made lower case.
                sWord = Mid$(ProperCaseWords, lPosStartWord, lPos - lPosStartWord)
                If bFirstCharUCase Then
                    Mid$(sWord, 1, 1) = LCase$(Mid$(sWord, 1, 1))
                End If

                ' Compare our word against a lower-case word list.
                Select Case sWord
                Case "in", "on", "an", "to", "and", "the", "with", "that", "is" ' <=== CUSTOM LIST OF WORDS
                    If bFirstCharUCase Then
                        Mid$(ProperCaseWords, lPosStartWord, 1) = LCase$(Mid$(ProperCaseWords, lPosStartWord, 1))
                    End If
                Case Else
                    If Not bFirstCharUCase Then
                        Mid$(ProperCaseWords, lPosStartWord, 1) = UCase$(Mid$(ProperCaseWords, lPosStartWord, 1))
                    End If
                End Select
            End If
        End If

    Next lPos

End Function