
时间:2009-09-16 20:26:28

标签: sql sql-server tsql




3 个答案:

答案 0 :(得分:3)



答案 1 :(得分:1)

此类字符串操作最好由CLR scalar valued functions处理。

答案 2 :(得分:1)



  1. 删除脚本标记和内容
  2. 用空格替换所有HTML标记。
  3. 将 &nbsp; 替换为空格
  4. 用“X”替换所有实体代码(“&xxx;”)
  5. 用空格替换所有标点符号和数学符号(。,;:'&()[] + /<>≥≤°÷)(不删除短划线);另外还包括百分号、反斜杠、下划线、脱字符(^)、星号、等号、花括号、问号和感叹号、竖线(|)、美元和美分符号、英镑符号、制表符、crlf
  6. 用空格替换所有数字
  7. 用空格替换所有单字母单词(模式:空格 单字符通配符 空格)
  8. 删除多余的空格。

它实现了一种文本转换器(text transducer)类型的 DFSA(确定性有限状态自动机;嗯,它几乎是确定性的,因为它在几处使用了前瞻 look-ahead):

    Imports System
    Imports System.Data
    Imports System.Data.SqlClient
    Imports System.Data.SqlTypes
    Imports Microsoft.SqlServer.Server
    ''' <summary>
    ''' SQL CLR scalar functions for reducing HTML (passed as single-byte text
    ''' in a SqlBytes value) to plain words, plus two pass-through copy functions.
    ''' </summary>
    Partial Public Class UserDefinedFunctions
        ''' <summary>Top-level states of the HTMLCleaner scanner.</summary>
        Public Enum States
            Norm        'inside ordinary text
            Word1       'the first letter of a word has just been emitted
            Space1      'a space has just been emitted or skipped (also the start state)
            HTMLTag     'between "<" and the matching ">"
            Entity      'between "&" and the terminating ";"
            Script      'inside the content of a SCRIPT element
            Style       'inside the content of a STYLE element
        End Enum

        ''' <summary>Sub-states used while skipping SCRIPT/STYLE content.</summary>
        Enum SubStates
            None        'scanning content, waiting for "<"
            EndBegin    'saw "<", waiting for "/"
            EndSlash    'saw "</", accumulating the end-tag name until ">"
        End Enum

        'Byte values of the characters the scanner cares about.
        Const CharSpace As Integer = 32
        Const CharAmp As Integer = 38
        Const CharSlash As Integer = 47
        Const CharLT As Integer = 60
        Const CharGT As Integer = 62
        Const CharA As Integer = 65
        Const CharX As Integer = 88
        Const CharZ As Integer = 90
        Const Char_a As Integer = 97
        Const Char_b As Integer = 98
        Const Char_n As Integer = 110
        Const Char_p As Integer = 112
        Const Char_s As Integer = 115
        Const Char_z As Integer = 122
        Const CharDash As Integer = 45
        Const CharSemiC As Integer = 59

        ''' <summary>
        ''' Tells whether the text accumulated between "&lt;" (or "&lt;/") and "&gt;"
        ''' names the given tag, ignoring case and any attributes after the name.
        ''' </summary>
        ''' <param name="accum">Characters collected from inside the tag.</param>
        ''' <param name="tagName">Tag name to compare against, e.g. "SCRIPT".</param>
        Private Shared Function IsTag(ByVal accum As String, ByVal tagName As String) As Boolean
            If Not accum.StartsWith(tagName, StringComparison.OrdinalIgnoreCase) Then
                Return False
            End If
            If accum.Length = tagName.Length Then
                Return True
            End If
            'The name must end here: "SCRIPTS" is not "SCRIPT", "SCRIPT type=..." is.
            Return Not Char.IsLetterOrDigit(accum.Chars(tagName.Length))
        End Function

        ''' <summary>
        ''' Strips HTML from the input bytes with a single-pass state machine.
        ''' Letters and dashes are kept; tags become a space; SCRIPT and STYLE
        ''' elements are dropped with their content; the nbsp entity becomes a
        ''' space and every other entity becomes "X"; all other bytes (digits,
        ''' punctuation, control characters) are dropped; a single-letter word
        ''' followed by a space is removed; runs of spaces are collapsed and
        ''' leading/trailing spaces are trimmed.
        ''' </summary>
        ''' <param name="chars">Input text, one byte per character.</param>
        ''' <returns>
        ''' The cleaned bytes, or SqlBytes.Null when the input is Nothing or NULL.
        ''' </returns>
        <Microsoft.SqlServer.Server.SqlFunction( _
                DataAccess:=DataAccessKind.None _
                , IsDeterministic:=True _
                , IsPrecise:=True)> _
        Public Shared Function HTMLCleaner(ByVal chars As SqlTypes.SqlBytes) As SqlTypes.SqlBytes
            'chars.Length throws on a NULL value, so answer NULL with NULL.
            If chars Is Nothing OrElse chars.IsNull Then
                Return SqlTypes.SqlBytes.Null
            End If

            Dim b As Byte
            Dim i As Integer
            Dim j As Integer = 0                        'next free position in Out
            Dim inputLength As Integer = CInt(chars.Length)
            'Every input byte produces at most one output byte, so the input
            'length is a sufficient capacity.
            Dim Out(inputLength - 1) As Byte
            Dim State As States = States.Space1         'Space1 discards leading spaces
            Dim Substate As SubStates = SubStates.None
            Dim strAccum As String = ""                 'tag or entity name being collected

            For i = 0 To inputLength - 1
                b = chars(i)
                Select Case State
                    Case States.Norm
                        Select Case b
                            Case CharA To CharZ, Char_a To Char_z, CharDash
                                Out(j) = b
                                j = j + 1
                            Case CharSpace
                                Out(j) = b
                                j = j + 1
                                State = States.Space1
                            Case CharAmp
                                State = States.Entity   'skip output
                            Case CharLT
                                Out(j) = CharSpace
                                j = j + 1
                                State = States.HTMLTag
                            Case Else
                                'skip output, stay in Norm
                        End Select

                    Case States.Space1
                        Select Case b
                            Case CharSpace
                                'discard leading and multiple spaces
                            Case CharAmp
                                State = States.Entity   'skip output
                            Case CharLT
                                Out(j) = CharSpace
                                j = j + 1
                                State = States.HTMLTag
                            Case CharDash
                                Out(j) = b
                                j = j + 1
                                State = States.Norm
                            Case CharA To CharZ, Char_a To Char_z
                                Out(j) = b
                                j = j + 1
                                State = States.Word1
                            Case Else
                                State = States.Norm     'skip output
                        End Select

                    Case States.Word1
                        Select Case b
                            Case CharSpace
                                'single-character word: retract it from the output
                                j = j - 1
                                State = States.Space1
                            Case CharAmp
                                State = States.Entity   'skip output
                            Case CharLT
                                Out(j) = CharSpace
                                j = j + 1
                                State = States.HTMLTag
                            Case CharDash, CharA To CharZ, Char_a To Char_z
                                Out(j) = b
                                j = j + 1
                                State = States.Norm
                            Case Else
                                State = States.Norm     'skip output
                        End Select

                    Case States.Entity
                        If b = CharSemiC Then
                            'End of entity, wrap it up.
                            If strAccum = "nbsp" Then
                                Out(j) = CharSpace
                                j = j + 1
                                State = States.Space1
                            Else
                                'any other entity is represented by "X"
                                Out(j) = CharX
                                j = j + 1
                                State = States.Norm
                            End If
                            'Reset in both branches so the next entity starts clean.
                            strAccum = ""
                        Else
                            'Keep scanning for the semicolon. ChrW appends the
                            'character; "& b" would append the byte's decimal text.
                            strAccum = strAccum & ChrW(b)
                        End If

                    Case States.HTMLTag
                        If b = CharGT Then
                            If IsTag(strAccum, "SCRIPT") Then
                                State = States.Script
                                Substate = SubStates.None
                            ElseIf IsTag(strAccum, "STYLE") Then
                                State = States.Style
                                Substate = SubStates.None
                            Else
                                Out(j) = CharSpace
                                j = j + 1
                                State = States.Space1
                            End If
                            strAccum = ""
                        Else
                            'accumulate the tag name (and any attributes)
                            strAccum = strAccum & ChrW(b)
                        End If

                    Case States.Script, States.Style
                        'Both elements are skipped the same way; only the end tag differs.
                        Dim closingTag As String
                        If State = States.Script Then
                            closingTag = "SCRIPT"
                        Else
                            closingTag = "STYLE"
                        End If
                        Select Case Substate
                            Case SubStates.None
                                If b = CharLT Then
                                    Substate = SubStates.EndBegin
                                End If
                            Case SubStates.EndBegin
                                If b = CharSlash Then
                                    Substate = SubStates.EndSlash
                                    strAccum = ""
                                ElseIf b <> CharLT Then
                                    'not an end tag; a repeated "<" keeps us armed
                                    Substate = SubStates.None
                                End If
                            Case SubStates.EndSlash
                                If b = CharGT Then
                                    If IsTag(strAccum, closingTag) Then
                                        'end of the element found; output nothing
                                        State = States.Norm
                                    End If
                                    'either way (real end or false alarm) restart the sub-scan
                                    Substate = SubStates.None
                                    strAccum = ""
                                Else
                                    'accumulate the end tag's label
                                    strAccum = strAccum & ChrW(b)
                                End If
                        End Select

                    Case Else
                        System.Diagnostics.Debug.Assert(False, "Unknown scanner state")
                End Select

                'Extra check for multiple spaces produced by different branches.
                If j > 1 _
                    AndAlso Out(j - 1) = CharSpace _
                    AndAlso Out(j - 2) = CharSpace Then
                    j = j - 1   'roll back the last character
                ElseIf j = 1 AndAlso Out(0) = CharSpace Then
                    j = 0       'overwrite leading space
                End If
            Next

            'remove any trailing space
            If j > 0 AndAlso Out(j - 1) = CharSpace Then j = j - 1
            'trim off the unused tail (an upper bound of -1 yields an empty array)
            ReDim Preserve Out(0 To j - 1)
            Return New SqlBytes(Out)
        End Function

        ''' <summary>
        ''' Copies the input byte by byte into a new array and returns it.
        ''' </summary>
        ''' <param name="chars">Bytes to copy.</param>
        ''' <returns>
        ''' A SqlBytes over the copied array, or SqlBytes.Null when the input is
        ''' Nothing or NULL.
        ''' </returns>
        <Microsoft.SqlServer.Server.SqlFunction( _
                DataAccess:=DataAccessKind.None _
                , IsDeterministic:=True _
                , IsPrecise:=True)> _
        Public Shared Function HTMLCopy2(ByVal chars As SqlTypes.SqlBytes) As SqlTypes.SqlBytes
            If chars Is Nothing OrElse chars.IsNull Then
                Return SqlTypes.SqlBytes.Null
            End If
            Dim byteCount As Integer = CInt(chars.Length)
            Dim out(byteCount - 1) As Byte
            For i As Integer = 0 To byteCount - 1
                out(i) = chars.Buffer(i)
            Next
            Return New SqlBytes(out)
        End Function

        ''' <summary>
        ''' Returns a new SqlBytes constructed directly from the input's Buffer.
        ''' </summary>
        ''' <param name="chars">Bytes to pass through.</param>
        ''' <returns>
        ''' A SqlBytes built from chars.Buffer, or SqlBytes.Null when the input is
        ''' Nothing or NULL.
        ''' </returns>
        <Microsoft.SqlServer.Server.SqlFunction( _
                DataAccess:=DataAccessKind.None _
                , IsDeterministic:=True _
                , IsPrecise:=True)> _
        Public Shared Function HTMLCopy(ByVal chars As SqlTypes.SqlBytes) As SqlTypes.SqlBytes
            If chars Is Nothing OrElse chars.IsNull Then
                Return SqlTypes.SqlBytes.Null
            End If
            Return New SqlTypes.SqlBytes(chars.Buffer)
        End Function
    End Class