mssql(tsql)程序替换顺序

时间:2011-08-14 21:37:39

标签: tsql sql-server-2008

...
<td class="m92_t_col5" id="preis_0">xx</td>
...

我想换到

...
<td id="preis_0" class="m92_t_col5">xxx</td>
...

也就是说,id="" 必须排在前面,class="" 排在后面。这可以用 tsql 实现吗? id 和 class 中的文本是任意的,并不固定......

1 个答案:

答案 0 :(得分:2)

写完这段代码我都想去找块肥皂洗洗手了,不过针对你的需求,下面的例子演示了如何实现所需的替换。

-- Moves the id="..." attribute in front of the class="..." attribute for every
-- row in which class currently comes first. Every other row is returned
-- unchanged, so the result always has exactly one row per input row.
--
-- This is string manipulation inside tsql, so it is unlikely to be fast on
-- large inputs.
--
-- Limitations: this is plain text matching, not HTML parsing. Only the first
-- occurrence of the literal tokens class=" and id=" is considered, and only
-- double-quoted attribute values are recognised. Anything that does not fit
-- that shape is passed through untouched rather than being rewritten.
;
WITH SAMPLE_DATA AS
(
    -- gin up some demo data
    -- with random spacing and ids to make valid test cases
    select '<td class="m92_t_col5" id="preis_0">xx</td>' AS html
    union all select '<td id="preis_2" class="m29_t_col5">no fix req</td>'
    union all select '<td id="preis_49" class="m29_t_col5">no fix req</td>'
    union all select '<td  class="m93_t_col50" id="preis_3">xy</td>'
    union all select '<td      class="m95_t_col5" style="fuzzy" id="preis_5">xz</td>'
    union all select '<td id="preis_8" class="m29_t_col5">no fix req</td>'
)
, ORDINALS AS
(
    -- Starting position of each attribute; CHARINDEX yields 0 when the token is absent
    SELECT
        SD.html
    ,   CHARINDEX('class="', SD.html) AS class_ordinal
    ,   CHARINDEX('id="', SD.html) AS id_ordinal
    -- Something is needed to put the rows back in their original order.
    -- ORDER BY (SELECT 1) does not define an order; with a real table,
    -- order by an actual key column instead.
    ,   ROW_NUMBER() OVER (ORDER BY (SELECT 1)) AS original_sequence
    FROM
        SAMPLE_DATA SD
)
, BOUNDS AS
(
    -- Position of the quote that closes each attribute value (0 when there is none).
    -- class=" is 7 characters and id=" is 4, so ordinal + 7 / + 4 is the first
    -- character after the opening quote.
    SELECT
        O.html
    ,   O.original_sequence
    ,   O.class_ordinal
    ,   O.id_ordinal
    ,   CHARINDEX('"', O.html, O.class_ordinal + 7) AS class_close_quote
    ,   CHARINDEX('"', O.html, O.id_ordinal + 4) AS id_close_quote
    FROM
        ORDINALS O
)
, DONE AS
(
    SELECT
        CASE
            -- Swap only when both attributes exist, both are terminated and the
            -- id attribute starts after the class attribute ends. The guards also
            -- keep every SUBSTRING/STUFF length strictly positive.
            WHEN B.class_ordinal > 0
             AND B.class_close_quote > 0
             AND B.id_ordinal > B.class_close_quote
             AND B.id_close_quote > 0
            THEN
                -- Positional swap: overwrite the later (id) span first so the
                -- position of the earlier (class) span is still valid for the
                -- outer STUFF. Only these two spans are touched; identical text
                -- elsewhere in the row is left alone.
                STUFF(
                    STUFF(
                        B.html
                    ,   B.id_ordinal
                    ,   B.id_close_quote - B.id_ordinal + 1
                    ,   SUBSTRING(B.html, B.class_ordinal, B.class_close_quote - B.class_ordinal + 1)
                    )
                ,   B.class_ordinal
                ,   B.class_close_quote - B.class_ordinal + 1
                ,   SUBSTRING(B.html, B.id_ordinal, B.id_close_quote - B.id_ordinal + 1)
                )
            ELSE
                B.html
        END AS html
    ,   B.original_sequence
    FROM
        BOUNDS B
)
SELECT
    D.html
FROM
    DONE D
ORDER BY
    D.original_sequence

输入

<td class="m92_t_col5" id="preis_0">xx</td>
<td id="preis_2" class="m29_t_col5">no fix req</td>
<td id="preis_49" class="m29_t_col5">no fix req</td>
<td  class="m93_t_col50" id="preis_3">xy</td>
<td      class="m95_t_col5" style="fuzzy" id="preis_5">xz</td>
<td id="preis_8" class="m29_t_col5">no fix req</td>

输出

<td id="preis_0" class="m92_t_col5">xx</td>
<td id="preis_2" class="m29_t_col5">no fix req</td>
<td id="preis_49" class="m29_t_col5">no fix req</td>
<td  id="preis_3" class="m93_t_col50">xy</td>
<td      id="preis_5" style="fuzzy" class="m95_t_col5">xz</td>
<td id="preis_8" class="m29_t_col5">no fix req</td>

再想了想,你想要的可能是一个标量函数。它的性能多半会更差,但同样能解决这个问题。

-- Scalar function that moves the id="..." attribute in front of the
-- class="..." attribute of an HTML fragment.
--
-- @input   the HTML text to inspect
-- returns  @input with the two attributes exchanged in place when class
--          comes first; otherwise @input unchanged (including NULL).
--
-- The input is returned untouched when either attribute is missing, when id
-- already precedes class, or when an attribute value has no closing quote.
-- Matching is plain text search for the first class=" and id=" tokens with
-- double-quoted values; this is not an HTML parser.
CREATE FUNCTION dbo.ClassIdSwap
(
    @input varchar(max)
)
RETURNS varchar(max)
AS
BEGIN
    DECLARE
        @class_ordinal int
    ,   @class_close_quote int
    ,   @id_ordinal int
    ,   @id_close_quote int

    -- CHARINDEX yields 0 when the token is absent
    SELECT
        @class_ordinal = CHARINDEX('class="', @input)
    ,   @id_ordinal = CHARINDEX('id="', @input)

    -- Only look further when class exists and id starts somewhere after it
    IF (@class_ordinal > 0 AND @id_ordinal > @class_ordinal)
    BEGIN
        -- class=" is 7 characters and id=" is 4, so these searches begin at
        -- the first character after the opening quote
        SELECT
            @class_close_quote = CHARINDEX('"', @input, @class_ordinal + 7)
        ,   @id_close_quote = CHARINDEX('"', @input, @id_ordinal + 4)

        -- Both values must be terminated and id must begin after class ends;
        -- this also keeps every length below strictly positive
        IF (@class_close_quote > 0 AND @id_ordinal > @class_close_quote AND @id_close_quote > 0)
        BEGIN
            -- Positional swap: overwrite the later (id) span first so the
            -- position of the earlier (class) span stays valid
            RETURN STUFF(
                       STUFF(
                           @input
                       ,   @id_ordinal
                       ,   @id_close_quote - @id_ordinal + 1
                       ,   SUBSTRING(@input, @class_ordinal, @class_close_quote - @class_ordinal + 1)
                       )
                   ,   @class_ordinal
                   ,   @class_close_quote - @class_ordinal + 1
                   ,   SUBSTRING(@input, @id_ordinal, @id_close_quote - @id_ordinal + 1)
                   )
        END
    END

    -- Nothing to swap
    RETURN @input

END

用法

-- Demo: list each sample fragment next to the value dbo.ClassIdSwap returns for it
SELECT
    S.html
,   dbo.ClassIdSwap(S.html) AS modified
FROM
(
    -- demo rows with varied spacing and ids; some already have id first
    VALUES
        ('<td class="m92_t_col5" id="preis_0">xx</td>')
    ,   ('<td id="preis_2" class="m29_t_col5">no fix req</td>')
    ,   ('<td id="preis_49" class="m29_t_col5">no fix req</td>')
    ,   ('<td  class="m93_t_col50" id="preis_3">xy</td>')
    ,   ('<td      class="m95_t_col5" style="fuzzy" id="preis_5">xz</td>')
    ,   ('<td id="preis_8" class="m29_t_col5">no fix req</td>')
) AS S (html)