...
<td class="m92_t_col5" id="preis_0">xx</td>
...
我想换到
...
<td id="preis_0" class="m92_t_col5">xxx</td>
...
所以id =“”必须先是,然后才是class =“”。这可能与tsql有关吗? id或class中的文本是通用的......
答案 0 :(得分:2)
我需要找一些肥皂,但考虑到你的要求,这是一个如何实现所需替代品的例子。
-- This will probably not perform terribly well for a number of
-- reasons, not the least of which we are doing lots of string manipulation
-- within tsql.
-- Much of this query nonsense could be consolidated into fewer queries
-- but given the dearth of information, I chose to build out the solution
-- in a tumbling data anti-pattern
;
WITH SAMPLE_DATA AS
(
-- gin up some demo data
-- with random spacing and ids to make valid test cases
select '<td class="m92_t_col5" id="preis_0">xx</td>' AS html
union all select '<td id="preis_2" class="m29_t_col5">no fix req</td>'
union all select '<td id="preis_49" class="m29_t_col5">no fix req</td>'
union all select '<td class="m93_t_col50" id="preis_3">xy</td>'
union all select '<td class="m95_t_col5" style="fuzzy" id="preis_5">xz</td>'
union all select '<td id="preis_8" class="m29_t_col5">no fix req</td>'
)
, ORDINALS AS
(
-- Find the starting position of the keywords
SELECT SD.*
, CHARINDEX('class=', SD.html, 0) AS class_ordinal
, CHARINDEX('id=', SD.html, 0) AS id_ordinal
-- You will really need something in here to keep stuff straight
-- otherwise when we bring it all back together, it'll be wrong
, ROW_NUMBER() OVER (ORDER BY (SELECT 1)) AS original_sequence
FROM SAMPLE_DATA SD
)
, NEEDS_MODIFIED AS
(
-- identify the rows that need modified
-- and use the ordinals in previous query to find the close position
SELECT
O.*
, CHARINDEX('"', O.html, O.class_ordinal+7) + 1 AS class_ordinal_end_quote
, CHARINDEX('"', O.html, O.id_ordinal+4) + 1 AS id_ordinal_end_quote
FROM
ORDINALS O
WHERE
O.id_ordinal > O.class_ordinal
)
, FIND_PARTS AS
(
-- strip out the parts
SELECT
NM.*
, SUBSTRING(NM.html, class_ordinal, class_ordinal_end_quote - class_ordinal) AS class
, SUBSTRING(NM.html, id_ordinal, id_ordinal_end_quote - id_ordinal) AS id
FROM
NEEDS_MODIFIED NM
)
, DONE AS
(
SELECT
-- This is the heart of the matter
-- having identified the correct text values for class and id
-- we will now perform a triple replace
-- Replace 1 is swapping the class text with somthing that should not exist in source
-- Replace 2 replaces the id text with our class text
-- Replace 3 removes our placeholder value with id
REPLACE(REPLACE(REPLACE(FP.html, FP.class, '~~|~'), FP.id, FP.class), '~~|~', FP.id) AS html
, FP.original_sequence
FROM
FIND_PARTS FP
UNION ALL
SELECT
O.html
, O.original_sequence
FROM
ORDINALS O
WHERE
O.id_ordinal < O.class_ordinal
)
SELECT
D.html
FROM
DONE D
ORDER BY
D.original_sequence
输入
<td class="m92_t_col5" id="preis_0">xx</td>
<td id="preis_2" class="m29_t_col5">no fix req</td>
<td id="preis_49" class="m29_t_col5">no fix req</td>
<td class="m93_t_col50" id="preis_3">xy</td>
<td class="m95_t_col5" style="fuzzy" id="preis_5">xz</td>
<td id="preis_8" class="m29_t_col5">no fix req</td>
输出
<td id="preis_0" class="m92_t_col5">xx</td>
<td id="preis_2" class="m29_t_col5">no fix req</td>
<td id="preis_49" class="m29_t_col5">no fix req</td>
<td id="preis_3" class="m93_t_col50">xy</td>
<td id="preis_5" style="fuzzy" class="m95_t_col5">xz</td>
<td id="preis_8" class="m29_t_col5">no fix req</td>
在做了一些思考之后,你可能一直在尝试将其作为标量函数。这可能会有更糟糕的表现,但它解决了这个问题。
-- Same logic as above, now in function form
CREATE FUNCTION dbo.ClassIdSwap
(
@input varchar(max)
)
RETURNS varchar(max)
AS
BEGIN
DECLARE
@class_ordinal int
, @class_text varchar(max)
, @class_ordinal_end_quote int
, @id_ordinal int
, @id_text varchar(max)
, @id_ordinal_end_quote int
, @out_html varchar(max)
SELECT
@class_ordinal = CHARINDEX('class=', @input, 0)
, @id_ordinal = CHARINDEX('id=', @input, 0)
SELECT
@class_ordinal_end_quote = CHARINDEX('"', @input, @class_ordinal+7) + 1
, @id_ordinal_end_quote = CHARINDEX('"', @input, @id_ordinal+4) + 1
-- bail out early
IF (@id_ordinal < @class_ordinal)
BEGIN
RETURN @input
END
SELECT
@class_text = SUBSTRING(@input, @class_ordinal, @class_ordinal_end_quote - @class_ordinal)
, @id_text = SUBSTRING(@input, @id_ordinal, @id_ordinal_end_quote - @id_ordinal)
RETURN (REPLACE(REPLACE(REPLACE(@input, @class_text, '~~|~'), @id_text, @class_text), '~~|~', @id_text))
END
用法
;
WITH SAMPLE_DATA AS
(
-- gin up some demo data
-- with random spacing and ids to make valid test cases
select '<td class="m92_t_col5" id="preis_0">xx</td>' AS html
union all select '<td id="preis_2" class="m29_t_col5">no fix req</td>'
union all select '<td id="preis_49" class="m29_t_col5">no fix req</td>'
union all select '<td class="m93_t_col50" id="preis_3">xy</td>'
union all select '<td class="m95_t_col5" style="fuzzy" id="preis_5">xz</td>'
union all select '<td id="preis_8" class="m29_t_col5">no fix req</td>'
)
SELECT
D.html
, dbo.ClassIdSwap(D.html) AS modified
FROM
SAMPLE_DATA D