我正在尝试匿名化我的数据库中的所有数据,因此我正在重命名其中的所有人。我之前问了一个类似的问题,并被告知要使用NewID强制为每个更新的行创建一个新值,但在这种情况下它似乎没有起作用。
我做错了什么?
-- Create Table Customer
CREATE TABLE #FirstName
(
ID int,
FirstName nvarchar(255) NULL,
Gender nvarchar(255) NULL
)
CREATE TABLE #LastName (
ID int,
LastName nvarchar(255)
)
-- BULK INSERT to import data from Text or CSV File
BULK INSERT #FirstName
FROM 'C:\Users\jhollon\Desktop\tmp\names\firstnames.lined.txt'
WITH
(
FIRSTROW = 1,
FIELDTERMINATOR = ',',
ROWTERMINATOR = '\n'
)
BULK INSERT #LastName
FROM 'C:\Users\jhollon\Desktop\tmp\names\lastnames.lined.txt'
WITH
(
FIRSTROW = 1,
FIELDTERMINATOR = ',',
ROWTERMINATOR = '\n'
)
/*SELECT FirstName FROM #FirstName WHERE ID = (
SELECT RandomNumber FROM (
SELECT ABS(CHECKSUM(NewID())) % 1500 AS RandomNumber FROM tblTenant WHERE Sex = '1'
) AS A
);*/
UPDATE tblTenant SET TenantName = (
SELECT LastName + ', ' + FirstName FROM
(SELECT UPPER(FirstName) as FirstName FROM #FirstName WHERE ID = (SELECT ABS(CHECKSUM(NewID())) % 500 + 1501)) AS A,
(SELECT LastName FROM #LastName WHERE ID = (SELECT ABS(CHECKSUM(NewID())) % 200 + 1)) as B
) WHERE Sex = '2';
UPDATE tblTenant SET TenantName = (
SELECT LastName + ', ' + FirstName FROM
(SELECT UPPER(FirstName) as FirstName FROM #FirstName WHERE ID = (SELECT ABS(CHECKSUM(NewID())) % 500 + 1)) AS A,
(SELECT LastName FROM #LastName WHERE ID = (SELECT ABS(CHECKSUM(NewID())) % 200 + 1)) as B
) WHERE Sex = '1';
DROP TABLE #FirstName;
DROP TABLE #LastName;
答案 0 :(得分:2)
正确。子查询的评估次数与广告一样(“cachable scalar subquery”)
尝试使用NEWID作为派生表
UPDATE T
SET
TenantName = L.LastName + ', ' + F.FirstName
FROM
tblTenant T
CROSS APPLY
(SELECT TOP 1 UPPER(FirstName) as FirstName FROM #FirstName
WHERE CHECKSUM(NEWID()) <> T.ID
ORDER BY NEWID()) F
CROSS APPLY
(SELECT TOP 1 LastName FROM #LastName
WHERE CHECKSUM(NEWID()) <> T.ID
ORDER BY NEWID()) L
答案 1 :(得分:0)
我不确定我理解您的问题,但如果您希望ID为唯一值,则可以将其设为标识列。 例如:
[ID] [int] IDENTITY(1,1) NOT NULL
答案 2 :(得分:0)
下面的代码表明,如果没有内外关联,那么在使用上面的CROSS APPLY答案时,旧名称不能保证与新名称不同。
在{FirstName CROSS APPLY
WHERE F.Id <> T.Id ORDER BY NEWID()
会更好
USE tempdb
GO
IF OBJECT_ID('tblTenant') IS NOT NULL
DROP TABLE tblTenant
GO
CREATE TABLE tblTenant
(
Id int,
FirstName nvarchar(20),
LastName nvarchar(20),
Gender bit
)
INSERT INTO tblTenant
VALUES (1, 'Bob' , 'Marley', 1),
(2, 'Boz' , 'Skaggs', 1)
SELECT DISTINCT FirstName
INTO #FirstNames
FROM tblTenant
SELECT DISTINCT LastName
INTO #LastNames
FROM tblTenant
-- There is a probability > 0 that a tenant's new name = tenants old name
SELECT
OldFirst = T.FirstName,
OldLast = T.LastName,
NewFirst = F.FirstName,
NewLast = L.LastName
FROM
tblTenant T
CROSS APPLY
(
SELECT TOP 1 UPPER(FirstName) AS FirstName
FROM #FirstNames
WHERE CHECKSUM(NEWID()) <> T.ID
ORDER BY NEWID()
) F
CROSS APPLY
(
SELECT TOP 1 LastName
FROM #LastNames
WHERE CHECKSUM(NEWID()) <> T.ID
ORDER BY NEWID()
) L