我正在寻找在Oracle中进行哈希和替换算法的方法。
我有一张客户表,里面有客户ID和客户电话号码。我有另一张表,其中列出了参考电话号码值。我想使用这些参考值并在主表中一致地替换它们。
应用此替换逻辑时应该有一致的屏蔽,即同一客户出现在不同表中的任何地方,电话号码替换应该相同。
我曾尝试过以下方法,但性能非常糟糕 - 对于20000名客户来说需要18分钟。你能否提出一个更好的方法来满足这个要求。
DROP TABLE CUS_PHONE_NUM ;
DROP TABLE CUS_PHONE_NUM2 ;
DROP TABLE REF_PHONE_NUMBER ;
-- Main table 1
CREATE TABLE CUS_PHONE_NUM (
CUS_ID VARCHAR2(09) ,
PHONE_NUMBER NUMBER(12) );
-- Main table 2
CREATE TABLE CUS_PHONE_NUM_2 (
CUS_ID VARCHAR2(09) ,
PHONE_NUMBER NUMBER(12) );
-- reference table
CREATE TABLE REF_PHONE_NUMBER ( REF_PHONE_NUMBER_VALUE NUMBER(12));
INSERT INTO REF_PHONE_NUMBER VALUES ( 0470134321) ;
INSERT INTO REF_PHONE_NUMBER VALUES ( 0470134322) ;
INSERT INTO REF_PHONE_NUMBER VALUES ( 0470134323) ;
INSERT INTO REF_PHONE_NUMBER VALUES ( 0470134324) ;
INSERT INTO REF_PHONE_NUMBER VALUES ( 0470134325) ;
INSERT INTO REF_PHONE_NUMBER VALUES ( 0470134326) ;
INSERT INTO REF_PHONE_NUMBER VALUES ( 0470134327) ;
INSERT INTO REF_PHONE_NUMBER VALUES ( 0470134328) ;
INSERT INTO REF_PHONE_NUMBER VALUES ( 0470134329) ;
INSERT INTO REF_PHONE_NUMBER VALUES ( 0470134320) ;
TRUNCATE TABLE CUS_PHONE_NUM ;
COMMIT;
INSERT INTO CUS_PHONE_NUM VALUES ( '401795401' , 0426881030 ) ;
INSERT INTO CUS_PHONE_NUM VALUES ( '401795402' , 0426881031 ) ;
INSERT INTO CUS_PHONE_NUM VALUES ( '401795403' , 0426881032 ) ;
INSERT INTO CUS_PHONE_NUM VALUES ( '401795404' , 0426881033 ) ;
INSERT INTO CUS_PHONE_NUM VALUES ( '401795405' , 0426881034 ) ;
INSERT INTO CUS_PHONE_NUM VALUES ( '401795406' , 0426881035 ) ;
INSERT INTO CUS_PHONE_NUM VALUES ( '401795407' , 0426881036 ) ;
TRUNCATE TABLE CUS_PHONE_NUM_2 ;
COMMIT;
INSERT INTO CUS_PHONE_NUM_2 VALUES ( '401795401' , 0426881030 ) ;
INSERT INTO CUS_PHONE_NUM_2 VALUES ( '401795403' , 0426881032 ) ;
INSERT INTO CUS_PHONE_NUM_2 VALUES ( '401795405' , 0426881034 ) ;
INSERT INTO CUS_PHONE_NUM_2 VALUES ( '401795407' , 0426881036 ) ;
COMMIT ;
-- SQL1 to replace reference values for Main table 1 - CUS_PHONE_NUM
WITH REF_PHONE_NUMBER_HASH AS
(
SELECT REF_PHONE_NUMBER_VALUE ,
row_number() over(order by REF_PHONE_NUMBER_VALUE) - 1 REF_PHONE_NUMBER_VALUE_HASH,
count(*) over() max_buckets
FROM REF_PHONE_NUMBER
),
CUS_PHONE_NUM_HASH AS
(select CUS_ID,
PHONE_NUMBER
from CUS_PHONE_NUM )
SELECT c.*, n.* ,ora_hash(PHONE_NUMBER, n.max_buckets)
FROM CUS_PHONE_NUM_HASH c
JOIN REF_PHONE_NUMBER_HASH n
ON ora_hash(PHONE_NUMBER, n.max_buckets) = n.REF_PHONE_NUMBER_VALUE_HASH;
-- SQL1 to replace reference values for Main table 2 - CUS_PHONE_NUM_2
WITH REF_PHONE_NUMBER_HASH AS
(
SELECT REF_PHONE_NUMBER_VALUE ,
row_number() over(order by REF_PHONE_NUMBER_VALUE) - 1 REF_PHONE_NUMBER_VALUE_HASH,
count(*) over() max_buckets
FROM REF_PHONE_NUMBER
),
CUS_PHONE_NUM_HASH AS
(select CUS_ID,
PHONE_NUMBER
from CUS_PHONE_NUM_2 )
SELECT c.*, n.* ,ora_hash(PHONE_NUMBER, n.max_buckets)
FROM CUS_PHONE_NUM_HASH c
JOIN REF_PHONE_NUMBER_HASH n
ON ora_hash(PHONE_NUMBER, n.max_buckets) = n.REF_PHONE_NUMBER_VALUE_HASH;
答案 0 :(得分:0)
尝试将max_bucket
移到外面。它改变了对测试数据的解释计划。但我不知道它对你真实的如何有效。我也不知道你的索引。
WITH REF_PHONE_NUMBER_HASH AS
(
SELECT REF_PHONE_NUMBER_VALUE ,
row_number() over(order by REF_PHONE_NUMBER_VALUE) - 1 REF_PHONE_NUMBER_VALUE_HASH
FROM REF_PHONE_NUMBER
),
CUS_PHONE_NUM_HASH AS
(select CUS_ID,
PHONE_NUMBER
from CUS_PHONE_NUM )
, BUCKET as
(select count(*) max_buckets from REF_PHONE_NUMBER)
SELECT c.*, n.* ,ora_hash(PHONE_NUMBER, b.max_buckets)
FROM CUS_PHONE_NUM_HASH c
cross join BUCKET b
JOIN REF_PHONE_NUMBER_HASH n
ON ora_hash(PHONE_NUMBER, b.max_buckets) = n.REF_PHONE_NUMBER_VALUE_HASH
也许你也可以考虑硬编码max_bucket
值。在编码之后,您可以为此创建索引:
create index idx_CUS_PHONE_NUM_01 on CUS_PHONE_NUM
( ora_hash(Phone_NUmber, 255) )