Snowflake 标量 UDF 报错:无法评估不受支持的子查询类型 (Unsupported subquery type cannot be evaluated)

时间:2021-04-20 14:34:46

标签: user-defined-functions snowflake-cloud-data-platform

我正在尝试创建一个函数,该函数接收一个电子邮件地址数组作为输入,并返回一个 ARRAY,其中每个地址的用户名部分都被替换为哈希值。 为此,我创建了这个 UDF:

/* Centered list without bullets or default spacing. */
.navbar {
  position: relative; /* absolutely positioned descendants are placed relative to this box */
  margin: 0;
  padding: 0;
  list-style: none;
  text-align: center;
}

/* Fixed-width text field pinned to the bottom-right corner of its nearest positioned ancestor. */
input[type=text] {
  position: absolute;
  right: 0;
  bottom: 0;
  width: 200px;
}

以下示例正常运行

-- Scalar SQL UDF: for every element of the input array, replaces everything up
-- to and including the last '@' with HASH(element) followed by '@', and returns
-- the transformed elements as an ARRAY.
-- WITHIN GROUP (ORDER BY f.index) keeps the output elements in input order;
-- without it ARRAY_AGG returns the elements in an unspecified order.
CREATE OR REPLACE FUNCTION pseudonymize_email(email ARRAY) RETURNS ARRAY
LANGUAGE SQL STRICT IMMUTABLE
AS $$
    SELECT array_agg(regexp_replace(f.value,'.+\@', concat(hash(f.value), '@')))
               WITHIN GROUP (ORDER BY f.index) as email
    FROM LATERAL flatten(input => email) as f
$$;

这正好返回一列和一个值。

但是,当在普通的 SELECT 语句中对表的列调用该函数时,例如

-- Applies the hashing expression directly to a literal array.
-- WITHIN GROUP (ORDER BY f.index) keeps the hashed addresses in the same order
-- as the input array; without it ARRAY_AGG order is unspecified.
SELECT array_agg(regexp_replace(f.value,'.+\@', concat(hash(f.value), '@')))
           WITHIN GROUP (ORDER BY f.index) as email
FROM LATERAL flatten(input => array_construct('toto@gmail.com', 'hello@yahoo.com')) as f

我会收到以下错误:Unsupported subquery type cannot be evaluated(无法评估不受支持的子查询类型)

知道如何解决这个问题吗?

2 个答案:

答案 0 :(得分:1)

对于 SQL UDF,Snowflake 有时会尝试将它们内联,但没有成功。

另一种方法是编写 JavaScript UDF - 在这种情况下效果很好:

CREATE OR REPLACE FUNCTION pseudonymize_email_JS(email ARRAY) RETURNS ARRAY
LANGUAGE JAVASCRIPT STRICT IMMUTABLE
AS $$

// https://stackoverflow.com/a/60467595/132438
// TODO: optimize by creating only once per VM
/**
 * Computes the MD5 digest of a string and returns it as 32 lowercase
 * hexadecimal characters (four 32-bit state words, 8 hex digits each).
 *
 * Self-contained: uses only the nested helpers declared below.
 *
 * NOTE(review): sb() packs each charCodeAt() value into an 8-bit slot, so
 * character codes above 0xFF would spill into neighbouring bytes. The digest
 * therefore looks byte-accurate only for ASCII/Latin-1 input -- confirm the
 * callers never pass other characters, or encode to UTF-8 first.
 *
 * @param {string} inputString text to hash
 * @returns {string} hexadecimal digest
 */
function md5(inputString) {
    // Lookup table for hex digits.
    var hc="0123456789abcdef";
    // rh: renders a 32-bit word as 8 hex digits, lowest byte first (high nibble before low nibble).
    function rh(n) {var j,s="";for(j=0;j<=3;j++) s+=hc.charAt((n>>(j*8+4))&0x0F)+hc.charAt((n>>(j*8))&0x0F);return s;}
    // ad: 32-bit addition done in two 16-bit halves with an explicit carry, so the result wraps to 32 bits.
    function ad(x,y) {var l=(x&0xFFFF)+(y&0xFFFF);var m=(x>>16)+(y>>16)+(l>>16);return (m<<16)|(l&0xFFFF);}
    // rl: rotates the 32-bit value n left by c bits.
    function rl(n,c)            {return (n<<c)|(n>>>(32-c));}
    // cm: step shared by all rounds -- adds q, a, x and t, rotates the sum by s, then adds b.
    function cm(q,a,b,x,s,t)    {return ad(rl(ad(ad(a,q),ad(x,t)),s),b);}
    // ff/gg/hh/ii: the four round functions; each feeds a different bitwise mix of b, c, d into cm.
    function ff(a,b,c,d,x,s,t)  {return cm((b&c)|((~b)&d),a,b,x,s,t);}
    function gg(a,b,c,d,x,s,t)  {return cm((b&d)|(c&(~d)),a,b,x,s,t);}
    function hh(a,b,c,d,x,s,t)  {return cm(b^c^d,a,b,x,s,t);}
    function ii(a,b,c,d,x,s,t)  {return cm(c^(b|(~d)),a,b,x,s,t);}
    // sb: converts the string into a zero-initialised array of 32-bit words (16 words per block),
    // packing four character codes per word, then appends the 0x80 marker after the last
    // character and stores the message length in bits in the second-to-last word.
    function sb(x) {
        var i;var nblk=((x.length+8)>>6)+1;var blks=new Array(nblk*16);for(i=0;i<nblk*16;i++) blks[i]=0;
        for(i=0;i<x.length;i++) blks[i>>2]|=x.charCodeAt(i)<<((i%4)*8);
        blks[i>>2]|=0x80<<((i%4)*8);blks[nblk*16-2]=x.length*8;return blks;
    }
    // x holds the padded message words; a, b, c, d are the running state, seeded with fixed constants.
    var i,x=sb(inputString),a=1732584193,b=-271733879,c=-1732584194,d=271733878,olda,oldb,oldc,oldd;
    // Process the message 16 words at a time. The state is saved on entry, run through
    // 16 steps of each round function (ff, gg, hh, ii), and the saved state is added back at the end.
    // The order of the steps below matters: every step consumes the result of the previous one.
    for(i=0;i<x.length;i+=16) {olda=a;oldb=b;oldc=c;oldd=d;
        a=ff(a,b,c,d,x[i+ 0], 7, -680876936);d=ff(d,a,b,c,x[i+ 1],12, -389564586);c=ff(c,d,a,b,x[i+ 2],17,  606105819);
        b=ff(b,c,d,a,x[i+ 3],22,-1044525330);a=ff(a,b,c,d,x[i+ 4], 7, -176418897);d=ff(d,a,b,c,x[i+ 5],12, 1200080426);
        c=ff(c,d,a,b,x[i+ 6],17,-1473231341);b=ff(b,c,d,a,x[i+ 7],22,  -45705983);a=ff(a,b,c,d,x[i+ 8], 7, 1770035416);
        d=ff(d,a,b,c,x[i+ 9],12,-1958414417);c=ff(c,d,a,b,x[i+10],17,     -42063);b=ff(b,c,d,a,x[i+11],22,-1990404162);
        a=ff(a,b,c,d,x[i+12], 7, 1804603682);d=ff(d,a,b,c,x[i+13],12,  -40341101);c=ff(c,d,a,b,x[i+14],17,-1502002290);
        b=ff(b,c,d,a,x[i+15],22, 1236535329);a=gg(a,b,c,d,x[i+ 1], 5, -165796510);d=gg(d,a,b,c,x[i+ 6], 9,-1069501632);
        c=gg(c,d,a,b,x[i+11],14,  643717713);b=gg(b,c,d,a,x[i+ 0],20, -373897302);a=gg(a,b,c,d,x[i+ 5], 5, -701558691);
        d=gg(d,a,b,c,x[i+10], 9,   38016083);c=gg(c,d,a,b,x[i+15],14, -660478335);b=gg(b,c,d,a,x[i+ 4],20, -405537848);
        a=gg(a,b,c,d,x[i+ 9], 5,  568446438);d=gg(d,a,b,c,x[i+14], 9,-1019803690);c=gg(c,d,a,b,x[i+ 3],14, -187363961);
        b=gg(b,c,d,a,x[i+ 8],20, 1163531501);a=gg(a,b,c,d,x[i+13], 5,-1444681467);d=gg(d,a,b,c,x[i+ 2], 9,  -51403784);
        c=gg(c,d,a,b,x[i+ 7],14, 1735328473);b=gg(b,c,d,a,x[i+12],20,-1926607734);a=hh(a,b,c,d,x[i+ 5], 4,    -378558);
        d=hh(d,a,b,c,x[i+ 8],11,-2022574463);c=hh(c,d,a,b,x[i+11],16, 1839030562);b=hh(b,c,d,a,x[i+14],23,  -35309556);
        a=hh(a,b,c,d,x[i+ 1], 4,-1530992060);d=hh(d,a,b,c,x[i+ 4],11, 1272893353);c=hh(c,d,a,b,x[i+ 7],16, -155497632);
        b=hh(b,c,d,a,x[i+10],23,-1094730640);a=hh(a,b,c,d,x[i+13], 4,  681279174);d=hh(d,a,b,c,x[i+ 0],11, -358537222);
        c=hh(c,d,a,b,x[i+ 3],16, -722521979);b=hh(b,c,d,a,x[i+ 6],23,   76029189);a=hh(a,b,c,d,x[i+ 9], 4, -640364487);
        d=hh(d,a,b,c,x[i+12],11, -421815835);c=hh(c,d,a,b,x[i+15],16,  530742520);b=hh(b,c,d,a,x[i+ 2],23, -995338651);
        a=ii(a,b,c,d,x[i+ 0], 6, -198630844);d=ii(d,a,b,c,x[i+ 7],10, 1126891415);c=ii(c,d,a,b,x[i+14],15,-1416354905);
        b=ii(b,c,d,a,x[i+ 5],21,  -57434055);a=ii(a,b,c,d,x[i+12], 6, 1700485571);d=ii(d,a,b,c,x[i+ 3],10,-1894986606);
        c=ii(c,d,a,b,x[i+10],15,   -1051523);b=ii(b,c,d,a,x[i+ 1],21,-2054922799);a=ii(a,b,c,d,x[i+ 8], 6, 1873313359);
        d=ii(d,a,b,c,x[i+15],10,  -30611744);c=ii(c,d,a,b,x[i+ 6],15,-1560198380);b=ii(b,c,d,a,x[i+13],21, 1309151649);
        a=ii(a,b,c,d,x[i+ 4], 6, -145523070);d=ii(d,a,b,c,x[i+11],10,-1120210379);c=ii(c,d,a,b,x[i+ 2],15,  718787259);
        b=ii(b,c,d,a,x[i+ 9],21, -343485551);a=ad(a,olda);b=ad(b,oldb);c=ad(c,oldc);d=ad(d,oldd);
    }
    // Concatenate the four state words as hex to form the digest.
    return rh(a)+rh(b)+rh(c)+rh(d);
}

return EMAIL.map(function(x) {
    var regex = /(.*)@/;
    var base = regex.exec(x)[1];
    return x.replace(regex, md5(base) + '@')
});
$$;




-- Demo: build a one-row table holding an id and an array of addresses,
-- then call the JavaScript UDF on the array column of that row.
WITH test_table (col1, col2) AS (
    SELECT
        1,
        ARRAY_CONSTRUCT('toto@gmail.com', 'hello@yahoo.com')
)
SELECT
    t.col1,
    t.col2,
    pseudonymize_email_js(t.col2) AS hashed_emails
FROM test_table AS t;
| COL1 | COL2 | HASHED_EMAILS |
|------|------|---------------|
| 1 | [ "toto@gmail.com", "hello@yahoo.com" ] | [ "f71dbe52628a3f83a77ab494817525c6@gmail.com", "5d41402abc4b2a76b9719d911017c592@yahoo.com" ] |

答案 1 :(得分:0)

因此,鉴于该函数按其目前的写法无法对每一行进行调用,一种选择是把这段逻辑放进 CTE,或者直接把 email_hash 的主体作为您的 SELECT 语句。不过,人们之所以想把它写成函数,通常是为了隐藏复杂性或重用逻辑。

-- Same transformation without a UDF: flatten each row's array, transform the
-- elements, and re-aggregate them per original row.
WITH test_table(col1, col2) AS (
    SELECT 1, array_construct('toto@gmail.com', 'hello@yahoo.com')
), email_hash AS (
    SELECT col1
        ,col2
        -- ORDER BY f.index keeps the elements in input order; without it the
        -- order produced by ARRAY_AGG is unspecified.
        ,array_agg(regexp_replace(f.value,'.+\@', concat(hash(f.value), '@')))
            WITHIN GROUP (ORDER BY f.index) as email
    FROM test_table AS t,
        -- OUTER => TRUE keeps rows whose array is empty or NULL (they would
        -- otherwise be dropped); such rows end up with an empty result array.
        TABLE(FLATTEN(input => t.col2, outer => TRUE)) f
    GROUP BY 1,2
)
SELECT
    col1,
    col2,
    email as hashed_emails
FROM email_hash