我在BigQuery中有一个带有联系人电子邮件的表格。
name_family@company.com
name-family@company.com
name.family@company.com
我需要把名字(name)和姓氏(family)分别提取到单独的列中。我编写了下面的SQL代码,但想知道是否有其他/更好的方法来实现这一目的:
-- Splits the local part (the text before '@') of each contact email into
-- firstName / lastName columns. Separators are tried in priority order:
-- '_' first, then '-', then '.'. A separator only counts when the piece
-- right after its first occurrence is non-empty; otherwise the whole local
-- part becomes firstName and lastName is ''.
-- The table is read once, so duplicate emails are not multiplied the way
-- a self-join on the email value would multiply them.
WITH localParts AS (
  -- Isolate the local part once instead of re-splitting it in every expression.
  SELECT
    string_field_0,
    SPLIT(string_field_0, '@')[SAFE_OFFSET(0)] AS localPart
  FROM `project.dataset.contacts`
),
splitCandidates AS (
  -- One candidate split per supported separator.
  SELECT
    string_field_0,
    localPart,
    SPLIT(localPart, '_') AS byUnderscore,
    SPLIT(localPart, '-') AS byMinus,
    SPLIT(localPart, '.') AS byDot
  FROM localParts
)
SELECT
  string_field_0 AS Email,
  CASE
    WHEN LENGTH(byUnderscore[SAFE_OFFSET(1)]) > 0 THEN byUnderscore[SAFE_OFFSET(0)]
    WHEN LENGTH(byMinus[SAFE_OFFSET(1)]) > 0 THEN byMinus[SAFE_OFFSET(0)]
    WHEN LENGTH(byDot[SAFE_OFFSET(1)]) > 0 THEN byDot[SAFE_OFFSET(0)]
    ELSE localPart  -- no usable separator: the whole local part is the first name
  END AS firstName,
  CASE
    WHEN LENGTH(byUnderscore[SAFE_OFFSET(1)]) > 0 THEN byUnderscore[SAFE_OFFSET(1)]
    WHEN LENGTH(byMinus[SAFE_OFFSET(1)]) > 0 THEN byMinus[SAFE_OFFSET(1)]
    WHEN LENGTH(byDot[SAFE_OFFSET(1)]) > 0 THEN byDot[SAFE_OFFSET(1)]
    ELSE ''
  END AS lastName
FROM splitCandidates
ORDER BY Email DESC
答案 0 :(得分:2)
#standardSQL
-- Inline sample rows standing in for the real contacts table.
WITH `project.dataset.contacts` AS (
  SELECT 'name_family@company.com' AS email UNION ALL
  SELECT 'name-family@company.com' UNION ALL
  SELECT 'name.family@company.com'
)
SELECT
  email,
  -- Leading run of characters before the first '_', '-', '.' or '@'.
  -- Excluding '@' keeps the match inside the local part, so an address
  -- without a separator (e.g. 'name@company.com') yields 'name' rather
  -- than text that runs into the domain.
  REGEXP_EXTRACT(email, r'^([^_\-.@]*)') AS firstName,
  -- Text between the first separator of the local part and '@';
  -- NULL when the local part contains no separator.
  REGEXP_EXTRACT(email, r'^[^_\-.@]*[_\-.]([^@]*)@') AS lastName
FROM `project.dataset.contacts`
结果
Row email firstName lastName
1 name_family@company.com name family
2 name-family@company.com name family
3 name.family@company.com name family