如何使用动态定界符分割字段

时间:2018-12-20 11:53:37

标签: google-bigquery

我在BigQuery中有一个带有联系人电子邮件的表格。

name_family@company.com

name-family@company.com

name.family@company.com

我需要把名字(name)和姓氏(family)分别提取到单独的列中。我编写了下面这段 SQL 代码,但想知道是否有其他/更好的方法来实现这一目的。


-- Split the local part (the text before '@') of every contact e-mail into a
-- first name and a last name.
--
-- Delimiters are tried in priority order '_', '-', '.'; the first delimiter
-- that yields a non-empty second piece wins. When no delimiter qualifies, the
-- whole local part is returned as firstName and lastName is ''.
--
-- The table is read once and no self-join is used, so duplicate e-mail rows
-- are not multiplied in the result.
WITH localParts AS (
    -- Isolate the text before '@' once instead of recomputing it per delimiter.
    SELECT
        string_field_0,
        SPLIT(string_field_0, '@')[SAFE_OFFSET(0)] AS localPart
    FROM `project.dataset.contacts`
),
pieces AS (
    -- One candidate split per supported delimiter.
    SELECT
        string_field_0,
        localPart,
        SPLIT(localPart, '_') AS byUnderscore,
        SPLIT(localPart, '-') AS byMinus,
        SPLIT(localPart, '.') AS byDot
    FROM localParts
)
SELECT
    string_field_0 AS Email,
    -- SAFE_OFFSET returns NULL when the piece is missing; LENGTH(NULL) > 0 is
    -- NULL, so the CASE falls through to the next delimiter.
    CASE
        WHEN LENGTH(byUnderscore[SAFE_OFFSET(1)]) > 0 THEN byUnderscore[SAFE_OFFSET(0)]
        WHEN LENGTH(byMinus[SAFE_OFFSET(1)]) > 0 THEN byMinus[SAFE_OFFSET(0)]
        WHEN LENGTH(byDot[SAFE_OFFSET(1)]) > 0 THEN byDot[SAFE_OFFSET(0)]
        ELSE localPart
    END AS firstName,
    CASE
        WHEN LENGTH(byUnderscore[SAFE_OFFSET(1)]) > 0 THEN byUnderscore[SAFE_OFFSET(1)]
        WHEN LENGTH(byMinus[SAFE_OFFSET(1)]) > 0 THEN byMinus[SAFE_OFFSET(1)]
        WHEN LENGTH(byDot[SAFE_OFFSET(1)]) > 0 THEN byDot[SAFE_OFFSET(1)]
        ELSE ''
    END AS lastName
FROM pieces
ORDER BY Email DESC;

1 个答案:

答案 0 :(得分:2)

#standardSQL
-- Extract first and last name from the local part of an e-mail address whose
-- name pieces are separated by '_', '-' or '.'.
--
-- The CTE below shadows `project.dataset.contacts` with inline sample rows so
-- the query can be run as-is; remove it to query the real table.
WITH `project.dataset.contacts` AS (
  SELECT 'name_family@company.com' AS email UNION ALL
  SELECT 'name-family@company.com' UNION ALL
  SELECT 'name.family@company.com'
)
SELECT
  email,
  -- Anchored at the start and stopping at '@', so an address without a
  -- delimiter in its local part (e.g. 'name@company.com') yields 'name'
  -- rather than spilling into the domain.
  REGEXP_EXTRACT(email, r'^([^_\-.@]*)') AS firstName,
  -- Everything between the first delimiter and '@'; NULL when the local part
  -- contains no delimiter.
  REGEXP_EXTRACT(email, r'^[^_\-.@]*[_\-.]([^@]*)@') AS lastName
FROM `project.dataset.contacts`;

结果

Row     email                       firstName   lastName     
1       name_family@company.com     name        family   
2       name-family@company.com     name        family   
3       name.family@company.com     name        family