Snowflake:将 JSON 转换为表格

时间:2021-07-15 16:55:21

标签: stored-procedures snowflake-cloud-data-platform user-defined-functions

我查阅了 Snowflake 的文档,但还没有找到解决方案,所以来这里求助。我在 Snowflake 中有一个表,其中包含一个用于存储 JSON 数据的 VARIANT 列。有没有办法将对该 VARIANT 列的查询结果动态转换为表格格式?

例如,我有一个像下面这样的查询:

-- Fetch two sample rows of the JSON column
SELECT json_data
FROM database.schema.table
LIMIT 2

它会返回类似下面的结果:

| JSON_DATA |
| --- |
| {"EventName": "Test", "EventValue": 100} |
| {"EventName": "Test", "EventValue": 200} |

有没有办法在不逐个引用键的情况下,将它作为表返回?我知道可以这样写:

-- Project each known JSON key explicitly as its own column
SELECT
  json_data['EventName']  AS EventName,
  json_data['EventValue'] AS EventValue
FROM database.schema.table

但我正在寻找更动态的东西

-- Desired (hypothetical) usage: json_to_table stands for the dynamic function being asked for; it is not defined anywhere in this post
select * from table(json_to_table(select json_data from database.schema.table)) limit 2

它可能会返回:

| EventName | EventValue |
| --- | --- |
| Test | 100 |
| Test | 200 |

我正在寻找任何内部解决方案(例如我可能遗漏的存储过程、UDF、Snowflake 函数……除了外部函数之外的任何东西)

1 个答案:

答案 0 :(得分:1)

虽然目前无法创建动态列列表,但如评论中所述,您可以运行存储过程来构建(和重建)视图。这样就不必手动输入和维护一长串列。

在底部创建 SP 后,您可以像这样使用它:

-- Demo table with a single VARIANT column, loaded with two JSON documents
CREATE OR REPLACE TABLE MY_TABLE (JSON_DATA VARIANT);
INSERT INTO MY_TABLE SELECT PARSE_JSON('{"EventName": "Test", "EventValue": 100}');
INSERT INTO MY_TABLE SELECT PARSE_JSON('{"EventName": "Test", "EventValue": 200}');

-- Build (or rebuild) MY_VIEW from the keys found in MY_TABLE.JSON_DATA
CALL create_view_over_json('MY_TABLE', 'JSON_DATA', 'MY_VIEW');

-- Read the JSON through the generated view
SELECT * FROM MY_VIEW;

这是创建视图的存储过程。请注意,如果表非常大,Snowflake 的 TYPEOF() 函数需要相当长的时间才能确定列类型。如果已知数据结构是一致的,您可以将其指向一个示例表,或者一个用 limit 1000 创建的表。

create or replace procedure create_view_over_json (TABLE_NAME varchar, COL_NAME varchar, VIEW_NAME varchar)
returns varchar
language javascript
as
$$
/****************************************************************************************************************
*                                                                                                               *
* CREATE_VIEW_OVER_JSON - Craig Warman, Alan Eldridge and Greg Pavlik Snowflake Computing, 2019, 2020, 2021     *
*                                                                                                               *
* This stored procedure creates a view on a table that contains JSON data in a column                           *
* of type VARIANT.  It can be used for easily generating views that enable access to                            *
* this data for BI tools without the need for manual view creation based on the underlying                      *
* JSON document structure.                                                                                      *
*                                                                                                               *
* Parameters:                                                                                                   *
* TABLE_NAME    - Name of table that contains the semi-structured data.                                         *
* COL_NAME      - Name of VARIANT column in the aforementioned table.                                           *
* VIEW_NAME     - Name of view to be created by this stored procedure.                                          *
*                                                                                                               *
* Usage Example:                                                                                                *
* call create_view_over_json('db.schema.semistruct_data', 'variant_col', 'db.schema.semistruct_data_vw');       *
*                                                                                                               *
* Important notes:                                                                                              *
*   - This is the "basic" version of a more sophisticated procedure. Its primary purpose                        *
*     is to illustrate the view generation concept.                                                             *
*   - This version of the procedure does not support:                                                           *
*         - Column case preservation (all view column names will be case-insensitive).                          *
*         - JSON document attributes that are SQL reserved words (like TYPE or NUMBER).                         *
*         - "Exploding" arrays into separate view columns - instead, arrays are simply                          *
*           materialized as view columns of type ARRAY.                                                         *
*   - Execution of this procedure may take an extended period of time for very                                  *
*     large datasets, or for datasets with a wide variety of document attributes                                *
*     (since the view will have a large number of columns).                                                     *
*                                                                                                               *
* Attribution:                                                                                                  *
* I leveraged code developed by Alan Eldridge as the basis for this stored procedure.                           *
*                                                                                                               *
****************************************************************************************************************/

var currentActivity;

try{

    currentActivity   = "building the query for column types";
    var elementQuery  = GetElementQuery(TABLE_NAME, COL_NAME);
    
    currentActivity   = "running the query to get column names";
    var elementRS     = GetResultSet(elementQuery);

    currentActivity   = "building the column list";
    var colList       = GetColumnList(elementRS);

    currentActivity   = "building the view's DDL";
    var viewDDL       = GetViewDDL(VIEW_NAME, colList, TABLE_NAME);

    currentActivity   = "creating the view";
    return ExecuteSingleValueQuery("status", viewDDL);
}
catch(err){
    return "ERROR: Encountered an error while " + currentActivity + ".\n" + err.message;
}

/****************************************************************************************************************
*                                                                                                               *
*   End of main function. Helper functions below.                                                               *
*                                                                                                               *
****************************************************************************************************************/

function GetElementQuery(tableName, columnName){

    /*
        Build the query that returns the list of elements used to build the
        column list for the CREATE VIEW statement.

        tableName   - text substituted for the table placeholder in the FROM clause
        columnName  - text substituted for the column placeholder passed to FLATTEN

        Returns the SQL text. Each row of that query carries PATH_NAME,
        ATTRIBUTE_TYPE and ALIAS_NAME.

        NOTE(review): DISTINCT applies to all three columns, so a path whose values
        map to different attribute types yields one row per type - confirm that the
        consumer of this result copes with repeated aliases.
    */

    // Kept in a local variable: assigning to an undeclared name would create a global.
    var template =
`

SELECT DISTINCT regexp_replace(regexp_replace(f.path,'\\\\[(.+)\\\\]'),'(\\\\w+)','\"\\\\1\"')                      AS path_name,       -- This generates paths with levels enclosed by double quotes (ex: "path"."to"."element").  It also strips any bracket-enclosed array element references (like "[0]")
                DECODE (substr(typeof(f.value),1,1),'A','ARRAY','B','BOOLEAN','I','FLOAT','D','FLOAT','STRING')     AS attribute_type,  -- This generates column datatypes of ARRAY, BOOLEAN, FLOAT, and STRING only
                REGEXP_REPLACE(REGEXP_REPLACE(f.path, '\\\\[(.+)\\\\]'),'[^a-zA-Z0-9]','_')                         AS alias_name       -- This generates column aliases based on the path
FROM
        @~TABLE_NAME~@,
        LATERAL FLATTEN(@~COL_NAME~@, RECURSIVE=>true) f
WHERE   TYPEOF(f.value) != 'OBJECT'
        AND NOT contains(f.path, '[');         -- This prevents traversal down into arrays

`;

    var substitutions = { TABLE_NAME: tableName, COL_NAME: columnName };

    // A replacer function inserts each value verbatim. A plain replacement string would
    // have its dollar-sign patterns interpreted by String.prototype.replace. Doing it in
    // one pass also means text that was just inserted is never scanned for placeholders.
    return template.replace(/@~(TABLE_NAME|COL_NAME)~@/g, function(placeholder, key){
        return substitutions[key];
    });
}

function GetColumnList(elementRS, columnName){

    /*
        Turn the rows of the element query into the select list of the view.

        elementRS   - result set exposing next() and getColumnValue(); each row must
                      provide PATH_NAME, ATTRIBUTE_TYPE and ALIAS_NAME
        columnName  - optional; name of the VARIANT column the paths are applied to.
                      When omitted, the procedure argument COL_NAME is used, which is
                      what the single-argument form of this function always did.

        Returns the entries joined by ", \n", or an empty string when the result set
        has no rows. Entries look something like this:
            col_name:"name"."first"::STRING as name_first,
            col_name:"name"."last"::STRING as name_last
    */

    var variantColumn = (columnName === undefined) ? COL_NAME : columnName;
    var entries = [];

    while (elementRS.next()) {
        entries.push(
            variantColumn + ":" + elementRS.getColumnValue("PATH_NAME")      // Element path name
                          + "::" + elementRS.getColumnValue("ATTRIBUTE_TYPE") // Datatype cast
                          + " as " + elementRS.getColumnValue("ALIAS_NAME")   // Column alias
        );
    }

    return entries.join(", \n");
}

function GetViewDDL(viewName, columnList, tableName){

    /*
        Build the CREATE OR REPLACE VIEW statement.

        viewName    - text substituted for the view placeholder
        columnList  - select list text (for example the output of GetColumnList)
        tableName   - text substituted for the table placeholder in the FROM clause

        Returns the DDL text.
    */

    // Kept in a local variable: assigning to an undeclared name would create a global.
    var template =
`
create or replace view @~VIEW_NAME~@ as
select 
    @~COLUMN_LIST~@
from @~TABLE_NAME~@;
`;

    var substitutions = {
        VIEW_NAME:   viewName,
        COLUMN_LIST: columnList,
        TABLE_NAME:  tableName
    };

    // A replacer function inserts each value verbatim. A plain replacement string would
    // have its dollar-sign patterns interpreted by String.prototype.replace. Doing it in
    // one pass also means an inserted column list is never scanned for placeholders.
    return template.replace(/@~(VIEW_NAME|COLUMN_LIST|TABLE_NAME)~@/g, function(placeholder, key){
        return substitutions[key];
    });
}

/****************************************************************************************************************
*                                                                                                               *
*   Library functions                                                                                           *
*                                                                                                               *
****************************************************************************************************************/

function ExecuteSingleValueQuery(columnName, queryString) {
    /*
        Execute a statement and return one column of its first row.

        columnName  - name of the result column to read
        queryString - SQL text to execute

        Returns the value of columnName in the first row of the result.
        Errors raised while creating or executing the statement propagate unchanged.
        An Error is also thrown when the statement returns no rows.
    */

    // Locals are declared explicitly: assigning to undeclared names would create globals.
    var statement = snowflake.createStatement({sqlText: queryString});
    var resultSet = statement.execute();

    // next() reports whether a row is available; do not read a column when there is none.
    if (!resultSet.next()) {
        throw new Error("Query returned no rows; cannot read column '" + columnName + "'.");
    }
    return resultSet.getColumnValue(columnName);
}

function GetResultSet(sql){
    /*
        Execute a statement and hand back its result set.

        sql - SQL text to execute

        Returns whatever execute() returns for the statement.
        Errors raised while creating or executing the statement propagate unchanged.
    */

    // The statement is a local value: assigning to undeclared names would create globals.
    var statement = snowflake.createStatement({sqlText: sql});
    return statement.execute();
}
$$;