'I tried to automate json views in snowflake. But it displays error in the regex
I tried this approach and it is displaying an error that the regex has a missing statement. The approach that I am trying is from the Automating Snowflake’s Semi-Structured JSON Data Handling
CREATE OR REPLACE PROCEDURE create_view_over_json (TABLE_NAME varchar, COL_NAME varchar, VIEW_NAME varchar, COLUMN_CASE varchar, COLUMN_TYPE varchar)
RETURNS VARCHAR
LANGUAGE javascript
AS
$$
var alias_dbl_quote = "";
var path_name = "regexp_replace(regexp_replace(f.path,'\\[(.+)\\]'),'(\\w+)','"\\1"')" // This generates paths with levels enclosed by double quotes (ex: "path"."to"."element"). It also strips any bracket-enclosed array element references (like "[0]")
var attribute_type = "DECODE (substr(typeof(f.value),1,1),'A','ARRAY','B','BOOLEAN','I','FLOAT','D','FLOAT','STRING')"; // This generates column datatypes of ARRAY, BOOLEAN, FLOAT, and STRING only
var alias_name = "REGEXP_REPLACE(REGEXP_REPLACE(f.path, '\\[(.+)\\]'),'[^a-zA-Z0-9]','_')" ; // This generates column aliases based on the path
var table_list = TABLE_NAME;
var col_list = "";
var array_num = 0;
if (COLUMN_CASE.toUpperCase().charAt(0) == 'M') {
alias_dbl_quote = """; } // COLUMN_CASE parameter is set to 'match col case' so add double quotes around view column alias name
if (COLUMN_TYPE.toUpperCase().charAt(0) == 'S') {
attribute_type = "DECODE (typeof(f.value),'ARRAY','ARRAY','STRING')"; } // COLUMN_TYPE parameter is set to 'string datatypes' so typecast to STRING instead of value returned by TYPEPOF function
// Build a query that returns a list of elements which will be used to build the column list for the CREATE VIEW statement
var element_query = "SELECT DISTINCT n" +
path_name + " AS path_name, n" +
attribute_type + " AS attribute_type, n" +
alias_name + " AS alias_name n" +
"FROM n" +
TABLE_NAME + ", n" +
"LATERAL FLATTEN(" + COL_NAME + ", RECURSIVE=>true) f n" +
"WHERE TYPEOF(f.value) != 'OBJECT' n" +
"AND NOT contains(f.path,'[') "; // This prevents traversal down into arrays
// Run the query...
var element_stmt = snowflake.createStatement({sqlText:element_query});
var element_res = element_stmt.execute();
// ...And loop through the list that was returned
while (element_res.next()) {
if (element_res.getColumnValue(2) != 'ARRAY') {
if (col_list != "") {
col_list += ", n";}
col_list += COL_NAME + ":" + element_res.getColumnValue(1); // Start with the element path name
col_list += "::" + element_res.getColumnValue(2); // Add the datatype
col_list += " as " + alias_dbl_quote + element_res.getColumnValue(3) + alias_dbl_quote; // And finally the element alias
}
// Array elements get handled in the following section:
else {
array_num++;
var simple_array_col_list = "";
var object_array_col_list = "";
// Build a query that returns the elements in the current array
var array_query = "SELECT DISTINCT n"+
path_name + " AS path_name, n" +
attribute_type + " AS attribute_type, n" +
alias_name + " AS attribute_name, n" +
"f.index n" +
"FROM n" +
TABLE_NAME + ", n" +
"LATERAL FLATTEN(" + COL_NAME + ":" + element_res.getColumnValue(1) + ", RECURSIVE=>true) f n" +
"WHERE REGEXP_REPLACE(f.path, '.+(\\w+\\[.+\\]).+', 'SubArrayEle') != 'SubArrayEle' "; // This prevents return of elements of nested arrays (the entire array will be returned in this case)
while (array_res.next()) {
if (array_res.getColumnValue(1).substring(1) == "") { // The element path name is empty, so this is a simple array element
if (simple_array_col_list != "") {
simple_array_col_list += ", n";}
simple_array_col_list += COL_NAME + ":" + element_res.getColumnValue(1); // Start with the element path name
simple_array_col_list += "[" + array_res.getColumnValue(4) + "]"; // Add the array index
simple_array_col_list += "::" + array_res.getColumnValue(2); // Add the datatype
simple_array_col_list += " as " + alias_dbl_quote + element_res.getColumnValue(3) + "_" + array_res.getColumnValue(4) + alias_dbl_quote; // And finally the element alias - Note that the array alias is added as a prefix to ensure uniqueness
}
else { // This is an object array element
if (object_array_col_list != "") {
object_array_col_list += ", n";}
object_array_col_list += "a" + array_num + ".value:" + array_res.getColumnValue(1).substring(1); // Start with the element name (minus the leading '.' character)
object_array_col_list += "::" + array_res.getColumnValue(2); // Add the datatype
object_array_col_list += " as " + alias_dbl_quote + element_res.getColumnValue(3) + array_res.getColumnValue(3) + alias_dbl_quote; // And finally the element alias - Note that the array alias is added as a prefix to ensure uniqueness
}
}
// If no object array elements were found then add the simple array elements to the
// column list...
if (object_array_col_list == "") {
if (col_list != "") {
col_list += ", n";}
col_list += simple_array_col_list;
}
// ...otherwise, add the object array elements to the column list along with a
// LATERAL FLATTEN clause that references the current array to the table list
else {
if (col_list != "") {
col_list += ", n";}
col_list += object_array_col_list;
table_list += ",n LATERAL FLATTEN(" + COL_NAME + ":" + element_res.getColumnValue(1) + ") a" + array_num;
}
}
}
// Now build the CREATE VIEW statement
var view_ddl = "CREATE OR REPLACE VIEW " + VIEW_NAME + " AS n" +
"SELECT n" + col_list + "n" +
"FROM " + table_list;
// Now run the CREATE VIEW statement
var view_stmt = snowflake.createStatement({sqlText:view_ddl});
var view_res = view_stmt.execute();
return view_res.next();
$$;
I tried this approach but it is displaying an error that regex has the [link I followed ][1]error.
- REGEX ERROR* [1]: https://www.snowflake.com/blog/automating-snowflakes-semi-structured-json-data-handling-part-2/#
I am getting this error:
JavaScript compilation error: Uncaught SyntaxError: Invalid or unexpected token
in CREATE_VIEW_OVER_JSON at ' var path_name = "regexp_replace(regexp_replace(f.path,'\[(.+)\]'),'(\\w+)','"\\1"')" ;'
position 82
Solution 1:[1]
Your error can be produced with just the first two line of the SP because you have you quotes incorrectly escaped/paired for the task:
CREATE OR REPLACE PROCEDURE create_view_over_json (TABLE_NAME varchar, COL_NAME varchar, VIEW_NAME varchar, COLUMN_CASE varchar, COLUMN_TYPE varchar)
RETURNS VARCHAR
LANGUAGE javascript
AS
$$
var alias_dbl_quote = "";
var path_name = "regexp_replace(regexp_replace(f.path,'\\[(.+)\\]'),'(\\w+)','"\\1"')" // This generates paths with levels enclosed by double quotes
$$;
call create_view_over_json(null,null,null,null,null);
gives:
100131 (P0000): JavaScript compilation error: Uncaught SyntaxError: Invalid or unexpected token in CREATE_VIEW_OVER_JSON at 'var path_name = "regexp_replace(regexp_replace(f.path,'\[(.+)\]'),'(\w+)','"\1"')" // This generates paths with levels enclosed by double quotes ' position 79
In SQL and in Javascript you cannot put the Quote you are using to wrap are "string" in the string without escaping. In SQL you can use $$ or ' so you have used $$ for the stored procedure body. Where inside the JavaScript you can use ', ", `
Thus in this line you have used double quoutes, which means you can embed the single quotes, but those doubles you have in the string terminate the string..
"regexp_replace(regexp_replace(f.path,'\\[(.+)\\]'),'(\\w+)','"\\1"')"
^ ^
Thus given you need to use both single and double you should wrap the string in back qoutes `
var path_name = `regexp_replace(regexp_replace(f.path,'\\[(.+)\\]'),'(\\w+)','"\\1"')`;
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | Simeon Pilgrim |
