'Snowflake View too slow to query with dynamic pivot
I have created a stored procedure in Snowflake to dynamically pivot the table and create a view from JSON data. The query creates a view with 370 columns as I desired but it's too slow to query over the view in fact querying a single row takes up to 20 mins
create or replace procedure create_pivot_view()
returns string
language javascript
execute as caller as
$$
  var cols_query = ` select
concat('\\'',
listagg( 
    distinct f.value:signalName::STRING,'\\',\\''),'\\'') as signal_name_list
FROM
(select *
FROM 
    PM_POWER),table(flatten(input=>c1:data:baseData:signals, mode=>'ARRAY')) as f`;
  var stmt1 = snowflake.createStatement({sqlText: cols_query});
  var results1 = stmt1.execute();
  results1.next();
  var col_list = results1.getColumnValue(1);
   pivot_query = `
create or replace view AWSS3_PM.PUBLIC.PM_POWER_CN8000_V2 as 
select  * from (
select figures,stats,SignalName,id,Latitude,Longitude,Altitude
from (
select
  c1:id::STRING as id,
  c1:data:baseData:unitID::varchar as UnitID,
  c1:data:baseData:latitude::varchar as Latitude,
  c1:data:baseData:longitude::varchar as Longitude,
  c1:data:baseData:altitude::varchar as Altitude,
  c1:timestamp::varchar as TimeStamp,
  f.value:"dataValue"::varchar as SignalDataValue,
  f.value:"dataValueValid"::varchar as SignalDataValueValid,
  f.value:"signalID"::varchar as SignalID,
  f.value:"units"::varchar as SignalUnits,
  f.value:"dataValueEnum"::varchar as SignalDataEnum,
  f.value:"signalName"::varchar as SignalName
from
(
select *
FROM 
    PM_POWER), table(flatten(input=>c1:data:baseData:signals, mode=>'ARRAY')) as f
) flt
unpivot (figures for stats in(UnitID,SignalDataValue, SignalDataValueValid, SignalID, SignalUnits, SignalDataEnum, TimeStamp))
) up
pivot (min(up.figures) for up.SignalName in (${col_list}
))
     `;
  var stmt2 = snowflake.createStatement({sqlText: pivot_query});
  stmt2.execute();
  
  return pivot_query;
 
$$;
  
call create_pivot_view();
Any suggestions to fasten this approach or an alternative would be really grateful to improve the performance of the query.
A single row of JSON data looks like the below:
{
  "data": {
    "baseData": {
      "altitude": 0,
      "altitudeValid": "false",
      "customerID": "CN",
      "kind": "Power",
      "latitude": 0,
      "latitudeValid": "false",
      "longitude": 0,
      "longitudeValid": "false",
      "name": "Predictive Maintenance Data.Power",
      "signals": [
        {
          "dataValue": 3112900,
          "dataValueValid": "true",
          "signalID": 3424,
          "signalName": "mainGeneratorPower",
          "units": "kW"
        },
        {
          "dataValue": 1.4035000801086426,
          "dataValueValid": "true",
          "signalID": 380,
          "signalName": "DB_HandlePos",
          "units": "V"
        },
        {
          "dataValue": 2,
          "dataValueEnum": "Reverse",
          "dataValueValid": "true",
          "signalID": 813,
          "signalName": "reverserPos",
          "units": "none"
        },
        {
          "dataValue": 291400,
          "dataValueValid": "true",
          "signalID": 586,
          "signalName": "tractiveEffort",
          "units": "none"
        },
        {
          "dataValue": 8,
          "dataValueEnum": "T 8",
          "dataValueValid": "true",
          "signalID": 899,
          "signalName": "throttlePos",
          "units": "none"
        },
        {
          "dataValue": 47.05950164794922,
          "dataValueValid": "true",
          "signalID": 1805,
          "signalName": "AWTF",
          "units": "°C"
        },
        {
          "dataValue": 0.2971585690975189,
          "dataValueValid": "true",
          "signalID": 4925,
          "signalName": "ChpDty",
          "units": "none"
        },
        {
          "dataValue": 20.14109992980957,
          "dataValueValid": "true",
          "signalID": 4835,
          "signalName": "LDBBLWA",
          "units": "A"
        },
        {
          "dataValue": 36.02000045776367,
          "dataValueValid": "true",
          "signalID": 2669,
          "signalName": "IcGVelM",
          "units": "km/hr"
        },
        {
          "dataValue": 479185.125,
          "dataValueValid": "true",
          "signalID": 1070,
          "signalName": "WPEgILP",
          "units": "PSIG"
        },
        {
          "dataValue": 293026.875,
          "dataValueValid": "true",
          "signalID": 1799,
          "signalName": "WPEgOtP",
          "units": "PSIG"
        },
        {
          "dataValue": 926750,
          "dataValueValid": "true",
          "signalID": 4698,
          "signalName": "MR2 Prs",
          "units": "PSIG"
        },
        {
          "dataValue": 24,
          "dataValueEnum": "ON",
          "dataValueValid": "true",
          "signalID": 664,
          "signalName": "MVCC>",
          "units": "none"
        },
        {
          "dataValue": 907422.625,
          "dataValueValid": "true",
          "signalID": 4804,
          "signalName": "SR Pres",
          "units": "PSIG"
        }
      ],
      "unitID": "CN 8000",
      "ver": "1.0.0"
    },
    "baseType": "PredictiveMaintenanceData"
  },
  "dataName": "CN8000.Prod.PredictiveMaintenanceData",
  "id": "18a89f9e-9620-4453-a546-23412025e7c0",
  "tags": {
    "iaapl.access.level1": "Private",
    "iaapl.access.level2": "OEM",
    "iaapl.internal.deviceID": "",
    "iaapl.internal.deviceName": "",
    "iaapl.internal.encodeTime": "2021-02-25T07:41:19.000Z",
    "iaapl.internal.sender": "Intelligent",
    "iaapl.software.name": "",
    "iaapl.software.partNumber": 0,
    "iaapl.software.version": ""
  },
  "timestamp": "2021-02-25T07:32:31.000Z"
}
The result should look like :
Thanks in Advance
Solution 1:[1]
the bits where you type
( select * FROM PM_POWER)
could just be
PM_POWER
Your JSON is malformed.
You are reading from data:baseData:signals but there is no signals in you data..
If you want help with your View performance, I would remove the whole Stored Procedure part. And get SELECT that get the result you want from the demo data you provide. Because presently.
Update:
So your example data still does not generate the desired output. So if this is a performance improvement project, it's super frustrating, to not even have the input + code = output
anyways, here's you data and code mixed together:
with pm_power as (
    select parse_json('{
  "data": {
    "baseData": {
      "altitude": 0,
      "altitudeValid": "false",
      "customerID": "CN",
      "kind": "Power",
      "latitude": 0,
      "latitudeValid": "false",
      "longitude": 0,
      "longitudeValid": "false",
      "name": "Predictive Maintenance Data.Power",
      "signals": [
        {
          "dataValue": 3112900,
          "dataValueValid": "true",
          "signalID": 3424,
          "signalName": "mainGeneratorPower",
          "units": "kW"
        },
        {
          "dataValue": 1.4035000801086426,
          "dataValueValid": "true",
          "signalID": 380,
          "signalName": "DB_HandlePos",
          "units": "V"
        },
        {
          "dataValue": 2,
          "dataValueEnum": "Reverse",
          "dataValueValid": "true",
          "signalID": 813,
          "signalName": "reverserPos",
          "units": "none"
        },
        {
          "dataValue": 291400,
          "dataValueValid": "true",
          "signalID": 586,
          "signalName": "tractiveEffort",
          "units": "none"
        },
        {
          "dataValue": 8,
          "dataValueEnum": "T 8",
          "dataValueValid": "true",
          "signalID": 899,
          "signalName": "throttlePos",
          "units": "none"
        },
        {
          "dataValue": 47.05950164794922,
          "dataValueValid": "true",
          "signalID": 1805,
          "signalName": "AWTF",
          "units": "°C"
        },
        {
          "dataValue": 0.2971585690975189,
          "dataValueValid": "true",
          "signalID": 4925,
          "signalName": "ChpDty",
          "units": "none"
        },
        {
          "dataValue": 20.14109992980957,
          "dataValueValid": "true",
          "signalID": 4835,
          "signalName": "LDBBLWA",
          "units": "A"
        },
        {
          "dataValue": 36.02000045776367,
          "dataValueValid": "true",
          "signalID": 2669,
          "signalName": "IcGVelM",
          "units": "km/hr"
        },
        {
          "dataValue": 479185.125,
          "dataValueValid": "true",
          "signalID": 1070,
          "signalName": "WPEgILP",
          "units": "PSIG"
        },
        {
          "dataValue": 293026.875,
          "dataValueValid": "true",
          "signalID": 1799,
          "signalName": "WPEgOtP",
          "units": "PSIG"
        },
        {
          "dataValue": 926750,
          "dataValueValid": "true",
          "signalID": 4698,
          "signalName": "MR2 Prs",
          "units": "PSIG"
        },
        {
          "dataValue": 24,
          "dataValueEnum": "ON",
          "dataValueValid": "true",
          "signalID": 664,
          "signalName": "MVCC>",
          "units": "none"
        },
        {
          "dataValue": 907422.625,
          "dataValueValid": "true",
          "signalID": 4804,
          "signalName": "SR Pres",
          "units": "PSIG"
        }
      ],
      "unitID": "CN 8000",
      "ver": "1.0.0"
    },
    "baseType": "PredictiveMaintenanceData"
  },
  "dataName": "CN8000.Prod.PredictiveMaintenanceData",
  "id": "18a89f9e-9620-4453-a546-23412025e7c0",
  "tags": {
    "iaapl.access.level1": "Private",
    "iaapl.access.level2": "OEM",
    "iaapl.internal.deviceID": "",
    "iaapl.internal.deviceName": "",
    "iaapl.internal.encodeTime": "2021-02-25T07:41:19.000Z",
    "iaapl.internal.sender": "Intelligent",
    "iaapl.software.name": "",
    "iaapl.software.partNumber": 0,
    "iaapl.software.version": ""
  },
  "timestamp": "2021-02-25T07:32:31.000Z"
}')as c1
) 
select  * 
from (
    select 
        figures,
        stats,
        SignalName,
        id,
        Latitude,
        Longitude,
        Altitude
    from (
        select
            c1:id::STRING as id,
            c1:data:baseData:unitID::varchar as UnitID,
            c1:data:baseData:latitude::varchar as Latitude,
            c1:data:baseData:longitude::varchar as Longitude,
            c1:data:baseData:altitude::varchar as Altitude,
            c1:timestamp::varchar as TimeStamp,
            f.value:"dataValue"::varchar as SignalDataValue,
            f.value:"dataValueValid"::varchar as SignalDataValueValid,
            f.value:"signalID"::varchar as SignalID,
            f.value:"units"::varchar as SignalUnits,
            f.value:"dataValueEnum"::varchar as SignalDataEnum,
            f.value:"signalName"::varchar as SignalName
        from PM_POWER
            ,table( flatten(input=>c1:data:baseData:signals, mode=>'ARRAY') ) as f
    ) flt
        unpivot (figures for stats in(UnitID, SignalDataValue, SignalDataValueValid, SignalID, SignalUnits, SignalDataEnum, TimeStamp))
) up
pivot (min(up.figures) for up.SignalName in (
'mainGeneratorPower','DB_HandlePos','reverserPos','tractiveEffort','throttlePos','AWTF','LDBBLWA','IcGVelM','WPEgOtP','MR2 Prs','SR Pres','ChpDty','MVCC>','WPEgILP'
))
| STATS | ID | LATITUDE | LONGITUDE | ALTITUDE | 'mainGeneratorPower' | 'DB_HandlePos' | 'reverserPos' | 'tractiveEffort' | 'throttlePos' | 'AWTF' | 'LDBBLWA' | 'IcGVelM' | 'WPEgOtP' | 'MR2 Prs' | 'SR Pres' | 'ChpDty' | 'MVCC>' | 'WPEgILP' | 
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| UNITID | 18a89f9e-9620-4453-a546-23412025e7c0 | 0 | 0 | 0 | CN 8000 | CN 8000 | CN 8000 | CN 8000 | CN 8000 | CN 8000 | CN 8000 | CN 8000 | CN 8000 | CN 8000 | CN 8000 | CN 8000 | CN 8000 | CN 8000 | 
| SIGNALID | 18a89f9e-9620-4453-a546-23412025e7c0 | 0 | 0 | 0 | 3424 | 380 | 813 | 586 | 899 | 1805 | 4835 | 2669 | 1799 | 4698 | 4804 | 4925 | 664 | 1070 | 
| SIGNALUNITS | 18a89f9e-9620-4453-a546-23412025e7c0 | 0 | 0 | 0 | kW | V | none | none | none | °C | A | km/hr | PSIG | PSIG | PSIG | none | none | PSIG | 
| TIMESTAMP | 18a89f9e-9620-4453-a546-23412025e7c0 | 0 | 0 | 0 | 2021-02-25T07:32:31.000Z | 2021-02-25T07:32:31.000Z | 2021-02-25T07:32:31.000Z | 2021-02-25T07:32:31.000Z | 2021-02-25T07:32:31.000Z | 2021-02-25T07:32:31.000Z | 2021-02-25T07:32:31.000Z | 2021-02-25T07:32:31.000Z | 2021-02-25T07:32:31.000Z | 2021-02-25T07:32:31.000Z | 2021-02-25T07:32:31.000Z | 2021-02-25T07:32:31.000Z | 2021-02-25T07:32:31.000Z | 2021-02-25T07:32:31.000Z | 
| SIGNALDATAVALUEVALID | 18a89f9e-9620-4453-a546-23412025e7c0 | 0 | 0 | 0 | true | true | true | true | true | true | true | true | true | true | true | true | true | true | 
| SIGNALDATAENUM | 18a89f9e-9620-4453-a546-23412025e7c0 | 0 | 0 | 0 | Reverse | T 8 | ON | |||||||||||
| SIGNALDATAVALUE | 18a89f9e-9620-4453-a546-23412025e7c0 | 0 | 0 | 0 | 3112900 | 1.4035000801086426 | 2 | 291400 | 8 | 47.05950164794922 | 20.14109992980957 | 36.02000045776367 | 293026.875 | 926750 | 907422.625 | 0.2971585690975189 | 24 | 479185.125 | 
An Answer:
so this SQL gets the answer in the same structure BUT timestamp,unitid are not rows as those are all the same value, so it seem having them as rows is a mistake, and they should be a column..
select
    case lower(o.key )
        when 'datavalue' then 'SignalDataValue'
        when 'datavaluevalid' then 'SignalDataValueValid'
        when 'signalid' then 'SignalID'
        when 'units' then 'SignalUnits'
        when 'datavalueenum' then 'SignalDataEnum'
        when 'signalname' then 'SignalName'
    end as stats,
    c1:id::STRING as id,
    c1:data:baseData:unitID::varchar as UnitID,
    c1:data:baseData:latitude::varchar as Latitude,
    c1:data:baseData:longitude::varchar as Longitude,
    c1:data:baseData:altitude::varchar as Altitude,
    c1:timestamp::varchar as TimeStamp
    ,max(iff(f.value:"signalName"='mainGeneratorPower', o.value, null)) as "'mainGeneratorPower'"
    ,max(iff(f.value:"signalName"='DB_HandlePos', o.value, null)) as "'DB_HandlePos'"
    ,max(iff(f.value:"signalName"='reverserPos', o.value, null)) as "'reverserPos'"
    ,max(iff(f.value:"signalName"='tractiveEffort', o.value, null)) as "'tractiveEffort'"
    ,max(iff(f.value:"signalName"='throttlePos', o.value, null)) as "'throttlePos'"
    ,max(iff(f.value:"signalName"='AWTF', o.value, null)) as "'AWTF'"
    ,max(iff(f.value:"signalName"='LDBBLWA', o.value, null)) as "'LDBBLWA'"
    ,max(iff(f.value:"signalName"='IcGVelM', o.value, null)) as "'IcGVelM'"
    ,max(iff(f.value:"signalName"='WPEgOtP', o.value, null)) as "'WPEgOtP'"
    ,max(iff(f.value:"signalName"='MR2 Prs', o.value, null)) as "'MR2 Prs'"
    ,max(iff(f.value:"signalName"='SR Pres', o.value, null)) as "'SR Pres'"
    ,max(iff(f.value:"signalName"='ChpDty', o.value, null)) as "'ChpDty'"
    ,max(iff(f.value:"signalName"='MVCC>', o.value, null)) as "'MVCC>'"
    ,max(iff(f.value:"signalName"='WPEgILP', o.value, null)) as "'WPEgILP'"
from PM_POWER
    ,table( flatten(input=>c1:data:baseData:signals, mode=>'ARRAY') ) as f
    ,table( flatten(input=>f.value, mode=>'OBJECT')) as o
WHERE o.key != 'signalName'
GROUP BY 1,2,3,4,5,6,7
ORDER BY 1;
gives:
which means your Store Procedure needs to build that.. which is you are happy that this is more performant, you can solve how to build that above in your SP.
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source | 
|---|---|
| Solution 1 | 



