'I would like to know if the query can be simplified further (or) any other way to rewrite the SQL query

SELECT id_col,order_col,
sum(nvl(CASE WHEN INDEX = 2 AND trim(order_list) NOT IN('', ' ') THEN order_list END,0)) OVER (partition BY order_col) AS total_qty, 
sum(nvl(CASE WHEN INDEX = 3 AND trim(order_list) NOT IN('',' ') THEN order_list END,0)) OVER (partition BY order_col) AS total_amt,
sum(nvl(CASE WHEN order_list LIKE '21235%' THEN replace(order_list,'21235=','')END,0)) 
OVER (partition BY order_col) AS sav_001,     
sum(nvl(CASE WHEN order_list LIKE '21236%' THEN replace(order_list,'21236=','')
     END,0)) OVER (partition BY order_col) AS sav_002,
sum(nvl(CASE WHEN order_list LIKE '21237%' THEN replace(order_list,'21237=','')
     END,0)) OVER (partition BY order_col)AS sav_003
FROM  tbl_001  T1
qualify row_number() over (partition by order_col order by date desc) = 1

The above query is taking long time to run. Is there anyway we can re-write the above query and improve the performance?



Solution 1:[1]

Well tiny things first:

trim(order_list) NOT IN('', ' ') if you have trimmed the white space, the second case cannot be. so it can just be trim(order_list) <> ''

which sized, up you possibly could put that into a WHERE clause, (it would eliminate possible order_col rows with just zeros, which maybe you want.

Lukasz's SUM ignores nulls, still holds.

SELECT 
    id_col,
    order_col,
    sum(CASE WHEN INDEX = 2 THEN order_list END) OVER (partition BY order_col) AS total_qty, 
    sum(CASE WHEN INDEX = 3 THEN order_list END) OVER (partition BY order_col) AS total_amt,
    sum(CASE WHEN order_list LIKE '21235%' THEN replace(order_list,'21235=','')END) OVER (partition BY order_col) AS sav_001,     
    sum(CASE WHEN order_list LIKE '21236%' THEN replace(order_list,'21236=','')END) OVER (partition BY order_col) AS sav_002,
    sum(CASE WHEN order_list LIKE '21237%' THEN replace(order_list,'21237=','')END) OVER (partition BY order_col) AS sav_003
FROM tbl_001  T1
WHERE trim(order_list) <> ''
qualify row_number() over (partition by order_col order by date desc) = 1

You are currently building the SUM for every row and then throwing away all but on ROW, and the exist rows care are id_col yet that is never used in any of that OVER()'s, so change to grouping by order_col

old code with some example:

with tbl_001 as (
    select * from values
    (1,10, '10', '2022-03-01'::date, 2),
    (2,10, '11', '2022-03-02'::date, 3),
    (2,10, '21235=12', '2022-03-02'::date, 1),
    (2,10, '21236=13', '2022-03-02'::date, 1),
    (2,10, '21237=14', '2022-03-02'::date, 1)
    t(id_col, order_col, order_list, date, index)
)
SELECT 
    id_col,
    order_col,
    sum(CASE WHEN INDEX = 2 THEN order_list END) OVER (partition BY order_col) AS total_qty, 
    sum(CASE WHEN INDEX = 3 THEN order_list END) OVER (partition BY order_col) AS total_amt,
    sum(CASE WHEN order_list LIKE '21235%' THEN replace(order_list,'21235=','')END) OVER (partition BY order_col) AS sav_001,     
    sum(CASE WHEN order_list LIKE '21236%' THEN replace(order_list,'21236=','')END) OVER (partition BY order_col) AS sav_002,
    sum(CASE WHEN order_list LIKE '21237%' THEN replace(order_list,'21237=','')END) OVER (partition BY order_col) AS sav_003
FROM tbl_001  T1
WHERE trim(order_list) <> ''
qualify row_number() over (partition by order_col order by date desc) = 1

gives:

ID_COL ORDER_COL TOTAL_QTY TOTAL_AMT SAV_001 SAV_002 SAV_003
2 10 10 11 12 13 14

So swapping to a GROUP BY form, that id_col is a bit yucky.

with tbl_001 as (
    select * from values
    (1,10, '10', '2022-03-01'::date, 2),
    (2,10, '11', '2022-03-02'::date, 3),
    (2,10, '21235=12', '2022-03-02'::date, 1),
    (2,10, '21236=13', '2022-03-02'::date, 1),
    (2,10, '21237=14', '2022-03-02'::date, 1)
    t(id_col, order_col, order_list, date, index)
)
SELECT 
    array_agg(id_col)within group(order by date desc)[0] as id_col,
    order_col,
    sum(CASE WHEN INDEX = 2 THEN order_list END) AS total_qty, 
    sum(CASE WHEN INDEX = 3 THEN order_list END) AS total_amt,
    sum(CASE WHEN order_list LIKE '21235%' THEN replace(order_list,'21235=','')END) AS sav_001,     
    sum(CASE WHEN order_list LIKE '21236%' THEN replace(order_list,'21236=','')END) AS sav_002,
    sum(CASE WHEN order_list LIKE '21237%' THEN replace(order_list,'21237=','')END) AS sav_003
FROM tbl_001  T1
WHERE trim(order_list) <> ''
GROUP BY order_col;

gives (same answer):

ID_COL ORDER_COL TOTAL_QTY TOTAL_AMT SAV_001 SAV_002 SAV_003
2 10 10 11 12 13 14

So that might perform better, and is more readable to me.

If performance was super critical I would try to see if this improved things:

with tbl_001 as (
    select * from values
    (1,10, '10', '2022-03-01'::date, 2),
    (2,10, '11', '2022-03-02'::date, 3),
    (2,10, '21235=12', '2022-03-02'::date, 1),
    (2,10, '21236=13', '2022-03-02'::date, 1),
    (2,10, '21237=14', '2022-03-02'::date, 1)
    t(id_col, order_col, order_list, date, index)
)
SELECT 
    any_value(id_col) as id_col,
    order_col,
    sum(CASE WHEN INDEX = 2 THEN order_list END) AS total_qty, 
    sum(CASE WHEN INDEX = 3 THEN order_list END) AS total_amt,
    sum(CASE WHEN s_order_list[0] = '21235' THEN s_order_list[1] END) AS sav_001,     
    sum(CASE WHEN s_order_list[0] = '21236' THEN s_order_list[1] END) AS sav_002,
    sum(CASE WHEN s_order_list[0] = '21237' THEN s_order_list[1] END) AS sav_003
FROM (
    SELECT 
        first_value(id_col) over(partition by order_col order by date desc) as id_col, 
        order_col, 
        order_list,
        split(order_list,'=') as s_order_list,
        index
    FROM tbl_001
    WHERE trim(order_list) <> ''
)  T1
GROUP BY order_col;

And I total personally would not use CASE and would use IFF

with tbl_001 as (
    select * from values
    (1,10, '10', '2022-03-01'::date, 2),
    (2,10, '11', '2022-03-02'::date, 3),
    (2,10, '21235=12', '2022-03-02'::date, 1),
    (2,10, '21236=13', '2022-03-02'::date, 1),
    (2,10, '21237=14', '2022-03-02'::date, 1)
    t(id_col, order_col, order_list, date, index)
)
SELECT 
    any_value(id_col) as id_col,
    order_col,
    sum(IFF(INDEX = 2, order_list, null)) AS total_qty, 
    sum(IFF(INDEX = 3, order_list, null)) AS total_amt,
    sum(IFF(s1 = 21235, s2, null)) AS sav_001,     
    sum(IFF(s1 = 21236, s2, null)) AS sav_002,
    sum(IFF(s1 = 21237, s2, null)) AS sav_003
FROM (
    SELECT 
        first_value(id_col) over(partition by order_col order by date desc) as id_col, 
        order_col, 
        order_list,
        split_part(order_list,'=', 1)::int as s1,
        split_part(order_list,'=', 2)::double as s2,
        index
    FROM tbl_001
    WHERE trim(order_list) <> ''
)  T1
GROUP BY order_col;

Solution 2:[2]

The NVL(CASE WHEN cond THEN val END, 0) is the same as CASE WHEN cond THEN val ELSE 0 END and even further windowed SUM omits NULLS so simple CASE WHEN cond THEN val END:

SELECT id_col,order_col,
sum(CASE WHEN INDEX = 2 AND trim(order_list) != '' THEN order_list END) 
  OVER (partition BY order_col) AS total_qty, 
sum(CASE WHEN INDEX = 3 AND trim(order_list) != '' THEN order_list END)
  OVER (partition BY order_col) AS total_amt,
sum(CASE WHEN order_list LIKE '21235%' THEN replace(order_list,'21235=','')END) 
  OVER (partition BY order_col) AS sav_001,     
sum(CASE WHEN order_list LIKE '21236%' THEN replace(order_list,'21236=','')
     END) OVER (partition BY order_col) AS sav_002,
sum(CASE WHEN order_list LIKE '21237%' THEN replace(order_list,'21237=','')
     END) OVER (partition BY order_col)AS sav_003
FROM  tbl_001  T1
qualify row_number() over (partition by order_col order by date desc) = 1;

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 Simeon Pilgrim
Solution 2 Lukasz Szozda