'I would like to know if the query can be simplified further (or) any other way to rewrite the SQL query
SELECT id_col,order_col,
sum(nvl(CASE WHEN INDEX = 2 AND trim(order_list) NOT IN('', ' ') THEN order_list END,0)) OVER (partition BY order_col) AS total_qty,
sum(nvl(CASE WHEN INDEX = 3 AND trim(order_list) NOT IN('',' ') THEN order_list END,0)) OVER (partition BY order_col) AS total_amt,
sum(nvl(CASE WHEN order_list LIKE '21235%' THEN replace(order_list,'21235=','')END,0))
OVER (partition BY order_col) AS sav_001,
sum(nvl(CASE WHEN order_list LIKE '21236%' THEN replace(order_list,'21236=','')
END,0)) OVER (partition BY order_col) AS sav_002,
sum(nvl(CASE WHEN order_list LIKE '21237%' THEN replace(order_list,'21237=','')
END,0)) OVER (partition BY order_col)AS sav_003
FROM tbl_001 T1
qualify row_number() over (partition by order_col order by date desc) = 1
The above query is taking long time to run. Is there anyway we can re-write the above query and improve the performance?
Solution 1:[1]
Well tiny things first:
trim(order_list) NOT IN('', ' ') if you have trimmed the white space, the second case cannot be. so it can just be trim(order_list) <> ''
which sized, up you possibly could put that into a WHERE clause, (it would eliminate possible order_col rows with just zeros, which maybe you want.
Lukasz's SUM ignores nulls, still holds.
SELECT
id_col,
order_col,
sum(CASE WHEN INDEX = 2 THEN order_list END) OVER (partition BY order_col) AS total_qty,
sum(CASE WHEN INDEX = 3 THEN order_list END) OVER (partition BY order_col) AS total_amt,
sum(CASE WHEN order_list LIKE '21235%' THEN replace(order_list,'21235=','')END) OVER (partition BY order_col) AS sav_001,
sum(CASE WHEN order_list LIKE '21236%' THEN replace(order_list,'21236=','')END) OVER (partition BY order_col) AS sav_002,
sum(CASE WHEN order_list LIKE '21237%' THEN replace(order_list,'21237=','')END) OVER (partition BY order_col) AS sav_003
FROM tbl_001 T1
WHERE trim(order_list) <> ''
qualify row_number() over (partition by order_col order by date desc) = 1
You are currently building the SUM for every row and then throwing away all but on ROW, and the exist rows care are id_col yet that is never used in any of that OVER()'s, so change to grouping by order_col
old code with some example:
with tbl_001 as (
select * from values
(1,10, '10', '2022-03-01'::date, 2),
(2,10, '11', '2022-03-02'::date, 3),
(2,10, '21235=12', '2022-03-02'::date, 1),
(2,10, '21236=13', '2022-03-02'::date, 1),
(2,10, '21237=14', '2022-03-02'::date, 1)
t(id_col, order_col, order_list, date, index)
)
SELECT
id_col,
order_col,
sum(CASE WHEN INDEX = 2 THEN order_list END) OVER (partition BY order_col) AS total_qty,
sum(CASE WHEN INDEX = 3 THEN order_list END) OVER (partition BY order_col) AS total_amt,
sum(CASE WHEN order_list LIKE '21235%' THEN replace(order_list,'21235=','')END) OVER (partition BY order_col) AS sav_001,
sum(CASE WHEN order_list LIKE '21236%' THEN replace(order_list,'21236=','')END) OVER (partition BY order_col) AS sav_002,
sum(CASE WHEN order_list LIKE '21237%' THEN replace(order_list,'21237=','')END) OVER (partition BY order_col) AS sav_003
FROM tbl_001 T1
WHERE trim(order_list) <> ''
qualify row_number() over (partition by order_col order by date desc) = 1
gives:
| ID_COL | ORDER_COL | TOTAL_QTY | TOTAL_AMT | SAV_001 | SAV_002 | SAV_003 |
|---|---|---|---|---|---|---|
| 2 | 10 | 10 | 11 | 12 | 13 | 14 |
So swapping to a GROUP BY form, that id_col is a bit yucky.
with tbl_001 as (
select * from values
(1,10, '10', '2022-03-01'::date, 2),
(2,10, '11', '2022-03-02'::date, 3),
(2,10, '21235=12', '2022-03-02'::date, 1),
(2,10, '21236=13', '2022-03-02'::date, 1),
(2,10, '21237=14', '2022-03-02'::date, 1)
t(id_col, order_col, order_list, date, index)
)
SELECT
array_agg(id_col)within group(order by date desc)[0] as id_col,
order_col,
sum(CASE WHEN INDEX = 2 THEN order_list END) AS total_qty,
sum(CASE WHEN INDEX = 3 THEN order_list END) AS total_amt,
sum(CASE WHEN order_list LIKE '21235%' THEN replace(order_list,'21235=','')END) AS sav_001,
sum(CASE WHEN order_list LIKE '21236%' THEN replace(order_list,'21236=','')END) AS sav_002,
sum(CASE WHEN order_list LIKE '21237%' THEN replace(order_list,'21237=','')END) AS sav_003
FROM tbl_001 T1
WHERE trim(order_list) <> ''
GROUP BY order_col;
gives (same answer):
| ID_COL | ORDER_COL | TOTAL_QTY | TOTAL_AMT | SAV_001 | SAV_002 | SAV_003 |
|---|---|---|---|---|---|---|
| 2 | 10 | 10 | 11 | 12 | 13 | 14 |
So that might perform better, and is more readable to me.
If performance was super critical I would try to see if this improved things:
with tbl_001 as (
select * from values
(1,10, '10', '2022-03-01'::date, 2),
(2,10, '11', '2022-03-02'::date, 3),
(2,10, '21235=12', '2022-03-02'::date, 1),
(2,10, '21236=13', '2022-03-02'::date, 1),
(2,10, '21237=14', '2022-03-02'::date, 1)
t(id_col, order_col, order_list, date, index)
)
SELECT
any_value(id_col) as id_col,
order_col,
sum(CASE WHEN INDEX = 2 THEN order_list END) AS total_qty,
sum(CASE WHEN INDEX = 3 THEN order_list END) AS total_amt,
sum(CASE WHEN s_order_list[0] = '21235' THEN s_order_list[1] END) AS sav_001,
sum(CASE WHEN s_order_list[0] = '21236' THEN s_order_list[1] END) AS sav_002,
sum(CASE WHEN s_order_list[0] = '21237' THEN s_order_list[1] END) AS sav_003
FROM (
SELECT
first_value(id_col) over(partition by order_col order by date desc) as id_col,
order_col,
order_list,
split(order_list,'=') as s_order_list,
index
FROM tbl_001
WHERE trim(order_list) <> ''
) T1
GROUP BY order_col;
And I total personally would not use CASE and would use IFF
with tbl_001 as (
select * from values
(1,10, '10', '2022-03-01'::date, 2),
(2,10, '11', '2022-03-02'::date, 3),
(2,10, '21235=12', '2022-03-02'::date, 1),
(2,10, '21236=13', '2022-03-02'::date, 1),
(2,10, '21237=14', '2022-03-02'::date, 1)
t(id_col, order_col, order_list, date, index)
)
SELECT
any_value(id_col) as id_col,
order_col,
sum(IFF(INDEX = 2, order_list, null)) AS total_qty,
sum(IFF(INDEX = 3, order_list, null)) AS total_amt,
sum(IFF(s1 = 21235, s2, null)) AS sav_001,
sum(IFF(s1 = 21236, s2, null)) AS sav_002,
sum(IFF(s1 = 21237, s2, null)) AS sav_003
FROM (
SELECT
first_value(id_col) over(partition by order_col order by date desc) as id_col,
order_col,
order_list,
split_part(order_list,'=', 1)::int as s1,
split_part(order_list,'=', 2)::double as s2,
index
FROM tbl_001
WHERE trim(order_list) <> ''
) T1
GROUP BY order_col;
Solution 2:[2]
The NVL(CASE WHEN cond THEN val END, 0) is the same as CASE WHEN cond THEN val ELSE 0 END and even further windowed SUM omits NULLS so simple CASE WHEN cond THEN val END:
SELECT id_col,order_col,
sum(CASE WHEN INDEX = 2 AND trim(order_list) != '' THEN order_list END)
OVER (partition BY order_col) AS total_qty,
sum(CASE WHEN INDEX = 3 AND trim(order_list) != '' THEN order_list END)
OVER (partition BY order_col) AS total_amt,
sum(CASE WHEN order_list LIKE '21235%' THEN replace(order_list,'21235=','')END)
OVER (partition BY order_col) AS sav_001,
sum(CASE WHEN order_list LIKE '21236%' THEN replace(order_list,'21236=','')
END) OVER (partition BY order_col) AS sav_002,
sum(CASE WHEN order_list LIKE '21237%' THEN replace(order_list,'21237=','')
END) OVER (partition BY order_col)AS sav_003
FROM tbl_001 T1
qualify row_number() over (partition by order_col order by date desc) = 1;
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | Simeon Pilgrim |
| Solution 2 | Lukasz Szozda |
