'Migrating T-SQL script to Snowflake - error invalid identifier
I am currently trying to migrate a T-SQL script to Snowflake.
This is the T-SQL script:
SELECT
* —- a bunch of stuff
FROM
TABLE_DATE D
LEFT JOIN
TABLE_CONST AS CONST ON CODE = 999
AND D.DATE BETWEEN CONST.STARTDATE AND ISNULL (CONST.ENDDATE, EOMONTH(GET_DATE()))
LEFT JOIN
TABLE_INFO M ON CONST.CODE = M.CODE
OUTER APPLY
(SELECT TOP 1 *
FROM TABLE_A
WHERE SECID = CONST.SECID
AND DATE_ <= D.DATE_
ORDER BY DATE_ DESC) SEC
OUTER APPLY
(SELECT TOP 1 *
FROM TABLE_B
WHERE I_CODE = SEC.I_CODE
AND DATE_ <= D.DATE_
ORDER BY DATE_ DESC) XR
LEFT JOIN
TABLE_I I ON CONST.SECID = I.SECID
WHERE
D.DATE_ BETWEEN CAST(‘1800-01-01’ AS DATE AND GETDATE()
I have converted a few parts of the script to the snowflake syntax (isnull, eomonth, outer apply) but is still facing some error.
Converted script
SELECT * —- a bunch of stuff
FROM TABLE_DATE D
LEFT JOIN TABLE_CONST AS CONST
ON CODE = 999
AND D.DATE BETWEEN CONST.STARTDATE AND COALESCE (CONST.ENDDATE,LAST_DAY(CURRENT_DATE()))
LEFT JOIN TABLE_INFO M ON CONST.CODE = M.CODE
LEFT JOIN
(SELECT TOP 1 *
FROM TABLE_A
WHERE SECID = CONST.SECID
AND DATE_ <= D.DATE_
ORDER BY DATE_ DESC
) SEC
LEFT JOIN
( SELECT TOP 1 *
FROM TABLE_B
WHERE I_CODE = SEC.I_CODE
AND DATE_ <= D.DATE_
ORDER BY DATE_ DESC
) XR
LEFT JOIN TABLE_I I ON CONST.SECID = I.SECID
WHERE D.DATE_ BETWEEN CAST(‘1800-01-01’ AS DATE AND CURRENT_DATE()
Error that I’m getting is
Invalid identifier ‘CONST.SECID’
I’m new to snowflake and sql and would really appreciate any help.
*script has been change for confidentiality purposes and it is also not possible for me to provide sample data)
Solution 1:[1]
create or replace table TABLE_DATE(DATE_ date);
create or replace table TABLE_CONST(STARTDATE date, ENDDATE date, code number, SECID number);
create or replace table TABLE_INFO(code number);
create or replace table TABLE_A(SECID number, I_CODE number);
create or replace table TABLE_B(date_ date, I_CODE number);
create or replace table TABLE_I(SECID number);
SELECT *
FROM TABLE_DATE AS D
LEFT JOIN TABLE_CONST AS CONST
ON CODE = 999
AND D.DATE_ BETWEEN CONST.STARTDATE AND COALESCE(CONST.ENDDATE,LAST_DAY(CURRENT_DATE()))
LEFT JOIN TABLE_INFO AS M
ON CONST.CODE = M.CODE
LEFT JOIN LATERAL (
SELECT TOP 1 *
FROM TABLE_A
WHERE SECID = CONST.SECID
AND DATE_ <= D.DATE_
ORDER BY DATE_ DESC
) AS SEC
LEFT JOIN LATERAL (
SELECT TOP 1 *
FROM TABLE_B AS B
WHERE b.I_CODE = SEC.I_CODE
AND b.DATE_ <= D.DATE_
ORDER BY b.DATE_ DESC
) XR
LEFT JOIN TABLE_I I ON CONST.SECID = I.SECID
WHERE D.DATE_ BETWEEN '1800-01-01'::date AND CURRENT_DATE()
Alternative SQL 2:
So here I dropped the correlated sub-query's TOP 1 and replace with an outer WHERE, not as glamourous. But it might work. From there speed can be added once failure has be avoided.
create or replace table TABLE_DATE(DATE_ date);
create or replace table TABLE_CONST(STARTDATE date, ENDDATE date, code number, SECID number);
create or replace table TABLE_INFO(code number);
create or replace table TABLE_A(date_ date, SECID number, I_CODE number);
create or replace table TABLE_B(date_ date, I_CODE number);
create or replace table TABLE_I(SECID number);
SELECT *
FROM TABLE_DATE AS D
LEFT JOIN TABLE_CONST AS CONST
ON CODE = 999
AND D.DATE_ BETWEEN CONST.STARTDATE AND COALESCE(CONST.ENDDATE,LAST_DAY(CURRENT_DATE()))
LEFT JOIN TABLE_INFO AS M
ON CONST.CODE = M.CODE
LEFT JOIN LATERAL (
SELECT *
,row_number() over (ORDER BY a.DATE_ DESC) AS RN
FROM TABLE_A AS A
WHERE SECID = CONST.SECID
AND a.DATE_ <= D.DATE_
) AS SEC
LEFT JOIN LATERAL (
SELECT *
,row_number() over (ORDER BY b.DATE_ DESC) AS RN
FROM TABLE_B AS B
WHERE b.I_CODE = SEC.I_CODE
AND b.DATE_ <= D.DATE_
ORDER BY b.DATE_ DESC
) XR
LEFT JOIN TABLE_I I ON CONST.SECID = I.SECID
WHERE D.DATE_ BETWEEN '1800-01-01'::date AND CURRENT_DATE()
and xr.rn = 1 AND sec.rn = 1
Some data that triggers the problem:
WITH TABLE_DATE(DATE_) as (
select * from values
('2022-04-01'::date),
('2022-03-01'::date),
('2022-01-01'::date)
), TABLE_CONST(STARTDATE, ENDDATE, code, SECID) as (
select * from values
('2022-04-01'::date, '2022-04-01'::date, 997, 667),
('2022-03-01'::date, '2022-03-01'::date, 998, 668),
('2022-01-01'::date, '2022-01-01'::date, 999, 669)
), TABLE_INFO(code) as (
select * from values
(999),
(998)
), TABLE_A(date_, SECID, I_CODE) as (
select * from values
('2022-04-01'::date, 667, 767),
('2022-03-29'::date, 667, 767),
('2022-03-28'::date, 667, 767),
('2022-03-01'::date, 668, 768),
('2022-02-28'::date, 668, 768),
('2022-02-27'::date, 668, 768),
('2022-01-01'::date, 669, 769),
('2021-12-29'::date, 669, 769),
('2021-12-28'::date, 669, 769)
), TABLE_B(date_, I_CODE) as (
select * from values
('2022-04-01'::date, 767),
('2022-03-29'::date, 767),
('2022-03-28'::date, 767),
('2022-03-01'::date, 768),
('2022-02-28'::date, 768),
('2022-02-27'::date, 768),
('2022-01-01'::date, 769),
('2021-12-29'::date, 769),
('2021-12-28'::date, 769)
), TABLE_I(SECID ) as (
select * from values
(999),
(998)
)
SELECT *
FROM TABLE_DATE AS D
LEFT JOIN TABLE_CONST AS CONST
ON CONST.CODE = 999
AND D.DATE_ BETWEEN CONST.STARTDATE AND COALESCE(CONST.ENDDATE,LAST_DAY(CURRENT_DATE()))
LEFT JOIN TABLE_INFO AS M
ON CONST.CODE = M.CODE
LEFT JOIN LATERAL (
SELECT A.*
,row_number() over (ORDER BY a.DATE_ DESC) AS RN
FROM TABLE_A AS A
WHERE A.SECID = CONST.SECID
AND a.DATE_ <= D.DATE_
) AS SEC
LEFT JOIN LATERAL (
SELECT B.*
,row_number() over (ORDER BY b.DATE_ DESC) AS RN
FROM TABLE_B AS B
WHERE b.I_CODE = SEC.I_CODE
AND b.DATE_ <= D.DATE_
ORDER BY b.DATE_ DESC
) XR
LEFT JOIN TABLE_I I ON CONST.SECID = I.SECID
WHERE D.DATE_ BETWEEN '1800-01-01'::date AND CURRENT_DATE()
and xr.rn = 1 AND sec.rn = 1;
now we are in luck, we have some data that triggers the problem, we can fix the problem..
So now lets make some code that works:
So this removes the sub-queries, and goes for just plan old join, and then throws away heaps of rows at the end. It's rather gross, and I would tend to prefer to do this earlier in the processing chain. But "it compiles" is a plus...
WITH TABLE_DATE(DATE_) as (
select * from values
('2022-04-01'::date),
('2022-03-01'::date),
('2022-01-01'::date)
), TABLE_CONST(STARTDATE, ENDDATE, code, SECID) as (
select * from values
('2022-04-01'::date, '2022-04-01'::date, 999, 667),
('2022-03-01'::date, '2022-03-01'::date, 999, 668),
('2022-01-01'::date, '2022-01-01'::date, 999, 669)
), TABLE_INFO(code) as (
select * from values
(999),
(998)
), TABLE_A(date_, SECID, I_CODE) as (
select * from values
('2022-04-01'::date, 667, 767),
('2022-03-29'::date, 667, 767),
('2022-03-28'::date, 667, 767),
('2022-03-01'::date, 668, 768),
('2022-02-28'::date, 668, 768),
('2022-02-27'::date, 668, 768),
('2022-01-01'::date, 669, 769),
('2021-12-29'::date, 669, 769),
('2021-12-28'::date, 669, 769)
), TABLE_B(date_, I_CODE) as (
select * from values
('2022-04-01'::date, 767),
('2022-03-29'::date, 767),
('2022-03-28'::date, 767),
('2022-03-01'::date, 768),
('2022-02-28'::date, 768),
('2022-02-27'::date, 768),
('2022-01-01'::date, 769),
('2021-12-29'::date, 769),
('2021-12-28'::date, 769)
), TABLE_I(SECID ) as (
select * from values
(669),
(667)
)
SELECT
d.date_ as d_date
,CONST.STARTDATE as c_STARTDATE
,CONST.ENDDATE as c_ENDDATE
,CONST.code as c_code
,CONST.SECID as c_SECID
,m.code as m_code
,sec.date_ as s_date
,sec.secid as s_secid
,sec.i_code as s_i_code
,xr.date_ as xr_date
,xr.i_code as xr_icode
,i.*
FROM TABLE_DATE AS D
LEFT JOIN TABLE_CONST AS CONST
ON CONST.CODE = 999
AND D.DATE_ BETWEEN CONST.STARTDATE AND COALESCE(CONST.ENDDATE,LAST_DAY(CURRENT_DATE()))
LEFT JOIN TABLE_INFO AS M
ON CONST.CODE = M.CODE
LEFT JOIN TABLE_A AS SEC
ON sec.SECID = CONST.SECID and sec.date_ <= D.DATE_
LEFT JOIN TABLE_B AS XR
ON xr.I_CODE = SEC.I_CODE AND xr.DATE_ <= D.DATE_
LEFT JOIN TABLE_I I ON CONST.SECID = I.SECID
WHERE D.DATE_ BETWEEN '1800-01-01'::date AND CURRENT_DATE()
QUALIFY ROW_NUMBER() OVER (partition by D.DATE_, CONST.SECID order by sec.date_ desc, xr.DATE_ DESC) = 1
ORDER BY 1;
Lets give CTE's a Try:
Now I have pretended the CTE's I were using are full tables, the same main body can be written with those sub-queries written as incremental clean-up so the data does not explode too much. And also for simplicity also carry the used values forward. I would also try without carrying, and doing cleaner joins. But it all really depends on your data.
WITH CTE_A AS (
SELECT
d.date_ as d_date
,CONST.STARTDATE as c_STARTDATE
,CONST.ENDDATE as c_ENDDATE
,CONST.code as c_code
,CONST.SECID as c_SECID
FROM TABLE_DATE AS D
LEFT JOIN TABLE_CONST AS CONST
ON CONST.CODE = 999
AND D.DATE_ BETWEEN CONST.STARTDATE AND COALESCE(CONST.ENDDATE,LAST_DAY(CURRENT_DATE()))
WHERE D.DATE_ BETWEEN '1800-01-01'::date AND CURRENT_DATE()
), CTE_B AS (
SELECT
x.d_date
,x.c_STARTDATE
,x.c_ENDDATE
,x.c_code
,x.c_SECID
,sec.date_ as s_date
,sec.secid as s_secid
,sec.i_code as s_i_code
FROM CTE_A as x
LEFT JOIN TABLE_A AS SEC
ON sec.SECID = x.C_SECID and sec.date_ <= x.d_DATE
QUALIFY ROW_NUMBER() OVER (partition by x.d_DATE, x.C_SECID order by sec.date_ desc) = 1
), CTE_C AS (
SELECT
y.d_date
,y.c_STARTDATE
,y.c_ENDDATE
,y.c_code
,y.c_SECID
,y.s_date
,y.s_secid
,y.s_i_code
,xr.date_ as xr_date
,xr.i_code as xr_icode
FROM CTE_B as y
LEFT JOIN TABLE_B AS XR
ON xr.I_CODE = y.s_i_code AND xr.DATE_ <= y.d_DATE
QUALIFY ROW_NUMBER() OVER (partition by y.d_DATE, y.C_SECID order by xr.date_ desc) = 1
)
SELECT
z.d_date
,z.c_STARTDATE
,z.c_ENDDATE
,z.c_code
,z.c_SECID
,m.code as m_code
,z.s_date
,z.s_secid
,z.s_i_code
,z.xr_date
,z.xr_icode
,i.secid as i_secid
FROM CTE_C AS z
LEFT JOIN TABLE_INFO AS M
ON z.c_CODE = M.CODE
LEFT JOIN TABLE_I I ON z.C_SECID = I.SECID
ORDER BY 1;
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 |
