'Migrating T-SQL script to Snowflake - error invalid identifier

I am currently trying to migrate a T-SQL script to Snowflake.

This is the T-SQL script:

SELECT  
    * —- a bunch of stuff
FROM
    TABLE_DATE D
LEFT JOIN 
    TABLE_CONST AS CONST ON CODE = 999
                         AND D.DATE BETWEEN CONST.STARTDATE AND ISNULL (CONST.ENDDATE, EOMONTH(GET_DATE()))
LEFT JOIN 
    TABLE_INFO M ON CONST.CODE = M.CODE
OUTER APPLY 
    (SELECT TOP 1 *
     FROM TABLE_A 
     WHERE SECID = CONST.SECID
       AND DATE_ <= D.DATE_
     ORDER BY DATE_ DESC) SEC
OUTER APPLY 
    (SELECT TOP 1 *
     FROM TABLE_B 
     WHERE I_CODE = SEC.I_CODE 
       AND DATE_ <= D.DATE_
     ORDER BY DATE_ DESC) XR
LEFT JOIN 
    TABLE_I I ON CONST.SECID = I.SECID
WHERE 
    D.DATE_ BETWEEN CAST(‘1800-01-01’ AS DATE AND GETDATE()

I have converted a few parts of the script to the snowflake syntax (isnull, eomonth, outer apply) but is still facing some error.

Converted script

SELECT * —- a bunch of stuff
FROM TABLE_DATE D
LEFT JOIN TABLE_CONST AS CONST
ON CODE = 999
AND D.DATE BETWEEN CONST.STARTDATE AND COALESCE (CONST.ENDDATE,LAST_DAY(CURRENT_DATE()))

LEFT JOIN TABLE_INFO M ON CONST.CODE = M.CODE

LEFT JOIN 
(SELECT TOP 1 *
FROM TABLE_A 
WHERE SECID = CONST.SECID
AND DATE_ <= D.DATE_
ORDER BY DATE_ DESC
) SEC

LEFT JOIN 
( SELECT TOP 1 *
FROM TABLE_B 
WHERE I_CODE = SEC.I_CODE 
AND DATE_ <= D.DATE_
ORDER BY DATE_ DESC
) XR

LEFT JOIN TABLE_I I ON CONST.SECID = I.SECID

WHERE D.DATE_ BETWEEN CAST(‘1800-01-01’ AS DATE AND CURRENT_DATE()

Error that I’m getting is

Invalid identifier ‘CONST.SECID’

I’m new to snowflake and sql and would really appreciate any help.

*script has been change for confidentiality purposes and it is also not possible for me to provide sample data)



Solution 1:[1]

create or replace table TABLE_DATE(DATE_ date);
create or replace table TABLE_CONST(STARTDATE date, ENDDATE date, code number, SECID number);
create or replace table TABLE_INFO(code number);
create or replace table TABLE_A(SECID number, I_CODE number);
create or replace table TABLE_B(date_ date, I_CODE number);
create or replace table TABLE_I(SECID number);

SELECT *
FROM TABLE_DATE AS D
LEFT JOIN TABLE_CONST AS CONST
    ON CODE = 999
    AND D.DATE_ BETWEEN CONST.STARTDATE AND COALESCE(CONST.ENDDATE,LAST_DAY(CURRENT_DATE()))
LEFT JOIN TABLE_INFO AS M 
    ON CONST.CODE = M.CODE
LEFT JOIN LATERAL (
    SELECT TOP 1 *
    FROM TABLE_A 
    WHERE SECID = CONST.SECID
    AND DATE_ <= D.DATE_
    ORDER BY DATE_ DESC
) AS SEC
LEFT JOIN LATERAL (
    SELECT TOP 1 *
    FROM TABLE_B AS B 
    WHERE b.I_CODE = SEC.I_CODE 
    AND b.DATE_ <= D.DATE_
    ORDER BY b.DATE_ DESC
) XR
LEFT JOIN TABLE_I I ON CONST.SECID = I.SECID
WHERE D.DATE_ BETWEEN '1800-01-01'::date AND CURRENT_DATE()

Alternative SQL 2:

So here I dropped the correlated sub-query's TOP 1 and replace with an outer WHERE, not as glamourous. But it might work. From there speed can be added once failure has be avoided.

create or replace table TABLE_DATE(DATE_ date);
create or replace table TABLE_CONST(STARTDATE date, ENDDATE date, code number, SECID number);
create or replace table TABLE_INFO(code number);
create or replace table TABLE_A(date_ date, SECID number, I_CODE number);
create or replace table TABLE_B(date_ date, I_CODE number);
create or replace table TABLE_I(SECID number);

SELECT *
FROM TABLE_DATE AS D
LEFT JOIN TABLE_CONST AS CONST
    ON CODE = 999
    AND D.DATE_ BETWEEN CONST.STARTDATE AND COALESCE(CONST.ENDDATE,LAST_DAY(CURRENT_DATE()))
LEFT JOIN TABLE_INFO AS M 
    ON CONST.CODE = M.CODE
LEFT JOIN LATERAL (
    SELECT *
        ,row_number() over (ORDER BY a.DATE_ DESC) AS RN
    FROM TABLE_A AS A
    WHERE SECID = CONST.SECID
    AND a.DATE_ <= D.DATE_
) AS SEC
LEFT JOIN LATERAL (
    SELECT *
        ,row_number() over (ORDER BY b.DATE_ DESC) AS RN
    FROM TABLE_B AS B 
    WHERE b.I_CODE = SEC.I_CODE 
    AND b.DATE_ <= D.DATE_
    ORDER BY b.DATE_ DESC
) XR
LEFT JOIN TABLE_I I ON CONST.SECID = I.SECID
WHERE D.DATE_ BETWEEN '1800-01-01'::date AND CURRENT_DATE()
and xr.rn = 1 AND sec.rn = 1

Some data that triggers the problem:

WITH TABLE_DATE(DATE_) as (
    select * from values
    ('2022-04-01'::date),
    ('2022-03-01'::date),
    ('2022-01-01'::date)
), TABLE_CONST(STARTDATE, ENDDATE, code, SECID) as (
    select * from values
    ('2022-04-01'::date, '2022-04-01'::date, 997, 667),
    ('2022-03-01'::date, '2022-03-01'::date, 998, 668),
    ('2022-01-01'::date, '2022-01-01'::date, 999, 669)
), TABLE_INFO(code) as (
    select * from values
    (999),
    (998)
), TABLE_A(date_, SECID, I_CODE) as (
    select * from values
    ('2022-04-01'::date, 667, 767),
    ('2022-03-29'::date, 667, 767),
    ('2022-03-28'::date, 667, 767),
    ('2022-03-01'::date, 668, 768),
    ('2022-02-28'::date, 668, 768),
    ('2022-02-27'::date, 668, 768),
    ('2022-01-01'::date, 669, 769),
    ('2021-12-29'::date, 669, 769),
    ('2021-12-28'::date, 669, 769)
), TABLE_B(date_, I_CODE) as (
    select * from values
    ('2022-04-01'::date, 767),
    ('2022-03-29'::date, 767),
    ('2022-03-28'::date, 767),
    ('2022-03-01'::date, 768),
    ('2022-02-28'::date, 768),
    ('2022-02-27'::date, 768),
    ('2022-01-01'::date, 769),
    ('2021-12-29'::date, 769),
    ('2021-12-28'::date, 769)
), TABLE_I(SECID ) as (
    select * from values
    (999),
    (998)
)
SELECT *
FROM TABLE_DATE AS D
LEFT JOIN TABLE_CONST AS CONST
    ON CONST.CODE = 999
    AND D.DATE_ BETWEEN CONST.STARTDATE AND COALESCE(CONST.ENDDATE,LAST_DAY(CURRENT_DATE()))
LEFT JOIN TABLE_INFO AS M 
    ON CONST.CODE = M.CODE
LEFT JOIN LATERAL (
    SELECT A.*
        ,row_number() over (ORDER BY a.DATE_ DESC) AS RN
    FROM TABLE_A AS A
    WHERE A.SECID = CONST.SECID
    AND a.DATE_ <= D.DATE_
) AS SEC
LEFT JOIN LATERAL (
    SELECT B.*
        ,row_number() over (ORDER BY b.DATE_ DESC) AS RN
    FROM TABLE_B AS B 
    WHERE b.I_CODE = SEC.I_CODE 
    AND b.DATE_ <= D.DATE_
    ORDER BY b.DATE_ DESC
) XR
LEFT JOIN TABLE_I I ON CONST.SECID = I.SECID
WHERE D.DATE_ BETWEEN '1800-01-01'::date AND CURRENT_DATE()
and xr.rn = 1 AND sec.rn = 1;

now we are in luck, we have some data that triggers the problem, we can fix the problem..

So now lets make some code that works:

So this removes the sub-queries, and goes for just plan old join, and then throws away heaps of rows at the end. It's rather gross, and I would tend to prefer to do this earlier in the processing chain. But "it compiles" is a plus...

WITH TABLE_DATE(DATE_) as (
    select * from values
    ('2022-04-01'::date),
    ('2022-03-01'::date),
    ('2022-01-01'::date)
), TABLE_CONST(STARTDATE, ENDDATE, code, SECID) as (
    select * from values
    ('2022-04-01'::date, '2022-04-01'::date, 999, 667),
    ('2022-03-01'::date, '2022-03-01'::date, 999, 668),
    ('2022-01-01'::date, '2022-01-01'::date, 999, 669)
), TABLE_INFO(code) as (
    select * from values
    (999),
    (998)
), TABLE_A(date_, SECID, I_CODE) as (
    select * from values
    ('2022-04-01'::date, 667, 767),
    ('2022-03-29'::date, 667, 767),
    ('2022-03-28'::date, 667, 767),
    ('2022-03-01'::date, 668, 768),
    ('2022-02-28'::date, 668, 768),
    ('2022-02-27'::date, 668, 768),
    ('2022-01-01'::date, 669, 769),
    ('2021-12-29'::date, 669, 769),
    ('2021-12-28'::date, 669, 769)
), TABLE_B(date_, I_CODE) as (
    select * from values
    ('2022-04-01'::date, 767),
    ('2022-03-29'::date, 767),
    ('2022-03-28'::date, 767),
    ('2022-03-01'::date, 768),
    ('2022-02-28'::date, 768),
    ('2022-02-27'::date, 768),
    ('2022-01-01'::date, 769),
    ('2021-12-29'::date, 769),
    ('2021-12-28'::date, 769)
), TABLE_I(SECID ) as (
    select * from values
    (669),
    (667)
)
SELECT 
    d.date_ as d_date
    ,CONST.STARTDATE as c_STARTDATE
    ,CONST.ENDDATE as c_ENDDATE
    ,CONST.code as c_code
    ,CONST.SECID as c_SECID
    ,m.code as m_code
    ,sec.date_ as s_date
    ,sec.secid as s_secid
    ,sec.i_code as s_i_code
    ,xr.date_ as xr_date
    ,xr.i_code as xr_icode
    ,i.*
FROM TABLE_DATE AS D
LEFT JOIN TABLE_CONST AS CONST
    ON CONST.CODE = 999
    AND D.DATE_ BETWEEN CONST.STARTDATE AND COALESCE(CONST.ENDDATE,LAST_DAY(CURRENT_DATE()))
LEFT JOIN TABLE_INFO AS M 
    ON CONST.CODE = M.CODE
LEFT JOIN TABLE_A AS SEC
    ON sec.SECID = CONST.SECID and sec.date_ <= D.DATE_ 
LEFT JOIN TABLE_B AS XR
    ON xr.I_CODE = SEC.I_CODE AND xr.DATE_ <= D.DATE_
LEFT JOIN TABLE_I I ON CONST.SECID = I.SECID
WHERE D.DATE_ BETWEEN '1800-01-01'::date AND CURRENT_DATE()
QUALIFY ROW_NUMBER() OVER (partition by D.DATE_, CONST.SECID order by sec.date_ desc, xr.DATE_ DESC) = 1
ORDER BY 1;

Lets give CTE's a Try:

Now I have pretended the CTE's I were using are full tables, the same main body can be written with those sub-queries written as incremental clean-up so the data does not explode too much. And also for simplicity also carry the used values forward. I would also try without carrying, and doing cleaner joins. But it all really depends on your data.

WITH CTE_A AS (
    SELECT     
        d.date_ as d_date
        ,CONST.STARTDATE as c_STARTDATE
        ,CONST.ENDDATE as c_ENDDATE
        ,CONST.code as c_code
        ,CONST.SECID as c_SECID
    FROM TABLE_DATE AS D
    LEFT JOIN TABLE_CONST AS CONST
        ON CONST.CODE = 999
        AND D.DATE_ BETWEEN CONST.STARTDATE AND COALESCE(CONST.ENDDATE,LAST_DAY(CURRENT_DATE()))
    WHERE D.DATE_ BETWEEN '1800-01-01'::date AND CURRENT_DATE()
), CTE_B AS (
    SELECT 
        x.d_date
        ,x.c_STARTDATE
        ,x.c_ENDDATE
        ,x.c_code
        ,x.c_SECID
        ,sec.date_ as s_date
        ,sec.secid as s_secid
        ,sec.i_code as s_i_code
    FROM CTE_A as x
    LEFT JOIN TABLE_A AS SEC
        ON sec.SECID = x.C_SECID and sec.date_ <= x.d_DATE
    QUALIFY ROW_NUMBER() OVER (partition by x.d_DATE, x.C_SECID order by sec.date_ desc) = 1
), CTE_C AS (
    SELECT 
        y.d_date
        ,y.c_STARTDATE
        ,y.c_ENDDATE
        ,y.c_code
        ,y.c_SECID
        ,y.s_date
        ,y.s_secid
        ,y.s_i_code
        ,xr.date_ as xr_date
        ,xr.i_code as xr_icode
    FROM CTE_B as y
    LEFT JOIN TABLE_B AS XR
    ON xr.I_CODE = y.s_i_code AND xr.DATE_ <= y.d_DATE
    QUALIFY ROW_NUMBER() OVER (partition by y.d_DATE, y.C_SECID order by xr.date_ desc) = 1
)
SELECT 
    z.d_date
    ,z.c_STARTDATE
    ,z.c_ENDDATE
    ,z.c_code
    ,z.c_SECID
    ,m.code as m_code
    ,z.s_date
    ,z.s_secid
    ,z.s_i_code
    ,z.xr_date
    ,z.xr_icode
    ,i.secid as i_secid
FROM CTE_C AS z
LEFT JOIN TABLE_INFO AS M 
    ON z.c_CODE = M.CODE
LEFT JOIN TABLE_I I ON z.C_SECID = I.SECID
ORDER BY 1;

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1