'how to cumulatively combine arrays from previous rows into one aggregate array? (Snowflake/SQL)

Imagine I have a table with two columns, a date DATE and an array of strings ITEMS.

I'd like to create a column ITEMS_AGG which contains an aggregate of all the arrays from previous rows, i.e. something like:

DATE   ITEMS      ITEMS_AGG
1      a, b       a, b
2      a, c       a, b, c
3      b, c       a, b, c
4.     a, d       a, b, c, d
5.     a, b, e    a, b, c, d, e

etc.



Solution 1:[1]

Cummulative array_agg with distinct defined as JavaScript UDTF.

Sample data:

CREATE OR REPLACE TABLE test(grp TEXT, date INT, ITEMS ARRAY)
AS
     SELECT  'X',1,  ARRAY_CONSTRUCT('a', 'b')       
UNION SELECT 'X',2,  ARRAY_CONSTRUCT('a', 'c')       
UNION SELECT 'X',3,  ARRAY_CONSTRUCT('b', 'c')       
UNION SELECT 'X',4,  ARRAY_CONSTRUCT('a', 'd')       
UNION SELECT 'X',5,  ARRAY_CONSTRUCT('a', 'b', 'e')
UNION SELECT 'Y',1,  ARRAY_CONSTRUCT('z')
UNION SELECT 'Y',2,  ARRAY_CONSTRUCT('y','x')
UNION SELECT 'Y',3,  ARRAY_CONSTRUCT('y');

Function:

CREATE OR REPLACE FUNCTION aggregate (TS ARRAY)
RETURNS table (output variant)
LANGUAGE JAVASCRIPT
STRICT
IMMUTABLE
AS '
{
  initialize: function(argumentInfo, context) {
        this.result = [];
    },
  processRow: function (row, rowWriter, context) {
       this.result = [...new Set(this.result.concat(row.TS))];          
       rowWriter.writeRow({OUTPUT: this.result.sort()});           
   }
 }
 ';

Query:

SELECT *
FROM test,  TABLE(aggregate(ITEMS) OVER(PARTITION BY grp ORDER BY date))
ORDER BY grp, date;

Output:

enter image description here

Solution 2:[2]

Well not exactly what you seems to want, you can aggregate with double-ups with a recursive cte, as there is no ARRAY_CAT(DISTINCT) with this SQL:

WITH data AS (
    SELECT column1 as date, split(column2, ',') as items FROM VALUES
    (1, 'a,b'),
    (2, 'a,c'),
    (3, 'b, c'),
    (4, 'a,d'),
    (5, 'a,b,e')
), rec AS (
    WITH RECURSIVE r_cte AS (
        SELECT date as date, items
        FROM data
        WHERE date = 1
        
        UNION ALL
        
        SELECT r.date+1 as r_date, array_cat(r.items, d.items) as items
        FROM r_cte r
        JOIN data d 
            ON r.date + 1 = d.date
    )
    SELECT * from r_cte
)
SELECT *
FROM rec;
DATE ITEMS
1 [ "a", "b" ]
2 [ "a", "b", "a", "c" ]
3 [ "a", "b", "a", "c", "b", " c" ]
4 [ "a", "b", "a", "c", "b", " c", "a", "d" ]
5 [ "a", "b", "a", "c", "b", " c", "a", "d", "a", "b", "e" ]

but really you should use Lukasz solution.

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1
Solution 2 Simeon Pilgrim