'How to create a Retention % metric in Amazon Athena?
I have a data base consisting of:
▪ basket_item_id: ▪ basket_id: ▪ customer_id: ▪ sales_channel_id: ▪ country_id: ▪ category_id: ▪ product: ▪ quantity: ▪ price_total: ▪ date_trading_nk:
I have created a query that summarises the monthly performance, as follows:
But I'm not convinced that I have used the correct method to measure retention.
Would anyone be able to have a look and tell me if there is a better way?
SELECT t1.yr, t1.mnth, Sales, Customers, Baskets, Items, ATV, IPB, SPC, newcustomer, churned_customers, retention*100 AS retentionpercent
FROM
(SELECT
YEAR(date_trading_nk) AS yr,
MONTH(date_trading_nk) AS mnth,
SUM(price_total) as Sales,
COUNT(distinct customer_id) as Customers,
COUNT(distinct basket_id) as Baskets,
SUM(quantity) as Items,
AVG(price_total) AS ATV,
(SUM(quantity)) / (COUNT(distinct basket_id)) as IPB,
SUM(price_total) / COUNT(DISTINCT customer_ID) as SPC
FROM fact_basket_items
GROUP BY YEAR(date_trading_nk), MONTH(date_trading_nk)) t1
LEFT JOIN
(With Occurences AS
(
SELECT
*,
ROW_NUMBER () OVER (PARTITION BY customer_id order by date_trading_nk ) AS "Occurence"
FROM fact_basket_items
)
SELECT YEAR(date_trading_nk) as yr, MONTH(date_trading_nk) as mn, SUM(new_customers) as newcustomer
FROM
(SELECT
COUNT(DISTINCT customer_id) as new_customers , date_trading_nk
FROM Occurences
WHERE Occurence = 1
GROUP BY date_trading_nk
ORDER BY date_trading_nk)
GROUP BY YEAR(date_trading_nk), MONTH(date_trading_nk)
) t2
ON t1.yr = t2.yr AND t1.mnth = t2.mn
LEFT JOIN
(SELECT YEAR(churn_date)as yr, MONTH(churn_date) as mnth, COUNT(customer_id) as churned_customers
FROM
(SELECT
customer_id,
max(date_trading_nk) AS last_purchase,
DATE_ADD('MONTH', 3, max(date_trading_nk)) AS churn_date
FROM fact_basket_items
GROUP BY customer_id)
GROUP BY YEAR(churn_date), MONTH(churn_date)
ORDER BY YEAR(churn_date), MONTH(churn_date)) t3
ON t1.yr = t3.yr AND t1.mnth = t3.mnth
LEFT JOIN
(WITH daily_retention as
(WITH activity AS
(SELECT customer_id, product, quantity, date_trading_nk as purchase_month
FROM fact_basket_items
ORDER BY customer_id)
SELECT
activity.purchase_month,
count(distinct activity.customer_id) as active_users,
count(distinct future_activity.customer_id) as retained_users,
CAST(count(distinct future_activity.customer_id) AS DECIMAL(5,2)) /
CAST(count(distinct activity.customer_id) AS DECIMAL(5,2)) as retention
FROM activity
LEFT JOIN activity AS future_activity ON
activity.customer_id = future_activity.customer_id
AND activity.purchase_month = future_activity.purchase_month - interval '1' MONTH
GROUP BY 1
ORDER BY purchase_month)
SELECT
YEAR(purchase_month) as yr,
MONTH(purchase_month) as mnth,
SUM(active_users) as active,
SUM(retained_users) as retained,
CAST(SUM(retained_users)AS DECIMAL(5,1))
/CAST(SUM(active_users)AS DECIMAL(7,3)) as retention
FROM daily_retention
GROUP BY YEAR(purchase_month), MONTH(purchase_month)
ORDER BY YEAR(purchase_month), MONTH(purchase_month)) t4
ON t1.yr = t4.yr AND t1.mnth = t4.mnth
ORDER BY t1.yr, t1.mnth;
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|
