Skip to main content

Time-Series Analysis

Learning Focus

Use this lesson to build time-series analytics in DuckDB — rolling averages, YoY comparisons, and trend detection.

Rolling Aggregations

-- 7-day rolling average of daily revenue
SELECT
sale_date,
daily_revenue,
ROUND(AVG(daily_revenue) OVER (
ORDER BY sale_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
), 2) AS rolling_7d_avg
FROM daily_sales
ORDER BY sale_date;

-- 30-day rolling sum
SELECT
sale_date,
daily_revenue,
SUM(daily_revenue) OVER (
ORDER BY sale_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
) AS rolling_30d_revenue
FROM daily_sales
ORDER BY sale_date;

Period-over-Period Comparison

-- Month-over-Month growth
WITH monthly AS (
SELECT
date_trunc('month', sale_date) AS month,
SUM(revenue) AS revenue
FROM clean_sales
GROUP BY month
)
SELECT
month,
revenue,
lag(revenue, 1) OVER (ORDER BY month) AS prev_month,
lag(revenue, 12) OVER (ORDER BY month) AS prev_year_month,
ROUND((revenue - lag(revenue, 1) OVER (ORDER BY month))
/ NULLIF(lag(revenue, 1) OVER (ORDER BY month), 0) * 100, 1) AS mom_growth_pct,
ROUND((revenue - lag(revenue, 12) OVER (ORDER BY month))
/ NULLIF(lag(revenue, 12) OVER (ORDER BY month), 0) * 100, 1) AS yoy_growth_pct
FROM monthly
ORDER BY month;

Gap Filling (Generate Missing Dates)

DuckDB can generate a complete date spine to fill gaps in sparse data:

-- Generate a full date spine for 2024
WITH date_spine AS (
SELECT unnest(generate_series(
DATE '2024-01-01',
DATE '2024-12-31',
INTERVAL '1 day'
))::DATE AS day
)
SELECT
ds.day,
COALESCE(s.daily_revenue, 0) AS revenue
FROM date_spine ds
LEFT JOIN daily_sales s ON ds.day = s.sale_date
ORDER BY ds.day;

Seasonality Detection

-- Average revenue by day of week
SELECT
dayname(sale_date) AS day_of_week,
extract('dow' FROM sale_date) AS dow_num,
ROUND(AVG(revenue), 2) AS avg_revenue,
COUNT(*) AS transaction_count
FROM clean_sales
GROUP BY day_of_week, dow_num
ORDER BY dow_num;

-- Average revenue by month (seasonal pattern)
SELECT
extract('month' FROM sale_date) AS month_num,
monthname(sale_date) AS month_name,
ROUND(AVG(revenue), 2) AS avg_revenue
FROM clean_sales
GROUP BY month_num, month_name
ORDER BY month_num;

Cumulative Metrics

-- Running totals (cumulative revenue by day)
SELECT
sale_date,
revenue,
SUM(revenue) OVER (ORDER BY sale_date) AS cumulative_revenue
FROM daily_sales
ORDER BY sale_date;

-- Percentage of total revenue per day
SELECT
sale_date,
revenue,
ROUND(revenue / SUM(revenue) OVER () * 100, 2) AS pct_of_total
FROM daily_sales
ORDER BY sale_date;

Common Pitfalls

PitfallConsequencePrevention
Rolling window on unsorted dataWrong rolling aggregationAlways include ORDER BY date in window OVER clause
Missing dates in date_spineGaps in reportsUse generate_series + LEFT JOIN for spine
YoY comparison with partial yearMisleading growth %Filter both periods to same date range

Quick Reference

-- Rolling 7-day avg
AVG(col) OVER (ORDER BY date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW)

-- MoM growth
(revenue - lag(revenue,1) OVER (ORDER BY month)) / lag(revenue,1) OVER (...) * 100

-- Date spine
SELECT unnest(generate_series(DATE '2024-01-01', DATE '2024-12-31', INTERVAL '1 day'))::DATE AS day

-- Cumulative
SUM(col) OVER (ORDER BY date)

What's Next