The lag
window function
allows to evaluate an expression
at offset
rows before the current row.
For time series data, sometimes it is desired to only get previous values in a
certain time window. In the example below, only the entry of the preceding day
should be considered - if it exists.
DROP TABLE IF EXISTS example;
CREATE TABLE example (
user_id INTEGER NOT NULL,
created_at DATE NOT NULL,
something TEXT
);
INSERT INTO example (user_id, created_at, something) VALUES
(1, '2024-02-27', 'foo'),
(1, '2024-02-28', NULL),
(1, '2024-02-29', 'bar'),
(2, '2024-02-29', 'foo'),
(2, '2024-03-01', 'bar'),
(3, '2024-03-01', 'bar'),
(4, '2024-03-02', NULL),
(4, '2024-03-03', 'foo'),
(4, '2024-03-05', 'bar')
;
SELECT * FROM example;
-- Suppose we want to identify the first day where `something`
-- is 'bar' and the value for `something` on the preceding day,
-- if - and only if - the preceding day has been recorded. We
-- would expect the following result:
--
-- user created_at something something_before
-- 1 2024-02-29 bar NULL
-- 2 2024-03-01 bar foo
-- 3 2024-03-01 bar NULL
-- 4 2024-03-05 bar NULL
-- The following query almost works:
SELECT * FROM(
SELECT
*,
LAG(something) OVER(
PARTITION BY user_id
ORDER BY user_id, created_at
) AS something_before
FROM example
ORDER BY user_id
)
WHERE
something = 'bar'
;
-- We get the expected result for user ID `1`, `2` and `3` but
-- also `something_before` with `foo` for the user with ID `4`
-- where no entry has been recorded for the preceding day and
-- `lag()` resolves to the value from 2 days ago.
-- By calculating a `previous_day_offset` we can limit 'lag()'
-- to users with an actual record for the preceding day:
DROP TABLE IF EXISTS example_with_day_offsets;
CREATE TABLE example_with_day_offsets AS (
SELECT
*,
created_at - LAG(created_at) OVER(
PARTITION BY user_id
ORDER BY user_id, created_at
) AS previous_day_offset
FROM example
ORDER BY user_id, created_at
);
SELECT * FROM example_with_day_offsets;
DROP TABLE IF EXISTS example_with_something_before;
CREATE TABLE example_with_something_before AS (
SELECT
user_id,
created_at,
something,
CASE
WHEN
previous_day_offset = 1
THEN
LAG(something) OVER(
PARTITION BY user_id
ORDER BY user_id, created_at
)
ELSE
NULL
END AS something_before
FROM example_with_day_offsets
ORDER BY user_id, created_at
)
;
SELECT
*
FROM
example_with_something_before
WHERE
something = 'bar'
;
Created 2024-03-23T17:22:26+01:00 · Edit