下面用到的所有数据和查询都可以在这个 db<>fiddle 中找到
我有一个 events 表,其结构如下:
-- Event log for task executions: one row per event recorded for a task run.
CREATE TABLE events (
    correlation_id char(26)                    NOT NULL,  -- same value on every event of one run (see sample rows)
    user_id        bigint,
    task_id        bigint                      NOT NULL,  -- matches tasks.id in the queries below
    location_id    bigint,
    type           bigint                      NOT NULL,  -- 0 = task start, 99 = task end; other values exist
    created_at     timestamp(6) with time zone NOT NULL,
    CONSTRAINT events_correlation_id_created_at_user_id_unique
        UNIQUE (correlation_id, created_at, user_id)
);
此表包含正在执行的任务的记录,如下所示:
correlation_id | user_id | task_id | location_id | type | created_at |
---|---|---|---|---|---|
01CN4HP4AN0000000000000001 | 4 | 58 | 30 | 0 | 2018-08-17 18:17:15.348629 |
01CN4HP4AN0000000000000001 | 4 | 58 | 30 | 1 | 2018-08-17 18:17:22.852299 |
01CN4HP4AN0000000000000001 | 4 | 58 | 30 | 99 | 2018-08-17 18:17:25.535593 |
01CN4J9SZ80000000000000003 | 4 | 97 | 30 | 0 | 2018-08-17 18:28:00.104093 |
01CN4J9SZ80000000000000003 | 4 | 97 | 30 | 99 | 2018-08-17 18:29:09.016840 |
01CN4JC1430000000000000004 | 4 | 99 | 30 | 0 | 2018-08-17 18:29:12.963264 |
01CN4JC1430000000000000004 | 4 | 99 | 30 | 99 | 2018-08-17 18:32:09.272632 |
01CN4KJCDY0000000000000005 | 139 | 97 | 30 | 0 | 2018-08-17 18:50:09.725668 |
01CN4KJCDY0000000000000005 | 139 | 97 | 30 | 3 | 2018-08-17 18:50:11.842000 |
01CN4KJCDY0000000000000005 | 139 | 97 | 30 | 99 | 2018-08-17 18:51:42.240895 |
01CNC4G1Y40000000000000008 | 139 | 99 | 30 | 0 | 2018-08-20 17:00:40.260430 |
01CNC4G1Y40000000000000008 | 139 | 99 | 30 | 99 | 2018-08-20 17:00:47.583501 |
type = 0 的行表示任务的开始,type = 99 的行表示任务的结束。(其他值表示与此问题无关的其他内容,但为了完整起见,此处包含了两个示例行。)
每个 task_id 对应于 tasks 表中的一行。tasks 表中唯一与该问题相关的其他字段是 inprogress_status,它的值可以是 1 或 2,分别表示 Opening task 和 Closing task。
我最初被要求提供一个查询,返回按开始日期和位置排序的任务列表,其中每一行同时包含该任务的开始 (type = 0) 和结束 (type = 99)。
这是我当时为此使用的查询:
-- One row per task run (correlation_id): its start (type 0) and end (type 99)
-- events are folded into a single JSON object keyed by the event type.
SELECT
    e.created_at::date AS created_at,   -- calendar day of the event
    e.location_id,
    e.task_id,
    CASE t.inprogress_status
        WHEN 2 THEN 'CLOSE'
        WHEN 1 THEN 'OPEN'
    END AS task_type,
    e.correlation_id,
    json_object_agg(
        e.type,
        json_build_object('timestamp', e.created_at, 'user_id', e.user_id)
    ) AS events
FROM events AS e
INNER JOIN tasks AS t
    ON t.id = e.task_id
WHERE e.type IN (0, 99)                 -- start and end events only
  AND t.inprogress_status IN (1, 2)     -- opening and closing tasks only
GROUP BY
    e.created_at::date,
    e.location_id,
    e.task_id,
    e.correlation_id,
    t.inprogress_status
ORDER BY
    e.created_at::date,
    e.location_id,
    e.task_id;
这是使用上面显示的数据进行查询的结果:
created_at | location_id | task_id | task_type | correlation_id | events |
---|---|---|---|---|---|
2018-08-17 | 30 | 58 | OPEN | 01CN4HP4AN0000000000000001 | {"0": {"timestamp": "2018-08-17T18:17:15.348629+00:00", "user_id": 4}, "99": {"timestamp": "2018-08-17T18:17:25.535593+00:00", "user_id": 4} } |
2018-08-17 | 30 | 97 | CLOSE | 01CN4J9SZ80000000000000003 | {"0": {"timestamp": "2018-08-17T18:28:00.104093+00:00", "user_id": 4}, "99": {"timestamp": "2018-08-17T18:29:09.01684+00:00", "user_id": 4} } |
2018-08-17 | 30 | 99 | OPEN | 01CN4JC1430000000000000004 | { "0": {"timestamp": "2018-08-17T18:29:12.963264+00:00", "user_id": 4}, "99": {"timestamp": "2018-08-17T18:32:09.272632+00:00", "user_id": 4} } |
2018-08-17 | 30 | 97 | CLOSE | 01CN4KJCDY0000000000000005 | { "0": {"timestamp": "2018-08-17T18:50:09.725668+00:00", "user_id": 139}, "99": {"timestamp": "2018-08-17T18:51:42.240895+00:00", "user_id": 139} } |
2018-08-20 | 30 | 99 | OPEN | 01CNC4G1Y40000000000000008 | { "0": {"timestamp": "2018-08-20T17:00:40.26043+00:00", "user_id": 139}, "99" : {"timestamp": "2018-08-20T17:00:47.583501+00:00", "user_id" : 139} } |
在上面的例子中,task_id 58 和 99 的 inprogress_status = 1,task_id 97 的 inprogress_status = 2。
现在我被要求修改返回的数据结构,使它同时按 inprogress_status 进行聚合,并把各行作为 OPEN+CLOSE 事件对返回。
为了弄清楚如何构建它,我首先尝试得到下面这种格式(我真正想要的最终格式见后文):
created_at | location_id | events |
---|---|---|
2018-08-17 | 30 | {"OPEN": [{"correlation_id": "01CN4HP4AN0000000000000001", "0" : {"timestamp" : "2018-08-17T18:17:15.348629+00:00", "user_id" : 4}, "99" : {"timestamp" : "2018-08-17T18:17:25.535593+00:00", "user_id" : 4} }, {"correlation_id": "01CN4JC1430000000000000004", "0" : {"timestamp" : "2018-08-17T18:29:12.963264+00:00", "user_id" : 4}, "99" : {"timestamp" : "2018-08-17T18:32:09.272632+00:00", "user_id" : 4} }], "CLOSE": [{"correlation_id": "01CN4J9SZ80000000000000003", "0" : {"timestamp" : "2018-08-17T18:28:00.104093+00:00", "user_id" : 4}, "99" : {"timestamp" : "2018-08-17T18:29:09.01684+00:00", "user_id" : 4} }, { "correlation_id": "01CN4KJCDY0000000000000005", "0" : {"timestamp" : "2018-08-17T18:50:09.725668+00:00", "user_id" : 139}, "99" : {"timestamp" : "2018-08-17T18:51:42.240895+00:00", "user_id" : 139} }]} |
2018-08-20 | 30 | {"OPEN": [{"correlation_id": "01CNC4G1Y40000000000000008", "0" : {"timestamp" : "2018-08-20T17:00:40.26043+00:00", "user_id" : 139}, "99" : {"timestamp" : "2018-08-20T17:00:47.583501+00:00", "user_id" : 139} }], "CLOSE": null} |
这是我为了得到这种格式而写的第一个查询:
-- First attempt: build one JSONB object per task run, then key those objects
-- by task type for each day and location.
-- NOTE: json_object_agg keeps every key/value pair it is given, so several runs
-- of the same task type within one day/location repeat the "OPEN"/"CLOSE" key.
WITH task_runs AS (
    SELECT
        e.created_at::date AS created_date,
        e.location_id,
        e.task_id,
        CASE t.inprogress_status
            WHEN 2 THEN 'CLOSE'
            WHEN 1 THEN 'OPEN'
        END AS task_type,
        -- run identifier merged with the per-type event details
        jsonb_build_object('id', e.correlation_id)
            || jsonb_object_agg(
                   e.type,
                   jsonb_build_object('timestamp', e.created_at, 'user_id', e.user_id)
               ) AS events
    FROM events AS e
    INNER JOIN tasks AS t
        ON t.id = e.task_id
    WHERE e.type IN (0, 99)
      AND t.inprogress_status IN (1, 2)
    GROUP BY
        e.created_at::date,
        e.location_id,
        e.task_id,
        e.correlation_id,
        t.inprogress_status
)
SELECT
    r.created_date,
    r.location_id,
    json_object_agg(r.task_type, r.events)
FROM task_runs AS r
GROUP BY
    r.created_date,
    r.location_id
ORDER BY
    r.created_date,
    r.location_id
问题是这会产生无效的 JSON,其中有多个相同的键:
{
"OPEN": {
"0": { "user_id": 4, "timestamp": "2018-08-17T18:29:12.963264+00:00" },
"99": { "user_id": 4, "timestamp": "2018-08-17T18:32:09.272632+00:00" },
"id": "01CN4JC1430000000000000004"
},
"OPEN": {
"0": { "user_id": 4, "timestamp": "2018-08-17T18:17:15.348629+00:00" },
"99": { "user_id": 4, "timestamp": "2018-08-17T18:17:25.535593+00:00" },
"id": "01CN4HP4AN0000000000000001"
},
// ... etc.
}
我发现这个查询以上面显示的格式返回数据:
-- Three steps:
--   1. task_runs    : one JSONB object per task run (start + end event),
--   2. runs_by_type : the runs of each task type collected into a JSON array,
--   3. final SELECT : one JSON object per day and location, keyed by task type.
WITH task_runs AS (
    SELECT
        e.created_at::date AS created_date,
        e.location_id,
        e.task_id,
        CASE t.inprogress_status
            WHEN 2 THEN 'CLOSE'
            WHEN 1 THEN 'OPEN'
        END AS task_type,
        -- run identifier merged with the per-type event details
        jsonb_build_object('id', e.correlation_id)
            || jsonb_object_agg(
                   e.type,
                   jsonb_build_object('timestamp', e.created_at, 'user_id', e.user_id)
               ) AS events
    FROM events AS e
    INNER JOIN tasks AS t
        ON t.id = e.task_id
    WHERE e.type IN (0, 99)
      AND t.inprogress_status IN (1, 2)
    GROUP BY
        e.created_at::date,
        e.location_id,
        e.task_id,
        e.correlation_id,
        t.inprogress_status
),
runs_by_type AS (
    SELECT
        r.created_date,
        r.location_id,
        r.task_type,
        json_agg(r.events) AS events
    FROM task_runs AS r
    GROUP BY
        r.created_date,
        r.location_id,
        r.task_type
)
SELECT
    b.created_date,
    b.location_id,
    json_object_agg(b.task_type, b.events)
FROM runs_by_type AS b
GROUP BY
    b.created_date,
    b.location_id
ORDER BY
    b.created_date,
    b.location_id
但是,我实际需要的格式应该只是将单个 OPEN 与单个 CLOSE 配对,如下所示(每个 OPEN 和紧随其后的 CLOSE):
created_at | location_id | events |
---|---|---|
2018-08-17 | 30 | {"OPEN": {"correlation_id": "01CN4HP4AN0000000000000001", "0" : {"timestamp" : "2018-08-17T18:17:15.348629+00:00", "user_id" : 4}, "99" : {"timestamp" : "2018-08-17T18:17:25.535593+00:00", "user_id" : 4} }, "CLOSE": {"correlation_id": "01CN4J9SZ80000000000000003", "0" : {"timestamp" : "2018-08-17T18:28:00.104093+00:00", "user_id" : 4}, "99" : {"timestamp" : "2018-08-17T18:29:09.01684+00:00", "user_id" : 4} }} |
2018-08-17 | 30 | {"OPEN": {"correlation_id": "01CN4JC1430000000000000004", "0" : {"timestamp" : "2018-08-17T18:29:12.963264+00:00", "user_id" : 4}, "99" : {"timestamp" : "2018-08-17T18:32:09.272632+00:00", "user_id" : 4} }, "CLOSE": { "correlation_id": "01CN4KJCDY0000000000000005", "0" : {"timestamp" : "2018-08-17T18:50:09.725668+00:00", "user_id" : 139}, "99" : {"timestamp" : "2018-08-17T18:51:42.240895+00:00", "user_id" : 139} }} |
2018-08-20 | 30 | {"OPEN": {"correlation_id": "01CNC4G1Y40000000000000008", "0" : {"timestamp" : "2018-08-20T17:00:40.26043+00:00", "user_id" : 139}, "99" : {"timestamp" : "2018-08-20T17:00:47.583501+00:00", "user_id" : 139} }, "CLOSE": null} |
现在我想弄清楚我是否走错了方向,因为我看不出如何从我所拥有的东西中得到我的最终格式。
我的思路是不是错了?我怎样才能得到我想要的结果?
这会产生您想要的结果:
db<>fiddle 在此
只有一点不同:一天开头缺失的“OPEN”事件和一天末尾缺失的“CLOSE”事件,在结果中就直接不出现。
我使用 jsonb 而不是 json,这样才能使用 jsonb || jsonb 运算符。如果确实需要,您可以把结果转换为 json。核心是用来生成任务编号的那个复杂表达式:
每个“OPEN”任务都会开启一个新组。type = 0 的行的 created_at 决定了任务的先后顺序。从技术上讲,这之所以可行,是因为我们可以在窗口函数中嵌套聚合函数(甚至可以带上聚合的 FILTER 子句)。相关答案: