京华开发一哥们找我优化条报表SQL,反馈执行时间很慢需要 18s 才能出结果,安排。
# 原SQL
SELECT
2 AS TYPE,
to_char(a."create_time",'yyyy') AS YEAR,
to_char(a."create_time",'mm') AS MONTH,
b."app_id" AS main_id,
b."merchant_name" AS main_name,
COUNT(1) AS total_count,
SUM(a."content_split_count") AS split_total_count,
(
SELECT COALESCE(COUNT(x1."id"),0)
FROM "inno_ticket" x1
INNER JOIN "inno_statereport" y1
ON x1."id" = y1."ticket_id"
WHERE to_char(x1."create_time",'yyyy-mm') = to_char(a."create_time",'yyyy-mm')
AND to_char(x1."user_id") = to_char(b."app_id")
AND y1."state" = 0
) AS success_count,
(
SELECT COALESCE(SUM(x2."content_split_count"),0)
FROM "inno_ticket" x2
INNER JOIN "inno_statereport" y2
ON x2."id" = y2."ticket_id"
WHERE to_char(x2."create_time",'yyyy-mm') = to_char(a."create_time",'yyyy-mm')
AND to_char(x2."user_id") = to_char(b."app_id")
AND y2."state" = 0
) AS success_split_count,
(
SELECT COALESCE(COUNT(x3."id"),0)
FROM "inno_ticket" x3
INNER JOIN "inno_statereport" y3
ON x3."id" = y3."ticket_id"
WHERE to_char(x3."create_time",'yyyy-mm') = to_char(a."create_time",'yyyy-mm')
AND to_char(x3."user_id") = to_char(b."app_id")
AND y3."state" = 1
) AS fail_count,
(
SELECT COALESCE(SUM(x4."content_split_count"),0)
FROM "inno_ticket" x4
INNER JOIN "inno_statereport" y4
ON x4."id" = y4."ticket_id"
WHERE to_char(x4."create_time",'yyyy-mm') = to_char(a."create_time",'yyyy-mm')
AND to_char(x4."user_id") = to_char(b."app_id")
AND y4."state" = 1
) AS fail_split_count,
(
SELECT COALESCE(COUNT(1),0)
FROM "inno_ticket" x5
WHERE to_char(x5."create_time",'yyyy-mm') = to_char(a."create_time",'yyyy-mm')
AND to_char(x5."user_id") = to_char(b."app_id")
AND NOT EXISTS (SELECT 1 FROM "inno_statereport" y5 WHERE y5."ticket_id" = x5."id")
) AS not_statereport_count,
(
SELECT COALESCE(SUM(x6."content_split_count"),0)
FROM "inno_ticket" x6
WHERE to_char(x6."create_time",'yyyy-mm') = to_char(a."create_time",'yyyy-mm')
AND to_char(x6."user_id") = to_char(b."app_id")
AND NOT EXISTS (SELECT 1 FROM "inno_statereport" y6 WHERE y6."ticket_id" = x6."id")
) AS split_not_statereport_count,
to_char(NOW(),'yyyy-mm-dd hh24:mi:ss') AS create_time,
to_char(a."create_time",'yyyy-mm') AS send_date
FROM "inno_ticket" a
LEFT JOIN "inno_merchant" b
ON to_char(a."user_id") = to_char(b."app_id")
WHERE b."app_id" IS NOT NULL
AND to_char(a."create_time",'yyyy-mm') = '2022-07'
GROUP BY main_id,YEAR,MONTH
ORDER BY main_id,YEAR,MONTH ASC;
# 执行计划
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------
GroupAggregate (cost=61274.47..1487331953.10 rows=3794 width=424) (actual time=1615.250..18670.271 rows=16 loops=1)
Group Key: b.app_id, (TO_CHAR(a.create_time, 'yyyy'::TEXT)), (TO_CHAR(a.create_time, 'mm'::TEXT))
-> Sort (cost=61274.47..61283.95 rows=3794 width=304) (actual time=618.402..623.671 rows=49026 loops=1)
Sort Key: b.app_id, (TO_CHAR(a.create_time, 'yyyy'::TEXT)), (TO_CHAR(a.create_time, 'mm'::TEXT))
Sort Method: external merge Disk: 3440kB
-> Hash Join (cost=12.25..61048.92 rows=3794 width=304) (actual time=1.721..463.194 rows=49026 loops=1)
Hash Cond: ((a.user_id)::TEXT = (b.app_id)::TEXT)
-> Seq Scan on inno_ticket a (cost=0.00..60965.54 rows=3794 width=20) (actual time=1.675..425.268 rows=82989 loops=1)
Filter: (TO_CHAR(create_time, 'yyyy-mm'::TEXT) = '2022-07'::TEXT)
Rows Removed by Filter: 676325
-> Hash (cost=11.00..11.00 rows=100 width=228) (actual time=0.022..0.022 rows=34 loops=1)
Buckets: 1024 Batches: 1 Memory Usage: 11kB
-> Seq Scan on inno_merchant b (cost=0.00..11.00 rows=100 width=228) (actual time=0.005..0.009 rows=34 loops=1)
Filter: (app_id IS NOT NULL)
SubPlan 1
-> Aggregate (cost=65334.47..65334.48 rows=1 width=8) (actual time=188.908..188.908 rows=1 loops=16)
-> Nested Loop (cost=0.42..65334.32 rows=61 width=8) (actual time=85.565..188.610 rows=2918 loops=16)
-> Seq Scan on inno_ticket x1 (cost=0.00..64759.38 rows=68 width=8) (actual time=85.542..183.712 rows=3064 loops=16)
Filter: (((user_id)::TEXT = (b.app_id)::TEXT) AND (TO_CHAR(create_time, 'yyyy-mm'::TEXT) = TO_CHAR(a.create_time, 'yyyy-mm'::TEXT)))
Rows Removed by Filter: 756250
-> Index Scan using ix_ticket_id on inno_statereport y1 (cost=0.42..8.45 rows=1 width=8) (actual time=0.001..0.001 rows=1 loops=49026)
Index Cond: (ticket_id = x1.id)
Filter: (state = 0)
Rows Removed by Filter: 0
SubPlan 2
-> Aggregate (cost=65334.47..65334.48 rows=1 width=8) (actual time=187.983..187.983 rows=1 loops=16)
-> Nested Loop (cost=0.42..65334.32 rows=61 width=4) (actual time=85.478..187.682 rows=2918 loops=16)
-> Seq Scan on inno_ticket x2 (cost=0.00..64759.38 rows=68 width=12) (actual time=85.459..182.919 rows=3064 loops=16)
Filter: (((user_id)::TEXT = (b.app_id)::TEXT) AND (TO_CHAR(create_time, 'yyyy-mm'::TEXT) = TO_CHAR(a.create_time, 'yyyy-mm'::TEXT)))
Rows Removed by Filter: 756250
-> Index Scan using ix_ticket_id on inno_statereport y2 (cost=0.42..8.45 rows=1 width=8) (actual time=0.001..0.001 rows=1 loops=49026)
Index Cond: (ticket_id = x2.id)
Filter: (state = 0)
Rows Removed by Filter: 0
SubPlan 3
-> Aggregate (cost=65334.33..65334.34 rows=1 width=8) (actual time=187.672..187.673 rows=1 loops=16)
-> Nested Loop (cost=0.42..65334.32 rows=5 width=8) (actual time=145.489..187.651 rows=121 loops=16)
-> Seq Scan on inno_ticket x3 (cost=0.00..64759.38 rows=68 width=8) (actual time=85.450..183.167 rows=3064 loops=16)
Filter: (((user_id)::TEXT = (b.app_id)::TEXT) AND (TO_CHAR(create_time, 'yyyy-mm'::TEXT) = TO_CHAR(a.create_time, 'yyyy-mm'::TEXT)))
Rows Removed by Filter: 756250
-> Index Scan using ix_ticket_id on inno_statereport y3 (cost=0.42..8.45 rows=1 width=8) (actual time=0.001..0.001 rows=0 loops=49026)
Index Cond: (ticket_id = x3.id)
Filter: (state = 1)
Rows Removed by Filter: 1
SubPlan 4
-> Aggregate (cost=65334.33..65334.34 rows=1 width=8) (actual time=187.727..187.727 rows=1 loops=16)
-> Nested Loop (cost=0.42..65334.32 rows=5 width=4) (actual time=145.774..187.705 rows=121 loops=16)
-> Seq Scan on inno_ticket x4 (cost=0.00..64759.38 rows=68 width=12) (actual time=85.750..183.253 rows=3064 loops=16)
Filter: (((user_id)::TEXT = (b.app_id)::TEXT) AND (TO_CHAR(create_time, 'yyyy-mm'::TEXT) = TO_CHAR(a.create_time, 'yyyy-mm'::TEXT)))
Rows Removed by Filter: 756250
-> Index Scan using ix_ticket_id on inno_statereport y4 (cost=0.42..8.45 rows=1 width=8) (actual time=0.001..0.001 rows=0 loops=49026)
Index Cond: (ticket_id = x4.id)
Filter: (state = 1)
Rows Removed by Filter: 1
SubPlan 5
-> Aggregate (cost=65334.14..65334.15 rows=1 width=8) (actual time=187.468..187.469 rows=1 loops=16)
-> Nested Loop Anti Join (cost=0.42..65334.13 rows=2 width=0) (actual time=176.473..187.461 rows=26 loops=16)
-> Seq Scan on inno_ticket x5 (cost=0.00..64759.38 rows=68 width=8) (actual time=85.532..183.525 rows=3064 loops=16)
Filter: (((user_id)::TEXT = (b.app_id)::TEXT) AND (TO_CHAR(create_time, 'yyyy-mm'::TEXT) = TO_CHAR(a.create_time, 'yyyy-mm'::TEXT)))
Rows Removed by Filter: 756250
-> Index Only Scan using ix_ticket_id on inno_statereport y5 (cost=0.42..8.44 rows=1 width=8) (actual time=0.001..0.001 rows=1 loops=49026)
Index Cond: (ticket_id = x5.id)
Heap Fetches: 48617
SubPlan 6
-> Aggregate (cost=65334.14..65334.15 rows=1 width=8) (actual time=187.120..187.120 rows=1 loops=16)
-> Nested Loop Anti Join (cost=0.42..65334.13 rows=2 width=4) (actual time=176.121..187.112 rows=26 loops=16)
-> Seq Scan on inno_ticket x6 (cost=0.00..64759.38 rows=68 width=12) (actual time=85.678..183.167 rows=3064 loops=16)
Filter: (((user_id)::TEXT = (b.app_id)::TEXT) AND (TO_CHAR(create_time, 'yyyy-mm'::TEXT) = TO_CHAR(a.create_time, 'yyyy-mm'::TEXT)))
Rows Removed by Filter: 756250
-> Index Only Scan using ix_ticket_id on inno_statereport y6 (cost=0.42..8.44 rows=1 width=8) (actual time=0.001..0.001 rows=1 loops=49026)
Index Cond: (ticket_id = x6.id)
Heap Fetches: 48617
Planning time: 1.978 ms
Execution time: 18672.345 ms
(74 rows)
标红处是完全不能走索引的,而且一条SQL使用6个标量子查询,如果是主表是大表的情况下,标子要被干N次。
# 加索引优化SQL
CREATE OR REPLACE FUNCTION pg_catalog.TO_CHAR(timestamp with time zone, text)
RETURNS text
LANGUAGE internal
IMMUTABLE STRICT
AS $function$timestamptz_to_char$function$
CREATE INDEX idx_func_inno_ticket ON inno_ticket using btree (to_char(create_time,'yyyy-mm'));
CREATE INDEX idx_func_inno_ticket ON inno_ticket (to_char(user_id));
# 优化后的执行计划
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------
GroupAggregate (cost=12088.53..281878946.59 rows=3797 width=424) (actual time=406.013..3509.848 rows=16 loops=1)
Group Key: b.app_id, (to_char(a.create_time, 'yyyy'::TEXT)), (to_char(a.create_time, 'mm'::TEXT))
-> Sort (cost=12088.53..12098.02 rows=3797 width=304) (actual time=223.045..228.313 rows=49026 loops=1)
Sort Key: b.app_id, (to_char(a.create_time, 'yyyy'::TEXT)), (to_char(a.create_time, 'mm'::TEXT))
Sort Method: external merge Disk: 3440kB
-> Hash Join (cost=86.10..11862.79 rows=3797 width=304) (actual time=11.031..71.658 rows=49026 loops=1)
Hash Cond: ((a.user_id)::TEXT = (b.app_id)::TEXT)
-> Bitmap Heap Scan on inno_ticket a (cost=73.85..11779.34 rows=3797 width=20) (actual time=10.974..24.840 rows=82989 loops=1)
Recheck Cond: (to_char(create_time, 'yyyy-mm'::TEXT) = '2022-07'::TEXT)
Heap Blocks: exact=8311
-> Bitmap Index Scan on idx_func_inno_ticket (cost=0.00..72.90 rows=3797 width=0) (actual time=9.805..9.805 rows=82989 loops=1)
Index Cond: (to_char(create_time, 'yyyy-mm'::TEXT) = '2022-07'::TEXT)
-> Hash (cost=11.00..11.00 rows=100 width=228) (actual time=0.024..0.024 rows=34 loops=1)
Buckets: 1024 Batches: 1 Memory Usage: 11kB
-> Seq Scan on inno_merchant b (cost=0.00..11.00 rows=100 width=228) (actual time=0.006..0.010 rows=34 loops=1)
Filter: (app_id IS NOT NULL)
SubPlan 1
-> Aggregate (cost=12372.49..12372.50 rows=1 width=8) (actual time=34.580..34.580 rows=1 loops=16)
-> Nested Loop (cost=73.35..12372.34 rows=61 width=8) (actual time=11.638..34.316 rows=2918 loops=16)
-> Bitmap Heap Scan on inno_ticket x1 (cost=72.92..11797.40 rows=68 width=8) (actual time=11.619..29.651 rows=3064 loops=16)
Recheck Cond: (to_char(create_time, 'yyyy-mm'::TEXT) = to_char(a.create_time, 'yyyy-mm'::TEXT))
Filter: ((user_id)::TEXT = (b.app_id)::TEXT)
Rows Removed by Filter: 79925
Heap Blocks: exact=132976
-> Bitmap Index Scan on idx_func_inno_ticket (cost=0.00..72.90 rows=3797 width=0) (actual time=7.990..7.990 rows=82989 loops=16)
Index Cond: (to_char(create_time, 'yyyy-mm'::TEXT) = to_char(a.create_time, 'yyyy-mm'::TEXT))
-> Index Scan using ix_ticket_id on inno_statereport y1 (cost=0.42..8.45 rows=1 width=8) (actual time=0.001..0.001 rows=1 loops=49026)
Index Cond: (ticket_id = x1.id)
Filter: (state = 0)
Rows Removed by Filter: 0
SubPlan 2
-> Aggregate (cost=12372.49..12372.50 rows=1 width=8) (actual time=34.895..34.895 rows=1 loops=16)
-> Nested Loop (cost=73.35..12372.34 rows=61 width=4) (actual time=11.645..34.622 rows=2918 loops=16)
-> Bitmap Heap Scan on inno_ticket x2 (cost=72.92..11797.40 rows=68 width=12) (actual time=11.628..29.818 rows=3064 loops=16)
Recheck Cond: (to_char(create_time, 'yyyy-mm'::TEXT) = to_char(a.create_time, 'yyyy-mm'::TEXT))
Filter: ((user_id)::TEXT = (b.app_id)::TEXT)
Rows Removed by Filter: 79925
Heap Blocks: exact=132976
-> Bitmap Index Scan on idx_func_inno_ticket (cost=0.00..72.90 rows=3797 width=0) (actual time=7.997..7.997 rows=82989 loops=16)
Index Cond: (to_char(create_time, 'yyyy-mm'::TEXT) = to_char(a.create_time, 'yyyy-mm'::TEXT))
-> Index Scan using ix_ticket_id on inno_statereport y2 (cost=0.42..8.45 rows=1 width=8) (actual time=0.001..0.001 rows=1 loops=49026)
Index Cond: (ticket_id = x2.id)
Filter: (state = 0)
Rows Removed by Filter: 0
SubPlan 3
-> Aggregate (cost=12372.35..12372.36 rows=1 width=8) (actual time=33.857..33.857 rows=1 loops=16)
-> Nested Loop (cost=73.35..12372.34 rows=5 width=8) (actual time=20.514..33.839 rows=121 loops=16)
-> Bitmap Heap Scan on inno_ticket x3 (cost=72.92..11797.40 rows=68 width=8) (actual time=11.652..29.658 rows=3064 loops=16)
Recheck Cond: (to_char(create_time, 'yyyy-mm'::TEXT) = to_char(a.create_time, 'yyyy-mm'::TEXT))
Filter: ((user_id)::TEXT = (b.app_id)::TEXT)
Rows Removed by Filter: 79925
Heap Blocks: exact=132976
-> Bitmap Index Scan on idx_func_inno_ticket (cost=0.00..72.90 rows=3797 width=0) (actual time=8.038..8.038 rows=82989 loops=16)
Index Cond: (to_char(create_time, 'yyyy-mm'::TEXT) = to_char(a.create_time, 'yyyy-mm'::TEXT))
-> Index Scan using ix_ticket_id on inno_statereport y3 (cost=0.42..8.45 rows=1 width=8) (actual time=0.001..0.001 rows=0 loops=49026)
Index Cond: (ticket_id = x3.id)
Filter: (state = 1)
Rows Removed by Filter: 1
SubPlan 4
-> Aggregate (cost=12372.35..12372.36 rows=1 width=8) (actual time=33.935..33.935 rows=1 loops=16)
-> Nested Loop (cost=73.35..12372.34 rows=5 width=4) (actual time=20.587..33.918 rows=121 loops=16)
-> Bitmap Heap Scan on inno_ticket x4 (cost=72.92..11797.40 rows=68 width=12) (actual time=11.712..29.717 rows=3064 loops=16)
Recheck Cond: (to_char(create_time, 'yyyy-mm'::TEXT) = to_char(a.create_time, 'yyyy-mm'::TEXT))
Filter: ((user_id)::TEXT = (b.app_id)::TEXT)
Rows Removed by Filter: 79925
Heap Blocks: exact=132976
-> Bitmap Index Scan on idx_func_inno_ticket (cost=0.00..72.90 rows=3797 width=0) (actual time=8.074..8.074 rows=82989 loops=16)
Index Cond: (to_char(create_time, 'yyyy-mm'::TEXT) = to_char(a.create_time, 'yyyy-mm'::TEXT))
-> Index Scan using ix_ticket_id on inno_statereport y4 (cost=0.42..8.45 rows=1 width=8) (actual time=0.001..0.001 rows=0 loops=49026)
Index Cond: (ticket_id = x4.id)
Filter: (state = 1)
Rows Removed by Filter: 1
SubPlan 5
-> Aggregate (cost=12372.15..12372.16 rows=1 width=8) (actual time=33.366..33.366 rows=1 loops=16)
-> Nested Loop Anti Join (cost=73.35..12372.15 rows=2 width=0) (actual time=23.225..33.358 rows=26 loops=16)
-> Bitmap Heap Scan on inno_ticket x5 (cost=72.92..11797.40 rows=68 width=8) (actual time=11.675..29.665 rows=3064 loops=16)
Recheck Cond: (to_char(create_time, 'yyyy-mm'::TEXT) = to_char(a.create_time, 'yyyy-mm'::TEXT))
Filter: ((user_id)::TEXT = (b.app_id)::TEXT)
Rows Removed by Filter: 79925
Heap Blocks: exact=132976
-> Bitmap Index Scan on idx_func_inno_ticket (cost=0.00..72.90 rows=3797 width=0) (actual time=8.056..8.056 rows=82989 loops=16)
Index Cond: (to_char(create_time, 'yyyy-mm'::TEXT) = to_char(a.create_time, 'yyyy-mm'::TEXT))
-> Index Only Scan using ix_ticket_id on inno_statereport y5 (cost=0.42..8.44 rows=1 width=8) (actual time=0.001..0.001 rows=1 loops=49026)
Index Cond: (ticket_id = x5.id)
Heap Fetches: 48617
SubPlan 6
-> Aggregate (cost=12372.15..12372.16 rows=1 width=8) (actual time=33.410..33.411 rows=1 loops=16)
-> Nested Loop Anti Join (cost=73.35..12372.15 rows=2 width=4) (actual time=23.233..33.403 rows=26 loops=16)
-> Bitmap Heap Scan on inno_ticket x6 (cost=72.92..11797.40 rows=68 width=12) (actual time=11.671..29.707 rows=3064 loops=16)
Recheck Cond: (to_char(create_time, 'yyyy-mm'::TEXT) = to_char(a.create_time, 'yyyy-mm'::TEXT))
Filter: ((user_id)::TEXT = (b.app_id)::TEXT)
Rows Removed by Filter: 79925
Heap Blocks: exact=132976
-> Bitmap Index Scan on idx_func_inno_ticket (cost=0.00..72.90 rows=3797 width=0) (actual time=8.050..8.050 rows=82989 loops=16)
Index Cond: (to_char(create_time, 'yyyy-mm'::TEXT) = to_char(a.create_time, 'yyyy-mm'::TEXT))
-> Index Only Scan using ix_ticket_id on inno_statereport y6 (cost=0.42..8.44 rows=1 width=8) (actual time=0.001..0.001 rows=1 loops=49026)
Index Cond: (ticket_id = x6.id)
Heap Fetches: 48617
Planning time: 1.833 ms
Execution time: 3511.169 ms
(100 rows)
可以看到,SQL从原来的18s 降到 3.5s 能出结果。
结论:
标量子查询尽量使用 left join 来代替,减少主表和标量表访问的次数,提升SQL编写的能力。
# 使用 left join 代替标量子查询
with x as (
SELECT
2 AS TYPE,
to_char(a."create_time",'yyyy') AS YEAR,
to_char(a."create_time",'mm') AS MONTH,
b."app_id" AS main_id,
b."merchant_name" AS main_name,
COUNT(1) AS total_count,
SUM(a."content_split_count") AS split_total_count,
to_char(NOW(),'yyyy-mm-dd hh24:mi:ss') AS create_time,
to_char(a."create_time",'yyyy-mm') AS send_date
FROM "inno_ticket" a
LEFT JOIN "inno_merchant" b
ON to_char(a."user_id") = to_char(b."app_id")
WHERE b."app_id" IS NOT NULL
AND to_char(a."create_time",'yyyy-mm') = '2022-07'
GROUP BY main_id,YEAR,MONTH
ORDER BY main_id,YEAR,MONTH ASC
)
select
2 AS TYPE,
x.YEAR,
x.MONTH,
x."app_id" AS main_id,
x."merchant_name" AS main_name,
x.total_count,
x.split_total_count,
x1.success_count,
x1.success_split_count,
x1.fail_count,
x1.fail_split_count,
x2.not_statereport_count,
x2.split_not_statereport_count
x.create_time,
x.send_date
from x left join (
select to_char(x1."user_id"),
COALESCE(COUNT(case when y1."state" = 0 then x1."id" else null end),0) as success_count ,
COALESCE(SUM(case when y1."state" = 0 then x1."content_split_count" else null end),0) as success_split_count,
COALESCE(COUNT(case when y1."state" = 1 then x1."id" else null end),0) as fail_count ,
COALESCE(SUM(case when y1."state" = 1 then x1."content_split_count" else null end),0) as fail_split_count
FROM "inno_ticket" x1
INNER JOIN "inno_statereport" y1
ON x1."id" = y1."ticket_id"
where to_char(x1."create_time",'yyyy-mm') = '2022-07'
group by to_char(x1."user_id")
) x1 on (to_char(x1."user_id") = to_char(x."app_id"))
left join (
SELECT to_char(x5."user_id"),
COALESCE(COUNT(1), 0) as not_statereport_count,
COALESCE(SUM(x6."content_split_count"), 0) as split_not_statereport_count
FROM "inno_ticket" x5
left join "inno_statereport" y5
on y5."ticket_id" = x5."id"
where to_char(x5."create_time", 'yyyy-mm') = '2022-07'
and y5."ticket_id" is null
group by to_char(x5."user_id")
)x2 on to_char(x2."user_id") = to_char(x."app_id");
以上SQL只是以 left join 方式进行改写,没有具体测试过是否等价,熟悉SQL的同学能看懂笔者的改写思路即可。
提示:SQL等价改写不是一下就能改写成功,需要静下心来思考SQL的逻辑和表与表之间的比对关系,一步一步改来调试最终才能改写成功。