Appearance
PostgreSQL 显式 JOIN 子句与查询优化完全指南
概述
在复杂查询中,表的连接顺序对性能有着巨大影响。PostgreSQL 查询规划器虽然智能,但在面对多表连接时,可能无法总是找到最优方案。通过使用显式 JOIN 语法和相关配置,我们可以引导规划器选择更好的执行计划。本文将深入探讨如何利用显式 JOIN 来优化查询性能。
连接顺序的重要性
连接顺序对性能的影响
sql
-- 创建测试表
CREATE TABLE customers (
customer_id INT PRIMARY KEY,
name VARCHAR(100),
country VARCHAR(50),
created_at TIMESTAMP DEFAULT NOW()
);
CREATE TABLE orders (
order_id INT PRIMARY KEY,
customer_id INT REFERENCES customers(customer_id),
order_date DATE,
amount DECIMAL(10,2)
);
CREATE TABLE order_items (
item_id INT PRIMARY KEY,
order_id INT REFERENCES orders(order_id),
product_id INT,
quantity INT,
unit_price DECIMAL(10,2)
);
CREATE TABLE products (
product_id INT PRIMARY KEY,
name VARCHAR(100),
category VARCHAR(50),
supplier_id INT
);
-- 插入测试数据
INSERT INTO customers (customer_id, name, country)
SELECT i, 'Customer ' || i,
CASE (i % 5)
WHEN 0 THEN 'USA'
WHEN 1 THEN 'UK'
WHEN 2 THEN 'Germany'
WHEN 3 THEN 'France'
ELSE 'Japan'
END
FROM generate_series(1, 10000) i;
INSERT INTO orders (order_id, customer_id, order_date, amount)
SELECT i,
(i % 10000) + 1,
DATE '2020-01-01' + (i % 1460) * INTERVAL '1 day',
(random() * 1000)::DECIMAL(10,2)
FROM generate_series(1, 100000) i;
INSERT INTO order_items (item_id, order_id, product_id, quantity, unit_price)
SELECT i,
((i-1) / 3) + 1, -- 每个订单3个项目
(i % 1000) + 1,
(random() * 10 + 1)::INT,
(random() * 100)::DECIMAL(10,2)
FROM generate_series(1, 300000) i;
INSERT INTO products (product_id, name, category, supplier_id)
SELECT i,
'Product ' || i,
CASE (i % 10)
WHEN 0 THEN 'Electronics'
WHEN 1 THEN 'Clothing'
WHEN 2 THEN 'Food'
WHEN 3 THEN 'Books'
ELSE 'Other'
END,
(i % 100) + 1
FROM generate_series(1, 1000) i;
-- 分析表以更新统计信息
ANALYZE customers, orders, order_items, products;
连接顺序可能性的指数增长
隐式 vs 显式 JOIN
语义等价但执行计划可能不同
sql
-- 隐式 JOIN(逗号分隔)
EXPLAIN (ANALYZE, BUFFERS)
SELECT c.name, COUNT(oi.item_id) as item_count, SUM(oi.quantity * oi.unit_price) as total_value
FROM customers c, orders o, order_items oi, products p
WHERE c.customer_id = o.customer_id
AND o.order_id = oi.order_id
AND oi.product_id = p.product_id
AND c.country = 'USA'
AND p.category = 'Electronics'
GROUP BY c.customer_id, c.name;
-- 显式 JOIN(指定顺序)
EXPLAIN (ANALYZE, BUFFERS)
SELECT c.name, COUNT(oi.item_id) as item_count, SUM(oi.quantity * oi.unit_price) as total_value
FROM customers c
JOIN orders o ON c.customer_id = o.customer_id
JOIN order_items oi ON o.order_id = oi.order_id
JOIN products p ON oi.product_id = p.product_id
WHERE c.country = 'USA'
AND p.category = 'Electronics'
GROUP BY c.customer_id, c.name;
-- 优化的显式 JOIN(更好的连接顺序)
EXPLAIN (ANALYZE, BUFFERS)
SELECT c.name, COUNT(oi.item_id) as item_count, SUM(oi.quantity * oi.unit_price) as total_value
FROM products p
JOIN order_items oi ON p.product_id = oi.product_id
JOIN orders o ON oi.order_id = o.order_id
JOIN customers c ON o.customer_id = c.customer_id
WHERE c.country = 'USA'
AND p.category = 'Electronics'
GROUP BY c.customer_id, c.name;
性能对比测试
sql
-- 创建性能测试函数
CREATE OR REPLACE FUNCTION compare_join_strategies(
p_country VARCHAR,
p_category VARCHAR
)
RETURNS TABLE(
join_strategy TEXT,
execution_time INTERVAL,
rows_returned BIGINT,
buffers_hit BIGINT,
buffers_read BIGINT
) AS $$
DECLARE
v_start TIMESTAMP;
v_end TIMESTAMP;
v_plan JSONB;
v_buffers JSONB;
BEGIN
-- 测试隐式 JOIN
v_start := clock_timestamp();
PERFORM COUNT(*)
FROM customers c, orders o, order_items oi, products p
WHERE c.customer_id = o.customer_id
AND o.order_id = oi.order_id
AND oi.product_id = p.product_id
AND c.country = p_country
AND p.category = p_category;
v_end := clock_timestamp();
RETURN QUERY
SELECT 'Implicit JOIN',
v_end - v_start,
0::BIGINT, -- 简化示例
0::BIGINT,
0::BIGINT;
-- 测试显式 JOIN(标准顺序)
v_start := clock_timestamp();
PERFORM COUNT(*)
FROM customers c
JOIN orders o ON c.customer_id = o.customer_id
JOIN order_items oi ON o.order_id = oi.order_id
JOIN products p ON oi.product_id = p.product_id
WHERE c.country = p_country
AND p.category = p_category;
v_end := clock_timestamp();
RETURN QUERY
SELECT 'Explicit JOIN (Standard)',
v_end - v_start,
0::BIGINT,
0::BIGINT,
0::BIGINT;
-- 测试优化的显式 JOIN
v_start := clock_timestamp();
PERFORM COUNT(*)
FROM products p
JOIN order_items oi ON p.product_id = oi.product_id
JOIN orders o ON oi.order_id = o.order_id
JOIN customers c ON o.customer_id = c.customer_id
WHERE p.category = p_category
AND c.country = p_country;
v_end := clock_timestamp();
RETURN QUERY
SELECT 'Explicit JOIN (Optimized)',
v_end - v_start,
0::BIGINT,
0::BIGINT,
0::BIGINT;
END;
$$ LANGUAGE plpgsql;
-- 执行测试
SELECT * FROM compare_join_strategies('USA', 'Electronics');
join_collapse_limit 参数详解
参数配置和影响
sql
-- 查看当前设置
SHOW join_collapse_limit;
SHOW from_collapse_limit;
SHOW geqo_threshold;
-- 创建测试函数来演示 join_collapse_limit 的影响
CREATE OR REPLACE FUNCTION test_join_collapse_limit()
RETURNS TABLE(
collapse_limit INT,
planning_time INTERVAL,
execution_time INTERVAL,
total_cost NUMERIC
) AS $$
DECLARE
v_start_plan TIMESTAMP;
v_start_exec TIMESTAMP;
v_end TIMESTAMP;
v_plan TEXT;
v_cost NUMERIC;
BEGIN
-- 测试不同的 join_collapse_limit 值
FOR i IN ARRAY[1, 4, 8, 12] LOOP
EXECUTE format('SET LOCAL join_collapse_limit = %s', i);
v_start_plan := clock_timestamp();
-- 获取执行计划
EXECUTE '
EXPLAIN (FORMAT JSON)
SELECT COUNT(*)
FROM customers c
JOIN orders o ON c.customer_id = o.customer_id
JOIN order_items oi ON o.order_id = oi.order_id
JOIN products p ON oi.product_id = p.product_id
WHERE c.country = ''USA''
AND p.category = ''Electronics'''
INTO v_plan;
v_start_exec := clock_timestamp();
-- 执行查询
PERFORM COUNT(*)
FROM customers c
JOIN orders o ON c.customer_id = o.customer_id
JOIN order_items oi ON o.order_id = oi.order_id
JOIN products p ON oi.product_id = p.product_id
WHERE c.country = 'USA'
AND p.category = 'Electronics';
v_end := clock_timestamp();
-- 提取总成本
v_cost := (v_plan::JSON->0->'Plan'->>'Total Cost')::NUMERIC;
RETURN QUERY
SELECT i,
v_start_exec - v_start_plan,
v_end - v_start_exec,
v_cost;
END LOOP;
RESET join_collapse_limit;
END;
$$ LANGUAGE plpgsql;
-- 执行测试
SELECT * FROM test_join_collapse_limit();
join_collapse_limit 工作原理
外连接的特殊性
LEFT JOIN 约束连接顺序
sql
-- 创建额外的测试表
CREATE TABLE suppliers (
supplier_id INT PRIMARY KEY,
name VARCHAR(100),
country VARCHAR(50)
);
INSERT INTO suppliers (supplier_id, name, country)
SELECT i, 'Supplier ' || i,
CASE (i % 3)
WHEN 0 THEN 'China'
WHEN 1 THEN 'India'
ELSE 'Vietnam'
END
FROM generate_series(1, 100) i;
-- LEFT JOIN 示例 - 连接顺序受限
EXPLAIN (ANALYZE, VERBOSE)
SELECT c.name, COUNT(DISTINCT o.order_id) as order_count,
COUNT(DISTINCT p.product_id) as product_types
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
LEFT JOIN order_items oi ON o.order_id = oi.order_id
LEFT JOIN products p ON oi.product_id = p.product_id
WHERE c.country = 'USA'
GROUP BY c.customer_id, c.name;
-- 尝试重排 LEFT JOIN(语义不同!)
EXPLAIN (ANALYZE, VERBOSE)
SELECT c.name, COUNT(DISTINCT o.order_id) as order_count,
COUNT(DISTINCT p.product_id) as product_types
FROM products p
LEFT JOIN order_items oi ON p.product_id = oi.product_id
LEFT JOIN orders o ON oi.order_id = o.order_id
LEFT JOIN customers c ON o.customer_id = c.customer_id
WHERE c.country = 'USA'
GROUP BY c.customer_id, c.name;
混合 JOIN 类型优化
sql
-- 创建复杂查询优化函数
CREATE OR REPLACE FUNCTION optimize_complex_join(
p_start_date DATE,
p_end_date DATE
)
RETURNS TABLE(
optimization_level TEXT,
execution_plan TEXT,
estimated_rows BIGINT,
estimated_cost NUMERIC
) AS $$
DECLARE
v_plan JSONB;
BEGIN
-- 基础查询 - 未优化
SET LOCAL join_collapse_limit = 12;
EXECUTE '
EXPLAIN (FORMAT JSON)
SELECT c.name, s.name as supplier,
COUNT(*) as item_count,
SUM(oi.quantity * oi.unit_price) as total_value
FROM customers c
INNER JOIN orders o ON c.customer_id = o.customer_id
LEFT JOIN order_items oi ON o.order_id = oi.order_id
INNER JOIN products p ON oi.product_id = p.product_id
LEFT JOIN suppliers s ON p.supplier_id = s.supplier_id
WHERE o.order_date BETWEEN $1 AND $2
GROUP BY c.customer_id, c.name, s.supplier_id, s.name'
USING p_start_date, p_end_date
INTO v_plan;
RETURN QUERY
SELECT 'Unoptimized',
v_plan::JSON->0->'Plan'->>'Node Type',
(v_plan::JSON->0->'Plan'->>'Plan Rows')::BIGINT,
(v_plan::JSON->0->'Plan'->>'Total Cost')::NUMERIC;
-- 强制特定连接顺序
SET LOCAL join_collapse_limit = 1;
EXECUTE '
EXPLAIN (FORMAT JSON)
SELECT c.name, s.name as supplier,
COUNT(*) as item_count,
SUM(oi.quantity * oi.unit_price) as total_value
FROM (customers c
INNER JOIN orders o ON c.customer_id = o.customer_id
AND o.order_date BETWEEN $1 AND $2)
LEFT JOIN (order_items oi
INNER JOIN products p ON oi.product_id = p.product_id
LEFT JOIN suppliers s ON p.supplier_id = s.supplier_id)
ON o.order_id = oi.order_id
GROUP BY c.customer_id, c.name, s.supplier_id, s.name'
USING p_start_date, p_end_date
INTO v_plan;
RETURN QUERY
SELECT 'Forced Order',
v_plan::JSON->0->'Plan'->>'Node Type',
(v_plan::JSON->0->'Plan'->>'Plan Rows')::BIGINT,
(v_plan::JSON->0->'Plan'->>'Total Cost')::NUMERIC;
RESET join_collapse_limit;
END;
$$ LANGUAGE plpgsql;
子查询折叠与优化
from_collapse_limit 的作用
sql
-- 创建视图来演示子查询折叠
CREATE VIEW high_value_orders AS
SELECT o.*,
SUM(oi.quantity * oi.unit_price) as order_total
FROM orders o
JOIN order_items oi ON o.order_id = oi.order_id
GROUP BY o.order_id
HAVING SUM(oi.quantity * oi.unit_price) > 500;
-- 测试子查询折叠
CREATE OR REPLACE FUNCTION test_subquery_collapse()
RETURNS TABLE(
scenario TEXT,
collapse_limit INT,
execution_time INTERVAL,
plan_complexity TEXT
) AS $$
DECLARE
v_start TIMESTAMP;
v_end TIMESTAMP;
v_plan TEXT;
BEGIN
-- 场景1:允许折叠
SET LOCAL from_collapse_limit = 10;
v_start := clock_timestamp();
PERFORM c.name, hv.order_total
FROM customers c
JOIN high_value_orders hv ON c.customer_id = hv.customer_id
WHERE c.country = 'USA';
v_end := clock_timestamp();
RETURN QUERY
SELECT 'Collapsed', 10, v_end - v_start, 'Simple';
-- 场景2:阻止折叠
SET LOCAL from_collapse_limit = 1;
v_start := clock_timestamp();
PERFORM c.name, hv.order_total
FROM customers c
JOIN high_value_orders hv ON c.customer_id = hv.customer_id
WHERE c.country = 'USA';
v_end := clock_timestamp();
RETURN QUERY
SELECT 'Not Collapsed', 1, v_end - v_start, 'Complex';
RESET from_collapse_limit;
END;
$$ LANGUAGE plpgsql;
复杂查询优化示例
sql
-- 创建包含多层子查询的复杂查询
CREATE OR REPLACE FUNCTION analyze_complex_query_performance()
RETURNS TABLE(
strategy TEXT,
planning_time INTERVAL,
execution_time INTERVAL,
total_time INTERVAL,
rows_returned BIGINT
) AS $$
DECLARE
v_start_plan TIMESTAMP;
v_start_exec TIMESTAMP;
v_end TIMESTAMP;
v_count BIGINT;
BEGIN
-- 策略1:完全展开
SET LOCAL from_collapse_limit = 100;
SET LOCAL join_collapse_limit = 100;
v_start_plan := clock_timestamp();
-- 强制重新规划
DISCARD PLANS;
v_start_exec := clock_timestamp();
SELECT COUNT(*) INTO v_count
FROM (
SELECT c.customer_id, c.name,
COUNT(DISTINCT o.order_id) as order_count,
SUM(sub.item_total) as customer_total
FROM customers c
JOIN orders o ON c.customer_id = o.customer_id
JOIN (
SELECT oi.order_id,
SUM(oi.quantity * oi.unit_price) as item_total,
COUNT(*) as item_count
FROM order_items oi
JOIN products p ON oi.product_id = p.product_id
WHERE p.category IN ('Electronics', 'Books')
GROUP BY oi.order_id
HAVING COUNT(*) > 2
) sub ON o.order_id = sub.order_id
WHERE o.order_date >= CURRENT_DATE - INTERVAL '1 year'
GROUP BY c.customer_id, c.name
HAVING COUNT(DISTINCT o.order_id) > 5
) final_result;
v_end := clock_timestamp();
RETURN QUERY
SELECT 'Full Expansion',
v_start_exec - v_start_plan,
v_end - v_start_exec,
v_end - v_start_plan,
v_count;
-- 策略2:限制折叠
SET LOCAL from_collapse_limit = 4;
SET LOCAL join_collapse_limit = 4;
v_start_plan := clock_timestamp();
DISCARD PLANS;
v_start_exec := clock_timestamp();
-- 执行相同查询
SELECT COUNT(*) INTO v_count
FROM (
SELECT c.customer_id, c.name,
COUNT(DISTINCT o.order_id) as order_count,
SUM(sub.item_total) as customer_total
FROM customers c
JOIN orders o ON c.customer_id = o.customer_id
JOIN (
SELECT oi.order_id,
SUM(oi.quantity * oi.unit_price) as item_total,
COUNT(*) as item_count
FROM order_items oi
JOIN products p ON oi.product_id = p.product_id
WHERE p.category IN ('Electronics', 'Books')
GROUP BY oi.order_id
HAVING COUNT(*) > 2
) sub ON o.order_id = sub.order_id
WHERE o.order_date >= CURRENT_DATE - INTERVAL '1 year'
GROUP BY c.customer_id, c.name
HAVING COUNT(DISTINCT o.order_id) > 5
) final_result;
v_end := clock_timestamp();
RETURN QUERY
SELECT 'Limited Collapse',
v_start_exec - v_start_plan,
v_end - v_start_exec,
v_end - v_start_plan,
v_count;
RESET from_collapse_limit;
RESET join_collapse_limit;
END;
$$ LANGUAGE plpgsql;
实际应用场景
场景1:报表查询优化
sql
-- 创建销售报表查询优化
CREATE OR REPLACE FUNCTION optimize_sales_report(
p_year INT,
p_min_revenue DECIMAL
)
RETURNS TABLE(
country VARCHAR,
category VARCHAR,
total_orders BIGINT,
total_revenue DECIMAL,
avg_order_value DECIMAL,
top_customer VARCHAR
) AS $$
BEGIN
-- 使用优化的 JOIN 顺序
SET LOCAL join_collapse_limit = 1;
RETURN QUERY
WITH customer_totals AS (
-- 先计算每个客户的总额
SELECT c.customer_id, c.name, c.country,
SUM(oi.quantity * oi.unit_price) as customer_total
FROM customers c
JOIN orders o ON c.customer_id = o.customer_id
JOIN order_items oi ON o.order_id = oi.order_id
WHERE EXTRACT(YEAR FROM o.order_date) = p_year
GROUP BY c.customer_id, c.name, c.country
HAVING SUM(oi.quantity * oi.unit_price) > p_min_revenue
),
category_stats AS (
-- 按类别统计
SELECT ct.country, p.category,
COUNT(DISTINCT o.order_id) as order_count,
SUM(oi.quantity * oi.unit_price) as revenue
FROM customer_totals ct
JOIN orders o ON ct.customer_id = o.customer_id
JOIN order_items oi ON o.order_id = oi.order_id
JOIN products p ON oi.product_id = p.product_id
WHERE EXTRACT(YEAR FROM o.order_date) = p_year
GROUP BY ct.country, p.category
)
SELECT
cs.country,
cs.category,
cs.order_count,
cs.revenue,
cs.revenue / NULLIF(cs.order_count, 0) as avg_order_value,
(SELECT ct.name
FROM customer_totals ct
WHERE ct.country = cs.country
ORDER BY ct.customer_total DESC
LIMIT 1) as top_customer
FROM category_stats cs
ORDER BY cs.country, cs.revenue DESC;
RESET join_collapse_limit;
END;
$$ LANGUAGE plpgsql;
场景2:实时分析查询
sql
-- 创建实时分析优化器
CREATE OR REPLACE FUNCTION realtime_analytics_optimizer(
p_time_window INTERVAL,
p_threshold DECIMAL
)
RETURNS VOID AS $$
DECLARE
v_start TIMESTAMP;
v_end TIMESTAMP;
v_duration INTERVAL;
v_row_count BIGINT;
BEGIN
-- 策略1:标准查询
v_start := clock_timestamp();
SELECT COUNT(*) INTO v_row_count
FROM orders o
JOIN order_items oi ON o.order_id = oi.order_id
JOIN products p ON oi.product_id = p.product_id
JOIN customers c ON o.customer_id = c.customer_id
WHERE o.order_date > CURRENT_TIMESTAMP - p_time_window
AND oi.quantity * oi.unit_price > p_threshold;
v_end := clock_timestamp();
RAISE NOTICE 'Standard query: % rows in %', v_row_count, v_end - v_start;
-- 策略2:优化 JOIN 顺序
SET LOCAL join_collapse_limit = 1;
v_start := clock_timestamp();
SELECT COUNT(*) INTO v_row_count
FROM (
SELECT oi.order_id, oi.product_id
FROM order_items oi
WHERE oi.quantity * oi.unit_price > p_threshold
) filtered_items
JOIN orders o ON filtered_items.order_id = o.order_id
AND o.order_date > CURRENT_TIMESTAMP - p_time_window
JOIN products p ON filtered_items.product_id = p.product_id
JOIN customers c ON o.customer_id = c.customer_id;
v_end := clock_timestamp();
RAISE NOTICE 'Optimized query: % rows in %', v_row_count, v_end - v_start;
RESET join_collapse_limit;
END;
$$ LANGUAGE plpgsql;
最佳实践和建议
1. JOIN 顺序优化决策树
2. 参数调优指南
sql
-- 创建参数调优建议函数
CREATE OR REPLACE FUNCTION recommend_join_settings(
p_table_count INT,
p_query_complexity TEXT -- 'simple', 'moderate', 'complex'
)
RETURNS TABLE(
parameter TEXT,
recommended_value TEXT,
reason TEXT
) AS $$
BEGIN
-- join_collapse_limit
RETURN QUERY
SELECT
'join_collapse_limit',
CASE
WHEN p_table_count <= 4 THEN '8'
WHEN p_table_count <= 8 THEN '4'
WHEN p_query_complexity = 'simple' THEN '2'
ELSE '1'
END,
CASE
WHEN p_table_count <= 4 THEN 'Small query, let planner optimize'
WHEN p_table_count <= 8 THEN 'Moderate size, balance planning time'
ELSE 'Large query, control join order manually'
END;
-- from_collapse_limit
RETURN QUERY
SELECT
'from_collapse_limit',
CASE
WHEN p_query_complexity = 'simple' THEN '8'
WHEN p_query_complexity = 'moderate' THEN '4'
ELSE '2'
END,
'Control subquery expansion based on complexity';
-- geqo_threshold
RETURN QUERY
SELECT
'geqo_threshold',
CASE
WHEN p_table_count <= 10 THEN '12'
ELSE '10'
END,
'Genetic optimizer threshold for very complex queries';
END;
$$ LANGUAGE plpgsql;
-- 使用示例
SELECT * FROM recommend_join_settings(6, 'moderate');
3. 查询重写模板
sql
-- 创建查询重写辅助函数
CREATE OR REPLACE FUNCTION rewrite_query_for_performance(
p_original_query TEXT
)
RETURNS TEXT AS $$
DECLARE
v_rewritten TEXT;
BEGIN
-- 这是一个概念示例,实际实现会更复杂
v_rewritten := p_original_query;
-- 规则1:将过滤条件高的表放在前面
-- 规则2:小表驱动大表
-- 规则3:利用索引的 JOIN 优先
RETURN format('
-- Original query has been rewritten for performance
-- Consider the following optimizations:
-- 1. Filter early: Move selective predicates up
-- 2. Join order: Small tables first
-- 3. Use explicit JOIN syntax with collapse_limit = 1
SET LOCAL join_collapse_limit = 1;
%s
RESET join_collapse_limit;
', v_rewritten);
END;
$$ LANGUAGE plpgsql;
性能监控和诊断
监控 JOIN 性能
sql
-- 创建 JOIN 性能监控视图
CREATE VIEW join_performance_stats AS
WITH query_stats AS (
SELECT
queryid,
query,
calls,
total_exec_time,
mean_exec_time,
rows,
100.0 * shared_blks_hit / NULLIF(shared_blks_hit + shared_blks_read, 0) as hit_ratio
FROM pg_stat_statements
WHERE query LIKE '%JOIN%'
AND query NOT LIKE '%pg_stat_statements%'
)
SELECT
queryid,
LEFT(query, 100) || '...' as query_preview,
calls,
round(mean_exec_time::numeric, 2) as avg_time_ms,
round(total_exec_time::numeric / 1000, 2) as total_time_sec,
rows,
round(hit_ratio, 2) as cache_hit_pct,
CASE
WHEN mean_exec_time > 1000 THEN 'SLOW'
WHEN mean_exec_time > 100 THEN 'MODERATE'
ELSE 'FAST'
END as performance_category
FROM query_stats
ORDER BY total_exec_time DESC
LIMIT 50;
-- 诊断特定查询的 JOIN 顺序
CREATE OR REPLACE FUNCTION diagnose_join_order(
p_query TEXT
)
RETURNS TABLE(
node_type TEXT,
join_type TEXT,
startup_cost NUMERIC,
total_cost NUMERIC,
plan_rows BIGINT,
plan_width INT
) AS $$
DECLARE
v_plan JSONB;
BEGIN
EXECUTE 'EXPLAIN (FORMAT JSON) ' || p_query INTO v_plan;
-- 递归提取所有 JOIN 节点
RETURN QUERY
WITH RECURSIVE plan_nodes AS (
SELECT
v_plan::JSON->0->'Plan' as node,
0 as level
UNION ALL
SELECT
CASE
WHEN jsonb_typeof(element) = 'object' THEN element
ELSE NULL
END,
level + 1
FROM plan_nodes,
LATERAL (
SELECT jsonb_array_elements(
CASE
WHEN node->'Plans' IS NOT NULL
THEN node->'Plans'
ELSE '[]'::JSONB
END
) as element
) sub
WHERE node IS NOT NULL
)
SELECT
node->>'Node Type',
node->>'Join Type',
(node->>'Startup Cost')::NUMERIC,
(node->>'Total Cost')::NUMERIC,
(node->>'Plan Rows')::BIGINT,
(node->>'Plan Width')::INT
FROM plan_nodes
WHERE node->>'Node Type' LIKE '%Join%'
OR node->>'Node Type' IN ('Nested Loop', 'Hash', 'Merge')
ORDER BY level;
END;
$$ LANGUAGE plpgsql;
总结
掌握显式 JOIN 控制是优化复杂查询的重要技能:
- 理解连接顺序的影响:不同的连接顺序可能导致巨大的性能差异
- 合理使用参数:join_collapse_limit 和 from_collapse_limit 提供灵活控制
- 显式 JOIN 的优势:在复杂查询中能够精确控制执行计划
- 外连接的特殊性:LEFT/RIGHT JOIN 本身就限制了连接顺序
- 平衡规划时间和执行时间:找到适合你的应用的最佳配置
关键要点
- 小查询(< 5 表):让优化器自由选择
- 中等查询(5-10 表):考虑适度限制
- 大查询(> 10 表):主动控制连接顺序
- 高频查询:值得手动优化
- 监控和调整:持续优化是关键
注意事项
过度限制优化器可能导致错过更好的执行计划。建议:
- 先测量,后优化
- 在测试环境验证
- 保留原始查询作为基准
- 定期重新评估