In commit bd3e3e9e5, I over-hastily used 1 / rel->rows as the assumed
frequency of entries in a column that ANALYZE has found to be unique.
However, rel->rows is the number of table rows that are estimated to
pass the query's restriction conditions, so that we got a too-large
result if the query has selective restrictions. What I should have
used is 1 / rel->tuples, since that is the estimated total number of
table rows. The pre-existing code path that digs a frequency out of
the histogram produces a frequency relative to the whole table, so
surely this new alternative code path must do so as well. Any
correction needed on the basis of selectivity must be done by the
user of the mcv_freq value.
Fixing this causes all the regression test plans changed by bd3e3e9e5
to revert to what they had been, except for the first change in
join.out. As I correctly argued in bd3e3e9e5, in that test case we
have no stats and should not risk a hash join. Evidently I was less
correct to argue that the other changes were improvements.
Reported-by: Joel Jacobson <joel@compiler.org>
Diagnosed-by: Tender Wang <tndrwang@gmail.com>
Author: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/341b723c-da45-4058-9446-1514dedb17c1@app.fastmail.com
SELECT t1, t2 FROM prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
@ -297,23 +297,23 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.a <
-- Currently we can't do partitioned join if nullable-side partitions are pruned
EXPLAIN (COSTS OFF)
SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
@ -778,23 +778,23 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_e t1, prt2_e t2 WHERE (t1.a + t1.b)/2 =
SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_e t1, prt2_e t2 WHERE (t1.a + t1.b)/2 = (t2.b + t2.a)/2 AND t1.c = 0 ORDER BY t1.a, t2.b;
@ -864,26 +864,26 @@ SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2
Sort
Sort Key: t1.a, t2.b, ((t3.a + t3.b))
-> Append
-> Hash Left Join
Hash Cond: (t1_1.a = ((t3_1.a + t3_1.b) / 2))
-> Hash Right Join
Hash Cond: (t2_1.b = t1_1.a)
-> Seq Scan on prt2_p1 t2_1
-> Hash
-> Seq Scan on prt1_p1 t1_1
Filter: (b = 0)
-> Hash Right Join
Hash Cond: (((t3_1.a + t3_1.b) / 2) = t1_1.a)
-> Seq Scan on prt1_e_p1 t3_1
-> Hash
-> Seq Scan on prt1_e_p1 t3_1
-> Hash Left Join
Hash Cond: (t1_2.a = ((t3_2.a + t3_2.b) / 2))
-> Hash Right Join
Hash Cond: (t2_2.b = t1_2.a)
-> Seq Scan on prt2_p2 t2_2
-> Hash
-> Seq Scan on prt1_p2 t1_2
Filter: (b = 0)
-> Hash Right Join
Hash Cond: (t2_1.b = t1_1.a)
-> Seq Scan on prt2_p1 t2_1
-> Hash
-> Seq Scan on prt1_p1 t1_1
Filter: (b = 0)
-> Hash Right Join
Hash Cond: (((t3_2.a + t3_2.b) / 2) = t1_2.a)
-> Seq Scan on prt1_e_p2 t3_2
-> Hash
-> Seq Scan on prt1_e_p2 t3_2
-> Hash Right Join
Hash Cond: (t2_2.b = t1_2.a)
-> Seq Scan on prt2_p2 t2_2
-> Hash
-> Seq Scan on prt1_p2 t1_2
Filter: (b = 0)
-> Hash Right Join
Hash Cond: (((t3_3.a + t3_3.b) / 2) = t1_3.a)
-> Seq Scan on prt1_e_p3 t3_3
@ -921,21 +921,21 @@ SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2
Sort Key: t1.a, t2.b, ((t3.a + t3.b))
-> Append
-> Nested Loop Left Join
-> Hash Left Join
Hash Cond: (((t3_1.a + t3_1.b) / 2) = t1_1.a)
-> Seq Scan on prt1_e_p1 t3_1
Filter: (c = 0)
-> Hash Right Join
Hash Cond: (t1_1.a = ((t3_1.a + t3_1.b) / 2))
-> Seq Scan on prt1_p1 t1_1
-> Hash
-> Seq Scan on prt1_p1 t1_1
-> Seq Scan on prt1_e_p1 t3_1
Filter: (c = 0)
-> Index Scan using iprt2_p1_b on prt2_p1 t2_1
Index Cond: (b = t1_1.a)
-> Nested Loop Left Join
-> Hash Left Join
Hash Cond: (((t3_2.a + t3_2.b) / 2) = t1_2.a)
-> Seq Scan on prt1_e_p2 t3_2
Filter: (c = 0)
-> Hash Right Join
Hash Cond: (t1_2.a = ((t3_2.a + t3_2.b) / 2))
-> Seq Scan on prt1_p2 t1_2
-> Hash
-> Seq Scan on prt1_p2 t1_2
-> Seq Scan on prt1_e_p2 t3_2
Filter: (c = 0)
-> Index Scan using iprt2_p2_b on prt2_p2 t2_2
Index Cond: (b = t1_2.a)
-> Nested Loop Left Join
@ -1080,14 +1080,14 @@ SELECT COUNT(*) FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b) FULL JOIN prt2 p3(
-- make sure these go to null as expected
EXPLAIN (COSTS OFF)
SELECT t1.a, t1.phv, t2.b, t2.phv, t3.a + t3.b, t3.phv FROM ((SELECT 50 phv, * FROM prt1 WHERE prt1.b = 0) t1 FULL JOIN (SELECT 75 phv, * FROM prt2 WHERE prt2.a = 0) t2 ON (t1.a = t2.b)) FULL JOIN (SELECT 50 phv, * FROM prt1_e WHERE prt1_e.c = 0) t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t1.a = t1.phv OR t2.b = t2.phv OR (t3.a + t3.b)/2 = t3.phv ORDER BY t1.a, t2.b, t3.a + t3.b;
Filter: ((prt1_3.a = (50)) OR (prt2_3.b = (75)) OR (((prt1_e_3.a + prt1_e_3.b) / 2) = (50)))
-> Hash Full Join
Hash Cond: (prt1_3.a = prt2_3.b)
-> Seq Scan on prt1_p3 prt1_3
@ -1109,15 +1121,10 @@ SELECT t1.a, t1.phv, t2.b, t2.phv, t3.a + t3.b, t3.phv FROM ((SELECT 50 phv, * F
-> Hash
-> Seq Scan on prt2_p3 prt2_3
Filter: (a = 0)
-> Hash
-> Append
-> Seq Scan on prt1_e_p1 prt1_e_1
Filter: (c = 0)
-> Seq Scan on prt1_e_p2 prt1_e_2
Filter: (c = 0)
-> Hash
-> Seq Scan on prt1_e_p3 prt1_e_3
Filter: (c = 0)
(35 rows)
(42 rows)
SELECT t1.a, t1.phv, t2.b, t2.phv, t3.a + t3.b, t3.phv FROM ((SELECT 50 phv, * FROM prt1 WHERE prt1.b = 0) t1 FULL JOIN (SELECT 75 phv, * FROM prt2 WHERE prt2.a = 0) t2 ON (t1.a = t2.b)) FULL JOIN (SELECT 50 phv, * FROM prt1_e WHERE prt1_e.c = 0) t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t1.a = t1.phv OR t2.b = t2.phv OR (t3.a + t3.b)/2 = t3.phv ORDER BY t1.a, t2.b, t3.a + t3.b;
a | phv | b | phv | ?column? | phv
@ -1139,11 +1146,11 @@ SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1, prt1_e t2 WHER
-> Sort
Sort Key: t1_5.b
-> Hash Join
Hash Cond: (t1_5.b = ((t2_1.a + t2_1.b) / 2))
-> Seq Scan on prt2_p1 t1_5
Filter: (a = 0)
Hash Cond: (((t2_1.a + t2_1.b) / 2) = t1_5.b)
-> Seq Scan on prt1_e_p1 t2_1
-> Hash
-> Seq Scan on prt1_e_p1 t2_1
-> Seq Scan on prt2_p1 t1_5
Filter: (a = 0)
-> Index Scan using iprt1_p1_a on prt1_p1 t1_2
Index Cond: (a = ((t2_1.a + t2_1.b) / 2))
Filter: (b = 0)
@ -1153,11 +1160,11 @@ SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1, prt1_e t2 WHER
-> Sort
Sort Key: t1_6.b
-> Hash Join
Hash Cond: (t1_6.b = ((t2_2.a + t2_2.b) / 2))
-> Seq Scan on prt2_p2 t1_6
Filter: (a = 0)
Hash Cond: (((t2_2.a + t2_2.b) / 2) = t1_6.b)
-> Seq Scan on prt1_e_p2 t2_2
-> Hash
-> Seq Scan on prt1_e_p2 t2_2
-> Seq Scan on prt2_p2 t1_6
Filter: (a = 0)
-> Index Scan using iprt1_p2_a on prt1_p2 t1_3
Index Cond: (a = ((t2_2.a + t2_2.b) / 2))
Filter: (b = 0)
@ -1937,12 +1944,12 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_l t1 LEFT JOIN prt2_l t2 ON t1.a = t2.b
Sort
Sort Key: t1.a, t2.b
-> Append
-> Hash Left Join
Hash Cond: ((t1_1.a = t2_1.b) AND ((t1_1.c)::text = (t2_1.c)::text))
-> Seq Scan on prt1_l_p1 t1_1
Filter: (b = 0)
-> Hash Right Join
Hash Cond: ((t2_1.b = t1_1.a) AND ((t2_1.c)::text = (t1_1.c)::text))
-> Seq Scan on prt2_l_p1 t2_1
-> Hash
-> Seq Scan on prt2_l_p1 t2_1
-> Seq Scan on prt1_l_p1 t1_1
Filter: (b = 0)
-> Hash Right Join
Hash Cond: ((t2_2.b = t1_2.a) AND ((t2_2.c)::text = (t1_2.c)::text))
-> Seq Scan on prt2_l_p2_p1 t2_2
@ -2961,26 +2968,26 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_adv t1 LEFT JOIN prt2_adv t2 ON (t1.a =
-- partitions on the nullable side
EXPLAIN (COSTS OFF)
SELECT t1.b, t1.c, t2.a, t2.c FROM prt2_adv t1 LEFT JOIN prt1_adv t2 ON (t1.b = t2.a) WHERE t1.a = 0 ORDER BY t1.b, t2.a;
@ -3319,19 +3326,19 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_adv t1 INNER JOIN prt2_adv t2 ON (t1.a =
Sort
Sort Key: t1.a
-> Hash Join
Hash Cond: (t1.a = t2.b)
Hash Cond: (t2.b = t1.a)
-> Append
-> Seq Scan on prt1_adv_p2 t1_1
Filter: (b = 0)
-> Seq Scan on prt1_adv_p3 t1_2
Filter: (b = 0)
-> Seq Scan on prt1_adv_p1 t1_3
Filter: (b = 0)
-> Seq Scan on prt2_adv_p1 t2_1
-> Seq Scan on prt2_adv_p2 t2_2
-> Seq Scan on prt2_adv_p3 t2_3
-> Hash
-> Append
-> Seq Scan on prt2_adv_p1 t2_1
-> Seq Scan on prt2_adv_p2 t2_2
-> Seq Scan on prt2_adv_p3 t2_3
-> Seq Scan on prt1_adv_p2 t1_1
Filter: (b = 0)
-> Seq Scan on prt1_adv_p3 t1_2
Filter: (b = 0)
-> Seq Scan on prt1_adv_p1 t1_3
Filter: (b = 0)
(16 rows)
ALTER TABLE prt2_adv DETACH PARTITION prt2_adv_p3;
@ -3347,19 +3354,19 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_adv t1 INNER JOIN prt2_adv t2 ON (t1.a =
Sort
Sort Key: t1.a
-> Hash Join
Hash Cond: (t1.a = t2.b)
Hash Cond: (t2.b = t1.a)
-> Append
-> Seq Scan on prt1_adv_p2 t1_1
Filter: (b = 0)
-> Seq Scan on prt1_adv_p3 t1_2
Filter: (b = 0)
-> Seq Scan on prt1_adv_p1 t1_3
Filter: (b = 0)
-> Seq Scan on prt2_adv_p1 t2_1
-> Seq Scan on prt2_adv_p2 t2_2
-> Seq Scan on prt2_adv_p3 t2_3
-> Hash
-> Append
-> Seq Scan on prt2_adv_p1 t2_1
-> Seq Scan on prt2_adv_p2 t2_2
-> Seq Scan on prt2_adv_p3 t2_3
-> Seq Scan on prt1_adv_p2 t1_1
Filter: (b = 0)
-> Seq Scan on prt1_adv_p3 t1_2
Filter: (b = 0)
-> Seq Scan on prt1_adv_p1 t1_3
Filter: (b = 0)
(16 rows)
DROP TABLE prt1_adv_p3;
@ -5011,11 +5018,11 @@ SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2
-> Hash
-> Seq Scan on beta_neg_p1 t2_1
-> Hash Join
Hash Cond: ((t1_2.a = t2_2.a) AND (t1_2.b = t2_2.b))
-> Seq Scan on alpha_neg_p2 t1_2
Filter: ((b >= 125) AND (b < 225))
Hash Cond: ((t2_2.a = t1_2.a) AND (t2_2.b = t1_2.b))
-> Seq Scan on beta_neg_p2 t2_2
-> Hash
-> Seq Scan on beta_neg_p2 t2_2
-> Seq Scan on alpha_neg_p2 t1_2
Filter: ((b >= 125) AND (b < 225))
-> Hash Join
Hash Cond: ((t2_4.a = t1_4.a) AND (t2_4.b = t1_4.b))
-> Append
@ -5134,28 +5141,25 @@ SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.c = t2
EXPLAIN (COSTS OFF)
SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2.b AND t1.c = t2.c) WHERE ((t1.b >= 100 AND t1.b < 110) OR (t1.b >= 200 AND t1.b < 210)) AND ((t2.b >= 100 AND t2.b < 110) OR (t2.b >= 200 AND t2.b < 210)) AND t1.c IN ('0004', '0009') ORDER BY t1.a, t1.b;
Merge Cond: ((t1_1.a = t2_1.a) AND (t1_1.b = t2_1.b) AND (t1_1.c = t2_1.c))
-> Sort
Sort Key: t1_1.a, t1_1.b, t1_1.c
-> Seq Scan on alpha_neg_p1 t1_1
Filter: ((c = ANY ('{0004,0009}'::text[])) AND (((b >= 100) AND (b < 110)) OR ((b >= 200) AND (b < 210))))
-> Sort
Sort Key: t2_1.a, t2_1.b, t2_1.c
-> Hash Join
Hash Cond: ((t1_1.a = t2_1.a) AND (t1_1.b = t2_1.b) AND (t1_1.c = t2_1.c))
-> Seq Scan on alpha_neg_p1 t1_1
Filter: ((c = ANY ('{0004,0009}'::text[])) AND (((b >= 100) AND (b < 110)) OR ((b >= 200) AND (b < 210))))
-> Hash
-> Seq Scan on beta_neg_p1 t2_1
Filter: (((b >= 100) AND (b < 110)) OR ((b >= 200) AND (b < 210)))
-> Hash Join
Hash Cond: ((t2_2.a = t1_2.a) AND (t2_2.b = t1_2.b) AND (t2_2.c = t1_2.c))
-> Seq Scan on beta_neg_p2 t2_2
Filter: (((b >= 100) AND (b < 110)) OR ((b >= 200) AND (b < 210)))
Hash Cond: ((t1_2.a = t2_2.a) AND (t1_2.b = t2_2.b) AND (t1_2.c = t2_2.c))
-> Seq Scan on alpha_neg_p2 t1_2
Filter: ((c = ANY ('{0004,0009}'::text[])) AND (((b >= 100) AND (b < 110)) OR ((b >= 200) AND (b < 210))))
-> Hash
-> Seq Scan on alpha_neg_p2 t1_2
Filter: ((c = ANY ('{0004,0009}'::text[])) AND (((b >= 100) AND (b < 110)) OR ((b >= 200) AND (b < 210))))
-> Seq Scan on beta_neg_p2 t2_2
Filter: (((b >= 100) AND (b < 110)) OR ((b >= 200) AND (b < 210)))
-> Nested Loop
Join Filter: ((t1_3.a = t2_3.a) AND (t1_3.b = t2_3.b) AND (t1_3.c = t2_3.c))
-> Seq Scan on alpha_pos_p2 t1_3
@ -5168,7 +5172,7 @@ SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2
Filter: ((c = ANY ('{0004,0009}'::text[])) AND (((b >= 100) AND (b < 110)) OR ((b >= 200) AND (b < 210))))
-> Seq Scan on beta_pos_p3 t2_4
Filter: (((b >= 100) AND (b < 110)) OR ((b >= 200) AND (b < 210)))
(32 rows)
(29 rows)
SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2.b AND t1.c = t2.c) WHERE ((t1.b >= 100 AND t1.b < 110) OR (t1.b >= 200 AND t1.b < 210)) AND ((t2.b >= 100 AND t2.b < 110) OR (t2.b >= 200 AND t2.b < 210)) AND t1.c IN ('0004', '0009') ORDER BY t1.a, t1.b;