mirror of https://github.com/postgres/postgres
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
436 lines
15 KiB
436 lines
15 KiB
-- Generic extended statistics support
|
|
|
|
-- We will be checking execution plans without/with statistics, so
|
|
-- let's make sure we get simple non-parallel plans. Also set the
|
|
-- work_mem low so that we can use small amounts of data.
|
|
|
|
-- check the number of estimated/actual rows in the top node
|
|
create function check_estimated_rows(text) returns table (estimated int, actual int)
|
|
language plpgsql as
|
|
$$
|
|
declare
|
|
ln text;
|
|
tmp text[];
|
|
first_row bool := true;
|
|
begin
|
|
for ln in
|
|
execute format('explain analyze %s', $1)
|
|
loop
|
|
if first_row then
|
|
first_row := false;
|
|
tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)');
|
|
return query select tmp[1]::int, tmp[2]::int;
|
|
end if;
|
|
end loop;
|
|
end;
|
|
$$;
|
|
|
|
-- Verify failures
|
|
CREATE STATISTICS tst;
|
|
CREATE STATISTICS tst ON a, b;
|
|
CREATE STATISTICS tst FROM sometab;
|
|
CREATE STATISTICS tst ON a, b FROM nonexistent;
|
|
CREATE STATISTICS tst ON a, b FROM pg_class;
|
|
CREATE STATISTICS tst ON relname, relname, relnatts FROM pg_class;
|
|
CREATE STATISTICS tst ON relnatts + relpages FROM pg_class;
|
|
CREATE STATISTICS tst ON (relpages, reltuples) FROM pg_class;
|
|
CREATE STATISTICS tst (unrecognized) ON relname, relnatts FROM pg_class;
|
|
|
|
-- Ensure stats are dropped sanely, and test IF NOT EXISTS while at it
|
|
CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
|
|
CREATE STATISTICS IF NOT EXISTS ab1_a_b_stats ON a, b FROM ab1;
|
|
CREATE STATISTICS IF NOT EXISTS ab1_a_b_stats ON a, b FROM ab1;
|
|
DROP STATISTICS ab1_a_b_stats;
|
|
|
|
CREATE SCHEMA regress_schema_2;
|
|
CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON a, b FROM ab1;
|
|
|
|
-- Let's also verify the pg_get_statisticsobjdef output looks sane.
|
|
SELECT pg_get_statisticsobjdef(oid) FROM pg_statistic_ext WHERE stxname = 'ab1_a_b_stats';
|
|
|
|
DROP STATISTICS regress_schema_2.ab1_a_b_stats;
|
|
|
|
-- Ensure statistics are dropped when columns are
|
|
CREATE STATISTICS ab1_b_c_stats ON b, c FROM ab1;
|
|
CREATE STATISTICS ab1_a_b_c_stats ON a, b, c FROM ab1;
|
|
CREATE STATISTICS ab1_b_a_stats ON b, a FROM ab1;
|
|
ALTER TABLE ab1 DROP COLUMN a;
|
|
\d ab1
|
|
-- Ensure statistics are dropped when table is
|
|
SELECT stxname FROM pg_statistic_ext WHERE stxname LIKE 'ab1%';
|
|
DROP TABLE ab1;
|
|
SELECT stxname FROM pg_statistic_ext WHERE stxname LIKE 'ab1%';
|
|
|
|
-- Ensure things work sanely with SET STATISTICS 0
|
|
CREATE TABLE ab1 (a INTEGER, b INTEGER);
|
|
ALTER TABLE ab1 ALTER a SET STATISTICS 0;
|
|
INSERT INTO ab1 SELECT a, a%23 FROM generate_series(1, 1000) a;
|
|
CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
|
|
ANALYZE ab1;
|
|
ALTER TABLE ab1 ALTER a SET STATISTICS -1;
|
|
-- partial analyze doesn't build stats either
|
|
ANALYZE ab1 (a);
|
|
ANALYZE ab1;
|
|
DROP TABLE ab1;
|
|
|
|
-- Verify supported object types for extended statistics
|
|
CREATE schema tststats;
|
|
|
|
CREATE TABLE tststats.t (a int, b int, c text);
|
|
CREATE INDEX ti ON tststats.t (a, b);
|
|
CREATE SEQUENCE tststats.s;
|
|
CREATE VIEW tststats.v AS SELECT * FROM tststats.t;
|
|
CREATE MATERIALIZED VIEW tststats.mv AS SELECT * FROM tststats.t;
|
|
CREATE TYPE tststats.ty AS (a int, b int, c text);
|
|
CREATE FOREIGN DATA WRAPPER extstats_dummy_fdw;
|
|
CREATE SERVER extstats_dummy_srv FOREIGN DATA WRAPPER extstats_dummy_fdw;
|
|
CREATE FOREIGN TABLE tststats.f (a int, b int, c text) SERVER extstats_dummy_srv;
|
|
CREATE TABLE tststats.pt (a int, b int, c text) PARTITION BY RANGE (a, b);
|
|
CREATE TABLE tststats.pt1 PARTITION OF tststats.pt FOR VALUES FROM (-10, -10) TO (10, 10);
|
|
|
|
CREATE STATISTICS tststats.s1 ON a, b FROM tststats.t;
|
|
CREATE STATISTICS tststats.s2 ON a, b FROM tststats.ti;
|
|
CREATE STATISTICS tststats.s3 ON a, b FROM tststats.s;
|
|
CREATE STATISTICS tststats.s4 ON a, b FROM tststats.v;
|
|
CREATE STATISTICS tststats.s5 ON a, b FROM tststats.mv;
|
|
CREATE STATISTICS tststats.s6 ON a, b FROM tststats.ty;
|
|
CREATE STATISTICS tststats.s7 ON a, b FROM tststats.f;
|
|
CREATE STATISTICS tststats.s8 ON a, b FROM tststats.pt;
|
|
CREATE STATISTICS tststats.s9 ON a, b FROM tststats.pt1;
|
|
DO $$
|
|
DECLARE
|
|
relname text := reltoastrelid::regclass FROM pg_class WHERE oid = 'tststats.t'::regclass;
|
|
BEGIN
|
|
EXECUTE 'CREATE STATISTICS tststats.s10 ON a, b FROM ' || relname;
|
|
EXCEPTION WHEN wrong_object_type THEN
|
|
RAISE NOTICE 'stats on toast table not created';
|
|
END;
|
|
$$;
|
|
|
|
DROP SCHEMA tststats CASCADE;
|
|
DROP FOREIGN DATA WRAPPER extstats_dummy_fdw CASCADE;
|
|
|
|
-- n-distinct tests
|
|
CREATE TABLE ndistinct (
|
|
filler1 TEXT,
|
|
filler2 NUMERIC,
|
|
a INT,
|
|
b INT,
|
|
filler3 DATE,
|
|
c INT,
|
|
d INT
|
|
);
|
|
|
|
-- over-estimates when using only per-column statistics
|
|
INSERT INTO ndistinct (a, b, c, filler1)
|
|
SELECT i/100, i/100, i/100, cash_words((i/100)::money)
|
|
FROM generate_series(1,1000) s(i);
|
|
|
|
ANALYZE ndistinct;
|
|
|
|
-- Group Aggregate, due to over-estimate of the number of groups
|
|
SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d');
|
|
|
|
-- correct command
|
|
CREATE STATISTICS s10 ON a, b, c FROM ndistinct;
|
|
|
|
ANALYZE ndistinct;
|
|
|
|
SELECT stxkind, stxndistinct
|
|
FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
|
|
|
|
-- Hash Aggregate, thanks to estimates improved by the statistic
|
|
SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c');
|
|
|
|
-- last two plans keep using Group Aggregate, because 'd' is not covered
|
|
-- by the statistic and while it's NULL-only we assume 200 values for it
|
|
SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d');
|
|
|
|
TRUNCATE TABLE ndistinct;
|
|
|
|
-- under-estimates when using only per-column statistics
|
|
INSERT INTO ndistinct (a, b, c, filler1)
|
|
SELECT mod(i,50), mod(i,51), mod(i,32),
|
|
cash_words(mod(i,33)::int::money)
|
|
FROM generate_series(1,5000) s(i);
|
|
|
|
ANALYZE ndistinct;
|
|
|
|
SELECT stxkind, stxndistinct
|
|
FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
|
|
|
|
-- correct esimates
|
|
SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, d');
|
|
|
|
DROP STATISTICS s10;
|
|
|
|
SELECT stxkind, stxndistinct
|
|
FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
|
|
|
|
-- dropping the statistics results in under-estimates
|
|
SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, d');
|
|
|
|
-- functional dependencies tests
|
|
CREATE TABLE functional_dependencies (
|
|
filler1 TEXT,
|
|
filler2 NUMERIC,
|
|
a INT,
|
|
b TEXT,
|
|
filler3 DATE,
|
|
c INT,
|
|
d TEXT
|
|
);
|
|
|
|
CREATE INDEX fdeps_ab_idx ON functional_dependencies (a, b);
|
|
CREATE INDEX fdeps_abc_idx ON functional_dependencies (a, b, c);
|
|
|
|
-- random data (no functional dependencies)
|
|
INSERT INTO functional_dependencies (a, b, c, filler1)
|
|
SELECT mod(i, 23), mod(i, 29), mod(i, 31), i FROM generate_series(1,5000) s(i);
|
|
|
|
ANALYZE functional_dependencies;
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1''');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
|
|
|
|
-- create statistics
|
|
CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies;
|
|
|
|
ANALYZE functional_dependencies;
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1''');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
|
|
|
|
-- a => b, a => c, b => c
|
|
TRUNCATE functional_dependencies;
|
|
DROP STATISTICS func_deps_stat;
|
|
|
|
INSERT INTO functional_dependencies (a, b, c, filler1)
|
|
SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i);
|
|
|
|
ANALYZE functional_dependencies;
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1''');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
|
|
|
|
-- create statistics
|
|
CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies;
|
|
|
|
ANALYZE functional_dependencies;
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1''');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
|
|
|
|
-- check change of column type doesn't break it
|
|
ALTER TABLE functional_dependencies ALTER COLUMN c TYPE numeric;
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
|
|
|
|
ANALYZE functional_dependencies;
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
|
|
|
|
-- MCV lists
|
|
CREATE TABLE mcv_lists (
|
|
filler1 TEXT,
|
|
filler2 NUMERIC,
|
|
a INT,
|
|
b VARCHAR,
|
|
filler3 DATE,
|
|
c INT,
|
|
d TEXT
|
|
);
|
|
|
|
-- random data (no MCV list)
|
|
INSERT INTO mcv_lists (a, b, c, filler1)
|
|
SELECT mod(i,37), mod(i,41), mod(i,43), mod(i,47) FROM generate_series(1,5000) s(i);
|
|
|
|
ANALYZE mcv_lists;
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1');
|
|
|
|
-- create statistics
|
|
CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
|
|
|
|
ANALYZE mcv_lists;
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1');
|
|
|
|
-- 100 distinct combinations, all in the MCV list
|
|
TRUNCATE mcv_lists;
|
|
DROP STATISTICS mcv_lists_stats;
|
|
|
|
INSERT INTO mcv_lists (a, b, c, filler1)
|
|
SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i);
|
|
|
|
ANALYZE mcv_lists;
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 1 AND b < ''1''');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 0 AND b <= ''0''');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 5 AND b < ''1'' AND c < 5');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <= ''0'' AND c <= 4');
|
|
|
|
-- create statistics
|
|
CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
|
|
|
|
ANALYZE mcv_lists;
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 1 AND b < ''1''');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 0 AND b <= ''0''');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 5 AND b < ''1'' AND c < 5');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <= ''0'' AND c <= 4');
|
|
|
|
-- check change of unrelated column type does not reset the MCV statistics
|
|
ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64);
|
|
|
|
SELECT stxmcv IS NOT NULL FROM pg_statistic_ext WHERE stxname = 'mcv_lists_stats';
|
|
|
|
-- check change of column type resets the MCV statistics
|
|
ALTER TABLE mcv_lists ALTER COLUMN c TYPE numeric;
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
|
|
|
|
ANALYZE mcv_lists;
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
|
|
|
|
-- 100 distinct combinations with NULL values, all in the MCV list
|
|
TRUNCATE mcv_lists;
|
|
DROP STATISTICS mcv_lists_stats;
|
|
|
|
INSERT INTO mcv_lists (a, b, c, filler1)
|
|
SELECT
|
|
(CASE WHEN mod(i,100) = 1 THEN NULL ELSE mod(i,100) END),
|
|
(CASE WHEN mod(i,50) = 1 THEN NULL ELSE mod(i,50) END),
|
|
(CASE WHEN mod(i,25) = 1 THEN NULL ELSE mod(i,25) END),
|
|
i
|
|
FROM generate_series(1,5000) s(i);
|
|
|
|
ANALYZE mcv_lists;
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL AND c IS NULL');
|
|
|
|
-- create statistics
|
|
CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
|
|
|
|
ANALYZE mcv_lists;
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND b IS NULL AND c IS NULL');
|
|
|
|
-- test pg_mcv_list_items with a very simple (single item) MCV list
|
|
TRUNCATE mcv_lists;
|
|
INSERT INTO mcv_lists (a, b, c) SELECT 1, 2, 3 FROM generate_series(1,1000) s(i);
|
|
ANALYZE mcv_lists;
|
|
|
|
SELECT m.* FROM pg_statistic_ext,
|
|
pg_mcv_list_items(stxmcv) m WHERE stxname = 'mcv_lists_stats';
|
|
|
|
-- mcv with arrays
|
|
CREATE TABLE mcv_lists_arrays (
|
|
a TEXT[],
|
|
b NUMERIC[],
|
|
c INT[]
|
|
);
|
|
|
|
INSERT INTO mcv_lists_arrays (a, b, c)
|
|
SELECT
|
|
ARRAY[md5((i/100)::text), md5((i/100-1)::text), md5((i/100+1)::text)],
|
|
ARRAY[(i/100-1)::numeric/1000, (i/100)::numeric/1000, (i/100+1)::numeric/1000],
|
|
ARRAY[(i/100-1), i/100, (i/100+1)]
|
|
FROM generate_series(1,5000) s(i);
|
|
|
|
CREATE STATISTICS mcv_lists_arrays_stats (mcv) ON a, b, c
|
|
FROM mcv_lists_arrays;
|
|
|
|
ANALYZE mcv_lists_arrays;
|
|
|
|
-- mcv with bool
|
|
CREATE TABLE mcv_lists_bool (
|
|
a BOOL,
|
|
b BOOL,
|
|
c BOOL
|
|
);
|
|
|
|
INSERT INTO mcv_lists_bool (a, b, c)
|
|
SELECT
|
|
(mod(i,2) = 0), (mod(i,4) = 0), (mod(i,8) = 0)
|
|
FROM generate_series(1,10000) s(i);
|
|
|
|
ANALYZE mcv_lists_bool;
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE a AND b AND c');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND c');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND NOT b AND c');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND NOT c');
|
|
|
|
CREATE STATISTICS mcv_lists_bool_stats (mcv) ON a, b, c
|
|
FROM mcv_lists_bool;
|
|
|
|
ANALYZE mcv_lists_bool;
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE a AND b AND c');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND c');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND NOT b AND c');
|
|
|
|
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND NOT c');
|
|
|