os: centos 7.4
db: postgresql 12.2
postgresql 12 的分區表已經比較完善。
版本
# cat /etc/centos-release
CentOS Linux release 7.4.1708 (Core)
#
# su - postgres
Last login: Thu Mar 19 14:47:45 CST 2020 on pts/0
$
$ psql
psql (12.2)
Type "help" for help.
postgres=# select version();
version
---------------------------------------------------------------------------------------------------------
PostgreSQL 12.2 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-39), 64-bit
(1 row)
postgres=# show enable_partition_pruning;
enable_partition_pruning
--------------------------
on
(1 row)
postgres=# select name,setting from pg_settings where name like '%partition%';
name | setting
-----------------------------------+---------
enable_partition_pruning | on
enable_partitionwise_aggregate | off
enable_partitionwise_join | off
(3 rows)
single column range
single column in the partition key
– from >= (the lower bound is inclusive)
– to < (the upper bound is exclusive)
postgres=# CREATE TABLE measurement (
logdate date not null,
peaktemp int,
unitsales int
) PARTITION BY RANGE (logdate);
CREATE TABLE measurement_1 PARTITION OF measurement FOR VALUES FROM (MINVALUE) TO ('2018-01-01');
CREATE TABLE measurement_2 PARTITION OF measurement FOR VALUES FROM ('2018-01-01') TO ('2019-01-01');
CREATE TABLE measurement_3 PARTITION OF measurement FOR VALUES FROM ('2019-01-01') TO ('2020-01-01');
CREATE TABLE measurement_4 PARTITION OF measurement FOR VALUES FROM ('2020-01-01') TO (MAXVALUE);
postgres=# \d+
List of relations
Schema | Name | Type | Owner | Size | Description
--------+-------------------------------+-------------------+----------+------------+-------------
public | measurement | partitioned table | postgres | 0 bytes |
public | measurement_1 | table | postgres | 0 bytes |
public | measurement_2 | table | postgres | 0 bytes |
public | measurement_3 | table | postgres | 0 bytes |
public | measurement_4 | table | postgres | 0 bytes |
(5 rows)
postgres=# select * from pg_inherits;
inhrelid | inhparent | inhseqno
----------+-----------+----------
16629 | 16626 | 1
16632 | 16626 | 1
16635 | 16626 | 1
16638 | 16626 | 1
(4 rows)
postgres=# insert into measurement
select current_date - mod(id,1000),
id,
id
from generate_series(1,100000) as id;
postgres=# \d+
List of relations
Schema | Name | Type | Owner | Size | Description
--------+-------------------------------+-------------------+----------+------------+-------------
public | measurement | partitioned table | postgres | 0 bytes |
public | measurement_1 | table | postgres | 856 kB |
public | measurement_2 | table | postgres | 1608 kB |
public | measurement_3 | table | postgres | 1608 kB |
public | measurement_4 | table | postgres | 368 kB |
(5 rows)
postgres=# select min(logdate),max(logdate) from measurement_1;
min | max
------------+------------
2017-06-24 | 2017-12-31
(1 row)
postgres=# select min(logdate),max(logdate) from measurement_2;
min | max
------------+------------
2018-01-01 | 2018-12-31
(1 row)
postgres=# select min(logdate),max(logdate) from measurement_3;
min | max
------------+------------
2019-01-01 | 2019-12-31
(1 row)
postgres=# select min(logdate),max(logdate) from measurement_4;
min | max
------------+------------
2020-01-01 | 2020-03-19
(1 row)
postgres=# explain select * from measurement where logdate='2020-02-02';
QUERY PLAN
------------------------------------------------------------------
Seq Scan on measurement_4 (cost=0.00..141.75 rows=100 width=12)
Filter: (logdate = '2020-02-02'::date)
(2 rows)
postgres=# explain select * from measurement where logdate='2019-02-02';
QUERY PLAN
------------------------------------------------------------------
Seq Scan on measurement_3 (cost=0.00..654.25 rows=100 width=12)
Filter: (logdate = '2019-02-02'::date)
(2 rows)
multiple columns range
multiple columns in the partition key
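– from >= (the lower bound is inclusive)
– to < (the upper bound is exclusive)
– the bounds are compared row-wise as (year, month) tuples, not column by column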
postgres=# CREATE TABLE measurement_year_month (
logdate date not null,
peaktemp int,
unitsales int
) PARTITION BY RANGE (EXTRACT(YEAR FROM logdate), EXTRACT(MONTH FROM logdate));
CREATE TABLE measurement_year_month_1 PARTITION OF measurement_year_month FOR VALUES FROM (MINVALUE, MINVALUE) TO (2018, 01);
CREATE TABLE measurement_year_month_2 PARTITION OF measurement_year_month FOR VALUES FROM (2018, 01) TO (2019, 01);
CREATE TABLE measurement_year_month_3 PARTITION OF measurement_year_month FOR VALUES FROM (2019, 01) TO (2020, 01);
CREATE TABLE measurement_year_month_4 PARTITION OF measurement_year_month FOR VALUES FROM (2020, 01) TO (MAXVALUE, MAXVALUE);
postgres=# \d+
List of relations
Schema | Name | Type | Owner | Size | Description
--------+-------------------------------+-------------------+----------+------------+-------------
public | measurement_year_month | partitioned table | postgres | 0 bytes |
public | measurement_year_month_1 | table | postgres | 0 bytes |
public | measurement_year_month_2 | table | postgres | 0 bytes |
public | measurement_year_month_3 | table | postgres | 0 bytes |
public | measurement_year_month_4 | table | postgres | 0 bytes |
(5 rows)
postgres=# select * from pg_inherits;
inhrelid | inhparent | inhseqno
----------+-----------+----------
16644 | 16641 | 1
16647 | 16641 | 1
16650 | 16641 | 1
16653 | 16641 | 1
(4 rows)
postgres=# insert into measurement_year_month
select current_date - mod(id,1000),
id,
id
from generate_series(1,200000) as id;
postgres=# \d+
List of relations
Schema | Name | Type | Owner | Size | Description
--------+-------------------------------+-------------------+----------+------------+-------------
public | measurement_year_month | partitioned table | postgres | 0 bytes |
public | measurement_year_month_1 | table | postgres | 1680 kB |
public | measurement_year_month_2 | table | postgres | 3184 kB |
public | measurement_year_month_3 | table | postgres | 3184 kB |
public | measurement_year_month_4 | table | postgres | 712 kB |
(5 rows)
postgres=# select min(logdate),max(logdate) from measurement_year_month_1;
min | max
------------+------------
2017-06-24 | 2017-12-31
(1 row)
postgres=# select min(logdate),max(logdate) from measurement_year_month_2;
min | max
------------+------------
2018-01-01 | 2018-12-31
(1 row)
postgres=# select min(logdate),max(logdate) from measurement_year_month_3;
min | max
------------+------------
2019-01-01 | 2019-12-31
(1 row)
postgres=# select min(logdate),max(logdate) from measurement_year_month_4;
min | max
------------+------------
2020-01-01 | 2020-03-19
(1 row)
postgres=# explain select * from measurement_year_month where logdate='2020-02-02';
QUERY PLAN
------------------------------------------------------------------------------------
Append (cost=0.00..3586.99 rows=799 width=12)
-> Seq Scan on measurement_year_month_1 (cost=0.00..684.50 rows=200 width=12)
Filter: (logdate = '2020-02-02'::date)
-> Seq Scan on measurement_year_month_2 (cost=0.00..1307.50 rows=200 width=12)
Filter: (logdate = '2020-02-02'::date)
-> Seq Scan on measurement_year_month_3 (cost=0.00..1307.50 rows=199 width=12)
Filter: (logdate = '2020-02-02'::date)
-> Seq Scan on measurement_year_month_4 (cost=0.00..283.50 rows=200 width=12)
Filter: (logdate = '2020-02-02'::date)
(9 rows)
postgres=# explain select *
from measurement_year_month
where EXTRACT(YEAR FROM logdate) = 2020
and EXTRACT(MONTH FROM logdate) = 2;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Seq Scan on measurement_year_month_4 (cost=0.00..481.00 rows=1 width=12)
Filter: ((date_part('year'::text, (logdate)::timestamp without time zone) = '2020'::double precision) AND (date_part('month'::text, (logdate)::timestamp without time zone) = '2'::double precision))
(2 rows)
postgres=# explain select *
from measurement_year_month
where EXTRACT(YEAR FROM logdate) = 2020;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------
Append (cost=0.00..2037.22 rows=444 width=12)
-> Seq Scan on measurement_year_month_3 (cost=0.00..1672.50 rows=365 width=12)
Filter: (date_part('year'::text, (logdate)::timestamp without time zone) = '2020'::double precision)
-> Seq Scan on measurement_year_month_4 (cost=0.00..362.50 rows=79 width=12)
Filter: (date_part('year'::text, (logdate)::timestamp without time zone) = '2020'::double precision)
(5 rows)
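最後一個執行計劃怎麼會掃描2個分區了?
原因:多欄位 range 分區的邊界是把 (year, month) 當成一組值做逐列比較(row-wise comparison),而不是每個欄位各自比較。measurement_year_month_3 的範圍是 (2019, 1) <= (year, month) < (2020, 1),所以 year = 2020 且 month < 1 的鍵值(例如 (2020, 0))也屬於這個分區。查詢條件只限定了 year = 2020,沒有限定 month,規劃器無法排除 measurement_year_month_3,因此會同時掃描 measurement_year_month_3 和 measurement_year_month_4;加上 month 的條件後(如上一個查詢)才能裁剪到單一分區。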
參考:
https://www.postgresql.org/docs/12/sql-createtable.html
https://www.postgresql.org/docs/12/ddl-partitioning.html