postgresql 12 數據庫分區表之 range

os: centos 7.4
db: postgresql 12.2

postgresql 12 的分區表已經比較完善。

版本

# cat /etc/centos-release
CentOS Linux release 7.4.1708 (Core) 
# 
# su - postgres
Last login: Thu Mar 19 14:47:45 CST 2020 on pts/0
$ 
$ psql
psql (12.2)
Type "help" for help.

postgres=# select version();
                                                 version                                                 
---------------------------------------------------------------------------------------------------------
 PostgreSQL 12.2 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-39), 64-bit
(1 row)

postgres=# show enable_partition_pruning;
 enable_partition_pruning 
--------------------------
 on
(1 row)

postgres=# select name,setting from pg_settings where name like '%partition%';
               name                | setting 
-----------------------------------+---------
 enable_partition_pruning          | on
 enable_partitionwise_aggregate    | off
 enable_partitionwise_join         | off
(3 rows) 

single column range

single column in the partition key
– FROM bound is inclusive (>=)
– TO bound is exclusive (<)

postgres=# CREATE TABLE measurement (
    logdate         date not null,
    peaktemp        int,
    unitsales       int
) PARTITION BY RANGE (logdate);

CREATE TABLE measurement_1 PARTITION OF measurement FOR VALUES FROM (MINVALUE) TO ('2018-01-01');
CREATE TABLE measurement_2 PARTITION OF measurement FOR VALUES FROM ('2018-01-01') TO ('2019-01-01');
CREATE TABLE measurement_3 PARTITION OF measurement FOR VALUES FROM ('2019-01-01') TO ('2020-01-01');
CREATE TABLE measurement_4 PARTITION OF measurement FOR VALUES FROM ('2020-01-01') TO (MAXVALUE);

postgres=# \d+
                                        List of relations
 Schema |             Name              |       Type        |  Owner   |    Size    | Description
--------+-------------------------------+-------------------+----------+------------+-------------
 public | measurement                   | partitioned table | postgres | 0 bytes    | 
 public | measurement_1                 | table             | postgres | 0 bytes    | 
 public | measurement_2                 | table             | postgres | 0 bytes    | 
 public | measurement_3                 | table             | postgres | 0 bytes    | 
 public | measurement_4                 | table             | postgres | 0 bytes    | 
(5 rows)

postgres=# select * from pg_inherits;
 inhrelid | inhparent | inhseqno 
----------+-----------+----------
    16629 |     16626 |        1
    16632 |     16626 |        1
    16635 |     16626 |        1
    16638 |     16626 |        1
(4 rows)

postgres=# insert into measurement
select current_date - mod(id,1000),
       id,
       id
  from generate_series(1,100000) as id;

postgres=# \d+
                                        List of relations
 Schema |             Name              |       Type        |  Owner   |    Size    | Description 
--------+-------------------------------+-------------------+----------+------------+-------------
 public | measurement                   | partitioned table | postgres | 0 bytes    | 
 public | measurement_1                 | table             | postgres | 856 kB     | 
 public | measurement_2                 | table             | postgres | 1608 kB    | 
 public | measurement_3                 | table             | postgres | 1608 kB    | 
 public | measurement_4                 | table             | postgres | 368 kB     | 
(5 rows)

postgres=# select min(logdate),max(logdate) from measurement_1;
    min     |    max     
------------+------------
 2017-06-24 | 2017-12-31
(1 row)

postgres=# select min(logdate),max(logdate) from measurement_2;
    min     |    max     
------------+------------
 2018-01-01 | 2018-12-31
(1 row)

postgres=# select min(logdate),max(logdate) from measurement_3;
    min     |    max     
------------+------------
 2019-01-01 | 2019-12-31
(1 row)

postgres=# select min(logdate),max(logdate) from measurement_4;
    min     |    max     
------------+------------
 2020-01-01 | 2020-03-19
(1 row)

postgres=# explain select * from measurement where logdate='2020-02-02';
                            QUERY PLAN                            
------------------------------------------------------------------
 Seq Scan on measurement_4  (cost=0.00..141.75 rows=100 width=12)
   Filter: (logdate = '2020-02-02'::date)
(2 rows)

postgres=# explain select * from measurement where logdate='2019-02-02';
                            QUERY PLAN                            
------------------------------------------------------------------
 Seq Scan on measurement_3  (cost=0.00..654.25 rows=100 width=12)
   Filter: (logdate = '2019-02-02'::date)
(2 rows)

multiple columns range

multiple columns in the partition key
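– FROM bound is inclusive (>=)
– TO bound is exclusive (<)
– bounds are compared row-wise (as a tuple, column by column), not per column independently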

postgres=# CREATE TABLE measurement_year_month (
    logdate         date not null,
    peaktemp        int,
    unitsales       int
) PARTITION BY RANGE (EXTRACT(YEAR FROM logdate), EXTRACT(MONTH FROM logdate));

CREATE TABLE measurement_year_month_1 PARTITION OF measurement_year_month FOR VALUES FROM (MINVALUE, MINVALUE) TO (2018, 01);
CREATE TABLE measurement_year_month_2 PARTITION OF measurement_year_month FOR VALUES FROM (2018, 01) TO (2019, 01);
CREATE TABLE measurement_year_month_3 PARTITION OF measurement_year_month FOR VALUES FROM (2019, 01) TO (2020, 01);
CREATE TABLE measurement_year_month_4 PARTITION OF measurement_year_month FOR VALUES FROM (2020, 01) TO (MAXVALUE, MAXVALUE);

postgres=# \d+
                                        List of relations
 Schema |             Name              |       Type        |  Owner   |    Size    | Description 
--------+-------------------------------+-------------------+----------+------------+------------- 
 public | measurement_year_month        | partitioned table | postgres | 0 bytes    | 
 public | measurement_year_month_1      | table             | postgres | 0 bytes    | 
 public | measurement_year_month_2      | table             | postgres | 0 bytes    | 
 public | measurement_year_month_3      | table             | postgres | 0 bytes    | 
 public | measurement_year_month_4      | table             | postgres | 0 bytes    | 
(5 rows)

postgres=# select * from pg_inherits;
 inhrelid | inhparent | inhseqno 
----------+-----------+----------
    16644 |     16641 |        1
    16647 |     16641 |        1
    16650 |     16641 |        1
    16653 |     16641 |        1
(4 rows)

postgres=# insert into measurement_year_month
select current_date - mod(id,1000),
       id,
       id
  from generate_series(1,200000) as id;

postgres=# \d+
                                        List of relations
 Schema |             Name              |       Type        |  Owner   |    Size    | Description 
--------+-------------------------------+-------------------+----------+------------+------------- 
 public | measurement_year_month        | partitioned table | postgres | 0 bytes    | 
 public | measurement_year_month_1      | table             | postgres | 1680 kB    | 
 public | measurement_year_month_2      | table             | postgres | 3184 kB    | 
 public | measurement_year_month_3      | table             | postgres | 3184 kB    | 
 public | measurement_year_month_4      | table             | postgres | 712 kB     |
(5 rows)

postgres=# select min(logdate),max(logdate) from measurement_year_month_1;
    min     |    max     
------------+------------
 2017-06-24 | 2017-12-31
(1 row)

postgres=# select min(logdate),max(logdate) from measurement_year_month_2;
    min     |    max     
------------+------------
 2018-01-01 | 2018-12-31
(1 row)

postgres=# select min(logdate),max(logdate) from measurement_year_month_3;
    min     |    max     
------------+------------
 2019-01-01 | 2019-12-31
(1 row)

postgres=# select min(logdate),max(logdate) from measurement_year_month_4;
    min     |    max     
------------+------------
 2020-01-01 | 2020-03-19
(1 row)

postgres=# explain select * from measurement_year_month where logdate='2020-02-02';
                                     QUERY PLAN                                     
------------------------------------------------------------------------------------
 Append  (cost=0.00..3586.99 rows=799 width=12)
   ->  Seq Scan on measurement_year_month_1  (cost=0.00..684.50 rows=200 width=12)
         Filter: (logdate = '2020-02-02'::date)
   ->  Seq Scan on measurement_year_month_2  (cost=0.00..1307.50 rows=200 width=12)
         Filter: (logdate = '2020-02-02'::date)
   ->  Seq Scan on measurement_year_month_3  (cost=0.00..1307.50 rows=199 width=12)
         Filter: (logdate = '2020-02-02'::date)
   ->  Seq Scan on measurement_year_month_4  (cost=0.00..283.50 rows=200 width=12)
         Filter: (logdate = '2020-02-02'::date)
(9 rows)

postgres=# explain select * 
 from measurement_year_month 
where EXTRACT(YEAR FROM logdate)  = 2020 
  and EXTRACT(MONTH FROM logdate) = 2;

                                                                                               QUERY PLAN                                                    
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Seq Scan on measurement_year_month_4  (cost=0.00..481.00 rows=1 width=12)
   Filter: ((date_part('year'::text, (logdate)::timestamp without time zone) = '2020'::double precision) AND (date_part('month'::text, (logdate)::timestamp without time zone) = '2'::double precision))
(2 rows)

postgres=# explain select * 
 from measurement_year_month 
where EXTRACT(YEAR FROM logdate)  = 2020;

                                                  QUERY PLAN                                                  
--------------------------------------------------------------------------------------------------------------
 Append  (cost=0.00..2037.22 rows=444 width=12)
   ->  Seq Scan on measurement_year_month_3  (cost=0.00..1672.50 rows=365 width=12)
         Filter: (date_part('year'::text, (logdate)::timestamp without time zone) = '2020'::double precision)
   ->  Seq Scan on measurement_year_month_4  (cost=0.00..362.50 rows=79 width=12)
         Filter: (date_part('year'::text, (logdate)::timestamp without time zone) = '2020'::double precision)
(5 rows)

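最後一個執行計劃怎麼會掃描2個分區了?

原因:多列 range 分區的邊界是按「行值(tuple)」逐列比較的,而不是每一列各自獨立比較。measurement_year_month_3 的範圍是 (2019, 1) <= (year, month) < (2020, 1),因此 year = 2020 且 month < 1 的組合(例如 (2020, 0))在邏輯上仍屬於該分區。查詢只給出 year = 2020 而沒有限定 month 時,規劃器無法排除 measurement_year_month_3,所以會同時掃描 measurement_year_month_3 和 measurement_year_month_4;再加上 month = 2 的條件後,(2020, 2) 只會落在 measurement_year_month_4,才能裁剪到單個分區。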

參考:
https://www.postgresql.org/docs/12/sql-createtable.html
https://www.postgresql.org/docs/12/ddl-partitioning.html

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章