postgresql分區表筆記

最近在做 Oracle 數據庫向 PostgreSQL(pg)遷移的事情。非分區表使用 ora2pg 很容易遷移;但遇到分區表時,發現由於 Oracle 中的分區表都採用“P_日期”作爲分區名,ora2pg 導出的 SQL 中 pg 子表名都成爲“p_日期”,導致大量表名重複而出錯,只能在 ora2pg 導出的文件上進行手工修改。這裏整理一下 pg 分區表的知識。


#####################################################################################################

分區表建立過程

######################################################################################################

第一步:創建表

-- Parent table of the partition set: two columns, the partition key and the
-- payload. The child tables declared with INHERITS (T_WEIBO_DAILY) take their
-- column definitions from here.
CREATE TABLE T_WEIBO_DAILY (
    PARTITION_DATE BIGINT,   -- partition key; presumably a date encoded as YYYYMMDD (confirm)
    DATA           VARCHAR   -- payload column
);


第二步:通過繼承創建子表:

-- One child table per partition value. Every child inherits the columns of
-- T_WEIBO_DAILY and carries a CHECK constraint that pins PARTITION_DATE to
-- exactly one value, so each row can belong to only one child.
CREATE TABLE T_WEIBO_DAILY_P20110701 (
    CHECK (PARTITION_DATE = 20110701)
) INHERITS (T_WEIBO_DAILY);

CREATE TABLE T_WEIBO_DAILY_P20110702 (
    CHECK (PARTITION_DATE = 20110702)
) INHERITS (T_WEIBO_DAILY);

CREATE TABLE T_WEIBO_DAILY_P20110808 (
    CHECK (PARTITION_DATE = 20110808)
) INHERITS (T_WEIBO_DAILY);

CREATE TABLE T_WEIBO_DAILY_P20110807 (
    CHECK (PARTITION_DATE = 20110807)
) INHERITS (T_WEIBO_DAILY);

CREATE TABLE T_WEIBO_DAILY_P20110806 (
    CHECK (PARTITION_DATE = 20110806)
) INHERITS (T_WEIBO_DAILY);

CREATE TABLE T_WEIBO_DAILY_P20110805 (
    CHECK (PARTITION_DATE = 20110805)
) INHERITS (T_WEIBO_DAILY);

CREATE TABLE T_WEIBO_DAILY_P20110804 (
    CHECK (PARTITION_DATE = 20110804)
) INHERITS (T_WEIBO_DAILY);

CREATE TABLE T_WEIBO_DAILY_P20110803 (
    CHECK (PARTITION_DATE = 20110803)
) INHERITS (T_WEIBO_DAILY);

CREATE TABLE T_WEIBO_DAILY_P20110802 (
    CHECK (PARTITION_DATE = 20110802)
) INHERITS (T_WEIBO_DAILY);

CREATE TABLE T_WEIBO_DAILY_P20110809 (
    CHECK (PARTITION_DATE = 20110809)
) INHERITS (T_WEIBO_DAILY);

CREATE TABLE T_WEIBO_DAILY_P20110810 (
    CHECK (PARTITION_DATE = 20110810)
) INHERITS (T_WEIBO_DAILY);


第三步:在子表上建立索引:

-- Create an index on the partition key of every child table.
-- Index names follow the pattern <child_table>_PARTITION_DATE.
CREATE INDEX T_WEIBO_DAILY_P20110701_PARTITION_DATE
    ON T_WEIBO_DAILY_P20110701 (PARTITION_DATE);

CREATE INDEX T_WEIBO_DAILY_P20110702_PARTITION_DATE
    ON T_WEIBO_DAILY_P20110702 (PARTITION_DATE);

CREATE INDEX T_WEIBO_DAILY_P20110808_PARTITION_DATE
    ON T_WEIBO_DAILY_P20110808 (PARTITION_DATE);

CREATE INDEX T_WEIBO_DAILY_P20110807_PARTITION_DATE
    ON T_WEIBO_DAILY_P20110807 (PARTITION_DATE);

CREATE INDEX T_WEIBO_DAILY_P20110806_PARTITION_DATE
    ON T_WEIBO_DAILY_P20110806 (PARTITION_DATE);

CREATE INDEX T_WEIBO_DAILY_P20110805_PARTITION_DATE
    ON T_WEIBO_DAILY_P20110805 (PARTITION_DATE);

CREATE INDEX T_WEIBO_DAILY_P20110804_PARTITION_DATE
    ON T_WEIBO_DAILY_P20110804 (PARTITION_DATE);

CREATE INDEX T_WEIBO_DAILY_P20110803_PARTITION_DATE
    ON T_WEIBO_DAILY_P20110803 (PARTITION_DATE);

CREATE INDEX T_WEIBO_DAILY_P20110802_PARTITION_DATE
    ON T_WEIBO_DAILY_P20110802 (PARTITION_DATE);

CREATE INDEX T_WEIBO_DAILY_P20110809_PARTITION_DATE
    ON T_WEIBO_DAILY_P20110809 (PARTITION_DATE);

CREATE INDEX T_WEIBO_DAILY_P20110810_PARTITION_DATE
    ON T_WEIBO_DAILY_P20110810 (PARTITION_DATE);




第四步:創建觸發函數:

-- Trigger function that routes a newly inserted row to the child table whose
-- partition value equals NEW.PARTITION_DATE.
--
-- Behaviour:
--   * A row whose PARTITION_DATE matches one of the listed values is inserted
--     into the corresponding T_WEIBO_DAILY_P<value> table.
--   * Any other value (including NULL, which matches no WHEN branch) raises
--     an exception instead of being stored anywhere.
--   * The function returns NULL; in a BEFORE ... FOR EACH ROW trigger this
--     suppresses the original insert, so the row ends up only in the child.
--
-- Every new partition needs a matching WHEN branch added here.
CREATE OR REPLACE FUNCTION T_WEIBO_DAILY_insert_trigger()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    CASE NEW.PARTITION_DATE
        WHEN 20110701 THEN
            INSERT INTO T_WEIBO_DAILY_P20110701 VALUES (NEW.*);
        WHEN 20110702 THEN
            INSERT INTO T_WEIBO_DAILY_P20110702 VALUES (NEW.*);
        WHEN 20110808 THEN
            INSERT INTO T_WEIBO_DAILY_P20110808 VALUES (NEW.*);
        WHEN 20110807 THEN
            INSERT INTO T_WEIBO_DAILY_P20110807 VALUES (NEW.*);
        WHEN 20110806 THEN
            INSERT INTO T_WEIBO_DAILY_P20110806 VALUES (NEW.*);
        WHEN 20110805 THEN
            INSERT INTO T_WEIBO_DAILY_P20110805 VALUES (NEW.*);
        WHEN 20110804 THEN
            INSERT INTO T_WEIBO_DAILY_P20110804 VALUES (NEW.*);
        WHEN 20110803 THEN
            INSERT INTO T_WEIBO_DAILY_P20110803 VALUES (NEW.*);
        WHEN 20110802 THEN
            INSERT INTO T_WEIBO_DAILY_P20110802 VALUES (NEW.*);
        WHEN 20110809 THEN
            INSERT INTO T_WEIBO_DAILY_P20110809 VALUES (NEW.*);
        WHEN 20110810 THEN
            INSERT INTO T_WEIBO_DAILY_P20110810 VALUES (NEW.*);
        ELSE
            -- No child table exists for this value: reject the row rather
            -- than letting it land in the parent table.
            RAISE EXCEPTION 'Value out of range. Fix the T_WEIBO_DAILY_insert_trigger() function!';
    END CASE;

    RETURN NULL;
END;
$$;


第五步:建立觸發器

-- Attach the routing function to the parent table: it runs once for every
-- row, before that row would be inserted into T_WEIBO_DAILY.
CREATE TRIGGER insert_T_WEIBO_DAILY_trigger
BEFORE INSERT
ON T_WEIBO_DAILY
FOR EACH ROW
EXECUTE PROCEDURE T_WEIBO_DAILY_insert_trigger();


##################################################################################################

分區表的分區剪枝

###################################################################################################

版本信息和constraint_exclusion如下:

ta=# SELECT VERSION();
                                                       version                                                        
----------------------------------------------------------------------------------------------------------------------
 PostgreSQL 9.1.2 on x86_64-unknown-linux-gnu, compiled by gcc (GCC) 4.1.2 20070115 (prerelease) (SUSE Linux), 64-bit
(1 row)

ta=# show constraint_exclusion;
 constraint_exclusion 
----------------------
 partition
(1 row) 

看了一下postgresql.conf,constraint_exclusion 默認爲partition,文檔中對這個參數描述如下:

constraint_exclusion (enum) 
        Controls the query planner's use of table constraints to optimize queries. The allowed 
values of constraint_exclusion are on (examine constraints for all tables), off (never examine 
constraints), and partition (examine constraints only for inheritance child tables and UNION ALL 
subqueries). partition is the default setting.

       When this parameter allows it for a particular table, the planner compares query conditions
with the table's CHECK constraints, and omits scanning tables for which the conditions contradict the constraints.


也就是說constraint_exclusion 有三個選項(on,off,partition)作用如下:

 on :對所有的表都會進行約束檢查

 off:對所有表都不進行約束檢查

 partition:只對繼承的子表和UNION ALL的子查詢進行約束檢查


根據說明:沒有必要對所有表在生成查詢計劃時都進行約束檢查,對分區表需要分區剪枝,所以partition比較合適。


另:測試發現分區剪枝只在where條件中對分區鍵進行過濾時才會發生;使用select * from table partition(**)這種Oracle風格的寫法時,分區剪枝不起作用!

ta=# explain select * from ta.T_WEIBO_DAILY where partition_date=20110701;
                                                   QUERY PLAN                                                    
-----------------------------------------------------------------------------------------------------------------
 Result  (cost=0.00..7.50 rows=3 width=236)
   ->  Append  (cost=0.00..7.50 rows=3 width=236)
         ->  Seq Scan on t_weibo_daily  (cost=0.00..0.00 rows=1 width=236)
               Filter: (partition_date = 20110701)
         ->  Bitmap Heap Scan on t_weibo_daily_p20110701 t_weibo_daily  (cost=3.27..7.50 rows=2 width=236)
               Recheck Cond: (partition_date = 20110701)
               ->  Bitmap Index Scan on t_weibo_daily_p20110701_partition_date  (cost=0.00..3.27 rows=2 width=0)
                     Index Cond: (partition_date = 20110701)
(8 rows)

ta=# explain select * from ta.T_WEIBO_DAILY partition( t_weibo_daily_p20110701);
                                            QUERY PLAN                                            
--------------------------------------------------------------------------------------------------
 Result  (cost=0.00..143.00 rows=3301 width=236)
   ->  Append  (cost=0.00..143.00 rows=3301 width=236)
         ->  Seq Scan on t_weibo_daily partition  (cost=0.00..0.00 rows=1 width=236)
         ->  Seq Scan on t_weibo_daily_p20110701 partition  (cost=0.00..13.00 rows=300 width=236)
         ->  Seq Scan on t_weibo_daily_p20110702 partition  (cost=0.00..13.00 rows=300 width=236)
         ->  Seq Scan on t_weibo_daily_p20110808 partition  (cost=0.00..13.00 rows=300 width=236)
         ->  Seq Scan on t_weibo_daily_p20110807 partition  (cost=0.00..13.00 rows=300 width=236)
         ->  Seq Scan on t_weibo_daily_p20110806 partition  (cost=0.00..13.00 rows=300 width=236)
         ->  Seq Scan on t_weibo_daily_p20110805 partition  (cost=0.00..13.00 rows=300 width=236)
         ->  Seq Scan on t_weibo_daily_p20110804 partition  (cost=0.00..13.00 rows=300 width=236)
         ->  Seq Scan on t_weibo_daily_p20110803 partition  (cost=0.00..13.00 rows=300 width=236)
         ->  Seq Scan on t_weibo_daily_p20110802 partition  (cost=0.00..13.00 rows=300 width=236)
         ->  Seq Scan on t_weibo_daily_p20110809 partition  (cost=0.00..13.00 rows=300 width=236)
         ->  Seq Scan on t_weibo_daily_p20110810 partition  (cost=0.00..13.00 rows=300 width=236)
(14 rows) 

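不清楚是不是我的第二個寫法有問題??(注:PostgreSQL 並不支持 Oracle 的 partition(分區名) 語法。從上面執行計劃中的 “Seq Scan on t_weibo_daily partition” 可以看出,partition 被解析成了表別名,括號中的名字則被當作列別名,所以這條語句實際上是對父表及全部子表的無條件查詢,自然不會發生分區剪枝。如果只想訪問某一個分區,應直接查詢對應的子表,例如 select * from ta.t_weibo_daily_p20110701;)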


##########################################################################

遺留的需要思考和解決的問題

1. pg分區這麼複雜,怎樣包裝能讓用戶一個sql或者一個命令完成分區的增減。

2. 怎樣讓用戶能簡單的創建一個分區表,(或許可以參考greenplum)

3. 使用partition(分區名)方式訪問時的分區剪枝問題:要麼就報錯,要麼就要正確找到分區,在海量數據中沒有分區剪枝是個災難。

##########################################################################













發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章