greenplum - 創建表並查看表數據分佈情況

隨機分佈

testdb=# create table test01 (id int, col1 varchar(100)) distributed RANDOMLY;
CREATE TABLE
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# insert into test01 values (1,'a');
INSERT 0 1
testdb=# select gp_segment_id,count(*) from test01 group by gp_segment_id;
 gp_segment_id | count
---------------+-------
             1 |     1
             6 |     4
             3 |     1
             8 |     1
             0 |     2
             5 |     1
(6 rows)

根據鍵值分佈（分佈鍵的值相同的行會落在同一個 segment 上，因此下面 11 行 id 均爲 1 的數據全部位於同一個 segment）

testdb=# create table test02 (id int, col1 varchar(100)) distributed by (id);
CREATE TABLE
testdb=#  insert into test02 values (1,'a');
INSERT 0 1
testdb=#  insert into test02 values (1,'a');
INSERT 0 1
testdb=#  insert into test02 values (1,'a');
INSERT 0 1
testdb=#  insert into test02 values (1,'a');
INSERT 0 1
testdb=#  insert into test02 values (1,'a');
INSERT 0 1
testdb=#  insert into test02 values (1,'a');
INSERT 0 1
testdb=#  insert into test02 values (1,'a');
INSERT 0 1
testdb=#  insert into test02 values (1,'a');
INSERT 0 1
testdb=#  insert into test02 values (1,'a');
INSERT 0 1
testdb=#  insert into test02 values (1,'a');
INSERT 0 1
testdb=#  insert into test02 values (1,'a');
INSERT 0 1
testdb=# select gp_segment_id,count(*) from test02 group by gp_segment_id;
 gp_segment_id | count
---------------+-------
             4 |    11
(1 row)

如果有主鍵，就會根據主鍵分佈

testdb=# create table test03(id int primary key, col1 varchar(100));
CREATE TABLE
testdb=# \d test03
            Table "public.test03"
 Column |          Type          | Modifiers
--------+------------------------+-----------
 id     | integer                | not null
 col1   | character varying(100) |
Indexes:
    "test03_pkey" PRIMARY KEY, btree (id)
Distributed by: (id)

沒有主鍵，也沒有指定分佈鍵時，默認以第一列作爲分佈鍵

testdb=# create table test04(id int,  col1 varchar(100));
NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Greenplum Database data distribution key for this table.
HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE TABLE
testdb=# \d test04
            Table "public.test04"
 Column |          Type          | Modifiers
--------+------------------------+-----------
 id     | integer                |
 col1   | character varying(100) |
Distributed by: (id)

testdb=# create table test05 ( col1 varchar(100), id int);
NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'col1' as the Greenplum Database data distribution key for this table.
HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE TABLE
testdb=# \d test05
            Table "public.test05"
 Column |          Type          | Modifiers
--------+------------------------+-----------
 col1   | character varying(100) |
 id     | integer                |
Distributed by: (col1)

有主鍵時，不能使用隨機分佈（DISTRIBUTED RANDOMLY）

testdb=# create table test06 ( id int primary key, col1 varchar(100)) distributed randomly;
ERROR:  PRIMARY KEY and DISTRIBUTED RANDOMLY are incompatible
testdb=# create table test06 ( col1 varchar(100), id int primary key) distributed randomly;
ERROR:  PRIMARY KEY and DISTRIBUTED RANDOMLY are incompatible

有主鍵時，分佈鍵必須是主鍵的子集，否則會報錯。

testdb=# create table test06 ( id int primary key, col1 varchar(100)) distributed by (col1);
ERROR:  PRIMARY KEY and DISTRIBUTED BY definitions are incompatible
HINT:  When there is both a PRIMARY KEY and a DISTRIBUTED BY clause, the DISTRIBUTED BY clause must be a subset of the PRIMARY KEY.
testdb=# create table test06 ( id int primary key, col1 varchar(100)) distributed by (id);
CREATE TABLE
testdb=# \d test06
            Table "public.test06"
 Column |          Type          | Modifiers
--------+------------------------+-----------
 id     | integer                | not null
 col1   | character varying(100) |
Indexes:
    "test06_pkey" PRIMARY KEY, btree (id)
Distributed by: (id)

查看表的分佈鍵方式

-- 如果爲 RANDOMLY分佈，則 distkey爲空
testdb=# select c.relname, policytype, numsegments, distkey 
from pg_class c, pg_catalog.gp_distribution_policy gpp 
where c.oid = gpp.localoid;
 relname | policytype | numsegments | distkey
---------+------------+-------------+---------
 test01  | p          |          12 |
 test03  | p          |          12 | 1
 test04  | p          |          12 | 1
 test05  | p          |          12 | 1
 test06  | p          |          12 | 1
 test02  | p          |          12 | 1
(6 rows)

查看表數據分佈

select gp_segment_id,count(*) from table_name group by gp_segment_id;

greenplum - 創建表並查看表數據分佈情況

Python 爬蟲：Spring Boot 反爬蟲的成功案例

京東科技數字化營銷能力的演進與最佳實踐| 京東雲技術團隊

AWS - Redshift -跨賬號導入 S3 數據

MySQL - 在InnoDB 中 Troubleshooting Row size too large (> 8126)

greenplum - 簡單常用查看gp狀態命令

greenplum - 在Amazon Linux 2 （EC2）中安裝 gp6.8

greenplum - Add Mirror of segment

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結