GreenPlum--一些整理

1、創建數據庫：
create database 庫名;

2、刪除數據庫：
drop database 庫名;

3、創建表：
create table 表名(
id integer,
name text,
price numeric {精確度較高的小數型，同mysql的decimal}
);

3-1、GP建表指定列級約束
create table 表名(
id integer primary key, {主鍵約束}
name text not null, {非空約束}
price numeric check(price>0), {檢查約束}
type integer unique {唯一約束}
);
【注】：主鍵、唯一鍵、隨機分佈不能共存

3-2、聲明表的分佈策略
GP的分佈鍵作用是保證數據能夠均勻分佈在不同的存儲節點上，充分利用並行計算帶來的高性能。GP的分佈策略包括HASH分佈和隨機分佈。
HASH分佈的關鍵字是：distributed by(列名)
隨機分佈的關鍵字是：distributed randomly

在創建表或者修改表定義的時候，應使用distributed by來指定分佈鍵，從而使數據均勻的存儲在不同的segment上（若未顯式指定，GP默認以主鍵或第一列作爲分佈鍵）。
如果表定義了主鍵或唯一鍵，則分佈鍵必須是主鍵/唯一鍵所包含的列，否則無法創建。
(1)、聲明hash分佈
create table 表名(
id integer primary key, {主鍵約束}
name text not null, {非空約束}
price numeric check(price>0), {檢查約束}
type integer unique {唯一約束}
)distributed by(id);

(2)、聲明隨機分佈
create table 表名(
id integer primary key, {主鍵約束，這裏就不能再聲明主鍵約束}
name text not null, {非空約束}
price numeric check(price>0), {檢查約束}
type integer unique {唯一約束，這裏就不能再聲明唯一約束}
)distributed randomly; {指定隨機分佈}

【注】：主鍵、唯一鍵、隨機分佈不能共存
否則報錯：ERROR: Primary key and distributed randomly are incompatible {incompatible：不兼容的}
【說明】：幾何數據類型和自定義的數據類型不適合作爲GP的分佈鍵。如果沒有適合的列可以保證數據的均勻分佈，則使用隨機分佈

4、分區表的特徵
(1)、對一張表作分區，實際上是創建了一張父表和多個子表
(2)、每個分區在創建時都帶有一個不同的檢查約束(check)
(3)、任何分區結構的修改或者表結構的修改都要通過父表使用partition子句結合alter table命令完成

查看某張表是否爲分區表：
select count(*) from pg_partition where parrelid='測試schema.表名'::regclass;
結果：如果有數據就是分區表，如果無數據則不爲分區表

(4)、分區選擇性掃描的限制
查詢計劃只可以用穩定的比較運算符 = , < , > , <= , >= , <>
查詢計劃不能識別非穩定的函數來執行選擇性掃描

(5)、創建和管理分區表
分區表類型使用場景
range 表示一個序列範圍，如日期、數字、價格等
list 表示一個列表，如產品名稱
【注】：主鍵或是唯一鍵必須包含表中的所有分區鍵
1)、定義range類型分區表，使用start，end，every定義分區增量讓GP自動創建分區
create table test(
c1 integer,
c2 date
)distributed by(c1)
partition by range(c2)
(
start(date '2019-09-01') inclusive
end(date '2019-09-04') exclusive
every(interval '1 day')
);
創建結果：
test（父表）
test_1_prt_1
test_1_prt_2
test_1_prt_3
關鍵字說明：
start：分區開始值
end: 分區結束值
inclusive: 表示包含其所修飾的邊界值（start默認爲inclusive）
exclusive: 表示不包含其所修飾的邊界值（end默認爲exclusive）
every: 表示分區範圍自增長的步長

2)、定義日期範圍分區表，並給每個分區單獨命名
create table test(
c1 integer,
c2 date
)distributed by(c1)
partition by range(c2)
(
partition one start(date '2019-09-01') inclusive,
partition two start(date '2019-09-02') inclusive,
partition thr start(date '2019-09-03') inclusive
end(date '2019-09-04') exclusive
);
創建結果：
test（父表）
test_1_prt_one
test_1_prt_two
test_1_prt_thr

3)、定義數字範圍分區表
create table test2(
id int,
year int
)distributed by(id)
partition by range(year)
(
start(2014)
end(2016)
every(1),
default partition extra
);
創建結果：
test2（父表）
test2_1_prt_extra
test2_1_prt_2
test2_1_prt_3

4)、創建列表分區
create table test2(
id int,
gender char(1)
)distributed by(id)
partition by list(gender)
(
partition boys values('M'),
partition girls values('F'),
default partition thr
);

【說明】：default partition 分區名稱的作用是定義默認分區，在分區檢查約束範圍內的數據會被放到對應的分區，不在各個
分區表檢查範圍內的數據都會被放入到默認分區表中
舉例：
insert into test2 values
(1,'M'),
(2,'M'),
(3,'F'),
(4,'K');
select * from test2_1_prt_boys;
結果：
1 M
2 M
select * from test2_1_prt_girls;
結果：
3 F
select * from test2_1_prt_thr;
結果：{K不等於M也不等於F，所以被存儲到默認分區內}
4 K

5)、定義多級分區表
創建二級子分區表根據日期字段做一級分區，再根據地區列表做二級分區
create table test2(
id int,
dates date,
region text
)distributed by(id)
partition by range(dates)
subpartition by list(region)
subpartition template
(
subpartition usa values('usa'),
subpartition uk values('uk'),
subpartition ch values('ch'),
default subpartition otherRegions
)
(
start(date '2019-09-20') inclusive
end(date '2019-09-22') exclusive
every(interval '1 day'),
default partition otherDays
);

6)、查看子分區是否被掃描
explain select * from test2;

7)、交換分區(待查方法)

8)、查看分區設計
通過pg_partitions視圖查看分區表的設計情況
select partitionboundary, partitiontablename, partitionname, partitionlevel, partitionrank from pg_partitions;

通過pg_partition_templates查看子分區模板
select * from pg_partition_templates where tablename='test2';

通過pg_partition_columns查看分區鍵
select * from pg_partition_columns where tablename='test2';

(6)、維護分區表
分區表的維護包括添加新分區，重命名，拆分，模板修改和刪除分區等
1)、添加新分區{原分區表中如果存在默認分區，需要先drop掉默認分區}
alter table test2 drop default partition;
alter table test2 add partition 分區名 start('2019-09-20'::date) end('2019-09-30'::date);
分區名如：p20200317

1-1)、創建一個新的空分區：
create table 分區表名_y2008m02 partition of 源表
for values from ('2008-02-01') to ('2008-03-01')
TABLESPACE fasttablespace;

2)、重命名分區表{修改父表名稱，會影響所有的分區表}
alter table test2 rename to test22;

3)、只修改分區名稱{for('2019-09-20')填寫分區鍵的值，這裏即指test22_1_prt_4這個分區表}
alter table test22 rename partition for('2019-09-20') to change_par;
結果：test22_1_prt_4->改名爲test22_1_prt_change_par

4)、刪除分區
alter table schema.test22 drop default partition; -- 刪除默認分區
alter table schema.test22 drop partition if exists "partitionName";

5)、清空分區數據
alter table test22 truncate partition for(rank(1));

6)、修改子分區模板
alter table test22 set subpartition template
(
subpartition usa values('usa'),
subpartition africa values('africa'),
default subpartition other
);

7)、拆分分區
alter table 分區表名 split partition p20120105(分區名) at(('2012-01-06'::date)) into
(PARTITION p20120105(分區名) ,PARTITION p20120106(分區名) );

8)、交換分區
alter table 分區表名 exchange partition p20120102(分區名) with table 新分區表名;

5、數據的存儲方式
堆存儲(heap)：適合數據經常變化的小表
只追加存儲(Append-Only){即AO表}：適合大表，通常是批量裝載數據且只進行只讀查詢操作
默認的建表存儲模式爲堆存儲。

創建堆存儲表(heap)：
create table test(id int)distributed by(id);

創建AO表：
create table test(
id int,
name text not null,
sex text not null check(sex in('male','female'))
)with (appendonly=true)
distributed by(id);
【注】：AO表不支持主鍵、唯一約束

6、快速建表
(1)、在創建表的時候，如果要創建一張結構一模一樣的表，可以利用create table like命令，但是創建出來的表的一些特殊屬性並不會與源表一樣，如壓縮、只追加(appendonly)屬性等。
如果不指定分佈鍵，則默認分佈鍵與源表一樣
create table test2 (like test1) distributed by(id);
{注：使用create table like命令創建的表不帶數據的，且(like 源表)得加上括號}

(2)、根據查詢結果創建表，使用create table as 或 select into命令
create table as 和 select into命令功能一樣，但select into語法簡單且不能手動指定分佈鍵，只能使用默認的分佈鍵
{創建test2表}
方式一:
create table test2 as select id,name from test1 distributed by(id);
方式二：
select id,name into test2 from test1;

7、創建列存表
選擇列存儲或行存儲的場景：
(1)、表中的數據需要做更新操作，選擇行存儲
(2)、如果表經常有insert操作，選擇行存儲
(3)、如果在select和where中涉及表的全部或大部分列時，選擇行存儲
(4)、如果在where和having中對單列做聚合操作且返回少量的行，選擇列存儲
(5)、行存儲對於列多或行尺寸相對少的表更高效
(6)、列存儲只在訪問寬表的少量列的查詢中性能更高
(7)、列存儲更具有壓縮優勢
(8)、默認情況下，表是按行存儲的方式存儲
(9)、列存儲必須是AO表，否則無法創建成功，使用with(orientation=column)指定爲列存儲

如：
create table test(
id integer,
name text not null
)with(orientation=column,appendonly=true)
distributed by(id);

8、創建壓縮表
表壓縮的目的是爲了減少佔用存儲空間，用於數據倉庫中的事實表。不經常進行數據和表結構的操作。壓縮表必須是AO表。
GP數據庫的壓縮方式分爲：表級壓縮、列級壓縮

行存儲：支持的壓縮方式：表級壓縮；壓縮算法：ZLIB、QUICKLZ
列存儲：支持的壓縮方式：表級壓縮、列級壓縮；壓縮算法：RLE_TYPE、ZLIB、QUICKLZ

(1)、表級壓縮
get_ao_distribution(表名) 查看AO表的分佈情況
get_ao_compression_ratio(表名) 查看AO表的壓縮率
pg_total_relation_size(表名) 查看AO表的佔用空間大小（通常和函數pg_size_pretty()連用）

查看AO表test1的分佈情況
select get_ao_distribution('test1');

查看AO表test1的壓縮率
select get_ao_compression_ratio('test1');

查看AO表test1的佔用空間大小
select pg_size_pretty(pg_total_relation_size('test1'));

(2)、創建壓縮表（表級壓縮）
create table test1(
id integer,
name text not null
)with(appendonly=true,compresstype=zlib)
distributed by(id);

(3)、創建壓縮表（列級壓縮）
create table test1(
id integer encoding(compresstype=zlib),
name text not null encoding(compresstype=quicklz),
sex text not null encoding(compresstype=none)
)with(appendonly=true,orientation=column)
distributed by(id);
【注】：創建壓縮表，同時使用了表級壓縮和列級壓縮，列級壓縮會覆蓋表級壓縮的設置

9、數據加載
將1000萬條記錄加載到數據庫表
\copy test22 from '/data/test.csv' with delimiter ',';

GreenPlum--一些整理

linux安裝cuda和cudnn

模擬手機設備：使用 Playwright 實現移動端自動化測試

Mellanox網卡開啓SR-IOV

全面系統的AI學習路徑，幫助普通人也能玩轉AI

HTML 00 Tutorial

uni-app實現上拉加載

vue3編譯優化之“靜態提升”

又是一個月-20240513

flask 如何保證返回json有序

linux服務器設置ssh免密

1.Centos7.6下1.安裝2+4節點Greenplum集羣

使用pgadminⅢ連接GreenPlum 出現的錯誤

GreenPlum--視圖篇

GP--大表分區管理(一)

GreenPlum--創建與管理模式（schema）

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結