【Hive】Hive內部表/外部表

1. 建表語句

Hive官網有詳細的建表語句。

CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name    -- (Note: TEMPORARY available in Hive 0.14.0 and later)
  [(col_name data_type [column_constraint_specification] [COMMENT col_comment], ... [constraint_specification])]
  [COMMENT table_comment]
  [PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
  [CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]
  [SKEWED BY (col_name, col_name, ...)                  -- (Note: Available in Hive 0.10.0 and later)
     ON ((col_value, col_value, ...), (col_value, col_value, ...), ...)
     [STORED AS DIRECTORIES]]
  [
   [ROW FORMAT row_format] 
   [STORED AS file_format]
     | STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)]  -- (Note: Available in Hive 0.6.0 and later)
  ]
  [LOCATION hdfs_path]
  [TBLPROPERTIES (property_name=property_value, ...)]   -- (Note: Available in Hive 0.6.0 and later)
  [AS select_statement];   -- (Note: Available in Hive 0.5.0 and later; not supported for external tables)

2. 創建內部表

2.1 創建dept表
create table myhive.dept 
(id int,
 dept_name string)
row format delimited fields terminated by ' ';

查看內部表屬性,可以發現,內部表爲:MANAGED_TABLE

hive (myhive)> describe formatted dept;
col_name        data_type       comment
# col_name              data_type               comment             
                 
id                      int                                         
dept_name               string                                      
                 
# Detailed Table Information             
Database:               myhive                   
Owner:                  hadoop                   
CreateTime:             Fri Jun 12 05:57:53 CST 2020     
LastAccessTime:         UNKNOWN                  
Protect Mode:           None                     
Retention:              0                        
Location:               hdfs://node01:8020/user/hive/warehouse/myhive.db/dept    
Table Type:             MANAGED_TABLE            
Table Parameters:                
        transient_lastDdlTime   1591912673          
                 
# Storage Information            
SerDe Library:          org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe       
InputFormat:            org.apache.hadoop.mapred.TextInputFormat         
OutputFormat:           org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat       
Compressed:             No                       
Num Buckets:            -1                       
Bucket Columns:         []                       
Sort Columns:           []                       
Storage Desc Params:             
        field.delim                                 
        serialization.format 
2.2 導入數據
[hadoop@node03 ~]$ cat data/hiveData/dept.txt 
1 sales
2 product
3 financial

##導入數據
hive (myhive)> load data local inpath '/home/hadoop/data/hiveData/dept.txt' into table dept;

##查詢數據
hive (myhive)> select * from dept;
dept.id dept.dept_name
1       sales
2       product
3       financial

3. 創建外部表

3.1 創建employee表
create external table myhive.employee
(id int,
 name string)
row format delimited fields terminated by ' ';

查看外部表屬性,可以發現,外部表爲:EXTERNAL_TABLE

hive (myhive)> describe formatted employee;
col_name        data_type       comment
# col_name              data_type               comment             
                 
id                      int                                         
name                    string                                      
                 
# Detailed Table Information             
Database:               myhive                   
Owner:                  hadoop                   
CreateTime:             Fri Jun 12 06:05:00 CST 2020     
LastAccessTime:         UNKNOWN                  
Protect Mode:           None                     
Retention:              0                        
Location:               hdfs://node01:8020/user/hive/warehouse/myhive.db/employee        
Table Type:             EXTERNAL_TABLE           
Table Parameters:                
        EXTERNAL                TRUE                
        transient_lastDdlTime   1591913100          
                 
# Storage Information            
SerDe Library:          org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe       
InputFormat:            org.apache.hadoop.mapred.TextInputFormat         
OutputFormat:           org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat       
Compressed:             No                       
Num Buckets:            -1                       
Bucket Columns:         []                       
Sort Columns:           []                       
Storage Desc Params:             
        field.delim                                 
        serialization.format
3.2 導入數據
[hadoop@node03 ~]$ cat data/hiveData/employee.txt 
1 lisa
2 tom
3 john

##導入數據
hive (myhive)> load data local inpath '/home/hadoop/data/hiveData/employee.txt' into table employee;

##查詢數據
hive (myhive)> select * from employee;
employee.id     employee.name
1       lisa
2       tom
3       john

4. 內部表和外部表相互轉換

4.1 內部表轉換爲外部表
alter table dept set tblproperties('EXTERNAL'='TRUE');
4.2 外部表轉換爲內部表
alter table employee set tblproperties ('EXTERNAL'='FALSE');

5. 內部表和外部表區別

  1. 建表語法有區別,外部表在建表時有“EXTERNAL” 關鍵字;
  2. 刪表後數據文件狀態有區別,刪除內部表後,相關數據也會被刪除,刪除外部表只是刪除了表結構,數據文件還是存在。

6. 內部表和外部表的選擇時機

6.1 何時選擇使用內部表
  1. 數據和表結構有相同的生命週期,也就是刪表時也想把數據刪除,此時可以選擇使用內部表;
  2. 數據只有本系統一張表使用,如數倉的dw層。
6.2 何時選擇使用外部表
  1. 有多個系統或集羣同時訪問一份數據;
  2. 希望一份數據被多個表加載;
  3. 刪除表結構無需擔心數據被刪除。

總結

內部表和外部表的建表語句區別不大,實際生產環境中,一般推薦使用外部表多一些。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章