Hive學習筆記(4)

Hive學習筆記

筆記內容主要來自《Hive編程指南》(Programming Hive)。

HiveQL:視圖

Hive視圖只是一個邏輯結構:它不像表那樣實際存儲數據,而只保存一條查詢的定義。原書撰寫時Hive尚不支持物化視圖(Hive 3.0 起已提供物化視圖)。

使用視圖來降低查詢複雜度

-- Nested query: the inner join of people and cart (filtered to firstname 'john')
-- is aliased as `a`; the outer query then filters on id and projects lastname.
SELECT a.lastname
FROM (
    SELECT *
    FROM people
    JOIN cart
        ON (cart.people_id = people.id)
    WHERE firstname = 'john'
) a
WHERE a.id = 3;

-- The join-and-filter subquery captured once as a reusable view.
CREATE VIEW shorter_join AS
SELECT *
FROM people
JOIN cart
    ON (cart.people_id = people.id)
WHERE firstname = 'john';

-- The view can now be queried exactly like a table.
SELECT lastname
FROM shorter_join
WHERE id = 3;

使用視圖來限制基於條件過濾的數據

-- Views can be used to restrict data access so that sensitive information
-- cannot be queried freely.
-- Base table: holds both the name columns and the sensitive ssn/password columns.
CREATE TABLE userinfo (
    firstname STRING,
    lastname  STRING,
    ssn       STRING,
    password  STRING
);
-- The view exposes only the name columns, hiding ssn and password.
CREATE VIEW safer_user_info AS
SELECT
    firstname,
    lastname
FROM userinfo;

動態分區中的視圖和 map 類型

示例數據
示例數據

-- External text-file table with a single map column, so each row is a set of
-- string key/value pairs.
-- Delimiters are control characters: \004 between fields, \001 between map
-- entries, \002 between a key and its value.
CREATE EXTERNAL TABLE dynamictable (
    cols MAP<STRING, STRING>
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\004'
COLLECTION ITEMS TERMINATED BY '\001'
MAP KEYS TERMINATED BY '\002'
STORED AS TEXTFILE;

-- View over the rows whose "type" entry equals "request", exposing the
-- state, city and part map entries as three named columns.
CREATE VIEW orders (state, city, part) AS
SELECT
    cols["state"],
    cols["city"],
    cols["part"]
FROM dynamictable
WHERE cols["type"] = "request";

-- A WHERE clause limits which rows the view exposes: here only rows whose
-- "type" entry equals "response", projected as time and part.
CREATE VIEW shipments (time, part) AS
SELECT
    cols["time"],
    cols["part"]
FROM dynamictable
WHERE cols["type"] = "response";

視圖零碎


-- Create a view only if it does not already exist, attaching a view-level
-- comment and a table property.
-- NOTE: `SELECT ...` is a placeholder for the defining query; this statement
-- is a syntax template and must be completed before it can run.
CREATE VIEW IF NOT EXISTS shipments(time, part)
COMMENT 'Time and parts for shipments.'
TBLPROPERTIES('creator'='me')
AS SELECT ...;

-- Drop the view; IF EXISTS makes the statement a no-op when the view is absent.
DROP VIEW IF EXISTS
    shipments;

-- Update the view's metadata by setting a table property on it.
ALTER VIEW shipments
SET TBLPROPERTIES ('created_at' = 'some_timestamp');

HiveQL:索引

-- Employee table used by the index examples; partitioned by country and state.
CREATE TABLE employees (
    name         STRING,
    salary       FLOAT,
    subordinates ARRAY<STRING>,
    deductions   MAP<STRING, FLOAT>,
    address      STRUCT<street: STRING, city: STRING, state: STRING, zip: INT>
)
PARTITIONED BY (
    country STRING,
    state   STRING
);

-- Build a compact index on the partition column `country` only.
-- WITH DEFERRED REBUILD creates the index empty; it is populated later with
-- ALTER INDEX ... REBUILD.
-- No PARTITIONED BY clause is given: it is not part of Hive's CREATE INDEX
-- syntax, and `name` is not a partition column of employees (its partition
-- columns are country and state).
-- The COMMENT text names only `country`, matching the indexed column list.
CREATE INDEX employees_index
ON TABLE employees (country)
AS 'org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler'
WITH DEFERRED REBUILD
IDXPROPERTIES ('creator' = 'me', 'created_at' = 'some_time')
IN TABLE employees_index_table
COMMENT 'Employees indexed by country.';


-- Bitmap indexes are commonly used for columns with few distinct values.
-- This is the bitmap variant of an index on `country`; it reuses the name
-- employees_index, so it is an alternative to (not an addition to) a compact
-- index of the same name.
-- No PARTITIONED BY clause is given: it is not part of Hive's CREATE INDEX
-- syntax, and `name` is not a partition column of employees.
-- The COMMENT text names only `country`, matching the indexed column list.
CREATE INDEX employees_index
ON TABLE employees (country)
AS 'BITMAP'
WITH DEFERRED REBUILD
IDXPROPERTIES ('creator' = 'me', 'created_at' = 'some_time')
IN TABLE employees_index_table
COMMENT 'Employees indexed by country.';

重建索引

-- If DEFERRED REBUILD was specified, the new index starts out empty. At any
-- time the index can be built for the first time, or rebuilt, with ALTER INDEX.
-- Syntax note: ALTER INDEX takes `ON table_name` (without the TABLE keyword
-- that CREATE INDEX uses).
ALTER INDEX employees_index
ON employees
PARTITION (country = 'US')
REBUILD;
-- If the PARTITION clause is omitted, the index is rebuilt for all partitions.
-- In a workflow, run ALTER INDEX ... REBUILD for the relevant index to refresh it.

顯示索引

-- List the indexes defined on any column of the employees table.
SHOW FORMATTED INDEX
ON employees;

-- The plural keyword INDEXES is accepted as well and lists the information
-- for all indexes on the table. (The keyword is spelled INDEXES, not INDEXS.)
SHOW FORMATTED INDEXES ON employees;

刪除索引

-- Dropping an index also drops its index table.
-- Syntax note: DROP INDEX takes `ON table_name` (without the TABLE keyword
-- that CREATE INDEX uses).
DROP INDEX IF EXISTS employees_index ON employees;

實現一個定製化的索引處理器

Hive Wiki 頁面具有實現一個定製化的索引處理器的完整的例子,鏈接是 https://cwiki.apache.org/confluence/display/Hive/IndexDev#CREATE_INDEX 其中還包括了索引的初步設計文檔。


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章