Hive解析Json格式用戶日誌

Json數據

第一次寫博客,mark一下 ——20191025
下面是一條json數據,導入hive表log_json後,整條json存放在類型爲string的字段b中:

{
	"user_id": "u0001",
	"view_params": "order_condition=03&order_type=1&key=華爲手機",
	"exts": {"target_type": "04","target_category": "100","target_ids": "[1,2,3]"},
	"ct": "1567429965000"
}

在這裏插入圖片描述

HIVE SQL解析

用了三種方式,得出數據如下,前兩種方法看起來更清爽,並且效率高:
在這裏插入圖片描述

第一種方式

-- Method 1: get_json_object for the JSON fields, regexp_extract for the
-- query-string style view_params ("k1=v1&k2=v2&...").
-- Each parameter is looked up by its own name, anchored at the start of the
-- string or directly after '&', and its value runs up to the next '&'.
-- This keeps the result correct when parameters are reordered, when extra
-- parameters are present, or when a value itself contains '='.
select
    get_json_object(b, '$.user_id') as user_id,
    regexp_extract(get_json_object(b, '$.view_params'), '(?:^|&)order_condition=([^&]*)', 1) as order_condition,
    regexp_extract(get_json_object(b, '$.view_params'), '(?:^|&)order_type=([^&]*)', 1) as order_type,
    regexp_extract(get_json_object(b, '$.view_params'), '(?:^|&)key=([^&]*)', 1) as key,
    -- exts is first pulled out as a JSON string, then parsed again for its fields.
    get_json_object(get_json_object(b, '$.exts'), '$.target_type') as target_type,
    get_json_object(get_json_object(b, '$.exts'), '$.target_category') as target_category,
    get_json_object(get_json_object(b, '$.exts'), '$.target_ids') as target_ids,
    -- ct is divided by 1000 before from_unixtime (the sample value is in
    -- milliseconds) and formatted down to the hour.
    from_unixtime(cast((get_json_object(b, '$.ct') / 1000) as bigint), 'yyyyMMddHH') as ct
from log_json;

第二種方式

-- Method 2: turn view_params ("k1=v1&k2=v2&...") into a map with str_to_map
-- and read each parameter by name.
-- The derived table extracts every top-level JSON field once; the outer
-- select only projects and formats them.
select
    parsed.user_id,
    parsed.params['order_condition'] as order_condition,
    parsed.params['order_type'] as order_type,
    parsed.params['key'] as key,
    get_json_object(parsed.exts, '$.target_type') as target_type,
    get_json_object(parsed.exts, '$.target_category') as target_category,
    get_json_object(parsed.exts, '$.target_ids') as target_ids,
    -- ct is divided by 1000 before from_unixtime and formatted down to the hour.
    from_unixtime(cast((parsed.ct / 1000) as bigint), 'yyyyMMddHH') as ct
from (
    select
        get_json_object(b, '$.user_id') as user_id,
        -- '&' separates the pairs, '=' separates key from value.
        str_to_map(get_json_object(b, '$.view_params'), "&", "=") as params,
        get_json_object(b, '$.exts') as exts,
        get_json_object(b, '$.ct') as ct
    from log_json
) parsed;

第三種方式

-- Method 3: json_tuple via lateral view.
-- The first lateral view unpacks the top-level keys of b; the second one
-- unpacks the nested exts object produced by the first.
select
    top_level.user_id,
    -- view_params is split positionally: pair [0], [1], [2] on '&',
    -- then the value is the part after '=' in each pair.
    split(split(top_level.view_params, '&')[0], '=')[1] as order_condition,
    split(split(top_level.view_params, '&')[1], '=')[1] as order_type,
    split(split(top_level.view_params, '&')[2], '=')[1] as key,
    ext_fields.target_type,
    ext_fields.target_category,
    ext_fields.target_ids,
    -- ct is divided by 1000 before from_unixtime and formatted down to the hour.
    from_unixtime(cast(top_level.ct / 1000 as bigint), 'yyyyMMddHH') as ct
from log_json
lateral view json_tuple(b, 'user_id', 'view_params', 'exts', 'ct') top_level
    as user_id, view_params, exts, ct
lateral view json_tuple(top_level.exts, 'target_type', 'target_category', 'target_ids') ext_fields
    as target_type, target_category, target_ids;

string類型的數組形式的行轉列拆分

在這裏插入圖片描述

大家可以看出來target_ids字段爲string類型的數組,如果想拆開來,
可以用以下lateral view+explode+split+regexp_replace方式,當然union all拆分也可以,不過一般不會這麼用。如果有更好的方式請留言,互相學習,謝謝~
下面是我的方式:

-- Splits the string-encoded array exts.target_ids (e.g. "[1,2,3]") into one
-- output row per id, repeating the other log fields on every row.
select
    get_json_object(b, '$.user_id') as user_id,
    -- Each view_params parameter is matched by name (anchored at the start of
    -- the string or after '&') instead of by position, so parameter order,
    -- extra parameters, or '=' inside a value do not shift the columns.
    regexp_extract(get_json_object(b, '$.view_params'), '(?:^|&)order_condition=([^&]*)', 1) as order_condition,
    regexp_extract(get_json_object(b, '$.view_params'), '(?:^|&)order_type=([^&]*)', 1) as order_type,
    regexp_extract(get_json_object(b, '$.view_params'), '(?:^|&)key=([^&]*)', 1) as key,
    get_json_object(get_json_object(b, '$.exts'), '$.target_type') as target_type,
    get_json_object(get_json_object(b, '$.exts'), '$.target_category') as target_category,
    -- ct is divided by 1000 before from_unixtime and formatted down to the hour.
    from_unixtime(cast((get_json_object(b, '$.ct') / 1000) as bigint), 'yyyyMMddHH') as ct,
    ids.target_ids
from log_json
-- "outer" keeps log rows whose target_ids is missing (target_ids is NULL on
-- those rows); a plain lateral view would silently drop them.
lateral view outer explode(
    split(
        -- Strip the surrounding brackets and any whitespace so that
        -- "[1, 2, 3]" and "[1,2,3]" both yield clean ids.
        regexp_replace(
            get_json_object(get_json_object(b, '$.exts'), '$.target_ids'),
            '[\\[\\]\\s]',
            ''
        ),
        ','
    )
) ids as target_ids;

數據展現:
在這裏插入圖片描述

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章