Json數據
第一次寫博客,mark一下 ——20191025
下面是一條json數據,導入hive中類型爲string
{
"user_id": "u0001",
"view_params": "order_condition=03&order_type=1&key=華爲手機",
"exts": {"target_type": "04","target_category": "100","target_ids": "[1,2,3]"},
"ct": "1567429965000"
}
HIVE SQL解析
用了三種方式,得出數據如下,前兩種方法看起來更清爽,並且效率高:
第一種方式
-- Method 1: get_json_object for the JSON members, regexp_extract for the
-- query-string style view_params value.
-- Each view_params value is looked up by its own key name. The pattern
-- (?:^|&)<key>=([^&]*) anchors on the start of the string or on a preceding
-- ampersand and stops at the next ampersand, so the result no longer depends
-- on the order or the number of parameters in view_params.
-- exts is extracted as JSON text first, then its members are read from it.
-- ct is divided by 1000 before from_unixtime, i.e. it is treated as epoch
-- milliseconds (the sample value has 13 digits), and formatted as yyyyMMddHH.
select
    get_json_object(b, '$.user_id') as user_id,
    regexp_extract(get_json_object(b, '$.view_params'), '(?:^|&)order_condition=([^&]*)', 1) as order_condition,
    regexp_extract(get_json_object(b, '$.view_params'), '(?:^|&)order_type=([^&]*)', 1) as order_type,
    regexp_extract(get_json_object(b, '$.view_params'), '(?:^|&)key=([^&]*)', 1) as key,
    get_json_object(get_json_object(b, '$.exts'), '$.target_type') as target_type,
    get_json_object(get_json_object(b, '$.exts'), '$.target_category') as target_category,
    get_json_object(get_json_object(b, '$.exts'), '$.target_ids') as target_ids,
    from_unixtime(cast((get_json_object(b, '$.ct') / 1000) as bigint), 'yyyyMMddHH') as ct
from log_json;
第二種方式
-- Method 2: get_json_object for the JSON members, str_to_map for view_params.
-- The inner query extracts each top-level member once and turns view_params
-- into a map (pairs separated by an ampersand, key and value separated by an
-- equals sign). The outer query reads the three parameters from that map by
-- key and the exts members from the extracted exts JSON text.
-- ct is divided by 1000 before from_unixtime, i.e. it is treated as epoch
-- milliseconds, and formatted as yyyyMMddHH.
select
    src.user_id,
    src.params['order_condition'] as order_condition,
    src.params['order_type'] as order_type,
    src.params['key'] as key,
    get_json_object(src.exts, '$.target_type') as target_type,
    get_json_object(src.exts, '$.target_category') as target_category,
    get_json_object(src.exts, '$.target_ids') as target_ids,
    from_unixtime(cast((src.ct / 1000) as bigint), 'yyyyMMddHH') as ct
from (
    select
        get_json_object(b, '$.user_id') as user_id,
        str_to_map(get_json_object(b, '$.view_params'), "&", "=") as params,
        get_json_object(b, '$.exts') as exts,
        get_json_object(b, '$.ct') as ct
    from log_json
) src;
第三種方式
-- Method 3: json_tuple through lateral views.
-- The first lateral view reads the four top-level members of b in one call.
-- The second lateral view reads the three members of the exts JSON text
-- produced by the first one.
-- view_params values are looked up by key through str_to_map instead of by
-- position, so the result does not depend on the order of the parameters and
-- a value that itself contains an equals sign is not cut short.
-- ct is divided by 1000 before from_unixtime, i.e. it is treated as epoch
-- milliseconds, and formatted as yyyyMMddHH.
select
    j.user_id,
    str_to_map(j.view_params, '&', '=')['order_condition'] as order_condition,
    str_to_map(j.view_params, '&', '=')['order_type'] as order_type,
    str_to_map(j.view_params, '&', '=')['key'] as key,
    e.target_type,
    e.target_category,
    e.target_ids,
    from_unixtime(cast(j.ct / 1000 as bigint), 'yyyyMMddHH') as ct
from log_json
lateral view json_tuple(b, 'user_id', 'view_params', 'exts', 'ct') j as
    user_id, view_params, exts, ct
lateral view json_tuple(j.exts, 'target_type', 'target_category', 'target_ids') e as
    target_type, target_category, target_ids;
string類型的數組形式的行轉列拆分
大家可以看出來target_ids字段爲string類型的數組,如果想拆開來,
可以用以下lateral view+explode+split+regexp_replace方式,當然union all拆分也可以,一般也不會這麼用,如果有更好的方式請留言,互相學習,謝謝~
下面是我的方式:
-- Expand the string-encoded array in exts.target_ids into one row per id.
-- regexp_replace strips the square brackets, split cuts the remainder on
-- commas, and explode emits one row per element, exposed as tmp.target_ids.
-- Each view_params value is looked up by its own key name. The pattern
-- (?:^|&)<key>=([^&]*) anchors on the start of the string or on a preceding
-- ampersand and stops at the next ampersand, so the result does not depend
-- on the order or the number of parameters in view_params.
-- ct is divided by 1000 before from_unixtime, i.e. it is treated as epoch
-- milliseconds, and formatted as yyyyMMddHH.
-- NOTE(review): a plain LATERAL VIEW emits no row for an input row when
-- explode produces nothing (for example when target_ids is missing). Switch
-- to LATERAL VIEW OUTER if such rows must be kept - confirm the intent.
select
    get_json_object(b, '$.user_id') as user_id,
    regexp_extract(get_json_object(b, '$.view_params'), '(?:^|&)order_condition=([^&]*)', 1) as order_condition,
    regexp_extract(get_json_object(b, '$.view_params'), '(?:^|&)order_type=([^&]*)', 1) as order_type,
    regexp_extract(get_json_object(b, '$.view_params'), '(?:^|&)key=([^&]*)', 1) as key,
    get_json_object(get_json_object(b, '$.exts'), '$.target_type') as target_type,
    get_json_object(get_json_object(b, '$.exts'), '$.target_category') as target_category,
    from_unixtime(cast((get_json_object(b, '$.ct') / 1000) as bigint), 'yyyyMMddHH') as ct,
    tmp.target_ids
from log_json
lateral view explode(
    split(
        regexp_replace(
            get_json_object(get_json_object(b, '$.exts'), '$.target_ids'),
            '[\\[\\]]',
            ''
        ),
        ','
    )
) tmp as target_ids;
數據展現: