創建hive表時(分區表、內外表都適用),遇到雙符號的切分符怎麼辦,比如:||

兩個字段的例子 

# 1. 建表
-- Two-column example: every line of the data file looks like `<id>||<name>`.
-- RegexSerDe maps capture group N of 'input.regex' to column N, so the regex
-- must contain exactly one group per (non-partition) column.
-- Each pipe is written as \\| : one backslash is consumed by the Hive string
-- literal, the remaining \| is the regex escape for a literal pipe.
-- 'output.format.string' is intentionally omitted: serde2.RegexSerDe is
-- deserialize-only and ignores it (the old value also referenced a third
-- column, %3$s, that this table does not have).
CREATE EXTERNAL TABLE b (
    id   DOUBLE,
    name STRING
)
PARTITIONED BY (ds STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    'input.regex' = '(.*)\\|\\|(.*)'
)
STORED AS TEXTFILE;

# 2. 加載數據
-- Copy the local file /root/a into partition ds='2020-02-02' of table b.
hive> LOAD DATA LOCAL INPATH '/root/a' INTO TABLE b PARTITION (ds = '2020-02-02');

# 3. 查看數據
-- Read the table back to confirm each '||'-delimited line was split into columns.
hive> select * from b;
OK
01	huangbo
02	xuzheng
03	wangbaoqiang
Time taken: 0.098 seconds, Fetched: 3 row(s)

很多字段的例子 (16個字段,需要切15次)

注意:一行數據中有16個字段(即15個分隔符)時,'input.regex' 中要寫16個捕獲組 (.*),中間用15個 \\|\\| 連接;捕獲組的數量必須與表的列數(不含分區列)一致。

切分符是其他符號的話,只需把正則中的 \\|\\| 換成對應的符號即可(如果該符號是正則元字符,同樣需要用 \\ 轉義)。




-- 數據 111||0.0||0.0||0.0||0.0||0.0||0.0||1||0.0||1||1||1||1||1||1||1


-- 16-column example: each input line carries 16 values separated by '||'.
-- The regex below therefore has 16 capture groups joined by 15 escaped
-- delimiters; group N is assigned to column N in declaration order.
CREATE EXTERNAL TABLE IF NOT EXISTS ecar_match_gps (
    terminalid   BIGINT,
    acceleration DOUBLE,
    direct       DOUBLE,
    distance     DOUBLE,
    height       DOUBLE,
    lat          DOUBLE,
    lon          DOUBLE,
    satellitenum INTEGER,
    speed        DOUBLE,
    systime      BIGINT,
    h3_5         BIGINT,
    h3_7         BIGINT,
    h3_10        BIGINT,
    h3_13        BIGINT,
    utc          BIGINT,
    roadid       BIGINT
)
PARTITIONED BY (ds STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    'input.regex'='(.*)\\|\\|(.*)\\|\\|(.*)\\|\\|(.*)\\|\\|(.*)\\|\\|(.*)\\|\\|(.*)\\|\\|(.*)\\|\\|(.*)\\|\\|(.*)\\|\\|(.*)\\|\\|(.*)\\|\\|(.*)\\|\\|(.*)\\|\\|(.*)\\|\\|(.*)'
)
STORED AS TEXTFILE;

-- Load the local sample file into the table created above. The original
-- statement targeted `bbbc`, a table that is never created in this script.
LOAD DATA LOCAL INPATH '/root/4' INTO TABLE ecar_match_gps PARTITION (ds = '2020-02-02');



-- Same layout as ecar_match_gps but without the trailing roadid column,
-- i.e. 15 columns. RegexSerDe requires the number of capture groups to equal
-- the number of columns, so the regex has 15 groups joined by 14 escaped
-- '||' delimiters (the previous 16-group pattern did not match this schema).
-- NOTE(review): assumes the input lines for this table carry 15 values;
-- confirm against the actual data files.
CREATE EXTERNAL TABLE IF NOT EXISTS ecar_reserve_gps (
    terminalid   BIGINT,
    acceleration DOUBLE,
    direct       DOUBLE,
    distance     DOUBLE,
    height       DOUBLE,
    lat          DOUBLE,
    lon          DOUBLE,
    satellitenum INTEGER,
    speed        DOUBLE,
    systime      BIGINT,
    h3_5         BIGINT,
    h3_7         BIGINT,
    h3_10        BIGINT,
    h3_13        BIGINT,
    utc          BIGINT
)
PARTITIONED BY (ds STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    'input.regex' = '(.*)\\|\\|(.*)\\|\\|(.*)\\|\\|(.*)\\|\\|(.*)\\|\\|(.*)\\|\\|(.*)\\|\\|(.*)\\|\\|(.*)\\|\\|(.*)\\|\\|(.*)\\|\\|(.*)\\|\\|(.*)\\|\\|(.*)\\|\\|(.*)'
)
STORED AS TEXTFILE;

 

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章