datax hdfswriter文檔
https://github.com/alibaba/DataX/blob/master/hdfswriter/doc/hdfswriter.md
需要注意的是,hdfswriter寫入時的字段分隔符,需要用戶保證與創建的Hive表的字段分隔符一致,否則無法在Hive表中查到數據。(此限制僅適用於 textfile 格式的表;ORC 是自描述的二進制格式,分隔符對其不生效。)
另外,雖然 Hive 3 支持 decimal 類型,但 DataX 的 hdfswriter 尚不支持,因此 DataX 作業的 column 配置中應該用 string 代替 decimal。
建表語句
-- External ODS table written by the DataX hdfswriter job configured below.
-- NOTE: the original DDL carried ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
-- that clause is ignored for STORED AS ORC tables (ORC is a self-describing
-- binary format — delimiters only matter for TEXTFILE), so it is dropped here
-- to avoid misleading readers. IF NOT EXISTS makes the script re-runnable.
-- Column names must stay in sync with the "column" list of the DataX job.
create external table if not exists ods.studentpay_chain
(id string COMMENT 'id',
name string COMMENT '名稱',
age int COMMENT '年齡',
money decimal(28,10) COMMENT '金額',
updateTime timestamp COMMENT '更新時間',
startDate timestamp COMMENT '生效日期',
endDate timestamp COMMENT '失效日期')
COMMENT '學生繳費表'
STORED AS ORC
LOCATION '/user/hive/warehouse/ods.db/ods.studentpay_chain';
datax作業配置
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"column": ["*"],
"connection": [
{
"jdbcUrl": ["jdbc:mysql://xxx:3306/xxx"],
"table": ["$table"]
}
],
"password": "xxx",
"username": "xxx",
"where": "updateTime > '$from' and updateTime < '$to'"
}
},
"writer": {
"name": "hdfswriter",
"parameter": {
"column": [
{"name":"id","type":"string"},
{"name":"name","type":"string"},
{"name":"age","type":"int"},
{"name":"money","type":"string"},
{"name":"updateTime","type":"timestamp"},
{"name":"startTime","type":"timestamp"},
{"name":"endTime","type":"timestamp"}
],
"compress": "SNAPPY",
"defaultFS": "hdfs://xxx:9000",
"fieldDelimiter": "\t",
"fileName": "$table",
"fileType": "orc",
"path": "/user/hive/warehouse/ods.db/ods.studentpay_chain",
"writeMode": "append"
}
}
}
],
"setting": {
"speed": {
"channel": "2"
}
}
}
}