datax 簡單例子

{
    "job": {
        "setting": {
            "speed": {
                "channel": 4
            },
            "errorLimit": {
                "record": 0,
                "percentage": 0.02
            }
        },
        "content": [
            {
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "username": "sqoop",
                        "password": "********",
                        "connection": [
                            {
                                "jdbcUrl": ["jdbc:mysql://**.**.**.**:3306?useUnicode=true&characterEncoding=utf-8&tinyInt1isBit=false"],
								"querySql": [
									"select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_001.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_002.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_003.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_004.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_005.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_006.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_007.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_008.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_009.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_010.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_011.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_012.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_013.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_014.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_015.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_016.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_017.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_018.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_019.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_020.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_021.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_022.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_023.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_024.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_025.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_026.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_027.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_028.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_029.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_030.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_031.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_032.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_033.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_034.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_035.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_036.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_037.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_038.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_039.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_040.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_041.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_042.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_043.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_044.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_045.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_046.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_047.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_048.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_049.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_050.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_051.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_052.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_053.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_054.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_055.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_056.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_057.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_058.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_059.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_060.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_061.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_062.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_063.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'",
                                    "select session_id,sender_id,receiver_id,msg_id,msg_type,msg_content,msg_status,platform,create_time,'' as expire_time,customize as visible_list,curdate() as etl_date from lagou_im_064.chat_v5_record where create_time>='$execute_date 00:00:00' AND create_time<'$next_date 00:00:00'"
								]
                            }
                        ]
                    }
                },
                "writer": {
                    "name": "hdfswriter",
                    "parameter": {
                        "defaultFS": "hdfs://host",
                        "fileType": "text",
                        "compress": "gzip",
                        "path": "/user/dw/warehouse/temp.db/chat_v5_record",
                        "fileName": "chat_v5_record",
                        "fieldDelimiter": "\u0001",
                        "writeMode": "append",
                        "haveKerberos": true,
                        "kerberosKeytabFilePath": "路徑",
                        "kerberosPrincipal": "[email protected]",
                        "column": [
                            {
                                "name": "session_id",
                                "type": "bigint"
                            },
							{   
							    "name": "sender_id",
							    "type": "bigint"
							},
							{   
							    "name": "receiver_id",
							    "type": "bigint"
							},
							{   
							    "name": "msg_id",
							    "type": "bigint"
							},
							{   
							    "name": "msg_type",
							    "type": "int"
							},
							{   
							    "name": "msg_content",
							    "type": "string"
							},
                            {   
                                "name": "msg_status",
                                "type": "int"
                            },
                            {   
                                "name": "platform",
                                "type": "int"
                            },

                            {   
                                "name": "create_time",
                                "type": "String"
                            },

                            {   
                                "name": "expire_time",
                                "type": "String"
                            },
                            {   
                                "name": "visible_list",
                                "type": "String"
                            },
                            {   
                                "name": "etl_date",
                                "type": "String"
                            }
                        ]
                    }
                }
            }
        ]
    }
}


執行

#!/bin/bash
#
# Load ods.user_last_active_time one day at a time through DataX.
#
# Usage: <script> [START_DATE [END_DATE]]
#   START_DATE  first day to load (default: yesterday)
#   END_DATE    exclusive upper bound (default: today)
#
# Both dates may be given in any form GNU `date -d` understands; they are
# normalised to YYYY-MM-DD before use.
#
# For every day D in [START_DATE, END_DATE) the script:
#   1. truncates the staging table temp.user_last_active_time_tmp,
#   2. runs the DataX job with -Dexecute_date=D and -Dnext_date=D+1,
#   3. overwrites partition dt=D of ods.user_last_active_time from staging.
#
# Any failing step aborts the whole run (set -e).

set -euo pipefail

# Print a message on stderr and terminate with a non-zero status.
die() {
    printf '%s\n' "$*" >&2
    exit 1
}

# Echo $1 normalised to YYYY-MM-DD, or abort if GNU date cannot parse it.
# Normalising up front keeps partition names uniform and guarantees that only
# digits and '-' are ever interpolated into the Hive SQL and DataX parameters.
normalize_date() {
    local raw=$1
    local parsed
    parsed=$(date -d "${raw}" +%F 2>/dev/null) || die "invalid date: '${raw}'"
    printf '%s\n' "${parsed}"
}

# Empty or missing arguments fall back to yesterday / today.
START_DATE=${1:-}
END_DATE=${2:-}

if [[ -z "${START_DATE}" ]]; then
    START_DATE=$(date -d '1 days ago' +%Y-%m-%d)
fi
if [[ -z "${END_DATE}" ]]; then
    END_DATE=$(date -d '0 days ago' +%Y-%m-%d)
fi

# A bad date must stop the run here; inside the loop condition it would only
# end the loop and let the script report success without loading anything.
START_DATE=$(normalize_date "${START_DATE}")
END_DATE=$(normalize_date "${END_DATE}")

echo "execute data from ${START_DATE} to ${END_DATE}......"

start=${START_DATE}
current=${START_DATE}

# Compare as YYYYMMDD integers; 10# forces base 10 regardless of leading zeros.
while (( 10#${current//-/} < 10#${END_DATE//-/} )); do
    NEXT_DAY=$(date -d "${current} 1 days" +%Y-%m-%d)

    # The staging table is shared between iterations, so empty it first.
    hive -e "truncate table temp.user_last_active_time_tmp;"

    # The job file path is a glob and is deliberately left unquoted.
    python /data/data/datax/bin/datax.py \
        -p"-Dexecute_date=${current} -Dnext_date=${NEXT_DAY}" \
        /data/*/*/script/datax/active_user/user_last_active_time.json

    hive -e "
set mapred.max.split.size=256000000;
set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
set mapred.min.split.size.per.node=128000000;
set mapred.min.split.size.per.rack=128000000;

ALTER TABLE ods.user_last_active_time ADD IF NOT EXISTS PARTITION(dt='${current}');

INSERT OVERWRITE TABLE ods.user_last_active_time PARTITION(dt='${current}')
SELECT * FROM temp.user_last_active_time_tmp;
"

    current=${NEXT_DAY}
done

echo "finished execute ${start} ~ ${END_DATE}......"

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章