1. 運行腳本:
#!/usr/bin/env bash
# Submit the dataToES Spark job to YARN and capture its driver-side output
# into a per-run log file. All XXXX placeholders (input/output HDFS paths,
# run day, id type, tag origin short name, main class, jar) must be filled
# in before deployment.
set -uo pipefail   # -u: fail on unset vars; no -e so we can report failure ourselves

input="XXXX"
output="XXXX"
sday="XXXX"
id_type="XXXX"
tag_origin_short_name="XXXX"
name=dataToES_test

# Single log directory for this run; created up front and reused for the
# spark-submit redirection below so the two paths can never diverge.
log_dir="/opt/log/wangxuan/norman/${sday}/${id_type}/${tag_origin_short_name}/sparkESLog"
mkdir -p "${log_dir}"

# Pre-clean the output HDFS path; ignore failure when it does not exist yet.
hdfs dfs -rm -r "XXXX" || true

time1=$(date "+%Y-%m-%d %H:%M:%S")
echo "------------------${time1} ${sday} ${id_type} ${tag_origin_short_name} spark任務啓動------------------"
set -x
# NOTE: adjust the spark bin path to the current server when deploying.
/usr/lib/software/spark/spark-2.3/bin/spark-submit --class XXXX \
--master yarn \
--deploy-mode cluster \
--queue root.offline.hdp_teu_dpd.normal \
--name "${name}" \
--executor-memory 20G \
--num-executors 64 \
--executor-cores 4 \
--conf spark.yarn.executor.memoryOverhead=4G \
--conf "spark.driver.extraJavaOptions=-Dlog4jspark.root.logger=WARN,console" \
--conf "spark.executor.extraJavaOptions=-verbose:gc -XX:+UseG1GC" \
--driver-memory 4g \
--conf spark.yarn.maxAppAttempts=1 \
--conf spark.locality.wait=60000 \
--conf spark.dynamicAllocation.enabled=true \
--conf spark.rpc.askTimeout=6000 \
--conf spark.core.connection.ack.wait.timeout=6000 \
--conf spark.storage.memoryFraction=0.5 \
--conf spark.shuffle.memoryFraction=0.3 \
--conf spark.rpc.netty.dispatcher.numThreads=64 \
--conf spark.default.parallelism=3000 \
./XXXX.jar "${input}" "${output}" > "${log_dir}/${name}.log" 2>&1
rc=$?   # capture immediately; anything in between would clobber $?
set +x
if [ "${rc}" -ne 0 ]; then
time2=$(date "+%Y-%m-%d %H:%M:%S")
echo "!!!!${time2} ${sday} ${id_type} ${tag_origin_short_name} spark任務程序強行終止!!!!"
exit 1
fi
time3=$(date "+%Y-%m-%d %H:%M:%S")
echo "------------------${time3} ${sday} ${id_type} ${tag_origin_short_name} spark任務運行成功------------------"
exit 0
2. 調度運行腳本並打印腳本運行日誌的啓動腳本
#!/bin/bash
# Launcher: change into the sibling ../run directory, invoke the Spark job
# script with the three scheduling parameters, and tee its stdout/stderr
# into a per-run log file.
#
# Arguments:
#   $1 - id_type   (id dimension of the run)
#   $2 - project   (project / tag short name)
#   $3 - sday_key  (run day key, used in the log path)
# Exits 0 on success, 1 on any failure.

if [ "$#" -lt 3 ]; then
printf 'Usage: %s <id_type> <project> <sday_key>\n' "${0##*/}" >&2
exit 1
fi

# Resolve the ../run directory relative to this script's own location;
# abort if it is missing, since ./XXXX.sh is invoked relative to it.
BASEDIR=$(dirname "$0")
cd "${BASEDIR}/../run" || { echo "cannot cd to ${BASEDIR}/../run" >&2; exit 1; }
BASEDIR=$(pwd)

id_type=$1
project=$2
sday_key=$3

log_dir="/opt/log/XXXX/${sday_key}/${id_type}/${project}"
mkdir -p "${log_dir}"

time1=$(date "+%Y-%m-%d %H:%M:%S")
echo "------------------------${time1} spark作業開始運行------------------------"
if ! ./XXXX.sh "${id_type}" "${project}" "${sday_key}" \
> "${log_dir}/2.project_dsToEs_online_${id_type}_${project}.log" 2>&1; then
time2=$(date "+%Y-%m-%d %H:%M:%S")
echo "------------------------${time2} spark作業運行異常------------------------"
exit 1
fi
time3=$(date "+%Y-%m-%d %H:%M:%S")
echo "------------------------${time3} spark作業運行完畢------------------------"
exit 0