Spark常用提交腳本

1. 運行腳本:

#!/usr/bin/env bash
# Submit the dataToES Spark job to YARN and capture its output in a log file.
# Fill in every XXXX placeholder (input/output HDFS paths, date key, id type,
# tag name, main class, jar, and the HDFS path to pre-clean) before running.
set -u

input="XXXX"
output="XXXX"
sday="XXXX"
id_type="XXXX"
tag_origin_short_name="XXXX"

name="dataToES_test"

# Single source of truth for the log directory; the spark-submit redirect below
# uses the same variable so the log always lands in the directory we created.
# (The original redirected to a different /opt/log path than the one it mkdir'd.)
log_dir="/opt/log/wangxuan/norman/${sday}/${id_type}/${tag_origin_short_name}/sparkESLog"
# mkdir -p is idempotent, so no [ ! -d ] guard is needed.
mkdir -p "${log_dir}"

# Pre-clean the HDFS output path. -f keeps this from failing (and from tripping
# any error handling) on the very first run when the path does not exist yet.
hdfs dfs -rm -r -f XXXX

time1=$(date "+%Y-%m-%d %H:%M:%S")
echo "------------------${time1} ${sday} ${id_type} ${tag_origin_short_name} spark任務啓動------------------"

set -x
# NOTE: adjust the spark-submit path for the current server when reusing this script.
# NOTE(review): spark.dynamicAllocation.enabled=true combined with a fixed
# --num-executors is usually contradictory unless min/max executor bounds are
# also configured — confirm the intended allocation mode.
# Test the command directly instead of inspecting "$?" afterwards, which is
# fragile if any command is ever inserted between the job and the check.
if ! /usr/lib/software/spark/spark-2.3/bin/spark-submit --class XXXX \
    --master yarn \
    --deploy-mode cluster \
    --queue root.offline.hdp_teu_dpd.normal \
    --name "${name}" \
    --executor-memory 20G \
    --num-executors 64 \
    --executor-cores 4 \
    --conf spark.yarn.executor.memoryOverhead=4G \
    --conf "spark.driver.extraJavaOptions=-Dlog4jspark.root.logger=WARN,console" \
    --conf "spark.executor.extraJavaOptions=-verbose:gc -XX:+UseG1GC" \
    --driver-memory 4g \
    --conf spark.yarn.maxAppAttempts=1 \
    --conf spark.locality.wait=60000 \
    --conf spark.dynamicAllocation.enabled=true \
    --conf spark.rpc.askTimeout=6000 \
    --conf spark.core.connection.ack.wait.timeout=6000 \
    --conf spark.storage.memoryFraction=0.5 \
    --conf spark.shuffle.memoryFraction=0.3 \
    --conf spark.rpc.netty.dispatcher.numThreads=64 \
    --conf spark.default.parallelism=3000 \
    ./XXXX.jar "${input}" "${output}" > "${log_dir}/${name}.log" 2>&1
then
  time2=$(date "+%Y-%m-%d %H:%M:%S")
  echo "!!!!${time2} ${sday} ${id_type} ${tag_origin_short_name} spark任務程序強行終止!!!!"
  exit 1
fi

time3=$(date "+%Y-%m-%d %H:%M:%S")
echo "------------------${time3} ${sday} ${id_type} ${tag_origin_short_name} spark任務運行成功------------------"

exit 0

2. 調度運行腳本並打印腳本運行日誌的啓動腳本

#!/bin/bash
# Launcher: change into the ../run directory next to this script, invoke the
# Spark runner script, and capture its output into a per-run log file.
# Usage: $0 <id_type> <project> <sday_key>
set -u

# Resolve ../run relative to this script's own location; abort if it is missing
# (the original cd/pwd/cd dance never checked that the cd succeeded).
cd "$(dirname "$0")/../run" || exit 1

# Fail fast with a usage message when any positional argument is missing,
# instead of silently building malformed log paths.
id_type=${1:?usage: $0 <id_type> <project> <sday_key>}
project=${2:?usage: $0 <id_type> <project> <sday_key>}
sday_key=${3:?usage: $0 <id_type> <project> <sday_key>}

log_dir="/opt/log/XXXX/${sday_key}/${id_type}/${project}"
# mkdir -p is idempotent, so no [ ! -d ] guard is needed.
mkdir -p "${log_dir}"

time1=$(date "+%Y-%m-%d %H:%M:%S")
echo "------------------------${time1} spark作業開始運行------------------------"

# Test the command directly instead of inspecting "$?" afterwards (SC2181).
if ! ./XXXX.sh "${id_type}" "${project}" "${sday_key}" \
    > "${log_dir}/2.project_dsToEs_online_${id_type}_${project}.log" 2>&1
then
  time2=$(date "+%Y-%m-%d %H:%M:%S")
  echo "------------------------${time2} spark作業運行異常------------------------"
  exit 1
fi

time3=$(date "+%Y-%m-%d %H:%M:%S")
echo "------------------------${time3} spark作業運行完畢------------------------"

exit 0

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章