承接上面“spark-core_07: $SPARK_HOME/sbin/start-slaves.sh腳本分析”
一、這是$SPARK_HOME/sbin/slaves.sh腳本;
#!/usr/bin/env bash
# Run a shell command on all slave hosts.
#
# Environment Variables
#   SPARK_SLAVES       File naming remote hosts.
#                      Default is ${SPARK_CONF_DIR}/slaves.
#   SPARK_CONF_DIR     Alternate conf dir. Default is ${SPARK_HOME}/conf.
#   SPARK_SLAVE_SLEEP  Seconds to sleep between spawning remote commands.
#   SPARK_SSH_OPTS     Options passed to ssh when running remote commands.
# By default the slave list is read from ${SPARK_CONF_DIR}/slaves.
##

usage="Usage: slaves.sh [--config <conf-dir>] command..."

# If no args specified, show usage.
if [ $# -le 0 ]; then
  echo $usage
  exit 1
fi

# Resolve SPARK_HOME from this script's location if not already set.
if [ -z "${SPARK_HOME}" ]; then
  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
fi
# spark-config.sh exports SPARK_CONF_DIR (defaults to ${SPARK_HOME}/conf).
. "${SPARK_HOME}/sbin/spark-config.sh"

# If the slaves file is specified in the command line,
# then it takes precedence over the definition in
# spark-env.sh. Save it here.
if [ -f "$SPARK_SLAVES" ]; then
  HOSTLIST=`cat "$SPARK_SLAVES"`
fi
# Check if --config is passed as an argument. It is an optional parameter.
# Exit if the argument is not a directory.
if [ "$1" == "--config" ]
then
  shift
  conf_dir="$1"
  if [ ! -d "$conf_dir" ]
  then
    echo "ERROR : $conf_dir is not a directory"
    echo $usage
    exit 1
  else
    export SPARK_CONF_DIR="$conf_dir"
  fi
  shift
fi
# load-spark-env.sh sources spark-env.sh exactly once and sets
# SPARK_SCALA_VERSION plus SPARK_ENV_LOADED=1.
. "${SPARK_HOME}/bin/load-spark-env.sh"

# If HOSTLIST is still empty: with SPARK_SLAVES unset, fall back to
# ${SPARK_CONF_DIR}/slaves (or "localhost" when that file is absent);
# otherwise read the file named by SPARK_SLAVES.
if [ "$HOSTLIST" = "" ]; then
  if [ "$SPARK_SLAVES" = "" ]; then
    if [ -f "${SPARK_CONF_DIR}/slaves" ]; then
      HOSTLIST=`cat "${SPARK_CONF_DIR}/slaves"`
    else
      HOSTLIST=localhost
    fi
  else
    HOSTLIST=`cat "${SPARK_SLAVES}"`
  fi
fi
# By default disable strict host key checking.
if [ "$SPARK_SSH_OPTS" = "" ]; then
  SPARK_SSH_OPTS="-o StrictHostKeyChecking=no"
fi

# Loop over HOSTLIST; sed strips trailing '#'-comments (e.g. "host1#aaa"
# becomes "host1") and deletes blank lines. $"${@// /\\ }" backslash-escapes
# the spaces inside each argument so the command survives the ssh remote
# shell's re-parsing, e.g.:
#   cd /data/spark ; /data/spark/sbin/start-slave.sh spark://master:7077
# Foreground mode runs the hosts serially; otherwise each ssh is backgrounded
# and the final `wait` acts as a barrier.
for slave in `echo "$HOSTLIST"|sed  "s/#.*$//;/^$/d"`; do
  if [ -n "${SPARK_SSH_FOREGROUND}" ]; then
    ssh $SPARK_SSH_OPTS "$slave" $"${@// /\\ }" \
      2>&1 | sed "s/^/$slave: /"
  else
    ssh $SPARK_SSH_OPTS "$slave" $"${@// /\\ }" \
      2>&1 | sed "s/^/$slave: /" &
  fi
  if [ "$SPARK_SLAVE_SLEEP" != "" ]; then
    sleep $SPARK_SLAVE_SLEEP
  fi
done

wait
二、$SPARK_HOME/sbin/start-slave.sh
#!/usr/bin/env bash
# Starts a slave on the machine this script is executed on.
#
# Environment Variables
#
#   SPARK_WORKER_INSTANCES  The number of worker instances to run on this
#                           slave.  Default is 1.
#   SPARK_WORKER_PORT       The base port number for the first worker. If set,
#                           subsequent workers will increment this number.  If
#                           unset, Spark will find a valid port number, but
#                           with no guarantee of a predictable pattern.
#   SPARK_WORKER_WEBUI_PORT The base port for the web interface of the first
#                           worker.  Subsequent workers will increment this
#                           number.  Default is 8081 (the master uses 8080).
#
# slaves.sh has ssh run this on every slave, e.g.:
#   cd /data/spark ; /data/spark/sbin/start-slave.sh spark://master:7077

# Resolve SPARK_HOME from this script's location if not already set.
if [ -z "${SPARK_HOME}" ]; then
  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
fi

# NOTE: This exact class name is matched downstream by SparkSubmit.
# Any changes need to be reflected there.
CLASS="org.apache.spark.deploy.worker.Worker"

# No master argument, or --help/-h requested: print usage plus the Worker
# class's own --help output (with its noise lines filtered out) and exit.
if [[ $# -lt 1 ]] || [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
  echo "Usage: ./sbin/start-slave.sh [options] <master>"
  pattern="Usage:"
  pattern+="\|Using Spark's default log4j profile:"
  pattern+="\|Registered signal handlers for"
  "${SPARK_HOME}"/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2
  exit 1
fi
# spark-config.sh exports SPARK_CONF_DIR (defaults to ${SPARK_HOME}/conf).
. "${SPARK_HOME}/sbin/spark-config.sh"

# load-spark-env.sh sources spark-env.sh exactly once and sets
# SPARK_SCALA_VERSION plus SPARK_ENV_LOADED=1.
. "${SPARK_HOME}/bin/load-spark-env.sh"

# First argument should be the master; we need to store it aside because we may
# need to insert arguments between it and the other arguments
# (e.g. spark://master:7077, then shifted off).
MASTER=$1
shift

# Determine desired worker web-UI base port.
if [ "$SPARK_WORKER_WEBUI_PORT" = "" ]; then
  SPARK_WORKER_WEBUI_PORT=8081
fi
# Start up the appropriate number of workers on this machine.
# Quick local function to start a worker.
#   $1        - 1-based worker instance number
#   remaining - extra args forwarded to the Worker class
# Worker N gets port SPARK_WORKER_PORT+N-1 (when set) and web UI port
# SPARK_WORKER_WEBUI_PORT+N-1.
function start_instance {
  WORKER_NUM=$1
  shift

  # If SPARK_WORKER_PORT is unset, pass no --port flag and let Spark pick one.
  if [ "$SPARK_WORKER_PORT" = "" ]; then
    PORT_FLAG=
    PORT_NUM=
  else
    PORT_FLAG="--port"
    PORT_NUM=$(( $SPARK_WORKER_PORT + $WORKER_NUM - 1 ))
  fi
  WEBUI_PORT=$(( $SPARK_WORKER_WEBUI_PORT + $WORKER_NUM - 1 ))

  # e.g.: spark-daemon.sh start org.apache.spark.deploy.worker.Worker 1 \
  #         --webui-port 8081 spark://master:7077
  "${SPARK_HOME}/sbin"/spark-daemon.sh start $CLASS $WORKER_NUM \
     --webui-port "$WEBUI_PORT" $PORT_FLAG $PORT_NUM $MASTER "$@"
}
# Launch one worker by default, or SPARK_WORKER_INSTANCES workers numbered
# 1..N. The master URL was already shifted off into $MASTER, so "$@" here
# holds only any remaining pass-through options.
if [ "$SPARK_WORKER_INSTANCES" = "" ]; then
  start_instance 1 "$@"
else
  for ((i=0; i<$SPARK_WORKER_INSTANCES; i++)); do
    start_instance $(( 1 + $i )) "$@"
  done
fi
關於org.apache.spark.deploy.worker.Worker源碼在下面的章節進行分析