Continuing from "spark-core_07: analysis of the $SPARK_HOME/sbin/start-slaves.sh script" above.
1. The $SPARK_HOME/sbin/slaves.sh script:
#!/usr/bin/env bash
# Run a shell command on all slave hosts.
# Environment Variables
# SPARK_SLAVES File naming remote hosts.
# Default is ${SPARK_CONF_DIR}/slaves.
# SPARK_CONF_DIR Alternate conf dir. Default is ${SPARK_HOME}/conf.
# SPARK_SLAVE_SLEEP Seconds to sleep between spawning remote commands.
# SPARK_SSH_OPTS Options passed to ssh when running remote commands.
# By default the slave host list is read from ${SPARK_CONF_DIR}/slaves.
# SPARK_SLAVE_SLEEP: the number of seconds to sleep between issuing the remote commands.
##
usage="Usage:slaves.sh [--config <conf-dir>] command..."
# ifno args specified, show usage
if [$# -le 0 ]; then
echo $usage
exit 1
fi
if [ -z "${SPARK_HOME}" ]; then
export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
fi
# spark-config.sh resolves the SPARK_CONF_DIR environment variable, defaulting it to ${SPARK_HOME}/conf
. "${SPARK_HOME}/sbin/spark-config.sh"
# If the slaves file is specified in the command line,
# then it takes precedence over the definition in
# spark-env.sh. Save it here.
# If a slaves file is given on the command line it takes precedence over the one defined in spark-env.sh.
if [ -f "$SPARK_SLAVES" ]; then
HOSTLIST=`cat "$SPARK_SLAVES"`
fi
# Check if --config is passed as an argument. It is an optional parameter.
# Exit if the argument is not a directory.
if [ "$1" == "--config" ]
then
shift
conf_dir="$1"
if [ ! -d "$conf_dir" ]
then
echo "ERROR : $conf_dir is not adirectory"
echo $usage
exit 1
else
export SPARK_CONF_DIR="$conf_dir"
fi
shift
fi
# load-spark-env.sh sources spark-env.sh exactly once (guarded by the SPARK_ENV_LOADED=1 flag) and also sets SPARK_SCALA_VERSION (2.10 for this build).
. "${SPARK_HOME}/bin/load-spark-env.sh"
# If $HOSTLIST is still empty and $SPARK_SLAVES was not set either, read the contents of
# ${SPARK_CONF_DIR}/slaves into HOSTLIST if that file exists; otherwise fall back to localhost.
if [ "$HOSTLIST" = "" ]; then
if [ "$SPARK_SLAVES" = "" ]; then
if [ -f "${SPARK_CONF_DIR}/slaves" ]; then
HOSTLIST=`cat "${SPARK_CONF_DIR}/slaves"`
else
HOSTLIST=localhost
fi
else
HOSTLIST=`cat "${SPARK_SLAVES}"`
fi
fi
# By default disable strict host key checking
# If $SPARK_SSH_OPTS is unset, default it to "-o StrictHostKeyChecking=no" so ssh does not prompt about unknown host keys.
if [ "$SPARK_SSH_OPTS" = "" ]; then
SPARK_SSH_OPTS="-o StrictHostKeyChecking=no"
fi
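# (If extra ssh options are needed they can be exported from spark-env.sh before this point; a hypothetical example: export SPARK_SSH_OPTS="-o StrictHostKeyChecking=no -p 2222" to reach slaves on a non-default ssh port.)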
# Loop over $HOSTLIST. The sed strips inline comments (everything from a "#" to the end of the line, so
# "luyl152#aaa" becomes "luyl152") and deletes blank lines.
# Below, $"${@// /\\ }" expands to the command passed in by start-slaves.sh, here:
# cd /data/spark-1.6.0-bin-hadoop2.6 ; /data/spark-1.6.0-bin-hadoop2.6/sbin/start-slave.sh spark://luyl152:7077
# The odd-looking ${@// /\\ } (rather than a plain $@) backslash-escapes every space inside each argument so the
# remote shell does not re-split it; the $"..." is bash locale quoting and behaves like ordinary double quotes here.
# See the runnable sketch right after this script.
for slave in `echo "$HOSTLIST"|sed "s/#.*$//;/^$/d"`; do
if [ -n "${SPARK_SSH_FOREGROUND}" ]; then
ssh $SPARK_SSH_OPTS "$slave" $"${@// /\\ }" \
2>&1 | sed "s/^/$slave: /"
else
ssh $SPARK_SSH_OPTS "$slave" $"${@// /\\ }" \
2>&1 | sed "s/^/$slave: /" &
fi
if [ "$SPARK_SLAVE_SLEEP" !="" ]; then
sleep $SPARK_SLAVE_SLEEP
fi
done
wait
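To see what the comment-stripping sed and the ${@// /\\ } expansion actually do, here is a small self-contained sketch that can be run on its own (the host names, the show_remote_cmd helper and the sample command are made up for illustration):

#!/usr/bin/env bash
# A sample host list: inline "#" comments and blank lines must be ignored.
HOSTLIST='luyl152#this part is a comment

luyl153'
# The same sed expression as in slaves.sh: delete "#..." suffixes, then delete empty lines.
for slave in `echo "$HOSTLIST" | sed "s/#.*$//;/^$/d"`; do
  echo "would ssh to: $slave"
done
# prints: would ssh to: luyl152
#         would ssh to: luyl153

# Why ${@// /\\ } instead of a plain $@: ssh joins its arguments into a single string and the remote
# shell re-parses that string, so spaces inside one argument must be backslash-escaped to stay one word.
show_remote_cmd() {
  printf 'escaped: %s\n' "${@// /\\ }"
  printf 'plain  : %s\n' "$@"
}
show_remote_cmd "echo hello   world"
# escaped: echo\ hello\ \ \ world   (stays one shell word on the remote side)
# plain  : echo hello   world       (the remote shell would re-split this into three words)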
2. The $SPARK_HOME/sbin/start-slave.sh script:
#!/usr/bin/env bash
# Starts a slave on the machine this script is executed on.
#
# Environment Variables
#
# SPARK_WORKER_INSTANCES The number of worker instances to run on this
# slave. Default is 1.
# SPARK_WORKER_PORT The base port number for the first worker. If set,
# subsequent workers will increment this number. If
# unset, Spark will find a valid port number, but
# with no guarantee of a predictable pattern.
# SPARK_WORKER_WEBUI_PORT The base port for the web interface of the first
# worker. Subsequent workers will increment this
# number. Default is 8081.
# SPARK_WORKER_INSTANCES: the number of Worker instances on this node, 1 by default.
# SPARK_WORKER_PORT: port of the first Worker; if set and the node runs several workers, each subsequent worker increments it.
# SPARK_WORKER_WEBUI_PORT: the worker web UI port, 8081 by default (the master's web UI uses 8080).
# slaves.sh makes ssh run the following on every slave: cd /data/spark-1.6.0-bin-hadoop2.6 ; /data/spark-1.6.0-bin-hadoop2.6/sbin/start-slave.sh spark://luyl152:7077
if [ -z "${SPARK_HOME}" ]; then
export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
fi
# NOTE: This exact class name is matched downstream by SparkSubmit.
# Any changes need to be reflected there.
# the Worker's fully qualified class name
CLASS="org.apache.spark.deploy.worker.Worker"
if [[ $# -lt 1 ]] || [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
echo "Usage: ./sbin/start-slave.sh [options] <master>"
pattern="Usage:"
pattern+="\|Using Spark's default log4jprofile:"
pattern+="\|Registered signal handlersfor"
"${SPARK_HOME}"/bin/spark-class$CLASS --help 2>&1 | grep -v "$pattern" 1>&2
exit 1
fi
# spark-config.sh resolves SPARK_CONF_DIR, defaulting it to ${SPARK_HOME}/conf (see the sketch in section 1)
. "${SPARK_HOME}/sbin/spark-config.sh"
# load-spark-env.sh sources spark-env.sh once (guarded by SPARK_ENV_LOADED=1) and sets SPARK_SCALA_VERSION
. "${SPARK_HOME}/bin/load-spark-env.sh"
# First argument should be the master; we need to store it aside because we may
# need to insert arguments between it and the other arguments
# The first argument is the Spark master URL, here spark://luyl152:7077; store it in MASTER and shift it off.
MASTER=$1
shift
# Determine desired worker port (this block actually sets the default for the worker web UI port)
if [ "$SPARK_WORKER_WEBUI_PORT" = "" ]; then
SPARK_WORKER_WEBUI_PORT=8081
fi
# Start up the appropriate number of workers on this machine.
# quick local function to start a worker
function start_instance {
WORKER_NUM=$1
shift
# If SPARK_WORKER_PORT is empty, pass an empty port flag and port number on to spark-daemon.sh
if [ "$SPARK_WORKER_PORT" = "" ]; then
PORT_FLAG=
PORT_NUM=
else
PORT_FLAG="--port"
PORT_NUM=$(( $SPARK_WORKER_PORT + $WORKER_NUM - 1 ))
fi
WEBUI_PORT=$(( $SPARK_WORKER_WEBUI_PORT + $WORKER_NUM - 1 ))
# e.g.: spark-daemon.sh start org.apache.spark.deploy.worker.Worker 1 --webui-port 8081 spark://luyl152:7077
# From here we move into "spark-core_06: analysis of the $SPARK_HOME/sbin/spark-daemon.sh script"
"${SPARK_HOME}/sbin"/spark-daemon.sh start $CLASS $WORKER_NUM \
--webui-port "$WEBUI_PORT" $PORT_FLAG $PORT_NUM $MASTER "$@"
}
# If SPARK_WORKER_INSTANCES is unset, start a single instance; the first argument to start_instance is that worker's
# instance number. The master URL spark://luyl152:7077 was already shifted off at the top, so "$@" here only carries
# any remaining options and is usually empty. A worked example of the port arithmetic follows after the script.
if [ "$SPARK_WORKER_INSTANCES" = "" ]; then
start_instance 1 "$@"
else
for ((i=0; i<$SPARK_WORKER_INSTANCES; i++)); do
start_instance $(( 1 + $i )) "$@"
done
fi
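To make the instance numbering and the port arithmetic in start_instance concrete, here is a stand-alone sketch; the three values are assumptions standing in for what one might put in spark-env.sh, and the loop only prints what start_instance would compute rather than starting anything:

#!/usr/bin/env bash
# Assumed configuration for a single node (illustration only):
SPARK_WORKER_INSTANCES=3
SPARK_WORKER_PORT=7078
SPARK_WORKER_WEBUI_PORT=8081

for ((i=0; i<$SPARK_WORKER_INSTANCES; i++)); do
  WORKER_NUM=$(( 1 + $i ))
  PORT_NUM=$(( $SPARK_WORKER_PORT + $WORKER_NUM - 1 ))
  WEBUI_PORT=$(( $SPARK_WORKER_WEBUI_PORT + $WORKER_NUM - 1 ))
  echo "worker $WORKER_NUM: --port $PORT_NUM --webui-port $WEBUI_PORT"
done
# worker 1: --port 7078 --webui-port 8081
# worker 2: --port 7079 --webui-port 8082
# worker 3: --port 7080 --webui-port 8083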
The source code of org.apache.spark.deploy.worker.Worker will be analyzed in the following sections.