好程序員大數據分享Spark任務和集羣啓動流程,Spark集羣啓動流程
1.調用start-all.sh腳本,開始啓動Master
2.Master啓動以後,preStart方法會啓動一個定時器,定時檢查超時的Worker並將其刪除
3.啓動腳本會解析slaves配置文件,找到啓動Worker的相應節點.開始啓動Worker
4.Worker服務啓動後調用preStart方法,向所有的Master進行註冊
5.Master接收到Worker發送過來的註冊信息,Master開始保存註冊信息並把自己的URL響應給Worker
6.Worker接收到Master的URL後進行更新,並啓動一個定時器,定時向Master發送心跳信息
任務提交流程
1.Driver端會通過spark-submit腳本啓動SparkSubmit進程,此時創建了一個非常重要的對象(SparkContext),開始向Master發送消息
2.Master接收到發送過來的信息後開始生成任務信息,並把任務信息放到一個隊列裏
3.Master把所有有效的Worker過濾出來,按照空閒的資源進行排序
4.Master開始向有效的Worker通知拿取任務信息並啓動相應的Executor
5.Worker啓動Executor並向Driver反向註冊
6.Driver開始把生成的task發送給相應的Executor,Executor開始執行任務
集羣啓動流程
1.首先創建Master類
import akka.actor.{Actor, ActorSystem, Props} import com.typesafe.config.{Config, ConfigFactory}
import scala.collection.mutable import scala.concurrent.duration._
/**
 * Cluster master actor. Keeps a registry of workers and evicts the ones whose
 * heartbeats stop arriving.
 *
 * @param masterHost host name used to build the master URL that is sent back to workers
 * @param masterPort port used to build the master URL that is sent back to workers
 */
class Master(val masterHost: String, val masterPort: Int) extends Actor {

  // Registered workers, keyed by worker id.
  val idToWorker = new mutable.HashMap[String, WorkerInfo]()

  // The same registered workers as a set; scanned by the timeout check.
  val workers = new mutable.HashSet[WorkerInfo]()

  // Milliseconds. Used both as the period of the timeout check and as the
  // maximum time a worker may stay silent before it is removed.
  val checkInterval: Long = 15000

  // Handle of the periodic timeout check, kept so it can be cancelled when the
  // actor stops. Without this, every restart would add another timer that
  // keeps firing at the same ActorRef.
  private var timeoutCheck: Option[akka.actor.Cancellable] = None

  /** Lifecycle hook: starts the periodic timeout check, first tick immediately. */
  override def preStart(): Unit = {
    import context.dispatcher
    timeoutCheck = Some(
      context.system.scheduler.schedule(0.millis, checkInterval.millis, self, CheckTimeOutWorker))
  }

  /** Lifecycle hook: stops the periodic timeout check. */
  override def postStop(): Unit = {
    timeoutCheck.foreach(_.cancel())
    timeoutCheck = None
  }

  /** Message loop of the master. */
  override def receive: Receive = {
    // Worker -> Master: registration request.
    case RegisterWorker(id, host, port, memory, cores) =>
      if (!idToWorker.contains(id)) {
        val workerInfo = new WorkerInfo(id, host, port, memory, cores)
        // Count the registration itself as the first sign of life; otherwise a
        // timeout check that runs before the first heartbeat would evict the
        // worker right away.
        workerInfo.lastHeartbeatTime = System.currentTimeMillis()
        idToWorker += (id -> workerInfo)
        workers += workerInfo

        println("a worker registered")

        sender ! RegisteredWorker(s"akka.tcp://${Master.MASTER_SYSTEM}" +
          s"@${masterHost}:${masterPort}/user/${Master.MASTER_ACTOR}")
      }

    // Worker -> Master: heartbeat.
    case HeartBeat(workerId) =>
      // A heartbeat may arrive from a worker that was never registered or has
      // already been evicted; ignore it instead of failing the actor with a
      // NoSuchElementException.
      idToWorker.get(workerId).foreach { workerInfo =>
        workerInfo.lastHeartbeatTime = System.currentTimeMillis()
      }

    // Master -> self: periodic timeout check.
    case CheckTimeOutWorker =>
      val currentTime = System.currentTimeMillis()
      val toRemove = workers.filter(w => currentTime - w.lastHeartbeatTime > checkInterval)

      // Drop the timed-out workers from both idToWorker and workers.
      toRemove.foreach { deadWorker =>
        idToWorker -= deadWorker.id
        workers -= deadWorker
      }

      println(s"num of workers: ${workers.size}")
  }
}

/** Entry point and naming constants for the master process. */
object Master {
  val MASTER_SYSTEM = "MasterSystem"
  val MASTER_ACTOR = "Master"

  /**
   * Starts an ActorSystem with remoting enabled and creates the Master actor in it.
   *
   * @param args `args(0)` is the host to bind, `args(1)` the port
   */
  def main(args: Array[String]): Unit = {
    if (args.length < 2) {
      System.err.println("Usage: Master <host> <port>")
      sys.exit(1)
    }

    val host = args(0)
    val port = args(1).toInt

    val configStr =
      s"""
         |akka.actor.provider = "akka.remote.RemoteActorRefProvider"
         |akka.remote.netty.tcp.hostname = "$host"
         |akka.remote.netty.tcp.port = "$port"
       """.stripMargin

    // Configuration needed to create the remote-enabled ActorSystem.
    val config: Config = ConfigFactory.parseString(configStr)

    // Create the ActorSystem.
    val actorSystem: ActorSystem = ActorSystem(MASTER_SYSTEM, config)

    // Create the Master actor inside it.
    actorSystem.actorOf(Props(new Master(host, port)), MASTER_ACTOR)

    // Block the main thread until the ActorSystem terminates.
    actorSystem.awaitTermination()
  }
}
2.創建RemoteMsg特質
/** Marker for messages exchanged between Master and Worker; extends Serializable. */
trait RemoteMsg extends Serializable

// Master -> self (Master): trigger for the periodic timeout check.
case object CheckTimeOutWorker

// Worker -> Master: registration request carrying the worker's id, address and resources.
final case class RegisterWorker(id: String, host: String, port: Int, memory: Int, cores: Int)
  extends RemoteMsg

// Master -> Worker: registration acknowledgement carrying the master's URL.
final case class RegisteredWorker(masterUrl: String) extends RemoteMsg

// Worker -> self: trigger for sending the next heartbeat.
case object SendHeartBeat

// Worker -> Master: heartbeat identifying the sending worker.
final case class HeartBeat(workerId: String) extends RemoteMsg
3.創建Worker類
import java.util.UUID
import akka.actor.{Actor, ActorRef, ActorSelection, ActorSystem, Props} import com.typesafe.config.{Config, ConfigFactory}
import scala.concurrent.duration._
/**
 * Cluster worker actor. Registers itself with the master on start-up and,
 * once the registration is acknowledged, sends periodic heartbeats.
 *
 * @param host       host of this worker, reported to the master
 * @param port       port of this worker, reported to the master
 * @param masterHost host of the master to register with
 * @param masterPort port of the master to register with
 * @param memory     memory amount reported to the master
 * @param cores      core count reported to the master
 */
class Worker(val host: String, val port: Int, val masterHost: String,
             val masterPort: Int, val memory: Int, val cores: Int) extends Actor {

  // Randomly generated id of this worker.
  val workerId = UUID.randomUUID().toString

  // URL of the master; unset until the registration is acknowledged.
  var masterUrl: String = _

  // Heartbeat period in milliseconds.
  val heartBeat_interval: Long = 10000

  // Selection pointing at the master actor; resolved in preStart.
  var master: ActorSelection = _

  // Handle of the periodic heartbeat trigger, kept so it can be cancelled.
  // Without this the timer would outlive the actor and pile up on restarts or
  // repeated acknowledgements.
  private var heartBeatTask: Option[akka.actor.Cancellable] = None

  /** Lifecycle hook: resolves the master actor and sends the registration request. */
  override def preStart(): Unit = {
    master = context.actorSelection(s"akka.tcp://${Master.MASTER_SYSTEM}" +
      s"@${masterHost}:${masterPort}/user/${Master.MASTER_ACTOR}")

    master ! RegisterWorker(workerId, host, port, memory, cores)
  }

  /** Lifecycle hook: stops the heartbeat timer. */
  override def postStop(): Unit = cancelHeartBeat()

  /** Message loop of the worker. */
  override def receive: Receive = {
    // Master -> Worker: registration succeeded; remember the URL and start heartbeating.
    case RegisteredWorker(masterUrl) =>
      this.masterUrl = masterUrl
      // Never run more than one heartbeat timer at a time.
      cancelHeartBeat()
      import context.dispatcher
      heartBeatTask = Some(
        context.system.scheduler.schedule(0.millis, heartBeat_interval.millis, self, SendHeartBeat))

    // Worker -> self: time to send a heartbeat to the master.
    case SendHeartBeat =>
      master ! HeartBeat(workerId)
  }

  // Cancels the running heartbeat timer, if any.
  private def cancelHeartBeat(): Unit = {
    heartBeatTask.foreach(_.cancel())
    heartBeatTask = None
  }
}

/** Entry point and naming constants for the worker process. */
object Worker {
  val WORKER_SYSTEM = "WorkerSystem"
  val WORKER_ACTOR = "Worker"

  /**
   * Starts an ActorSystem with remoting enabled and creates the Worker actor in it.
   *
   * @param args host, port, masterHost, masterPort, memory, cores (in this order)
   */
  def main(args: Array[String]): Unit = {
    if (args.length < 6) {
      System.err.println("Usage: Worker <host> <port> <masterHost> <masterPort> <memory> <cores>")
      sys.exit(1)
    }

    val host = args(0)
    val port = args(1).toInt
    val masterHost = args(2)
    val masterPort = args(3).toInt
    val memory = args(4).toInt
    val cores = args(5).toInt

    val configStr =
      s"""
         |akka.actor.provider = "akka.remote.RemoteActorRefProvider"
         |akka.remote.netty.tcp.hostname = "$host"
         |akka.remote.netty.tcp.port = "$port"
       """.stripMargin

    // Configuration needed to create the remote-enabled ActorSystem.
    val config: Config = ConfigFactory.parseString(configStr)

    // Create the ActorSystem.
    val actorSystem: ActorSystem = ActorSystem(WORKER_SYSTEM, config)

    // Create the Worker actor inside it.
    actorSystem.actorOf(
      Props(new Worker(host, port, masterHost, masterPort, memory, cores)), WORKER_ACTOR)

    // Block the main thread until the ActorSystem terminates.
    actorSystem.awaitTermination()
  }
}
4.創建初始化類
/**
 * Mutable record describing one worker.
 *
 * Deliberately a plain class, not a case class: instances compare by
 * reference, so changing `lastHeartbeatTime` does not affect hash-based
 * collections that contain them.
 *
 * @param id     worker id
 * @param host   worker host
 * @param port   worker port
 * @param memory memory amount reported by the worker
 * @param cores  core count reported by the worker
 */
class WorkerInfo(val id: String, val host: String, val port: Int,
                 val memory: Int, val cores: Int) {

  // Timestamp (epoch millis) of the last heartbeat. Starts at construction
  // time rather than 0, so a comparison against the current time does not
  // treat a brand-new record as having been silent since the epoch.
  var lastHeartbeatTime: Long = System.currentTimeMillis()
}
5.本地測試需要傳入參數: