好程序員大數據分享Spark任務和集羣啓動流程

  好程序員大數據分享Spark任務和集羣啓動流程,Spark集羣啓動流程

  1.調用start-all.sh腳本,開始啓動Master

  2.Master啓動以後,preStart方法調用了一個定時器,定時檢查超時的Worker後刪除

  3.啓動腳本會解析slaves配置文件,找到啓動Worker的相應節點.開始啓動Worker

  4.Worker服務啓動後開始調用preStart方法開始向所有的Master進行註冊

  5.Master接收到Worker發送過來的註冊信息,Master開始保存註冊信息並把自己的URL響應給Worker

  6.Worker接收到Master的URL後並更新,開始調用一個定時器,定時的向Master發送心跳信息

 

任務提交流程

1.Driver端會通過spark-submit腳本啓動SaparkSubmit進程,此時創建了一個非常重要的對象(SparkContext),開始向Master發送消息

2.Master接收到發送過來的信息後開始生成任務信息,並把任務信息放到一個對列裏

3.Master把所有有效的Worker過濾出來,按照空閒的資源進行排序

4.Master開始向有效的Worker通知拿取任務信息並啓動相應的Executor

5.Worker啓動Executor並向Driver反向註冊

6.Driver開始把生成的task發送給相應的Executor,Executor開始執行任務

 

集羣啓動流程

1.首先創建Master類

import akka.actor.{Actor, ActorSystem, Props}

import com.typesafe.config.{Config, ConfigFactory}

 

import scala.collection.mutable

import scala.concurrent.duration._

 

class Master(val masterHost: String, val masterPort: Int) extends Actor{

 

  // 用來存儲Worker的註冊信息

  val idToWorker = new mutable.HashMap[String, WorkerInfo]()

 

  // 用來存儲Worker的信息

  val workers = new mutable.HashSet[WorkerInfo]()

 

  // Worker的超時時間間隔

  val checkInterval: Long = 15000

 

 

  // 生命週期方法,在構造器之後,receive方法之前只調用一次

  override def preStart(): Unit = {

    // 啓動一個定時器,用來定時檢查超時的Worker

    import context.dispatcher

    context.system.scheduler.schedule(0 millis, checkInterval millis, self, CheckTimeOutWorker)

  }

 

  // 在preStart方法之後,不斷的重複調用

  override def receive: Receive = {

    // Worker -> Master

    case RegisterWorker(id, host, port, memory, cores) => {

      if (!idToWorker.contains(id)){

        val workerInfo = new WorkerInfo(id, host, port, memory, cores)

        idToWorker += (id -> workerInfo)

        workers += workerInfo

 

        println("a worker registered")

 

        sender ! RegisteredWorker(s"akka.tcp://${Master.MASTER_SYSTEM}" +

          s"@${masterHost}:${masterPort}/user/${Master.MASTER_ACTOR}")

      }

    }

    case HeartBeat(workerId) => {

      // 通過傳過來的workerId獲取對應的WorkerInfo

      val workerInfo: WorkerInfo = idToWorker(workerId)

      // 獲取當前時間

      val currentTime = System.currentTimeMillis()

      // 更新最後一次心跳時間

      workerInfo.lastHeartbeatTime = currentTime

    }

    case CheckTimeOutWorker => {

      val currentTime = System.currentTimeMillis()

      val toRemove: mutable.HashSet[WorkerInfo] =

        workers.filter(w => currentTime - w.lastHeartbeatTime > checkInterval)

 

      // 將超時的Worker從idToWorker和workers中移除

      toRemove.foreach(deadWorker => {

        idToWorker -= deadWorker.id

        workers -= deadWorker

      })

 

      println(s"num of workers: ${workers.size}")

    }

  }

}

object Master{

  val MASTER_SYSTEM = "MasterSystem"

  val MASTER_ACTOR = "Master"

 

  def main(args: Array[String]): Unit = {

    val host = args(0)

    val port = args(1).toInt

 

    val configStr =

      s"""

         |akka.actor.provider = "akka.remote.RemoteActorRefProvider"

         |akka.remote.netty.tcp.hostname = "$host"

         |akka.remote.netty.tcp.port = "$port"

      """.stripMargin

 

    // 配置創建Actor需要的配置信息

    val config: Config = ConfigFactory.parseString(configStr)

 

    // 創建ActorSystem

    val actorSystem: ActorSystem = ActorSystem(MASTER_SYSTEM, config)

 

    // 用actorSystem實例創建Actor

    actorSystem.actorOf(Props(new Master(host, port)), MASTER_ACTOR)

 

    actorSystem.awaitTermination()

 

  }

}

2.創建RemoteMsg特質

trait RemoteMsg extends Serializable{

 

}

 

// Master -> self(Master)

case object CheckTimeOutWorker

 

// Worker -> Master

case class RegisterWorker(id: String, host: String,

                          port: Int, memory: Int, cores: Int) extends RemoteMsg

 

// Master -> Worker

case class RegisteredWorker(masterUrl: String) extends RemoteMsg

 

// Worker -> self

case object SendHeartBeat

 

// Worker -> Master(HeartBeat)

case class HeartBeat(workerId: String) extends RemoteMsg

3.創建Worker類

import java.util.UUID

 

import akka.actor.{Actor, ActorRef, ActorSelection, ActorSystem, Props}

import com.typesafe.config.{Config, ConfigFactory}

 

import scala.concurrent.duration._

 

class Worker(val host: String, val port: Int, val masterHost: String,

             val masterPort: Int, val memory: Int, val cores: Int) extends Actor{

 

  // 生成一個Worker ID

  val workerId = UUID.randomUUID().toString

 

  // 用來存儲MasterURL

  var masterUrl: String = _

 

  // 心跳時間間隔

  val heartBeat_interval: Long = 10000

 

  // master的Actor

  var master: ActorSelection = _

 

  override def preStart(){

    // 獲取Master的Actor

    master = context.actorSelection(s"akka.tcp://${Master.MASTER_SYSTEM}" +

      s"@${masterHost}:${masterPort}/user/${Master.MASTER_ACTOR}")

 

    master ! RegisterWorker(workerId, host, port, memory, cores)

  }

 

  override def receive: Receive = {

    // Worker接收到Master發送過來的註冊成功的信息(masterUrl)

    case RegisteredWorker(masterUrl) => {

      this.masterUrl = masterUrl

      // 啓動一個定時器,定時給Master發送心跳

      import context.dispatcher

      context.system.scheduler.schedule(0 millis, heartBeat_interval millis, self, SendHeartBeat)

    }

    case SendHeartBeat => {

      // 向Master發送心跳

      master ! HeartBeat(workerId)

    }

 

  }

 

}

object Worker{

  val WORKER_SYSTEM = "WorkerSystem"

  val WORKER_ACTOR = "Worker"

 

  def main(args: Array[String]): Unit = {

    val host = args(0)

    val port = args(1).toInt

    val masterHost = args(2)

    val masterPort = args(3).toInt

    val memory = args(4).toInt

    val cores = args(5).toInt

 

    val configStr =

      s"""

         |akka.actor.provider = "akka.remote.RemoteActorRefProvider"

         |akka.remote.netty.tcp.hostname = "$host"

         |akka.remote.netty.tcp.port = "$port"

      """.stripMargin

 

    // 配置創建Actor需要的配置信息

    val config: Config = ConfigFactory.parseString(configStr)

 

    // 創建ActorSystem

    val actorSystem: ActorSystem = ActorSystem(WORKER_SYSTEM, config)

 

    // 用actorSystem實例創建Actor

    val worker: ActorRef = actorSystem.actorOf(

      Props(new Worker(host, port, masterHost, masterPort, memory, cores)), WORKER_ACTOR)

 

    actorSystem.awaitTermination()

 

  }

}

4.創建初始化類

class WorkerInfo(val id: String, val host: String, val port: Int,

                 val memory: Int, val cores: Int) {

 

  // 初始化最後一次心跳的時間

  var lastHeartbeatTime: Long = _

 

}

5.本地測試需要傳入參數:


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章