Spark調度相關概念
- Task(任務):單個分區數據集上的最小處理流程單元
- TaskSet(任務集):由一組關聯的,但相互之間沒有Shuffle依賴關係的任務所組成的任務集
- Stage(調度階段):一個調度階段與一個任務集相對應
- Job(作業):由一個RDD Action生成的一個或多個調度階段所組成的一次計算作業
- Application(應用程序):Spark應用程序,由一個或多個作業組成,用戶編寫的
作業運行
// Classic word-count pipeline. Every transformation below is lazy and only
// extends the RDD lineage; the final count() action is what submits the job.
val sc = new SparkContext()
val reduce = sc.textFile(args(1))      // load the input file
  .flatMap(line => line.split(" "))    // break each line into words
  .filter(word => word.length == 2)    // keep only two-character words
  .map(word => (word, 1))              // pair each word with an initial count of 1
  .reduceByKey(_ + _)                  // sum counts per word (introduces a shuffle)
reduce.cache()                         // mark the result for in-memory reuse
reduce.count()                         // action: triggers actual execution
這段代碼是在Spark應用程序的main函數中的,所以它會在Driver中運行SparkContext
SparkContext是連接Spark程序的橋樑,它會初始化類LiveListenerBus,該類會監聽Spark程序事件並做相應的處理;還會調用SparkEnv.create方法創建SparkEnv,另一個調用該方法的地方是初始化類Executor的時候
object SparkEnv extends Logging {
// ThreadLocal gives each thread its own independent copy of the SparkEnv
private val env = new ThreadLocal[SparkEnv]
// caches the most recently set SparkEnv; @volatile so other threads can read it
@volatile private var lastSetSparkEnv : SparkEnv = _
......
// Builds a SparkEnv for either the driver (isDriver = true) or an executor.
// The same factory is used in both cases; isDriver decides whether services
// are created locally or looked up remotely on the driver.
private[spark] def create(
conf: SparkConf,
executorId: String,
hostname: String,
port: Int,
isDriver: Boolean,
isLocal: Boolean,
listenerBus: LiveListenerBus = null): SparkEnv = {
......
// Start the Akka actor system that all of this process's actors live in.
val (actorSystem, boundPort) = AkkaUtils.createActorSystem("spark", hostname, port, conf = conf,
securityManager = securityManager)
......
// On the driver: register a brand-new actor under `name`.
// On an executor: resolve a remote ActorRef to the driver's actor of that name.
// Note that `newActor` is by-name, so it is only evaluated on the driver.
def registerOrLookup(name: String, newActor: => Actor): ActorRef = {
if (isDriver) {
logInfo("Registering " + name)
actorSystem.actorOf(Props(newActor), name = name)
} else {
val driverHost: String = conf.get("spark.driver.host", "localhost")
val driverPort: Int = conf.getInt("spark.driver.port", 7077)
Utils.checkHost(driverHost, "Expected hostname")
val url = s"akka.tcp://spark@$driverHost:$driverPort/user/$name"
val timeout = AkkaUtils.lookupTimeout(conf)
logInfo(s"Connecting to $name: $url")
Await.result(actorSystem.actorSelection(url).resolveOne(timeout), timeout)
}
}
// Map-output tracking: the driver runs the master, executors run a worker
// that queries the master for shuffle output locations.
val mapOutputTracker = if (isDriver) {
new MapOutputTrackerMaster(conf)
} else {
new MapOutputTrackerWorker(conf)
}
// Have to assign trackerActor after initialization as MapOutputTrackerActor
// requires the MapOutputTracker itself
mapOutputTracker.trackerActor = registerOrLookup(
"MapOutputTracker",
new MapOutputTrackerMasterActor(mapOutputTracker.asInstanceOf[MapOutputTrackerMaster], conf))
// Block management: master actor lives on the driver; executors get a ref.
val blockManagerMaster = new BlockManagerMaster(registerOrLookup(
"BlockManagerMaster",
new BlockManagerMasterActor(isLocal, conf, listenerBus)), conf)
val blockManager = new BlockManager(executorId, actorSystem, blockManagerMaster,
serializer, conf, securityManager, mapOutputTracker)
val connectionManager = blockManager.connectionManager
val broadcastManager = new BroadcastManager(isDriver, conf, securityManager)
val cacheManager = new CacheManager(blockManager)
// Shuffle fetcher implementation is pluggable via spark.shuffle.fetcher.
val shuffleFetcher = instantiateClass[ShuffleFetcher](
"spark.shuffle.fetcher", "org.apache.spark.BlockStoreShuffleFetcher")
// HTTP server for distributing files/jars; its URI is published in the conf.
val httpFileServer = new HttpFileServer(securityManager)
httpFileServer.initialize()
conf.set("spark.fileserver.uri", httpFileServer.serverUri)
// Metrics are tagged with the role of this process (driver vs executor).
val metricsSystem = if (isDriver) {
MetricsSystem.createMetricsSystem("driver", conf, securityManager)
} else {
MetricsSystem.createMetricsSystem("executor", conf, securityManager)
}
metricsSystem.start()
......
// Bundle everything into the SparkEnv holder returned to the caller.
new SparkEnv(
executorId,
actorSystem,
serializer,
closureSerializer,
cacheManager,
mapOutputTracker,
shuffleFetcher,
broadcastManager,
blockManager,
connectionManager,
securityManager,
httpFileServer,
sparkFilesDir,
metricsSystem,
conf)
}
從上面的代碼可以看到,SparkEnv會創建很多對象,比如blockManager、cacheManager、mapOutputTracker等。在SparkContext中,最主要的初始化工作就是初始化TaskScheduler和DAGScheduler,這兩個就是Spark的核心所在
// Create the TaskScheduler from the master URL supplied by the user.
private[spark] var taskScheduler = SparkContext.createTaskScheduler(this, master)
@volatile private[spark] var dagScheduler: DAGScheduler = _
// Wrap DAGScheduler construction so any failure surfaces as a SparkException
// with the underlying cause's message.
try {
dagScheduler = new DAGScheduler(this)
} catch {
case e: Exception => throw
new SparkException("DAGScheduler cannot be initialized due to %s".format(e.getMessage))
}
// start TaskScheduler after taskScheduler sets DAGScheduler reference in DAGScheduler's
// constructor
taskScheduler.start()
TaskSchedulerImpl的initialize方法:
def initialize(backend: SchedulerBackend) {
  // Remember the backend that will actually launch tasks for us.
  this.backend = backend
  // Root of the scheduling-pool tree; its name is left empty for now.
  rootPool = new Pool("", schedulingMode, 0, 0)
  // Choose the builder that matches the configured scheduling policy.
  schedulableBuilder = schedulingMode match {
    case SchedulingMode.FIFO => new FIFOSchedulableBuilder(rootPool)
    case SchedulingMode.FAIR => new FairSchedulableBuilder(rootPool, conf)
  }
  // Let the chosen builder construct the full pool hierarchy under rootPool.
  schedulableBuilder.buildPools()
}
創建TaskSchedulerImpl對象的時候會調用initialize方法,同時創建SchedulerBackend對象,在standalone模式下就是類SparkDeploySchedulerBackend,該類主要跟worker上的CoarseGrainedExecutorBackend通信;還會根據用戶設定的SchedulingMode調度模式創建一個rootPool根調度池,之後根據具體的調度模式再進一步創建SchedulableBuilder對象,具體的SchedulableBuilder對象的buildPools方法將在rootPool的基礎上完成整個調度池的構建工作。每個SparkContext可能同時存在多個可運行的任務集,這些任務集之間的調度是由rootPool來決定的。SparkDeploySchedulerBackend的start方法如下:
override def start() {
super.start() // runs CoarseGrainedSchedulerBackend.start first
// The endpoint for executors to talk to us
val driverUrl = "akka.tcp://spark@%s:%s/user/%s".format(
conf.get("spark.driver.host"), conf.get("spark.driver.port"),
CoarseGrainedSchedulerBackend.ACTOR_NAME)
// The {{...}} placeholders are filled in later with per-executor values.
val args = Seq(driverUrl, "{{EXECUTOR_ID}}", "{{HOSTNAME}}", "{{CORES}}", "{{WORKER_URL}}")
// Collect extra JVM options and class/library paths configured for executors.
val extraJavaOpts = sc.conf.getOption("spark.executor.extraJavaOptions")
val classPathEntries = sc.conf.getOption("spark.executor.extraClassPath").toSeq.flatMap { cp =>
cp.split(java.io.File.pathSeparator)
}
val libraryPathEntries =
sc.conf.getOption("spark.executor.extraLibraryPath").toSeq.flatMap { cp =>
cp.split(java.io.File.pathSeparator)
}
// This command is sent to the Master, which forwards it to a worker to
// launch the CoarseGrainedExecutorBackend process there.
val command = Command(
"org.apache.spark.executor.CoarseGrainedExecutorBackend", args, sc.executorEnvs,
classPathEntries, libraryPathEntries, extraJavaOpts)
val sparkHome = sc.getSparkHome()
// Describes this application (name, resources, launch command) for the Master.
val appDesc = new ApplicationDescription(sc.appName, maxCores, sc.executorMemory, command,
sparkHome, sc.ui.appUIAddress, sc.eventLogger.map(_.logDir))
// Register the application with the Master via an AppClient.
client = new AppClient(sc.env.actorSystem, masters, appDesc, this, conf)
client.start()
}
1、CoarseGrainedSchedulerBackend會創建DriverActor
DAGScheduler
// DAGScheduler's primary constructor: it is handed references to the core
// services it coordinates with (stage submission, shuffle-output registration,
// block location queries, and event publication).
class DAGScheduler(
private[scheduler] val sc: SparkContext,
private[scheduler] val taskScheduler: TaskScheduler, // stages are handed off here as task sets
listenerBus: LiveListenerBus, // bus for publishing scheduler events
mapOutputTracker: MapOutputTrackerMaster, // shuffle output info is registered here
blockManagerMaster: BlockManagerMaster,
env: SparkEnv)
DAGScheduler會把上述引用傳進來,這是因爲劃分完Stage後需要交給TaskScheduler調度;任務運行完成後還要將shuffle結果的相關信息向MapOutputTrackerMaster註冊。DAGScheduler內部維護了以下數據結構:
private[scheduler] val jobIdToStageIds = new HashMap[Int, HashSet[Int]]
private[scheduler] val stageIdToJobIds = new HashMap[Int, HashSet[Int]] // stage id -> ids of jobs using that stage
private[scheduler] val stageIdToStage = new HashMap[Int, Stage] // stage id -> Stage object
private[scheduler] val shuffleToMapStage = new HashMap[Int, Stage] // shuffle id -> its map-side Stage
private[scheduler] val jobIdToActiveJob = new HashMap[Int, ActiveJob] // job id -> ActiveJob
private[scheduler] val resultStageToJob = new HashMap[Stage, ActiveJob] // result stage -> the job it completes
private[scheduler] val stageToInfos = new HashMap[Stage, StageInfo] // stage -> its StageInfo
等待運行的調度階段列表
private[scheduler] val waitingStages = new HashSet[Stage] // stages waiting to run
正在運行的調度階段列表
private[scheduler] val runningStages = new HashSet[Stage] // stages currently running
失敗等待重新提交的調度階段
private[scheduler] val failedStages = new HashSet[Stage] // failed stages awaiting resubmission
每個調度階段裏等待執行的任務列表
private[scheduler] val pendingTasks = new HashMap[Stage, HashSet[Task[_]]] // per-stage tasks not yet finished
DAGScheduler還會創建DAGSchedulerEventProcessActor,這樣一來就可以將同步的函數調用轉換爲對事件的異步處理了