spark-core_19:ActorSystem的初始化源碼分析

1,SparkEnv在創建時會調用create(),裏面會初始化ActorSystem

注:ActorSystem在後面版本會被RpcEnv替換掉

private def create(
   
conf: SparkConf,
   
executorId: String,
   
hostname: String,
   
port: Int,
   
isDriver: Boolean,
   
isLocal: Boolean,
   
numUsableCores: Int,
   
listenerBus: LiveListenerBus = null,
   
mockOutputCommitCoordinator:Option[OutputCommitCoordinator] = None): SparkEnv = {

 
。。。。  // Create the ActorSystem for Akka and get the port it binds to.
  // 創建ActorSystem及返回對應actorSystem的port
  // driverActorSystemName = "sparkDriver" ,executorActorSystemName = "sparkExecutor"

  val actorSystemName= if (isDriver) driverActorSystemName else executorActorSystemName
 
//創建RpcEnv,在1.6開始已使用NettyRpcEnv,並且也不再使用ActorSystem
  //如果port是0的話,會給RpcEnv.address.port動態分配一個非0的端口

  val rpcEnv= RpcEnv.create(actorSystemName, hostname, port, conf, securityManager,
   
clientMode = !isDriver)
 
val actorSystem:ActorSystem =
   
if (rpcEnv.isInstanceOf[AkkaRpcEnv]){
     
rpcEnv.asInstanceOf[AkkaRpcEnv].actorSystem
    } else {
     
val actorSystemPort=
       
if (port== 0 || rpcEnv.address == null) {
         
port
        } else {
         
rpcEnv.address.port + 1
       
}
     
// Create a ActorSystem for legacy codes
      //該方法返回tuple(ActorSystem,ActorSystem的port),同時將ActorSystem引用給當前變量actorSystem

      AkkaUtils.createActorSystem(
       
actorSystemName + "ActorSystem",
       
hostname,
       
actorSystemPort,
       
conf,
       
securityManager
     
)._1
    }

2,進入AkkaUtils.createActorSystem()

private[spark] object AkkaUtilsextends Logging {

 
/**
   * Creates an ActorSystem ready for remoting, with various Spark features. Returns both the
   * ActorSystem itself and its port (which is hard to get from Akka).
   *
   * Note: the `name` parameter is important, as even if a client sends a message to right
   * host + port, if the system name is incorrect, Akka will drop the message.
   *
   * If indestructible is set to true, the Actor System will continue running in the event
   * of a fatal exception. This is used by [[org.apache.spark.executor.Executor]].
   *
   * 創建遠程ActorSystem,提供各種功能,該方法返回ActorSystem和ActorSystem的port
   * 注意:`name`參數非常重要,因爲它是actorSystem的標識。
   * 如果indestructible設置爲true,actorSystem在發生致命異常(fatal exception)時仍會繼續運行,由Executor進行使用
   * name = sparkDriverActorSystem或sparkExecutorActorSystem、host:當前節點的ip或主機名,port = 0
   */

 
def createActorSystem(
    name: String,
    host: String,
    port: Int,
    conf: SparkConf,
    securityManager: SecurityManager): (ActorSystem, Int) = {
  // Starter handed to Utils.startServiceOnPort below: given the port to try, it builds the
  // actor system on that port and returns it together with the port it reports.
  def launchOn(candidatePort: Int): (ActorSystem, Int) =
    doCreateActorSystem(name, host, candidatePort, conf, securityManager)

  // Sample log output quoted in the original notes (the host part of the address was
  // obfuscated in the source text):
  //   18/04/17 19:12:47 INFO Remoting: Remoting started; listening on addresses
  //     :[akka.tcp://<obfuscated>:35868]
  //   18/04/17 19:13:23 INFO util.Utils: Successfully started service
  //     'sparkDriverActorSystem' on port 35868.
  Utils.startServiceOnPort(port, launchOn _, conf, name)
}

3,Utils.startServiceOnPort()會調用startService函數,進而調用doCreateActorSystem()

/**
 * Builds the Akka configuration for an actor system, starts the system and reports the port
 * it is bound to.
 *
 * @param name            name given to the new ActorSystem
 * @param host            value written to `akka.remote.netty.tcp.hostname`
 * @param port            value written to `akka.remote.netty.tcp.port`
 * @param conf            Spark configuration supplying the `spark.akka.*` tunables and any
 *                        raw Akka settings (via `conf.getAkkaConf`)
 * @param securityManager source of the authentication flag, the secret key and the Akka SSL
 *                        options
 * @return the started ActorSystem paired with the port taken from its provider's default
 *         address
 * @throws Exception if authentication is enabled but the secret key is null
 */
private def doCreateActorSystem(
    name: String,
    host: String,
    port: Int,
    conf: SparkConf,
    securityManager: SecurityManager): (ActorSystem, Int) = {
  // Tunables for the actor system; the second argument is the default used when unset.
  val akkaThreads = conf.getInt("spark.akka.threads", 4)
  val akkaBatchSize = conf.getInt("spark.akka.batchSize", 15)
  val akkaTimeoutS = conf.getTimeAsSeconds("spark.akka.timeout",
    conf.get("spark.network.timeout", "120s"))
  // Configured maximum frame size for Akka messages, in bytes.
  // NOTE(review): the original notes state this evaluates to 128 MB by default - confirm
  // against maxFrameSizeBytes.
  val akkaFrameSize = maxFrameSizeBytes(conf)
  val akkaLogLifecycleEvents = conf.getBoolean("spark.akka.logLifecycleEvents", false)
  val lifecycleEvents = if (akkaLogLifecycleEvents) "on" else "off"
  if (!akkaLogLifecycleEvents) {
    // As a workaround for Akka issue #3787, we coerce the "EndpointWriter" log to be silent.
    // See: https://www.assembla.com/spaces/akka/tickets/3787#/
    // foreach (not map): the call is made only for its side effect on the logger.
    Option(Logger.getLogger("akka.remote.EndpointWriter")).foreach(_.setLevel(Level.FATAL))
  }

  val logAkkaConfig = if (conf.getBoolean("spark.akka.logAkkaConfig", false)) "on" else "off"

  // conf.getTimeAsSeconds(): reads a time setting as seconds, falling back to the supplied
  // default when the key is unset. Per the original notes, a value without a suffix is
  // taken as seconds; example values: 50s, 100ms, 250us.
  val akkaHeartBeatPausesS = conf.getTimeAsSeconds("spark.akka.heartbeat.pauses", "6000s")
  val akkaHeartBeatIntervalS = conf.getTimeAsSeconds("spark.akka.heartbeat.interval", "1000s")

  // Per the original notes: null when nothing has been configured.
  val secretKey = securityManager.getSecretKey()
  // Per the original notes: false by default.
  val isAuthOn = securityManager.isAuthenticationEnabled()
  if (isAuthOn && secretKey == null) {
    throw new Exception("Secret key is null with authentication on")
  }
  val requireCookie = if (isAuthOn) "on" else "off"
  val secureCookie = if (isAuthOn) secretKey else ""
  logDebug(s"In createActorSystem, requireCookie is: $requireCookie")

  // Falls back to an empty Config when no Akka SSL config is produced (per the original
  // notes, that is the default situation).
  val akkaSslConfig = securityManager.akkaSSLOptions.createAkkaConfig
    .getOrElse(ConfigFactory.empty())

  // Notes on the configuration assembled below (translated from the original notes):
  //  - `.asJava` relies on `import scala.collection.JavaConverters._`, which turns the Scala
  //    map into a Java Map; the conversion is defined in DecorateAsJava, a parent of
  //    JavaConverters.
  //  - conf.getAkkaConf: Akka settings placed in SparkConf must use keys starting with
  //    "akka"; keys written as "spark.akka..." would clash with Spark's own settings.
  //  - withFallback(): once a key has a value, a later layer with the same key does not
  //    override it, so SparkConf entries win over SSL settings, which win over the
  //    defaults in the literal below.
  //  - The configuration could also be loaded from a configuration file.
  //  - akka.remote.netty.tcp.hostname: this node's address, used when talking to other
  //    actor systems.
  //  - akka.remote.netty.tcp.port: when 0, a random port is picked.
  val akkaConf = ConfigFactory.parseMap(conf.getAkkaConf.toMap.asJava)
    .withFallback(akkaSslConfig).withFallback(ConfigFactory.parseString(
    s"""
    |akka.daemonic = on
    |akka.loggers =[""akka.event.slf4j.Slf4jLogger""]
    |akka.stdout-loglevel ="ERROR"
    |akka.jvm-exit-on-fatal-error = off
    |akka.remote.require-cookie = "$requireCookie"
    |akka.remote.secure-cookie = "$secureCookie"
    |akka.remote.transport-failure-detector.heartbeat-interval= $akkaHeartBeatIntervalS s
    |akka.remote.transport-failure-detector.acceptable-heartbeat-pause = $akkaHeartBeatPausesS s
    |akka.actor.provider ="akka.remote.RemoteActorRefProvider"
    |akka.remote.netty.tcp.transport-class= "akka.remote.transport.netty.NettyTransport"
    |akka.remote.netty.tcp.hostname ="$host"
    |akka.remote.netty.tcp.port = $port
    |akka.remote.netty.tcp.tcp-nodelay =on
    |akka.remote.netty.tcp.connection-timeout = $akkaTimeoutS s
    |akka.remote.netty.tcp.maximum-frame-size = ${akkaFrameSize}B
    |akka.remote.netty.tcp.execution-pool-size = $akkaThreads
    |akka.actor.default-dispatcher.throughput = $akkaBatchSize
    |akka.log-config-on-start = $logAkkaConfig
    |akka.remote.log-remote-lifecycle-events= $lifecycleEvents
    |akka.log-dead-letters = $lifecycleEvents
    |akka.log-dead-letters-during-shutdown = $lifecycleEvents
    """.stripMargin))

  // Start the ActorSystem. Per the original notes, a Config instance can be obtained through
  // ConfigFactory.load(<config file path>), ConfigFactory.parseString("key=\"value\"") or
  // ConfigFactory.parseMap(Map[String, String]).
  val actorSystem = ActorSystem(name, akkaConf)
  // The provider is the one selected by akka.actor.provider above
  // ("akka.remote.RemoteActorRefProvider"); the bound port is read from its default address.
  val provider = actorSystem.asInstanceOf[ExtendedActorSystem].provider
  val boundPort = provider.getDefaultAddress.port.get
  // Hand back both the actor system and its port.
  (actorSystem, boundPort)
}


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章