Spark Core Source Code Analysis 5: The Spark Submit Framework

Blog: http://blog.csdn.net/yueqian_zhu/


Source location:

org.apache.spark.deploy.SparkSubmit (SparkSubmit.scala)

This is where the command-line arguments are parsed, including the main class of the submitted jar and the configuration for the Executors and the Driver, among other things:
def main(args: Array[String]): Unit = {
  val appArgs = new SparkSubmitArguments(args)
  if (appArgs.verbose) {
    printStream.println(appArgs)
  }
  appArgs.action match {
    case SparkSubmitAction.SUBMIT => submit(appArgs) // the default action is SUBMIT, so this is the usual branch
    case SparkSubmitAction.KILL => kill(appArgs)
    case SparkSubmitAction.REQUEST_STATUS => requestStatus(appArgs)
  }
}
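
Before diving into submit, note where the action comes from: while SparkSubmitArguments parses the flags, --kill <submissionId> selects KILL, --status <submissionId> selects REQUEST_STATUS, and everything else defaults to SUBMIT. The following is only an illustrative sketch of that dispatch, not the actual SparkSubmitArguments code (which does this inside its option-parsing loop, and where the submission IDs below are hypothetical):

// Illustrative sketch only; the real parsing lives in SparkSubmitArguments.
object SubmitActionSketch {
  sealed trait Action
  case object Submit extends Action
  case object Kill extends Action
  case object RequestStatus extends Action

  // SUBMIT is the default, which is why main() above normally takes the submit branch.
  def actionFor(args: Array[String]): Action =
    if (args.contains("--kill")) Kill
    else if (args.contains("--status")) RequestStatus
    else Submit

  def main(argv: Array[String]): Unit = {
    println(actionFor(Array("--master", "spark://host:7077", "--kill", "driver-0001"))) // Kill
    println(actionFor(Array("--class", "com.example.Main", "app.jar")))                 // Submit
  }
}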
Next, let's look at the submit flow:
private def submit(args: SparkSubmitArguments): Unit = {
  // This method is long, so we won't walk through it line by line; it mainly
  // derives the necessary runtime settings from the arguments.
  // Note that childMainClass depends on the deploy mode (see the sketch after this method):
  //   deployMode == CLIENT: childMainClass is the main class of the user jar itself
  //   deployMode == CLUSTER, standalone: childMainClass = "org.apache.spark.deploy.Client"
  //   deployMode == CLUSTER, YARN: childMainClass = "org.apache.spark.deploy.yarn.Client"
  val (childArgs, childClasspath, sysProps, childMainClass) = prepareSubmitEnvironment(args)

  def doRunMain(): Unit = {
    if (args.proxyUser != null) {
      val proxyUser = UserGroupInformation.createProxyUser(args.proxyUser,
        UserGroupInformation.getCurrentUser())
      try {
        proxyUser.doAs(new PrivilegedExceptionAction[Unit]() {
          override def run(): Unit = {
            runMain(childArgs, childClasspath, sysProps, childMainClass, args.verbose)
          }
        })
      } catch {
        case e: Exception =>
          // Hadoop's AuthorizationException suppresses the exception's stack trace, which
          // makes the message printed to the output by the JVM not very helpful. Instead,
          // detect exceptions with empty stack traces here, and treat them differently.
          if (e.getStackTrace().length == 0) {
            printStream.println(s"ERROR: ${e.getClass().getName()}: ${e.getMessage()}")
            exitFn()
          } else {
            throw e
          }
      }
    } else {
      // The common default path (no proxy user): run the main method of childMainClass
      runMain(childArgs, childClasspath, sysProps, childMainClass, args.verbose)
    }
  }

  // In standalone cluster mode, there are two submission gateways:
  //   (1) The traditional Akka gateway using o.a.s.deploy.Client as a wrapper
  //   (2) The new REST-based gateway introduced in Spark 1.3
  // The latter is the default behavior as of Spark 1.3, but Spark submit will fail over
  // to use the legacy gateway if the master endpoint turns out to be not a REST server.
  if (args.isStandaloneCluster && args.useRest) {
    try {
      printStream.println("Running Spark using the REST application submission protocol.")
      doRunMain()
    } catch {
      // Fail over to use the legacy submission gateway
      case e: SubmitRestConnectionException =>
        printWarning(s"Master endpoint ${args.master} was not a REST server. " +
          "Falling back to legacy submission gateway instead.")
        args.useRest = false
        submit(args)
    }
  // In all other modes, just run the main class as prepared
  } else {
    doRunMain()
  }
}
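
To make the childMainClass comment above concrete, here is a condensed, illustrative sketch of the decision. The real logic lives in prepareSubmitEnvironment and also covers Mesos, Python applications, and the REST submission client, so treat this only as a summary of the three cases mentioned above:

// Illustrative only: a condensed view of the childMainClass selection.
object ChildMainClassSketch {
  def childMainClassFor(deployMode: String, master: String, userMainClass: String): String =
    (deployMode, master) match {
      case ("client", _)                              => userMainClass // driver runs in the submitting JVM
      case ("cluster", m) if m.startsWith("spark://") => "org.apache.spark.deploy.Client"
      case ("cluster", m) if m.startsWith("yarn")     => "org.apache.spark.deploy.yarn.Client"
      case _                                          => userMainClass
    }

  def main(args: Array[String]): Unit = {
    println(childMainClassFor("cluster", "spark://host:7077", "com.example.Main")) // org.apache.spark.deploy.Client
    println(childMainClassFor("client", "yarn-client", "com.example.Main"))        // com.example.Main
  }
}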
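Whichever branch is taken, doRunMain ends up in runMain, which loads childMainClass through a URL class loader and invokes its static main method via reflection. Below is a minimal sketch of that final step, assuming the classpath entries are local files; the real runMain additionally handles verbose output, a mutable class loader for user jars, setting system properties, and friendlier error messages:

import java.io.File
import java.net.URLClassLoader

// Minimal sketch of the reflective invocation performed by runMain.
object RunMainSketch {
  def runMainSketch(
      childArgs: Seq[String],
      childClasspath: Seq[String],
      childMainClass: String): Unit = {
    val urls = childClasspath.map(p => new File(p).toURI.toURL).toArray
    val loader = new URLClassLoader(urls, Thread.currentThread.getContextClassLoader)
    Thread.currentThread.setContextClassLoader(loader)

    // e.g. the user's main class in client mode, or org.apache.spark.deploy.Client
    // in standalone cluster mode
    val mainClass = Class.forName(childMainClass, true, loader)
    val mainMethod = mainClass.getMethod("main", classOf[Array[String]])
    mainMethod.invoke(null, childArgs.toArray) // main is static, so the receiver is null
  }
}

From here, org.apache.spark.deploy.Client (standalone cluster mode) or org.apache.spark.deploy.yarn.Client (YARN) takes over and launches the driver; in client mode the user's main method simply runs in the current JVM.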
