Hadoop YARN中web服務的REST API介紹

Hadoop YARN自帶了一系列的web service REST API,我們可以通過這些web service訪問集羣(cluster)、節點(nodes)、應用(application)以及應用的歷史信息。根據API返回的類型,這些URL資源會歸類到不同的組。一些API返回collection類型的資源,有些返回singleton類型的資源。這些web service REST API的語法如下:

http://{http address of service}/ws/{version}/{resourcepath}

  其中,{http address of service}是我們需要獲取信息的服務器地址,目前支持訪問ResourceManager、NodeManager、MapReduce application master和history server;{version}是這些API的版本,目前只支持v1;{resourcepath}定義singleton資源或者collection資源的路徑。
  下面舉例說明這些web service怎麼用。
假設你有一個application_1326821518301_0010作業,並且正在運行。可以通過下面的命令得到這個作業的一些信息:

$ curl --compressed -H "Accept: application/json" -X   \

GET "http://host.domain.com:8088/ws/v1/cluster/apps/application_1326821518301_0010"

上面命令的運行結果以JSON格式返回,如下:

{

   "app" : {

      "finishedTime" : 0,

      "amContainerLogs" : "http://host.domain.com:8042/node/containerlogs/container_1326821518301_0010_01_000001",

      "trackingUI" : "ApplicationMaster",

      "state" : "RUNNING",

      "user" : "user1",

      "id" : "application_1326821518301_0010",

      "clusterId" : 1326821518301,

      "finalStatus" : "UNDEFINED",

      "amHostHttpAddress" : "host.domain.com:8042",

      "progress" : 82.44703,

      "name" : "Sleep job",

      "startedTime" : 1326860715335,

      "elapsedTime" : 31814,

      "diagnostics" : "",

      "trackingUrl" : "http://host.domain.com:8088/proxy/application_1326821518301_0010/",

      "queue" : "a1"

   }

}

根據這些信息,用戶可以獲取到更多關於application_1326821518301_0010的信息,比如大家可以通過上面Json中的trackingUrl從ResourceManager中得到更進一步的信息:

$ curl --compressed -H "Accept: application/json" -X \

GET "http://host.domain.com:8088/proxy/application_1326821518301_0010/ws/v1/mapreduce/jobs"

 

 

{

   "jobs" : {

      "job" : [

         {

            "runningReduceAttempts" : 1,

            "reduceProgress" : 72.104515,

            "failedReduceAttempts" : 0,

            "newMapAttempts" : 0,

            "mapsRunning" : 0,

            "state" : "RUNNING",

            "successfulReduceAttempts" : 0,

            "reducesRunning" : 1,

            "acls" : [

               {

                  "value" : " ",

                  "name" : "mapreduce.job.acl-modify-job"

               },

               {

                  "value" : " ",

                  "name" : "mapreduce.job.acl-view-job"

               }

            ],

            "reducesPending" : 0,

            "user" : "user1",

            "reducesTotal" : 1,

            "mapsCompleted" : 1,

            "startTime" : 1326860720902,

            "id" : "job_1326821518301_10_10",

            "successfulMapAttempts" : 1,

            "runningMapAttempts" : 0,

            "newReduceAttempts" : 0,

            "name" : "Sleep job",

            "mapsPending" : 0,

            "elapsedTime" : 64432,

            "reducesCompleted" : 0,

            "mapProgress" : 100,

            "diagnostics" : "",

            "failedMapAttempts" : 0,

            "killedReduceAttempts" : 0,

            "mapsTotal" : 1,

            "uberized" : false,

            "killedMapAttempts" : 0,

            "finishTime" : 0

         }

      ]

   }

}

如果用戶希望得到上述job id爲job_1326821518301_10_10作業的一些task信息可以用下面命令執行:

$ curl --compressed -H "Accept: application/json" -X \

GET "http://host.domain.com:8088/proxy/application_1326821518301_0010/ws/v1/mapreduce/jobs/job_1326821518301_10_10/tasks"

 

輸出:

{

   "tasks" : {

      "task" : [

         {

            "progress" : 100,

            "elapsedTime" : 5059,

            "state" : "SUCCEEDED",

            "startTime" : 1326860725014,

            "id" : "task_1326821518301_10_10_m_0",

            "type" : "MAP",

            "successfulAttempt" : "attempt_1326821518301_10_10_m_0_0",

            "finishTime" : 1326860730073

         },

         {

            "progress" : 72.104515,

            "elapsedTime" : 0,

            "state" : "RUNNING",

            "startTime" : 1326860732984,

            "id" : "task_1326821518301_10_10_r_0",

            "type" : "REDUCE",

            "successfulAttempt" : "",

            "finishTime" : 0

         }

      ]

   }

}

從上面可以看出,map任務已經完成了,但是reduce任務還在跑。如果用戶需要看一下task_1326821518301_10_10_r_0 task的信息,可以用下面的命令:

$ curl --compressed -X   \

GET "http://host.domain.com:8088/proxy/application_1326821518301_0010/ws/v1/mapreduce/jobs/job_1326821518301_10_10/tasks/task_1326821518301_10_10_r_0/attempts"

 

輸出:

{

   "taskAttempts" : {

      "taskAttempt" : [

         {

            "elapsedMergeTime" : 158,

            "shuffleFinishTime" : 1326860735378,

            "assignedContainerId" : "container_1326821518301_0010_01_000003",

            "progress" : 72.104515,

            "elapsedTime" : 0,

            "state" : "RUNNING",

            "elapsedShuffleTime" : 2394,

            "mergeFinishTime" : 1326860735536,

            "rack" : "/10.10.10.0",

            "elapsedReduceTime" : 0,

            "nodeHttpAddress" : "host.domain.com:8042",

            "type" : "REDUCE",

            "startTime" : 1326860732984,

            "id" : "attempt_1326821518301_10_10_r_0_0",

            "finishTime" : 0

         }

      ]

   }

}

reduce attempt 還在運行,如果用戶需要查看對應的attempt當前的counter values,可以用下面命令:

$ curl --compressed -H "Accept: application/json"  -X GET \

"http://host.domain.com:8088/proxy/application_1326821518301_0010/ws/v1/mapreduce/jobs/job_1326821518301_10_10/tasks/task_1326821518301_10_10_r_0/attempts/attempt_1326821518301_10_10_r_0_0/counters"

 

輸出:

{

   "JobTaskAttemptCounters" : {

      "taskAttemptCounterGroup" : [

         {

            "counterGroupName" : "org.apache.hadoop.mapreduce.FileSystemCounter",

            "counter" : [

               {

                  "value" : 4216,

                  "name" : "FILE_BYTES_READ"

               },

               {

                  "value" : 77151,

                  "name" : "FILE_BYTES_WRITTEN"

               },

               {

                  "value" : 0,

                  "name" : "FILE_READ_OPS"

               },

               {

                  "value" : 0,

                  "name" : "FILE_LARGE_READ_OPS"

               },

               {

                  "value" : 0,

                  "name" : "FILE_WRITE_OPS"

               },

               {

                  "value" : 0,

                  "name" : "HDFS_BYTES_READ"

               },

               {

                  "value" : 0,

                  "name" : "HDFS_BYTES_WRITTEN"

               },

               {

                  "value" : 0,

                  "name" : "HDFS_READ_OPS"

               },

               {

                  "value" : 0,

                  "name" : "HDFS_LARGE_READ_OPS"

               },

               {

                  "value" : 0,

                  "name" : "HDFS_WRITE_OPS"

               }

            ]

         },

         {

            "counterGroupName" : "org.apache.hadoop.mapreduce.TaskCounter",

            "counter" : [

               {

                  "value" : 0,

                  "name" : "COMBINE_INPUT_RECORDS"

               },

               {

                  "value" : 0,

                  "name" : "COMBINE_OUTPUT_RECORDS"

               },

               {

                  "value" : 1767,

                  "name" : "REDUCE_INPUT_GROUPS"

               },

               {

                  "value" : 25104,

                  "name" : "REDUCE_SHUFFLE_BYTES"

               },

               {

                  "value" : 1767,

                  "name" : "REDUCE_INPUT_RECORDS"

               },

               {

                  "value" : 0,

                  "name" : "REDUCE_OUTPUT_RECORDS"

               },

               {

                  "value" : 0,

                  "name" : "SPILLED_RECORDS"

               },

               {

                  "value" : 1,

                  "name" : "SHUFFLED_MAPS"

               },

               {

                  "value" : 0,

                  "name" : "FAILED_SHUFFLE"

               },

               {

                  "value" : 1,

                  "name" : "MERGED_MAP_OUTPUTS"

               },

               {

                  "value" : 50,

                  "name" : "GC_TIME_MILLIS"

               },

               {

                  "value" : 1580,

                  "name" : "CPU_MILLISECONDS"

               },

               {

                  "value" : 141320192,

                  "name" : "PHYSICAL_MEMORY_BYTES"

               },

              {

                  "value" : 1118552064,

                  "name" : "VIRTUAL_MEMORY_BYTES"

               },

               {

                  "value" : 73728000,

                  "name" : "COMMITTED_HEAP_BYTES"

               }

            ]

         },

         {

            "counterGroupName" : "Shuffle Errors",

            "counter" : [

               {

                  "value" : 0,

                  "name" : "BAD_ID"

               },

               {

                  "value" : 0,

                  "name" : "CONNECTION"

               },

               {

                  "value" : 0,

                  "name" : "IO_ERROR"

               },

               {

                  "value" : 0,

                  "name" : "WRONG_LENGTH"

               },

               {

                  "value" : 0,

                  "name" : "WRONG_MAP"

               },

               {

                  "value" : 0,

                  "name" : "WRONG_REDUCE"

               }

            ]

         },

         {

            "counterGroupName" : "org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter",

            "counter" : [

               {

                  "value" : 0,

                  "name" : "BYTES_WRITTEN"

               }

            ]

         }

      ],

      "id" : "attempt_1326821518301_10_10_r_0_0"

   }

}

當job完成之後,用戶希望從歷史服務器中獲取這些作業的信息,可以用下面命令:

$ curl --compressed -X GET                      \

"http://host.domain.com:19888/ws/v1/history/mapreduce/jobs/job_1326821518301_10_10"

 

輸出:

{

   "job" : {

      "avgReduceTime" : 1250784,

      "failedReduceAttempts" : 0,

      "state" : "SUCCEEDED",

      "successfulReduceAttempts" : 1,

      "acls" : [

         {

            "value" : " ",

            "name" : "mapreduce.job.acl-modify-job"

         },

         {

            "value" : " ",

            "name" : "mapreduce.job.acl-view-job"

         }

      ],

      "user" : "user1",

      "reducesTotal" : 1,

      "mapsCompleted" : 1,

      "startTime" : 1326860720902,

      "id" : "job_1326821518301_10_10",

      "avgMapTime" : 5059,

      "successfulMapAttempts" : 1,

      "name" : "Sleep job",

      "avgShuffleTime" : 2394,

      "reducesCompleted" : 1,

      "diagnostics" : "",

      "failedMapAttempts" : 0,

      "avgMergeTime" : 2552,

      "killedReduceAttempts" : 0,

      "mapsTotal" : 1,

      "queue" : "a1",

      "uberized" : false,

      "killedMapAttempts" : 0,

      "finishTime" : 1326861986164

   }

}

用戶也可以從ResourceManager中獲取到最終applications的信息:

$  curl --compressed -H "Accept: application/json" -X GET   \

"http://host.domain.com:8088/ws/v1/cluster/apps/application_1326821518301_0010"

 

 

輸出:

 

{

   "app" : {

      "finishedTime" : 1326861991282,

      "amContainerLogs" : "http://host.domain.com:8042/node/containerlogs/container_1326821518301_0010_01_000001",

      "trackingUI" : "History",

      "state" : "FINISHED",

      "user" : "user1",

      "id" : "application_1326821518301_0010",

      "clusterId" : 1326821518301,

      "finalStatus" : "SUCCEEDED",

      "amHostHttpAddress" : "host.domain.com:8042",

      "progress" : 100,

      "name" : "Sleep job",

      "startedTime" : 1326860715335,

      "elapsedTime" : 1275947,

      "diagnostics" : "",

      "trackingUrl" : "http://host.domain.com:8088/proxy/application_1326821518301_0010/jobhistory/job/job_1326821518301_10_10",

      "queue" : "a1"

   }

}

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章