最近部门合并,两个部门的集群需要同步到一起,自然用的是【distcp】。因为两个集群的版本不一致,直接用hdfs协议传输可能会有问题,所以通过http端口来传输。又因为两个集群都配置了HA,无法事先确定哪个NameNode处于active状态,所以每次传输前需要先获取active node。
方式是通过JMX来获取集群信息。
解析返回的json,获取到active node后退出,开始传输数据。
#!/usr/bin/env bash
#
# Find the active NameNode of an HA HDFS cluster.
#
# Each candidate host is asked, over HTTP port 50070, for the JMX bean
# "Hadoop:service=NameNode,name=NameNodeStatus". The loop stops at the first
# host whose "State" field is "active".
#
# Output:
#   stdout - progress lines ("node: <host>", "state: <state>") and the final
#            "active: <host>" line
#   stderr - error messages
# Exit status:
#   0 when an active NameNode was found, 1 otherwise.
#
# Requires: bash (the script uses [[ ]] and arrays), curl, jq.

set -u

# Candidate NameNode hosts, probed in this order.
namenodes=(
  192.168.2.103
  192.168.2.101
)

activeNode=""

for nn in "${namenodes[@]}"; do
  echo "node: ${nn}"

  # Save curl's exit status immediately: any later command, including a
  # [[ ... ]] test, overwrites $?.
  #   -s  hide the progress meter      -S  still print real errors
  #   -f  treat HTTP errors as failure
  # The timeouts keep an unreachable host from stalling the loop.
  rc=0
  status=$(curl -sSf --connect-timeout 5 --max-time 15 \
    "http://${nn}:50070/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus") || rc=$?

  if ((rc != 0)); then
    case "${rc}" in
      # curl: 6 = could not resolve host, 7 = failed to connect, 28 = timeout
      6 | 7 | 28)
        echo "Error: can not connect to host ${nn} (curl exit ${rc})" >&2
        ;;
      *)
        echo "Error ${rc}" >&2
        ;;
    esac
    continue
  fi

  # -r prints the raw string without quotes; "// empty" yields an empty
  # string instead of "null" when the field is missing.
  if ! state=$(jq -r '.beans[0].State // empty' <<<"${status}"); then
    echo "Error: can not parse JMX response from ${nn}" >&2
    continue
  fi
  echo "state: ${state}"

  if [[ "${state}" == "active" ]]; then
    activeNode=${nn}
    break
  fi
done

if [[ -z "${activeNode}" ]]; then
  echo "Error: no active NameNode found" >&2
  exit 1
fi

echo "active: ${activeNode}"
JMX接口返回的JSON格式如下:
{
"beans": [
{
"name": "Hadoop:service=NameNode,name=NameNodeStatus",
"modelerType": "org.apache.hadoop.hdfs.server.namenode.NameNode",
"SecurityEnabled": false,
"NNRole": "NameNode",
"HostAndPort": "cdh01:8020",
"LastHATransitionTime": 0,
"State": "active"
}
]
}