https://blog.csdn.net/Androidlushangderen/article/details/47945597
心跳邏輯
BPServiceActor
dn處理nn下發命令的時間太長,如:
2020-03-26 18:07:55,372 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Took 5675ms to process 1 commands from NN
2020-03-26 18:07:55,372 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Took 5675ms to process 1 commands from NN
2020-03-26 18:07:55,382 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: BlockRecoveryWorker: NameNode at bigdata-us-hdpnn11.us01/10.15.62.52:8021 calls recoverBlock(BP-1629871299-10.15.62.52-1523963075824:blk_4791946168_3725472211, targets=[DatanodeInfoWithStorage[10.15.67.28:50010,null,null], DatanodeInfoWithStorage[10.15.67.25:50010,null,null], DatanodeInfoWithStorage[10.15.36.51:50010,null,null]], newGenerationStamp=0, newBlock=null, isStriped=false)
2020-03-26 18:07:55,387 WARN org.apache.hadoop.hdfs.server.protocol.InterDatanodeProtocol: Failed to recover block (block=BP-1629871299-10.15.62.52-1523963075824:blk_4791946168_3725472211, datanode=DatanodeInfoWithStorage[10.15.67.28:50010,null,null])
java.io.IOException: THIS IS NOT SUPPOSED TO HAPPEN: replica.getGenerationStamp() >= recoveryId = 0, block=blk_4791946168_3725472211, replica=FinalizedReplica, blk_4791946168_3725472211, FINALIZED
getNumBytes() = 553812
getBytesOnDisk() = 553812
getVisibleLength()= 553812
getVolume() = /data6/hadoopdata/dfs
getBlockURI() = file:/data6/hadoopdata/dfs/current/BP-1629871299-10.15.62.52-1523963075824/current/finalized/subdir31/subdir11/blk_4791946168
at org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.initReplicaRecoveryImpl(FsDatasetImpl.java:2623)
at org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.initReplicaRecovery(FsDatasetImpl.java:2576)
at org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.initReplicaRecovery(FsDatasetImpl.java:2565)
at org.apache.hadoop.hdfs.server.datanode.DataNode.initReplicaRecovery(DataNode.java:3024)
at org.apache.hadoop.hdfs.protocolPB.InterDatanodeProtocolServerSideTranslatorPB.initReplicaRecovery(InterDatanodeProtocolServerSideTranslatorPB.java:55)
at org.apache.hadoop.hdfs.protocol.proto.InterDatanodeProtocolProtos$InterDatanodeProtocolService$2.callBlockingMethod(InterDatanodeProtocolProtos.java:3105)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:524)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1025)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:882)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:828)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1903)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2709)
at sun.reflect.GeneratedConstructorAccessor75.newInstance(Unknown Source)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:121)
at org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:110)
at org.apache.hadoop.hdfs.server.datanode.BlockRecoveryWorker.callInitReplicaRecovery(BlockRecoveryWorker.java:567)
at org.apache.hadoop.hdfs.server.datanode.BlockRecoveryWorker.access$400(BlockRecoveryWorker.java:57)
at org.apache.hadoop.hdfs.server.datanode.BlockRecoveryWorker$RecoveryTaskContiguous.recover(BlockRecoveryWorker.java:134)
at org.apache.hadoop.hdfs.server.datanode.BlockRecoveryWorker$1.run(BlockRecoveryWorker.java:604)
at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.hadoop.ipc.RemoteException(java.io.IOException): THIS IS NOT SUPPOSED TO HAPPEN: replica.getGenerationStamp() >= recoveryId = 0, block=blk_4791946168_3725472211, replica=FinalizedReplica, blk_4791946168_3725472211, FINALIZED
getNumBytes() = 553812
getBytesOnDisk() = 553812
getVisibleLength()= 553812
getVolume() = /data6/hadoopdata/dfs
getBlockURI() = file:/data6/hadoopdata/dfs/current/BP-1629871299-10.15.62.52-1523963075824/current/finalized/subdir31/subdir11/blk_4791946168
at org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.initReplicaRecoveryImpl(FsDatasetImpl.java:2623)
at org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.initReplicaRecovery(FsDatasetImpl.java:2576)
at org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.initReplicaRecovery(FsDatasetImpl.java:2565)
at org.apache.hadoop.hdfs.server.datanode.DataNode.initReplicaRecovery(DataNode.java:3024)
at org.apache.hadoop.hdfs.protocolPB.InterDatanodeProtocolServerSideTranslatorPB.initReplicaRecovery(InterDatanodeProtocolServerSideTranslatorPB.java:55)
at org.apache.hadoop.hdfs.protocol.proto.InterDatanodeProtocolProtos$InterDatanodeProtocolService$2.callBlockingMethod(InterDatanodeProtocolProtos.java:3105)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:524)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1025)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:882)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:828)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1903)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2709)
at org.apache.hadoop.ipc.Client.getRpcResponse(Client.java:1511)
at org.apache.hadoop.ipc.Client.call(Client.java:1457)
at org.apache.hadoop.ipc.Client.call(Client.java:1367)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:228)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:116)
at com.sun.proxy.$Proxy24.initReplicaRecovery(Unknown Source)
at org.apache.hadoop.hdfs.protocolPB.InterDatanodeProtocolTranslatorPB.initReplicaRecovery(InterDatanodeProtocolTranslatorPB.java:83)
at org.apache.hadoop.hdfs.server.datanode.BlockRecoveryWorker.callInitReplicaRecovery(BlockRecoveryWorker.java:565)
... 4 more
2020-03-26 18:07:55,392 WARN org.apache.hadoop.hdfs.server.protocol.InterDatanodeProtocol: Failed to recover block (block=BP-1629871299-10.15.62.52-1523963075824:blk_4791946168_3725472211, datanode=DatanodeInfoWithStorage[10.15.67.25:50010,null,null])
java.io.IOException: THIS IS NOT SUPPOSED TO HAPPEN: replica.getGenerationStamp() >= recoveryId = 0, block=blk_4791946168_3725472211, replica=FinalizedReplica, blk_4791946168_3725472211, FINALIZED
getNumBytes() = 553812
getBytesOnDisk() = 553812
getVisibleLength()= 553812
getVolume() = /data6/hadoopdata/dfs
getBlockURI() = file:/data6/hadoopdata/dfs/current/BP-1629871299-10.15.62.52-1523963075824/current/finalized/subdir31/subdir11/blk_4791946168
at org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.initReplicaRecoveryImpl(FsDatasetImpl.java:2623)
at org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.initReplicaRecovery(FsDatasetImpl.java:2576)
at org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.initReplicaRecovery(FsDatasetImpl.java:2565)
at org.apache.hadoop.hdfs.server.datanode.DataNode.initReplicaRecovery(DataNode.java:3024)
at org.apache.hadoop.hdfs.protocolPB.InterDatanodeProtocolServerSideTranslatorPB.initReplicaRecovery(InterDatanodeProtocolServerSideTranslatorPB.java:55)
at org.apache.hadoop.hdfs.protocol.proto.InterDatanodeProtocolProtos$InterDatanodeProtocolService$2.callBlockingMethod(InterDatanodeProtocolProtos.java:3105)
DN
dn -> 心跳
BPServiceActor.offerService
方法:
dn向nn的彙報分兩種:
send heartbeat or block-report
心跳及塊彙報。
1.心跳彙報以下4個信息
// All heartbeat messages include following info:
// -- Datanode name
// -- data transfer port
// -- Total capacity
// -- Bytes remaining
nn收到心跳後主要更新節點信息。
nn端代碼:
// Check if this datanode should actually be shutdown instead.
if (nodeinfo != null && shouldNodeShutdown(nodeinfo)) {
setDatanodeDead(nodeinfo);
throw new DisallowedDatanodeException(nodeinfo);
}
//如果不存在此節點信息,說明此節點還未註冊,返回節點註冊命令
if (nodeinfo == null || !nodeinfo.isAlive) {
return new DatanodeCommand[]{DatanodeCommand.REGISTER};
}
updateStats(nodeinfo, false);
nodeinfo.updateHeartbeat(capacity, dfsUsed, remaining, xceiverCount);
updateStats(nodeinfo, true);
所以對於心跳第一部分,dn端主要彙報使用量,nn只更新該節點的統計信息,基本不做其他處理。當節點未註冊時,nn會返回一條命令,即註冊節點的命令。
2.塊彙報
NN
- NameNodeRpcServer.sendHeartbeat
- namesystem.handleHeartbeat
- DatanodeManager.handleHeartbeat
/**
 * Handle heartbeat from datanodes.
 *
 * <p>Resolves the descriptor of the reporting datanode, records the heartbeat
 * through {@code heartbeatManager.updateHeartbeat}, and then assembles the
 * commands to send back in the heartbeat response.
 *
 * <p>Return value, in the order the checks are made:
 * <ul>
 *   <li>a single {@code RegisterCommand.REGISTER} when the node lookup throws
 *       {@code UnregisteredNodeException}, returns {@code null}, or the node
 *       is not registered;</li>
 *   <li>an empty array when the namesystem is in safe mode (the heartbeat has
 *       already been recorded at that point);</li>
 *   <li>a single block recovery command when one is pending;</li>
 *   <li>otherwise every command collected below, or an empty array if none
 *       was collected.</li>
 * </ul>
 *
 * @param nodeReg registration of the datanode that sent the heartbeat
 * @param reports storage reports, passed on to the heartbeat manager
 * @param blockPoolId block pool id placed into the generated block commands
 * @param cacheCapacity passed on to the heartbeat manager
 * @param cacheUsed passed on to the heartbeat manager
 * @param xceiverCount passed on to the heartbeat manager
 * @param maxTransfers approximate budget that is split between replication
 *        and erasure-coding tasks
 * @param failedVolumes passed on to the heartbeat manager
 * @param volumeFailureSummary passed on to the heartbeat manager
 * @param slowPeers slow peer report; consulted only if slowPeerTracker is set
 * @param slowDisks slow disk report; consulted only if slowDiskTracker is set
 * @return the commands for the datanode; never {@code null}
 * @throws DisallowedDatanodeException if the node reports as disallowed; the
 *         node is marked dead before the exception is thrown
 * @throws IOException declared by the signature
 */
public DatanodeCommand[] handleHeartbeat(DatanodeRegistration nodeReg,
StorageReport[] reports, final String blockPoolId,
long cacheCapacity, long cacheUsed, int xceiverCount,
int maxTransfers, int failedVolumes,
VolumeFailureSummary volumeFailureSummary,
@Nonnull SlowPeerReports slowPeers,
@Nonnull SlowDiskReports slowDisks) throws IOException {
final DatanodeDescriptor nodeinfo;
// The heartbeating node is not registered: reply with a register command.
try {
nodeinfo = getDatanode(nodeReg);
} catch (UnregisteredNodeException e) {
return new DatanodeCommand[]{RegisterCommand.REGISTER};
}
// Check if this datanode should actually be shutdown instead.
if (nodeinfo != null && nodeinfo.isDisallowed()) {
setDatanodeDead(nodeinfo);
throw new DisallowedDatanodeException(nodeinfo);
}
// A missing or not-yet-registered descriptor also gets the register command.
if (nodeinfo == null || !nodeinfo.isRegistered()) {
return new DatanodeCommand[]{RegisterCommand.REGISTER};
}
// Record the heartbeat before any command handling, so it is recorded even
// when the safe-mode check below returns no commands.
heartbeatManager.updateHeartbeat(nodeinfo, reports, cacheCapacity,
cacheUsed, xceiverCount, failedVolumes, volumeFailureSummary);
// If we are in safemode, do not send back any recovery / replication
// requests. Don't even drain the existing queue of work.
if (namesystem.isInSafeMode()) {
return new DatanodeCommand[0];
}
// block recovery command
// A pending recovery command is returned alone: this early return skips all
// other command types and the slow peer / slow disk handling further down.
final BlockRecoveryCommand brCommand = getBlockRecoveryCommand(blockPoolId,
nodeinfo);
if (brCommand != null) {
return new DatanodeCommand[]{brCommand};
}
final List<DatanodeCommand> cmds = new ArrayList<>();
// Allocate _approximately_ maxTransfers pending tasks to DataNode.
// NN chooses pending tasks based on the ratio between the lengths of
// replication and erasure-coded block queues.
int totalReplicateBlocks = nodeinfo.getNumberOfReplicateBlocks();
int totalECBlocks = nodeinfo.getNumberOfBlocksToBeErasureCoded();
int totalBlocks = totalReplicateBlocks + totalECBlocks;
if (totalBlocks > 0) {
// Both shares are rounded up, so their sum can exceed maxTransfers.
// NOTE(review): each product is evaluated in int before the cast to double.
int numReplicationTasks = (int) Math.ceil(
(double) (totalReplicateBlocks * maxTransfers) / totalBlocks);
int numECTasks = (int) Math.ceil(
(double) (totalECBlocks * maxTransfers) / totalBlocks);
if (LOG.isDebugEnabled()) {
LOG.debug("Pending replication tasks: " + numReplicationTasks
+ " erasure-coded tasks: " + numECTasks);
}
// check pending replication tasks
List<BlockTargetPair> pendingList = nodeinfo.getReplicationCommand(
numReplicationTasks);
if (pendingList != null && !pendingList.isEmpty()) {
cmds.add(new BlockCommand(DatanodeProtocol.DNA_TRANSFER, blockPoolId,
pendingList));
}
// check pending erasure coding tasks
List<BlockECReconstructionInfo> pendingECList = nodeinfo
.getErasureCodeCommand(numECTasks);
if (pendingECList != null && !pendingECList.isEmpty()) {
cmds.add(new BlockECReconstructionCommand(
DNA_ERASURE_CODING_RECONSTRUCTION, pendingECList));
}
}
// check block invalidation
// At most blockInvalidateLimit blocks are requested per heartbeat.
Block[] blks = nodeinfo.getInvalidateBlocks(blockInvalidateLimit);
if (blks != null) {
cmds.add(new BlockCommand(DatanodeProtocol.DNA_INVALIDATE, blockPoolId,
blks));
}
// cache commands
addCacheCommands(blockPoolId, nodeinfo, cmds);
// key update command
blockManager.addKeyUpdateCommand(cmds, nodeinfo);
// check for balancer bandwidth update
if (nodeinfo.getBalancerBandwidth() > 0) {
cmds.add(new BalancerBandwidthCommand(nodeinfo.getBalancerBandwidth()));
// set back to 0 to indicate that datanode has been sent the new value
nodeinfo.setBalancerBandwidth(0);
}
// Slow peer reports are recorded only when a tracker is configured; each
// reported slow node id is recorded against this datanode's IPC address.
if (slowPeerTracker != null) {
final Map<String, Double> slowPeersMap = slowPeers.getSlowPeers();
if (!slowPeersMap.isEmpty()) {
if (LOG.isDebugEnabled()) {
LOG.debug("DataNode " + nodeReg + " reported slow peers: " +
slowPeersMap);
}
for (String slowNodeId : slowPeersMap.keySet()) {
slowPeerTracker.addReport(slowNodeId, nodeReg.getIpcAddr(false));
}
}
}
// Slow disk reports are handled the same way, keyed by the IPC address.
if (slowDiskTracker != null) {
if (!slowDisks.getSlowDisks().isEmpty()) {
if (LOG.isDebugEnabled()) {
LOG.debug("DataNode " + nodeReg + " reported slow disks: " +
slowDisks.getSlowDisks());
}
slowDiskTracker.addSlowDiskReport(nodeReg.getIpcAddr(false), slowDisks);
}
}
// Return the collected commands, or an empty array (never null) if none.
if (!cmds.isEmpty()) {
return cmds.toArray(new DatanodeCommand[cmds.size()]);
}
return new DatanodeCommand[0];
}
1.心跳中的節點未註冊,則下達註冊命令
2.如果nn處於安全模式(代碼中判斷的是 namesystem.isInSafeMode();原筆記記爲standby),則下達空命令。由於updateHeartbeat在此判斷之前執行,snn也是會更新狀態的。
一個給定的數據節點進行心跳信息的上報,主要做2個操作
- 1.心跳信息的記錄,避免數據節點超時
- 2.調整新的名字節點中維護的數據塊分配情況 blockManager更新。
// block recovery command
// check pending replication tasks
// check pending erasure coding tasks
// check block invalidation
無效塊這裏有一個限制(blockInvalidateLimit):每次最多1000個。
總結:
上述僅僅處理心跳(還沒處理塊彙報),心跳到達nn,nn端開始根據信息來分析,下達命令。
那麼,問題來了:
無效塊:怎麼知道哪些塊是無效塊???