背景
隨着接入的業務增多和單業務數據量的增大,hbase的region數量迅速增長。hbase中RegionServer管理的region數量是有上限建議的,參見 HBase系列-RegionServer管理region數量上限
如何應對region個數不斷增加帶來的風險成爲面臨的一個問題。
想到兩個解決方案:
- 添加hbase集羣的機器數量,從而減少每個RegionServer管理的region數量
- 給hbase表添加壓縮和TTL時間,減少hbase中存儲的數據量,降低hbase split的概率,減緩region數量增長
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.util.Bytes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
* Author: [email protected]
* Date: 2018-10-18
* Copyright © 2018 huyisen. All Rights Reserved.
*/
public class HBaseOperationMain {

    private static final Logger log = LoggerFactory.getLogger(HBaseOperationMain.class);

    /** Seconds per day; used to express TTL values. */
    private static final int DAY = 86400;

    /** Column family every managed table is expected to contain. */
    private static final String CF = "info";

    /** Regions whose store files total at most this many MB are merge candidates. */
    private static final int MERGE_SIZE = 1024 * 5;

    /**
     * Builds the HBase client configuration.
     * The placeholder values must be replaced with real cluster settings before use.
     */
    private static Configuration config() {
        Configuration config = HBaseConfiguration.create();
        config.set("hbase.zookeeper.quorum", "your hbase.zookeeper.quorum");
        config.set("hbase.zookeeper.property.clientPort", "your hbase.zookeeper.property.clientPort");
        config.set("zookeeper.znode.parent", "your zookeeper.znode.parent");
        return config;
    }

    /**
     * Triggers a major compaction so that newly added TTL/compression settings
     * take effect immediately instead of waiting for the next scheduled compaction.
     *
     * @param tableName table to compact
     * @throws IOException on any HBase client failure
     */
    private static void majorCompact(TableName tableName) throws IOException {
        // FIX: Admin is AutoCloseable and was previously leaked; manage it with
        // try-with-resources like every other method in this class.
        try (Connection connection = ConnectionFactory.createConnection(config());
             Admin admin = connection.getAdmin()) {
            admin.majorCompact(tableName);
        }
    }

    /**
     * Adds a 30-day TTL and SNAPPY compression to the {@value #CF} family of the table.
     * The table is disabled for the schema change, re-enabled, and then compacted —
     * compaction only runs against online regions, so it must follow the re-enable.
     *
     * @param tableName table to reconfigure
     * @throws IOException on any HBase client failure
     */
    private static void compressions(TableName tableName) throws IOException {
        try (Connection connection = ConnectionFactory.createConnection(config());
             Admin admin = connection.getAdmin()) {
            if (admin.isTableEnabled(tableName)) {
                admin.disableTable(tableName);
            }
            HTableDescriptor htd = admin.getTableDescriptor(tableName);
            HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(CF));
            hcd.setCompressionType(Compression.Algorithm.SNAPPY);
            hcd.setTimeToLive(DAY * 30);
            admin.modifyColumn(tableName, hcd);
            if (admin.isTableDisabled(tableName)) {
                admin.enableTable(tableName);
            }
            // FIX: compact after re-enabling; the original compacted the still-disabled
            // table, which has no effect because its regions are offline.
            admin.compact(tableName);
        }
    }

    /**
     * Returns the tables whose {@value #CF} family is still missing either
     * SNAPPY compression or a TTL, i.e. the tables {@link #compressions} has
     * not yet processed.
     *
     * @return tables still needing compression/TTL configuration
     * @throws IOException on any HBase client failure
     */
    private static List<TableName> getCompressions() throws IOException {
        List<TableName> compressions = new ArrayList<>();
        try (Connection connection = ConnectionFactory.createConnection(config());
             Admin admin = connection.getAdmin()) {
            TableName[] events = admin.listTableNames();
            for (TableName event : events) {
                HTableDescriptor htd = admin.getTableDescriptor(event);
                HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(CF));
                // FIX: inspect the family's storage compression (what setCompressionType
                // configures). The original read getCompactionCompressionType(), which
                // stays NONE even after conversion, so every table was re-reported forever.
                Compression.Algorithm ct = hcd.getCompressionType();
                int ttl = hcd.getTimeToLive();
                // Integer.MAX_VALUE is HConstants.FOREVER, i.e. no TTL configured.
                if (ct != Compression.Algorithm.SNAPPY || ttl == Integer.MAX_VALUE) {
                    compressions.add(event);
                }
            }
        }
        return compressions;
    }

    /**
     * Merges each adjacent region pair produced by {@link #getMerges(TableName)}.
     *
     * @param merges pairs of adjacent regions; each region must appear at most once
     * @throws IOException on any HBase client failure
     */
    private static void merge(List<Pair<HRegionInfo, HRegionInfo>> merges) throws IOException {
        try (Connection connection = ConnectionFactory.createConnection(config());
             Admin admin = connection.getAdmin()) {
            for (Pair<HRegionInfo, HRegionInfo> pair : merges) {
                log.info("merge regions [{}]~[{}]",
                        pair.first.getRegionNameAsString(), pair.second.getRegionNameAsString());
                // forcible=false: the two regions must really be adjacent.
                admin.mergeRegions(pair.first.getRegionName(), pair.second.getRegionName(), false);
            }
            log.info("merge region size={}", merges.size());
        }
    }

    /**
     * Finds pairs of adjacent small regions (store files &lt;= {@value #MERGE_SIZE} MB)
     * that can be merged. Each region is assigned to at most one pair.
     *
     * @param tableName table whose regions are examined
     * @return list of mergeable adjacent region pairs
     * @throws IOException on any HBase client failure
     */
    private static List<Pair<HRegionInfo, HRegionInfo>> getMerges(TableName tableName) throws IOException {
        Configuration configuration = config();
        List<HRegionInfo> candidates = new ArrayList<>();
        List<Pair<HRegionInfo, HRegionInfo>> merges = new ArrayList<>();
        try (Connection connection = ConnectionFactory.createConnection(configuration);
             Admin admin = connection.getAdmin();
             RegionLocator regionLocator = connection.getRegionLocator(tableName)) {
            ClusterStatus clusterStatus = admin.getClusterStatus();
            // Collect regions small enough to be merge candidates.
            // (Removed a dead standalone hrl.getServerName() call from the original.)
            for (HRegionLocation hrl : regionLocator.getAllRegionLocations()) {
                HRegionInfo regionInfo = hrl.getRegionInfo();
                byte[] regionName = regionInfo.getRegionName();
                RegionLoad load = clusterStatus.getLoad(hrl.getServerName())
                        .getRegionsLoad()
                        .get(regionName);
                if (load.getStorefileSizeMB() <= MERGE_SIZE) {
                    candidates.add(regionInfo);
                }
            }
            // Names of regions already assigned to a pair; a region merged once must
            // not be scheduled into a second merge in the same pass.
            Set<String> used = new HashSet<>();
            for (int i = 0; i < candidates.size() - 1; i++) {
                HRegionInfo first = candidates.get(i);
                if (used.contains(first.getRegionNameAsString())) {
                    continue;
                }
                // FIX: an empty end key marks the table's last region; the original
                // string-compared it equal to the first region's empty start key,
                // producing a bogus "adjacent" pair of non-adjacent regions.
                if (first.getEndKey().length == 0) {
                    continue;
                }
                for (HRegionInfo second : candidates) {
                    if (second == first) {
                        continue;
                    }
                    String secondName = second.getRegionNameAsString();
                    if (!used.contains(secondName)
                            && Bytes.toString(first.getEndKey()).equals(Bytes.toString(second.getStartKey()))) {
                        Pair<HRegionInfo, HRegionInfo> pair = new Pair<>();
                        pair.first = first;
                        pair.second = second;
                        merges.add(pair);
                        // FIX: mark BOTH regions as used. The original only recorded the
                        // second region, so a region used as 'first' in one pair could be
                        // picked again as 'second' of another pair, double-merging it.
                        used.add(first.getRegionNameAsString());
                        used.add(secondName);
                        break;
                    }
                }
            }
            return merges;
        }
    }

    /** Minimal mutable 2-tuple used to hold an adjacent region pair. */
    static class Pair<F, S> {
        F first;
        S second;
    }
}