HBase系列-合併Region

背景

隨着接入的業務增多和單業務數據量的增大,hbase的region數量迅速增長。hbase中RegionServer管理的region數量是有上限建議的,參見 HBase系列-RegionServer管理region數量上限
如何應對region個數不斷增加帶來的風險成爲面臨的一個問題。
想到2種解決方案:

  • 添加hbase集羣的機器數量,從而減少每個RegionServer管理的region數量
  • 給hbase表添加壓縮和TTL時間,減少hbase中存儲的數據量,降低hbase split的概率,減緩region數量增長
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.util.Bytes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Maintenance helpers for slowing down HBase region growth: enabling SNAPPY
 * compression and a TTL on the {@code info} column family, triggering
 * compactions, and merging small adjacent regions.
 *
 * Author: [email protected]
 * Date: 2018-10-18
 * Copyright © 2018 huyisen. All Rights Reserved.
 */
public class HBaseOperationMain {


    private static final Logger log = LoggerFactory.getLogger(HBaseOperationMain.class);

    /** Number of seconds in one day; used to build the TTL value. */
    private static final int DAY = 86400;
    /** Name of the column family all operations in this class work on. */
    private static final String CF = "info";
    /** Regions whose store file size (in MB) is at most this value are merge candidates (5 GB). */
    private static final int MERGE_SIZE = 1024 * 5;

    /**
     * Builds the HBase client configuration.
     *
     * <p>The three values are placeholders and must be replaced with the
     * settings of the target cluster.
     */
    private static Configuration config() {
        Configuration config = HBaseConfiguration.create();
        config.set("hbase.zookeeper.quorum", "your hbase.zookeeper.quorum");
        config.set("hbase.zookeeper.property.clientPort", "your hbase.zookeeper.property.clientPort");
        config.set("zookeeper.znode.parent", "your zookeeper.znode.parent");
        return config;
    }

    /**
     * Requests a major compaction of the table.
     *
     * <p>After adding a TTL and compression, a major compaction has to be
     * triggered manually, otherwise the new settings do not take effect
     * immediately.
     *
     * @param tableName table to compact
     * @throws IOException if the connection or the compaction request fails
     */
    private static void majorCompact(TableName tableName) throws IOException {
        // Admin is a closeable resource as well, so it is managed together with the connection.
        try (Connection connection = ConnectionFactory.createConnection(config());
             Admin admin = connection.getAdmin()) {
            admin.majorCompact(tableName);
        }
    }

    /**
     * Enables SNAPPY compression and a 30 day TTL on the {@link #CF} family of
     * the table, then requests a compaction.
     *
     * <p>The table is disabled while the column family is modified and is
     * re-enabled afterwards, even when the modification fails. Tables that do
     * not have the {@link #CF} family are left untouched.
     *
     * @param tableName table to modify
     * @throws IOException if any admin operation fails
     */
    private static void compressions(TableName tableName) throws IOException {
        try (Connection connection = ConnectionFactory.createConnection(config());
             Admin admin = connection.getAdmin()) {
            // Resolve the family before touching the table state, so a table
            // without the family is never disabled for nothing.
            HTableDescriptor htd = admin.getTableDescriptor(tableName);
            HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(CF));
            if (hcd == null) {
                log.warn("table [{}] has no column family [{}], skipped", tableName, CF);
                return;
            }
            hcd.setCompressionType(Compression.Algorithm.SNAPPY);
            hcd.setTimeToLive(DAY * 30);

            if (admin.isTableEnabled(tableName)) {
                admin.disableTable(tableName);
            }
            try {
                admin.modifyColumn(tableName, hcd);
            } finally {
                // Never leave the table disabled, even if the modification failed.
                if (admin.isTableDisabled(tableName)) {
                    admin.enableTable(tableName);
                }
            }
            // Requested only after the table is enabled again: a disabled
            // table has no online regions that could serve the request.
            admin.compact(tableName);
        }
    }

    /**
     * Lists the tables whose {@link #CF} family is not SNAPPY compressed or
     * has no TTL configured.
     *
     * <p>Tables without the {@link #CF} family are skipped.
     *
     * @return tables that still need compression and/or a TTL; never {@code null}
     * @throws IOException if any admin operation fails
     */
    private static List<TableName> getCompressions() throws IOException {
        List<TableName> compressions = new ArrayList<>();
        try (Connection connection = ConnectionFactory.createConnection(config());
             Admin admin = connection.getAdmin()) {
            for (TableName table : admin.listTableNames()) {
                HColumnDescriptor hcd = admin.getTableDescriptor(table).getFamily(Bytes.toBytes(CF));
                if (hcd == null) {
                    log.debug("table [{}] has no column family [{}], skipped", table, CF);
                    continue;
                }
                // Check the same attribute that compressions() sets.
                boolean snappy = hcd.getCompressionType() == Compression.Algorithm.SNAPPY;
                // Integer.MAX_VALUE is treated as "no TTL configured".
                boolean hasTtl = hcd.getTimeToLive() != Integer.MAX_VALUE;
                if (!snappy || !hasTtl) {
                    compressions.add(table);
                }
            }
        }
        return compressions;
    }

    /**
     * Requests a (non-forced) merge for every given pair of adjacent regions.
     *
     * @param merges region pairs to merge, {@code first} being the region that precedes {@code second}
     * @throws IOException if the connection or a merge request fails
     */
    private static void merge(List<Pair<HRegionInfo, HRegionInfo>> merges) throws IOException {
        try (Connection connection = ConnectionFactory.createConnection(config());
             Admin admin = connection.getAdmin()) {
            for (Pair<HRegionInfo, HRegionInfo> pair : merges) {
                log.info("merge regions [{}]~[{}]", pair.first.getRegionNameAsString(), pair.second.getRegionNameAsString());
                admin.mergeRegions(pair.first.getRegionName(), pair.second.getRegionName(), false);
            }
            log.info("merge region size={}", merges.size());
        }
    }

    /**
     * Finds pairs of adjacent regions of the table that are both small enough
     * (store file size of at most {@link #MERGE_SIZE} MB) to be merged.
     *
     * <p>Regions whose load cannot be determined (for example because they are
     * not currently assigned to a live server) are not considered.
     *
     * @param tableName table to inspect
     * @return pairs of adjacent regions; every region appears in at most one pair
     * @throws IOException if the connection or any lookup fails
     */
    private static List<Pair<HRegionInfo, HRegionInfo>> getMerges(TableName tableName) throws IOException {
        List<HRegionInfo> candidates = new ArrayList<>();
        try (Connection connection = ConnectionFactory.createConnection(config());
             Admin admin = connection.getAdmin();
             RegionLocator regionLocator = connection.getRegionLocator(tableName)) {
            ClusterStatus clusterStatus = admin.getClusterStatus();
            for (HRegionLocation location : regionLocator.getAllRegionLocations()) {
                HRegionInfo regionInfo = location.getRegionInfo();
                ServerName serverName = location.getServerName();
                ServerLoad serverLoad = serverName == null ? null : clusterStatus.getLoad(serverName);
                RegionLoad regionLoad = serverLoad == null
                        ? null
                        : serverLoad.getRegionsLoad().get(regionInfo.getRegionName());
                if (regionLoad == null) {
                    // Without load information the size is unknown; do not guess.
                    log.warn("no load information for region [{}], skipped", regionInfo.getRegionNameAsString());
                    continue;
                }
                if (regionLoad.getStorefileSizeMB() <= MERGE_SIZE) {
                    candidates.add(regionInfo);
                }
            }
        }
        return pairAdjacent(candidates);
    }

    /**
     * Pairs each candidate region with the candidate that directly follows it
     * in the key space, using every region at most once.
     */
    private static List<Pair<HRegionInfo, HRegionInfo>> pairAdjacent(List<HRegionInfo> candidates) {
        List<Pair<HRegionInfo, HRegionInfo>> merges = new ArrayList<>();
        Set<String> paired = new HashSet<>();
        for (HRegionInfo first : candidates) {
            byte[] endKey = first.getEndKey();
            // An empty end key marks the last region of the table: it has no
            // successor and must not be matched with the empty start key of
            // the first region.
            if (endKey.length == 0 || paired.contains(first.getRegionNameAsString())) {
                continue;
            }
            for (HRegionInfo second : candidates) {
                if (paired.contains(second.getRegionNameAsString())) {
                    continue;
                }
                // Row keys are arbitrary bytes; compare them as bytes, not as decoded strings.
                if (Bytes.equals(endKey, second.getStartKey())) {
                    merges.add(new Pair<>(first, second));
                    paired.add(first.getRegionNameAsString());
                    paired.add(second.getRegionNameAsString());
                    break;
                }
            }
        }
        return merges;
    }

    /**
     * Minimal mutable two-element holder.
     *
     * @param <F> type of the first element
     * @param <S> type of the second element
     */
    static class Pair<F, S> {
        F first;
        S second;

        Pair() {
        }

        Pair(F first, S second) {
            this.first = first;
            this.second = second;
        }
    }
}

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章