HBase 2.0版本協處理器 Endpoint使用

在 HBase2.0之前使用協處理器 Endpoint 時,使用的方式是實現CoprocessorService,Coprocessor這兩個接口,但是在2.0版本中,這麼做在 hbase regionServer的日誌中顯示 Endpoint 加載成功了,但是在 hbase 的 regionServer UI 界面,並沒有顯示已經加載的Endpoint協處理器,也沒有報錯日誌,很頭疼.同時CoprocessorService顯示是過期的,但是進源碼只看到說將會在3.0移除,也沒有說明替代方案,甚至官網 Endpoint 教程中使用的也是實現CoprocessorService,Coprocessor這兩個接口.

下面以實現統計表中rowkey 的個數(統計行)爲例:

老式寫法:

  1. proto 腳本:
// Protobuf definition for the row-count Endpoint coprocessor.
// HBase coprocessor endpoints use protobuf 2 semantics ("required",
// field defaults), so declare the syntax explicitly — recent protoc
// versions warn (or fail) when the syntax line is omitted.
syntax = "proto2";

option java_package = "com.ljy.coprocessor";
option java_outer_classname = "CountRows";
option java_generic_services = true;
option java_generate_equals_and_hash = true;
option optimize_for = SPEED;

// Empty request: the endpoint needs no parameters to count rows.
message CountRequest {
}

// Response carrying the number of rows found in one region.
message CountResponse {
    required int64 count = 1 [default=0];
}

// RPC service implemented by the region-side endpoint coprocessor.
service CountRowService {
    rpc getCountRows(CountRequest)
        returns (CountResponse);
}
  1. Endpoint 代碼:
package com.ljy.coprocessor;

import com.google.protobuf.RpcCallback;
import com.google.protobuf.RpcController;
import com.google.protobuf.Service;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.Coprocessor;
import org.apache.hadoop.hbase.CoprocessorEnvironment;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.coprocessor.CoprocessorService;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.regionserver.RegionScanner;
import org.apache.hadoop.hbase.shaded.protobuf.ResponseConverter;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Row-count Endpoint coprocessor written in the pre-2.0 style: it extends the
 * protobuf-generated service stub and implements both CoprocessorService and
 * Coprocessor.
 *
 * NOTE(review): under HBase 2.0 this class is reported as "loaded" in the
 * RegionServer log, but start() is never invoked, so {@code rce} stays null
 * and getCountRows() fails with an NPE when it touches the region — this is
 * exactly the failure the article describes. Kept unchanged for reference.
 */
public class CountRowsSumEndpoint extends CountRows.CountRowService implements CoprocessorService, Coprocessor {

    private static Log LOG = LogFactory.getLog(CountRowsSumEndpoint.class);
    // Region-side environment captured in start(); grants access to the Region.
    private RegionCoprocessorEnvironment rce = null;

    @Override
    public void stop(CoprocessorEnvironment env) throws IOException {
        LOG.info("=================CountRowsSumEndpoint#stop  be called");
    }

    @Override
    public void start(CoprocessorEnvironment env) throws IOException {
        // Endpoint coprocessors only run region-side, so this cast is expected
        // to succeed when the framework actually calls start().
        rce = (RegionCoprocessorEnvironment) env;
        LOG.info("=================CountRowsSumEndpoint#start  be called");
    }

    /**
     * RPC entry point: counts the rows of the hosting region and hands the
     * result (or the wrapped exception) back through the callback.
     */
    @Override
    public void getCountRows(RpcController controller, CountRows.CountRequest request, RpcCallback<CountRows.CountResponse> done) {
        CountRows.CountResponse response = null;
        try {
            long count = getCount();
            response = CountRows.CountResponse.newBuilder().setCount(count).build();

        } catch (IOException e) {
            // Propagate the failure to the client via the controller;
            // response stays null in that case.
            ResponseConverter.setControllerException(controller, e);
        }
        done.run(response);
    }

    /**
     * Scans the whole region with a raw scanner and counts distinct row keys.
     * Returns 0 (with an error log) when the environment was never injected.
     */
    private long getCount() throws IOException {
        if (rce == null) {
            LOG.error("===========rce is null");
            return 0;
        }
        long count = 0;
        byte[] currentRow = null;
        Scan scan = new Scan();
        try (final RegionScanner scanner = rce.getRegion().getScanner(scan)) {
            List<Cell> cells = new ArrayList<>();
            boolean hasMore;
            do {
                hasMore = scanner.nextRaw(cells);
                for (Cell cell : cells) {
                    // Count a row once, on its first cell; the break assumes
                    // each nextRaw() batch holds cells of a single row.
                    if (currentRow == null || !CellUtil.matchingRow(cell, currentRow)) {
                        currentRow = CellUtil.cloneRow(cell);
                        count++;
                        break;
                    }
                }
                cells.clear();
            } while (hasMore);
        }
        return count;
    }

    @Override
    public Service getService() {
        // Old-style registration: expose this instance as the RPC service.
        return this;
    }
}
  1. 客戶端測試:
package com.ljy.coprocessor;

import org.apache.hadoop.hbase.client.coprocessor.Batch;
import org.apache.hadoop.hbase.ipc.CoprocessorRpcUtils;

import java.io.IOException;

/**
 * Batch.Call implementation that invokes the CountRowService endpoint on a
 * single region and extracts the row count from its response.
 */
public class RowCountCallable implements Batch.Call<CountRows.CountRowService, Long> {

    /** Request forwarded unchanged to each region's endpoint instance. */
    private final CountRows.CountRequest request;

    public RowCountCallable(CountRows.CountRequest request) {
        this.request = request;
    }

    /**
     * Performs the blocking endpoint RPC against one region.
     *
     * @param instance region-bound stub of the generated service
     * @return the region's row count, or 0 when the response carries none
     */
    @Override
    public Long call(CountRows.CountRowService instance) throws IOException {
        CoprocessorRpcUtils.BlockingRpcCallback<CountRows.CountResponse> callback =
                new CoprocessorRpcUtils.BlockingRpcCallback<>();
        // Controller is null here; errors surface through the callback/response.
        instance.getCountRows(null, request, callback);
        final CountRows.CountResponse reply = callback.get();
        if (reply.hasCount()) {
            return reply.getCount();
        }
        return 0L;
    }
}
 @Test
    public void testRowCountEndpoint() {
        // Invoke the row-count endpoint on every region of the table,
        // print the per-region counts, then the grand total.
        try (final Table table = HBaseUtils.getTable("testDepFilter")) {
            final CountRows.CountRequest request = CountRows.CountRequest.getDefaultInstance();
            final Map<byte[], Long> regionCounts =
                    table.coprocessorService(CountRows.CountRowService.class, null, null, new RowCountCallable(request));
            long totalRows = 0;
            for (Map.Entry<byte[], Long> regionCount : regionCounts.entrySet()) {
                totalRows += regionCount.getValue();
                System.out.println("Region:" + Bytes.toString(regionCount.getKey()) + "包含" + regionCount.getValue() + "記錄");
            }
            System.out.println("總記錄數:" + totalRows);
        } catch (Throwable e) {
            // coprocessorService declares Throwable; print it so the test
            // output shows what went wrong instead of failing silently.
            e.printStackTrace();
        }
    }


    @After
    public void close() {
        // Release the shared HBase connection and its thread pool after the test.
        // NOTE(review): @After runs after every test method, but closePool()
        // tears down static state — verify this suite runs only one test,
        // or move this to @AfterClass.
        HBaseUtils.closePool();
    }
package com.ljy.util;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Table;

import java.io.Closeable;
import java.io.IOException;
import java.util.Objects;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

/**
 * Small utility holder for a process-wide HBase {@link Connection} backed by a
 * fixed-size thread pool. Table instances are cheap handles obtained from the
 * shared connection; the connection itself is expensive and created once.
 */
public class HBaseUtils {

    /**
     * Thread pool handed to the shared connection. Creating table instances is
     * an expensive operation, so a single pooled connection is reused and
     * handles are fetched from it on demand.
     */
    private static ExecutorService pool = Executors.newFixedThreadPool(10);

    /** Process-wide connection; null if the static initializer failed. */
    private static Connection conn;

    /*
     * Create the shared connection eagerly at class-load time.
     * NOTE(review): the IOException is only printed — if connection creation
     * fails, conn stays null and every later call NPEs. Consider rethrowing
     * as ExceptionInInitializerError instead.
     */
    static {
        try {
            conn = ConnectionFactory.createConnection(getConf(), pool);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Builds a configuration from core-site.xml and hbase-site.xml on the classpath. */
    private static Configuration getConf() {
        Configuration conf = HBaseConfiguration.create();
        conf.addResource(Objects.requireNonNull(ClassLoader.getSystemResourceAsStream("core-site.xml")));
        conf.addResource(Objects.requireNonNull(ClassLoader.getSystemResourceAsStream("hbase-site.xml")));
        return conf;
    }

    /** Returns the shared connection (may be null if initialization failed). */
    public static Connection getConnection() {
        return conn;
    }

    /**
     * Misspelled accessor kept for backward compatibility.
     *
     * @deprecated use {@link #getConnection()} instead.
     */
    @Deprecated
    public static Connection getConnecttion() {
        return getConnection();
    }

    /**
     * Returns a lightweight handle for the named table; close it after use.
     *
     * @param tableName name of the HBase table
     * @return a {@link Table} handle bound to the shared connection
     * @throws IOException if the handle cannot be created
     */
    public static Table getTable(String tableName) throws IOException {
        return conn.getTable(TableName.valueOf(tableName));
    }

    /** Returns an {@link Admin} instance bound to the shared connection. */
    public static Admin getAdmin() throws IOException {
        return conn.getAdmin();
    }

    /**
     * Best-effort close of any number of resources; null entries are skipped
     * and individual failures are printed rather than propagated.
     */
    public static void close(Closeable... closes) {
        for (Closeable closeable : closes) {
            if (closeable != null) {
                try {
                    closeable.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /** Shuts down the shared connection and its thread pool (idempotent for the pool). */
    public static void closePool() {
        if (conn != null) {
            close(conn);
        }

        if (pool != null) {
            pool.shutdown();
            pool = null;
        }
    }
}
  1. 以上代碼使用靜態加載或者是動態加載,在 RegionServer 的日誌中都顯示被 loaded,但是測試的時候因爲不走 start 方法,所以沒法拿到RegionCoprocessorEnvironment,故在getCountRows方法中獲取region 時報空指針異常.

新版本 Endpoint 代碼:

新版本中採用實現RegionCoprocessor接口就可以實現 Endpoint 類型的協處理器了.

package com.ljy.coprocessor;

import com.google.protobuf.RpcCallback;
import com.google.protobuf.RpcController;
import com.google.protobuf.Service;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CoprocessorEnvironment;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessor;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.regionserver.RegionScanner;
import org.apache.hadoop.hbase.shaded.protobuf.ResponseConverter;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * Row-count Endpoint coprocessor in the HBase 2.0 style: instead of the
 * deprecated CoprocessorService + Coprocessor pair, it implements
 * {@link RegionCoprocessor} and exposes the protobuf service via
 * {@link #getServices()}. With this shape, start() is invoked and the
 * endpoint works under both static and dynamic loading.
 */
public class CountRowsSumEndpoint extends CountRows.CountRowService implements RegionCoprocessor {

    private static final Log LOG = LogFactory.getLog(CountRowsSumEndpoint.class);
    // Region-side environment captured in start(); grants access to the Region.
    private RegionCoprocessorEnvironment rce = null;

    @Override
    public void stop(CoprocessorEnvironment env) throws IOException {
        LOG.info("=================CountRowsSumEndpoint#stop  be called");
    }

    @Override
    public void start(CoprocessorEnvironment env) throws IOException {
        // RegionCoprocessor only runs region-side, so the environment is
        // always a RegionCoprocessorEnvironment here.
        rce = (RegionCoprocessorEnvironment) env;
        LOG.info("=================CountRowsSumEndpoint#start  be called");
    }

    /**
     * RPC entry point: counts the rows of the hosting region and hands the
     * result (or the wrapped exception) back through the callback.
     */
    @Override
    public void getCountRows(RpcController controller, CountRows.CountRequest request, RpcCallback<CountRows.CountResponse> done) {
        CountRows.CountResponse response = null;
        try {
            long count = getCount();
            response = CountRows.CountResponse.newBuilder().setCount(count).build();

        } catch (IOException e) {
            // Propagate the failure to the client via the controller;
            // response stays null in that case.
            ResponseConverter.setControllerException(controller, e);
        }
        done.run(response);
    }

    /**
     * Scans the whole region with a raw scanner and counts distinct row keys.
     * Returns 0 (with an error log) when the environment was never injected.
     */
    private long getCount() throws IOException {
        if (rce == null) {
            LOG.error("===========rce is null");
            return 0;
        }
        long count = 0;
        byte[] currentRow = null;
        Scan scan = new Scan();
        try (final RegionScanner scanner = rce.getRegion().getScanner(scan)) {
            List<Cell> cells = new ArrayList<>();
            boolean hasMore;
            do {
                hasMore = scanner.nextRaw(cells);
                // Count a row the first time one of its cells appears.
                // BUGFIX: the original broke out of this loop after the first
                // new row, which undercounts when a single nextRaw() batch
                // contains cells from more than one row; examine every cell
                // instead (matches the official hbase-examples RowCountEndpoint).
                for (Cell cell : cells) {
                    if (currentRow == null || !CellUtil.matchingRow(cell, currentRow)) {
                        currentRow = CellUtil.cloneRow(cell);
                        count++;
                    }
                }
                cells.clear();
            } while (hasMore);
        }
        return count;
    }

    @Override
    public Iterable<Service> getServices() {
        // New-style registration: a coprocessor may expose several services;
        // this one exposes exactly one — itself.
        return Collections.singleton(this);
    }

}

修改後不管是採用動態加載還是靜態加載,都可以正常運行了.

以上僅僅當筆記.後面有時間可以好好梳理一下.

感嘆!!!

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章