在 HBase2.0之前使用協處理器 Endpoint 時,使用的方式是實現CoprocessorService,Coprocessor這兩個接口,但是在2.0版本中,這麼做在 hbase regionServer的日誌中顯示 Endpoint 加載成功了,但是在 hbase 的 regionServer UI 界面,並沒有顯示已經加載的Endpoint協處理器,也沒有報錯日誌,很頭疼.同時CoprocessorService顯示是過期的,但是進源碼只看到說將會在3.0移除,也沒有說明替代方案,甚至官網 Endpoint 教程中使用的也是實現CoprocessorService,Coprocessor這兩個接口.
下面以實現統計表中rowkey 的個數(統計行)爲例:
老式寫法:
- proto 腳本:
// Protobuf (proto2) definition for the row-count Endpoint RPC.
// NOTE(review): no `syntax` statement — protoc 3.x warns and falls back to proto2;
// HBase 2.x endpoint codegen still targets protoc 2.5, which is why it is omitted here.
option java_package = "com.ljy.coprocessor";
option java_outer_classname = "CountRows";
// Generate the abstract Service/Stub classes the Endpoint extends.
option java_generic_services = true;
option java_generate_equals_and_hash = true;
option optimize_for = SPEED;
// Empty request: the count needs no parameters.
message CountRequest {
}
// Per-region row count returned by the endpoint.
message CountResponse {
required int64 count = 1 [default=0];
}
// Service implemented by CountRowsSumEndpoint and called from the client.
service CountRowService {
rpc getCountRows(CountRequest)
returns (CountResponse);
}
- Endpoint 代碼:
package com.ljy.coprocessor;
import com.google.protobuf.RpcCallback;
import com.google.protobuf.RpcController;
import com.google.protobuf.Service;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.Coprocessor;
import org.apache.hadoop.hbase.CoprocessorEnvironment;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.coprocessor.CoprocessorService;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.regionserver.RegionScanner;
import org.apache.hadoop.hbase.shaded.protobuf.ResponseConverter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
// OLD-STYLE (pre-HBase-2.0) Endpoint: implements CoprocessorService + Coprocessor.
// Kept as the broken counter-example this article is about: on HBase 2.0 the class
// loads ("loaded" appears in the RegionServer log) but start() is never invoked,
// so `rce` stays null and getCount() cannot reach the region (see the note below L205).
public class CountRowsSumEndpoint extends CountRows.CountRowService implements CoprocessorService, Coprocessor {
private static Log LOG = LogFactory.getLog(CountRowsSumEndpoint.class);
// Region environment captured in start(); needed to open a scanner on the hosting region.
private RegionCoprocessorEnvironment rce = null;
@Override
public void stop(CoprocessorEnvironment env) throws IOException {
LOG.info("=================CountRowsSumEndpoint#stop be called");
}
// Never called on HBase 2.0 when registered via these interfaces — the root cause
// of the NPE described later.
@Override
public void start(CoprocessorEnvironment env) throws IOException {
rce = (RegionCoprocessorEnvironment) env;
LOG.info("=================CountRowsSumEndpoint#start be called");
}
// RPC entry point: counts rows of this region; on scanner failure the exception is
// propagated to the client via the controller and a null response is delivered.
@Override
public void getCountRows(RpcController controller, CountRows.CountRequest request, RpcCallback<CountRows.CountResponse> done) {
CountRows.CountResponse response = null;
try {
long count = getCount();
response = CountRows.CountResponse.newBuilder().setCount(count).build();
} catch (IOException e) {
ResponseConverter.setControllerException(controller, e);
}
done.run(response);
}
// Scans the whole region with a raw scanner and counts distinct row keys.
// Returns 0 (with an error log) when start() never ran and rce is null.
private long getCount() throws IOException {
if (rce == null) {
LOG.error("===========rce is null");
return 0;
}
long count = 0;
byte[] currentRow = null;
Scan scan = new Scan();
try (final RegionScanner scanner = rce.getRegion().getScanner(scan)) {
List<Cell> cells = new ArrayList<>();
boolean hasMore;
do {
hasMore = scanner.nextRaw(cells);
for (Cell cell : cells) {
// A new row key means a new row; skip the remaining cells of the same row.
if (currentRow == null || !CellUtil.matchingRow(cell, currentRow)) {
currentRow = CellUtil.cloneRow(cell);
count++;
break;
}
}
cells.clear();
} while (hasMore);
}
return count;
}
// Pre-2.0 hook that exposed the protobuf service to the RPC layer.
@Override
public Service getService() {
return this;
}
}
- 客戶端測試:
package com.ljy.coprocessor;
import org.apache.hadoop.hbase.client.coprocessor.Batch;
import org.apache.hadoop.hbase.ipc.CoprocessorRpcUtils;
import org.apache.hadoop.hbase.ipc.ServerRpcController;
import java.io.IOException;
public class RowCountCallable implements Batch.Call<CountRows.CountRowService, Long> {
private CountRows.CountRequest request;
public RowCountCallable(CountRows.CountRequest request) {
this.request = request;
}
@Override
public Long call(CountRows.CountRowService instance) throws IOException {
CoprocessorRpcUtils.BlockingRpcCallback<CountRows.CountResponse> rpcCallback =
new CoprocessorRpcUtils.BlockingRpcCallback<>();
instance.getCountRows(null, request, rpcCallback);
final CountRows.CountResponse response = rpcCallback.get();
return response.hasCount() ? response.getCount() : 0;
}
}
/**
 * Invokes the CountRowService endpoint on every region of the test table,
 * prints each region's row count, then prints the grand total.
 */
@Test
public void testRowCountEndpoint() {
    try (final Table table = HBaseUtils.getTable("testDepFilter")) {
        final CountRows.CountRequest req = CountRows.CountRequest.getDefaultInstance();
        // One entry per region: region name -> row count from RowCountCallable.
        final Map<byte[], Long> perRegion =
                table.coprocessorService(CountRows.CountRowService.class, null, null, new RowCountCallable(req));
        long total = 0;
        for (Map.Entry<byte[], Long> regionCount : perRegion.entrySet()) {
            total += regionCount.getValue();
            System.out.println("Region:" + Bytes.toString(regionCount.getKey()) + "包含" + regionCount.getValue() + "記錄");
        }
        System.out.println("總記錄數:" + total);
    } catch (Throwable e) {
        e.printStackTrace();
    }
}
// Tears down the shared connection and thread pool after each test.
@After
public void close() {
HBaseUtils.closePool();
}
package com.ljy.util;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Table;
import java.io.Closeable;
import java.io.IOException;
import java.util.Objects;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
 * Shared HBase connection helper.
 *
 * Creating a Table instance is an expensive operation (it takes network
 * resources), so all Table handles are obtained from one pooled Connection
 * instead of creating a fresh connection per request.
 */
public class HBaseUtils {
    /** Thread pool backing the single shared Connection below. */
    private static ExecutorService pool = Executors.newFixedThreadPool(10);
    private static Connection conn;

    // Eagerly create the shared connection. If this fails, conn stays null and
    // every later getTable()/getAdmin() call will NPE — a printed stack trace
    // here means the HBase configuration on the classpath is broken.
    static {
        try {
            conn = ConnectionFactory.createConnection(getConf(), pool);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Builds a Configuration from core-site.xml / hbase-site.xml found on the classpath. */
    private static Configuration getConf() {
        Configuration conf = HBaseConfiguration.create();
        conf.addResource(Objects.requireNonNull(ClassLoader.getSystemResourceAsStream("core-site.xml")));
        conf.addResource(Objects.requireNonNull(ClassLoader.getSystemResourceAsStream("hbase-site.xml")));
        return conf;
    }

    /** Returns the shared Connection (null if the static initializer failed). */
    public static Connection getConnection() {
        return conn;
    }

    /**
     * @deprecated misspelled; kept so existing callers keep compiling —
     *             use {@link #getConnection()} instead.
     */
    @Deprecated
    public static Connection getConnecttion() {
        return getConnection();
    }

    /**
     * Obtains a Table handle from the shared connection.
     *
     * @param tableName name of the table to open
     * @return a Table handle; caller is responsible for closing it
     * @throws IOException if the handle cannot be created
     */
    public static Table getTable(String tableName) throws IOException {
        return conn.getTable(TableName.valueOf(tableName));
    }

    /** Obtains an Admin handle from the shared connection; caller closes it. */
    public static Admin getAdmin() throws IOException {
        return conn.getAdmin();
    }

    /**
     * Best-effort close of any number of resources; null entries (and a null
     * array) are ignored, and one failure does not stop the others.
     */
    public static void close(Closeable... closes) {
        if (closes == null) {
            return;
        }
        for (Closeable closeable : closes) {
            if (closeable != null) {
                try {
                    closeable.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /** Shuts down the shared connection and its thread pool (call once, at teardown). */
    public static void closePool() {
        if (conn != null) {
            close(conn);
            conn = null;
        }
        if (pool != null) {
            pool.shutdown();
            pool = null;
        }
    }
}
- 以上代碼使用靜態加載或者是動態加載,在 RegionServer 的日誌中都顯示被 loaded,但是測試的時候因爲不走 start 方法,所以沒法拿到RegionCoprocessorEnvironment,故在getCountRows方法中獲取region 時報空指針異常.
新版本 Endpoint 代碼:
新版本中採用實現RegionCoprocessor就可以實現 Endpoint 類型的協處理器了.
package com.ljy.coprocessor;
import com.google.protobuf.RpcCallback;
import com.google.protobuf.RpcController;
import com.google.protobuf.Service;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CoprocessorEnvironment;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessor;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.regionserver.RegionScanner;
import org.apache.hadoop.hbase.shaded.protobuf.ResponseConverter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
 * HBase 2.0-style Endpoint coprocessor that counts the rows of the region it
 * is loaded on. Implementing RegionCoprocessor (instead of the deprecated
 * CoprocessorService + Coprocessor pair) is what makes start() fire and the
 * service show up on the RegionServer UI.
 */
public class CountRowsSumEndpoint extends CountRows.CountRowService implements RegionCoprocessor {
    private static final Log LOG = LogFactory.getLog(CountRowsSumEndpoint.class);
    /** Region environment captured in start(); gives access to the hosting region. */
    private RegionCoprocessorEnvironment rce = null;

    @Override
    public void stop(CoprocessorEnvironment env) throws IOException {
        LOG.info("=================CountRowsSumEndpoint#stop be called");
    }

    @Override
    public void start(CoprocessorEnvironment env) throws IOException {
        // Fail fast if the class is mistakenly registered as a master/WAL
        // coprocessor — the previous blind cast would have surfaced as a raw
        // ClassCastException with no hint about the misconfiguration.
        if (!(env instanceof RegionCoprocessorEnvironment)) {
            throw new IOException("CountRowsSumEndpoint must be loaded on a table region");
        }
        rce = (RegionCoprocessorEnvironment) env;
        LOG.info("=================CountRowsSumEndpoint#start be called");
    }

    /**
     * RPC entry point: counts the rows of this region and hands the result to
     * {@code done}. On scanner failure the exception is propagated to the
     * client through the controller and a null response is delivered.
     */
    @Override
    public void getCountRows(RpcController controller, CountRows.CountRequest request, RpcCallback<CountRows.CountResponse> done) {
        CountRows.CountResponse response = null;
        try {
            long count = getCount();
            response = CountRows.CountResponse.newBuilder().setCount(count).build();
        } catch (IOException e) {
            ResponseConverter.setControllerException(controller, e);
        }
        done.run(response);
    }

    /**
     * Scans the whole region with a raw scanner and counts distinct row keys.
     *
     * @return the number of rows, or 0 (with an error log) if start() never
     *         ran and {@code rce} is null
     * @throws IOException if the region scanner fails
     */
    private long getCount() throws IOException {
        if (rce == null) {
            LOG.error("===========rce is null");
            return 0;
        }
        long count = 0;
        byte[] currentRow = null;
        Scan scan = new Scan();
        try (final RegionScanner scanner = rce.getRegion().getScanner(scan)) {
            List<Cell> cells = new ArrayList<>();
            boolean hasMore;
            do {
                hasMore = scanner.nextRaw(cells);
                for (Cell cell : cells) {
                    // A new row key means a new row; skip the remaining cells of the same row.
                    if (currentRow == null || !CellUtil.matchingRow(cell, currentRow)) {
                        currentRow = CellUtil.cloneRow(cell);
                        count++;
                        break;
                    }
                }
                cells.clear();
            } while (hasMore);
        }
        return count;
    }

    /** Exposes the protobuf service — the 2.0 replacement for getService(). */
    @Override
    public Iterable<Service> getServices() {
        return Collections.singleton(this);
    }
}
修改後不管是採用動態加載還是靜態加載,都可以正常運行了.
以上僅僅當筆記.後面有時間可以好好梳理一下.
感嘆!!!