springBoot 整合Hbase及自带聚和协处理器的使用

1、前言

springBoot整合hbase有两种方式:

  • 一种是使用spring-boot-starter-hbase,但是这种方式,使用时需要先创建hbase表的实体类和转换类,有点类似jpa,但是对于非关系型数据库,我不是很喜欢这种用法。而且spring-boot-starter-hbase只有一个1.0.0.RELEASE的版本,对于新版hbase的兼容性尚待测试。
  • 所以本文介绍的是第二种方式,使用org.apache.hbase提供的工具包,使用的版本是2.1.0


2、引入依赖

		<!-- hbase 客户端 -->
		<dependency>
			<groupId>org.apache.hbase</groupId>
			<artifactId>hbase-client</artifactId>
			<version>2.1.0</version>
		</dependency>

		<!-- hbase协处理器 -->
		<dependency>
			<groupId>org.apache.hbase</groupId>
			<artifactId>hbase-endpoint</artifactId>
			<version>2.1.0</version>
		</dependency>

3、编写工具类

@Component
public class HbaseUtil {

    /**
     * Logger shared by all instances of this utility.
     */
    private static final Logger logger = LoggerFactory.getLogger(HbaseUtil.class);

    /**
     * HBase connection object. Declared volatile because {@link #getConnection()} reads it
     * outside the synchronized block before deciding whether to re-initialize.
     */
    private volatile Connection conn;

    /**
     * ZooKeeper quorum address used by HBase, injected from the {@code zookeeper.ip} property.
     */
    @Value("${zookeeper.ip}")
    private String zookeeper;

    /**
     * Class name of the aggregation coprocessor that ships with HBase.
     */
    private final String gatherCoprocessor = AggregateImplementation.class.getName();

    /**
     * Creates the HBase connection. A failure is logged and leaves {@link #conn} unchanged;
     * {@link #getConnection()} retries on the next use.
     */
    @PostConstruct
    private void initConnection() {
        Configuration config = getConfig();
        try {
            conn = ConnectionFactory.createConnection(config);
            logger.info("初始化hbase连接");
        } catch (Exception e) {
            logger.error("初始化失败", e);
        }
    }

    /**
     * Builds an HBase configuration pointing at the configured ZooKeeper quorum.
     *
     * @return a new configuration object
     */
    private Configuration getConfig() {
        Configuration config = HBaseConfiguration.create();
        config.set("hbase.zookeeper.quorum", zookeeper);
        return config;
    }

    /**
     * Returns the connection, re-creating it when it is missing or closed.
     *
     * @return an open connection
     * @throws IOException if no usable connection could be established
     */
    private Connection getConnection()
        throws IOException {
        Connection current = conn;
        // conn is null when the initial @PostConstruct attempt failed, so check for that as well
        if (current == null || current.isClosed()) {
            synchronized (this) {
                current = conn;
                if (current == null || current.isClosed()) {
                    initConnection();
                    current = conn;
                }
            }
        }
        if (current == null || current.isClosed()) {
            throw new IOException("HBase connection is unavailable");
        }
        return current;
    }

    /**
     * Opens a table handle. The caller is responsible for closing it.
     *
     * @param tableName name of the table
     * @return the table handle
     * @throws IOException if the connection or the table handle cannot be obtained
     */
    private Table getTable(String tableName)
        throws IOException {
        return getConnection().getTable(TableName.valueOf(tableName));
    }

    /**
     * Opens an admin handle. The caller is responsible for closing it.
     *
     * @return the admin handle
     * @throws IOException if the connection or the admin handle cannot be obtained
     */
    private Admin getAdmin()
        throws IOException {
        return getConnection().getAdmin();
    }

    /**
     * Creates a table with the given column families if it does not exist yet.
     *
     * @param tableName the table name
     * @param columnFamily the names of the column families to create
     * @return {@code true} if the table exists after the call (created now or already present),
     *         {@code false} if an error occurred
     */
    public boolean creatTable(String tableName, List<String> columnFamily) {
        TableName table = TableName.valueOf(tableName);
        try (Admin admin = getAdmin()) {
            if (!admin.tableExists(table)) {
                TableDescriptorBuilder tableDescriptor = TableDescriptorBuilder.newBuilder(table);
                for (String family : columnFamily) {
                    tableDescriptor.setColumnFamily(
                        ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(family)).build());
                }
                admin.createTable(tableDescriptor.build());
            }
            return true;
        } catch (Exception e) {
            logger.error("创建表失败", e);
            // report the failure instead of claiming success
            return false;
        }
    }

    /**
     * Lists the names of all tables.
     *
     * @return the table names; empty if none exist or if the lookup failed (the failure is logged)
     */
    public List<String> getAllTableNames() {
        List<String> result = new ArrayList<>();
        try (Admin admin = getAdmin()) {
            for (TableName tableName : admin.listTableNames()) {
                result.add(tableName.getNameAsString());
            }
        } catch (Exception e) {
            logger.error("获取所有表的表名失败", e);
        }
        return result;
    }

    /**
     * Disables and deletes a table if it exists.
     *
     * @param tableName the name of the table to delete
     * @return {@code false} if an error occurred, {@code true} otherwise (including when the
     *         table did not exist)
     */
    public boolean deleteTable(String tableName) {
        try (Admin admin = getAdmin()) {
            TableName table = TableName.valueOf(tableName);
            if (admin.tableExists(table)) {
                // a table has to be disabled before it can be deleted
                admin.disableTable(table);
                admin.deleteTable(table);
                logger.debug("{} 已删除!", tableName);
            }
        } catch (Exception e) {
            logger.error(MessageFormat.format("删除表失败,tableName:{0}", tableName), e);
            return false;
        }
        return true;
    }

    /**
     * Inserts or updates several columns of one row.
     *
     * @param tableName the table name
     * @param rowKey the row key
     * @param familyName the column family name
     * @param columns the column names to write
     * @param values the values to write, matched to {@code columns} by index
     * @return {@code true} if the data was written, {@code false} otherwise (the failure is logged)
     */
    public boolean saveData(String tableName, String rowKey, String familyName, String[] columns, String[] values) {
        try (Table table = getTable(tableName)) {
            // propagate the outcome of the write; the helper swallows its own exceptions
            return saveData(table, rowKey, tableName, familyName, columns, values);
        } catch (Exception e) {
            logger.error(
                MessageFormat
                    .format("为表添加 or 更新数据失败,tableName:{0},rowKey:{1},familyName:{2}", tableName, rowKey, familyName),
                e);
            return false;
        }
    }

    /**
     * Builds a {@code Put} for the given row and writes it through an already opened table.
     *
     * @param table the open table handle
     * @param rowKey the row key
     * @param tableName the table name, used for log messages only
     * @param familyName the column family name
     * @param columns the column names to write
     * @param values the values to write, matched to {@code columns} by index
     * @return {@code true} if the put succeeded, {@code false} otherwise (the failure is logged)
     */
    private boolean saveData(Table table, String rowKey, String tableName, String familyName, String[] columns,
        String[] values) {
        try {
            if (columns == null || values == null || columns.length != values.length) {
                // reject the input up front instead of sending an empty Put to the server
                throw new IllegalArgumentException("columns and values must be non-null and of equal length");
            }
            Put put = new Put(Bytes.toBytes(rowKey));
            byte[] family = Bytes.toBytes(familyName);
            for (int i = 0; i < columns.length; i++) {
                if (columns[i] == null || values[i] == null) {
                    throw new NullPointerException(
                        MessageFormat.format("列名和列数据都不能为空,column:{0},value:{1}", columns[i], values[i]));
                }
                put.addColumn(family, Bytes.toBytes(columns[i]), Bytes.toBytes(values[i]));
            }
            table.put(put);
            logger.debug("为表添加 or 更新数据成功,rowKey:{}", rowKey);
        } catch (Exception e) {
            logger.error(
                MessageFormat
                    .format("为表添加 or 更新数据失败,tableName:{0},rowKey:{1},familyName:{2}", tableName, rowKey, familyName),
                e);
            return false;
        }
        return true;
    }

    /**
     * Writes a single cell.
     *
     * @param tableName the table name
     * @param rowKey the row key
     * @param familyName the column family name
     * @param column the column to write
     * @param value the value to write
     * @return {@code true} if the value was written, {@code false} otherwise
     */
    public boolean setColumnValue(String tableName, String rowKey, String familyName, String column, String value) {
        return saveData(tableName, rowKey, familyName, new String[] {column}, new String[] {value});
    }

    /**
     * Deletes one row if the table exists.
     *
     * @param tableName the table name
     * @param rowKey the row key
     * @return {@code false} if an error occurred, {@code true} otherwise (including when the
     *         table did not exist)
     */
    public boolean deleteByRow(String tableName, String rowKey) {
        try (Table table = getTable(tableName); Admin admin = getAdmin()) {
            if (admin.tableExists(TableName.valueOf(tableName))) {
                table.delete(new Delete(Bytes.toBytes(rowKey)));
                logger.debug("row {} 已删除!", rowKey);
            }
        } catch (Exception e) {
            logger.error(MessageFormat.format("删除指定的行失败,tableName:{0},rowKey:{1}", tableName, rowKey), e);
            return false;
        }
        return true;
    }

    /**
     * Scans the whole table.
     *
     * @param tableName the table name
     * @return a map of row key to (qualifier to value) for every row
     */
    public Map<String, Map<String, String>> getScannerResult(String tableName) {
        return queryData(tableName, new Scan());
    }

    /**
     * Scans the rows between a start row key and a stop row key. The range is applied only when
     * both keys are non-empty; otherwise the whole table is scanned.
     *
     * @param tableName the table name
     * @param startRowKey the start row key
     * @param stopRowKey the stop row key
     * @return a map of row key to (qualifier to value)
     */
    public Map<String, Map<String, String>> getScannerRangeRowKey(String tableName, String startRowKey,
        String stopRowKey) {
        Scan scan = new Scan();
        if (StringUtils.isNotEmpty(startRowKey) && StringUtils.isNotEmpty(stopRowKey)) {
            scan.withStartRow(Bytes.toBytes(startRowKey));
            scan.withStopRow(Bytes.toBytes(stopRowKey));
        }
        return queryData(tableName, scan);
    }

    /**
     * Scans the rows whose key starts with the given prefix. With an empty prefix the whole
     * table is scanned.
     *
     * @param tableName the table name
     * @param prefix the row key prefix
     * @return a map of row key to (qualifier to value)
     */
    public Map<String, Map<String, String>> getScannerByPrefixFilter(String tableName, String prefix) {
        Scan scan = new Scan();
        if (StringUtils.isNotEmpty(prefix)) {
            scan.setFilter(new PrefixFilter(Bytes.toBytes(prefix)));
        }
        return queryData(tableName, scan);
    }

    /**
     * Scans the rows whose key contains the given keyword. With an empty keyword the whole
     * table is scanned.
     *
     * @param tableName the table name
     * @param keyword the substring the row key must contain
     * @return a map of row key to (qualifier to value)
     */
    public Map<String, Map<String, String>> getScannerByRowFilter(String tableName, String keyword) {
        Scan scan = new Scan();
        if (StringUtils.isNotEmpty(keyword)) {
            // SubstringComparator is documented to support only EQUAL / NOT_EQUAL
            Filter filter = new RowFilter(CompareOperator.EQUAL, new SubstringComparator(keyword));
            scan.setFilter(filter);
        }
        return queryData(tableName, scan);
    }

    /**
     * Fetches one row by its exact row key.
     *
     * @param tableName the table name
     * @param rowKey the row key
     * @return a map of qualifier to value; empty if the row was not found or the lookup failed
     *         (the failure is logged)
     */
    public Map<String, String> getRowData(String tableName, String rowKey) {
        Map<String, String> result = new HashMap<>();
        Get get = new Get(Bytes.toBytes(rowKey));
        try (Table table = getTable(tableName)) {
            result.putAll(toColumnMap(table.get(get)));
        } catch (Exception e) {
            logger.error(MessageFormat.format("查询一行的数据失败,tableName:{0},rowKey:{1}", tableName, rowKey), e);
        }
        return result;
    }

    /**
     * Fetches several rows by their row keys. Row keys that are not found are left out of the
     * result.
     *
     * @param tableName the table name
     * @param rowKeys the row keys
     * @return a map of row key to (qualifier to value)
     */
    public Map<String, Map<String, String>> getRowDataByList(String tableName, List<String> rowKeys) {
        Map<String, Map<String, String>> result = new HashMap<>();
        try (Table table = getTable(tableName)) {
            List<Get> getList = new ArrayList<>();
            for (String rowKey : rowKeys) {
                getList.add(new Get(Bytes.toBytes(rowKey)));
            }
            for (Result row : table.get(getList)) {
                // a missing row yields an empty Result; addRow skips it
                addRow(result, row);
            }
        } catch (Exception e) {
            logger.error(MessageFormat.format("根据多个rowkey查询数据失败,tableName:{0},rowKey:{1}", tableName, rowKeys), e);
        }
        return result;
    }

    /**
     * Fetches the latest value of a single cell.
     *
     * @param tableName the table name
     * @param rowKey the row key
     * @param familyName the column family name
     * @param columnName the column name
     * @return the cell value, or {@code null} if the cell was not found or the lookup failed
     *         (the failure is logged)
     */
    public String getColumnValue(String tableName, String rowKey, String familyName, String columnName) {
        String value = null;
        Get get = new Get(Bytes.toBytes(rowKey));
        try (Table table = getTable(tableName)) {
            Result result = table.get(get);
            if (result != null && !result.isEmpty()) {
                Cell cell = result.getColumnLatestCell(Bytes.toBytes(familyName), Bytes.toBytes(columnName));
                if (cell != null) {
                    value = Bytes.toString(CellUtil.cloneValue(cell));
                }
            }
        } catch (Exception e) {
            logger.error(MessageFormat.format("查询指定单元格的数据失败,tableName:{0},rowKey:{1},familyName:{2},columnName:{3}",
                tableName,
                rowKey,
                familyName,
                columnName), e);
        }
        return value;
    }

    /**
     * Runs a scan and collects every returned row.
     *
     * @param tableName the table name
     * @param scan the scan to execute
     * @return a map of row key to (qualifier to value); rows read before a failure are kept
     */
    private Map<String, Map<String, String>> queryData(String tableName, Scan scan) {
        Map<String, Map<String, String>> result = new HashMap<>();
        try (Table table = getTable(tableName); ResultScanner scanner = table.getScanner(scan)) {
            for (Result row : scanner) {
                addRow(result, row);
            }
        } catch (Exception e) {
            logger.error(MessageFormat.format("遍历查询指定表中的所有数据失败,tableName:{0}", tableName), e);
        }
        return result;
    }

    /**
     * Adds one row to the result map, keyed by its row key. Null or empty results are skipped.
     *
     * @param target the map to add to
     * @param row the row returned by HBase
     */
    private static void addRow(Map<String, Map<String, String>> target, Result row) {
        if (row == null || row.isEmpty()) {
            return;
        }
        target.put(Bytes.toString(row.getRow()), toColumnMap(row));
    }

    /**
     * Converts the cells of one row into a qualifier-to-value map.
     *
     * @param row the row returned by HBase, may be null or empty
     * @return the qualifier-to-value map, empty when the row has no cells
     */
    private static Map<String, String> toColumnMap(Result row) {
        Map<String, String> columns = new HashMap<>();
        if (row == null || row.isEmpty()) {
            return columns;
        }
        for (Cell cell : row.listCells()) {
            columns.put(Bytes.toString(CellUtil.cloneQualifier(cell)), Bytes.toString(CellUtil.cloneValue(cell)));
        }
        return columns;
    }

    /**
     * Builds a scan restricted to a single column.
     *
     * @param family the column family name
     * @param qualifier the column name
     * @return the scan
     */
    private static Scan columnScan(String family, String qualifier) {
        Scan scan = new Scan();
        scan.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier));
        return scan;
    }

    /**
     * Counts the rows of a table through the aggregation coprocessor.
     *
     * @param tableName the table name
     * @param family the column family to scan
     * @return the row count, or 0 if the aggregation failed (the failure is logged)
     */
    public long queryRowCount(String tableName, String family) {
        // make sure the aggregation coprocessor is attached to the table
        setCoprocessor(tableName, gatherCoprocessor);
        long rowCount = 0;
        try (AggregationClient ac = new AggregationClient(getConfig())) {
            Scan scan = new Scan();
            scan.addFamily(Bytes.toBytes(family));
            rowCount = ac.rowCount(TableName.valueOf(tableName), new LongColumnInterpreter(), scan);
        } catch (Throwable e) {
            logger.error(MessageFormat.format("统计表行数出错,tableName:{0},family:{1}", tableName, family), e);
        }
        return rowCount;
    }

    /**
     * Computes the maximum of a double-typed column through the aggregation coprocessor.
     *
     * @param tableName the table name
     * @param family the column family name
     * @param qualifier the column name
     * @return the maximum, or 0 if there was no data or the aggregation failed (failures are logged)
     */
    public double queryMaxData(String tableName, String family, String qualifier) {
        setCoprocessor(tableName, gatherCoprocessor);
        double max = 0;
        try (AggregationClient ac = new AggregationClient(getConfig())) {
            Double value = ac.max(TableName.valueOf(tableName), new DoubleColumnInterpreter(),
                columnScan(family, qualifier));
            // a null result means no value was aggregated; keep the default
            if (value != null) {
                max = value;
            }
        } catch (Throwable e) {
            logger.error(
                MessageFormat.format("统计最大值出错,tableName:{0},family:{1},qualifier:{2}", tableName, family, qualifier),
                e);
        }
        return max;
    }

    /**
     * Computes the minimum of a double-typed column through the aggregation coprocessor.
     *
     * @param tableName the table name
     * @param family the column family name
     * @param qualifier the column name
     * @return the minimum, or 0 if there was no data or the aggregation failed (failures are logged)
     */
    public double queryMinData(String tableName, String family, String qualifier) {
        setCoprocessor(tableName, gatherCoprocessor);
        double min = 0;
        try (AggregationClient ac = new AggregationClient(getConfig())) {
            Double value = ac.min(TableName.valueOf(tableName), new DoubleColumnInterpreter(),
                columnScan(family, qualifier));
            // a null result means no value was aggregated; keep the default
            if (value != null) {
                min = value;
            }
        } catch (Throwable e) {
            logger.error(
                MessageFormat.format("统计最小值出错,tableName:{0},family:{1},qualifier:{2}", tableName, family, qualifier),
                e);
        }
        return min;
    }

    /**
     * Computes the sum of a double-typed column through the aggregation coprocessor.
     *
     * @param tableName the table name
     * @param family the column family name
     * @param qualifier the column name
     * @return the sum, or 0 if there was no data or the aggregation failed (failures are logged)
     */
    public double querySumData(String tableName, String family, String qualifier) {
        setCoprocessor(tableName, gatherCoprocessor);
        double sum = 0;
        try (AggregationClient ac = new AggregationClient(getConfig())) {
            Double value = ac.sum(TableName.valueOf(tableName), new DoubleColumnInterpreter(),
                columnScan(family, qualifier));
            // a null result means no value was aggregated; keep the default
            if (value != null) {
                sum = value;
            }
        } catch (Throwable e) {
            logger.error(
                MessageFormat.format("求和出错,tableName:{0},family:{1},qualifier:{2}", tableName, family, qualifier),
                e);
        }
        return sum;
    }

    /**
     * Computes the average of a double-typed column through the aggregation coprocessor.
     * Older HBase versions are reported to compute this incorrectly.
     *
     * @param tableName the table name
     * @param family the column family name
     * @param qualifier the column name
     * @return the average, or 0 if the aggregation failed (the failure is logged)
     */
    public double queryAvgData(String tableName, String family, String qualifier) {
        setCoprocessor(tableName, gatherCoprocessor);
        double avg = 0;
        try (AggregationClient ac = new AggregationClient(getConfig())) {
            avg = ac.avg(TableName.valueOf(tableName), new DoubleColumnInterpreter(), columnScan(family, qualifier));
        } catch (Throwable e) {
            logger.error(
                MessageFormat.format("求平均值出错,tableName:{0},family:{1},qualifier:{2}", tableName, family, qualifier),
                e);
        }
        return avg;
    }

    /**
     * Attaches a coprocessor to a table unless the table already has it. The table is disabled
     * while its descriptor is modified and re-enabled afterwards. Errors are logged, not thrown.
     *
     * @param tableName the table name
     * @param coprocessorClassName fully qualified class name of the coprocessor
     */
    public void setCoprocessor(String tableName, String coprocessorClassName) {
        TableName table = TableName.valueOf(tableName);
        // try-with-resources closes the admin handle on every path
        try (Admin admin = getAdmin()) {
            TableDescriptor descriptor = admin.getDescriptor(table);
            if (descriptor.hasCoprocessor(coprocessorClassName)) {
                return;
            }
            admin.disableTable(table);
            try {
                TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(descriptor);
                builder.setCoprocessor(coprocessorClassName);
                admin.modifyTable(builder.build());
            } finally {
                // this call disabled the table, so bring it back even if the modification failed
                if (admin.isTableDisabled(table)) {
                    admin.enableTable(table);
                }
            }
        } catch (Exception e) {
            logger.error(MessageFormat
                .format("设置表协处理器出错,tableName:{0},coprocessorClassName:{1}", tableName, coprocessorClassName), e);
        }
    }
}

4、简单说明

  1. 工具类中用到几个重要的对象Connection、Table、Admin。Connection内置了线程池管理并实现了线程安全,不需要我们额外处理;Table、Admin线程不安全,因此不能共用,每个线程使用完后必须调用close方法关闭。有兴趣深入了解可以看下这篇文章。连接HBase的正确姿势
  2. hbase自带了AggregateImplementation这个聚合协处理器,但是hbase1.x版本的,在求平均值时计算有误,因此我升级到2.1.0版本。同时,除了rowcount计数方法外,其他求和、求平均等方法,我使用的翻译类都是DoubleColumnInterpreter,因此需要字段的类型为double,否则查询会出错,当然你也可以使用其他翻译类,hbase提供了BigDecimalColumnInterpreter、DoubleColumnInterpreter、LongColumnInterpreter3种翻译类,对应BigDecimal、Double、Long3种类型,如果还不能满足需求,那就只能自己实现ColumnInterpreter接口。

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章