Hbase 版本爲 2.2.2 採用3個節點的集羣部署,壓縮算法採用SNAPPY
創建兩張表,一張使用壓縮算法,一張不使用壓縮算法
test表使用SNAPPY 壓縮
create 'test', { NAME => 'info', COMPRESSION => 'SNAPPY' }
demo不採用壓縮
create 'demo', { NAME => 'info' }
使用list命令查看表集合
使用 desc命令查看表結構
desc 'test'
注意兩個表的壓縮區別
此時使用hadoop命令查看表空間所佔空間大小
hadoop fs -du -h /hbase/data/default //沒有指定命名空間,默認在default中
[root@aims02 ~]# hadoop fs -du -h /hbase/data/default
235 235 /hbase/data/default/demo
244 244 /hbase/data/default/test
採用java工具類進行數據插入:
package com.aims.nuclear.power.config;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
/**
* @Package: com.aims.nuclear.power.config
* @Description: <>
* @Author: MILLA
* @CreateDate: 2019/12/17 14:12
* @UpdateUser: MILLA
* @UpdateDate: 2019/12/17 14:12
* @UpdateRemark: <>
* @Version: 1.0
*/
/**
 * Writer utility: inserts identical rows into the SNAPPY-compressed 'test' table and
 * the uncompressed 'demo' table so their on-disk footprints can be compared with
 * `hadoop fs -du -h /hbase/data/default`.
 */
public class HbaseUtil {

    /** Column family name shared by both tables. */
    public static final String c = "info";

    public static void main(String[] args) throws Exception {
        insert();
    }

    /**
     * Starts a background writer thread that inserts one row per millisecond into
     * both tables, forever. The Connection is deliberately kept open for the whole
     * process lifetime because the writer never terminates.
     *
     * @throws Exception if the HBase connection cannot be established
     */
    public static void insert() throws Exception {
        Connection connection = getConnection();
        Table test = connection.getTable(TableName.valueOf("test"));
        Table demo = connection.getTable(TableName.valueOf("demo"));
        new Thread(() -> {
            try {
                int i = 0;
                while (true) {
                    Thread.sleep(1L);
                    // Random capital-letter prefix spreads row keys lexicographically.
                    String rowKey = getRandStr() + System.currentTimeMillis();
                    insertList(test, demo, i++, rowKey);
                }
            } catch (InterruptedException e) {
                // Restore the interrupt flag so the writer can be stopped cleanly.
                Thread.currentThread().interrupt();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }).start();
    }

    /**
     * Writes one row (same row key, same cells) to both tables and logs progress
     * every 10000 rows.
     *
     * @param test   handle of the SNAPPY-compressed table
     * @param demo   handle of the uncompressed table
     * @param i      running row counter; NOTE the "size" printed below is derived
     *               from this counter, not from actual bytes written
     * @param rowKey row key to insert under
     */
    public static void insertList(Table test, Table demo, int i, String rowKey) {
        User user = new User();
        // Bytes.toBytes encodes as UTF-8; String.getBytes() would depend on the
        // platform default charset.
        Put put = new Put(Bytes.toBytes(rowKey));
        byte[] name = Bytes.toBytes(user.getName());
        byte[] age = Bytes.toBytes(user.getAge());
        byte[] sex = Bytes.toBytes(user.getSex());
        put.addColumn(Bytes.toBytes(c), Bytes.toBytes("name"), name);
        put.addColumn(Bytes.toBytes(c), Bytes.toBytes("Age"), age);
        put.addColumn(Bytes.toBytes(c), Bytes.toBytes("Sex"), sex);
        try {
            test.put(put);
            demo.put(put);
            if (i % 10000 == 0) {
                System.out.println("test表-->rowKey: " + rowKey + " 插入數據成功;次數:" + i + " 已插入大小:" + (i > 1024 * 1024 ? (double) i / (1024 * 1024) + "GB " : (double) i / 1024 + " MB"));
                System.out.println("demo表-->rowKey: " + rowKey + " 插入數據成功;次數:" + i + " 已插入大小:" + (i > 1024 * 1024 ? (double) i / (1024 * 1024) + "GB " : (double) i / 1024 + " MB"));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates a table with the single 'info' column family, using the
     * non-deprecated HBase 2.x descriptor builders (HTableDescriptor and
     * HColumnDescriptor are deprecated in 2.x).
     *
     * @param admin     admin handle used to issue the DDL
     * @param tableName name of the table to create
     */
    private static void createTable(Admin admin, String tableName) throws Exception {
        TableDescriptor descriptor = TableDescriptorBuilder
                .newBuilder(TableName.valueOf(tableName))
                .setColumnFamily(ColumnFamilyDescriptorBuilder.of(c))
                .build();
        admin.createTable(descriptor);
    }

    /**
     * Builds a Connection against the aims02 ZooKeeper quorum.
     *
     * BUG FIX: the client-port setting is 'hbase.zookeeper.property.clientPort';
     * the previous key 'hbase.zookeeper.port' is not a recognized HBase property,
     * so the port was silently falling back to the default.
     */
    private static Connection getConnection() throws Exception {
        Configuration configuration = HBaseConfiguration.create();
        configuration.set("hbase.zookeeper.quorum", "aims02");
        configuration.set("hbase.zookeeper.property.clientPort", "2181");
        return ConnectionFactory.createConnection(configuration);
    }

    /**
     * @return a single random uppercase letter (A-Z), used as a row-key prefix
     */
    public static String getRandStr() {
        String letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
        // Math.random() is in [0, 1), so the index is always within bounds.
        return String.valueOf(letters.charAt((int) (Math.random() * letters.length())));
    }
}
然後不停的跟蹤表空間的佔比情況:
[root@aims02 ~]# hadoop fs -du -h /hbase/data/default
28.3 G 28.3 G /hbase/data/default/demo
5.0 G 5.0 G /hbase/data/default/test
//此時的壓縮比爲:5.0/28.3≈17.67%
查詢可根據以下java工具類查詢:
package com.aims.nuclear.power.config;
/**
* @Package: com.aims.nuclear.power.config
* @Description: <>
* @Author: MILLA
* @CreateDate: 2019/12/18 12:10
* @UpdateUser: MILLA
* @UpdateDate: 2019/12/18 12:10
* @UpdateRemark: <>
* @Version: 1.0
*/
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import static com.aims.nuclear.power.config.HbaseUtil.insertList;
/**
 * Reader/benchmark utility comparing query latency between the SNAPPY-compressed
 * 'test' table and the uncompressed 'demo' table.
 *
 * NOTE(review): the shared Connection and thread pool are created once in a static
 * block and never shut down — acceptable for a throwaway benchmark driver only.
 */
@Slf4j
public class HBasePoolUtils {

    public static void main(String[] args) {
        testList();
    }

    /** Times a range scan over the 'test' table and prints row count and latency. */
    private static void testList() {
        long start = System.currentTimeMillis();
        List<User> users = queryData("test", "Y1576732287729", "Y1576744250744");
        long end = System.currentTimeMillis();
        System.out.println("test------數據量:" + users.size() + " |--------: " + (end - start));
    }

    /** Times a single-row Get against both tables and prints each latency. */
    private static void testOne() {
        long testStart = System.currentTimeMillis();
        queryData("test", "Q1576749130546");
        long testEnd = System.currentTimeMillis();
        long demoStart = System.currentTimeMillis();
        queryData("demo", "Q1576749130546");
        long demoEnd = System.currentTimeMillis();
        System.out.println("test--------------: " + (testEnd - testStart));
        System.out.println("demo--------------: " + (demoEnd - demoStart));
    }

    private static Configuration conf = null;
    // Fixed-size pool backing the HBase connection; caps client-side concurrency at 20.
    private static ExecutorService executor = null;
    private static Connection conn = null;

    static {
        try {
            conf = HBaseConfiguration.create();
            conf.set("hbase.zookeeper.quorum", "aims02");
            conf.set("hbase.zookeeper.property.clientPort", "2181");
            conf.set("hbase.defaults.for.version.skip", "true");
            executor = Executors.newFixedThreadPool(20);
            conn = ConnectionFactory.createConnection(conf, executor);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static Connection getConn() {
        return conn;
    }

    /**
     * Fetches a single row and prints every cell.
     *
     * BUG FIX: Result.listCells() returns null when the row does not exist, so the
     * previous version threw a NullPointerException for a missing key. The method
     * now also returns the decoded qualifier->value pairs instead of always null.
     *
     * @param tableName table to read from
     * @param index     row key
     * @return qualifier -> decoded value for each cell found (empty if row absent)
     */
    public static Map<String, Object> queryData(String tableName, String index) {
        Map<String, Object> values = Maps.newHashMap();
        Table table = null;
        try {
            table = conn.getTable(TableName.valueOf(tableName));
            Result rs = table.get(new Get(Bytes.toBytes(index)));
            List<Cell> cells = rs.listCells();
            if (cells == null) {
                return values; // row not found
            }
            for (Cell cell : cells) {
                byte[] bytes = CellUtil.cloneValue(cell);
                String family = Bytes.toString(CellUtil.cloneFamily(cell));
                String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                System.out.println("family: " + family + ", qualifier:" + qualifier);
                Object value;
                if (StringUtils.equals("Part", qualifier)) {
                    // 'Part' cells hold a Java-serialized double[].
                    value = Arrays.toString((double[]) SerializeUtil.bytes2Object(bytes));
                } else if (StringUtils.equals("Age", qualifier)) {
                    value = Bytes.toDouble(bytes);
                } else {
                    value = Bytes.toString(bytes);
                }
                System.out.println("value: " + value);
                System.out.println("-----------------------");
                values.put(qualifier, value);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(table);
        }
        return values;
    }

    /**
     * Scans [start, end) restricted to the 'info:Age' column and decodes every row
     * into User objects.
     *
     * FIXES: removed a duplicated withStartRow(...) call present in the original,
     * and the ResultScanner is now closed (it was previously leaked).
     *
     * @param tableName table to scan
     * @param start     inclusive start row key
     * @param end       exclusive stop row key
     * @return decoded users; empty list on error or when the range is empty
     */
    public static List<User> queryData(String tableName, String start, String end) {
        List<User> userList = Lists.newArrayList();
        Table table = null;
        try {
            table = conn.getTable(TableName.valueOf(tableName));
            Scan scan = new Scan();
            scan.addFamily(Bytes.toBytes("info"));
            // addColumn narrows the scan to just info:Age.
            scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("Age"));
            scan.withStartRow(Bytes.toBytes(start));
            scan.withStopRow(Bytes.toBytes(end));
            try (ResultScanner scanner = table.getScanner(scan)) {
                for (Result result : scanner) {
                    userList.addAll(printRs(result));
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(table);
        }
        return userList;
    }

    /**
     * Converts each cell of a Result into a User carrying the one decoded field.
     *
     * NOTE(review): one User is created per cell, not per row; because the calling
     * scans restrict to a single column this yields one User per row in practice.
     */
    private static List<User> printRs(Result rs) {
        List<User> userList = Lists.newArrayList();
        List<Cell> cells = rs.listCells();
        if (cells == null) {
            return userList; // empty result
        }
        for (Cell cell : cells) {
            User user = new User();
            byte[] bytes = CellUtil.cloneValue(cell);
            String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
            if (StringUtils.equals("Part", qualifier)) {
                user.setPart(bytes);
            } else if (StringUtils.equals("Age", qualifier)) {
                user.setAge(Bytes.toDouble(bytes));
            } else if (StringUtils.equals("Sex", qualifier)) {
                user.setSex(Bytes.toString(bytes));
            } else if (StringUtils.equals("name", qualifier)) {
                user.setName(Bytes.toString(bytes));
            }
            userList.add(user);
        }
        return userList;
    }

    /**
     * Scans [start, end) over 'info:Age' and returns parallel lists under the keys
     * "rowKeys", "times" (cell timestamps) and "values".
     *
     * NOTE(review): Math.random() is added to every value, so the returned values
     * are intentionally jittered and non-deterministic — confirm this is wanted.
     *
     * @param tableName table to scan
     * @param start     inclusive start row key
     * @param end       exclusive stop row key
     * @return map with "rowKeys", "values" and "times" lists (possibly empty)
     */
    public static Map<String, Object> queryDataDouble(String tableName, String start, String end) {
        long begin = System.currentTimeMillis();
        List<Double> doubleList = Lists.newArrayList();
        List<Long> times = Lists.newArrayList();
        List<String> rowKeys = Lists.newArrayList();
        Table table = null;
        try {
            table = conn.getTable(TableName.valueOf(tableName));
            Scan scan = new Scan();
            scan.addFamily(Bytes.toBytes("info"));
            scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("Age"));
            scan.withStartRow(Bytes.toBytes(start));
            scan.withStopRow(Bytes.toBytes(end));
            try (ResultScanner scanner = table.getScanner(scan)) {
                for (Result rs : scanner) {
                    List<Cell> cells = rs.listCells();
                    if (cells == null) {
                        continue;
                    }
                    for (Cell cell : cells) {
                        byte[] bytes = CellUtil.cloneValue(cell);
                        rowKeys.add(Bytes.toString(CellUtil.cloneRow(cell)));
                        times.add(cell.getTimestamp());
                        doubleList.add(Bytes.toDouble(bytes) + Math.random());
                    }
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(table);
        }
        log.info("獲取數據總數爲:{},共用時:{} 毫秒", doubleList.size(), (System.currentTimeMillis() - begin));
        Map<String, Object> data = Maps.newHashMap();
        data.put("rowKeys", rowKeys);
        data.put("values", doubleList);
        data.put("times", times);
        return data;
    }

    /** Closes a table handle, logging rather than propagating any close failure. */
    private static void closeQuietly(Table table) {
        if (table != null) {
            try {
                table.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    // Writer switch: savingData loops while this is true; stoppingCode flips it off.
    private static volatile boolean starting = true;

    /**
     * Starts a background writer that inserts one row per millisecond into both
     * tables, with row keys prefixed by the upper-cased code, until stoppingCode()
     * is called.
     *
     * @param code row-key prefix (upper-cased before use)
     * @throws IOException if either table handle cannot be obtained
     */
    public static void savingData(String code) throws IOException {
        Table test = conn.getTable(TableName.valueOf("test"));
        Table demo = conn.getTable(TableName.valueOf("demo"));
        new Thread(() -> {
            try {
                int i = 0;
                while (starting) {
                    Thread.sleep(1L);
                    String rowKey = code.toUpperCase() + System.currentTimeMillis();
                    insertList(test, demo, i++, rowKey);
                }
            } catch (InterruptedException e) {
                // Restore the interrupt flag so the writer can be stopped cleanly.
                Thread.currentThread().interrupt();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }).start();
    }

    /** Signals the savingData writer thread to stop after its current iteration. */
    public static void stoppingCode() {
        starting = false;
    }
}
查詢數據量及所需耗時如下:
以上爲千萬數據的性能測試,應該還有優化空間,但是,一般不做降採樣,獲取這麼多數據做趨勢圖的可能性也比較小。
等過兩天跑出來幾億數據的時候再做億級的查詢,看看能否做性能上的優化。