過濾器(filter)
目錄
一:行過濾器(rowFilter)
解析:行過濾器基於rowkey來過濾數據。使用多種運算符返回符合條件的行鍵,同時過濾掉不符合條件的rowkey。
package compareFilter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;
import java.io.IOException;
/**
 * Examples of filtering an HBase scan by row key with {@code RowFilter}.
 *
 * <p>Each test scans the columns {@code cf1:name} and {@code cf1:age} of table
 * {@code ns1:t1} and prints every row whose key passes the filter.
 */
public class rowfilter {
    private static final TableName TABLE_NAME = TableName.valueOf("ns1:t1");
    private static final byte[] FAMILY = Bytes.toBytes("cf1");
    private static final byte[] NAME = Bytes.toBytes("name");
    private static final byte[] AGE = Bytes.toBytes("age");

    /**
     * Query by row key: keeps rows whose key compares less than or equal to
     * {@code "row3"} (binary comparison) and prints their name and age.
     *
     * @throws IOException if the HBase connection, scan or close fails
     */
    @Test
    public void rowfilter() throws IOException {
        Filter filter = new RowFilter(CompareFilter.CompareOp.LESS_OR_EQUAL,
                new BinaryComparator(Bytes.toBytes("row3")));
        scanAndPrint(filter, true);
    }

    /**
     * Query by row-key regular expression: keeps rows whose key matches the
     * pattern {@code ".3"} and prints their name.
     *
     * @throws IOException if the HBase connection, scan or close fails
     */
    @Test
    public void rowRegexfilter() throws IOException {
        Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL,
                new RegexStringComparator(".3"));
        scanAndPrint(filter, false);
    }

    /**
     * Query by row-key substring: keeps rows whose key contains {@code "3"}
     * and prints their name.
     *
     * @throws IOException if the HBase connection, scan or close fails
     */
    @Test
    public void rowSubStringfilter() throws IOException {
        Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL,
                new SubstringComparator("3"));
        scanAndPrint(filter, false);
    }

    /**
     * Runs a filtered scan over cf1:name and cf1:age and prints each matching row.
     *
     * @param filter   row filter to attach to the scan
     * @param printAge whether to also decode and print the cf1:age cell
     * @throws IOException if the HBase connection, scan or close fails
     */
    private static void scanAndPrint(Filter filter, boolean printAge) throws IOException {
        System.out.print("begin\n");
        // Load the HBase configuration.
        Configuration configuration = HBaseConfiguration.create();
        // try-with-resources closes the table and the connection even when the scan throws;
        // the original never closed the connection at all.
        try (Connection connection = ConnectionFactory.createConnection(configuration);
             Table table = connection.getTable(TABLE_NAME)) {
            Scan scan = new Scan();
            // Restrict the scan to the two columns we print (family, qualifier).
            scan.addColumn(FAMILY, NAME);
            scan.addColumn(FAMILY, AGE);
            scan.setFilter(filter);
            try (ResultScanner resultScanner = table.getScanner(scan)) {
                for (Result result : resultScanner) {
                    System.out.println(result);
                    System.out.println(Bytes.toString(result.getValue(FAMILY, NAME)));
                    if (printAge) {
                        System.out.println(decodeAge(result.getValue(FAMILY, AGE)));
                    }
                }
            }
        }
        System.out.print("end\n");
    }

    /**
     * Decodes the age cell, tolerating rows that have no cf1:age cell.
     *
     * @param rawAge raw cell value, or {@code null} when the row has no age cell
     * @return the decoded age, or {@code null} when the cell is absent
     */
    private static Integer decodeAge(byte[] rawAge) {
        if (rawAge == null) {
            // A row may carry only cf1:name; Bytes.toInt(null) would throw.
            return null;
        }
        // NOTE(review): assumes age was written with Bytes.toBytes(int) (4 bytes);
        // a value stored as text (e.g. via the HBase shell) will not decode here - confirm.
        return Bytes.toInt(rawAge);
    }
}
二:列族過濾器(FamilyFilter)
解析:列族過濾器與行過濾器相似,不過它是通過比較列族而不是比較rowkey來返回結果的。通過使用不同組合的運算符和比較器,用戶可以在列族一級篩選所需的數據。
package compareFilter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;
import java.io.IOException;
/**
 * Example of filtering by column family with {@code FamilyFilter},
 * applied both to a scan and to a single-row get on table {@code ns1:t1}.
 */
public class familyfilter {
    /**
     * Query by column family: keeps only cells whose family name compares
     * less than {@code "cf2"} (binary comparison), first for a full scan and
     * then for a get of row {@code "row1"}.
     *
     * @throws IOException if the HBase connection, scan, get or close fails
     */
    @Test
    public void familyfilter() throws IOException {
        System.out.print("begin\n");
        // Load the HBase configuration.
        Configuration configuration = HBaseConfiguration.create();
        // try-with-resources closes the table and the connection even on failure;
        // the original never closed the connection.
        try (Connection connection = ConnectionFactory.createConnection(configuration);
             Table table = connection.getTable(TableName.valueOf("ns1:t1"))) {
            Filter filter = new FamilyFilter(CompareFilter.CompareOp.LESS,
                    new BinaryComparator(Bytes.toBytes("cf2")));

            // Scan the table with the family filter attached.
            Scan scan = new Scan();
            scan.setFilter(filter);
            try (ResultScanner resultScanner = table.getScanner(scan)) {
                for (Result row : resultScanner) {
                    System.out.println(row);
                    String name = Bytes.toString(
                            row.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("name")));
                    System.out.println(name);
                }
            }

            // The same filter also works on a single-row get.
            Get get = new Get(Bytes.toBytes("row1"));
            get.setFilter(filter);
            Result result = table.get(get);
            System.out.println("result:" + result);
        }
        System.out.print("end\n");
    }
}
三:列名過濾器(QualifierFilter)
解析:列名過濾器的邏輯與列族過濾器類似,只是比較的對象換成了列名(qualifier),這種操作可以幫助用戶篩選特定的列。
package compareFilter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.QualifierFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;
import java.io.IOException;
/**
 * Example of filtering by column qualifier with {@code QualifierFilter},
 * applied both to a scan and to a single-row get on table {@code ns1:t1}.
 */
public class qualifierfilter {
    /**
     * Query by column name: keeps only cells whose qualifier compares less
     * than or equal to {@code "age"} (binary comparison), first for a full
     * scan and then for a get of row {@code "row1"}.
     *
     * @throws IOException if the HBase connection, scan, get or close fails
     */
    @Test
    public void familynamefilter() throws IOException {
        System.out.print("begin\n");
        // Load the HBase configuration.
        Configuration configuration = HBaseConfiguration.create();
        // try-with-resources closes the table and the connection even on failure;
        // the original never closed the connection.
        try (Connection connection = ConnectionFactory.createConnection(configuration);
             Table table = connection.getTable(TableName.valueOf("ns1:t1"))) {
            Filter filter = new QualifierFilter(CompareFilter.CompareOp.LESS_OR_EQUAL,
                    new BinaryComparator(Bytes.toBytes("age")));

            // Scan the table with the qualifier filter attached.
            Scan scan = new Scan();
            scan.setFilter(filter);
            try (ResultScanner resultScanner = table.getScanner(scan)) {
                for (Result row : resultScanner) {
                    System.out.println(row);
                    // Prints "null" when cf1:name was filtered out of this row.
                    String name = Bytes.toString(
                            row.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("name")));
                    System.out.println(name);
                }
            }

            // The same filter also works on a single-row get.
            Get get = new Get(Bytes.toBytes("row1"));
            get.setFilter(filter);
            Result result = table.get(get);
            System.out.println("result:" + result);
        }
        System.out.print("end\n");
    }
}
四:值過濾器(ValueFilter)
解析:這個過濾器可以幫助用戶篩選某個特定值的單元格,與RegexStringComparator配合使用,可以使用功能強大的表達式來進行篩選,需要注意的是,在使用特定比較器的時候,只能與部分運算符配合使用。
package compareFilter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.filter.ValueFilter;
import org.junit.Test;
import java.io.IOException;
public class valuefilter {
private Configuration configuration = null;
private Connection connection = null;
/*
* 根據值查詢
*/
@Test
public void valueFilter() throws IOException {
//創建Hbase配置文件
configuration = HBaseConfiguration.create();
//創建連接
connection = ConnectionFactory.createConnection(configuration);
Table table = connection.getTable(TableName.valueOf("ns1:t1"));
Scan scan = new Scan();
Filter filter = new ValueFilter(CompareFilter.CompareOp.EQUAL,new SubstringComparator(".4"));
//創建掃描返回類
ResultScanner resultScanner = table.getScanner(scan);
for (Result result:resultScanner){
for (KeyValue kv :result.raw())
{
System.out.println(kv);
System.out.println(kv.getValue());
}
}
resultScanner.close();
table.close();
}
}
五:參考列過濾器(DependentColumnFilter)
解析:DependentColumnFilter以所選參考列的時間戳爲依據來過濾數據:只有時間戳與參考列單元格相同的其他列數據纔會被返回
此過濾器提供了四種構造函數:
(1)DependentColumnFilter()
(2)DependentColumnFilter(byte[] family,byte[] qualifier)
(3)DependentColumnFilter(byte[] family,byte[] qualifier,boolean dropDependentColumn)
(4)DependentColumnFilter(byte[] family,byte[] qualifier,boolean dropDependentColumn,CompareOp valueCompareOp, WritableByteArrayComparable valueComparator)
相關參數:
boolean dropDependentColumn -- 決定參考列被返回還是丟棄,爲true時表示參考列被丟棄,爲false時表示參考列被返回
CompareOp valueCompareOp -- 比較運算符
WritableByteArrayComparable valueComparator -- 比較器
package compareFilter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;
import java.io.IOException;
/**
 * Example of filtering with {@code DependentColumnFilter} on table {@code ns1:t1},
 * using {@code cf1:name} as the reference column.
 *
 * <p>NOTE(review): the class and method names are carried over from the ValueFilter
 * example; two classes named {@code valuefilter} cannot coexist in package
 * {@code compareFilter}, so this one should probably be renamed (e.g.
 * {@code dependentcolumnfilter}) when it is saved to its own file.
 */
public class valuefilter {
    /**
     * Query by reference column: scans the table with a filter that depends on
     * column {@code cf1:name} and prints every returned row.
     *
     * @throws IOException if the HBase connection, scan or close fails
     */
    @Test
    public void valueFilter() throws IOException {
        // Load the HBase configuration.
        Configuration configuration = HBaseConfiguration.create();
        // try-with-resources closes the table and the connection even on failure;
        // the original never closed the connection.
        try (Connection connection = ConnectionFactory.createConnection(configuration);
             Table table = connection.getTable(TableName.valueOf("ns1:t1"))) {
            // Third argument is dropDependentColumn: false keeps the reference
            // column itself in the results.
            Filter filter = new DependentColumnFilter(
                    Bytes.toBytes("cf1"), Bytes.toBytes("name"), false);
            Scan scan = new Scan();
            scan.setFilter(filter);
            // Run the scan and print each matching row.
            try (ResultScanner resultScanner = table.getScanner(scan)) {
                for (Result result : resultScanner) {
                    System.out.println(result);
                }
            }
        }
    }
}