hbase過濾器自定義

1. 下載protobuf-2.5.0並解壓;如果是Windows環境,需額外下載protoc-2.5.0-win32並解壓,將其中的protoc.exe放到protobuf-2.5.0下的src目錄中

2. 配置環境變量,添加path路徑指向protobuf目錄的src中

3. 查看當前版本,在命令提示符中輸入命令 protoc --version (正常應輸出 libprotoc 2.5.0)

4. 創建一個空白的文本文件命名爲 CustomNumberComparator.proto 即後綴文件類型爲proto

5. 用記事本打開CustomNumberComparator.proto文件輸入以下內容

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// This file contains protocol buffers that are used for filters

option java_package = "com.pateo.hbase.defined.comparator";// Java package of the generated code
option java_outer_classname = "MyComparatorProtos";// name of the generated outer class
option java_generic_services = true;
option java_generate_equals_and_hash = true;
option optimize_for = SPEED;

// This file contains protocol buffers that are used for comparators (e.g. in filters)

// Serialized form of the custom comparator: the two fields it writes in toByteArray().
message CustomNumberComparator {
    required bytes value = 1;     // comparator field that must be serialized (the client value)
    required string fieldType = 2;// comparator field that must be serialized (the comparison mode)
}

6. 進入命令提示符,使用命令讀取CustomNumberComparator.proto的內容生成java代碼,即自定義比較器的序列化類

命令: protoc.exe -I=C:/proto --java_out=C:/proto C:/proto/CustomNumberComparator.proto

輸入後會在 --java_out 指定的目錄(此處爲 C:/proto)中按包名生成一個文件夾

得到自定義比較器的序列化類

7. 將生成的文件夾拷貝到idea編程工具中,注意粘貼的路徑爲java下

8. 新建一個自定義比較器類CustomNumberComparator

9. CustomNumberComparator繼承ByteArrayComparable類,重寫方法,代碼如下

package com.pateo.hbase.defined.comparator;

import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.filter.ByteArrayComparable;
import org.apache.hadoop.hbase.util.Bytes;
import com.google.protobuf.ByteString;
import com.google.protobuf.InvalidProtocolBufferException;

import java.util.Locale;

/**
 * Custom comparator that tests two fixed character ranges of an HBase value
 * (the row key, when used inside a {@code RowFilter}) against two
 * comma-separated fragments supplied by the client. See {@code CompareTest}
 * for usage.
 *
 * <p>The comparator value is the byte form of a string such as
 * {@code "99,034"}: the first fragment must occur in characters [1, 5) of the
 * lower-cased value and the second fragment in characters [17, 22).
 *
 * <p>Only the {@code "String"} comparison mode is implemented; the
 * {@code fieldType} constructor argument is accepted for compatibility but is
 * currently ignored.
 */
public class CustomNumberComparator extends ByteArrayComparable {
    /** The only comparison mode implemented so far. */
    private static final String STRING_FIELD_TYPE = "String";

    /** Character range [start, end) checked against the first client fragment. */
    private static final int FIRST_SEGMENT_START = 1;
    private static final int FIRST_SEGMENT_END = 5;

    /** Character range [start, end) checked against the second client fragment. */
    private static final int SECOND_SEGMENT_START = 17;
    private static final int SECOND_SEGMENT_END = 22;

    /** Comparison mode; always {@code "String"} at present. */
    private final String fieldType;

    /** Raw client value, serialized as-is by {@link #toByteArray()}. */
    private final byte[] data;

    /**
     * Creates a comparator for the given client value.
     *
     * @param value     bytes of the comma-separated fragments, e.g. {@code "99,034"}
     * @param fieldType requested comparison mode; ignored because only
     *                  {@code "String"} is implemented
     */
    public CustomNumberComparator(byte[] value, String fieldType) {
        super(value);
        // The original code assigned the argument and then overwrote it with
        // "String"; only the effective assignment is kept.
        this.fieldType = STRING_FIELD_TYPE;
        this.data = value;
    }

    /**
     * Serializes this comparator with the generated protobuf class.
     *
     * @return the protobuf-encoded value and field type
     */
    @Override
    public byte[] toByteArray() {
        MyComparatorProtos.CustomNumberComparator.Builder builder =
                MyComparatorProtos.CustomNumberComparator.newBuilder();
        builder.setValue(ByteString.copyFrom(this.data));
        builder.setFieldType(this.fieldType);
        return builder.build().toByteArray();
    }

    /**
     * Rebuilds a comparator from the bytes produced by {@link #toByteArray()}.
     *
     * @param bytes protobuf-encoded comparator
     * @return the deserialized comparator
     * @throws DeserializationException if {@code bytes} is not a valid message
     */
    public static CustomNumberComparator parseFrom(final byte[] bytes)
            throws DeserializationException {
        final MyComparatorProtos.CustomNumberComparator proto;
        try {
            proto = MyComparatorProtos.CustomNumberComparator.parseFrom(bytes);
        } catch (InvalidProtocolBufferException e) {
            throw new DeserializationException(e);
        }
        return new CustomNumberComparator(proto.getValue().toByteArray(),
                proto.getFieldType());
    }

    /**
     * Checks whether the given value contains both client fragments in their
     * expected character ranges.
     *
     * <p>Values that are too short for the two ranges, and client values
     * without two comma-separated fragments, are reported as non-matching
     * instead of raising an index exception.
     *
     * @param bytes  buffer holding the value to test
     * @param offset start of the value inside {@code bytes}
     * @param length number of bytes of the value
     * @return {@code 0} when both fragments are found, {@code 1} otherwise
     */
    @Override
    public int compareTo(byte[] bytes, int offset, int length) {
        if (!STRING_FIELD_TYPE.equalsIgnoreCase(fieldType)) {
            return 1;
        }

        String rowKey = Bytes.toString(bytes, offset, length);
        if (rowKey == null) {
            return 1;
        }
        rowKey = rowKey.toLowerCase(Locale.ROOT);
        // Checked after lower-casing so the length is the one substring() sees.
        if (rowKey.length() < SECOND_SEGMENT_END) {
            return 1;
        }

        String paramValue = byteConvertObj(String.class, this.data);
        if (paramValue == null) {
            return 1;
        }
        String[] fragments = paramValue.split(",");
        if (fragments.length < 2) {
            return 1;
        }

        String firstSegment = rowKey.substring(FIRST_SEGMENT_START, FIRST_SEGMENT_END);
        String secondSegment = rowKey.substring(SECOND_SEGMENT_START, SECOND_SEGMENT_END);
        boolean matches = firstSegment.contains(fragments[0])
                && secondSegment.contains(fragments[1]);
        return matches ? 0 : 1;
    }

    /**
     * Decodes {@code data} into the requested type, chosen by the simple name
     * of {@code clazz}.
     *
     * <p>For the numeric types the matching {@code Bytes} decoder is tried
     * first; if it throws {@link IllegalArgumentException} the bytes are read
     * as text and parsed with {@code valueOf}. Any other class yields the
     * bytes decoded as a string.
     *
     * @param clazz target type
     * @param data  bytes to decode
     * @param <T>   target type
     * @return the decoded value
     */
    // The casts are unchecked because the branch is selected by class name;
    // they are sound when clazz is the wrapper (or String) class itself.
    @SuppressWarnings("unchecked")
    private <T> T byteConvertObj(Class<T> clazz, byte[] data) {
        String clazzName = clazz.getSimpleName();
        if (clazzName.equalsIgnoreCase("Integer")) {
            Integer paramValue;
            try {
                paramValue = Bytes.toInt(data);
            } catch (IllegalArgumentException e) {
                paramValue = Integer.valueOf(Bytes.toString(data));
            }
            return (T) paramValue;
        } else if (clazzName.equalsIgnoreCase("Long")) {
            Long paramValue;
            try {
                paramValue = Bytes.toLong(data);
            } catch (IllegalArgumentException e) {
                paramValue = Long.valueOf(Bytes.toString(data));
            }
            return (T) paramValue;
        } else if (clazzName.equalsIgnoreCase("Float")) {
            Float paramValue;
            try {
                paramValue = Bytes.toFloat(data);
            } catch (IllegalArgumentException e) {
                paramValue = Float.valueOf(Bytes.toString(data));
            }
            return (T) paramValue;
        } else if (clazzName.equalsIgnoreCase("Double")) {
            Double paramValue;
            try {
                paramValue = Bytes.toDouble(data);
            } catch (IllegalArgumentException e) {
                paramValue = Double.valueOf(Bytes.toString(data));
            }
            return (T) paramValue;
        } else if (clazzName.equalsIgnoreCase("Short")) {
            Short paramValue;
            try {
                paramValue = Bytes.toShort(data);
            } catch (IllegalArgumentException e) {
                paramValue = Short.valueOf(Bytes.toString(data));
            }
            return (T) paramValue;
        }
        return (T) Bytes.toString(data);
    }
}

10.核心內容爲compareTo方法的內容,即爲過濾的邏輯實現

/**
 * Core filtering logic: returns 0 (row selected) when both client fragments
 * are found in their character ranges of the HBase value, 1 (row filtered
 * out) otherwise. Values that are too short, or a client value without two
 * comma-separated fragments, count as "filtered out" instead of throwing.
 */
@Override
public int compareTo(byte[] bytes, int offset, int length) {
    if (!fieldType.equalsIgnoreCase("String")) {
        return 1; // filtered out
    }

    // hbaseValue is one value read from HBase (the row key inside a RowFilter)
    String hbaseValue = Bytes.toString(bytes, offset, length);
    if (hbaseValue == null) {
        return 1; // filtered out
    }
    hbaseValue = hbaseValue.toLowerCase(Locale.ROOT);
    // Both substring ranges below need at least 22 characters.
    if (hbaseValue.length() < 22) {
        return 1; // filtered out
    }

    // clientValue is the filter content passed in by the client, e.g. "99,034"
    String clientValue = byteConvertObj(String.class, this.data);
    if (clientValue == null) {
        return 1; // filtered out
    }
    String[] split = clientValue.split(",");
    if (split.length < 2) {
        return 1; // filtered out
    }

    String substring1 = hbaseValue.substring(1, 5);
    String substring2 = hbaseValue.substring(17, 22);
    if (substring1.contains(split[0]) && substring2.contains(split[1])) {
        return 0; // selected
    }
    return 1; // filtered out
}

11.將這個項目打成jar包放入hbase根目錄中的lib下

選中之後自動打包成jar

將這個jar包發送到hbase的lib目錄中(每個RegionServer節點都需要),然後重啓hbase

12. 使用自定義類查詢結果

代碼如下

/**
 * 過濾器實現類介紹
 * 行鍵過濾 RowFilter
 * 列簇名過濾 FamilyFilter
 * 列過濾 QualifierFilter
 * 值過濾 ValueFilter
 */

/**
 * Runs one paged query and prints the returned row key together with the
 * elapsed time in whole seconds.
 *
 * @param args unused
 * @throws IOException if the query fails
 */
public static void main(String[] args) throws IOException {
    final long startedAt = System.currentTimeMillis();
    // Paged query: pass the starting row key and the paging argument.
    final String nextStart = getPage("00003", 2);
    System.out.println("下一個起始頁碼" + nextStart);
    final long elapsedMillis = System.currentTimeMillis() - startedAt;
    System.out.println("[ " + (elapsedMillis / 1000) + " s ]");
}

/**
 * Scans the HBase table named {@code "table"} with a row filter built on
 * {@code CustomNumberComparator} and prints every cell of every matching row.
 *
 * <p>Paging is not wired up: the start-row and next-row-key statements are
 * commented out, so {@code page} is not used and {@code lastRowkey} is
 * returned unchanged.
 *
 * @param lastRowkey row key the query is meant to start from
 * @param page       paging argument; not used by the current implementation
 * @return row key to start the next query from (currently {@code lastRowkey})
 * @throws IOException if the table cannot be opened or scanned
 */
public static String getPage(String lastRowkey, Integer page) throws IOException {
    // Connection settings
    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.zookeeper.quorum", "mini1");
    conf.set("hbase.zookeeper.property.clientPort", "2181");
    conf.set("hbase.master", "mini1:6000");

    Scan scan = new Scan();
    // Bytes.toBytes encodes as UTF-8 regardless of the platform default charset.
    // Prefix match on the row key; kept as an example, enable it below if needed.
    Filter filter1 = new RowFilter(CompareFilter.CompareOp.EQUAL, new BinaryPrefixComparator(Bytes.toBytes("999")));
    // "Contains" match implemented by the custom comparator.
    Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL, new CustomNumberComparator(Bytes.toBytes("99,034"), "String"));
    FilterList filterlist = new FilterList();
    //filterlist.addFilter(filter1);
    filterlist.addFilter(filter);
    scan.setFilter(filterlist);
    // For paging, the start key is the last returned row key with "0" appended:
    //scan.setStartRow(lastRowkey.getBytes());

    long time1 = System.currentTimeMillis();
    HTable table = new HTable(conf, "table");
    try {
        long time2 = System.currentTimeMillis();
        System.out.println("創建 HTable:"+(time2-time1)/1000.0+"s");
        ResultScanner scanner = table.getScanner(scan);
        try {
            long time3 = System.currentTimeMillis();
            System.out.println("獲取ResultScanner :"+(time3-time1)/1000.0+"s");
            String format = "  %-40s%-14s%-35s%-10s"; // layout of one printed cell
            System.out.println(String.format("%-40s%-14s", "ROW", "COLUMN+CELL"));
            long index = 0L; // number of rows returned by the scanner
            for (Result res : scanner) {
                index++;
                List<Cell> cells = res.listCells();
                if (cells == null) {
                    // listCells() is documented to return null for an empty result.
                    continue;
                }
                for (Cell cell : cells) {
                    String row = Bytes.toString(CellUtil.cloneRow(cell));
                    String value = "value=" + Bytes.toString(CellUtil.cloneValue(cell));
                    String family = Bytes.toString(CellUtil.cloneFamily(cell));
                    String col = Bytes.toString(CellUtil.cloneQualifier(cell));
                    String column = "column=" + family + ":" + col;
                    String timestamp = "timestamp=" + cell.getTimestamp();
                    System.out.println(String.format(format, row, column, timestamp, value));
                }
                // Start key for the next page:
                //lastRowkey = Bytes.toString(res.getRow()) + "0";
            }
            long time4 = System.currentTimeMillis();
            System.out.println(String.format("一共 %d 條數據",index));
            System.out.println("輸出耗時 :"+(time4-time3)/1000.0+"s");
        } finally {
            // Release the scanner even when iteration or printing fails.
            scanner.close();
        }
    } finally {
        table.close();
    }

    // Row key to start the next query from, used for paging.
    return lastRowkey;
}

hbase通過網關(thrift/thrift2)訪問:使用 <hbase_home>/bin/hbase thrift start 或 <hbase_home>/bin/hbase thrift2 start 啓動

hbase過濾器自定義

python 常用機器學習算法demo

有向無環圖的java實現(使用矩陣特性開發)

mysql 批處理addBatch用法

java 項目自動控制維護腳本

Matplotlib 作圖

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結