python hbase API (二)

一、python hbase API (一) thrift2環境準備

python hbase  API (一) thrift2環境準備

https://mp.csdn.net/postedit/86501781

二、python hbase  API (二) 

1、configuration.properties

將configuration.properties放在項目的resources資源目錄下

################################Database#######################################
## Hbase
hbase_host  172.8.xx.xx
hbase_port  9090
hbase_username  0
hbase_password  0
hbase_db  xx
hbase_columnfamilies  xx

## Redis
redis_host  172.8.xx.xxx
redis_port  6379
redis_username    0
redis_password    "xxx"
redis_db    1

## MySQL
sql_host 172.8.xx.xxx
sql_port    3306
sql_username    xxx
sql_password    xxx

 2、HbaseUtlis.py

#!/usr/bin/python3
# -*- coding: UTF-8 -*-

import pandas as pd
from thrift.transport import TSocket, TTransport
from thrift.protocol import TBinaryProtocol
from hbase import Hbase
from hbase.ttypes import Mutation, BatchMutation

## READ CONFIGURATION FILE
# The properties file is a list of "<key> <whitespace> <value>" lines. It is
# loaded as a two-column table indexed by key and then transposed so that each
# key becomes a column holding a single value.
#
# * ``sep=r"\s+"`` is the documented replacement for the deprecated
#   ``delim_whitespace=True`` option.
# * ``comment="#"`` skips banner/section lines such as "## Hbase" so they are
#   not parsed as settings.  NOTE: as a consequence a value must not contain a
#   '#' character, because the rest of the line would be dropped.
config_file = pd.read_table(
    filepath_or_buffer="configuration.properties",
    header=None,
    sep=r"\s+",
    comment="#",
    index_col=0,
).transpose()

# All settings are exposed as strings; consumers convert them where needed.
Hbase_host = str(config_file['hbase_host'].iloc[0])
Hbase_port = str(config_file['hbase_port'].iloc[0])
Hbase_username = str(config_file['hbase_username'].iloc[0])
Hbase_password = str(config_file['hbase_password'].iloc[0])
Hbase_db = str(config_file['hbase_db'].iloc[0])
Hbase_columnfamilies = str(config_file['hbase_columnfamilies'].iloc[0])


class HbaseClient(object):
    """Convenience wrapper around the Thrift-generated ``Hbase.Client``.

    Each public method opens the transport, performs one Thrift request (or
    one complete scan) and closes the transport again.  The close happens in
    a ``finally`` clause so the socket is released even when the request
    raises.

    Connection settings and the default namespace / column family are read
    from the module-level ``Hbase_*`` values.
    """

    __slots__ = ['transport', 'client']

    def __init__(self):
        """Build the transport/protocol stack; the socket is not opened yet."""
        # Address and port of the Thrift server (9090 is the default port
        # used in the sample configuration).  The configured port is kept as
        # a string at module level, so convert it for the socket.
        sock = TSocket.TSocket(Hbase_host, int(Hbase_port))
        # Timeout in milliseconds.
        sock.setTimeout(5000)
        # Transport layer (TFramedTransport or TBufferedTransport).
        self.transport = TTransport.TBufferedTransport(sock)
        # Wire protocol.
        protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        # Generated client bound to that protocol.
        self.client = Hbase.Client(protocol)

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _qualified_table(tableName):
        """Return ``tableName`` prefixed with the configured namespace."""
        return Hbase_db + ':' + tableName

    @staticmethod
    def _qualified_column(column):
        """Return ``column`` prefixed with the configured column family."""
        return Hbase_columnfamilies + ':' + column

    @staticmethod
    def _cell_value(row_result, qualified_column):
        """Return the value of one cell, or ``None`` if the row lacks it."""
        cell = row_result.columns.get(qualified_column)
        return None if cell is None else cell.value

    def _drain_scanner(self, scannerId, extract):
        """Read a scanner to exhaustion and always close it afterwards.

        ``extract`` is called once per returned row and its result is
        collected into the returned list.
        """
        rows = []
        try:
            while True:
                batch = self.client.scannerGet(scannerId)  # fetch by scanner id
                if not batch:
                    break
                # A fresh record is built for every row so that list entries
                # never share the same dict object.
                rows.extend(extract(item) for item in batch)
        finally:
            self.client.scannerClose(scannerId)
        return rows

    # ------------------------------------------------------------------
    # public API
    # ------------------------------------------------------------------
    ## List tables
    def getTableNames(self):
        """Return whatever ``Hbase.Client.getTableNames`` returns."""
        self.transport.open()
        try:
            return self.client.getTableNames()
        finally:
            self.transport.close()

    ## Read one column of one row
    def get(self, tableName, row, column):
        """Return the raw Thrift result for a single cell.

        ``tableName`` is prefixed with the configured namespace; ``column``
        is passed through unchanged, so it must already be fully qualified.
        """
        self.transport.open()
        try:
            return self.client.get(self._qualified_table(tableName), row, column)
        finally:
            self.transport.close()

    ## Read several columns of one row
    def getRowWithColumns(self, tableName, row, columns):
        """Return ``{column: value}`` for the requested columns of ``row``.

        ``columns`` holds unqualified names; the configured column family is
        added for the request and removed again in the returned keys.  A
        column missing from the returned row maps to ``None``.  An empty
        dict is returned when the row does not exist.
        """
        qualified = [self._qualified_column(name) for name in columns]
        self.transport.open()
        try:
            result = self.client.getRowWithColumns(
                self._qualified_table(tableName), row, qualified)
        finally:
            self.transport.close()

        data = {}
        for item in result:
            for column in columns:
                data[column] = self._cell_value(item, self._qualified_column(column))
        return data

    ## Read one full row
    def getRow(self, tableName, row):
        """Return ``{column: value}`` for every cell of ``row``.

        Returns an empty dict when the row does not exist.

        NOTE(review): unlike most other methods, ``tableName`` is NOT prefixed
        with the configured namespace here, and the literal ``'info:'`` (not
        the configured column family) is stripped from the keys.  Both are
        kept as-is for backward compatibility - confirm whether intended.
        """
        self.transport.open()
        try:
            result = self.client.getRow(tableName, row)
        finally:
            self.transport.close()

        data_dict = {}
        for item in result:
            data_dict = {
                key.replace('info:', ''): cell.value
                for key, cell in item.columns.items()
            }
        return data_dict

    ## Insert one row
    def mutateRow(self, tableName, row, hat_data):
        """Write the ``{column: value}`` mapping ``hat_data`` into ``row``.

        Column names are unqualified; the configured column family is added.
        """
        mutations = [
            Mutation(column=self._qualified_column(key), value=value)
            for key, value in hat_data.items()
        ]
        self.transport.open()
        try:
            self.client.mutateRow(self._qualified_table(tableName), row, mutations)
        finally:
            self.transport.close()

    ## Insert several rows
    def mutateRows(self, tableName, dt, current_ruleVal):
        """Write every row of the DataFrame ``dt`` in one batch request.

        The row key is the concatenation of ``current_ruleVal.machineID``,
        ``.spindleID``, ``.programNum`` and the row's ``step_number`` column
        left-padded with zeros to five characters.  All cell values are
        converted to ``str`` before being written.

        NOTE(review): ``tableName`` is NOT prefixed with the configured
        namespace here; kept as-is for backward compatibility.
        """
        key_prefix = (str(current_ruleVal.machineID)
                      + str(current_ruleVal.spindleID)
                      + str(current_ruleVal.programNum))
        batchMutation = []
        for i in range(dt.shape[0]):
            curr_df = dt.iloc[i, :].astype('str')
            rowkey = key_prefix + str(curr_df['step_number']).zfill(5)
            mutations = [
                Mutation(column=self._qualified_column(column), value=message)
                for column, message in curr_df.to_dict().items()
            ]
            batchMutation.append(BatchMutation(rowkey, mutations))

        self.transport.open()
        try:
            self.client.mutateRows(tableName, batchMutation)
        finally:
            self.transport.close()

    ## Delete one row
    def deleteAllRow(self, tableName, row):
        """Delete every cell of ``row`` in the namespaced table."""
        self.transport.open()
        try:
            self.client.deleteAllRow(self._qualified_table(tableName), row)
        finally:
            self.transport.close()

    ## Range scan between a start and a stop row key
    def scannerOpenWithStop(self, tableName, startRow, stopRow, refer_data):
        """Scan from ``startRow`` up to ``stopRow`` and return a list of dicts.

        The *values* of ``refer_data`` are used as the (already fully
        qualified) column names to fetch; its keys are not used.  Each
        returned dict maps those column names to the cell value, or to
        ``None`` when the row lacks the column.
        """
        columes = list(refer_data.values())

        def extract(item):
            return {name: self._cell_value(item, name) for name in columes}

        self.transport.open()
        try:
            scannerId = self.client.scannerOpenWithStop(
                self._qualified_table(tableName), startRow, stopRow, columes)
            return self._drain_scanner(scannerId, extract)
        finally:
            self.transport.close()

    ## Prefix scan on the row key
    def scannerOpenWithPrefix(self, tableName, startAndPrefix, columns):
        """Scan rows whose key starts with ``startAndPrefix``.

        ``columns`` holds unqualified names; the configured column family is
        added for the request.  The result is a ``pandas.DataFrame`` with one
        row per scanned HBase row and one column per requested name (``None``
        where a row lacks the column).

        NOTE(review): ``tableName`` is NOT prefixed with the configured
        namespace here; kept as-is for backward compatibility.
        """
        addfamliy = [self._qualified_column(name) for name in columns]

        def extract(item):
            return {
                name: self._cell_value(item, self._qualified_column(name))
                for name in columns
            }

        self.transport.open()
        try:
            scannerId = self.client.scannerOpenWithPrefix(
                tableName, startAndPrefix, addfamliy)
            data_list = self._drain_scanner(scannerId, extract)
        finally:
            self.transport.close()
        return pd.DataFrame(data_list)

三、thrift生成的代碼中都提供了哪些方法

提供的方法有:
void enableTable(Bytes tableName)
enable表
void disableTable(Bytes tableName)
disable表
bool isTableEnabled(Bytes tableName)
查看表狀態
void compact(Bytes tableNameOrRegionName)
void majorCompact(Bytes tableNameOrRegionName)
getTableNames()
getColumnDescriptors(Text tableName)
getTableRegions(Text tableName)
void createTable(Text tableName, columnFamilies)
void deleteTable(Text tableName)
get(Text tableName, Text row, Text column)
getVer(Text tableName, Text row, Text column, i32 numVersions)
getVerTs(Text tableName, Text row, Text column, i64 timestamp, i32 numVersions)
getRow(Text tableName, Text row)
getRowWithColumns(Text tableName, Text row,  columns)
getRowTs(Text tableName, Text row, i64 timestamp)
getRowWithColumnsTs(Text tableName, Text row,  columns, i64 timestamp)
getRows(Text tableName,  rows)
getRowsWithColumns(Text tableName,  rows,  columns)
getRowsTs(Text tableName,  rows, i64 timestamp)
getRowsWithColumnsTs(Text tableName,  rows,  columns, i64 timestamp)
void mutateRow(Text tableName, Text row,  mutations)
void mutateRowTs(Text tableName, Text row,  mutations, i64 timestamp)
void mutateRows(Text tableName,  rowBatches)
void mutateRowsTs(Text tableName,  rowBatches, i64 timestamp)
i64 atomicIncrement(Text tableName, Text row, Text column, i64 value)
void deleteAll(Text tableName, Text row, Text column)
void deleteAllTs(Text tableName, Text row, Text column, i64 timestamp)
void deleteAllRow(Text tableName, Text row)
void deleteAllRowTs(Text tableName, Text row, i64 timestamp)
ScannerID scannerOpenWithScan(Text tableName, TScan scan)
ScannerID scannerOpen(Text tableName, Text startRow,  columns)
ScannerID scannerOpenWithStop(Text tableName, Text startRow, Text stopRow,  columns)
ScannerID scannerOpenWithPrefix(Text tableName, Text startAndPrefix,  columns)
ScannerID scannerOpenTs(Text tableName, Text startRow,  columns, i64 timestamp)
ScannerID scannerOpenWithStopTs(Text tableName, Text startRow, Text stopRow,  columns, i64 timestamp)
scannerGet(ScannerID id)
scannerGetList(ScannerID id, i32 nbRows)
void scannerClose(ScannerID id)

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章