Python學習筆記-HBase操作

服務器環境Thrift安裝

下載地址:http://mirrors.hust.edu.cn/apache/thrift/

tar -xzvf thrift-0.13.0.tar.gz

cd thrift-0.13.0

./configure --with-cpp --with-boost --with-python --without-csharp --with-java --without-erlang --without-perl --with-php --without-php_extension --without-ruby --without-haskell --without-go

make

make install

 

啓動HBase Thrift服務

hbase-2.2.2$ bin/hbase-daemon.sh start thrift

 

Python happybase庫安裝

在線:

pip install thrift

pip install happybase

離線:

https://pypi.org/project/thrift/#files

thrift-0.13.0>python setup.py install

https://pypi.org/project/happybase/#files

happybase-1.2.0>python setup.py install

 

happybase操作示例

# -*- coding:utf-8 -*-

import sys
import happybase

# Python 2 only: force the interpreter-wide implicit str/unicode codec to
# UTF-8. ``setdefaultencoding`` is removed from ``sys`` at startup, so the
# module has to be reloaded first. On interpreters whose default encoding is
# already UTF-8 the branch is skipped entirely.
default_encoding = 'utf-8'
if default_encoding != sys.getdefaultencoding():
    reload(sys)
    sys.setdefaultencoding(default_encoding)


class HBaseUtils(object):
    """Thin convenience wrapper around a ``happybase.ConnectionPool``.

    Every method borrows a connection from the pool for the duration of a
    single operation. Unless a method's docstring says otherwise, any
    exception raised by the operation is printed and swallowed, and the
    method then returns ``None``.
    """

    def __init__(self, host, port, size):
        """Create the underlying connection pool.

        :param host: host of the HBase Thrift server.
        :param port: port of the HBase Thrift server.
        :param size: ``size`` argument forwarded to the connection pool.
        """
        self.pool = happybase.ConnectionPool(size=size, host=host, port=port)

    def create_table(self, table_name, families):
        """Create a table.

        :param table_name: name of the table to create.
        :param families: mapping of column family name to its options,
            e.g. ``{"f1": dict(), "f2": dict()}``.
        """
        try:
            with self.pool.connection() as connection:
                connection.create_table(table_name, families)
        except Exception as e:
            print(e)

    def update_table_able(self, table_name, able):
        """Enable or disable a table, skipping the call if already in that state.

        :param table_name: name of the table.
        :param able: ``True`` to enable, ``False`` to disable. The check is
            by identity, so any other value (including ``1``/``0``) is a
            no-op.
        """
        try:
            with self.pool.connection() as connection:
                is_enabled = connection.is_table_enabled(table_name)
                if able is True and not is_enabled:
                    connection.enable_table(table_name)
                elif able is False and is_enabled:
                    connection.disable_table(table_name)
        except Exception as e:
            print(e)

    def delete_table(self, table_name, disable=False):
        """Delete a table.

        :param table_name: name of the table.
        :param disable: forwarded to ``Connection.delete_table``; pass
            ``True`` to have the table disabled first.
        """
        try:
            with self.pool.connection() as connection:
                connection.delete_table(table_name, disable)
        except Exception as e:
            print(e)

    def read_table(self, table_name):
        """Return the table object for ``table_name``.

        Exceptions are NOT caught here.

        NOTE(review): the ``with`` block has already exited when the caller
        receives the table, so the connection the table is bound to is no
        longer leased from the pool. Prefer the other methods of this class
        for actual reads and writes -- confirm before using the returned
        object from multiple threads.
        """
        with self.pool.connection() as connection:
            return connection.table(table_name)

    def read_tables(self):
        """Return the result of ``Connection.tables()``.

        Exceptions are NOT caught here.
        """
        with self.pool.connection() as connection:
            return connection.tables()

    def insert(self, table_name, row, data, timestamp=None, wal=True):
        """Store ``data`` in a single row.

        :param table_name: name of the table.
        :param row: row key.
        :param data: mapping of ``"family:qualifier"`` to value.
        :param timestamp: optional explicit cell timestamp.
        :param wal: whether to write to the write-ahead log.
        """
        try:
            with self.pool.connection() as connection:
                connection.table(table_name).put(row, data, timestamp=timestamp, wal=wal)
        except Exception as e:
            print(e)

    def insert_batch(self, table_name, data_list, batch_size=1000):
        """Store many rows using batched mutations.

        :param table_name: name of the table.
        :param data_list: iterable of mappings with the keys ``'row'`` and
            ``'data'`` and, optionally, ``'timestamp'``.
        :param batch_size: number of queued mutations after which a batch is
            sent automatically.

        Rows that share a timestamp go through the same batch, so rows with
        different timestamps are not necessarily sent in input order.
        """
        try:
            with self.pool.connection() as connection:
                table = connection.table(table_name)
                # ``Batch.put`` takes ``(row, data, wal)``: a timestamp passed
                # as third positional argument would be used as the WAL flag
                # and never reach the cell. The timestamp is a batch-wide
                # setting instead, so keep one batch per distinct timestamp
                # (``None`` lets the server assign it).
                batches = {}
                try:
                    for data in data_list:
                        timestamp = data['timestamp'] if 'timestamp' in data else None
                        batch = batches.get(timestamp)
                        if batch is None:
                            batch = table.batch(timestamp=timestamp, batch_size=batch_size)
                            batches[timestamp] = batch
                        batch.put(data['row'], data['data'])
                finally:
                    # Flush whatever has been queued, even if a malformed
                    # item aborted the loop -- the same thing a
                    # non-transactional batch does when used in ``with``.
                    for batch in batches.values():
                        batch.send()
        except Exception as e:
            print(e)

    def delete(self, table_name, row, columns=None, timestamp=None, wal=True):
        """Delete a row, or only some of its columns.

        :param table_name: name of the table.
        :param row: row key.
        :param columns: optional list of columns to delete; ``None`` is
            forwarded unchanged.
        :param timestamp: optional timestamp forwarded to ``Table.delete``.
        :param wal: whether to write to the write-ahead log.
        """
        try:
            with self.pool.connection() as connection:
                connection.table(table_name).delete(row, columns=columns, timestamp=timestamp, wal=wal)
        except Exception as e:
            print(e)

    def read_row(self, table_name, row, columns=None, timestamp=None, include_timestamp=False):
        """Return the result of ``Table.row`` for one row key.

        :param table_name: name of the table.
        :param row: row key.
        :param columns: optional list of columns to fetch.
        :param timestamp: optional timestamp forwarded to ``Table.row``.
        :param include_timestamp: whether values come with their timestamps.
        """
        try:
            with self.pool.connection() as connection:
                return connection.table(table_name).row(row, columns=columns, timestamp=timestamp, include_timestamp=include_timestamp)
        except Exception as e:
            print(e)

    def read_rows(self, table_name, rows, columns=None, timestamp=None, include_timestamp=False, need_dict=False):
        """Return the result of ``Table.rows`` for several row keys.

        :param table_name: name of the table.
        :param rows: list of row keys.
        :param columns: optional list of columns to fetch.
        :param timestamp: optional timestamp forwarded to ``Table.rows``.
        :param include_timestamp: whether values come with their timestamps.
        :param need_dict: when true, the result is passed through ``dict()``
            before being returned.
        """
        try:
            with self.pool.connection() as connection:
                result = connection.table(table_name).rows(rows, columns=columns, timestamp=timestamp, include_timestamp=include_timestamp)
                return result if not need_dict else dict(result)
        except Exception as e:
            print(e)

    def read_cells(self, table_name, row, column, versions=None, timestamp=None, include_timestamp=False):
        """Return the result of ``Table.cells`` for one column of one row.

        :param table_name: name of the table.
        :param row: row key.
        :param column: column name (``"family:qualifier"``).
        :param versions: optional ``versions`` argument forwarded as is.
        :param timestamp: optional timestamp forwarded to ``Table.cells``.
        :param include_timestamp: whether values come with their timestamps.
        """
        try:
            with self.pool.connection() as connection:
                return connection.table(table_name).cells(row, column, versions=versions, timestamp=timestamp, include_timestamp=include_timestamp)
        except Exception as e:
            print(e)

    def read_families(self, table_name):
        """Return the result of ``Table.families()`` for ``table_name``."""
        try:
            with self.pool.connection() as connection:
                return connection.table(table_name).families()
        except Exception as e:
            print(e)

    def read_regions(self, table_name):
        """Return the result of ``Table.regions()`` for ``table_name``."""
        try:
            with self.pool.connection() as connection:
                return connection.table(table_name).regions()
        except Exception as e:
            print(e)

    def read_scan(self, table_name, row_start=None, row_stop=None, row_prefix=None, columns=None,
                  filter=None, timestamp=None, include_timestamp=False, batch_size=1000,
                  scan_batching=None, limit=None, sorted_columns=False, reverse=False):
        """Iterate over the items produced by ``Table.scan``.

        All keyword arguments are forwarded to ``Table.scan`` unchanged.

        This is a generator: nothing is sent to the server until iteration
        starts, and the pooled connection stays leased until the generator
        is exhausted, closed or garbage collected. Handing back the scanner
        itself would release the connection before the first item is
        fetched. An error while opening or reading the scan is printed and
        ends the iteration early.
        """
        try:
            with self.pool.connection() as connection:
                scanner = connection.table(table_name).scan(row_start=row_start, row_stop=row_stop, row_prefix=row_prefix,
                                                            columns=columns, filter=filter, timestamp=timestamp,
                                                            include_timestamp=include_timestamp, batch_size=batch_size,
                                                            scan_batching=scan_batching, limit=limit,
                                                            sorted_columns=sorted_columns, reverse=reverse)
                for item in scanner:
                    yield item
        except Exception as e:
            print(e)


if __name__ == '__main__':
    # End-to-end demo against a live HBase Thrift server: create a table,
    # write a few rows, read them back in several ways, then delete.
    # Single-argument print(...) produces the same output on Python 2 and 3.
    hbase_utils = HBaseUtils(host="192.168.0.123", port=9090, size=5)

    # Table management.
    hbase_utils.create_table('user', {"basic": dict(), "profile": dict()})
    hbase_utils.update_table_able('user', True)
    hbase_utils.read_table('user')
    print(hbase_utils.read_tables())

    # Single-row and batched writes.
    hbase_utils.insert('user', '000001'.encode(), {"basic:name": "zhangsan", "basic:gender": "male"})
    hbase_utils.insert('user', '000002'.encode(), {"basic:name": "lisi", "basic:gender": "female"})
    data_list = [
        {'row': '000003'.encode(), 'data': {"basic:name": "wangwu", "basic:gender": "male"}},
        {'row': '000004'.encode(), 'data': {"basic:name": "maliu", "basic:gender": "female"}}
    ]
    hbase_utils.insert_batch('user', data_list)

    # Reads: one row, several rows, cells, table metadata.
    print(hbase_utils.read_row('user', b'000001', include_timestamp=True))
    print(hbase_utils.read_row('user', b'000003', include_timestamp=False))
    print(hbase_utils.read_rows('user', [b'000001', b'000002'], include_timestamp=True))
    print(hbase_utils.read_rows('user', [b'000001', b'000002'], include_timestamp=True, need_dict=True))
    print(hbase_utils.read_rows('user', [b'000001', b'000002'], columns=['basic:name'], include_timestamp=True))
    print(hbase_utils.read_cells('user', b'000001', 'basic:name'))
    print(hbase_utils.read_cells('user', b'000001', 'basic:name', include_timestamp=True))
    print(hbase_utils.read_families('user'))
    print(hbase_utils.read_regions('user'))

    # Deletes: first one column, then the whole row, printing the row after
    # each step.
    hbase_utils.insert('user', '000005'.encode(), {"basic:name": "shenqi", "basic:gender": "female"})
    print(hbase_utils.read_row('user', b'000005', include_timestamp=True))
    hbase_utils.delete('user', '000005'.encode(), ['basic:name'])
    print(hbase_utils.read_row('user', b'000005', include_timestamp=True))
    hbase_utils.delete('user', '000005'.encode())
    print(hbase_utils.read_row('user', b'000005', include_timestamp=True))

    # Range scan. read_scan can hand back None when the scan could not be
    # set up, so fall back to an empty sequence instead of failing on the
    # loop.
    results = hbase_utils.read_scan('user', row_start=b'000001', row_stop=b'000003')
    for result in results or []:
        print(result)

No protocol version header 異常處理

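先確認 HBase Thrift 服務是否已正常啓動;若服務正常,該異常通常是客戶端與服務端的 Thrift transport/protocol 設置不一致所致:請將 hbase-site.xml 中 thrift 相關的配置(例如 hbase.regionserver.thrift.framed、hbase.regionserver.thrift.compact)註釋掉並重啓 Thrift 服務,或者在 happybase 客戶端改用與服務端一致的 transport / protocol 參數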

 

發佈了119 篇原創文章 · 獲贊 68 · 訪問量 34萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章