Consuming canal protobuf-format data with Python

canal -> kafka -> consumer, flatMessage=False (canal writes raw protobuf messages to Kafka rather than flat JSON).
See the canal Python client (canal-python).
Since the canal Python client is designed to connect directly to the canal server on port 11111 and consume data as a canal client rather than consume from Kafka, its example cannot be used as-is; a few modifications are needed.
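
For contrast, here is a minimal sketch of what the upstream canal-python example roughly looks like: it opens a TCP connection to the canal server on port 11111 instead of reading from Kafka. Method names follow the canal-python README; the host, destination and client_id values below are placeholders.

# direct-connection sketch (not used in this post, shown only for contrast)
from canal.client import Client

client = Client()
client.connect(host='127.0.0.1', port=11111)      # talk to the canal server directly
client.check_valid(username=b'', password=b'')    # canal user/password, empty by default
client.subscribe(client_id=b'1001', destination=b'example', filter=b'.*\\..*')

try:
    while True:
        message = client.get(100)                 # dict with 'id' and decoded 'entries'
        for entry in message['entries']:
            print(entry.header.schemaName, entry.header.tableName)
except KeyboardInterrupt:
    client.disconnect()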

Python 3.7.4

requirements

backcall==0.1.0
bleach==3.1.0
canal-python==0.4
certifi==2019.6.16
chardet==3.0.4
confluent-kafka==1.3.0
decorator==4.4.2
docopt==0.6.2
docutils==0.15.2
idna==2.8
ipython==7.13.0
ipython-genutils==0.2.0
jedi==0.16.0
parso==0.6.2
pexpect==4.8.0
pickleshare==0.7.5
pkginfo==1.5.0.1
prompt-toolkit==3.0.4
protobuf==3.9.1
ptyprocess==0.6.0
Pygments==2.4.2
readme-renderer==24.0
requests==2.22.0
requests-toolbelt==0.9.1
six==1.12.0
tqdm==4.34.0
traitlets==4.3.3
twine==1.13.0
urllib3==1.25.3
wcwidth==0.1.8
webencodings==0.5.1
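
Only a few of these packages are actually imported by the script below: canal-python (which provides the generated protobuf modules), confluent-kafka, docopt and protobuf; the rest come from the interactive environment. A quick import check, assuming the packages installed cleanly:

# sanity check that the canal protobuf bindings and the Kafka client are importable
from docopt import docopt                      # command-line parsing
from confluent_kafka import Consumer           # Kafka consumer client
from canal.protocol import CanalProtocol_pb2   # canal Packet / Messages definitions
from canal.protocol import EntryProtocol_pb2   # canal Entry / RowChange definitions

print('canal / kafka dependencies import OK')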

Main reference

https://github.com/haozi3156666/canal-python/blob/master/canal/client.py

# -*- coding: utf8 -*-
# __author__ = 'Fan()'
# Date: 2020-03-18

'''
Usage:
        canal_kafka_protobuf_consume.py --bootstrap-servers=<host:port,host2:port2..> [--k_user=<user> ] [--from-beginning=<false> | --from-end=<false>]  --topic=<topic_name> [--partition=<partition_number>]  [--verbose=<0>]
        canal_kafka_protobuf_consume.py -h | --help
        canal_kafka_protobuf_consume.py --version

Options:
        -h --help                                       Show this help message.
        --version                                       Show version information.
        --bootstrap-servers=<host:port,host2:port2..>   Kafka bootstrap servers
        --from-beginning=<false>                        Consume from the beginning of the partition [default: False]
        --from-end=<false>                              Consume from the end of the partition [default: True]
        --k_user=<user>                                 Kafka user, optional
        --topic=<topic_name>                            Topic name
        --partition=<partition_number>                  Topic partition number [default: 0]
        --verbose=<0>                                   Verbosity 0, 1 or 2; default 0 prints nothing extra [default: 0]
'''
import getpass

from docopt import docopt
from canal.protocol import CanalProtocol_pb2
from canal.protocol import EntryProtocol_pb2
from confluent_kafka import Consumer, KafkaError, TopicPartition, OFFSET_END, OFFSET_BEGINNING


class DocOptArgs:
    def __init__(self, args):
        self.topic = args['--topic']
        self.k_user = args['--k_user']
        self.verbose = int(args['--verbose'])
        self.partition = int(args['--partition'])
        self.bootstrap_servers = args['--bootstrap-servers']
        # docopt returns these as strings such as 'True' / 'false'
        self.from_end = args['--from-end'].lower() == 'true'
        self.from_beginning = args['--from-beginning'].lower() == 'true'

        if not self.k_user:
            self.k_password = None
        elif self.k_user == 'admin':
            self.k_password = 'superSecurt'
        else:
            self.k_password = getpass.getpass("please enter kafka password: ")


class MyConsumer(DocOptArgs):
    def __init__(self, docopt_args):
        self.args = docopt_args
        DocOptArgs.__init__(self, self.args)

        if self.verbose >= 1:
            print(self.args)

    def _on_send_response(self, err, partitions):
        # offset commit callback: abort if the commit failed
        pt = partitions[0]
        if isinstance(err, KafkaError):
            print('Offset commit failed for topic {} at offset {}: {}'.format(pt.topic, pt.offset, err))
            raise Exception(err)

    def messages(self, offset_end=True):
        config = {'bootstrap.servers': self.bootstrap_servers,
                  "group.id": self.topic,
                  'enable.auto.commit': True,
                  "fetch.wait.max.ms": 3000,
                  "max.poll.interval.ms": 60000,
                  'session.timeout.ms': 60000,
                  "on_commit": self._on_send_response,
                  "default.topic.config": {"auto.offset.reset": "latest"}}
        if self.k_user and self.k_password:
            config['security.protocol'] = 'SASL_PLAINTEXT'
            config['sasl.mechanism'] = 'SCRAM-SHA-256'
            config['sasl.username'] = self.k_user
            config['sasl.password'] = self.k_password

        consumer = Consumer(config)
        # assign the requested partition explicitly and start from one end of it
        offset = OFFSET_END if offset_end else OFFSET_BEGINNING
        pt = TopicPartition(self.topic, self.partition, offset)
        consumer.assign([pt])
        # consumer.seek(pt)

        try:
            while True:
                ret = consumer.consume(num_messages=100, timeout=0.1)
                if not ret:
                    # consume() returns an empty list when the timeout expires
                    continue
                for msg in ret:
                    if msg.error() is None:
                        # raw protobuf binary as written to Kafka by canal
                        yield msg.value()
                    elif msg.error().code() == KafkaError._PARTITION_EOF:
                        # reached the current end of the partition, keep polling
                        continue
                    else:
                        raise Exception(msg.error())
        except KeyboardInterrupt:
            pass
        except Exception as e:
            print(e)
        finally:
            consumer.close()


class Decoder:

    @staticmethod
    def create_canal_message(kafka_message):
        data = kafka_message
        packet = CanalProtocol_pb2.Packet()
        packet.MergeFromString(data)

        message = dict(id=0, entries=[])
        # The message was written to Kafka by canal, so this should always hold:
        # if packet.type == CanalProtocol_pb2.PacketType.MESSAGES:
        messages = CanalProtocol_pb2.Messages()
        messages.MergeFromString(packet.body)

        for item in messages.messages:
            entry = EntryProtocol_pb2.Entry()
            entry.MergeFromString(item)
            message['entries'].append(entry)

        return message


if __name__ == '__main__':
    version = 'canal_kafka_protobuf_consume 0.1.0'
    arguments = docopt(__doc__, version=version)

    consumer = MyConsumer(arguments)

    # honour --from-beginning: start at the earliest offset instead of the latest
    for message in consumer.messages(offset_end=not consumer.from_beginning):
        canal_message = Decoder.create_canal_message(message)
        entries = canal_message['entries']
        for entry in entries:
            entry_type = entry.entryType
            if entry_type in [EntryProtocol_pb2.EntryType.TRANSACTIONBEGIN, EntryProtocol_pb2.EntryType.TRANSACTIONEND]:
                continue
            row_change = EntryProtocol_pb2.RowChange()
            row_change.MergeFromString(entry.storeValue)
            # event_type = row_change.eventType
            header = entry.header
            database = header.schemaName
            table = header.tableName
            binlog_file = header.logfileName
            binlog_pos = header.logfileOffset
            characterset = header.serverenCode
            es = header.executeTime
            gtid = header.gtid
            event_type = header.eventType
            for row in row_change.rowDatas:
                format_data = dict()
                if event_type == EntryProtocol_pb2.EventType.DELETE:
                    for column in row.beforeColumns:
                        format_data.update({
                            column.name: column.value
                        })
                elif event_type == EntryProtocol_pb2.EventType.INSERT:
                    for column in row.afterColumns:
                        format_data.update({
                            column.name: column.value
                        })
                else:
                    format_data['before'] = dict()
                    format_data['after'] = dict()
                    for column in row.beforeColumns:
                        format_data['before'][column.name] = column.value
                    for column in row.afterColumns:
                        format_data['after'][column.name] = column.value
                data = dict(
                    db=database,
                    table=table,
                    event_type=EntryProtocol_pb2.EventType.Name(event_type),
                    is_ddl=row_change.isDdl,
                    binlog_file=binlog_file,
                    binlog_pos=binlog_pos,
                    characterset=characterset,
                    es=es,
                    gtid=gtid,
                    data=format_data,
                )
                print(data)

Example output

#python canal_kafka_protobuf_consume.py --bootstrap-servers=172.16.xx.xx:9092,172.16.xx.xx:9092,172.16.xx.xx:9092 --topic=fanboshi.monitor_delay
{'db': 'fanboshi', 'table': 'monitor_delay', 'event_type': 'INSERT', 'is_ddl': False, 'binlog_file': 'mysql-bin.000006', 'binlog_pos': 469896982, 'characterset': 'UTF-8', 'es': 1584535911000, 'gtid': 'c30c6a02-4e32-11ea-84ec-fa163edcd14e:1-2940100', 'data': {'id': '5', 'ctime': '2020-03-18 20:51:51'}}
{'db': 'fanboshi', 'table': 'monitor_delay', 'event_type': 'INSERT', 'is_ddl': False, 'binlog_file': 'mysql-bin.000006', 'binlog_pos': 469897261, 'characterset': 'UTF-8', 'es': 1584535912000, 'gtid': 'c30c6a02-4e32-11ea-84ec-fa163edcd14e:1-2940101', 'data': {'id': '6', 'ctime': '2020-03-18 20:51:52'}}
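
To replay the topic from the start, or to connect to a SASL-enabled cluster, the remaining flags from the usage section can be combined. The command below is only an illustration: the broker addresses are placeholders as above, canal_reader is a hypothetical Kafka user, and its password is prompted for interactively.

#python canal_kafka_protobuf_consume.py --bootstrap-servers=172.16.xx.xx:9092,172.16.xx.xx:9092 --k_user=canal_reader --topic=fanboshi.monitor_delay --from-beginning=true --verbose=1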