Canal + Kafka in Practice: Real-Time ETL

Canal parses a MySQL database's binlog (it connects to MySQL as if it were a replica), reformats the row data, and publishes it to Kafka. This gives us a simple real-time ETL pipeline.

application.yml:

spring:
  application:
    name: canal
  canal:
    topic-prefix: etl_timely.
    destination:
      example: 0
    username:
    password:
    delay-limit: 2000
  kafka:
    bootstrap-servers: 127.0.0.1:9092
    producer:
      acks: 1
      batch-size: 100
      client-id: canal
      retries: 3
      key-serializer: org.apache.kafka.common.serialization.StringSerializer
      value-serializer: org.apache.kafka.common.serialization.StringSerializer
      bootstrap-servers: 127.0.0.1:9092
    consumer:
      enable-auto-commit: true
      group-id: etl
      bootstrap-servers: 127.0.0.1:9092
    template:
      default-topic: etl_canal
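
The config above also declares a consumer group (etl), although this post only implements the producer side. As a minimal sketch of the downstream side, assuming the etl_timely. topic prefix from the config above (EtlConsumer is a hypothetical class name, not part of this project), a Spring Kafka listener might look like:

package com.kexin.canal.service;

import org.springframework.kafka.annotation.KafkaListener;
import org.springframework.stereotype.Component;

@Component
public class EtlConsumer {

    // subscribe to every per-table topic produced below (topic-prefix + table name);
    // the group id matches spring.kafka.consumer.group-id in the yml above
    @KafkaListener(topicPattern = "etl_timely\\..*", groupId = "etl")
    public void onMessage(String message) {
        // downstream ETL work goes here, e.g. loading into a warehouse
        System.out.println("received: " + message);
    }
}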

pom.xml:

        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>

        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>

        <!-- https://mvnrepository.com/artifact/com.alibaba.otter/canal.client -->
        <dependency>
            <groupId>com.alibaba.otter</groupId>
            <artifactId>canal.client</artifactId>
            <version>1.0.25</version>
        </dependency>
        <dependency>
            <groupId>org.bouncycastle</groupId>
            <artifactId>bcprov-jdk15on</artifactId>
            <version>RELEASE</version>
            <scope>compile</scope>
        </dependency>

        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-configuration-processor</artifactId>
            <optional>true</optional>
        </dependency>


        <!--kafka-->
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_2.11</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
        </dependency>

        <dependency>
            <groupId>org.springframework.kafka</groupId>
            <artifactId>spring-kafka</artifactId>
        </dependency>

        <!--disruptor-->
        <dependency>
            <groupId>com.lmax</groupId>
            <artifactId>disruptor</artifactId>
            <version>3.4.2</version>
        </dependency>

        <!-- fastjson, required by KafkaService (com.alibaba.fastjson.JSON) -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.47</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.projectlombok/lombok -->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.0</version>
            <scope>provided</scope>
        </dependency>

CanalClient:

package com.kexin.canal.client;

import com.alibaba.otter.canal.client.CanalConnector;
import com.alibaba.otter.canal.client.CanalConnectors;
import com.alibaba.otter.canal.common.utils.AddressUtils;
import com.alibaba.otter.canal.protocol.Message;
import com.kexin.canal.config.DisruptorConfig;
import com.kexin.canal.service.KafkaService;
import com.lmax.disruptor.BlockingWaitStrategy;
import com.lmax.disruptor.EventFactory;
import com.lmax.disruptor.RingBuffer;
import com.lmax.disruptor.dsl.Disruptor;
import com.lmax.disruptor.dsl.ProducerType;
import lombok.Getter;
import lombok.Setter;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.CommandLineRunner;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;

import java.net.InetSocketAddress;
import java.util.Map;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;

/**
 * @Author KeXin
 * @Date 2018/7/20 5:16 PM
 **/
@Component
@ConfigurationProperties(prefix = "spring.canal")
public class CanalClient implements CommandLineRunner {

    @Getter
    @Setter
    private String username;

    @Getter
    @Setter
    private String password;

    @Getter
    @Setter
    private Map<String, String> destination;

    @Autowired
    KafkaService kafkaService;

    @Override
    public void run(String... args) {
        int port = 11111;
        startClient(port, destination, username, password);
    }

    /**
     * Watch the database for changes; messages are handed to a Disruptor for processing.
     */
    public void startClient(int port, Map<String, String> canalDestination, String canalUsername, String canalPassword) {
        canalDestination.forEach((destination, partition) -> {
            // one lock-free queue (Disruptor ring buffer) per canal destination
            RingBuffer<DisruptorConfig.Element> buffer = getDisruptor().start();
            new Thread(() -> {
                // connect to the canal server
                CanalConnector connector = CanalConnectors.newSingleConnector(new InetSocketAddress(AddressUtils.getHostIp(),
                        port), destination, canalUsername, canalPassword);
                int batchSize = 100;
                try {
                    connector.connect();
                    connector.subscribe();
                    connector.rollback();
                    while (true) {
                        Message message = connector.getWithoutAck(batchSize); // fetch up to batchSize entries without acking
                        long batchId = message.getId();
                        int size = message.getEntries().size();
                        if (batchId == -1 || size == 0) {
                            try {
                                Thread.sleep(1000);
                            } catch (InterruptedException e) {
                                e.printStackTrace();
                            }
                        } else {
                            // publish the entries into the ring buffer
                            long sequence = buffer.next();

                            DisruptorConfig.Element element = buffer.get(sequence);
                            element.setValue(message.getEntries());

                            buffer.publish(sequence);
                        }

                        connector.ack(batchId); // confirm the batch as processed
                        // connector.rollback(batchId); // on failure, roll the batch back
                    }

                } finally {
                    connector.disconnect();
                }
            }).start();
        });

    }
    // set up the lock-free consumer queue
    private Disruptor<DisruptorConfig.Element> getDisruptor() {
        // RingBuffer size; must be a power of 2
        int bufferSize = 1024;
        // blocking wait strategy
        BlockingWaitStrategy strategy = new BlockingWaitStrategy();
        // event handler that forwards messages to Kafka
        DisruptorConfig.CanalEventHandle kafkaEventHandler = new DisruptorConfig.CanalEventHandle(kafkaService);
        // thread factory for the event-processing threads
        ThreadFactory threadFactory = Executors.defaultThreadFactory();
        // event factory
        EventFactory<DisruptorConfig.Element> eventFactory = DisruptorConfig.newEventFactory();

        Disruptor<DisruptorConfig.Element> disruptor = new Disruptor<>(eventFactory, bufferSize, threadFactory, ProducerType.SINGLE, strategy);
        disruptor.handleEventsWith(kafkaEventHandler);
        return disruptor;
    }

}

DisruptorConfig:

package com.kexin.canal.config;


import com.alibaba.otter.canal.protocol.CanalEntry;
import com.kexin.canal.service.KafkaService;
import com.lmax.disruptor.EventFactory;
import com.lmax.disruptor.EventHandler;

import java.util.List;

/**
 * @Author KeXin
 * @Date 2018/7/20 4:45 PM
 **/
public class DisruptorConfig {

    // data class exchanged through the ring buffer
    public static class Element {
        private List<CanalEntry.Entry> value;

        public List<CanalEntry.Entry> getValue() {
            return value;
        }

        public void setValue(List<CanalEntry.Entry> value) {
            this.value = value;
        }

    }

    // factory that pre-allocates ring-buffer events
    public static EventFactory<Element> newEventFactory(){
        return Element::new;
    }

    // handler invoked for each element taken off the ring buffer
    public static class CanalEventHandle implements EventHandler<Element> {
        KafkaService kafkaService;

        public CanalEventHandle(KafkaService kafkaService) {
            this.kafkaService = kafkaService;
        }
        @Override
        public void onEvent(Element element, long sequence, boolean endOfBatch) throws Exception {
            kafkaService.sendMessage(element.getValue());
        }
    }

}
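
A note on the design: the Disruptor decouples fetching from publishing. The canal thread in CanalClient only copies entries into the pre-allocated ring buffer, while CanalEventHandle sends to Kafka on its own thread, so a slow send never stalls the binlog fetch loop. The trade-off, visible in the code above, is that connector.ack() fires right after buffer.publish(), before the Kafka send completes, so a crash between the two can lose messages.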

Parsing the binlog into the format we need:

package com.kexin.canal.service;

import com.alibaba.fastjson.JSON;
import com.alibaba.otter.canal.protocol.CanalEntry;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.kafka.core.KafkaTemplate;
import org.springframework.stereotype.Component;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * @Author KeXin
 * @Date 2018/7/20 4:56 PM
 **/
@Component
public class KafkaService {
    @Autowired
    KafkaTemplate<String, String> kafkaTemplate;

    @Value("${spring.canal.topic-prefix}")
    private String canalTopicPrefix;

    /**
     * Send row-change messages to Kafka, one per changed row.
     * @param entries binlog entries fetched from canal
     */
    public void sendMessage(List<CanalEntry.Entry> entries) {
        for (CanalEntry.Entry entry : entries) {
            if (entry.getEntryType() == CanalEntry.EntryType.TRANSACTIONBEGIN || entry.getEntryType() == CanalEntry.EntryType.TRANSACTIONEND) {
                continue;
            }

            CanalEntry.RowChange rowChange;
            try {
                rowChange = CanalEntry.RowChange.parseFrom(entry.getStoreValue());
            } catch (Exception e) {
                throw new RuntimeException("ERROR ## failed to parse row-change event, data:" + entry.toString(), e);
            }

            CanalEntry.EventType eventType = rowChange.getEventType();
            String tableName = entry.getHeader().getTableName();
            String schemaName = entry.getHeader().getSchemaName();
            long executeTime = entry.getHeader().getExecuteTime();

            // locate the event by binlog file name and offset
            System.out.println(String.format("================> binlog[%s:%s] , name[%s,%s] , eventType : %s",
                    entry.getHeader().getLogfileName(), entry.getHeader().getLogfileOffset(),
                    entry.getHeader().getSchemaName(), entry.getHeader().getTableName(),
                    eventType));

            for (CanalEntry.RowData rowData : rowChange.getRowDatasList()) {

                Map<String, Object> map = new HashMap<>();

                map.put("event_timestamp", executeTime);
                map.put("table_name", tableName);
                map.put("database_name", schemaName);
                Map<String, Object> map_info = new HashMap<>();

                if (eventType == CanalEntry.EventType.DELETE) {
                    map.put("event_op_type", "delete");
                    for (CanalEntry.Column column : rowData.getBeforeColumnsList()) {
                        if (column.getValue() != null && !column.getValue().equals(""))
                            map_info.put(column.getName(), column.getValue());
                    }
                } else if (eventType == CanalEntry.EventType.INSERT) {
                    map.put("event_op_type", "insert");
                    for (CanalEntry.Column column : rowData.getAfterColumnsList()) {
                        map_info.put(column.getName(), column.getValue());
                    }
                } else {
                    map.put("event_op_type", "update");
                    for (CanalEntry.Column column : rowData.getAfterColumnsList()) {
                        map_info.put(column.getName(), column.getValue());
                    }

                    Map<String, Object> beforeMap = new HashMap<>();
                    for (CanalEntry.Column column : rowData.getBeforeColumnsList()) {
                        if (column.getValue() != null && !column.getValue().equals(""))
                            beforeMap.put(column.getName(), column.getValue());
                    }
                    map.put("beforeColumns", beforeMap);
                }
                map.put("map_info",map_info);
                System.out.println(map);
                kafkaTemplate.send( canalTopicPrefix + tableName, JSON.toJSONString(map));

            }
        }
    }

}
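
For reference, an UPDATE to a single row of a hypothetical user table would land on topic etl_timely.user as JSON shaped roughly like this (all values are illustrative):

{
  "event_timestamp": 1532080000000,
  "table_name": "user",
  "database_name": "test",
  "event_op_type": "update",
  "beforeColumns": {"id": "1", "name": "old_name"},
  "map_info": {"id": "1", "name": "new_name"}
}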