使用canal同步mysql數據到es

需求:傳統數據庫搜索速度非常慢,需要依賴搜索引擎實現快速搜索。(使用canal實現無侵入數據異構)

canal server:用於僞裝mysql slave獲取mysql binlog。
canal adapter:提供多種中間件的導入,如kafka、rocketmq、hbase、elasticsearch,可直接配置使用。
                          注:canal adapter 1.1.4 只支持 es 6.x 版本。
elasticsearch(es):彈性搜索引擎底層使用lucene實現,提供集羣分片擴展。
kibana:es可視化工具,用於可視化es數據,提供各種工具如:開發工具、索引管理等。

1、創建異構表


-- Disable foreign-key checks while the table is dropped and recreated.
SET FOREIGN_KEY_CHECKS=0;

-- ----------------------------
-- Table structure for t_user
-- Source table whose changes canal replicates to Elasticsearch.
-- ----------------------------
DROP TABLE IF EXISTS `t_user`;
CREATE TABLE `t_user` (
  `id` bigint(20) NOT NULL AUTO_INCREMENT,
  `name` varchar(255) NOT NULL COMMENT '用戶姓名',
  `gender` tinyint(4) DEFAULT NULL COMMENT '性別1:男2:女',
  `phone` varchar(20) NOT NULL COMMENT '手機號碼',
  `email` varchar(50) DEFAULT NULL COMMENT '郵箱',
  `status` tinyint(4) NOT NULL DEFAULT '1' COMMENT '狀態1:啓用2:禁用',
  `birthday` date DEFAULT NULL COMMENT '出生日期',
  `id_card` varchar(20) DEFAULT NULL COMMENT '證件號碼',
  `head_portrait` varchar(255) DEFAULT NULL COMMENT '頭像',
  `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '創建時間',
  -- ON UPDATE keeps the "last modified" timestamp current on every row change;
  -- without it the column would stay frozen at the insert time.
  `update_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '最後修改時間',
  PRIMARY KEY (`id`),
  UNIQUE KEY `uk_user_phone` (`phone`) USING BTREE
-- No AUTO_INCREMENT=<n> table option: that is a dump artefact, and a freshly
-- created (empty) table should start numbering at 1.
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='用戶表';

-- Restore foreign-key checks for the rest of the session.
SET FOREIGN_KEY_CHECKS=1;

2、檢查mysql binlog存儲方式

[mysqld]
log-bin=mysql-bin # enable the binary log; adding this line is enough
binlog-format=ROW # use ROW format
server_id=1 # required for MySQL replication; must not be the same as canal's slaveId

    創建canal用戶用於拉數據

-- Dedicated account that canal uses to pull data from MySQL.
-- The host part is spelled out; a bare user name defaults to '%'.
CREATE USER 'canal'@'%' IDENTIFIED BY 'canal';

-- Read access plus the replication privileges, on every schema and table.
GRANT SELECT, REPLICATION SLAVE, REPLICATION CLIENT
    ON *.*
    TO 'canal'@'%';

-- Broader alternative, left disabled:
-- GRANT ALL PRIVILEGES ON *.* TO 'canal'@'%' ;

FLUSH PRIVILEGES;

3、添加canal server實例配置文件instance.properties。

################################################
# canal server instance configuration (instance.properties).
# Values written in Chinese below are placeholders from the tutorial and
# must be replaced with real connection details before use.
## mysql serverId , v1.0.26+ will autoGen
# canal.instance.mysql.slaveId=0

# enable gtid use true/false
canal.instance.gtidon=false

# position info
# master.address is a placeholder ("configure the database address").
# NOTE(review): presumably host:port, like the 127.0.0.1:3306 used in the
# commented tsdb url below -- confirm against the canal documentation.
# The journal/position/timestamp/gtid values are left empty here.
canal.instance.master.address=配置數據庫地址
canal.instance.master.journal.name=
canal.instance.master.position=
canal.instance.master.timestamp=
canal.instance.master.gtid=

# rds oss binlog
# (left empty in this tutorial)
canal.instance.rds.accesskey=
canal.instance.rds.secretkey=
canal.instance.rds.instanceId=

# table meta tsdb info
canal.instance.tsdb.enable=true
#canal.instance.tsdb.url=jdbc:mysql://127.0.0.1:3306/canal_tsdb
#canal.instance.tsdb.dbUsername=canal
#canal.instance.tsdb.dbPassword=canal

#canal.instance.standby.address =
#canal.instance.standby.journal.name =
#canal.instance.standby.position =
#canal.instance.standby.timestamp =
#canal.instance.standby.gtid=

# username/password
# Placeholders ("configure the database account / password").
canal.instance.dbUsername=配置數據庫賬號
canal.instance.dbPassword=配置數據庫密碼
canal.instance.connectionCharset = UTF-8
# enable druid Decrypt database password
canal.instance.enableDruid=false
#canal.instance.pwdPublicKey=MFwwDQYJKoZIhvcNAQEBBQADSwAwSAJBALK4BUxdDltRRE5/zXpVEVPUgunvscYFtEip3pmLlhrWpacX7y7GCMo2/JM6LeHmiiNdH1FWgGCpUfircSwlWKUCAwEAAQ==

# table regex
# After properties unescaping the pattern is .*\..* , i.e. "anything.anything":
# every table in every schema is matched.
canal.instance.filter.regex=.*\\..*
# table black regex
canal.instance.filter.black.regex=
# table field filter(format: schema1.tableName1:field1/field2,schema2.tableName2:field1/field2)
#canal.instance.filter.field=test1.t_product:id/subject/keywords,test2.t_company:id/name/contact/ch
# table field black filter(format: schema1.tableName1:field1/field2,schema2.tableName2:field1/field2)
#canal.instance.filter.black.field=test1.t_product:subject/product_image,test2.t_company:id/name/contact/ch

# mq config
canal.mq.topic=example
# dynamic topic route by schema or table regex
#canal.mq.dynamicTopic=mytest1.user,mytest2\\..*,.*\\..*
canal.mq.partition=0
# hash partition config
#canal.mq.partitionsNum=3
#canal.mq.partitionHash=test.table:id^name,.*\\..*
#################################################

4、修改canal adapter配置文件application.yml

# canal adapter configuration (application.yml).
# Values written in Chinese below are placeholders from the tutorial and
# must be replaced with real connection details before use.
server:
  port: 8081
spring:
  jackson:
    date-format: yyyy-MM-dd HH:mm:ss
    time-zone: GMT+8
    default-property-inclusion: non_null

canal.conf:
  mode: tcp # kafka rocketMQ
  # Address of the canal server the adapter connects to in tcp mode.
  canalServerHost: 127.0.0.1:11111
#  zookeeperHosts: slave1:2181
#  mqServers: 127.0.0.1:9092 #or rocketmq
#  flatMessage: true
  batchSize: 500
  syncBatchSize: 1000
  retries: 0
  timeout:
  accessKey:
  secretKey:
  # Source databases, keyed by name. The ES mapping file in this tutorial
  # refers to this entry through "dataSourceKey: defaultDS".
  srcDataSources:
    defaultDS:
      # Placeholders: JDBC connection url, account, password.
      url: jdbc連接地址
      username: 賬號
      password: 密碼
  canalAdapters:
  - instance: example # canal instance Name or mq topic name
    groups:
    # The ES mapping file in this tutorial uses the same groupId (g1).
    - groupId: g1
      outerAdapters:
      - name: logger
#      - name: rdb
#        key: mysql1
#        properties:
#          jdbc.driverClassName: com.mysql.jdbc.Driver
#          jdbc.url: jdbc:mysql://127.0.0.1:3306/mytest2?useUnicode=true
#          jdbc.username: root
#          jdbc.password: 121212
#      - name: rdb
#        key: oracle1
#        properties:
#          jdbc.driverClassName: oracle.jdbc.OracleDriver
#          jdbc.url: jdbc:oracle:thin:@localhost:49161:XE
#          jdbc.username: mytest
#          jdbc.password: m121212
#      - name: rdb
#        key: postgres1
#        properties:
#          jdbc.driverClassName: org.postgresql.Driver
#          jdbc.url: jdbc:postgresql://localhost:5432/postgres
#          jdbc.username: postgres
#          jdbc.password: 121212
#          threads: 1
#          commitSize: 3000
#      - name: hbase
#        properties:
#          hbase.zookeeper.quorum: 127.0.0.1
#          hbase.zookeeper.property.clientPort: 2181
#          zookeeper.znode.parent: /hbase
      # Elasticsearch adapter, configured for rest mode on port 9200.
      - name: es
        hosts: 127.0.0.1:9200 #127.0.0.1:9300 # 127.0.0.1:9200 for rest mode
        properties:
          mode: rest #transport # or rest
          # security.auth: test:123456 #  only used for rest mode
          cluster.name: elasticsearch

5、在canal adapter的conf/es文件夾下添加索引映射yml文件(例如user.yml)

# ES adapter mapping: which source rows go to which Elasticsearch index.
# dataSourceKey, destination and groupId repeat the values used in the
# adapter's application.yml (srcDataSources.defaultDS, instance "example",
# groupId "g1").
dataSourceKey: defaultDS
destination: example
groupId: g1
esMapping:
  # Target index and type.
  _index: user
  _type: _doc
  # Column of the sql result below that is used as the document _id.
  _id: id
  # Only these five t_user columns are selected for the index.
  sql: "select u.id,u.name,u.phone,u.gender,u.create_time from t_user u"
  commitBatch: 3000

6、es添加索引

# Delete the index (start from a clean state)
DELETE /user

# Create the index together with its mapping.
# create_time declares an explicit format list: without one, the default date
# format rejects "yyyy-MM-dd HH:mm:ss" values such as the sample document used
# in this tutorial. ISO-8601 and epoch millis stay accepted as well.
# NOTE(review): the canal ES adapter is believed to emit ISO-8601 timestamps
# for date fields -- confirm against the adapter version in use.
PUT /user?include_type_name=true
{
  "mappings":{
    "_doc":{
      "properties":{
        "id":{"type":"long"},
        "name":{"type":"text"},
        "phone":{"type":"text"},
        "gender":{"type":"long"},
        "create_time":{
          "type":"date",
          "format":"yyyy-MM-dd HH:mm:ss||strict_date_optional_time||epoch_millis"
        }
      }
    }
  }
}

# Add the mapping to an already existing index.
# The date format must be identical to the one the field was created with;
# a different format on an existing field is rejected as a mapping conflict.
PUT /user/_mapping/_doc?include_type_name=true
{
  "properties":{
    "id":{"type":"long"},
    "name":{"type":"text"},
    "phone":{"type":"text"},
    "gender":{"type":"long"},
    "create_time":{
      "type":"date",
      "format":"yyyy-MM-dd HH:mm:ss||strict_date_optional_time||epoch_millis"
    }
  }
}

# Show the mapping of the index
GET /user/_mapping

# Query the documents stored in the index
POST /user/_search
{
  "query": { "match_all": {}}
}

# Add a sample document.
# The document is written with an explicit _id equal to its "id" field, which
# is how the adapter mapping (_id: id) addresses documents. With an
# auto-generated _id, a later sync of the same row would create a second
# document instead of updating this one.
PUT /user/_doc/1
{
  "id":1,
  "name":"xxx",
  "gender":1,
  "phone":"17310128850",
  "create_time":"2019-10-18 10:50:00"
}

7、使用變更sql測試

-- Insert a test row so that a change event is written to the binlog.
INSERT INTO t_user (name, phone, gender, create_time)
VALUES ('xxx1', '111', 1, NOW());

-- Modify the row inserted above. It is addressed through the unique phone
-- number rather than a hard-coded id, because the generated id depends on the
-- table's current AUTO_INCREMENT value. update_time is set explicitly so the
-- "last modified" column is correct whether or not the table defines
-- ON UPDATE CURRENT_TIMESTAMP.
UPDATE t_user
SET name = 'ccc',
    update_time = NOW()
WHERE phone = '111';

8、修改kibana配置文件(kibana.yml),修改es地址和語言,查看es數據。

# Elasticsearch address(es) Kibana connects to; "es地址" is a placeholder
# ("ES address") to be replaced with the real URL.
elasticsearch.hosts: ["es地址"]
# UI language: Simplified Chinese.
i18n.locale: "zh-CN"

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章