实验环境:
- 系统:Ubuntu 20.04 Server
- 节点主机,/dev/sdb是新增硬盘
| 主机名 | IP地址 |
| ------ | -------------- |
| node1  | 172.16.0.31/24 |
| node2  | 172.16.0.32/24 |
Static hostname: node2
Icon name: computer-vm
Chassis: vm
Machine ID: cb06f418971e416bbca55d643cd37974
Boot ID: bc1cd664405d411a85eeae0779633db3
Virtualization: vmware
Operating System: Ubuntu 20.04.3 LTS
Kernel: Linux 5.4.0-91-generic
Architecture: x86-64
实验步骤:
- 设置网络,主机名
# node1: 172.16.0.31/24 node2:172.16.0.32/24
# 修改IP
nmcli con mod ens160 ipv4.method manual ipv4.addresses 172.16.0.31/24 ipv4.gateway 172.16.0.253 ipv4.dns 172.16.0.2,172.16.0.3 connection.autoconnect yes
# 修改主机名
hostnamectl set-hostname node1
# 修改DNS记录
vi /etc/hosts
# Cluster Nodes
172.16.0.31 node1
172.16.0.32 node2
# 创建物理卷(PV)
pvcreate /dev/sdb
# 创建卷组(VG)
vgcreate drbd-vg /dev/sdb
# 创建逻辑卷(LV)
lvcreate -L 1G -n r0 drbd-vg
# 禁止DRBD磁盘使用多路径
vi /etc/multipath.conf
# 添加规则
blacklist {
devnode "^drbd[0-9]"
}
# 安装iSCSI-Target服务
apt install tgt
# 关闭防火墙
ufw disable
# 如果不关闭防火墙,则需要放行端口
# DRBD端口
ufw allow 7790/tcp
# corosync端口
ufw allow 5405/udp
# iSCSI-Target端口(tgt默认监听3260/tcp,ufw没有内置iscsi-target应用配置)
ufw allow 3260/tcp
ufw reload
- 安装和配置DRBD
# 安装DRBD
apt install drbd-utils
# 加载模块
modprobe drbd
# 检查模块加载
lsmod | grep drbd
# 开机自动加载模块
echo drbd >> /etc/modules-load.d/modules.conf
# 修改全局配置
cd /etc/drbd.d/
vi global_common.conf
# DRBD is the result of over a decade of development by LINBIT.
# In case you need professional services for DRBD or have
# feature requests visit http://www.linbit.com
global {
usage-count no;
udev-always-use-vnr;
}
common {
# 脑裂处理
handlers {
pri-on-incon-degr "/usr/lib/drbd/notify-pri-on-incon-degr.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
pri-lost-after-sb "/usr/lib/drbd/notify-pri-lost-after-sb.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
local-io-error "/usr/lib/drbd/notify-io-error.sh; /usr/lib/drbd/notify-emergency-shutdown.sh; echo o > /proc/sysrq-trigger ; halt -f";
fence-peer "/usr/lib/drbd/crm-fence-peer.sh";
}
disk {
# 磁盘IO错误时分离
on-io-error detach;
fencing resource-only;
}
net {
protocol C;
}
}
# 创建资源r0
vi r0.res
resource "r0" {
# DRBD磁盘
device "/dev/drbd0";
# 存储
disk "/dev/mapper/drbd--vg-r0";
meta-disk internal;
# 节点1
on "node1" {
# 地址
address 172.16.0.31:7790;
}
on "node2" {
address 172.16.0.32:7790;
}
}
- 安装和配置Corosync,Pacemaker
# 安装corosync,pacemaker,以及后续crm命令所需的crmsh管理工具
apt install pacemaker crmsh
# 编辑corosync.conf
vi /etc/corosync/corosync.conf
totem {
version: 2
secauth: off
cluster_name: mycluster
transport: udpu
}
nodelist {
node {
name: node1
ring0_addr: 172.16.0.31
nodeid: 1
}
node {
name: node2
ring0_addr: 172.16.0.32
nodeid: 2
}
}
quorum {
provider: corosync_votequorum
two_node: 1
wait_for_all: 1
last_man_standing: 1
auto_tie_breaker: 0
}
# 重启服务
systemctl restart corosync.service pacemaker.service
- 配置iSCSI集群
# 全局设置
# 集群保护
crm configure property stonith-enabled=false
# 忽略quorum
crm configure property no-quorum-policy=ignore
# 资源粘度(资源级默认值,应使用rsc_defaults而不是property设置)
crm configure rsc_defaults resource-stickiness=200
# 资源故障多少次后触发迁移(同样属于rsc_defaults)
crm configure rsc_defaults migration-threshold=1
# 创建资源
primitive p_drbd_res ocf:linbit:drbd params drbd_resource="r0" op monitor timeout="20" interval="20" role="Slave" op monitor timeout="20" interval="10" role="Master"
# 节点配置
ms ms_drbd_res p_drbd_res meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true" interleave="true"
# 虚拟IP
primitive p_ip ocf:heartbeat:IPaddr2 params ip="172.16.0.30" cidr_netmask="24" op monitor timeout="20s" interval="10s" depth="0"
# iSCSI-Target(注意:Target与LUN的IQN必须完全一致)
primitive r0_target ocf:heartbeat:iSCSITarget params implementation="tgt" iqn="iqn.2021-12.drbd.storage:target.ha" tid="1" allowed_initiators="ALL" op monitor timeout="10s" interval="10s" depth="0"
# iSCSI-Target-Lun
primitive r0_target_lun1 ocf:heartbeat:iSCSILogicalUnit params implementation="tgt" target_iqn="iqn.2021-12.drbd.storage:target.ha" lun="1" path="/dev/drbd0" op monitor timeout="10s" interval="10s" depth="0"
# 创建资源组
group r0_group r0_target r0_target_lun1 p_ip
# DRBD运行规则
order o_drbd_before_p_drbd_res inf: ms_drbd_res:promote r0_group:start
colocation c_r0_on_drbd inf: r0_group ms_drbd_res:Master
- 测试集群
#模拟node1掉线后,ping vip只有1个丢包,故障转移成功
root@node2:/etc/drbd.d# crm status
Cluster Summary:
* Stack: corosync
* Current DC: node2 (version 2.0.3-4b1f869f0f) - partition with quorum
* Last updated: Fri Dec 24 13:12:25 2021
* Last change: Fri Dec 24 13:11:37 2021 by root via cibadmin on node2
* 2 nodes configured
* 5 resource instances configured
Node List:
* Online: [ node2 ]
* OFFLINE: [ node1 ]
Full List of Resources:
* Clone Set: ms_drbd_res [p_drbd_res] (promotable):
* Masters: [ node2 ]
* Stopped: [ node1 ]
* Resource Group: r0_group:
* r0_target (ocf::heartbeat:iSCSITarget): Started node2
* r0_target_lun1 (ocf::heartbeat:iSCSILogicalUnit): Started node2
* p_ip (ocf::heartbeat:IPaddr2): Started node2
- 参考资料
# https://linux.die.net/man/5/targets.conf
# http://crmsh.github.io/man-2.0/
# http://www.interbit.com.pl/wp-content/uploads/2013/07/ha-iscsi.pdf
# https://linbit.com/drbd-user-guide/drbd-guide-9_0-cn/
# https://www.mankier.com/package/resource-agents
# https://www.mankier.com/7/ocf_linbit_drbd