ceph 隔離級別

    ceph的隔離級別默認是host級別,也就是說同一份數據的兩個副本不會同時落在同一個host上的磁盤中。這樣就保證了一臺機器故障不會導致數據不可用。但是如果同時有兩臺機器故障呢?這就有可能丟失數據,造成嚴重後果。甚至遇到一個機架突然掉電,整個ceph集羣都可能不可用。解決辦法就是把隔離級別提高到機架級別;爲了避免重大自然災害導致一個機房損壞、使得數據丟失,隔離級別還可以提高到機房級別,甚至更高。

    下面的例子就是把隔離級別提高到機架(rack)級別。

[root@ceph-node1 opt]# cat decrushmap 
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable chooseleaf_stable 1
tunable straw_calc_version 1
tunable allowed_bucket_algs 54

# devices
device 0 osd.0 class hdd
device 1 osd.1 class ssd
device 2 osd.2 class hdd
device 3 osd.3 class ssd
device 4 osd.4 class hdd
device 5 osd.5 class ssd
device 6 osd.6 class hdd
device 7 osd.7 class hdd
device 8 osd.8 class hdd
device 9 osd.9 class hdd
device 10 osd.10 class hdd
device 11 osd.11 class hdd

# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 region
type 10 root

# buckets
host ceph-node1 {
	id -3		# do not change unnecessarily
	id -4 class hdd		# do not change unnecessarily
	id -15 class ssd		# do not change unnecessarily
	# weight 0.058
	alg straw2
	hash 0	# rjenkins1
	item osd.0 weight 0.029
	item osd.1 weight 0.029
}
host ceph-node2 {
	id -5		# do not change unnecessarily
	id -6 class hdd		# do not change unnecessarily
	id -16 class ssd		# do not change unnecessarily
	# weight 0.058
	alg straw2
	hash 0	# rjenkins1
	item osd.2 weight 0.029
	item osd.3 weight 0.029
}
host ceph-node3 {
	id -7		# do not change unnecessarily
	id -8 class hdd		# do not change unnecessarily
	id -17 class ssd		# do not change unnecessarily
	# weight 0.058
	alg straw2
	hash 0	# rjenkins1
	item osd.4 weight 0.029
	item osd.5 weight 0.029
}
host ceph-node4 {
	id -9		# do not change unnecessarily
	id -10 class hdd		# do not change unnecessarily
	id -18 class ssd		# do not change unnecessarily
	# weight 0.058
	alg straw2
	hash 0	# rjenkins1
	item osd.6 weight 0.029
	item osd.7 weight 0.029
}
host ceph-node5 {
	id -11		# do not change unnecessarily
	id -12 class hdd		# do not change unnecessarily
	id -19 class ssd		# do not change unnecessarily
	# weight 0.058
	alg straw2
	hash 0	# rjenkins1
	item osd.8 weight 0.029
	item osd.9 weight 0.029
}
host ceph-node6 {
	id -13		# do not change unnecessarily
	id -14 class hdd		# do not change unnecessarily
	id -20 class ssd		# do not change unnecessarily
	# weight 0.058
	alg straw2
	hash 0	# rjenkins1
	item osd.10 weight 0.029
	item osd.11 weight 0.029
}


# rack

rack rack01 {
        id -101          # do not change unnecessarily
        id -102 class hdd                # do not change unnecessarily
        id -103 class ssd                # do not change unnecessarily
        # weight 0.058
        alg straw2
        hash 0  # rjenkins1
        item ceph-node1 weight 0.058
        item ceph-node2 weight 0.058
}

rack rack02 {
        id -104          # do not change unnecessarily
        id -105 class hdd                # do not change unnecessarily
        id -106 class ssd                # do not change unnecessarily
        # weight 0.058
        alg straw2
        hash 0  # rjenkins1
        item ceph-node3 weight 0.058
        item ceph-node4 weight 0.058
}

rack rack03 {
        id -107          # do not change unnecessarily
        id -108 class hdd                # do not change unnecessarily
        id -109 class ssd                # do not change unnecessarily
        # weight 0.058
        alg straw2
        hash 0  # rjenkins1
        item ceph-node5 weight 0.058
        item ceph-node6 weight 0.058
}


root default {
	id -110		# do not change unnecessarily
	id -111 class hdd		# do not change unnecessarily
	id -112 class ssd		# do not change unnecessarily
	# weight 0.354
	alg straw2
	hash 0	# rjenkins1
	item rack01 weight 0.118
	item rack02 weight 0.118
	item rack03 weight 0.118
}


# rules
rule replicated_rule {
	id 0
	type replicated
	min_size 1
	max_size 10
	step take default class hdd
	step chooseleaf firstn 0 type rack
	step emit
}
rule replicated_ssd {
	id 1
	type replicated
	min_size 1
	max_size 10
	step take default class ssd
	step chooseleaf firstn 0 type rack
	step emit
}


[root@ceph-node1 opt]# ceph osd tree
ID   CLASS WEIGHT  TYPE NAME               STATUS REWEIGHT PRI-AFF 
-110       0.35399 root default                                    
-101       0.11800     rack rack01                                 
  -3       0.05800         host ceph-node1                         
   0   hdd 0.02899             osd.0           up  1.00000 1.00000 
   1   ssd 0.02899             osd.1           up  1.00000 1.00000 
  -5       0.05800         host ceph-node2                         
   2   hdd 0.02899             osd.2           up  1.00000 1.00000 
   3   ssd 0.02899             osd.3           up  1.00000 1.00000 
-104       0.11800     rack rack02                                 
  -7       0.05800         host ceph-node3                         
   4   hdd 0.02899             osd.4           up  1.00000 1.00000 
   5   ssd 0.02899             osd.5           up  1.00000 1.00000 
  -9       0.05800         host ceph-node4                         
   6   hdd 0.02899             osd.6           up  1.00000 1.00000 
   7   hdd 0.02899             osd.7           up  1.00000 1.00000 
-107       0.11800     rack rack03                                 
 -11       0.05800         host ceph-node5                         
   8   hdd 0.02899             osd.8           up  1.00000 1.00000 
   9   hdd 0.02899             osd.9           up  1.00000 1.00000 
 -13       0.05800         host ceph-node6                         
  10   hdd 0.02899             osd.10          up  1.00000 1.00000 
  11   hdd 0.02899             osd.11          up  1.00000 1.00000

測試發現達到了預期效果:

[root@ceph-node1 opt]# ceph osd map stat rbd_data.10ab6b8b4567.0000000000000042
osdmap e73 pool 'stat' (1) object 'rbd_data.10ab6b8b4567.0000000000000042' -> pg 1.fa3e81bf (1.3f) -> up ([11,2,4], p11) acting ([11,2,4], p11)
# pg的3個副本分別放在了不同rack上(osd.11 在 rack03,osd.2 在 rack01,osd.4 在 rack02)

更高級別的就不再演示了。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章